35#ifndef MADNESS_TENSOR_MXM_H__INCLUDED
36#define MADNESS_TENSOR_MXM_H__INCLUDED
67 template <
typename T,
typename Q,
typename S>
78 for (
long i=0; i<
dimi; ++i) {
80 for (
long j=0; j<dimj; ++j) {
89 template <
typename T,
typename Q,
typename S>
104 for (
long j=0; j<dimj; ++j) {
105 for (
long i=0; i<
dimi; ++i) {
113 template <
typename T,
typename Q,
typename S>
126 for (
long i=0; i<
dimi; ++i) {
127 for (
long j=0; j<dimj; ++j) {
138 template <
typename T,
typename Q,
typename S>
149 for (
long i=0; i<
dimi; ++i) {
150 for (
long j=0; j<dimj; ++j) {
170 template <
typename aT,
typename bT,
typename cT>
176 for (
long i=0; i<
dimi; ++i,
c+=dimj,++
a) {
177 for (
long j=0; j<dimj; ++j)
c[j] = 0.0;
181 for (
long j=0; j<dimj; ++j) {
189#if defined(HAVE_FAST_BLAS) && !defined(HAVE_INTEL_MKL)
198 template <
typename T>
202 cblas::gemm(
cblas::NoTrans,
cblas::NoTrans,dimj,
dimi,
dimk,
one,
b,dimj,
a,
dimk,
one,
c,dimj);
211 template <
typename T>
215 cblas::gemm(
cblas::NoTrans,
cblas::Trans,dimj,
dimi,
dimk,
one,
b,dimj,
a,
dimi,
one,
c,dimj);
224 template <
typename T>
228 cblas::gemm(
cblas::Trans,
cblas::NoTrans,dimj,
dimi,
dimk,
one,
b,
dimk,
a,
dimk,
one,
c,dimj);
237 template <
typename T>
241 cblas::gemm(
cblas::Trans,
cblas::Trans,dimj,
dimi,
dimk,
one,
b,
dimk,
a,
dimi,
one,
c,dimj);
256 template <
typename T>
262 if (
dimi==0 || dimj==0)
return;
264 for (
long i=0; i<
dimi*dimj; i++)
c[i] = 0.0;
269 cblas::gemm(
cblas::NoTrans,
cblas::Trans,dimj,
dimi,
dimk,
one,
b,
ldb,
a,
dimi,zero,
c,dimj);
277 template <
typename T>
309 template <
typename aT,
typename bT,
typename cT>
313 cblas::gemm(
cblas::NoTrans,
cblas::NoTrans,dimj,
dimi,
dimk,
one,
b,dimj,
a,
dimk,
one,
c,dimj);
322 template <
typename aT,
typename bT,
typename cT>
326 cblas::gemm(
cblas::NoTrans,
cblas::Trans,dimj,
dimi,
dimk,
one,
b,dimj,
a,
dimi,
one,
c,dimj);
335 template <
typename aT,
typename bT,
typename cT>
339 cblas::gemm(
cblas::Trans,
cblas::NoTrans,dimj,
dimi,
dimk,
one,
b,
dimk,
a,
dimk,
one,
c,dimj);
348 template <
typename aT,
typename bT,
typename cT>
352 cblas::gemm(
cblas::Trans,
cblas::Trans,dimj,
dimi,
dimk,
one,
b,
dimk,
a,
dimi,
one,
c,dimj);
367 template <
typename aT,
typename bT,
typename cT>
373 if (
dimi==0 || dimj==0)
return;
375 for (
long i=0; i<
dimi*dimj; i++)
c[i] = 0.0;
380 cblas::gemm(
cblas::NoTrans,
cblas::Trans,dimj,
dimi,
dimk,
one,
b,
ldb,
a,
dimi,zero,
c,dimj);
392 template <
typename T,
typename Q,
typename S>
399 template <
typename T,
typename Q,
typename S>
407 template <
typename T,
typename Q,
typename S>
414 template <
typename T,
typename Q,
typename S>
421 template <
typename aT,
typename bT,
typename cT>
448 for (
long i=0; i<
dimi; ++i,
c+=dimj) {
449 const double*
ai =
a+i;
456 const double*
bk0 =
p;
457 const double*
bk1 =
p+dimj;
458 const double*
bk2 =
p+dimj+dimj;
459 const double*
bk3 =
p+dimj+dimj+dimj;
460 for (
long j=0; j<dimj; ++j) {
466 const double*
bk =
b+
k*dimj;
467 for (
long j=0; j<dimj; ++j) {
495 for (
long i=0; i<
dimi2; i+=2) {
500 for (
long j=0; j<dimj; ++j) {
515 for (
long j=0; j<dimj; ++j) {
541 for (
long i=0; i<
dimi; ++i,
c+=dimj,
a+=
dimk) {
543 for (
long k=0;
k<
dimk4;
k+=4,
p+=4*dimj) {
548 const double*
bk0 =
p;
549 const double*
bk1 =
bk0+dimj;
550 const double*
bk2 =
bk1+dimj;
551 const double*
bk3 =
bk2+dimj;
552 for (
long j=0; j<dimj; ++j) {
558 for (
long j=0; j<dimj; ++j) {
580 long dimj2 = (dimj/2)*2;
589 for (
long i=0; i<
dimi; ++i,
c+=dimj,++
a) {
593 const double*
bj0 =
b;
598 for (
long k=0;
k<
nk; ++
k) {
608 for (
long k=0;
k<
nk; ++
k) {
622 template <
typename aT,
typename bT,
typename cT>
632 effj = (dimj | 3) + 1;
652 for (
long j=0; j<dimj; ++j)
c_work[j] = 0.0;
656 for (
long j=0; j<dimj; ++j) {
677 double*
c,
const double*
a,
const double*
b);
687 double*
c,
const double*
a,
const double*
b) {
708#elif defined(HAVE_IBMBGP)
710 const double*
a,
const double*
b);
double q(double t)
Definition DKops.h:18
Defines BLAS-like functions.
std::complex< double > double_complex
Definition cfft.h:14
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition derivatives.cc:72
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
Macros and tools pertaining to the configuration of MADNESS.
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB, const integer m, const integer n, const integer k, const float alpha, const float *a, const integer lda, const float *b, const integer ldb, const float beta, float *c, const integer ldc)
Multiplies a matrix by a matrix (general matrix-matrix product, C = alpha * op(A) * op(B) + beta * C).
Definition cblas.h:352
@ NoTrans
Definition cblas_types.h:78
@ Trans
Definition cblas_types.h:79
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
static void mTxm_reference(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const Q *MADNESS_RESTRICT a, const S *MADNESS_RESTRICT b)
Matrix += Matrix transpose * matrix ... reference implementation (slow but correct)
Definition mxm.h:91
void mTxmT(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b)
Matrix += Matrix transpose * matrix transpose ... MKL interface version.
Definition mxm.h:238
void mxm(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b)
Matrix += Matrix * matrix ... BLAS/MKL interface version.
Definition mxm.h:199
static void mxm_reference(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const Q *MADNESS_RESTRICT a, const S *MADNESS_RESTRICT b)
Matrix += Matrix * matrix reference implementation (slow but correct)
Definition mxm.h:68
void mTxm(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b)
Matrix += Matrix transpose * matrix ... MKL interface version.
Definition mxm.h:212
static void mxmT_reference(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const Q *MADNESS_RESTRICT a, const S *MADNESS_RESTRICT b)
Matrix += Matrix * matrix transpose ... reference implementation (slow but correct)
Definition mxm.h:114
void mTxmq_reference(long dimi, long dimj, long dimk, cT *MADNESS_RESTRICT c, const aT *a, const bT *b, long ldb=-1)
Matrix = Matrix transpose * matrix ... slow reference implementation.
Definition mxm.h:171
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b, cT *c, const aT *a, const bT *b)
Definition mtxmq.h:96
static void mTxmT_reference(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const Q *MADNESS_RESTRICT a, const S *MADNESS_RESTRICT b)
Matrix += Matrix transpose * matrix transpose reference implementation (slow but correct)
Definition mxm.h:139
void bgq_mtxmq_padded(long dimi, long dimj, long dimk, long extb, __complex__ double *c_x, const __complex__ double *a_x, const __complex__ double *b_x)
Definition bgq_mtxm.cc:10
static XNonlinearSolver< std::vector< Function< T, NDIM > >, T, vector_function_allocator< T, NDIM > > nonlinear_vector_solver(World &world, const long nvec)
Definition nonlinsol.h:284
void mxmT(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b)
Matrix += Matrix * matrix transpose ... MKL interface version.
Definition mxm.h:225
void mTxmq(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b, long ldb=-1)
Matrix = Matrix transpose * matrix ... MKL interface version.
Definition mxm.h:257
static const double b
Definition nonlinschro.cc:119
static const double a
Definition nonlinschro.cc:118
double Q(double a)
Definition relops.cc:20
static const double c
Definition relops.cc:10
static const long k
Definition rk.cc:44
AtomicInt sum
Definition test_atomicint.cc:46
constexpr coord_t one(1.0)