33 #ifndef MADNESS_TENSOR_MTXMQ_H__INCLUDED
34 #define MADNESS_TENSOR_MTXMQ_H__INCLUDED
55 template <
typename aT,
typename bT,
typename cT>
56 void mTxmq(
long dimi,
long dimj,
long dimk,
57 cT* MADNESS_RESTRICT
c,
const aT*
a,
const bT*
b) {
61 cblas::gemm(
cblas::NoTrans,
cblas::Trans,dimj,dimi,dimk,one,
b,dimj,
a,dimi,zero,
c,dimj);
/// Portable reference implementation of matrix-transpose times matrix.
///
/// Computes c(i,j) = sum_k a(k,i) * b(k,j) for contiguous row-major buffers:
///   - a holds dimk rows of dimi entries, so a(k,i) is a[k*dimi + i];
///   - b holds dimk rows of dimj entries, so b(k,j) is b[k*dimj + j];
///   - c holds dimi rows of dimj entries and is overwritten.
///
/// @param dimi  Number of rows of c (and columns of a).
/// @param dimj  Number of columns of c and of b.
/// @param dimk  Length of the contracted index (rows of a and of b).
/// @param c     Output buffer of dimi*dimj elements; previous contents are ignored.
/// @param a     Left operand, dimk*dimi elements, read only.
/// @param b     Right operand, dimk*dimj elements, read only.
template <typename aT, typename bT, typename cT>
void mTxmq(long dimi, long dimj, long dimk,
           cT* MADNESS_RESTRICT c, const aT* a, const bT* b) {
    // One pass per output row i: c advances a full row, a advances one column.
    for (long i = 0; i < dimi; ++i, c += dimj, ++a) {
        for (long j = 0; j < dimj; ++j) c[j] = 0.0;

        // aik_ptr walks down column i of a (stride dimi between rows).
        const aT* aik_ptr = a;
        for (long k = 0; k < dimk; ++k, aik_ptr += dimi) {
            const aT aki = *aik_ptr;  // a(k,i), hoisted out of the j loop
            for (long j = 0; j < dimj; ++j) {
                c[j] += aki * b[k*dimj + j];
            }
        }
    }
}
/// Matrix-transpose times matrix where the rows of b may be padded.
///
/// Computes c(i,j) = sum_k a(k,i) * b(k,j) with
///   - a stored as dimk rows of dimi entries: a(k,i) is a[k*dimi + i];
///   - b stored as dimk rows with row stride ext_b, of which only the first
///     dimj entries are used: b(k,j) is b[k*ext_b + j];
///   - c stored contiguously as dimi rows of dimj entries; it is overwritten.
///
/// When dimj is not a multiple of 4 the result is accumulated in a scratch
/// buffer whose row length is rounded up to the next multiple of 4 and then
/// copied back into c.  When ext_b is not a multiple of 4, b is first repacked
/// into a scratch buffer with that same row length.  If either scratch
/// allocation fails the routine works directly on the caller's storage; the
/// result is the same because the padding entries are never read.
///
/// @param dimi   Number of rows of c (and columns of a).
/// @param dimj   Number of columns of c; number of used entries per row of b.
/// @param dimk   Length of the contracted index (rows of a and of b).
/// @param ext_b  Distance, in elements, between consecutive rows of b.
/// @param c      Output buffer of dimi*dimj elements.
/// @param a      Left operand, dimk*dimi elements, read only.
/// @param b      Right operand, dimk rows of stride ext_b, read only.
template <typename aT, typename bT, typename cT>
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b,
                   cT* c, const aT* a, const bT* b) {
    const int alignment = 4;

    // Row length used for the working copies; grows only if padding succeeds.
    long effj = dimj;

    // Scratch destination for c when its rows are not a multiple of `alignment`.
    cT* c_buf = c;
    bool free_c = false;
    if (dimj % alignment) {
        const long padded = (dimj | (alignment - 1)) + 1;
        cT* scratch = static_cast<cT*>(malloc(sizeof(cT) * dimi * padded));
        if (scratch) {
            c_buf = scratch;
            effj = padded;
            free_c = true;
        }
    }

    // Repack b with row stride effj when its own stride is not aligned.
    bT* b_buf = nullptr;
    if (ext_b % alignment) {
        b_buf = static_cast<bT*>(malloc(sizeof(bT) * dimk * effj));
        if (b_buf) {
            bT* bp = b_buf;
            for (long k = 0; k < dimk; ++k, bp += effj, b += ext_b)
                memcpy(bp, b, sizeof(bT) * dimj);
            b = b_buf;
            ext_b = effj;
        }
    }

    // c(i,:) = sum_k a(k,i) * b(k,:), one output row at a time.
    cT* c_work = c_buf;
    for (long i = 0; i < dimi; ++i, c_work += effj, ++a) {
        for (long j = 0; j < dimj; ++j) c_work[j] = 0.0;

        // aik_ptr walks down column i of a (stride dimi between rows).
        const aT* aik_ptr = a;
        for (long k = 0; k < dimk; ++k, aik_ptr += dimi) {
            const aT aki = *aik_ptr;
            for (long j = 0; j < dimj; ++j) {
                c_work[j] += aki * b[k*ext_b + j];
            }
        }
    }

    // Strip the padding while copying the result back to the caller.
    if (free_c) {
        const cT* ct = c_buf;
        for (long i = 0; i < dimi; ++i, ct += effj, c += dimj)
            memcpy(c, ct, sizeof(cT) * dimj);
        free(c_buf);
    }

    free(b_buf);  // no-op when b was used in place
}
// Pointer-parameter tails of four declarations, one per operand-type
// combination of (c, a, b):
//   (double, double, double), (complex, complex, complex),
//   (complex, double, complex), (complex, complex, double).
// NOTE(review): the declared function name and the leading size parameters
// are not visible here; presumably these are the bgq_mtxmq_padded kernels
// used by the non-template mTxmq_padding overloads -- confirm against the
// full header.
149 double*
c,
const double*
a,
const double*
b);
// c, a and b all complex.
151 __complex__
double*
c,
const __complex__
double*
a,
const __complex__
double*
b);
// Complex c and b, real a.
153 __complex__
double*
c,
const double*
a,
const __complex__
double*
b);
// Complex c and a, real b.
155 __complex__
double*
c,
const __complex__
double*
a,
const double*
b);
// Non-template overload of mTxmq_padding for all-real operands
// (double c, double a, double b).  ni, nj, nk and ej occupy the positions
// that dimi, dimj, dimk and ext_b occupy in the generic template.
// NOTE(review): the body is not visible here; presumably it forwards to the
// matching platform kernel -- confirm.
158 inline void mTxmq_padding(
long ni,
long nj,
long nk,
long ej,
159 double*
c,
const double*
a,
const double*
b) {
// Non-template overload of mTxmq_padding for all-complex operands
// (__complex__ double c, a and b).  ni, nj, nk and ej occupy the positions
// that dimi, dimj, dimk and ext_b occupy in the generic template.
// NOTE(review): the body is not visible here; presumably it forwards to the
// matching platform kernel -- confirm.
164 inline void mTxmq_padding(
long ni,
long nj,
long nk,
long ej,
165 __complex__
double*
c,
const __complex__
double*
a,
const __complex__
double*
b) {
// Non-template overload of mTxmq_padding for a real left operand:
// complex c, double a, complex b.  ni, nj, nk and ej occupy the positions
// that dimi, dimj, dimk and ext_b occupy in the generic template.
// NOTE(review): the body is not visible here; presumably it forwards to the
// matching platform kernel -- confirm.
170 inline void mTxmq_padding(
long ni,
long nj,
long nk,
long ej,
171 __complex__
double*
c,
const double*
a,
const __complex__
double*
b) {
// Non-template overload of mTxmq_padding for a real right operand:
// complex c, complex a, double b.  ni, nj, nk and ej occupy the positions
// that dimi, dimj, dimk and ext_b occupy in the generic template.
// NOTE(review): the body is not visible here; presumably it forwards to the
// matching platform kernel -- confirm.
176 inline void mTxmq_padding(
long ni,
long nj,
long nk,
long ej,
177 __complex__
double*
c,
const __complex__
double*
a,
const double*
b) {
180 #elif defined(HAVE_IBMBGP)
// External bgpmTxmq kernels, declared only in the HAVE_IBMBGP branch.
// Real version: sizes ni, nj, nk, output c (restrict-qualified via
// MADNESS_RESTRICT) and read-only inputs a and b.
181 extern void bgpmTxmq(
long ni,
long nj,
long nk,
double* MADNESS_RESTRICT
c,
182 const double*
a,
const double*
b);
// Complex version: same leading parameters with a double_complex output.
// NOTE(review): the remaining parameters of this declaration are not visible
// here -- confirm the types of a and b against the full header.
183 extern void bgpmTxmq(
long ni,
long nj,
long nk,
double_complex* MADNESS_RESTRICT
c,
// Non-template mTxmq overload for double operands: passes every argument
// through unchanged to the external bgpmTxmq kernel.
187 inline void mTxmq(
long ni,
long nj,
long nk,
double* MADNESS_RESTRICT
c,
const double*
a,
const double*
b) {
188 bgpmTxmq(ni, nj, nk,
c,
a,
b);
// Forwards ni, nj, nk, c, a, b unchanged to bgpmTxmq.
// NOTE(review): the enclosing function header is not visible here;
// presumably this is the body of the double_complex mTxmq overload -- confirm.
193 bgpmTxmq(ni, nj, nk,
c,
a,
b);
Define BLAS like functions.
std::complex< double > double_complex
Definition: cfft.h:14
Macros and tools pertaining to the configuration of MADNESS.
std::complex< double > double_complex
Definition: mtxmq.h:42
void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB, const integer m, const integer n, const integer k, const float alpha, const float *a, const integer lda, const float *b, const integer ldb, const float beta, float *c, const integer ldc)
Multiplies a matrix by a matrix.
Definition: cblas.h:352
@ NoTrans
Definition: cblas_types.h:78
@ Trans
Definition: cblas_types.h:79
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b, cT *c, const aT *a, const bT *b)
Definition: mtxmq.h:96
void bgq_mtxmq_padded(long dimi, long dimj, long dimk, long extb, __complex__ double *c_x, const __complex__ double *a_x, const __complex__ double *b_x)
Definition: bgq_mtxm.cc:10
void mTxmq(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b, long ldb=-1)
Matrix = Matrix transpose * matrix ... MKL interface version.
Definition: mxm.h:257
static const double b
Definition: nonlinschro.cc:119
static const double a
Definition: nonlinschro.cc:118
static const double c
Definition: relops.cc:10
static const long k
Definition: rk.cc:44