MADNESS 0.10.1
mtxmq.h
Go to the documentation of this file.
1/*
2 This file is part of MADNESS.
3
4 Copyright (C) 2007,2010 Oak Ridge National Laboratory
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 For more information please contact:
21
22 Robert J. Harrison
23 Oak Ridge National Laboratory
24 One Bethel Valley Road
25 P.O. Box 2008, MS-6367
26
27 email: harrisonrj@ornl.gov
28 tel: 865-241-3937
29 fax: 865-572-0680
30
31 $Id$
32*/
33#ifndef MADNESS_TENSOR_MTXMQ_H__INCLUDED
34#define MADNESS_TENSOR_MTXMQ_H__INCLUDED
35
37
38#ifdef HAVE_INTEL_MKL
40#endif
41
42typedef std::complex<double> double_complex;
43
44namespace madness {
45
46#ifdef HAVE_INTEL_MKL
47 /// Matrix = Matrix transpose * matrix ... MKL interface version
48
49 /// Does \c C=AT*B whereas mTxm does C=C+AT*B.
50 /// \code
51 /// c(i,j) = sum(k) a(k,i)*b(k,j) <------ does not accumulate into C
52 /// \endcode
53 ///
54 /// This is the SLOW reference implementation
55 template <typename aT, typename bT, typename cT>
56 void mTxmq(long dimi, long dimj, long dimk,
57 cT* MADNESS_RESTRICT c, const aT* a, const bT* b) {
58 const cT one = 1.0; // alpha in *gemm
59 const cT zero = 0.0; // beta in *gemm
60 //std::cout << "IN MKL version mTxmq " << tensor_type_names[TensorTypeData<aT>::id] << " " << tensor_type_names[TensorTypeData<bT>::id] << " " << tensor_type_names[TensorTypeData<cT>::id] << "\n";
61 cblas::gemm(cblas::NoTrans,cblas::Trans,dimj,dimi,dimk,one,b,dimj,a,dimi,zero,c,dimj);
62 }
63
64#elif
65
66 /// Matrix = Matrix transpose * matrix ... slow reference implementation
67
68 /// This routine does \c C=AT*B whereas mTxm does C=C+AT*B.
69 /// \code
70 /// c(i,j) = sum(k) a(k,i)*b(k,j) <------ does not accumulate into C
71 /// \endcode
72 template <typename aT, typename bT, typename cT>
73 void mTxmq(long dimi, long dimj, long dimk,
74 cT* MADNESS_RESTRICT c, const aT* a, const bT* b) {
75 //std::cout << "IN GENERIC mTxmq " << tensor_type_names[TensorTypeData<aT>::id] << " " << tensor_type_names[TensorTypeData<bT>::id] << " " << tensor_type_names[TensorTypeData<cT>::id] << "\n";
76 for (long i=0; i<dimi; ++i,c+=dimj,++a) {
77 for (long j=0; j<dimj; ++j) c[j] = 0.0;
78 const aT *aik_ptr = a;
79 for (long k=0; k<dimk; ++k,aik_ptr+=dimi) {
80 aT aki = *aik_ptr;
81 for (long j=0; j<dimj; ++j) {
82 c[j] += aki*b[k*dimj+j];
83 }
84 }
85 }
86 }
87
88#endif
89
90 /*
91 * mtxm, but with padded buffers.
92 *
93 * ext_b is the extent of the b array, so shrink() isn't needed.
94 */
95 template <typename aT, typename bT, typename cT>
96 void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b,
97 cT* c, const aT* a, const bT* b) {
98 const int alignment = 4;
99 bool free_b = false;
100 long effj = dimj;
101
102 /* Setup a buffer for c if needed */
103 cT* c_buf = c;
104 if (dimj%alignment) {
105 effj = (dimj | 3) + 1;
106 c_buf = (cT*)malloc(sizeof(cT)*dimi*effj);
107 }
108
109 /* Copy b into a buffer if needed */
110 if (ext_b%alignment) {
111 free_b = true;
112 bT* b_buf = (bT*)malloc(sizeof(bT)*dimk*effj);
113
114 bT* bp = b_buf;
115 for (long k=0; k<dimk; k++, bp += effj, b += ext_b)
116 memcpy(bp, b, sizeof(bT)*dimj);
117
118 b = b_buf;
119 ext_b = effj;
120 }
121
122 cT* c_work = c_buf;
123 /* mTxm */
124 for (long i=0; i<dimi; ++i,c_work+=effj,++a) {
125 for (long j=0; j<dimj; ++j) c_work[j] = 0.0;
126 const aT *aik_ptr = a;
127 for (long k=0; k<dimk; ++k,aik_ptr+=dimi) {
128 aT aki = *aik_ptr;
129 for (long j=0; j<dimj; ++j) {
130 c_work[j] += aki*b[k*ext_b+j];
131 }
132 }
133 }
134
135 /* Copy c out if needed */
136 if (dimj%alignment) {
137 cT* ct = c_buf;
138 for (long i=0; i<dimi; i++, ct += effj, c += dimj)
139 memcpy(c, ct, sizeof(cT)*dimj);
140
141 free(c_buf);
142 }
143
144 /* Free the buffer for b */
145 if (free_b) free((bT*)b);
146 }
147#ifdef HAVE_IBMBGQ
148 extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
149 double* c, const double* a, const double* b);
150 extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
151 __complex__ double* c, const __complex__ double* a, const __complex__ double* b);
152 extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
153 __complex__ double* c, const double* a, const __complex__ double* b);
154 extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
155 __complex__ double* c, const __complex__ double* a, const double* b);
156
157 template <>
158 inline void mTxmq_padding(long ni, long nj, long nk, long ej,
159 double* c, const double* a, const double* b) {
160 bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
161 }
162
163 template <>
164 inline void mTxmq_padding(long ni, long nj, long nk, long ej,
165 __complex__ double* c, const __complex__ double* a, const __complex__ double* b) {
166 bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
167 }
168
169 template <>
170 inline void mTxmq_padding(long ni, long nj, long nk, long ej,
171 __complex__ double* c, const double* a, const __complex__ double* b) {
172 bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
173 }
174
175 template <>
176 inline void mTxmq_padding(long ni, long nj, long nk, long ej,
177 __complex__ double* c, const __complex__ double* a, const double* b) {
178 bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
179 }
180#elif defined(HAVE_IBMBGP)
181 extern void bgpmTxmq(long ni, long nj, long nk, double* MADNESS_RESTRICT c,
182 const double* a, const double* b);
183 extern void bgpmTxmq(long ni, long nj, long nk, double_complex* MADNESS_RESTRICT c,
184 const double_complex* a, const double_complex* b);
185
186 template <>
187 inline void mTxmq(long ni, long nj, long nk, double* MADNESS_RESTRICT c, const double* a, const double* b) {
188 bgpmTxmq(ni, nj, nk, c, a, b);
189 }
190
191 template <>
192 inline void mTxmq(long ni, long nj, long nk, double_complex* MADNESS_RESTRICT c, const double_complex* a, const double_complex* b) {
193 bgpmTxmq(ni, nj, nk, c, a, b);
194 }
195
196// #elif defined(X86_64) && !defined(DISABLE_SSE3)
197// template <>
198// void mTxmq(long dimi, long dimj, long dimk,
199// double* MADNESS_RESTRICT c, const double* a, const double* b);
200
201// template <>
202// void mTxmq(long dimi, long dimj, long dimk,
203// double_complex* MADNESS_RESTRICT c, const double_complex* a, const double_complex* b);
204
205// #ifndef __INTEL_COMPILER
206// template <>
207// void mTxmq(long dimi, long dimj, long dimk,
208// double_complex* MADNESS_RESTRICT c, const double_complex* a, const double* b);
209// #endif
210
211// #elif defined(X86_32)
212// template <>
213// void mTxmq(long dimi, long dimj, long dimk,
214// double* MADNESS_RESTRICT c, const double* a, const double* b);
215#endif
216
217}
218
219#endif // MADNESS_TENSOR_MTXMQ_H__INCLUDED
Define BLAS like functions.
std::complex< double > double_complex
Definition cfft.h:14
char * malloc()
Macros and tools pertaining to the configuration of MADNESS.
std::complex< double > double_complex
Definition mtxmq.h:42
void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB, const integer m, const integer n, const integer k, const float alpha, const float *a, const integer lda, const float *b, const integer ldb, const float beta, float *c, const integer ldc)
Multiplies a matrix by a vector.
Definition cblas.h:352
@ NoTrans
Definition cblas_types.h:78
@ Trans
Definition cblas_types.h:79
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b, cT *c, const aT *a, const bT *b)
Definition mtxmq.h:96
void bgq_mtxmq_padded(long dimi, long dimj, long dimk, long extb, __complex__ double *c_x, const __complex__ double *a_x, const __complex__ double *b_x)
Definition bgq_mtxm.cc:10
void mTxmq(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b, long ldb=-1)
Matrix = Matrix transpose * matrix ... MKL interface version.
Definition mxm.h:257
static const double b
Definition nonlinschro.cc:119
static const double a
Definition nonlinschro.cc:118
static const double c
Definition relops.cc:10
static const long k
Definition rk.cc:44