MADNESS  0.10.1
mtxmq.h
Go to the documentation of this file.
1 /*
2  This file is part of MADNESS.
3 
4  Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20  For more information please contact:
21 
22  Robert J. Harrison
23  Oak Ridge National Laboratory
24  One Bethel Valley Road
25  P.O. Box 2008, MS-6367
26 
27  email: harrisonrj@ornl.gov
28  tel: 865-241-3937
29  fax: 865-572-0680
30 
31  $Id$
32 */
33 #ifndef MADNESS_TENSOR_MTXMQ_H__INCLUDED
34 #define MADNESS_TENSOR_MTXMQ_H__INCLUDED
35 
#include <madness/madness_config.h>

#include <complex>   // std::complex, used by the double_complex typedef below
#include <cstdlib>   // malloc/free, used by mTxmq_padding
#include <cstring>   // memcpy, used by mTxmq_padding

#ifdef HAVE_INTEL_MKL
#include <madness/tensor/cblas.h>
#endif
41 
42 typedef std::complex<double> double_complex;
43 
44 namespace madness {
45 
46 #ifdef HAVE_INTEL_MKL
    /// Matrix = Matrix transpose * matrix ... MKL interface version

    /// Does \c C=AT*B whereas mTxm does C=C+AT*B.
    /// \code
    ///    c(i,j) = sum(k) a(k,i)*b(k,j)  <------ does not accumulate into C
    /// \endcode
    ///
    /// Delegates the whole product to a single optimized *gemm call with
    /// beta=0, so C is overwritten rather than accumulated into.
    /// The operand swap (b first, a second, with B NoTrans / A Trans)
    /// is the usual trick for computing a row-major result through a
    /// column-major BLAS interface.
    ///
    /// \param dimi  number of columns of A (= rows of C)
    /// \param dimj  number of columns of B (= columns of C)
    /// \param dimk  number of rows of A and of B (the contracted index)
    /// \param c     output buffer, dimi x dimj, row-major, overwritten
    /// \param a     input matrix A, dimk x dimi, row-major
    /// \param b     input matrix B, dimk x dimj, row-major
    template <typename aT, typename bT, typename cT>
    void mTxmq(long dimi, long dimj, long dimk,
               cT* MADNESS_RESTRICT c, const aT* a, const bT* b) {
        const cT one = 1.0;  // alpha in *gemm
        const cT zero = 0.0; // beta in *gemm (0 => overwrite, do not accumulate)
        //std::cout << "IN MKL version mTxmq " << tensor_type_names[TensorTypeData<aT>::id] << " " << tensor_type_names[TensorTypeData<bT>::id] << " " << tensor_type_names[TensorTypeData<cT>::id] << "\n";
        cblas::gemm(cblas::NoTrans,cblas::Trans,dimj,dimi,dimk,one,b,dimj,a,dimi,zero,c,dimj);
    }
63 
64 #elif
65 
66  /// Matrix = Matrix transpose * matrix ... slow reference implementation
67 
68  /// This routine does \c C=AT*B whereas mTxm does C=C+AT*B.
69  /// \code
70  /// c(i,j) = sum(k) a(k,i)*b(k,j) <------ does not accumulate into C
71  /// \endcode
72  template <typename aT, typename bT, typename cT>
73  void mTxmq(long dimi, long dimj, long dimk,
74  cT* MADNESS_RESTRICT c, const aT* a, const bT* b) {
75  //std::cout << "IN GENERIC mTxmq " << tensor_type_names[TensorTypeData<aT>::id] << " " << tensor_type_names[TensorTypeData<bT>::id] << " " << tensor_type_names[TensorTypeData<cT>::id] << "\n";
76  for (long i=0; i<dimi; ++i,c+=dimj,++a) {
77  for (long j=0; j<dimj; ++j) c[j] = 0.0;
78  const aT *aik_ptr = a;
79  for (long k=0; k<dimk; ++k,aik_ptr+=dimi) {
80  aT aki = *aik_ptr;
81  for (long j=0; j<dimj; ++j) {
82  c[j] += aki*b[k*dimj+j];
83  }
84  }
85  }
86  }
87 
88 #endif
89 
90  /*
91  * mtxm, but with padded buffers.
92  *
93  * ext_b is the extent of the b array, so shrink() isn't needed.
94  */
95  template <typename aT, typename bT, typename cT>
96  void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b,
97  cT* c, const aT* a, const bT* b) {
98  const int alignment = 4;
99  bool free_b = false;
100  long effj = dimj;
101 
102  /* Setup a buffer for c if needed */
103  cT* c_buf = c;
104  if (dimj%alignment) {
105  effj = (dimj | 3) + 1;
106  c_buf = (cT*)malloc(sizeof(cT)*dimi*effj);
107  }
108 
109  /* Copy b into a buffer if needed */
110  if (ext_b%alignment) {
111  free_b = true;
112  bT* b_buf = (bT*)malloc(sizeof(bT)*dimk*effj);
113 
114  bT* bp = b_buf;
115  for (long k=0; k<dimk; k++, bp += effj, b += ext_b)
116  memcpy(bp, b, sizeof(bT)*dimj);
117 
118  b = b_buf;
119  ext_b = effj;
120  }
121 
122  cT* c_work = c_buf;
123  /* mTxm */
124  for (long i=0; i<dimi; ++i,c_work+=effj,++a) {
125  for (long j=0; j<dimj; ++j) c_work[j] = 0.0;
126  const aT *aik_ptr = a;
127  for (long k=0; k<dimk; ++k,aik_ptr+=dimi) {
128  aT aki = *aik_ptr;
129  for (long j=0; j<dimj; ++j) {
130  c_work[j] += aki*b[k*ext_b+j];
131  }
132  }
133  }
134 
135  /* Copy c out if needed */
136  if (dimj%alignment) {
137  cT* ct = c_buf;
138  for (long i=0; i<dimi; i++, ct += effj, c += dimj)
139  memcpy(c, ct, sizeof(cT)*dimj);
140 
141  free(c_buf);
142  }
143 
144  /* Free the buffer for b */
145  if (free_b) free((bT*)b);
146  }
147 #ifdef HAVE_IBMBGQ
148  extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
149  double* c, const double* a, const double* b);
150  extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
151  __complex__ double* c, const __complex__ double* a, const __complex__ double* b);
152  extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
153  __complex__ double* c, const double* a, const __complex__ double* b);
154  extern void bgq_mtxmq_padded(long ni, long nj, long nk, long ej,
155  __complex__ double* c, const __complex__ double* a, const double* b);
156 
    /// mTxmq_padding specialization (real*real -> real): forwards to the
    /// platform-specific bgq_mtxmq_padded kernel declared above
    /// (Blue Gene/Q build, per HAVE_IBMBGQ).
    template <>
    inline void mTxmq_padding(long ni, long nj, long nk, long ej,
              double* c, const double* a, const double* b) {
        bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
    }
162 
    /// mTxmq_padding specialization (complex*complex -> complex): forwards to
    /// the platform-specific bgq_mtxmq_padded kernel declared above.
    template <>
    inline void mTxmq_padding(long ni, long nj, long nk, long ej,
              __complex__ double* c, const __complex__ double* a, const __complex__ double* b) {
        bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
    }
168 
    /// mTxmq_padding specialization (real a, complex b -> complex): forwards
    /// to the platform-specific bgq_mtxmq_padded kernel declared above.
    template <>
    inline void mTxmq_padding(long ni, long nj, long nk, long ej,
              __complex__ double* c, const double* a, const __complex__ double* b) {
        bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
    }
174 
    /// mTxmq_padding specialization (complex a, real b -> complex): forwards
    /// to the platform-specific bgq_mtxmq_padded kernel declared above.
    template <>
    inline void mTxmq_padding(long ni, long nj, long nk, long ej,
              __complex__ double* c, const __complex__ double* a, const double* b) {
        bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b);
    }
180 #elif defined(HAVE_IBMBGP)
181  extern void bgpmTxmq(long ni, long nj, long nk, double* MADNESS_RESTRICT c,
182  const double* a, const double* b);
183  extern void bgpmTxmq(long ni, long nj, long nk, double_complex* MADNESS_RESTRICT c,
184  const double_complex* a, const double_complex* b);
185 
    /// mTxmq specialization (double): forwards to the platform-specific
    /// bgpmTxmq kernel declared above (Blue Gene/P build, per HAVE_IBMBGP).
    template <>
    inline void mTxmq(long ni, long nj, long nk, double* MADNESS_RESTRICT c, const double* a, const double* b) {
        bgpmTxmq(ni, nj, nk, c, a, b);
    }
190 
    /// mTxmq specialization (double_complex): forwards to the platform-specific
    /// bgpmTxmq kernel declared above.
    template <>
    inline void mTxmq(long ni, long nj, long nk, double_complex* MADNESS_RESTRICT c, const double_complex* a, const double_complex* b) {
        bgpmTxmq(ni, nj, nk, c, a, b);
    }
195 
196 // #elif defined(X86_64) && !defined(DISABLE_SSE3)
197 // template <>
198 // void mTxmq(long dimi, long dimj, long dimk,
199 // double* MADNESS_RESTRICT c, const double* a, const double* b);
200 
201 // template <>
202 // void mTxmq(long dimi, long dimj, long dimk,
203 // double_complex* MADNESS_RESTRICT c, const double_complex* a, const double_complex* b);
204 
205 // #ifndef __INTEL_COMPILER
206 // template <>
207 // void mTxmq(long dimi, long dimj, long dimk,
208 // double_complex* MADNESS_RESTRICT c, const double_complex* a, const double* b);
209 // #endif
210 
211 // #elif defined(X86_32)
212 // template <>
213 // void mTxmq(long dimi, long dimj, long dimk,
214 // double* MADNESS_RESTRICT c, const double* a, const double* b);
215 #endif
216 
217 }
218 
219 #endif // MADNESS_TENSOR_MTXMQ_H__INCLUDED
Define BLAS like functions.
std::complex< double > double_complex
Definition: cfft.h:14
char * malloc()
Macros and tools pertaining to the configuration of MADNESS.
std::complex< double > double_complex
Definition: mtxmq.h:42
void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB, const integer m, const integer n, const integer k, const float alpha, const float *a, const integer lda, const float *b, const integer ldb, const float beta, float *c, const integer ldc)
Multiplies a matrix by a vector.
Definition: cblas.h:352
@ NoTrans
Definition: cblas_types.h:78
@ Trans
Definition: cblas_types.h:79
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b, cT *c, const aT *a, const bT *b)
Definition: mtxmq.h:96
void bgq_mtxmq_padded(long dimi, long dimj, long dimk, long extb, __complex__ double *c_x, const __complex__ double *a_x, const __complex__ double *b_x)
Definition: bgq_mtxm.cc:10
void mTxmq(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b, long ldb=-1)
Matrix = Matrix transpose * matrix ... MKL interface version.
Definition: mxm.h:257
static const double b
Definition: nonlinschro.cc:119
static const double a
Definition: nonlinschro.cc:118
static const double c
Definition: relops.cc:10
static const long k
Definition: rk.cc:44