MADNESS  0.10.1
cblas.h
Go to the documentation of this file.
1 /*
2  This file is part of MADNESS.
3 
4  Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20  For more information please contact:
21 
22  Robert J. Harrison
23  Oak Ridge National Laboratory
24  One Bethel Valley Road
25  P.O. Box 2008, MS-6367
26 
27  email: harrisonrj@ornl.gov
28  tel: 865-241-3937
29  fax: 865-572-0680
30 */
31 
32 
33 #ifndef MADNESS_LINALG_CBLAS_H__INCLUDED
34 #define MADNESS_LINALG_CBLAS_H__INCLUDED
35 
36 /// \file cblas.h
37 /// \brief Define BLAS like functions
38 
39 
40 #include <madness/fortran_ctypes.h>
41 #include <madness/madness_config.h>
43 
44 // MKL direct macros produce a zillion warning messages about unused variables --- disable this warning just in this header
45 MADNESS_PRAGMA_GCC(diagnostic push)
46 MADNESS_PRAGMA_GCC(diagnostic ignored "-Wunused-value")
47 MADNESS_PRAGMA_CLANG(diagnostic push)
48 MADNESS_PRAGMA_CLANG(diagnostic ignored "-Wunused-value")
49 
50 #if defined(FORTRAN_LINKAGE_LC) || (defined(HAVE_INTEL_MKL) && defined(MKL_DIRECT_CALL))
51 
52 # define F77_SGEMM sgemm
53 # define F77_DGEMM dgemm
54 # define F77_CGEMM cgemm
55 # define F77_ZGEMM zgemm
56 #ifdef HAVE_INTEL_MKL
57 # define F77_SCGEMM scgemm
58 # define F77_DZGEMM dzgemm
59 #endif
60 # define F77_SGEMV sgemv
61 # define F77_DGEMV dgemv
62 # define F77_CGEMV cgemv
63 # define F77_ZGEMV zgemv
64 # define F77_SSCAL sscal
65 # define F77_DSCAL dscal
66 # define F77_CSCAL cscal
67 # define F77_ZSCAL zscal
68 # define F77_CSSCAL csscal
69 # define F77_ZDSCAL zdscal
70 # define F77_SDOT sdot
71 # define F77_DDOT ddot
72 # define F77_CDOTU cdotu
73 # define F77_ZDOTU zdotu
74 # define F77_SAXPY saxpy
75 # define F77_DAXPY daxpy
76 # define F77_CAXPY caxpy
77 # define F77_ZAXPY zaxpy
78 
79 #elif defined(FORTRAN_LINKAGE_LCU)
80 
81 # define F77_SGEMM sgemm_
82 # define F77_DGEMM dgemm_
83 # define F77_CGEMM cgemm_
84 # define F77_ZGEMM zgemm_
85 #ifdef HAVE_INTEL_MKL
86 # define F77_SCGEMM scgemm_
87 # define F77_DZGEMM dzgemm_
88 #endif
89 # define F77_SGEMV sgemv_
90 # define F77_DGEMV dgemv_
91 # define F77_CGEMV cgemv_
92 # define F77_ZGEMV zgemv_
93 # define F77_SSCAL sscal_
94 # define F77_DSCAL dscal_
95 # define F77_CSCAL cscal_
96 # define F77_ZSCAL zscal_
97 # define F77_CSSCAL csscal_
98 # define F77_ZDSCAL zdscal_
99 # define F77_SDOT sdot_
100 # define F77_DDOT ddot_
101 # define F77_CDOTU cdotu_
102 # define F77_ZDOTU zdotu_
103 # define F77_SAXPY saxpy_
104 # define F77_DAXPY daxpy_
105 # define F77_CAXPY caxpy_
106 # define F77_ZAXPY zaxpy_
107 
108 #elif defined(FORTRAN_LINKAGE_LCUU)
109 
110 # define F77_SGEMM sgemm__
111 # define F77_DGEMM dgemm__
112 # define F77_CGEMM cgemm__
113 # define F77_ZGEMM zgemm__
114 #ifdef HAVE_INTEL_MKL
115 # define F77_SCGEMM scgemm__
116 # define F77_DZGEMM dzgemm__
117 #endif
118 # define F77_SGEMV sgemv__
119 # define F77_DGEMV dgemv__
120 # define F77_CGEMV cgemv__
121 # define F77_ZGEMV zgemv__
122 # define F77_SSCAL sscal__
123 # define F77_DSCAL dscal__
124 # define F77_CSCAL cscal__
125 # define F77_ZSCAL zscal__
126 # define F77_CSSCAL csscal__
127 # define F77_ZDSCAL zdscal__
128 # define F77_SDOT sdot__
129 # define F77_DDOT ddot__
130 # define F77_CDOTU cdotu__
131 # define F77_ZDOTU zdotu__
132 # define F77_SAXPY saxpy__
133 # define F77_DAXPY daxpy__
134 # define F77_CAXPY caxpy__
135 # define F77_ZAXPY zaxpy__
136 
137 #elif defined(FORTRAN_LINKAGE_UC)
138 
139 # define F77_SGEMM SGEMM
140 # define F77_DGEMM DGEMM
141 # define F77_CGEMM CGEMM
142 # define F77_ZGEMM ZGEMM
143 #ifdef HAVE_INTEL_MKL
144 # define F77_SCGEMM SCGEMM
145 # define F77_DZGEMM DZGEMM
146 #endif
147 # define F77_SGEMV SGEMV
148 # define F77_DGEMV DGEMV
149 # define F77_CGEMV CGEMV
150 # define F77_ZGEMV ZGEMV
151 # define F77_SSCAL SSCAL
152 # define F77_DSCAL DSCAL
153 # define F77_CSCAL CSCAL
154 # define F77_ZSCAL ZSCAL
155 # define F77_CSSCAL CSSCAL
156 # define F77_ZDSCAL ZDSCAL
157 # define F77_SDOT SDOTU
158 # define F77_DDOT DDOTU
159 # define F77_CDOTU CDOTU
160 # define F77_ZDOTU ZDOTU
161 # define F77_SAXPY SAXPY
162 # define F77_DAXPY DAXPY
163 # define F77_CAXPY CAXPY
164 # define F77_ZAXPY ZAXPY
165 
166 #elif defined(FORTRAN_LINKAGE_UCU)
167 
168 # define F77_SGEMM SGEMM_
169 # define F77_DGEMM DGEMM_
170 # define F77_CGEMM CGEMM_
171 # define F77_ZGEMM ZGEMM_
172 #ifdef HAVE_INTEL_MKL
173 # define F77_SCGEMM SCGEMM_
174 # define F77_DZGEMM DZGEMM_
175 #endif
176 # define F77_SGEMV SGEMV_
177 # define F77_DGEMV DGEMV_
178 # define F77_CGEMV CGEMV_
179 # define F77_ZGEMV ZGEMV_
180 # define F77_SSCAL SSCAL_
181 # define F77_DSCAL DSCAL_
182 # define F77_CSCAL CSCAL_
183 # define F77_ZSCAL ZSCAL_
184 # define F77_CSSCAL CSSCAL_
185 # define F77_ZDSCAL ZDSCAL_
186 # define F77_SDOT SDOT_
187 # define F77_DDOT DDOTSUB_
188 # define F77_CDOTU CDOTU_
189 # define F77_ZDOTU ZDOTU_
190 # define F77_SAXPY SAXPY_
191 # define F77_DAXPY DAXPY_
192 # define F77_CAXPY CAXPY_
193 # define F77_ZAXPY ZAXPY_
194 
195 #else
196 // If detected another convention complain loudly.
197 # error "cblas.h does not support the current Fortran symbol convention -- please, edit and check in the changes."
198 #endif
199 
200 // process BLAS parts that are not directly callable in MKL
201 #if defined(FORTRAN_LINKAGE_LC)
202 # define F77_SGER sger
203 # define F77_DGER dger
204 # define F77_CGER cger
205 # define F77_ZGER zger
206 #elif defined(FORTRAN_LINKAGE_LCU)
207 # define F77_SGER sger_
208 # define F77_DGER dger_
209 # define F77_CGER cger_
210 # define F77_ZGER zger_
211 #elif defined(FORTRAN_LINKAGE_LCUU)
212 # define F77_SGER sger__
213 # define F77_DGER dger__
214 # define F77_CGER cger__
215 # define F77_ZGER zger__
216 #elif defined(FORTRAN_LINKAGE_UC)
217 # define F77_SGER SGER
218 # define F77_DGER DGER
219 # define F77_CGER CGER
220 # define F77_ZGER ZGER
221 #elif defined(FORTRAN_LINKAGE_UCU)
222 # define F77_SGER SGER_
223 # define F77_DGER DGER_
224 # define F77_CGER CGER_
225 # define F77_ZGER ZGER_
226 #else
227 // If detected another convention complain loudly.
228 # error "cblas.h does not support the current Fortran symbol convention -- please, edit and check in the changes."
229 #endif
230 
231 extern "C" {
232 
233 // BLAS _GER declarations, not directly callable via MKL
234 void F77_SGER(const integer *, const integer *, const float *, const float *,
235  const integer *, const float *, const integer *, float *,
236  const integer *);
237 void F77_DGER(const integer *, const integer *, const double *, const double *,
238  const integer *, const double *, const integer *, double *,
239  const integer *);
240 void F77_CGER(const integer *, const integer *, const complex_real4 *,
241  const complex_real4 *, const integer *, const complex_real4 *,
242  const integer *, complex_real4 *, const integer *);
243 void F77_ZGER(const integer *, const integer *, const complex_real8 *,
244  const complex_real8 *, const integer *, const complex_real8 *,
245  const integer *, complex_real8 *, const integer *);
246 }
247 
248 #ifndef MKL_DIRECT_CALL
249 
250 extern "C" {
251 
252  // BLAS _GEMM declarations
253  void F77_SGEMM(const char*, const char*, const integer*, const integer*,
254  const integer*, const float*, const float*, const integer*,
255  const float*, const integer*, const float*, float*, const integer*);
256  void F77_DGEMM(const char*, const char*, const integer*, const integer*,
257  const integer*, const double*, const double*, const integer*,
258  const double*, const integer*, const double*, double*, const integer*);
259  void F77_CGEMM(const char*, const char*, const integer*, const integer*,
260  const integer*, const complex_real4*, const complex_real4*,
261  const integer*, const complex_real4*, const integer*,
262  const complex_real4*, complex_real4*, const integer*);
263  void F77_ZGEMM(const char*, const char*, const integer*, const integer*,
264  const integer*, const complex_real8*, const complex_real8*,
265  const integer*, const complex_real8*, const integer*,
266  const complex_real8*, complex_real8*, const integer*);
267 
268 #ifdef HAVE_INTEL_MKL
269  void F77_SCGEMM(const char*, const char*, const integer*, const integer*,
270  const integer*, const complex_real4*, const real4*,
271  const integer*, const complex_real4*, const integer*,
272  const complex_real4*, complex_real4*, const integer*);
273  void F77_DZGEMM(const char*, const char*, const integer*, const integer*,
274  const integer*, const complex_real8*, const real8*,
275  const integer*, const complex_real8*, const integer*,
276  const complex_real8*, complex_real8*, const integer*);
277 #endif
278 
279  // BLAS _GEMV declarations
280  void F77_SGEMV(const char*, const integer*, const integer*, const float*,
281  const float*, const integer*, const float*, const integer*,
282  const float*, float*, const integer*);
283  void F77_DGEMV(const char*, const integer*, const integer*, const double*,
284  const double*, const integer*, const double*, const integer*,
285  const double*, double*, const integer*);
286  void F77_CGEMV(const char*, const integer*, const integer*, const complex_real4*,
287  const complex_real4*, const integer*, const complex_real4*,
288  const integer*, const complex_real4*, complex_real4*, const integer*);
289  void F77_ZGEMV(const char*, const integer*, const integer*, const complex_real8*,
290  const complex_real8*, const integer*, const complex_real8*,
291  const integer*, const complex_real8*, complex_real8*, const integer*);
292 
293  // BLAS _SCAL declarations
294  void F77_SSCAL(const integer*, const float*, float*, const integer*);
295  void F77_DSCAL(const integer*, const double*, double*, const integer*);
296  void F77_CSCAL(const integer*, const complex_real4*, complex_real4*, const integer*);
297  void F77_CSSCAL(const integer*, const float*, complex_real4*, const integer*);
298  void F77_ZSCAL(const integer*, const complex_real8*, complex_real8*, const integer*);
299  void F77_ZDSCAL(const integer*, const double*, complex_real8*, const integer*);
300 
301  // BLAS _DOT declarations
302  float F77_SDOT(const integer*, const float*, const integer*, const float*,
303  const integer*);
304  double F77_DDOT(const integer*, const double *, const integer*,
305  const double *, const integer*);
306  void F77_CDOTU(complex_real4*, const integer*, const complex_real4*, const integer*,
307  const complex_real4*, const integer*);
308  void F77_ZDOTU(complex_real8*, const integer*, const complex_real8*, const integer*,
309  const complex_real8*, const integer*);
310  //
311  // BLAS _AXPY declarations (INTEGER n, NUMERICAL alpha, NUMERICAL x, INTEGER incx, NUMERICAL y, INTEGER incy )
312  void F77_SAXPY(const integer*, const float*, const float*, const integer*,
313  float*, const integer*);
314  void F77_DAXPY(const integer*, const double*, const double*, const integer*,
315  double*, const integer*);
316  void F77_CAXPY(const integer*, const complex_real4*, const complex_real4*,
317  const integer*, complex_real4*, const integer*);
318  void F77_ZAXPY(const integer*, const complex_real8*, const complex_real8*,
319  const integer*, complex_real8*, const integer*);
320 }
321 #else
322 
323 # include <mkl.h>
324 
325 #endif // !defined(MKL_DIRECT_CALL)
326 
327 // some BLAS libraries use custom complex types in their interface, so need to include their definitions here
329 
330 namespace madness {
331 namespace cblas {
332 
333  /// Multiplies a matrix by a vector
334 
335  /// \f[
336  /// \mathbf{C} \leftarrow \alpha \mathbf{A}^{\mathrm{OpA}} \mathbf{B}^{\mathrm{OpB}} + \beta \mathbf{C}
337  /// \f]
338  /// \param OpA Operation to be applied to matrix \f$ \mathbf{A} \f$
339  /// \param OpB Operation to be applied to matrix \f$ \mathbf{B} \f$
340  /// \param m Rows in matrix \f$ \mathbf{C} \f$
341  /// \param n Columns in matrix \f$ \mathbf{C} \f$
342  /// \param k Inner dimension size for matrices \f$ \mathbf{A} \f$ and \f$ \mathbf{B} \f$
343  /// \param alpha Scaling factor applied to \f$ \mathbf{A} \f$ \c * \f$ \mathbf{B} \f$
344  /// \param a Pointer to matrix \f$ \mathbf{A} \f$
345  /// \param lda The size of the leading-order dimension of matrix \f$ \mathbf{A} \f$
346  /// \param b Pointer to matrix \f$ \mathbf{A} \f$
347  /// \param ldb The size of the leading-order dimension of matrix \f$ \mathbf{B} \f$
348  /// \param beta Scaling factor for matrix \f$ \mathbf{C} \f$
349  /// \param c Pointer to matrix \f$ \mathbf{C} \f$
350  /// \param ldc The size of the leading-order dimension of matrix \f$ \mathbf{C} \f$
351  ///@{
352  inline void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB,
353  const integer m, const integer n, const integer k, const float alpha,
354  const float* a, const integer lda, const float* b, const integer ldb,
355  const float beta, float* c, const integer ldc)
356  {
357  MADNESS_ASSERT(OpA != ConjTrans);
358  MADNESS_ASSERT(OpB != ConjTrans);
359  static const char *op[] = { "n","t" };
360  F77_SGEMM(op[OpA], op[OpB], &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
361  }
362 
363  inline void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB,
364  const integer m, const integer n, const integer k, const double alpha,
365  const double* a, const integer lda, const double* b, const integer ldb,
366  const double beta, double* c, const integer ldc) {
367  MADNESS_ASSERT(OpA != ConjTrans);
368  MADNESS_ASSERT(OpB != ConjTrans);
369  static const char *op[] = { "n","t" };
370  F77_DGEMM(op[OpA], op[OpB], &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
371  }
372 
373  inline void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB,
374  const integer m, const integer n, const integer k,
375  const complex_real4 alpha, const complex_real4* a, const integer lda,
376  const complex_real4* b, const integer ldb, const complex_real4 beta,
377  complex_real4* c, const integer ldc) {
378  static const char *op[] = {"n", "t", "c"};
379  F77_CGEMM(op[OpA], op[OpB], &m, &n, &k, cblas::to_cptr(&alpha),
380  cblas::to_cptr(a), &lda, cblas::to_cptr(b), &ldb,
381  cblas::to_cptr(&beta), cblas::to_cptr(c), &ldc);
382  }
383 
384  inline void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB,
385  const integer m, const integer n, const integer k,
386  const complex_real8 alpha, const complex_real8* a, const integer lda,
387  const complex_real8* b, const integer ldb, const complex_real8 beta,
388  complex_real8* c, const integer ldc) {
389  static const char *op[] = {"n", "t", "c"};
390  F77_ZGEMM(op[OpA], op[OpB], &m, &n, &k, cblas::to_zptr(&alpha),
391  cblas::to_zptr(a), &lda, cblas::to_zptr(b), &ldb,
392  cblas::to_zptr(&beta), cblas::to_zptr(c), &ldc);
393  }
394 
395 #ifdef HAVE_INTEL_MKL
396  inline void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB,
397  const integer m, const integer n, const integer k,
398  const complex_real4 alpha, const complex_real4* a, const integer lda,
399  const real4* b, const integer ldb, const complex_real4 beta,
400  complex_real4* c, const integer ldc) {
401 
402  //static const char *op[] = { "n","t","c" };
403  //F77_CSGEMM(op[OpA], op[OpB], &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
404 
405  //Don't have CSGEMM ... only SCGEMM ... so use A*B = (BT * AT)T
406 
407  //complex_real4 ctrans[m*n]; // Here assume matrices are small and can be allocated on the stack
408  complex_real4* ctrans = new complex_real4[m*n];
409  static const char *opT[] = { "t","n","c" }; // Transpose of op ... conj-transpose not working yet
410  MADNESS_ASSERT(OpA!=ConjTrans && OpB!=ConjTrans);
411  const complex_real4 zero = 0.0;
412  F77_SCGEMM(opT[OpB], opT[OpA], &n, &m, &k, cblas::to_cptr(&alpha),
413  b, &ldb, cblas::to_cptr(a), &lda,
414  cblas::to_cptr(&zero), cblas::to_cptr(ctrans), &n);
415 
416  // In fortran have CTRANS(N,M) and fortran CTRANS(i,j) maps to C ctrans[j*n+i]
417 
418  if (beta == zero) {
419  for (integer i=0; i<n; i++) {
420  for (integer j=0; j<m; j++) {
421  c[i*ldc+j] = ctrans[j*n+i];
422  }
423  }
424  }
425  else
426  for (integer i=0; i<n; i++) {
427  for (integer j=0; j<m; j++) {
428  c[i*ldc+j] = beta*c[i*ldc+j] + ctrans[j*n+i];
429  }
430  }
431  delete [] ctrans;
432  }
433 
434  inline void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB,
435  const integer m, const integer n, const integer k,
436  const complex_real4 alpha, const real4* a, const integer lda,
437  const complex_real4* b, const integer ldb, const complex_real4 beta,
438  complex_real4* c, const integer ldc) {
439  static const char *op[] = {"n", "t", "c"};
440  F77_SCGEMM(op[OpA], op[OpB], &m, &n, &k, cblas::to_cptr(&alpha),
441  a, &lda, cblas::to_cptr(b), &ldb,
442  cblas::to_cptr(&beta), cblas::to_cptr(c), &ldc);
443  }
444 
445  inline void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB,
446  const integer m, const integer n, const integer k,
447  const complex_real8 alpha, const complex_real8* a, const integer lda,
448  const real8* b, const integer ldb, const complex_real8 beta,
449  complex_real8* c, const integer ldc) {
450 
451  //static const char *op[] = { "n","t","c" };
452  //F77_ZDGEMM(op[OpA], op[OpB], &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
453 
454  //Don't have ZDGEMM ... only DZGEMM ... so use A*B = (BT * AT)T
455 
456  //complex_real8 ctrans[m*n]; // Here assume matrices are small and can be allocated on the stack
457  complex_real8* ctrans = new complex_real8[m*n];
458  static const char *opT[] = { "t","n","c" }; // Transpose of op ... conj-transpose not working yet
459  MADNESS_ASSERT(OpA!=ConjTrans && OpB!=ConjTrans);
460  const complex_real8 zero = 0.0;
461  F77_DZGEMM(opT[OpB], opT[OpA], &n, &m, &k, cblas::to_zptr(&alpha),
462  b, &ldb, cblas::to_zptr(a), &lda,
463  cblas::to_zptr(&zero), cblas::to_zptr(ctrans), &n);
464 
465  // In fortran have CTRANS(N,M) and fortran CTRANS(i,j) maps to C ctrans[j*n+i]
466 
467  if (beta == zero) {
468  for (integer i=0; i<n; i++) {
469  for (integer j=0; j<m; j++) {
470  c[i*ldc+j] = ctrans[j*n+i];
471  }
472  }
473  }
474  else
475  for (integer i=0; i<n; i++) {
476  for (integer j=0; j<m; j++) {
477  c[i*ldc+j] = beta*c[i*ldc+j] + ctrans[j*n+i];
478  }
479  }
480  delete [] ctrans;
481  }
482 
483  inline void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB,
484  const integer m, const integer n, const integer k,
485  const complex_real8 alpha, const real8* a, const integer lda,
486  const complex_real8* b, const integer ldb, const complex_real8 beta,
487  complex_real8* c, const integer ldc) {
488  static const char *op[] = {"n", "t", "c"};
489  F77_DZGEMM(op[OpA], op[OpB], &m, &n, &k, cblas::to_zptr(&alpha), a, &lda,
491  cblas::to_zptr(c), &ldc);
492  }
493 
494 #endif
495 
496 
497  ///@}
498 
499  /// Multiplies a matrix by a vector
500 
501  /// \f[
502  /// \mathbf{y} \leftarrow \alpha \mathbf{A}^{\mathrm{OpA}} \mathbf{x} + \beta \mathbf{y}
503  /// \f]
504  /// \param OpA Operation to be applied to matrix \f$ \mathbf{A} \f$
505  /// \param m Rows in matrix \f$ \mathbf{A} \f$
506  /// \param n Columns in matrix \f$ \mathbf{A} \f$
507  /// \param alpha Scaling factor applied to \f$ \mathbf{A} \f$ \c * \f$ \mathbf{x} \f$
508  /// \param A Pointer to matrix \f$ \mathbf{A} \f$
509  /// \param lda The size of the leading-order dimension of matrix \f$ \mathbf{A} \f$
510  /// \param x Pointer to vector \f$ \mathbf{x} \f$
511  /// \param incx Stride of vector \f$ \mathbf{x} \f$
512  /// \param beta Scaling factor for vector \f$ \mathbf{y} \f$
513  /// \param y Pointer to vector \f$ \mathbf{y} \f$
514  /// \param incy Stride of vector \f$ \mathbf{y} \f$
515  ///@{
516  inline void gemv(const CBLAS_TRANSPOSE OpA, const integer m, const integer n,
517  const float alpha, const float *A, const integer lda, const float *x,
518  const integer incx, const float beta, float *y, const integer incy)
519  {
520  MADNESS_ASSERT(OpA != ConjTrans);
521  static const char *op[] = { "n","t" };
522  F77_SGEMV(op[OpA], &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy);
523  }
524 
525  inline void gemv(const CBLAS_TRANSPOSE OpA, const integer m, const integer n,
526  const double alpha, const double *A, const integer lda, const double *x,
527  const integer incx, const double beta, double *y, const integer incy)
528  {
529  MADNESS_ASSERT(OpA != ConjTrans);
530  static const char *op[] = { "n","t" };
531  F77_DGEMV(op[OpA], &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy);
532  }
533 
534  inline void gemv(const CBLAS_TRANSPOSE OpA, const integer m, const integer n,
535  const complex_real4 alpha, const complex_real4 *A, const integer lda,
536  const complex_real4 *x, const integer incx, const complex_real4 beta,
537  complex_real4 *y, const integer incy) {
538  static const char *op[] = {"n", "t", "c"};
540  &lda, cblas::to_cptr(x), &incx, cblas::to_cptr(&beta),
541  cblas::to_cptr(y), &incy);
542  }
543 
544  inline void gemv(const CBLAS_TRANSPOSE OpA, const integer m, const integer n,
545  const complex_real8 alpha, const complex_real8 *A, const integer lda,
546  const complex_real8 *x, const integer incx, const complex_real8 beta,
547  complex_real8 *y, const integer incy) {
548  static const char *op[] = {"n", "t", "c"};
550  &lda, cblas::to_zptr(x), &incx, cblas::to_zptr(&beta),
551  cblas::to_zptr(y), &incy);
552  }
553  ///@}
554 
555  /// Multiplies vector \f$ \mathbf{x} \f$ by the transform of vector \f$ \mathbf{y} \f$
556 
557  /// \f[
558  /// \mathbf{A} \leftarrow \alpha \mathbf{x} \mathbf{y}^{\mathrm{T}} + \mathbf{A}
559  /// \f]
560  /// \param m Rows in matrix \f$ \mathbf{A} \f$
561  /// \param n Columns in matrix \f$ \mathbf{A} \f$
562  /// \param alpha Scaling factor applied to \f$ \mathbf{x} \mathbf{y}^{\mathrm{T}} \f$
563  /// \param x Pointer to vector \f$ \mathbf{x} \f$
564  /// \param incx Stride of vector \f$ \mathbf{x} \f$
565  /// \param y Pointer to vector \f$ \mathbf{y} \f$
566  /// \param incy Stride of vector \f$ \mathbf{y} \f$
567  /// \param A Pointer to matrix \f$ \mathbf{A} \f$
568  /// \param lda The size of the leading-order dimension of matrix \f$ \mathbf{A} \f$
569  ///@{
570  inline void ger(const integer m, const integer n, const float alpha,
571  const float *x, const integer incx, const float *y, const integer incy,
572  float *A, const integer lda)
573  {
574  F77_SGER(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
575  }
576 
577  inline void ger(const integer m, const integer n, const double alpha,
578  const double *x, const integer incx, const double *y, const integer incy,
579  double *A, const integer lda)
580  {
581  F77_DGER(&m, &n, &alpha, x, &incx, y, &incy, A, &lda);
582  }
583 
584  inline void ger(const integer m, const integer n, const complex_real4 alpha,
585  const complex_real4 *x, const integer incx, const complex_real4 *y,
586  const integer incy, complex_real4 *A, const integer lda) {
587  F77_CGER(&m, &n, &alpha, x, &incx,
588  y, &incy, A, &lda);
589  }
590 
591  inline void ger(const integer m, const integer n, const complex_real8 alpha,
592  const complex_real8 *x, const integer incx, const complex_real8 *y,
593  const integer incy, complex_real8 *A, const integer lda) {
594  F77_ZGER(&m, &n, &alpha, x, &incx,
595  y, &incy, A, &lda);
596  }
597  ///@}
598 
599  /// Compute the dot product of vectors \f$ \mathbf{x} \f$ and \f$ \mathbf{y} \f$
600 
601  /// \f[
602  /// u \leftarrow \alpha \mathbf{x} \cdot \mathbf{y}
603  /// \f]
604  /// \param n Size of the vectors \f$ \mathbf{x} \f$ and \f$ \mathbf{y} \f$
605  /// \param x Pointer to vector \f$ \mathbf{x} \f$
606  /// \param incx Stride of vector \f$ \mathbf{x} \f$
607  /// \param y Pointer to vector \f$ \mathbf{y} \f$
608  /// \param incy Stride of vector \f$ \mathbf{y} \f$
609  /// \return The dot product of \c x and \c y
610  ///@{
611  inline float dot(const integer n, const float* x, const integer incx,
612  const float* y, const integer incy)
613  {
614  return F77_SDOT(&n, x, &incx, y, &incy);
615  }
616 
617  inline double dot(const integer n, const double* x, const integer incx,
618  const double* y, const integer incy)
619  {
620  return F77_DDOT(&n, x, &incx, y, &incy);
621  }
622 
623  inline complex_real4 dot(const integer n, const complex_real4* x,
624  const integer incx, const complex_real4* y, const integer incy)
625  {
626  complex_real4 result(0.0, 0.0);
627  F77_CDOTU(cblas::to_cptr(&result), &n, cblas::to_cptr(x), &incx, cblas::to_cptr(y), &incy);
628  return result;
629  }
630 
631  inline complex_real8 dot(const integer n, const complex_real8* x,
632  const integer incx, const complex_real8* y, const integer incy)
633  {
634  complex_real8 result(0.0, 0.0);
635  F77_ZDOTU(cblas::to_zptr(&result), &n, cblas::to_zptr(x), &incx, cblas::to_zptr(y), &incy);
636  return result;
637  }
638  ///@}
639 
640  /// Scale a vector
641 
642  /// \f[
643  /// \mathbf{x} \leftarrow \alpha \mathbf{x}
644  /// \f]
645  /// \param n The size of the vector
646  /// \param alpha The scaling factor for vector \f$ \mathbf{x} \f$
647  /// \param x Pointer to vector \f$ \mathbf{x} \f$
648  /// \param incx Stride for vector \f$ \mathbf{x} \f$
649  ///@{
650  inline void scal(const integer n, const float alpha, float* x, const integer incx) {
651  F77_SSCAL(&n, &alpha, x, &incx);
652  }
653 
654  inline void scal(const integer n, const double alpha, double* x, const integer incx) {
655  F77_DSCAL(&n, &alpha, x, &incx);
656  }
657 
658  inline void scal(const integer n, const complex_real4 alpha, complex_real4* x, const integer incx) {
659  F77_CSCAL(&n, cblas::to_cptr(&alpha), cblas::to_cptr(x), &incx);
660  }
661 
662  inline void scal(const integer n, const complex_real8 alpha, complex_real8* x, const integer incx) {
663  F77_ZSCAL(&n, cblas::to_zptr(&alpha), cblas::to_zptr(x), &incx);
664  }
665 
666  inline void scal(const integer n, const float alpha, complex_real4* x, const integer incx) {
667  F77_CSSCAL(&n, &alpha, cblas::to_cptr(x), &incx);
668  }
669 
670  inline void scal(const integer n, const double alpha, complex_real8* x, const integer incx) {
671  F77_ZDSCAL(&n, &alpha, cblas::to_zptr(x), &incx);
672  }
673  ///@}
674 
675  /// Scale and add a vector to another
676 
677  /// \f[
678  /// \mathbf{y} \leftarrow \alpha \mathbf{x} + \mathbf{y}
679  /// \f]
680  /// \param n The size of the vector
681  /// \param alpha The scaling factor for vector \f$ \mathbf{x} \f$
682  /// \param x Pointer to vector \f$ \mathbf{x} \f$
683  /// \param incx Stride for vector \f$ \mathbf{x} \f$
684  /// \param y Pointer to vector \f$ \mathbf{y} \f$
685  /// \param incy Stride for vector \f$ \mathbf{y} \f$
686  ///@{
687  inline void axpy(const integer n, const float alpha, float* x, const integer incx,
688  float* y, const integer incy) {
689  F77_SAXPY(&n, &alpha, x, &incx, y, &incy);
690  }
691 
692  inline void axpy(const integer n, const double alpha, double* x, const integer incx,
693  double* y, const integer incy) {
694  F77_DAXPY(&n, &alpha, x, &incx, y, &incy);
695  }
696 
697  inline void axpy(const integer n, const complex_real4 alpha, complex_real4* x, const integer incx,
698  complex_real4* y, const integer incy) {
699  F77_CAXPY(&n, cblas::to_cptr(&alpha), cblas::to_cptr(x), &incx, cblas::to_cptr(y), &incy);
700  }
701 
702  inline void axpy(const integer n, const complex_real8 alpha, complex_real8* x, const integer incx,
703  complex_real8* y, const integer incy) {
704  F77_ZAXPY(&n, cblas::to_zptr(&alpha), cblas::to_zptr(x), &incx, cblas::to_zptr(y), &incy);
705  }
706  ///@}
707 
708 
709 } // namespace cblas
710 } // namespace madness
711 
712 MADNESS_PRAGMA_CLANG(diagnostic pop)
713 MADNESS_PRAGMA_GCC(diagnostic pop)
714 
715 #endif // MADNESS_LINALG_CBLAS_H__INCLUDED
716 
void F77_SGEMV(const char *, const integer *, const integer *, const float *, const float *, const integer *, const float *, const integer *, const float *, float *, const integer *)
float F77_SDOT(const integer *, const float *, const integer *, const float *, const integer *)
void F77_CAXPY(const integer *, const complex_real4 *, const complex_real4 *, const integer *, complex_real4 *, const integer *)
void F77_DGEMV(const char *, const integer *, const integer *, const double *, const double *, const integer *, const double *, const integer *, const double *, double *, const integer *)
void F77_SSCAL(const integer *, const float *, float *, const integer *)
void F77_SGEMM(const char *, const char *, const integer *, const integer *, const integer *, const float *, const float *, const integer *, const float *, const integer *, const float *, float *, const integer *)
void F77_ZGEMM(const char *, const char *, const integer *, const integer *, const integer *, const complex_real8 *, const complex_real8 *, const integer *, const complex_real8 *, const integer *, const complex_real8 *, complex_real8 *, const integer *)
void F77_ZGER(const integer *, const integer *, const complex_real8 *, const complex_real8 *, const integer *, const complex_real8 *, const integer *, complex_real8 *, const integer *)
void F77_DGEMM(const char *, const char *, const integer *, const integer *, const integer *, const double *, const double *, const integer *, const double *, const integer *, const double *, double *, const integer *)
void F77_ZDOTU(complex_real8 *, const integer *, const complex_real8 *, const integer *, const complex_real8 *, const integer *)
void F77_CDOTU(complex_real4 *, const integer *, const complex_real4 *, const integer *, const complex_real4 *, const integer *)
void F77_SGER(const integer *, const integer *, const float *, const float *, const integer *, const float *, const integer *, float *, const integer *)
void F77_ZSCAL(const integer *, const complex_real8 *, complex_real8 *, const integer *)
void F77_CSSCAL(const integer *, const float *, complex_real4 *, const integer *)
void F77_SAXPY(const integer *, const float *, const float *, const integer *, float *, const integer *)
void F77_DGER(const integer *, const integer *, const double *, const double *, const integer *, const double *, const integer *, double *, const integer *)
double F77_DDOT(const integer *, const double *, const integer *, const double *, const integer *)
void F77_ZDSCAL(const integer *, const double *, complex_real8 *, const integer *)
void F77_CGEMV(const char *, const integer *, const integer *, const complex_real4 *, const complex_real4 *, const integer *, const complex_real4 *, const integer *, const complex_real4 *, complex_real4 *, const integer *)
void F77_DAXPY(const integer *, const double *, const double *, const integer *, double *, const integer *)
void F77_ZAXPY(const integer *, const complex_real8 *, const complex_real8 *, const integer *, complex_real8 *, const integer *)
void F77_DSCAL(const integer *, const double *, double *, const integer *)
void F77_CSCAL(const integer *, const complex_real4 *, complex_real4 *, const integer *)
void F77_CGEMM(const char *, const char *, const integer *, const integer *, const integer *, const complex_real4 *, const complex_real4 *, const integer *, const complex_real4 *, const integer *, const complex_real4 *, complex_real4 *, const integer *)
void F77_CGER(const integer *, const integer *, const complex_real4 *, const complex_real4 *, const integer *, const complex_real4 *, const integer *, complex_real4 *, const integer *)
void F77_ZGEMV(const char *, const integer *, const integer *, const complex_real8 *, const complex_real8 *, const integer *, const complex_real8 *, const integer *, const complex_real8 *, complex_real8 *, const integer *)
Define types used by CBLAS API.
Definition: test_ar.cc:118
int integer
Definition: crayio.c:25
Correspondence between C++ and Fortran types.
std::complex< double > complex_real8
Fortran double complex.
Definition: fortran_ctypes.h:83
std::complex< float > complex_real4
Fortran single complex.
Definition: fortran_ctypes.h:88
double real8
Fortran double precision.
Definition: fortran_ctypes.h:73
float real4
Fortran single precision.
Definition: fortran_ctypes.h:78
const double m
Definition: gfit.cc:199
const double beta
Definition: gygi_soltion.cc:62
Tensor< double > op(const Tensor< double > &x)
Definition: kain.cc:508
Macros and tools pertaining to the configuration of MADNESS.
#define MADNESS_PRAGMA_CLANG(x)
Definition: madness_config.h:200
#define MADNESS_PRAGMA_GCC(x)
Definition: madness_config.h:205
Defines madness::MadnessException for exception handling.
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition: madness_exception.h:134
void gemv(const CBLAS_TRANSPOSE OpA, const integer m, const integer n, const float alpha, const float *A, const integer lda, const float *x, const integer incx, const float beta, float *y, const integer incy)
Multiplies a matrix by a vector.
Definition: cblas.h:516
void ger(const integer m, const integer n, const float alpha, const float *x, const integer incx, const float *y, const integer incy, float *A, const integer lda)
Multiplies vector by the transform of vector .
Definition: cblas.h:570
void scal(const integer n, const float alpha, float *x, const integer incx)
Scale a vector.
Definition: cblas.h:650
float dot(const integer n, const float *x, const integer incx, const float *y, const integer incy)
Compute the dot product of vectors and .
Definition: cblas.h:611
void axpy(const integer n, const float alpha, float *x, const integer incx, float *y, const integer incy)
Scale and add a vector to another.
Definition: cblas.h:687
void gemm(const CBLAS_TRANSPOSE OpA, const CBLAS_TRANSPOSE OpB, const integer m, const integer n, const integer k, const float alpha, const float *a, const integer lda, const float *b, const integer ldb, const float beta, float *c, const integer ldc)
Multiplies a matrix by a vector.
Definition: cblas.h:352
const blas_complex_float * to_cptr(const T *ptr)
Definition: cblas_types.h:86
const blas_complex_double * to_zptr(const T *ptr)
Definition: cblas_types.h:99
CBLAS_TRANSPOSE
Matrix operations for BLAS function calls.
Definition: cblas_types.h:77
@ ConjTrans
Definition: cblas_types.h:80
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
double pop(std::vector< double > &v)
Definition: timer.cc:11
static const double b
Definition: nonlinschro.cc:119
static const double a
Definition: nonlinschro.cc:118
static const double c
Definition: relops.cc:10
static const long k
Definition: rk.cc:44
const double alpha
Definition: test_coulomb.cc:54