MADNESS 0.10.1
elem.h
Go to the documentation of this file.
1/*
2 This file is part of MADNESS.
3
4 Copyright (C) 2007,2010 Oak Ridge National Laboratory
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 For more information please contact:
21
22 Robert J. Harrison
23 Oak Ridge National Laboratory
24 One Bethel Valley Road
25 P.O. Box 2008, MS-6367
26
27 email: harrisonrj@ornl.gov
28 tel: 865-241-3937
29 fax: 865-572-0680
30*/
31
32#ifndef MADNESS_TENSOR_ELEM_H__INCLUDED
33#define MADNESS_TENSOR_ELEM_H__INCLUDED
34
37
38#ifdef MADNESS_HAS_ELEMENTAL_EMBEDDED
39
43#include <madness/world/print.h>
45
46#include <ctime>
47#if defined(HAVE_EL_H)
48# include <El.hpp>
49namespace elem = El;
50#elif defined(HAVE_ELEMENTAL_H)
51# include <elemental.hpp>
52#else
53# error "MADNESS_HAS_ELEMENTAL set but neither HAVE_EL_H nor HAVE_ELEMENTAL_H set: file an issue at " MADNESS_PACKAGE_URL
54#endif
55
56namespace madness {
57
58 namespace detail {
59
60 template <typename T>
61 struct Value {
62 int i;
63 int j;
64 T t;
65 Value(int i, int j, T t) : i(i), j(j), t(t) {}
66 Value(){} // required for use in STL container
67 template <typename Archive>
68 void serialize(const Archive& ar) {
69 ar & i & j & t;
70 }
71 };
72
73 template <typename T>
74 class MadToElemDistCopy {
75 elem::DistMatrix<T>& d;
76 public:
77 MadToElemDistCopy(elem::DistMatrix<T>& d) : d(d) {}
78
79 void operator()(const Value<T>& v) {
80 d.Set(v.i, v.j, v.t);
81 }
82 };
83
84 template <typename T>
85 class ElemToMadDistCopy {
86 DistributedMatrix<T>& d;
87 public:
88 ElemToMadDistCopy(DistributedMatrix<T>& d) : d(d) {}
89
90 void operator()(const Value<T>& v) {
91 d.set(v.i, v.j, v.t);
92 }
93 };
94
95 }
96
97
98 /// Backport of more recent Elemental DistMatrix API
99 template <typename T>
100 ProcessID Owner(const elem::DistMatrix<T>& d, int i, int j) {
101 int RowOwner = (i+d.ColAlign()) % d.ColStride(); // is Col/Row Align in later versions ... no ment
102 int ColOwner = (j+d.RowAlign()) % d.RowStride();
103 return RowOwner+ColOwner*d.ColStride();
104 }
105
106
107 /// Copy a MADNESS distributed matrix into an Elemental distributed matrix
108
109 /// Should work for any distribution of either
110 template <typename T>
111 void copy_to_elemental(const DistributedMatrix<T>& din, elem::DistMatrix<T>& dout) {
112 BinSorter< detail::Value<T> , detail::MadToElemDistCopy<T> > s(din.get_world(), detail::MadToElemDistCopy<T>(dout));
113
114 int64_t ilo, ihi, jlo, jhi;
115 din.local_colrange(ilo,ihi);
116 din.local_rowrange(jlo,jhi);
117 const Tensor<T>& t = din.data();
118 for (int64_t i=ilo; i<=ihi; i++) {
119 for (int64_t j=jlo; j<=jhi; j++) {
120 ProcessID owner = Owner(dout, i, j);
121 s.insert(owner, detail::Value<T>(i,j,t(i-ilo, j-jlo)));
122 }
123 }
124
125 s.finish();
126 }
127
128 /// Copy a MADNESS distributed matrix from an Elemental distributed matrix
129
130 /// Should work for any distribution of either
131 template <typename T>
132 void copy_from_elemental(const elem::DistMatrix<T>& din, DistributedMatrix<T>& dout) {
133 BinSorter< detail::Value<T> , detail::ElemToMadDistCopy<T> > s(dout.get_world(), detail::ElemToMadDistCopy<T>(dout));
134
135 const int64_t colShift = din.ColShift(); // first row we own
136 const int64_t rowShift = din.RowShift(); // first col we own
137 const int64_t colStride = din.ColStride();
138 const int64_t rowStride = din.RowStride();
139 const int64_t localHeight = din.LocalHeight();
140 const int64_t localWidth = din.LocalWidth();
141
142 for( int64_t jLocal=0; jLocal<localWidth; ++jLocal ) {
143 for( int64_t iLocal=0; iLocal<localHeight; ++iLocal ) {
144 const int64_t i = colShift + iLocal*colStride;
145 const int64_t j = rowShift + jLocal*rowStride;
146
147 const ProcessID owner = dout.owner(i,j);
148 s.insert(owner, detail::Value<T>(i,j,din.GetLocal(iLocal,jLocal)));
149 }
150 }
151
152 s.finish();
153 }
154
155 /** \brief Generalized real-symmetric or complex-Hermitian eigenproblem.
156
157 This function uses the Elemental HermitianGenDefiniteEig routine.
158
159 A should be selfadjoint and B positive definite.
160
161 \verbatim
162 Specifies the problem type to be solved:
163 = 1: A*x = (lambda)*B*x
164 = 2: A*B*x = (lambda)*x (TODO)
165 = 3: B*A*x = (lambda)*x (TODO)
166 \endverbatim
167
168 */
169 template <typename T>
170 void sygv(const DistributedMatrix<T>& A,
171 const DistributedMatrix<T>& B,
172 int itype,
173 DistributedMatrix<T>& X,
174 Tensor< typename Tensor<T>::scalar_type >& e)
175 {
176 const int64_t n = A.coldim();
177 const int64_t m = A.rowdim();
178 MADNESS_ASSERT(n==m);
179 MADNESS_ASSERT(n==B.coldim() && m==B.rowdim());
180
181 const int blocksize = 128;
182 const elem::Grid GG(A.get_world().mpi.comm().Get_mpi_comm() );
183 elem::SetBlocksize(blocksize);
184
185 elem::DistMatrix<T> EA(n,n,GG);
186 elem::DistMatrix<T> EB(n,n,GG);
187
188 copy_to_elemental(A,EA);
189 copy_to_elemental(B,EB);
190
191 elem::HermitianGenDefiniteEigType eigType = elem::AXBX;
192 elem::UpperOrLower uplo = elem::CharToUpperOrLower('U');
193 elem::DistMatrix<T> Xd(n,n,GG);
194 elem::DistMatrix<T,elem::VR,elem::STAR> wd( n, n, GG);
195
196 // 0.83+ ???
197 elem::HermitianGenDefiniteEig(eigType, uplo, EA, EB, wd, Xd, elem::SortType::ASCENDING);
198
199 // 0.79-0.82 ?
200 //elem::HermitianGenDefiniteEig(eigType, uplo, EA, EB, wd, Xd);
201 //elem::hermitian_eig::Sort(wd, Xd);
202
203 A.get_world().mpi.Barrier();
204
205 X = DistributedMatrix<T>(A.distribution());
206 e = Tensor<typename Tensor<T>::scalar_type>(n);
207
208 copy_from_elemental(Xd, X);
209
210 const int localHeight1 = wd.LocalHeight();
211 const int colShift1 = wd.ColShift(); // first row we own
212 const int colStride1= wd.ColStride();
213 T * buffer = e.ptr();
214 for( int iLocal=0; iLocal<localHeight1; ++iLocal ) {
215 const int jLocal=0;
216 const int i = colShift1 + iLocal*colStride1;
217 //buffer[i]= wd.Get( iLocal, jLocal);
218 buffer[i]= wd.GetLocal( iLocal, jLocal);
219 }
220
221 A.get_world().gop.sum(e.ptr(),n);
222 }
223
224 /** \brief Generalized real-symmetric or complex-Hermitian eigenproblem.
225
226 This function uses the Elemental HermitianGenDefiniteEig routine.
227
228 A should be selfadjoint and B positive definite.
229
230 \verbatim
231 Specifies the problem type to be solved:
232 = 1: A*x = (lambda)*B*x
233 = 2: A*B*x = (lambda)*x (TODO)
234 = 3: B*A*x = (lambda)*x (TODO)
235 \endverbatim
236
237 */
238 template <typename T>
239 void sygvp(World& world,
240 const Tensor<T>& a, const Tensor<T>& B, int itype,
241 Tensor<T>& V, Tensor< typename Tensor<T>::scalar_type >& e) {
242 TENSOR_ASSERT(a.ndim() == 2, "sygv requires a matrix",a.ndim(),&a);
243 TENSOR_ASSERT(a.dim(0) == a.dim(1), "sygv requires square matrix",0,&a);
244 TENSOR_ASSERT(B.ndim() == 2, "sygv requires a matrix",B.ndim(),&a);
245 TENSOR_ASSERT(B.dim(0) == B.dim(1), "sygv requires square matrix",0,&a);
246 TENSOR_ASSERT(a.iscontiguous(),"sygvp requires a contiguous matrix (a)",0,&a);
247 TENSOR_ASSERT(B.iscontiguous(),"sygvp requires a contiguous matrix (B)",0,&B);
248
249 world.gop.broadcast(a.ptr(), a.size(), 0);
250 world.gop.broadcast(B.ptr(), B.size(), 0);
251
252 const int n = a.dim(1);
253
254 e = Tensor<typename Tensor<T>::scalar_type>(n);
255 V = Tensor<T>(n,n);
256
257 if (a.dim(0) <= 4) {
258 // Work around bug in elemental/pmrrr/mkl/something for n=2,3
259 sygv(a, B, itype, V, e);
260 return;
261 }
262
263 world.gop.fence(); //<<<<<< Essential to quiesce MADNESS threads/comms
264
265 const int blocksize = 128;
266 try {
267 const elem::Grid GG( world.mpi.comm().Get_mpi_comm() );
268 elem::SetBlocksize(blocksize);
269
270 elem::DistMatrix<T> gd( n, n, GG );
271
272 const int colShift = gd.ColShift(); // first row we own
273 const int rowShift = gd.RowShift(); // first col we own
274 const int colStride =gd.ColStride();
275 const int rowStride = gd.RowStride();
276 const int localHeight = gd.LocalHeight();
277 const int localWidth = gd.LocalWidth();
278 {
279 const T * buffer = a.ptr();
280 for( int jLocal=0; jLocal<localWidth; ++jLocal )
281 {
282 for( int iLocal=0; iLocal<localHeight; ++iLocal )
283 {
284 const int i = colShift + iLocal*colStride;
285 const int j = rowShift + jLocal*rowStride;
286 //gd.Set( iLocal, jLocal, buffer[i+j*n] );
287 gd.SetLocal( iLocal, jLocal, buffer[i+j*n] );
288 }
289 }
290 }
291 //gd.Print("gs");
292
293 elem::DistMatrix<T> hd( n, n, GG );
294 {
295 const T * buffer = B.ptr();
296 for( int jLocal=0; jLocal<localWidth; ++jLocal )
297 {
298 for( int iLocal=0; iLocal<localHeight; ++iLocal )
299 {
300 const int i = colShift + iLocal*colStride;
301 const int j = rowShift + jLocal*rowStride;
302 //hd.Set( iLocal, jLocal, buffer[i+j*(n)] );
303 hd.SetLocal( iLocal, jLocal, buffer[i+j*(n)] );
304 }
305 }
306 }
307 // hd.Print("hd");
308
309 world.mpi.Barrier();
310
311 elem::HermitianGenDefiniteEigType eigType = elem::AXBX;
312 //const char* uu="U";
313 elem::UpperOrLower uplo = elem::CharToUpperOrLower('U');
314 elem::DistMatrix<T> Xd( n, n, GG );
315 elem::DistMatrix<T,elem::VR,elem::STAR> wd( n, n, GG);
316
317
318 // 0.83+ ???
319 elem::HermitianGenDefiniteEig( eigType, uplo, gd, hd, wd, Xd,elem::SortType::ASCENDING);
320
321 // 0.79-0.82 ?
322 //elem::HermitianGenDefiniteEig( eigType, uplo, gd, hd, wd, Xd);
323 //elem::hermitian_eig::Sort( wd, Xd );
324
325 world.mpi.Barrier();
326 //Xd.Print("Xs");
327
328 //retrive eigenvalues
329 {
330 const int localHeight1 = wd.LocalHeight();
331 const int colShift1 = wd.ColShift(); // first row we own
332 const int colStride1 =wd.ColStride();
333 T * buffer = e.ptr();
334 for( int iLocal=0; iLocal<localHeight1; ++iLocal )
335 {
336 const int jLocal=0;
337 const int i = colShift1 + iLocal*colStride1;
338 //buffer[i]= wd.Get( iLocal, jLocal);
339 buffer[i]= wd.GetLocal( iLocal, jLocal);
340 }
341 }
342 //world.gop.broadcast(e.ptr(),e.size(), 0);
343 world.gop.sum(e.ptr(),n);
344 //if(myrank ==0) cout<< e << endl;
345 //retrive eigenvectors
346 {
347 T * buffer = V.ptr();
348 for( int jLocal=0; jLocal<localWidth; ++jLocal )
349 {
350 for( int iLocal=0; iLocal<localHeight; ++iLocal )
351 {
352 const int i = colShift + iLocal*colStride;
353 const int j = rowShift + jLocal*rowStride;
354 //buffer[i+j*n]= Xd.Get( iLocal, jLocal);
355 buffer[i+j*n]= Xd.GetLocal( iLocal, jLocal);
356 }
357 }
358 }
359 world.gop.sum(V.ptr(), n*n);
361 //world.gop.broadcast(V.ptr(),V.size(), 0);
362 //if(myrank ==0)cout<< V << endl;
363 }
364 catch (TensorException S) {
365 std::cerr << S << std::endl;
366 }
367
368 world.gop.fence(); //<<<<<< Essential to quiesce MADNESS threads/comms
369 }
370
371 /** \brief Solve Ax = b for general A using the Elemental.
372 The solution is computed through (partially pivoted) Gaussian elimination.
373
374 A should be a square matrix (float, double, float_complex,
375 double_complex) and b should be either a vector, or a matrix with
376 each vector stored in a column (i.e., b[n,nrhs]).
377
378 It will solve Ax=b as written.
379
380 b can be a vector or a matrix, the only restriction is that satisfies b.rows()==A.rows()
381
382 */
383 template <typename T>
384 void gesvp(World& world, const Tensor<T>& a, const Tensor<T>& b, Tensor<T>& x) {
385
386 TENSOR_ASSERT(a.ndim() == 2, "gesv requires matrix",a.ndim(),&a);
387
388 int n = a.dim(0), m = a.dim(1), nrhs = b.dim(1);
389
390 TENSOR_ASSERT(m == n, "gesv requires square matrix",0,&a);
391 TENSOR_ASSERT(b.ndim() <= 2, "gesv require a vector or matrix for the RHS",b.ndim(),&b);
392 TENSOR_ASSERT(a.dim(0) == b.dim(0), "gesv matrix and RHS must conform",b.ndim(),&b);
393 TENSOR_ASSERT(a.iscontiguous(),"gesvp requires a contiguous matrix (a)",0,&a);
394 TENSOR_ASSERT(b.iscontiguous(),"gesvp requires a contiguous matrix (b)",0,&b);
395 world.gop.broadcast(a.ptr(), a.size(), 0);
396 world.gop.broadcast(b.ptr(), b.size(), 0);
397
398 Tensor<T> AT = transpose(a);
399
400 world.gop.fence(); //<<<<<< Essential to quiesce MADNESS threads/comms
401
402 int blocksize = 128;
403 try {
404 const elem::Grid GG( world.mpi.comm().Get_mpi_comm() );
405 elem::SetBlocksize(blocksize);
406 elem::DistMatrix<T> gd( n, n, GG );
407
408
409 {
410 const int colShift = gd.ColShift(); // 1st row local
411 const int rowShift = gd.RowShift(); // 1st col local
412 const int colStride =gd.ColStride();
413 const int rowStride = gd.RowStride();
414 const int localHeight = gd.LocalHeight();
415 const int localWidth = gd.LocalWidth();
416 {
417 const T * buffer = AT.ptr();
418 for( int jLocal=0; jLocal<localWidth; ++jLocal )
419 {
420 for( int iLocal=0; iLocal<localHeight; ++iLocal )
421 {
422 const int i = colShift + iLocal*colStride;
423 const int j = rowShift + jLocal*rowStride;
424 //gd.Set( iLocal, jLocal, buffer[i+j*n] );
425 gd.SetLocal( iLocal, jLocal, buffer[i+j*n] );
426 }
427 }
428 }
429 }
430 Tensor<T> bT;
431 if (nrhs == 1) {
432 x = Tensor<T>(n);
433 bT = Tensor<T>(n);
434 bT = copy(b);
435 x = copy(b); //creating the correct size
436 }
437 else {
438 x = Tensor<T>(n,nrhs);
439 bT = transpose(b);
440 }
441
442 for(int i=0; i< x.size(); ++i) {
443 T * buffer = x.ptr() ;
444 buffer[i] = 0.0;
445 }
446
447 //cout << "Caught a tensor exception \n";
448 //cout << bT <<endl;
449 elem::DistMatrix<T> hd( n, nrhs, GG );
450 {
451 const int colShift = hd.ColShift(); // 1st row local
452 const int rowShift = hd.RowShift(); // 1st col local
453 const int colStride =hd.ColStride();
454 const int rowStride = hd.RowStride();
455 const int localHeight = hd.LocalHeight();
456 const int localWidth = hd.LocalWidth();
457 {
458 const T * buffer = bT.ptr();
459 for( int jLocal=0; jLocal<localWidth; ++jLocal )
460 {
461 for( int iLocal=0; iLocal<localHeight; ++iLocal )
462 {
463 const int i = colShift + iLocal*colStride;
464 const int j = rowShift + jLocal*rowStride;
465 //hd.Set( iLocal, jLocal, buffer[i+j*(n)]);
466 hd.SetLocal( iLocal, jLocal, buffer[i+j*(n)]);
467
468 }
469 }
470 }
471 }
472 world.mpi.Barrier();
473
474 elem::GaussianElimination(gd, hd);
475
476 world.mpi.Barrier();
477 {
478 const int colShift = hd.ColShift(); // 1st row local
479 const int rowShift = hd.RowShift(); // 1st col local
480 const int colStride =hd.ColStride();
481 const int rowStride = hd.RowStride();
482 const int localHeight = hd.LocalHeight();
483 const int localWidth = hd.LocalWidth();
484
485 T * buffer = x.ptr();
486 for( int jLocal=0; jLocal<localWidth; ++jLocal )
487 {
488 for( int iLocal=0; iLocal<localHeight; ++iLocal )
489 {
490 const int i = colShift + iLocal*colStride;
491 const int j = rowShift + jLocal*rowStride;
492 //buffer[j+i*nrhs]= hd.Get( iLocal, jLocal);
493 buffer[j+i*nrhs]= hd.GetLocal( iLocal, jLocal);
494 }
495 }
496 }
497 world.gop.sum(x.ptr(), n*nrhs);
498 //world.gop.broadcast(x.ptr(),x.size(), 0);
499 //if(myrank ==0) cout<< x << endl;
500
501 }
502 catch (TensorException S) {
503 std::cerr << S << std::endl;
504 }
505 world.gop.fence(); //<<<<<< Essential to quiesce MADNESS threads/comms
506 }
507}
508
509#else
510
512
513namespace madness {
514 // sequential fall back code
515 template <typename T>
516 void sygvp(World& world,
517 const Tensor<T>& a, const Tensor<T>& B, int itype,
518 Tensor<T>& V, Tensor< typename Tensor<T>::scalar_type >& e) {
519 sygv(a, B, itype, V, e);
522 }
523
524 // sequential fall back code
525 template <typename T>
526 void gesvp(World& world, const Tensor<T>& a, const Tensor<T>& b, Tensor<T>& x) {
527 gesv(a, b, x);
528 }
529 // START BRYAN ADDITIONS
530 // sequential fall back code
531 template <typename T>
532 void geevp(World& world, const Tensor<T>& A, Tensor<T>& VR, Tensor<std::complex<T>>& e) {
533 geev(A, VR, e);
534 }
535
536 // sequential fall back code
537 template <typename T>
538 void ggevp(World& world, const Tensor<T>& A, Tensor<T>& B, Tensor<T>& VR,
539 Tensor<std::complex<T>>& e) {
540 ggev(A, B, VR, e);
541 }
542 // END BRYAN ADDITIONS
543}
544
545#endif //MADNESS_HAS_ELEMENTAL_EMBEDDED
546
547#endif // MADNESS_TENSOR_ELEM_H__INCLUDED
This header should include pretty much everything needed for the parallel runtime.
Definition test_ar.cc:118
Definition test_ar.cc:141
A tensor is a multidimension array.
Definition tensor.h:317
TensorTypeData< T >::scalar_type scalar_type
C++ typename of the real type associated with a complex type.
Definition tensor.h:409
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition worldgop.h:754
A parallel world class.
Definition world.h:132
WorldGopInterface & gop
Global operations.
Definition world.h:205
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
std::enable_if_t< ! is_default_serializable< Archive, T >::value &&is_archive< Archive >::value, void > serialize(const Archive &ar, const T *t, unsigned int n)
Serialize (or deserialize) an array of non-fundamental stuff.
Definition archive.h:497
static const double v
Definition hatom_sf_dirac.cc:20
Macros and tools pertaining to the configuration of MADNESS.
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
void sygv(const Tensor< T > &A, const Tensor< T > &B, int itype, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Generalized real-symmetric or complex-Hermitian eigenproblem.
Definition lapack.cc:1052
void geevp(World &world, const Tensor< T > &A, Tensor< T > &VR, Tensor< std::complex< T > > &e)
Definition elem.h:532
response_space transpose(response_space &f)
Definition basic_operators.cc:10
void gesv(const Tensor< T > &a, const Tensor< T > &b, Tensor< T > &x)
Solve Ax = b for general A using the LAPACK *gesv routines.
Definition lapack.cc:804
void gesvp(World &world, const Tensor< T > &a, const Tensor< T > &b, Tensor< T > &x)
Definition elem.h:526
void geev(const Tensor< T > &A, Tensor< T > &VR, Tensor< std::complex< T > > &e)
Real non-symmetric or complex non-Hermitian eigenproblem.
Definition lapack.cc:1008
void ggev(const Tensor< T > &A, Tensor< T > &B, Tensor< T > &VR, Tensor< std::complex< T > > &e)
Generalized real-non-symmetric or complex-non-Hermitian eigenproblem.
Definition lapack.cc:1115
void ggevp(World &world, const Tensor< T > &A, Tensor< T > &B, Tensor< T > &VR, Tensor< std::complex< T > > &e)
Definition elem.h:538
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition mra.h:2002
void sygvp(World &world, const Tensor< T > &a, const Tensor< T > &B, int itype, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Definition elem.h:516
static const double b
Definition nonlinschro.cc:119
static const double d
Definition nonlinschro.cc:121
static const double a
Definition nonlinschro.cc:118
Defines simple templates for printing to std::cout "a la Python".
static const double m
Definition relops.cc:9
static double V(const coordT &r)
Definition tdse.cc:288
Defines and implements most of Tensor.
Prototypes for a partial interface from Tensor to LAPACK.
#define TENSOR_ASSERT(condition, msg, value, t)
Definition tensorexcept.h:130
void e()
Definition test_sig.cc:75
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43