32#ifndef MADNESS_TENSOR_ELEM_H__INCLUDED
33#define MADNESS_TENSOR_ELEM_H__INCLUDED
38#ifdef MADNESS_HAS_ELEMENTAL_EMBEDDED
50#elif defined(HAVE_ELEMENTAL_H)
51# include <elemental.hpp>
53# error "MADNESS_HAS_ELEMENTAL set but neither HAVE_EL_H nor HAVE_ELEMENTAL_H set: file an issue at " MADNESS_PACKAGE_URL
65 Value(
int i,
int j,
T t) : i(i), j(j), t(t) {}
67 template <
typename Archive>
74 class MadToElemDistCopy {
75 elem::DistMatrix<T>&
d;
77 MadToElemDistCopy(elem::DistMatrix<T>&
d) :
d(
d) {}
79 void operator()(
const Value<T>&
v) {
85 class ElemToMadDistCopy {
86 DistributedMatrix<T>&
d;
88 ElemToMadDistCopy(DistributedMatrix<T>&
d) :
d(
d) {}
90 void operator()(
const Value<T>&
v) {
100 ProcessID Owner(
const elem::DistMatrix<T>&
d,
int i,
int j) {
101 int RowOwner = (i+
d.ColAlign()) %
d.ColStride();
102 int ColOwner = (j+
d.RowAlign()) %
d.RowStride();
103 return RowOwner+ColOwner*
d.ColStride();
110 template <
typename T>
111 void copy_to_elemental(
const DistributedMatrix<T>& din, elem::DistMatrix<T>& dout) {
112 BinSorter< detail::Value<T> , detail::MadToElemDistCopy<T> >
s(din.get_world(), detail::MadToElemDistCopy<T>(dout));
114 int64_t ilo, ihi, jlo, jhi;
115 din.local_colrange(ilo,ihi);
116 din.local_rowrange(jlo,jhi);
117 const Tensor<T>& t = din.data();
118 for (int64_t i=ilo; i<=ihi; i++) {
119 for (int64_t j=jlo; j<=jhi; j++) {
121 s.insert(owner, detail::Value<T>(i,j,t(i-ilo, j-jlo)));
131 template <
typename T>
132 void copy_from_elemental(
const elem::DistMatrix<T>& din, DistributedMatrix<T>& dout) {
133 BinSorter< detail::Value<T> , detail::ElemToMadDistCopy<T> >
s(dout.get_world(), detail::ElemToMadDistCopy<T>(dout));
135 const int64_t colShift = din.ColShift();
136 const int64_t rowShift = din.RowShift();
137 const int64_t colStride = din.ColStride();
138 const int64_t rowStride = din.RowStride();
139 const int64_t localHeight = din.LocalHeight();
140 const int64_t localWidth = din.LocalWidth();
142 for( int64_t jLocal=0; jLocal<localWidth; ++jLocal ) {
143 for( int64_t iLocal=0; iLocal<localHeight; ++iLocal ) {
144 const int64_t i = colShift + iLocal*colStride;
145 const int64_t j = rowShift + jLocal*rowStride;
148 s.insert(owner, detail::Value<T>(i,j,din.GetLocal(iLocal,jLocal)));
169 template <
typename T>
170 void sygv(
const DistributedMatrix<T>&
A,
171 const DistributedMatrix<T>&
B,
173 DistributedMatrix<T>& X,
176 const int64_t n =
A.coldim();
177 const int64_t
m =
A.rowdim();
181 const int blocksize = 128;
182 const elem::Grid GG(
A.get_world().mpi.comm().Get_mpi_comm() );
183 elem::SetBlocksize(blocksize);
185 elem::DistMatrix<T> EA(n,n,GG);
186 elem::DistMatrix<T> EB(n,n,GG);
188 copy_to_elemental(
A,EA);
189 copy_to_elemental(
B,EB);
191 elem::HermitianGenDefiniteEigType eigType = elem::AXBX;
192 elem::UpperOrLower uplo = elem::CharToUpperOrLower(
'U');
193 elem::DistMatrix<T> Xd(n,n,GG);
194 elem::DistMatrix<T,elem::VR,elem::STAR> wd( n, n, GG);
197 elem::HermitianGenDefiniteEig(eigType, uplo, EA, EB, wd, Xd, elem::SortType::ASCENDING);
203 A.get_world().mpi.Barrier();
205 X = DistributedMatrix<T>(
A.distribution());
206 e = Tensor<typename Tensor<T>::scalar_type>(n);
208 copy_from_elemental(Xd, X);
210 const int localHeight1 = wd.LocalHeight();
211 const int colShift1 = wd.ColShift();
212 const int colStride1= wd.ColStride();
213 T * buffer =
e.ptr();
214 for(
int iLocal=0; iLocal<localHeight1; ++iLocal ) {
216 const int i = colShift1 + iLocal*colStride1;
218 buffer[i]= wd.GetLocal( iLocal, jLocal);
221 A.get_world().gop.sum(
e.ptr(),n);
238 template <
typename T>
239 void sygvp(World& world,
240 const Tensor<T>&
a,
const Tensor<T>&
B,
int itype,
246 TENSOR_ASSERT(
a.iscontiguous(),
"sygvp requires a contiguous matrix (a)",0,&
a);
247 TENSOR_ASSERT(
B.iscontiguous(),
"sygvp requires a contiguous matrix (B)",0,&
B);
249 world.gop.broadcast(
a.ptr(),
a.size(), 0);
250 world.gop.broadcast(
B.ptr(),
B.size(), 0);
252 const int n =
a.dim(1);
254 e = Tensor<typename Tensor<T>::scalar_type>(n);
265 const int blocksize = 128;
267 const elem::Grid GG( world.mpi.comm().Get_mpi_comm() );
268 elem::SetBlocksize(blocksize);
270 elem::DistMatrix<T>
gd( n, n, GG );
272 const int colShift =
gd.ColShift();
273 const int rowShift =
gd.RowShift();
274 const int colStride =
gd.ColStride();
275 const int rowStride =
gd.RowStride();
276 const int localHeight =
gd.LocalHeight();
277 const int localWidth =
gd.LocalWidth();
279 const T * buffer =
a.ptr();
280 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
282 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
284 const int i = colShift + iLocal*colStride;
285 const int j = rowShift + jLocal*rowStride;
287 gd.SetLocal( iLocal, jLocal, buffer[i+j*n] );
293 elem::DistMatrix<T> hd( n, n, GG );
295 const T * buffer =
B.ptr();
296 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
298 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
300 const int i = colShift + iLocal*colStride;
301 const int j = rowShift + jLocal*rowStride;
303 hd.SetLocal( iLocal, jLocal, buffer[i+j*(n)] );
311 elem::HermitianGenDefiniteEigType eigType = elem::AXBX;
313 elem::UpperOrLower uplo = elem::CharToUpperOrLower(
'U');
314 elem::DistMatrix<T> Xd( n, n, GG );
315 elem::DistMatrix<T,elem::VR,elem::STAR> wd( n, n, GG);
319 elem::HermitianGenDefiniteEig( eigType, uplo,
gd, hd, wd, Xd,elem::SortType::ASCENDING);
330 const int localHeight1 = wd.LocalHeight();
331 const int colShift1 = wd.ColShift();
332 const int colStride1 =wd.ColStride();
333 T * buffer =
e.ptr();
334 for(
int iLocal=0; iLocal<localHeight1; ++iLocal )
337 const int i = colShift1 + iLocal*colStride1;
339 buffer[i]= wd.GetLocal( iLocal, jLocal);
343 world.gop.sum(
e.ptr(),n);
347 T * buffer =
V.ptr();
348 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
350 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
352 const int i = colShift + iLocal*colStride;
353 const int j = rowShift + jLocal*rowStride;
355 buffer[i+j*n]= Xd.GetLocal( iLocal, jLocal);
359 world.gop.sum(
V.ptr(), n*n);
364 catch (TensorException S) {
365 std::cerr << S << std::endl;
383 template <
typename T>
384 void gesvp(World& world,
const Tensor<T>&
a,
const Tensor<T>&
b, Tensor<T>& x) {
388 int n =
a.dim(0),
m =
a.dim(1), nrhs =
b.dim(1);
391 TENSOR_ASSERT(
b.ndim() <= 2,
"gesv require a vector or matrix for the RHS",
b.ndim(),&
b);
392 TENSOR_ASSERT(
a.dim(0) ==
b.dim(0),
"gesv matrix and RHS must conform",
b.ndim(),&
b);
393 TENSOR_ASSERT(
a.iscontiguous(),
"gesvp requires a contiguous matrix (a)",0,&
a);
394 TENSOR_ASSERT(
b.iscontiguous(),
"gesvp requires a contiguous matrix (b)",0,&
b);
395 world.gop.broadcast(
a.ptr(),
a.size(), 0);
396 world.gop.broadcast(
b.ptr(),
b.size(), 0);
404 const elem::Grid GG( world.mpi.comm().Get_mpi_comm() );
405 elem::SetBlocksize(blocksize);
406 elem::DistMatrix<T>
gd( n, n, GG );
410 const int colShift =
gd.ColShift();
411 const int rowShift =
gd.RowShift();
412 const int colStride =
gd.ColStride();
413 const int rowStride =
gd.RowStride();
414 const int localHeight =
gd.LocalHeight();
415 const int localWidth =
gd.LocalWidth();
417 const T * buffer = AT.ptr();
418 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
420 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
422 const int i = colShift + iLocal*colStride;
423 const int j = rowShift + jLocal*rowStride;
425 gd.SetLocal( iLocal, jLocal, buffer[i+j*n] );
438 x = Tensor<T>(n,nrhs);
442 for(
int i=0; i< x.size(); ++i) {
443 T * buffer = x.ptr() ;
449 elem::DistMatrix<T> hd( n, nrhs, GG );
451 const int colShift = hd.ColShift();
452 const int rowShift = hd.RowShift();
453 const int colStride =hd.ColStride();
454 const int rowStride = hd.RowStride();
455 const int localHeight = hd.LocalHeight();
456 const int localWidth = hd.LocalWidth();
458 const T * buffer = bT.ptr();
459 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
461 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
463 const int i = colShift + iLocal*colStride;
464 const int j = rowShift + jLocal*rowStride;
466 hd.SetLocal( iLocal, jLocal, buffer[i+j*(n)]);
474 elem::GaussianElimination(
gd, hd);
478 const int colShift = hd.ColShift();
479 const int rowShift = hd.RowShift();
480 const int colStride =hd.ColStride();
481 const int rowStride = hd.RowStride();
482 const int localHeight = hd.LocalHeight();
483 const int localWidth = hd.LocalWidth();
485 T * buffer = x.ptr();
486 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
488 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
490 const int i = colShift + iLocal*colStride;
491 const int j = rowShift + jLocal*rowStride;
493 buffer[j+i*nrhs]= hd.GetLocal( iLocal, jLocal);
497 world.gop.sum(x.ptr(), n*nrhs);
502 catch (TensorException S) {
503 std::cerr << S << std::endl;
515 template <
typename T>
525 template <
typename T>
531 template <
typename T>
537 template <
typename T>
This header should include pretty much everything needed for the parallel runtime.
Definition test_ar.cc:118
Definition test_ar.cc:141
A tensor is a multidimensional array.
Definition tensor.h:317
TensorTypeData< T >::scalar_type scalar_type
C++ typename of the real type associated with a complex type.
Definition tensor.h:409
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition worldgop.h:756
A parallel world class.
Definition world.h:132
WorldGopInterface & gop
Global operations.
Definition world.h:207
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:28
std::enable_if_t< ! is_default_serializable< Archive, T >::value &&is_archive< Archive >::value, void > serialize(const Archive &ar, const T *t, unsigned int n)
Serialize (or deserialize) an array of non-fundamental stuff.
Definition archive.h:498
static const double v
Definition hatom_sf_dirac.cc:20
Macros and tools pertaining to the configuration of MADNESS.
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
Definition potentialmanager.cc:41
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
void sygv(const Tensor< T > &A, const Tensor< T > &B, int itype, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Generalized real-symmetric or complex-Hermitian eigenproblem.
Definition lapack.cc:1052
void geevp(World &world, const Tensor< T > &A, Tensor< T > &VR, Tensor< std::complex< T > > &e)
Definition elem.h:532
response_space transpose(response_space &f)
Definition basic_operators.cc:10
void gesv(const Tensor< T > &a, const Tensor< T > &b, Tensor< T > &x)
Solve Ax = b for general A using the LAPACK *gesv routines.
Definition lapack.cc:804
void gesvp(World &world, const Tensor< T > &a, const Tensor< T > &b, Tensor< T > &x)
Definition elem.h:526
void geev(const Tensor< T > &A, Tensor< T > &VR, Tensor< std::complex< T > > &e)
Real non-symmetric or complex non-Hermitian eigenproblem.
Definition lapack.cc:1008
void ggev(const Tensor< T > &A, Tensor< T > &B, Tensor< T > &VR, Tensor< std::complex< T > > &e)
Generalized real-non-symmetric or complex-non-Hermitian eigenproblem.
Definition lapack.cc:1115
void ggevp(World &world, const Tensor< T > &A, Tensor< T > &B, Tensor< T > &VR, Tensor< std::complex< T > > &e)
Definition elem.h:538
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition mra.h:2096
void sygvp(World &world, const Tensor< T > &a, const Tensor< T > &B, int itype, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Definition elem.h:516
static const double b
Definition nonlinschro.cc:119
static const double d
Definition nonlinschro.cc:121
static const double a
Definition nonlinschro.cc:118
Defines simple templates for printing to std::cout "a la Python".
static const double m
Definition relops.cc:9
static double V(const coordT &r)
Definition tdse.cc:288
Defines and implements most of Tensor.
Prototypes for a partial interface from Tensor to LAPACK.
#define TENSOR_ASSERT(condition, msg, value, t)
Definition tensorexcept.h:130
void e()
Definition test_sig.cc:75
double gd(const coord_t &r)
Definition testgconv.cc:128
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43