1 #ifndef MADNESS_DISTRIBUTED_MATRIX_H
2 #define MADNESS_DISTRIBUTED_MATRIX_H
56 class DistributedMatrixDistribution;
57 template <
typename T>
class DistributedMatrix;
63 DistributedMatrix<T>
concatenate_rows(
const DistributedMatrix<T>&
a,
const DistributedMatrix<T>&
b);
66 DistributedMatrix<T>
interleave_rows(
const DistributedMatrix<T>&
a,
const DistributedMatrix<T>&
b);
295 void get_range(
int p, int64_t& ilow, int64_t& ihigh, int64_t& jlow, int64_t& jhigh)
const {
305 ihigh= std::min(ilow+
tilen-1,
n-1);
306 jhigh= std::min(jlow+
tilem-1,
m-1);
387 template <
typename T>
394 static T idij(
const int64_t i,
const int64_t j) {
return (i==j) ?
T(1) :
T(0);}
441 DistributedMatrixDistribution::operator=(
A);
460 template <
typename funcT>
462 for (int64_t i=
ilo; i<=
ihi; i++) {
463 for (int64_t j=
jlo; j<=
jhi; j++) {
544 int64_t i1 = std::min(
ihi,ihigh);
545 int64_t j1 = std::min(
jhi,jhigh);
546 if (i0<=i1 && j0<=j1) {
565 int64_t i1 = std::min(
ihi,ihigh);
566 int64_t j1 = std::min(
jhi,jhigh);
567 if (i0<=i1 && j0<=j1) {
574 int newrowdim = jhigh - jlow + 1;
586 int64_t j1 = std::min(
jhi,jhigh);
587 if (i0<=i1 && j0<=j1) {
592 template <
typename R>
614 return copy(*
this)+=
A;
628 void set(int64_t i, int64_t j,
const T x) {
634 T get(int64_t i, int64_t j)
const {
645 template <
typename T>
659 static inline DistributedMatrixDistribution
661 if (world.
size()*coltile < n) coltile = (n-1)/world.
size() + 1;
662 coltile = std::min(coltile,n);
663 if ((coltile&0x1)) ++coltile;
677 template <
typename T>
690 static inline DistributedMatrixDistribution
692 if (world.
size()*rowtile <
m) rowtile = (
m-1)/world.
size() + 1;
693 rowtile = std::min(rowtile,
m);
705 template <
typename T>
722 template <
typename T>
724 MADNESS_CHECK(
a.rowdim()==
b.rowdim() &&
a.coldim()==
b.coldim() &&
a.coltile()==
b.coltile() &&
a.rowtile()==
b.rowtile());
743 template <
typename T>
745 MADNESS_CHECK(
a.coldim()==
b.coldim() &&
a.coltile()==
b.coltile() &&
a.is_column_distributed() &&
b.is_column_distributed());
747 int64_t ma =
a.rowdim();
748 int64_t mb =
b.rowdim();
753 a.local_colrange(ilow, ihigh);
774 template <
typename T>
777 MADNESS_CHECK(
a.coltile()==
b.coltile() &&
b.coltile()==
c.coltile() &&
c.coltile()==
d.coltile());
778 MADNESS_CHECK(
a.is_column_distributed() &&
b.is_column_distributed() &&
c.is_column_distributed() &&
d.is_column_distributed());
780 int64_t ma =
a.rowdim();
781 int64_t mb =
b.rowdim();
782 int64_t mc =
c.rowdim();
783 int64_t md =
d.rowdim();
787 if(
a.local_size() > 0) result.
data()(
_ ,
Slice(0,ma-1) ) =
a.data()(
___);
788 if(
b.local_size() > 0) result.
data()(
_ ,
Slice(ma, ma+mb-1) ) =
b.data()(
___);
789 if(
c.local_size() > 0) result.
data()(
_ ,
Slice(ma+mb, ma+mb+mc-1) ) =
c.data()(
___);
790 if(
d.local_size() > 0) result.
data()(
_ ,
Slice(ma+mb+mc, -1) ) =
d.data()(
___);
807 template <
typename T>
809 MADNESS_CHECK(
a.rowdim()==
b.rowdim() &&
a.rowtile()==
b.rowtile() &&
a.is_row_distributed() &&
b.is_row_distributed());
811 int64_t ma =
a.coldim();
812 int64_t mt = ma +
b.coldim();
816 if(
a.local_size() > 0)
c.data()(
Slice(0,ma-1),
_ ) =
a.data()(
___);
817 if(
a.local_size() > 0)
c.data()(
Slice(ma,-1),
_ ) =
b.data()(
___);
real_convolution_3d A(World &world)
Definition: DKops.h:230
This header should include pretty much everything needed for the parallel runtime.
Definition: test_ar.cc:118
Definition: distributed_matrix.h:68
int64_t jlo
Definition: distributed_matrix.h:86
bool is_row_distributed() const
Returns true if the matrix is row distributed (i.e., column dimension not distributed)
Definition: distributed_matrix.h:356
int64_t Prow
Row of processor grid for this processor.
Definition: distributed_matrix.h:84
virtual ~DistributedMatrixDistribution()
Definition: distributed_matrix.h:371
int64_t local_ilow() const
Returns the first column index on this processor (0 if no data present)
Definition: distributed_matrix.h:265
int64_t m
Row dimension of A(n,m)
Definition: distributed_matrix.h:78
int64_t tilem
Tile size for row.
Definition: distributed_matrix.h:80
int64_t local_jhigh() const
Returns the last row index on this processor (0 if no data present)
Definition: distributed_matrix.h:282
void get_colrange(int p, int64_t &ilow, int64_t &ihigh) const
Returns the inclusive range of column indices on processor p.
Definition: distributed_matrix.h:319
ProcessID owner(int64_t i, int64_t j) const
Returns the number of the process that owns element (i,j)
Definition: distributed_matrix.h:364
void local_colrange(int64_t &ilow, int64_t &ihigh) const
Returns the inclusive range of column indices on this processor.
Definition: distributed_matrix.h:247
friend DistributedMatrixDistribution column_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t coltile)
Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition: distributed_matrix.h:660
DistributedMatrixDistribution()
Default constructor makes an invalid distribution.
Definition: distributed_matrix.h:132
World * pworld
Definition: distributed_matrix.h:74
void get_rowrange(int p, int64_t &jlow, int64_t &jhigh) const
Returns the inclusive range of row indices on processor p.
Definition: distributed_matrix.h:333
int64_t local_rowdim() const
Returns the no. of row elements stored on this processor.
Definition: distributed_matrix.h:240
void clear()
Resets state to same as default constructor.
Definition: distributed_matrix.h:154
int64_t rowdim() const
Returns the row dimension of the matrix ... i.e., m for A(n,m)
Definition: distributed_matrix.h:192
int64_t ilo
Definition: distributed_matrix.h:85
int64_t coltile() const
Returns the column tile size.
Definition: distributed_matrix.h:200
int64_t jhi
Range of row indices on this processor.
Definition: distributed_matrix.h:86
int64_t jdim
Dimension of data on this processor.
Definition: distributed_matrix.h:87
int64_t P
No. of processors.
Definition: distributed_matrix.h:75
int64_t process_coldim() const
Returns the no. of processors in the column dimension.
Definition: distributed_matrix.h:216
int64_t idim
Definition: distributed_matrix.h:87
friend DistributedMatrixDistribution row_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t rowtile)
Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed)
Definition: distributed_matrix.h:691
int64_t local_coldim() const
Returns the no. of column elements stored on this processor.
Definition: distributed_matrix.h:234
int64_t local_size() const
Returns the total no. of elements stored on this processor.
Definition: distributed_matrix.h:228
int64_t Prowdim
Row dimension of processor grid.
Definition: distributed_matrix.h:82
bool operator==(const DistributedMatrixDistribution &d) const
Definition: distributed_matrix.h:159
void local_rowrange(int64_t &jlow, int64_t &jhigh) const
Returns the inclusive range of row indices on this processor.
Definition: distributed_matrix.h:258
int64_t tilen
Tile size for column.
Definition: distributed_matrix.h:79
bool is_column_distributed() const
Returns true if the matrix is column distributed (i.e., row dimension not distributed)
Definition: distributed_matrix.h:350
World & get_world() const
Returns the associated world.
Definition: distributed_matrix.h:344
int64_t n
Column dimension of A(n,m)
Definition: distributed_matrix.h:77
DistributedMatrixDistribution(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs distribution and size info for a matrix (for use by factory functions only)
Definition: distributed_matrix.h:103
int64_t Pcol
Column of processor grid for this processor.
Definition: distributed_matrix.h:83
int64_t local_jlow() const
Returns the first row index on this processor (0 if no data present)
Definition: distributed_matrix.h:276
ProcessID rank
My processor rank.
Definition: distributed_matrix.h:76
const DistributedMatrixDistribution & distribution() const
Returns the distribution (aka *this)
Definition: distributed_matrix.h:360
int64_t rowtile() const
Returns the row tile size.
Definition: distributed_matrix.h:208
int64_t ihi
Range of column indices on this processor.
Definition: distributed_matrix.h:85
int64_t coldim() const
Returns the column dimension of the matrix ... i.e., n for A(n,m)
Definition: distributed_matrix.h:184
int64_t local_ihigh() const
Returns the last column index on this processor (-1 if no data present)
Definition: distributed_matrix.h:270
void get_range(int p, int64_t &ilow, int64_t &ihigh, int64_t &jlow, int64_t &jhigh) const
Returns the inclusive ranges of column and row indices on processor p.
Definition: distributed_matrix.h:295
int64_t process_rowdim() const
Returns the no. of processors in the row dimension.
Definition: distributed_matrix.h:222
int64_t Pcoldim
Column dimension of processor grid.
Definition: distributed_matrix.h:81
Manages data associated with a row/column/block distributed array.
Definition: distributed_matrix.h:388
void extract_columns(int64_t jlow, int64_t jhigh, DistributedMatrix< T > &U) const
Definition: distributed_matrix.h:573
virtual ~DistributedMatrix()
Definition: distributed_matrix.h:447
DistributedMatrix< T > & operator+=(const DistributedMatrix< T > &A)
Inplace addition — dimensions and distribution must be identical.
Definition: distributed_matrix.h:601
DistributedMatrix(const DistributedMatrix< T > &A, bool deepcopy=false)
Copy constructor copies dimensions, distribution, and shallow copy of content (unless deepcopy=true)
Definition: distributed_matrix.h:432
void set(int64_t i, int64_t j, const T x)
Sets element (i,j) to x if (i,j) is local, otherwise throws MadnessException.
Definition: distributed_matrix.h:628
void fill_identity()
Definition: distributed_matrix.h:478
void copy_to_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, Tensor< T > &s) const
Copy from distributed matrix into replicated patch (inclusive index range; collective call)
Definition: distributed_matrix.h:560
DistributedMatrix< T > & operator*=(const T s)
Inplace scale by a constant.
Definition: distributed_matrix.h:622
void copy_from_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, const Tensor< T > &s)
Copy from replicated patch (inclusive index range) into the distributed matrix.
Definition: distributed_matrix.h:541
DistributedMatrix< T > operator+(const DistributedMatrix< T > &A) const
Out of place addition — dimensions and distribution must be identical.
Definition: distributed_matrix.h:612
Tensor< T > & data()
Returns reference to the local data.
Definition: distributed_matrix.h:496
void clear()
Frees memory and resets state to same as default constructor.
Definition: distributed_matrix.h:451
void copy_from_replicated(const Tensor< T > &s)
Copy from the replicated (m,n) matrix into the distributed matrix.
Definition: distributed_matrix.h:517
bool has_same_dimension_and_distribution(const DistributedMatrix< R > &A)
Definition: distributed_matrix.h:593
void copy_to_replicated(Tensor< T > &s) const
Copy from the distributed (m,n) matrix into the replicated matrix (collective call)
Definition: distributed_matrix.h:528
Tensor< T > t
The data.
Definition: distributed_matrix.h:392
void fill(T value)
Fills the matrix with a scalar.
Definition: distributed_matrix.h:473
DistributedMatrix(const DistributedMatrixDistribution &d)
Constructs a distributed matrix with given distribution info.
Definition: distributed_matrix.h:424
const Tensor< T > & data() const
Returns const reference to data.
Definition: distributed_matrix.h:511
T get(int64_t i, int64_t j) const
Gets element (i,j) if (i,j) is local, otherwise throws MadnessException.
Definition: distributed_matrix.h:634
void fill(const funcT &f)
Fills the matrix with the provided function of the indices.
Definition: distributed_matrix.h:461
static T idij(const int64_t i, const int64_t j)
Definition: distributed_matrix.h:394
DistributedMatrix()
Default constructor makes an empty matrix that cannot be used except as a target for assignment.
Definition: distributed_matrix.h:417
DistributedMatrix(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs a distributed matrix dimension (n,m) with specified tile sizes and initialized to zero.
Definition: distributed_matrix.h:408
DistributedMatrix< T > & operator=(const DistributedMatrix< T > &A)
Assignment copies dimensions, distribution, and shallow copy of content.
Definition: distributed_matrix.h:439
A slice defines a sub-range or patch of a dimension.
Definition: slice.h:103
A tensor is a multidimension array.
Definition: tensor.h:317
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition: worldgop.h:870
A parallel world class.
Definition: world.h:132
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition: world.h:328
WorldGopInterface & gop
Global operations.
Definition: world.h:205
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition: derivatives.cc:72
const double m
Definition: gfit.cc:199
auto T(World &world, response_space &f) -> response_space
Definition: global_functions.cc:34
#define max(a, b)
Definition: lda.h:51
#define MADNESS_CHECK(condition)
Check a condition — even in a release build the condition is always evaluated so it can have side eff...
Definition: madness_exception.h:190
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
DistributedMatrix< T > column_distributed_matrix(World &world, int64_t n, int64_t m, int64_t coltile=0)
Generates an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition: distributed_matrix.h:678
static const std::vector< Slice > ___
Entire dimension.
Definition: slice.h:128
DistributedMatrix< T > row_distributed_matrix(World &world, int64_t n, int64_t m, int64_t rowtile=0)
Generates an (n,m) matrix distributed by rows (column dimension is not distributed)
Definition: distributed_matrix.h:706
DistributedMatrix< T > interleave_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a distributed matrix with rows of a and b interleaved.
Definition: distributed_matrix.h:723
static DistributedMatrixDistribution column_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t coltile=0)
Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition: distributed_matrix.h:660
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition: mra.h:2002
static const Slice _(0,-1, 1)
NDIM & f
Definition: mra.h:2416
static DistributedMatrixDistribution row_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t rowtile=0)
Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed)
Definition: distributed_matrix.h:691
DistributedMatrix< T > concatenate_columns(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a row-distributed matrix with columns of a and b concatenated.
Definition: distributed_matrix.h:808
DistributedMatrix< T > concatenate_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a column-distributed matrix with rows of a and b concatenated.
Definition: distributed_matrix.h:744
static const double b
Definition: nonlinschro.cc:119
static const double a
Definition: nonlinschro.cc:118
static const double c
Definition: relops.cc:10
Defines and implements most of Tensor.
void d()
Definition: test_sig.cc:79
static const double pi
Definition: testcosine.cc:6
int ProcessID
Used to clearly identify process number/rank.
Definition: worldtypes.h:43