MADNESS  0.10.1
distributed_matrix.h
Go to the documentation of this file.
1 #ifndef MADNESS_DISTRIBUTED_MATRIX_H
2 #define MADNESS_DISTRIBUTED_MATRIX_H
3 
4 /*
5  This file is part of MADNESS.
6 
7  Copyright (C) 2007,2010 Oak Ridge National Laboratory
8 
9  This program is free software; you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation; either version 2 of the License, or
12  (at your option) any later version.
13 
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 
23  For more information please contact:
24 
25  Robert J. Harrison
26  Oak Ridge National Laboratory
27  One Bethel Valley Road
28  P.O. Box 2008, MS-6367
29 
30  email: harrisonrj@ornl.gov
31  tel: 865-241-3937
32  fax: 865-572-0680
33 */
34 
35 // THE STUFF IN THIS FILE IS IN TRANSITION! THE API AND
36 // IMPLEMENTATION WILL BOTH SHIFT RAPIDLY AS WE TRANSITION FROM
37 // REPLICATED TO DISTRIBUTED MATRIX ALGORITHMS, AND SUBSEQUENTLY
38 // REFINE THE DESIGN AND INTERFACE TO 3RD PARTY PACKAGES.
39 
40 #include <madness/world/MADworld.h>
41 #include <utility>
42 #include <madness/tensor/tensor.h>
43 
44 namespace madness {
45 
46  // If in a fit of misplaced enthusiasm you desire to change
47  // int64_t to either long or std::size_t you should be aware that
48  // some uses below may contain quantities greater than may be
49  // represented in a 32-bit integer and may also be negative.
50  // I.e., a simple global replace will fail, though the existing
51  // test suite may not detect that. Also, large skinny matrices
52  // could easily need more than 32 bit integers to address.
53 
54 
55  // Forward declarations for friends
56  class DistributedMatrixDistribution;
57  template <typename T> class DistributedMatrix;
58 
59  static inline DistributedMatrixDistribution column_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t coltile=0);
60  static inline DistributedMatrixDistribution row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile=0);
61 
62  template <typename T>
63  DistributedMatrix<T> concatenate_rows(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
64 
65  template <typename T>
66  DistributedMatrix<T> interleave_rows(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
67 
69  friend DistributedMatrixDistribution column_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t coltile);
70  friend DistributedMatrixDistribution row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile);
71  template <typename T> friend class DistributedMatrix;
72 
73  protected:
75  int64_t P; ///< No. of processors
76  ProcessID rank; ///< My processor rank
77  int64_t n; ///< Column dimension of A(n,m)
78  int64_t m; ///< Row dimension of A(n,m)
79  int64_t tilen; ///< Tile size for column
80  int64_t tilem; ///< Tile size for row
81  int64_t Pcoldim; ///< Column dimension of processor grid
82  int64_t Prowdim; ///< Row dimension of processor grid
83  int64_t Pcol; ///< Column of processor grid for this processor
84  int64_t Prow; ///< Row of processor grid for this processor
85  int64_t ilo,ihi; ///< Range of column indices on this processor
86  int64_t jlo,jhi; ///< Range of row indices on this processor
87  int64_t idim,jdim; ///< Dimension of data on this processor
88 
89 
90  /// Constructs distribution and size info for a matrix (for use by factory functions only)
91 
92  /// This routine is dumb and just copies the given arguments,
93  /// hence it can easily make an invalid matrix. The smarts
94  /// are in the factory functions, hence this constructor is
95  /// not for general use.
96  ///
97  /// The matrix is tiled over a grid of processes as specified by the tile sizes.
98  /// @param[in] world The world
99  /// @param[in] n The matrix column dimension
100  /// @param[in] m The matrix row dimension
101  /// @param[in] coltile Tile size for the columns
102  /// @param[in] rowtile Tile size for the rows
103  DistributedMatrixDistribution(World& world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
104  : pworld(&world)
105  , P(world.size())
106  , rank(world.rank())
107  , n(n)
108  , m(m)
109  , tilen(coltile)
110  , tilem(rowtile)
111  , Pcoldim((n-1)/tilen+1)
112  , Prowdim((m-1)/tilem+1)
113  , Pcol(rank/Prowdim)
114  , Prow(rank - Pcol*Prowdim)
115  , ilo(Pcol*tilen)
116  , ihi(std::min(ilo+tilen-1,n-1))
117  , jlo(Prow*tilem)
118  , jhi(std::min(jlo+tilem-1,m-1))
119  , idim(std::max(ihi-ilo+1,int64_t(0)))
120  , jdim(std::max(jhi-jlo+1,int64_t(0)))
121  {
122  if (ilo > ihi || jlo > jhi) {
123  ilo = jlo = 0;
124  ihi = jhi = -1;
125  }
126  }
127 
128 
129  public:
130 
131  /// Default constructor makes an invalid distribution
133  : pworld(0)
134  , P(0)
135  , rank(0)
136  , n(0)
137  , m(0)
138  , tilen(0)
139  , tilem(0)
140  , Pcoldim(0)
141  , Prowdim(0)
142  , Pcol(0)
143  , Prow(0)
144  , ilo(0)
145  , ihi(-1)
146  , jlo(0)
147  , jhi(-1)
148  , idim(0)
149  , jdim(0)
150  {}
151 
152 
153  /// Resets state to same as default constructor
154  void clear() {
155  pworld = (World*)(0);
156  P = rank = n = m = tilen = tilem = Pcoldim = Prowdim = Pcol = Prow = ilo = ihi = jlo = jhi = idim = jdim = 0;
157  }
158 
160  return
161  pworld == d.pworld &&
162  P == d.P &&
163  rank == d.rank &&
164  n == d.n &&
165  m == d.m &&
166  tilen == d.tilen &&
167  tilem == d.tilem &&
168  Pcoldim == d.Pcoldim &&
169  Prowdim == d.Prowdim &&
170  Pcol == d.Pcol &&
171  Prow == d.Prow &&
172  ilo == d.ilo &&
173  ihi == d.ihi &&
174  jlo == d.jlo &&
175  jhi == d.jhi &&
176  idim == d.idim &&
177  jdim == d.jdim;
178  }
179 
180 
181  /// Returns the column dimension of the matrix ... i.e., n for A(n,m)
182 
183  /// @return Column dimension of the matrix ... i.e., n for A(n,m)
184  int64_t coldim() const {
185  return n;
186  }
187 
188  /// Returns the row dimension of the matrix ... i.e., m for A(n,m)
189 
190 
191  /// @return Row dimension of the matrix ... i.e., m for A(n,m)
192  int64_t rowdim() const {
193  return m;
194  }
195 
196 
197  /// Returns the column tile size
198 
199  /// @return Column tile size
200  int64_t coltile() const {
201  return tilen;
202  }
203 
204 
205  /// Returns the row tile size
206 
207  /// @return Row tile size
208  int64_t rowtile() const {
209  return tilem;
210  }
211 
212 
213  /// Returns the no. of processors in the column dimension
214 
215  /// @return No. of processors in the column dimension
216  int64_t process_coldim() const {return Pcoldim;}
217 
218 
219  /// Returns the no. of processors in the row dimension
220 
221  /// @return No. of processors in the rown dimension
222  int64_t process_rowdim() const {return Prowdim;}
223 
224 
225  /// Returns the total no. of elements stored on this processor
226 
227  /// @return Total no. of elements stored on this processor
228  int64_t local_size() const {return idim*jdim;}
229 
230 
231  /// Returns the no. of column elements stored on this processor
232 
233  /// @return No. of column elements stored on this processor (may be zero)
234  int64_t local_coldim() const {return idim;}
235 
236 
237  /// Returns the no. of row elements stored on this processor
238 
239  /// @return No. of row elements stored on this processor
240  int64_t local_rowdim() const {return jdim;}
241 
242  /// Returns the inclusive range of column indices on this processor
243 
244  /// If there is no data on this processor it returns ilow=0 and ihigh=-1
245  /// @param[out] ilow First column index on this processor (0 if no data)
246  /// @param[out] ihigh Last column index on this processor (-1 if no data)
247  void local_colrange(int64_t& ilow, int64_t& ihigh) const {
248  ilow = ilo;
249  ihigh = ihi;
250  }
251 
252 
253  /// Returns the inclusive range of row indices on this processor
254 
255  /// If there is no data on this processor it returns jlow=0 and jhigh=-1
256  /// @param[out] jlow First row index on this processor (0 if no data)
257  /// @param[out] jhigh Last row index on this processor (-1 if no data)
258  void local_rowrange(int64_t& jlow, int64_t& jhigh) const {
259  jlow = jlo;
260  jhigh = jhi;
261  }
262 
263 
264  /// Returns the first column index on this processor (0 if no data present)
265  int64_t local_ilow() const {
266  return ilo;
267  }
268 
269  /// Returns the last column index on this processor (-1 if no data present)
270  int64_t local_ihigh() const {
271  return ihi;
272  }
273 
274 
275  /// Returns the first row index on this processor (0 if no data present)
276  int64_t local_jlow() const {
277  return jlo;
278  }
279 
280 
281  /// Returns the last row index on this processor (0 if no data present)
282  int64_t local_jhigh() const {
283  return jhi;
284  }
285 
286 
287  /// Returns the inclusive ranges of column and row indicies on processor p
288 
289  /// If is no data on processor p it returns ilow=jlow=0 and ihigh=jhigh=-1
290  /// @param[in] p The processor p of interest
291  /// @param[out] ilow The first column index on the processor
292  /// @param[out] ihigh The last column index on the processor (-1 if none)
293  /// @param[out] jlow The first row index on the processor
294  /// @param[out] jhigh The last row index on the processor (-1 if none)
295  void get_range(int p, int64_t& ilow, int64_t& ihigh, int64_t& jlow, int64_t& jhigh) const {
296  int pi = p/Prowdim;
297  int pj = p - pi*Prowdim;
298  if (pi >= process_coldim() || pj >= process_rowdim()) {
299  ilow = jlow = 0;
300  ihigh = jhigh = -1;
301  }
302  else {
303  ilow = pi*tilen;
304  jlow = pj*tilem;
305  ihigh= std::min(ilow+tilen-1,n-1);
306  jhigh= std::min(jlow+tilem-1,m-1);
307  }
308 
309  return;
310  }
311 
312 
313  /// Returns the inclusive range of column indices on processor p
314 
315  /// If is no data on processor p it returns ilow=0 and ihigh=-1
316  /// @param[in] p The processor p of interest
317  /// @param[out] ilow The first column index on the processor
318  /// @param[out] ihigh The last column index on the processor (-1 if none)
319  void get_colrange(int p, int64_t& ilow, int64_t& ihigh) const {
320  int64_t jlow, jhigh;
321  get_range(p, ilow, ihigh, jlow, jhigh);
322 
323  return;
324  }
325 
326 
327  /// Returns the inclusive range of row indices on processor p
328 
329  /// If is no data on processor p it returns jlow=0 and jhigh=-1
330  /// @param[in] p The processor p of interest
331  /// @param[out] jlow The first row index on the processor
332  /// @param[out] jhigh The last row index on the processor (-1 if none)
333  void get_rowrange(int p, int64_t& jlow, int64_t& jhigh) const {
334  int64_t ilow, ihigh;
335  get_range(p, ilow, ihigh, jlow, jhigh);
336 
337  return;
338  }
339 
340 
341  /// Returns the associated world
342 
343  /// @return The world
344  World& get_world() const {return *pworld;}
345 
346 
347  /// Returns true if the matrix is column distributed (i.e., row dimension not distributed)
348 
349  /// @return True if the matrix is column distributed (i.e., row dimension not distributed)
350  bool is_column_distributed() const {return process_rowdim()==1;}
351 
352 
353  /// Returns true if the matrix is row distributed (i.e., column dimension not distributed)
354 
355  /// @return True if the matrix is row distributed (i.e., column dimension not distributed)
356  bool is_row_distributed() const {return process_coldim()==1;}
357 
358 
359  /// Returns the distribution (aka *this)
360  const DistributedMatrixDistribution& distribution() const {return *this;}
361 
362 
363  /// Returns the number of the process that owns element (i,j)
364  ProcessID owner(int64_t i, int64_t j) const {
365  int pcol = i/coltile();
366  int prow = j/rowtile();
367 
368  return pcol*process_rowdim() + prow;
369  }
370 
372  };
373 
374 
375  /// Manages data associated with a row/column/block distributed array
376 
377  /// The class itself provides limited functionality for accessing the
378  /// data and is primarily intended to provide base functionality for
379  /// use by matrix algorithms and other matrix classes.
380  ///
381  /// The constructor is deliberately simple. Factory functions
382  /// are expected to be the main construction tool.
383  ///
384  /// Assignment and copy are shallow just like for tensor and for the same reasons.
385  ///
386  /// To get a deep copy use the copy function (again just like for tensors).
387  template <typename T>
389  friend DistributedMatrix<T> interleave_rows<T>(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
390  friend DistributedMatrix<T> concatenate_rows<T>(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
391 
392  Tensor<T> t; ///< The data
393 
394  static T idij(const int64_t i, const int64_t j) {return (i==j) ? T(1) : T(0);}
395 
396  protected:
397 
398  /// Constructs a distributed matrix dimension (n,m) with specified tile sizes and initialized to zero
399 
400  /// [deprecated ... use factory functions instead]
401  ///
402  /// The matrix is tiled over a grid of processes as specified by the tile sizes.
403  /// @param[in] world The world
404  /// @param[in] n The matrix column dimension
405  /// @param[in] m The matrix row dimension
406  /// @param[in] coltile Tile size for the columns
407  /// @param[in] rowtile Tile size for the rows
408  DistributedMatrix(World& world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
410  {
411  if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
412  }
413 
414  public:
415 
416  /// Default constructor makes an empty matrix that cannot be used except as a target for assignemnt
419  , t()
420  {}
421 
422 
423  /// Constructs a distributed matrix with given distribution info
426  {
427  if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
428  }
429 
430 
431  /// Copy constructor copies dimensions, distribution, and shallow copy of content (unless deepcopy=true)
432  DistributedMatrix(const DistributedMatrix<T>& A, bool deepcopy=false)
434  , t(deepcopy ? copy(A.t) : A.t)
435  {}
436 
437 
438  /// Assigment copies dimensions, distribution, and shallow copy of content
440  if (this != &A) {
441  DistributedMatrixDistribution::operator=(A);
442  t = A.t;
443  }
444  return *this;
445  }
446 
447  virtual ~DistributedMatrix() {}
448 
449 
450  /// Frees memory and resets state to same as default constructor
451  void clear() {
453  t.clear();
454  }
455 
456 
457  /// Fills the matrix with the provided function of the indices
458 
459  /// @param[in] f The matrix is filled using \c a[i,j]=f(i,j)
460  template <typename funcT>
461  void fill(const funcT& f) {
462  for (int64_t i=ilo; i<=ihi; i++) {
463  for (int64_t j=jlo; j<=jhi; j++) {
464  t(i-ilo,j-jlo) = f(i,j);
465  }
466  }
467  }
468 
469 
470  /// Fills the matrix with a scalar
471 
472  /// @param[in] value The matrix is filled using \c a[i,j]=value
473  void fill(T value) {
474  t.fill(value);
475  }
476 
477 
478  void fill_identity() {
480  }
481 
482 
483  /// Returns reference to the local data
484 
485  /// The local data is a tensor dimension \c (local_coldim,local_rowdim) and if either of the dimensions
486  /// are zero there is no data. A natural way to loop thru the data that gives you the actual row and column indices is
487  /// \code
488  /// const Tensor<T>& t = A.data();
489  /// for (int64_t i=A.get_ilow(); i<=A.get_ihigh(); i++) {
490  /// for (int64_t j=A.get_jlow(); j<=A.get_jhigh(); j++) {
491  /// the ij'th element is t(i-ilo, j-jlo)
492  /// }
493  /// }
494  /// \endcode
495  /// @return Reference to non-constant tensor holding the local data
496  Tensor<T>& data() {return t;}
497 
498  /// Returns const reference to data
499 
500  /// The local data is a tensor dimension \c (local_coldim,local_rowdim) and if either of the dimensions
501  /// are zero there is no data. A natural way to loop thru the data that gives you the actual row and column indices is
502  /// \code
503  /// const Tensor<T>& t = A.data();
504  /// for (int64_t i=A.get_ilow(); i<=A.get_ihigh(); i++) {
505  /// for (int64_t j=A.get_jlow(); j<=A.get_jhigh(); j++) {
506  /// the ij'th element is t(i-ilo, j-jlo)
507  /// }
508  /// }
509  /// \endcode
510  /// @return Reference to constant tensor holding the local data
511  const Tensor<T>& data() const {return t;}
512 
513 
514  /// Copy from the replicated \c (m,n) matrix into the distributed matrix
515 
516  /// @param[in] s The input tensor
518  if (local_size() > 0) t(___) = s(Slice(ilo,ihi),Slice(jlo,jhi));
519  }
520 
521  /// Copy from the distributed \c (m,n) matrix into the replicated matrix (collective call)
522 
523  /// The entire output tensor is zeroed, the local data copied
524  /// into it, and then a global sum performed to replicate the
525  /// data.
526  /// @param[out] s The output tensor (assumed already allocated
527  /// to be at least large enough in both dimensions)
528  void copy_to_replicated(Tensor<T>& s) const {
529  MADNESS_CHECK(s.iscontiguous());
530  s = 0.0;
531  if (local_size() > 0) s(Slice(ilo,ihi),Slice(jlo,jhi)) = t(___);
532  get_world().gop.sum(s.ptr(), s.size());
533  }
534 
535  /// Copy from replicated patch (inclusive index range) into the distributed matrix
536 
537  /// @param[in] ilow First \c i index in patch
538  /// @param[in] ihigh Last \c i index in patch
539  /// @param[in] jlow First \c j index in patch
540  /// @param[in] jhigh Last \c j index in patch
541  void copy_from_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, const Tensor<T>& s) {
542  int64_t i0 = std::max(ilo,ilow);
543  int64_t j0 = std::max(jlo,jlow);
544  int64_t i1 = std::min(ihi,ihigh);
545  int64_t j1 = std::min(jhi,jhigh);
546  if (i0<=i1 && j0<=j1) {
547  t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo)) = s(Slice(i0-ilow,i1-ilow),Slice(j0-jlow,j1-jlow));
548  }
549  }
550 
551  /// Copy from distributed matrix into replicated patch (inclusive index range; collective call)
552 
553  /// The entire output tensor is zeroed, relevant local data
554  /// copied into it, and then a global sum performed to
555  /// replicate the data.
556  /// @param[in] ilow First \c i index in patch
557  /// @param[in] ihigh Last \c i index in patch
558  /// @param[in] jlow First \c j index in patch
559  /// @param[in] jhigh Last \c j index in patch
560  void copy_to_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, Tensor<T>& s) const {
561  MADNESS_CHECK(s.iscontiguous());
562  s = 0;
563  int64_t i0 = std::max(ilo,ilow);
564  int64_t j0 = std::max(jlo,jlow);
565  int64_t i1 = std::min(ihi,ihigh);
566  int64_t j1 = std::min(jhi,jhigh);
567  if (i0<=i1 && j0<=j1) {
568  t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo)) = s(Slice(i0-ilow,i1-ilow),Slice(j0-jlow,j1-jlow));
569  }
570  get_world().gop.sum(s.ptr(), s.size());
571  }
572 
573  void extract_columns(int64_t jlow, int64_t jhigh, DistributedMatrix<T>& U) const {
574  int newrowdim = jhigh - jlow + 1;
575  MADNESS_CHECK(jlow >= 0);
576  MADNESS_CHECK(jhigh < rowdim());
577  MADNESS_CHECK(newrowdim == U.rowdim());
578  MADNESS_CHECK(coldim() == U.coldim());
581  MADNESS_CHECK(coltile() == U.coltile());
582 
583  int64_t i0 = ilo;
584  int64_t j0 = std::max(jlo,jlow);
585  int64_t i1 = ihi;
586  int64_t j1 = std::min(jhi,jhigh);
587  if (i0<=i1 && j0<=j1) {
588  U.data()(___) = t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo));
589  }
590  }
591 
592  template <typename R>
595  }
596 
597  /// Inplace addition --- dimensions and distribution must be identical
598 
599  /// @param[in] A The matrix to add to the current matrix
600  /// @return A reference to the current matrix
603  t += A.t;
604  return *this;
605  }
606 
607 
608  /// Out of place addition --- dimensions and distribution must be identical
609 
610  /// @param[in] A The matrix to add to the current matrix
611  /// @return A new matrix with the same dimensions and distribution as the inputs
614  return copy(*this)+=A;
615  }
616 
617 
618  /// Inplace scale by a constant
619 
620  /// @param[in] s The scaling factor
621  /// @return A reference to the current matrix
623  t.scale(s);
624  return *this;
625  }
626 
627  /// Sets element (i,j) to v if (i,j) is local, otherwise throws MadnessException
628  void set(int64_t i, int64_t j, const T x) {
629  MADNESS_CHECK(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
630  t(i-ilo,j-jlo) = x;
631  }
632 
633  /// Gets element (i,j) if (i,j) is local, otherwise throws MadnessException
634  T get(int64_t i, int64_t j) const {
635  MADNESS_CHECK(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
636  return t(i-ilo,j-jlo);
637  }
638  };
639 
640 
641  /// Deep copy of content
642 
643  /// @param[in] A The matrix to be copied
644  /// @return A new matrix with identical dimensions, distribution and content (deep copy)
645  template <typename T>
647  return DistributedMatrix<T>(A,true);
648  }
649 
650 
651  /// Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed)
652 
653  /// Quietly forces an even column tile size for ease of use in the systolic matrix algorithms
654  /// @param[in] world The world
655  /// @param[in] n The column (first) dimension
656  /// @param[in] m The row (second) dimension
657  /// @param[in] coltile Tile size for columns forced to be even (default is to use all processes)
658  /// @return An object encoding the dimension and distribution information
659  static inline DistributedMatrixDistribution
660  column_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t coltile) { // default coltile=0 above
661  if (world.size()*coltile < n) coltile = (n-1)/world.size() + 1;
662  coltile = std::min(coltile,n);
663  if ((coltile&0x1)) ++coltile; // ??? Was before the previous statement
664 
665  return DistributedMatrixDistribution(world, n, m, coltile, m);
666  }
667 
668 
669  /// Generates an (n,m) matrix distributed by columns (row dimension is not distributed)
670 
671  /// Quietly forces an even column tile size for ease of use in the systolic matrix algorithms
672  /// @param[in] world The world
673  /// @param[in] n The column (first) dimension
674  /// @param[in] m The row (second) dimension
675  /// @param[in] coltile Tile size for columns forced to be even (default is to use all processes)
676  /// @return A new zero matrix with the requested dimensions and distribution
677  template <typename T>
678  DistributedMatrix<T> column_distributed_matrix(World& world, int64_t n, int64_t m, int64_t coltile=0) {
680  }
681 
682 
683  /// Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed)
684 
685  /// @param[in] world The world
686  /// @param[in] n The column (first) dimension
687  /// @param[in] m The row (second) dimension
688  /// @param[in] rowtile Tile size for row (default is to use all processes)
689  /// @return An object encoding the dimension and distribution information
690  static inline DistributedMatrixDistribution
691  row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile) { // default rowtile=0 above
692  if (world.size()*rowtile < m) rowtile = (m-1)/world.size() + 1;
693  rowtile = std::min(rowtile,m);
694 
695  return DistributedMatrixDistribution(world, n, m, n, rowtile);
696  }
697 
698  /// Generates an (n,m) matrix distributed by rows (column dimension is not distributed)
699 
700  /// @param[in] world The world
701  /// @param[in] n The column (first) dimension
702  /// @param[in] m The row (second) dimension
703  /// @param[in] rowtile Tile size for row (default is to use all processes)
704  /// @return A new zero matrix with the requested dimensions and distribution
705  template <typename T>
706  DistributedMatrix<T> row_distributed_matrix(World& world, int64_t n, int64_t m, int64_t rowtile=0) {
707  return DistributedMatrix<T>(row_distributed_matrix_distribution(world, n, m, rowtile));
708  }
709 
710 
711  /// Generates a distributed matrix with rows of \c a and \c b interleaved
712 
713  /// I.e., the even rows of the result will be rows of \c a , and the
714  /// odd rows those of \c b .
715  ///
716  /// The matrices a and b must have the same dimensions and be
717  /// identically distributed. The result will have a doubled column
718  /// dimension and column tile size. The row dimension is unchanged.
719  /// @param[in] a The matrix providing even rows of the result
720  /// @param[in] b The matrix providing odd rows of the result
721  /// @return The result matrix
722  template <typename T>
724  MADNESS_CHECK(a.rowdim()==b.rowdim() && a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.rowtile()==b.rowtile());
725 
726  DistributedMatrix<T> c(a.get_world(), a.coldim()*2, a.rowdim(), a.coltile()*2, a.rowtile());
727  c.data()(Slice(0,-1,2),_) = a.data()(___);
728  c.data()(Slice(1,-1,2),_) = b.data()(___);
729  }
730 
731 
732  /// Generates a column-distributed matrix with rows of \c a and \c b contatenated
733 
734  /// I.e., c[i,j] = a[i,j] if j<na or b[i,j-na] if j>=na
735  ///
736  /// The matrices a and b must have the same column size (i.e., the
737  /// same number of rows) and be column distributed with the same
738  /// column tilesze. The result is also column distributed with
739  /// the same column tilesize as the input matrices.
740  /// @param[in] a The first matrix
741  /// @param[in] b The second matrix
742  /// @return The result matrix
743  template <typename T>
745  MADNESS_CHECK(a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.is_column_distributed() && b.is_column_distributed());
746 
747  int64_t ma = a.rowdim();
748  int64_t mb = b.rowdim();
749 
750  DistributedMatrix<T> c(a.get_world(), a.coldim(), ma+mb, a.coltile(), ma+mb);
751 
752  int64_t ilow, ihigh;
753  a.local_colrange(ilow, ihigh);
754  if (ilow <= ihigh) {
755  c.data()(_,Slice(0,ma-1)) = a.data()(___);
756  c.data()(_,Slice(ma,-1)) = b.data()(___);
757  }
758 
759  return c;
760  }
761 
762  /// Generates a column-distributed matrix with rows of \c a, \c b, \c c, and \c d contatenated in order
763 
764  /// I.e., c[i,j] = a[i,j] if j in [0,na), b[i,j-na] if j in [na,na+nb), c[i,j-na-nb] if j in [na+nb,na+nb+nc), etc.
765  /// The matrices must have the same column size (i.e., the same
766  /// number of rows) and be column distributed with the same column
767  /// tilesze. The result is also column distributed with the same
768  /// column tilesize as the input matrices.
769  /// @param[in] a The first matrix
770  /// @param[in] b The second matrix
771  /// @param[in] c The third matrix
772  /// @param[in] d The fourth matrix
773  /// @return The result matrix
774  template <typename T>
776  MADNESS_CHECK(a.coldim()==b.coldim() && b.coldim()==c.coldim() && c.coldim()==d.coldim());
777  MADNESS_CHECK(a.coltile()==b.coltile() && b.coltile()==c.coltile() && c.coltile()==d.coltile());
778  MADNESS_CHECK(a.is_column_distributed() && b.is_column_distributed() && c.is_column_distributed() && d.is_column_distributed());
779 
780  int64_t ma = a.rowdim();
781  int64_t mb = b.rowdim();
782  int64_t mc = c.rowdim();
783  int64_t md = d.rowdim();
784 
785  DistributedMatrix<T> result(a.get_world(), a.coldim(), ma+mb+mc+md, a.coltile(), ma+mb+mc+md);
786 
787  if(a.local_size() > 0) result.data()( _ , Slice(0,ma-1) ) = a.data()(___);
788  if(b.local_size() > 0) result.data()( _ , Slice(ma, ma+mb-1) ) = b.data()(___);
789  if(c.local_size() > 0) result.data()( _ , Slice(ma+mb, ma+mb+mc-1) ) = c.data()(___);
790  if(d.local_size() > 0) result.data()( _ , Slice(ma+mb+mc, -1) ) = d.data()(___);
791 
792  return result;
793  }
794 
795 
796  /// Generates a row-distributed matrix with rows of \c a and \c b contatenated
797 
798  /// I.e., c[i,j] = a[i,j] if i<ma or b[i-ma,j] if i>=ma
799  ///
800  /// The matrices a and b must have the same row size (i.e., the
801  /// same number of columns) and be row distributed with the same
802  /// row tilesze. The result is also row distributed with
803  /// the same row tilesize as the input matrices.
804  /// @param[in] a The first matrix
805  /// @param[in] b The second matrix
806  /// @return The result matrix
807  template <typename T>
809  MADNESS_CHECK(a.rowdim()==b.rowdim() && a.rowtile()==b.rowtile() && a.is_row_distributed() && b.is_row_distributed());
810 
811  int64_t ma = a.coldim();
812  int64_t mt = ma + b.coldim();
813 
814  DistributedMatrix<T> c(a.get_world(), mt, a.rowdim(), b.rowtile(), mt);
815 
816  if(a.local_size() > 0) c.data()( Slice(0,ma-1), _ ) = a.data()(___);
817  if(a.local_size() > 0) c.data()( Slice(ma,-1), _ ) = b.data()(___);
818 
819  return c;
820  }
821 }
822 
823 #endif
real_convolution_3d A(World &world)
Definition: DKops.h:230
This header should include pretty much everything needed for the parallel runtime.
Definition: test_ar.cc:118
Definition: distributed_matrix.h:68
int64_t jlo
Definition: distributed_matrix.h:86
bool is_row_distributed() const
Returns true if the matrix is row distributed (i.e., column dimension not distributed)
Definition: distributed_matrix.h:356
int64_t Prow
Row of processor grid for this processor.
Definition: distributed_matrix.h:84
virtual ~DistributedMatrixDistribution()
Definition: distributed_matrix.h:371
int64_t local_ilow() const
Returns the first column index on this processor (0 if no data present)
Definition: distributed_matrix.h:265
int64_t m
Row dimension of A(n,m)
Definition: distributed_matrix.h:78
int64_t tilem
Tile size for row.
Definition: distributed_matrix.h:80
int64_t local_jhigh() const
Returns the last row index on this processor (0 if no data present)
Definition: distributed_matrix.h:282
void get_colrange(int p, int64_t &ilow, int64_t &ihigh) const
Returns the inclusive range of column indices on processor p.
Definition: distributed_matrix.h:319
ProcessID owner(int64_t i, int64_t j) const
Returns the number of the process that owns element (i,j)
Definition: distributed_matrix.h:364
void local_colrange(int64_t &ilow, int64_t &ihigh) const
Returns the inclusive range of column indices on this processor.
Definition: distributed_matrix.h:247
friend DistributedMatrixDistribution column_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t coltile)
Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition: distributed_matrix.h:660
DistributedMatrixDistribution()
Default constructor makes an invalid distribution.
Definition: distributed_matrix.h:132
World * pworld
Definition: distributed_matrix.h:74
void get_rowrange(int p, int64_t &jlow, int64_t &jhigh) const
Returns the inclusive range of row indices on processor p.
Definition: distributed_matrix.h:333
int64_t local_rowdim() const
Returns the no. of row elements stored on this processor.
Definition: distributed_matrix.h:240
void clear()
Resets state to same as default constructor.
Definition: distributed_matrix.h:154
int64_t rowdim() const
Returns the row dimension of the matrix ... i.e., m for A(n,m)
Definition: distributed_matrix.h:192
int64_t ilo
Definition: distributed_matrix.h:85
int64_t coltile() const
Returns the column tile size.
Definition: distributed_matrix.h:200
int64_t jhi
Range of row indices on this processor.
Definition: distributed_matrix.h:86
int64_t jdim
Dimension of data on this processor.
Definition: distributed_matrix.h:87
int64_t P
No. of processors.
Definition: distributed_matrix.h:75
int64_t process_coldim() const
Returns the no. of processors in the column dimension.
Definition: distributed_matrix.h:216
int64_t idim
Definition: distributed_matrix.h:87
friend DistributedMatrixDistribution row_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t rowtile)
Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed)
Definition: distributed_matrix.h:691
int64_t local_coldim() const
Returns the no. of column elements stored on this processor.
Definition: distributed_matrix.h:234
int64_t local_size() const
Returns the total no. of elements stored on this processor.
Definition: distributed_matrix.h:228
int64_t Prowdim
Row dimension of processor grid.
Definition: distributed_matrix.h:82
bool operator==(const DistributedMatrixDistribution &d) const
Definition: distributed_matrix.h:159
void local_rowrange(int64_t &jlow, int64_t &jhigh) const
Returns the inclusive range of row indices on this processor.
Definition: distributed_matrix.h:258
int64_t tilen
Tile size for column.
Definition: distributed_matrix.h:79
bool is_column_distributed() const
Returns true if the matrix is column distributed (i.e., row dimension not distributed)
Definition: distributed_matrix.h:350
World & get_world() const
Returns the associated world.
Definition: distributed_matrix.h:344
int64_t n
Column dimension of A(n,m)
Definition: distributed_matrix.h:77
DistributedMatrixDistribution(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs distribution and size info for a matrix (for use by factory functions only)
Definition: distributed_matrix.h:103
int64_t Pcol
Column of processor grid for this processor.
Definition: distributed_matrix.h:83
int64_t local_jlow() const
Returns the first row index on this processor (0 if no data present)
Definition: distributed_matrix.h:276
ProcessID rank
My processor rank.
Definition: distributed_matrix.h:76
const DistributedMatrixDistribution & distribution() const
Returns the distribution (aka *this)
Definition: distributed_matrix.h:360
int64_t rowtile() const
Returns the row tile size.
Definition: distributed_matrix.h:208
int64_t ihi
Range of column indices on this processor.
Definition: distributed_matrix.h:85
int64_t coldim() const
Returns the column dimension of the matrix ... i.e., n for A(n,m)
Definition: distributed_matrix.h:184
int64_t local_ihigh() const
Returns the last column index on this processor (-1 if no data present)
Definition: distributed_matrix.h:270
void get_range(int p, int64_t &ilow, int64_t &ihigh, int64_t &jlow, int64_t &jhigh) const
Returns the inclusive ranges of column and row indicies on processor p.
Definition: distributed_matrix.h:295
int64_t process_rowdim() const
Returns the no. of processors in the row dimension.
Definition: distributed_matrix.h:222
int64_t Pcoldim
Column dimension of processor grid.
Definition: distributed_matrix.h:81
Manages data associated with a row/column/block distributed array.
Definition: distributed_matrix.h:388
void extract_columns(int64_t jlow, int64_t jhigh, DistributedMatrix< T > &U) const
Definition: distributed_matrix.h:573
virtual ~DistributedMatrix()
Definition: distributed_matrix.h:447
DistributedMatrix< T > & operator+=(const DistributedMatrix< T > &A)
Inplace addition — dimensions and distribution must be identical.
Definition: distributed_matrix.h:601
DistributedMatrix(const DistributedMatrix< T > &A, bool deepcopy=false)
Copy constructor copies dimensions, distribution, and shallow copy of content (unless deepcopy=true)
Definition: distributed_matrix.h:432
void set(int64_t i, int64_t j, const T x)
Sets element (i,j) to v if (i,j) is local, otherwise throws MadnessException.
Definition: distributed_matrix.h:628
void fill_identity()
Definition: distributed_matrix.h:478
void copy_to_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, Tensor< T > &s) const
Copy from distributed matrix into replicated patch (inclusive index range; collective call)
Definition: distributed_matrix.h:560
DistributedMatrix< T > & operator*=(const T s)
Inplace scale by a constant.
Definition: distributed_matrix.h:622
void copy_from_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, const Tensor< T > &s)
Copy from replicated patch (inclusive index range) into the distributed matrix.
Definition: distributed_matrix.h:541
DistributedMatrix< T > operator+(const DistributedMatrix< T > &A) const
Out of place addition — dimensions and distribution must be identical.
Definition: distributed_matrix.h:612
Tensor< T > & data()
Returns reference to the local data.
Definition: distributed_matrix.h:496
void clear()
Frees memory and resets state to same as default constructor.
Definition: distributed_matrix.h:451
void copy_from_replicated(const Tensor< T > &s)
Copy from the replicated (m,n) matrix into the distributed matrix.
Definition: distributed_matrix.h:517
bool has_same_dimension_and_distribution(const DistributedMatrix< R > &A)
Definition: distributed_matrix.h:593
void copy_to_replicated(Tensor< T > &s) const
Copy from the distributed (m,n) matrix into the replicated matrix (collective call)
Definition: distributed_matrix.h:528
Tensor< T > t
The data.
Definition: distributed_matrix.h:392
void fill(T value)
Fills the matrix with a scalar.
Definition: distributed_matrix.h:473
DistributedMatrix(const DistributedMatrixDistribution &d)
Constructs a distributed matrix with given distribution info.
Definition: distributed_matrix.h:424
const Tensor< T > & data() const
Returns const reference to data.
Definition: distributed_matrix.h:511
T get(int64_t i, int64_t j) const
Gets element (i,j) if (i,j) is local, otherwise throws MadnessException.
Definition: distributed_matrix.h:634
void fill(const funcT &f)
Fills the matrix with the provided function of the indices.
Definition: distributed_matrix.h:461
static T idij(const int64_t i, const int64_t j)
Definition: distributed_matrix.h:394
DistributedMatrix()
Default constructor makes an empty matrix that cannot be used except as a target for assignemnt.
Definition: distributed_matrix.h:417
DistributedMatrix(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs a distributed matrix dimension (n,m) with specified tile sizes and initialized to zero.
Definition: distributed_matrix.h:408
DistributedMatrix< T > & operator=(const DistributedMatrix< T > &A)
Assigment copies dimensions, distribution, and shallow copy of content.
Definition: distributed_matrix.h:439
A slice defines a sub-range or patch of a dimension.
Definition: slice.h:103
A tensor is a multidimension array.
Definition: tensor.h:317
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition: worldgop.h:870
A parallel world class.
Definition: world.h:132
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition: world.h:328
WorldGopInterface & gop
Global operations.
Definition: world.h:205
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition: derivatives.cc:72
const double m
Definition: gfit.cc:199
auto T(World &world, response_space &f) -> response_space
Definition: global_functions.cc:34
#define max(a, b)
Definition: lda.h:51
#define MADNESS_CHECK(condition)
Check a condition — even in a release build the condition is always evaluated so it can have side eff...
Definition: madness_exception.h:190
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
DistributedMatrix< T > column_distributed_matrix(World &world, int64_t n, int64_t m, int64_t coltile=0)
Generates an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition: distributed_matrix.h:678
static const std::vector< Slice > ___
Entire dimension.
Definition: slice.h:128
DistributedMatrix< T > row_distributed_matrix(World &world, int64_t n, int64_t m, int64_t rowtile=0)
Generates an (n,m) matrix distributed by rows (column dimension is not distributed)
Definition: distributed_matrix.h:706
DistributedMatrix< T > interleave_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a distributed matrix with rows of a and b interleaved.
Definition: distributed_matrix.h:723
static DistributedMatrixDistribution column_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t coltile=0)
Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition: distributed_matrix.h:660
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition: mra.h:2002
static const Slice _(0,-1, 1)
NDIM & f
Definition: mra.h:2416
static DistributedMatrixDistribution row_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t rowtile=0)
Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed)
Definition: distributed_matrix.h:691
DistributedMatrix< T > concatenate_columns(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a row-distributed matrix with rows of a and b contatenated.
Definition: distributed_matrix.h:808
DistributedMatrix< T > concatenate_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a column-distributed matrix with rows of a and b contatenated.
Definition: distributed_matrix.h:744
Definition: mraimpl.h:50
static const double b
Definition: nonlinschro.cc:119
static const double a
Definition: nonlinschro.cc:118
static const double c
Definition: relops.cc:10
Defines and implements most of Tensor.
void d()
Definition: test_sig.cc:79
static const double pi
Definition: testcosine.cc:6
int ProcessID
Used to clearly identify process number/rank.
Definition: worldtypes.h:43