MADNESS 0.10.1
distributed_matrix.h
Go to the documentation of this file.
1#ifndef MADNESS_DISTRIBUTED_MATRIX_H
2#define MADNESS_DISTRIBUTED_MATRIX_H
3
4/*
5 This file is part of MADNESS.
6
7 Copyright (C) 2007,2010 Oak Ridge National Laboratory
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
23 For more information please contact:
24
25 Robert J. Harrison
26 Oak Ridge National Laboratory
27 One Bethel Valley Road
28 P.O. Box 2008, MS-6367
29
30 email: harrisonrj@ornl.gov
31 tel: 865-241-3937
32 fax: 865-572-0680
33*/
34
35// THE STUFF IN THIS FILE IS IN TRANSITION! THE API AND
36// IMPLEMENTATION WILL BOTH SHIFT RAPIDLY AS WE TRANSITION FROM
37// REPLICATED TO DISTRIBUTED MATRIX ALGORITHMS, AND SUBSEQUENTLY
38// REFINE THE DESIGN AND INTERFACE TO 3RD PARTY PACKAGES.
39
41#include <utility>
43
44namespace madness {
45
46 // If in a fit of misplaced enthusiasm you desire to change
47 // int64_t to either long or std::size_t you should be aware that
48 // some uses below may contain quantities greater than may be
49 // represented in a 32-bit integer and may also be negative.
50 // I.e., a simple global replace will fail, though the existing
51 // test suite may not detect that. Also, large skinny matrices
52 // could easily need more than 32 bit integers to address.
53
54
55 // Forward declarations for friends
56 class DistributedMatrixDistribution;
57 template <typename T> class DistributedMatrix;
58
59 static inline DistributedMatrixDistribution column_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t coltile=0);
60 static inline DistributedMatrixDistribution row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile=0);
61
62 template <typename T>
63 DistributedMatrix<T> concatenate_rows(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
64
65 template <typename T>
66 DistributedMatrix<T> interleave_rows(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
67
70 friend DistributedMatrixDistribution row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile);
71 template <typename T> friend class DistributedMatrix;
72
73 protected:
75 int64_t P; ///< No. of processors
76 ProcessID rank; ///< My processor rank
77 int64_t n; ///< Column dimension of A(n,m)
78 int64_t m; ///< Row dimension of A(n,m)
79 int64_t tilen; ///< Tile size for column
80 int64_t tilem; ///< Tile size for row
81 int64_t Pcoldim; ///< Column dimension of processor grid
82 int64_t Prowdim; ///< Row dimension of processor grid
83 int64_t Pcol; ///< Column of processor grid for this processor
84 int64_t Prow; ///< Row of processor grid for this processor
85 int64_t ilo,ihi; ///< Range of column indices on this processor
86 int64_t jlo,jhi; ///< Range of row indices on this processor
87 int64_t idim,jdim; ///< Dimension of data on this processor
88
89
90 /// Constructs distribution and size info for a matrix (for use by factory functions only)
91
92 /// This routine is dumb and just copies the given arguments,
93 /// hence it can easily make an invalid matrix. The smarts
94 /// are in the factory functions, hence this constructor is
95 /// not for general use.
96 ///
97 /// The matrix is tiled over a grid of processes as specified by the tile sizes.
98 /// @param[in] world The world
99 /// @param[in] n The matrix column dimension
100 /// @param[in] m The matrix row dimension
101 /// @param[in] coltile Tile size for the columns
102 /// @param[in] rowtile Tile size for the rows
103 DistributedMatrixDistribution(World& world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
104 : pworld(&world)
105 , P(world.size())
106 , rank(world.rank())
107 , n(n)
108 , m(m)
109 , tilen(coltile)
110 , tilem(rowtile)
111 , Pcoldim((n-1)/tilen+1)
112 , Prowdim((m-1)/tilem+1)
113 , Pcol(rank/Prowdim)
114 , Prow(rank - Pcol*Prowdim)
115 , ilo(Pcol*tilen)
116 , ihi(std::min(ilo+tilen-1,n-1))
117 , jlo(Prow*tilem)
118 , jhi(std::min(jlo+tilem-1,m-1))
119 , idim(std::max(ihi-ilo+1,int64_t(0)))
120 , jdim(std::max(jhi-jlo+1,int64_t(0)))
121 {
122 if (ilo > ihi || jlo > jhi) {
123 ilo = jlo = 0;
124 ihi = jhi = -1;
125 }
126 }
127
128
129 public:
130
131 /// Default constructor makes an invalid distribution
133 : pworld(0)
134 , P(0)
135 , rank(0)
136 , n(0)
137 , m(0)
138 , tilen(0)
139 , tilem(0)
140 , Pcoldim(0)
141 , Prowdim(0)
142 , Pcol(0)
143 , Prow(0)
144 , ilo(0)
145 , ihi(-1)
146 , jlo(0)
147 , jhi(-1)
148 , idim(0)
149 , jdim(0)
150 {}
151
152
153 /// Resets state to same as default constructor
154 void clear() {
155 pworld = (World*)(0);
156 P = rank = n = m = tilen = tilem = Pcoldim = Prowdim = Pcol = Prow = ilo = ihi = jlo = jhi = idim = jdim = 0;
157 }
158
160 return
161 pworld == d.pworld &&
162 P == d.P &&
163 rank == d.rank &&
164 n == d.n &&
165 m == d.m &&
166 tilen == d.tilen &&
167 tilem == d.tilem &&
168 Pcoldim == d.Pcoldim &&
169 Prowdim == d.Prowdim &&
170 Pcol == d.Pcol &&
171 Prow == d.Prow &&
172 ilo == d.ilo &&
173 ihi == d.ihi &&
174 jlo == d.jlo &&
175 jhi == d.jhi &&
176 idim == d.idim &&
177 jdim == d.jdim;
178 }
179
180
181 /// Returns the column dimension of the matrix ... i.e., n for A(n,m)
182
183 /// @return Column dimension of the matrix ... i.e., n for A(n,m)
184 int64_t coldim() const {
185 return n;
186 }
187
188 /// Returns the row dimension of the matrix ... i.e., m for A(n,m)
189
190
191 /// @return Row dimension of the matrix ... i.e., m for A(n,m)
192 int64_t rowdim() const {
193 return m;
194 }
195
196
197 /// Returns the column tile size
198
199 /// @return Column tile size
200 int64_t coltile() const {
201 return tilen;
202 }
203
204
205 /// Returns the row tile size
206
207 /// @return Row tile size
208 int64_t rowtile() const {
209 return tilem;
210 }
211
212
213 /// Returns the no. of processors in the column dimension
214
215 /// @return No. of processors in the column dimension
216 int64_t process_coldim() const {return Pcoldim;}
217
218
219 /// Returns the no. of processors in the row dimension
220
221 /// @return No. of processors in the rown dimension
222 int64_t process_rowdim() const {return Prowdim;}
223
224
225 /// Returns the total no. of elements stored on this processor
226
227 /// @return Total no. of elements stored on this processor
228 int64_t local_size() const {return idim*jdim;}
229
230
231 /// Returns the no. of column elements stored on this processor
232
233 /// @return No. of column elements stored on this processor (may be zero)
234 int64_t local_coldim() const {return idim;}
235
236
237 /// Returns the no. of row elements stored on this processor
238
239 /// @return No. of row elements stored on this processor
240 int64_t local_rowdim() const {return jdim;}
241
242 /// Returns the inclusive range of column indices on this processor
243
244 /// If there is no data on this processor it returns ilow=0 and ihigh=-1
245 /// @param[out] ilow First column index on this processor (0 if no data)
246 /// @param[out] ihigh Last column index on this processor (-1 if no data)
247 void local_colrange(int64_t& ilow, int64_t& ihigh) const {
248 ilow = ilo;
249 ihigh = ihi;
250 }
251
252
253 /// Returns the inclusive range of row indices on this processor
254
255 /// If there is no data on this processor it returns jlow=0 and jhigh=-1
256 /// @param[out] jlow First row index on this processor (0 if no data)
257 /// @param[out] jhigh Last row index on this processor (-1 if no data)
258 void local_rowrange(int64_t& jlow, int64_t& jhigh) const {
259 jlow = jlo;
260 jhigh = jhi;
261 }
262
263
264 /// Returns the first column index on this processor (0 if no data present)
265 int64_t local_ilow() const {
266 return ilo;
267 }
268
269 /// Returns the last column index on this processor (-1 if no data present)
270 int64_t local_ihigh() const {
271 return ihi;
272 }
273
274
275 /// Returns the first row index on this processor (0 if no data present)
276 int64_t local_jlow() const {
277 return jlo;
278 }
279
280
281 /// Returns the last row index on this processor (0 if no data present)
282 int64_t local_jhigh() const {
283 return jhi;
284 }
285
286
287 /// Returns the inclusive ranges of column and row indicies on processor p
288
289 /// If is no data on processor p it returns ilow=jlow=0 and ihigh=jhigh=-1
290 /// @param[in] p The processor p of interest
291 /// @param[out] ilow The first column index on the processor
292 /// @param[out] ihigh The last column index on the processor (-1 if none)
293 /// @param[out] jlow The first row index on the processor
294 /// @param[out] jhigh The last row index on the processor (-1 if none)
295 void get_range(int p, int64_t& ilow, int64_t& ihigh, int64_t& jlow, int64_t& jhigh) const {
296 int pi = p/Prowdim;
297 int pj = p - pi*Prowdim;
298 if (pi >= process_coldim() || pj >= process_rowdim()) {
299 ilow = jlow = 0;
300 ihigh = jhigh = -1;
301 }
302 else {
303 ilow = pi*tilen;
304 jlow = pj*tilem;
305 ihigh= std::min(ilow+tilen-1,n-1);
306 jhigh= std::min(jlow+tilem-1,m-1);
307 }
308
309 return;
310 }
311
312
313 /// Returns the inclusive range of column indices on processor p
314
315 /// If is no data on processor p it returns ilow=0 and ihigh=-1
316 /// @param[in] p The processor p of interest
317 /// @param[out] ilow The first column index on the processor
318 /// @param[out] ihigh The last column index on the processor (-1 if none)
319 void get_colrange(int p, int64_t& ilow, int64_t& ihigh) const {
320 int64_t jlow, jhigh;
321 get_range(p, ilow, ihigh, jlow, jhigh);
322
323 return;
324 }
325
326
327 /// Returns the inclusive range of row indices on processor p
328
329 /// If is no data on processor p it returns jlow=0 and jhigh=-1
330 /// @param[in] p The processor p of interest
331 /// @param[out] jlow The first row index on the processor
332 /// @param[out] jhigh The last row index on the processor (-1 if none)
333 void get_rowrange(int p, int64_t& jlow, int64_t& jhigh) const {
334 int64_t ilow, ihigh;
335 get_range(p, ilow, ihigh, jlow, jhigh);
336
337 return;
338 }
339
340
341 /// Returns the associated world
342
343 /// @return The world
344 World& get_world() const {return *pworld;}
345
346
347 /// Returns true if the matrix is column distributed (i.e., row dimension not distributed)
348
349 /// @return True if the matrix is column distributed (i.e., row dimension not distributed)
350 bool is_column_distributed() const {return process_rowdim()==1;}
351
352
353 /// Returns true if the matrix is row distributed (i.e., column dimension not distributed)
354
355 /// @return True if the matrix is row distributed (i.e., column dimension not distributed)
356 bool is_row_distributed() const {return process_coldim()==1;}
357
358
359 /// Returns the distribution (aka *this)
360 const DistributedMatrixDistribution& distribution() const {return *this;}
361
362
363 /// Returns the number of the process that owns element (i,j)
364 ProcessID owner(int64_t i, int64_t j) const {
365 int pcol = i/coltile();
366 int prow = j/rowtile();
367
368 return pcol*process_rowdim() + prow;
369 }
370
372 };
373
374
375 /// Manages data associated with a row/column/block distributed array
376
377 /// The class itself provides limited functionality for accessing the
378 /// data and is primarily intended to provide base functionality for
379 /// use by matrix algorithms and other matrix classes.
380 ///
381 /// The constructor is deliberately simple. Factory functions
382 /// are expected to be the main construction tool.
383 ///
384 /// Assignment and copy are shallow just like for tensor and for the same reasons.
385 ///
386 /// To get a deep copy use the copy function (again just like for tensors).
387 template <typename T>
389 friend DistributedMatrix<T> interleave_rows<T>(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
390 friend DistributedMatrix<T> concatenate_rows<T>(const DistributedMatrix<T>& a, const DistributedMatrix<T>& b);
391
392 Tensor<T> t; ///< The data
393
394 static T idij(const int64_t i, const int64_t j) {return (i==j) ? T(1) : T(0);}
395
396 protected:
397
398 /// Constructs a distributed matrix dimension (n,m) with specified tile sizes and initialized to zero
399
400 /// [deprecated ... use factory functions instead]
401 ///
402 /// The matrix is tiled over a grid of processes as specified by the tile sizes.
403 /// @param[in] world The world
404 /// @param[in] n The matrix column dimension
405 /// @param[in] m The matrix row dimension
406 /// @param[in] coltile Tile size for the columns
407 /// @param[in] rowtile Tile size for the rows
408 DistributedMatrix(World& world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
410 {
411 if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
412 }
413
414 public:
415
416 /// Default constructor makes an empty matrix that cannot be used except as a target for assignemnt
421
422
423 /// Constructs a distributed matrix with given distribution info
429
430
431 /// Copy constructor copies dimensions, distribution, and shallow copy of content (unless deepcopy=true)
432 DistributedMatrix(const DistributedMatrix<T>& A, bool deepcopy=false)
434 , t(deepcopy ? copy(A.t) : A.t)
435 {}
436
437
438 /// Assigment copies dimensions, distribution, and shallow copy of content
440 if (this != &A) {
441 DistributedMatrixDistribution::operator=(A);
442 t = A.t;
443 }
444 return *this;
445 }
446
448
449
450 /// Frees memory and resets state to same as default constructor
451 void clear() {
453 t.clear();
454 }
455
456
457 /// Fills the matrix with the provided function of the indices
458
459 /// @param[in] f The matrix is filled using \c a[i,j]=f(i,j)
460 template <typename funcT>
461 void fill(const funcT& f) {
462 for (int64_t i=ilo; i<=ihi; i++) {
463 for (int64_t j=jlo; j<=jhi; j++) {
464 t(i-ilo,j-jlo) = f(i,j);
465 }
466 }
467 }
468
469
470 /// Fills the matrix with a scalar
471
472 /// @param[in] value The matrix is filled using \c a[i,j]=value
473 void fill(T value) {
474 t.fill(value);
475 }
476
477
481
482
483 /// Returns reference to the local data
484
485 /// The local data is a tensor dimension \c (local_coldim,local_rowdim) and if either of the dimensions
486 /// are zero there is no data. A natural way to loop thru the data that gives you the actual row and column indices is
487 /// \code
488 /// const Tensor<T>& t = A.data();
489 /// for (int64_t i=A.get_ilow(); i<=A.get_ihigh(); i++) {
490 /// for (int64_t j=A.get_jlow(); j<=A.get_jhigh(); j++) {
491 /// the ij'th element is t(i-ilo, j-jlo)
492 /// }
493 /// }
494 /// \endcode
495 /// @return Reference to non-constant tensor holding the local data
496 Tensor<T>& data() {return t;}
497
498 /// Returns const reference to data
499
500 /// The local data is a tensor dimension \c (local_coldim,local_rowdim) and if either of the dimensions
501 /// are zero there is no data. A natural way to loop thru the data that gives you the actual row and column indices is
502 /// \code
503 /// const Tensor<T>& t = A.data();
504 /// for (int64_t i=A.get_ilow(); i<=A.get_ihigh(); i++) {
505 /// for (int64_t j=A.get_jlow(); j<=A.get_jhigh(); j++) {
506 /// the ij'th element is t(i-ilo, j-jlo)
507 /// }
508 /// }
509 /// \endcode
510 /// @return Reference to constant tensor holding the local data
511 const Tensor<T>& data() const {return t;}
512
513
514 /// Copy from the replicated \c (m,n) matrix into the distributed matrix
515
516 /// @param[in] s The input tensor
518 if (local_size() > 0) t(___) = s(Slice(ilo,ihi),Slice(jlo,jhi));
519 }
520
521 /// Copy from the distributed \c (m,n) matrix into the replicated matrix (collective call)
522
523 /// The entire output tensor is zeroed, the local data copied
524 /// into it, and then a global sum performed to replicate the
525 /// data.
526 /// @param[out] s The output tensor (assumed already allocated
527 /// to be at least large enough in both dimensions)
529 MADNESS_CHECK(s.iscontiguous());
530 s = 0.0;
531 if (local_size() > 0) s(Slice(ilo,ihi),Slice(jlo,jhi)) = t(___);
532 get_world().gop.sum(s.ptr(), s.size());
533 }
534
535 /// Copy from replicated patch (inclusive index range) into the distributed matrix
536
537 /// @param[in] ilow First \c i index in patch
538 /// @param[in] ihigh Last \c i index in patch
539 /// @param[in] jlow First \c j index in patch
540 /// @param[in] jhigh Last \c j index in patch
541 void copy_from_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, const Tensor<T>& s) {
542 int64_t i0 = std::max(ilo,ilow);
543 int64_t j0 = std::max(jlo,jlow);
544 int64_t i1 = std::min(ihi,ihigh);
545 int64_t j1 = std::min(jhi,jhigh);
546 if (i0<=i1 && j0<=j1) {
547 t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo)) = s(Slice(i0-ilow,i1-ilow),Slice(j0-jlow,j1-jlow));
548 }
549 }
550
551 /// Copy from distributed matrix into replicated patch (inclusive index range; collective call)
552
553 /// The entire output tensor is zeroed, relevant local data
554 /// copied into it, and then a global sum performed to
555 /// replicate the data.
556 /// @param[in] ilow First \c i index in patch
557 /// @param[in] ihigh Last \c i index in patch
558 /// @param[in] jlow First \c j index in patch
559 /// @param[in] jhigh Last \c j index in patch
560 void copy_to_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, Tensor<T>& s) const {
561 MADNESS_CHECK(s.iscontiguous());
562 s = 0;
563 int64_t i0 = std::max(ilo,ilow);
564 int64_t j0 = std::max(jlo,jlow);
565 int64_t i1 = std::min(ihi,ihigh);
566 int64_t j1 = std::min(jhi,jhigh);
567 if (i0<=i1 && j0<=j1) {
568 t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo)) = s(Slice(i0-ilow,i1-ilow),Slice(j0-jlow,j1-jlow));
569 }
570 get_world().gop.sum(s.ptr(), s.size());
571 }
572
573 void extract_columns(int64_t jlow, int64_t jhigh, DistributedMatrix<T>& U) const {
574 int newrowdim = jhigh - jlow + 1;
575 MADNESS_CHECK(jlow >= 0);
576 MADNESS_CHECK(jhigh < rowdim());
577 MADNESS_CHECK(newrowdim == U.rowdim());
578 MADNESS_CHECK(coldim() == U.coldim());
581 MADNESS_CHECK(coltile() == U.coltile());
582
583 int64_t i0 = ilo;
584 int64_t j0 = std::max(jlo,jlow);
585 int64_t i1 = ihi;
586 int64_t j1 = std::min(jhi,jhigh);
587 if (i0<=i1 && j0<=j1) {
588 U.data()(___) = t(Slice(i0-ilo,i1-ilo),Slice(j0-jlo,j1-jlo));
589 }
590 }
591
592 template <typename R>
596
597 /// Inplace addition --- dimensions and distribution must be identical
598
599 /// @param[in] A The matrix to add to the current matrix
600 /// @return A reference to the current matrix
606
607
608 /// Out of place addition --- dimensions and distribution must be identical
609
610 /// @param[in] A The matrix to add to the current matrix
611 /// @return A new matrix with the same dimensions and distribution as the inputs
616
617
618 /// Inplace scale by a constant
619
620 /// @param[in] s The scaling factor
621 /// @return A reference to the current matrix
623 t.scale(s);
624 return *this;
625 }
626
627 /// Sets element (i,j) to v if (i,j) is local, otherwise throws MadnessException
628 void set(int64_t i, int64_t j, const T x) {
629 MADNESS_CHECK(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
630 t(i-ilo,j-jlo) = x;
631 }
632
633 /// Gets element (i,j) if (i,j) is local, otherwise throws MadnessException
634 T get(int64_t i, int64_t j) const {
635 MADNESS_CHECK(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
636 return t(i-ilo,j-jlo);
637 }
638 };
639
640
641 /// Deep copy of content
642
643 /// @param[in] A The matrix to be copied
644 /// @return A new matrix with identical dimensions, distribution and content (deep copy)
645 template <typename T>
649
650
651 /// Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed)
652
653 /// Quietly forces an even column tile size for ease of use in the systolic matrix algorithms
654 /// @param[in] world The world
655 /// @param[in] n The column (first) dimension
656 /// @param[in] m The row (second) dimension
657 /// @param[in] coltile Tile size for columns forced to be even (default is to use all processes)
658 /// @return An object encoding the dimension and distribution information
659 static inline DistributedMatrixDistribution
660 column_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t coltile) { // default coltile=0 above
661 if (world.size()*coltile < n) coltile = (n-1)/world.size() + 1;
662 coltile = std::min(coltile,n);
663 if ((coltile&0x1)) ++coltile; // ??? Was before the previous statement
664
665 return DistributedMatrixDistribution(world, n, m, coltile, m);
666 }
667
668
669 /// Generates an (n,m) matrix distributed by columns (row dimension is not distributed)
670
671 /// Quietly forces an even column tile size for ease of use in the systolic matrix algorithms
672 /// @param[in] world The world
673 /// @param[in] n The column (first) dimension
674 /// @param[in] m The row (second) dimension
675 /// @param[in] coltile Tile size for columns forced to be even (default is to use all processes)
676 /// @return A new zero matrix with the requested dimensions and distribution
677 template <typename T>
678 DistributedMatrix<T> column_distributed_matrix(World& world, int64_t n, int64_t m, int64_t coltile=0) {
680 }
681
682
683 /// Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed)
684
685 /// @param[in] world The world
686 /// @param[in] n The column (first) dimension
687 /// @param[in] m The row (second) dimension
688 /// @param[in] rowtile Tile size for row (default is to use all processes)
689 /// @return An object encoding the dimension and distribution information
690 static inline DistributedMatrixDistribution
691 row_distributed_matrix_distribution(World& world, int64_t n, int64_t m, int64_t rowtile) { // default rowtile=0 above
692 if (world.size()*rowtile < m) rowtile = (m-1)/world.size() + 1;
693 rowtile = std::min(rowtile,m);
694
695 return DistributedMatrixDistribution(world, n, m, n, rowtile);
696 }
697
698 /// Generates an (n,m) matrix distributed by rows (column dimension is not distributed)
699
700 /// @param[in] world The world
701 /// @param[in] n The column (first) dimension
702 /// @param[in] m The row (second) dimension
703 /// @param[in] rowtile Tile size for row (default is to use all processes)
704 /// @return A new zero matrix with the requested dimensions and distribution
705 template <typename T>
706 DistributedMatrix<T> row_distributed_matrix(World& world, int64_t n, int64_t m, int64_t rowtile=0) {
708 }
709
710
711 /// Generates a distributed matrix with rows of \c a and \c b interleaved
712
713 /// I.e., the even rows of the result will be rows of \c a , and the
714 /// odd rows those of \c b .
715 ///
716 /// The matrices a and b must have the same dimensions and be
717 /// identically distributed. The result will have a doubled column
718 /// dimension and column tile size. The row dimension is unchanged.
719 /// @param[in] a The matrix providing even rows of the result
720 /// @param[in] b The matrix providing odd rows of the result
721 /// @return The result matrix
722 template <typename T>
724 MADNESS_CHECK(a.rowdim()==b.rowdim() && a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.rowtile()==b.rowtile());
725
726 DistributedMatrix<T> c(a.get_world(), a.coldim()*2, a.rowdim(), a.coltile()*2, a.rowtile());
727 c.data()(Slice(0,-1,2),_) = a.data()(___);
728 c.data()(Slice(1,-1,2),_) = b.data()(___);
729 }
730
731
732 /// Generates a column-distributed matrix with rows of \c a and \c b contatenated
733
734 /// I.e., c[i,j] = a[i,j] if j<na or b[i,j-na] if j>=na
735 ///
736 /// The matrices a and b must have the same column size (i.e., the
737 /// same number of rows) and be column distributed with the same
738 /// column tilesze. The result is also column distributed with
739 /// the same column tilesize as the input matrices.
740 /// @param[in] a The first matrix
741 /// @param[in] b The second matrix
742 /// @return The result matrix
743 template <typename T>
745 MADNESS_CHECK(a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.is_column_distributed() && b.is_column_distributed());
746
747 int64_t ma = a.rowdim();
748 int64_t mb = b.rowdim();
749
750 DistributedMatrix<T> c(a.get_world(), a.coldim(), ma+mb, a.coltile(), ma+mb);
751
752 int64_t ilow, ihigh;
753 a.local_colrange(ilow, ihigh);
754 if (ilow <= ihigh) {
755 c.data()(_,Slice(0,ma-1)) = a.data()(___);
756 c.data()(_,Slice(ma,-1)) = b.data()(___);
757 }
758
759 return c;
760 }
761
762 /// Generates a column-distributed matrix with rows of \c a, \c b, \c c, and \c d contatenated in order
763
764 /// I.e., c[i,j] = a[i,j] if j in [0,na), b[i,j-na] if j in [na,na+nb), c[i,j-na-nb] if j in [na+nb,na+nb+nc), etc.
765 /// The matrices must have the same column size (i.e., the same
766 /// number of rows) and be column distributed with the same column
767 /// tilesze. The result is also column distributed with the same
768 /// column tilesize as the input matrices.
769 /// @param[in] a The first matrix
770 /// @param[in] b The second matrix
771 /// @param[in] c The third matrix
772 /// @param[in] d The fourth matrix
773 /// @return The result matrix
774 template <typename T>
776 MADNESS_CHECK(a.coldim()==b.coldim() && b.coldim()==c.coldim() && c.coldim()==d.coldim());
777 MADNESS_CHECK(a.coltile()==b.coltile() && b.coltile()==c.coltile() && c.coltile()==d.coltile());
778 MADNESS_CHECK(a.is_column_distributed() && b.is_column_distributed() && c.is_column_distributed() && d.is_column_distributed());
779
780 int64_t ma = a.rowdim();
781 int64_t mb = b.rowdim();
782 int64_t mc = c.rowdim();
783 int64_t md = d.rowdim();
784
785 DistributedMatrix<T> result(a.get_world(), a.coldim(), ma+mb+mc+md, a.coltile(), ma+mb+mc+md);
786
787 if(a.local_size() > 0) result.data()( _ , Slice(0,ma-1) ) = a.data()(___);
788 if(b.local_size() > 0) result.data()( _ , Slice(ma, ma+mb-1) ) = b.data()(___);
789 if(c.local_size() > 0) result.data()( _ , Slice(ma+mb, ma+mb+mc-1) ) = c.data()(___);
790 if(d.local_size() > 0) result.data()( _ , Slice(ma+mb+mc, -1) ) = d.data()(___);
791
792 return result;
793 }
794
795
796 /// Generates a row-distributed matrix with rows of \c a and \c b contatenated
797
798 /// I.e., c[i,j] = a[i,j] if i<ma or b[i-ma,j] if i>=ma
799 ///
800 /// The matrices a and b must have the same row size (i.e., the
801 /// same number of columns) and be row distributed with the same
802 /// row tilesze. The result is also row distributed with
803 /// the same row tilesize as the input matrices.
804 /// @param[in] a The first matrix
805 /// @param[in] b The second matrix
806 /// @return The result matrix
807 template <typename T>
809 MADNESS_CHECK(a.rowdim()==b.rowdim() && a.rowtile()==b.rowtile() && a.is_row_distributed() && b.is_row_distributed());
810
811 int64_t ma = a.coldim();
812 int64_t mt = ma + b.coldim();
813
814 DistributedMatrix<T> c(a.get_world(), mt, a.rowdim(), b.rowtile(), mt);
815
816 if(a.local_size() > 0) c.data()( Slice(0,ma-1), _ ) = a.data()(___);
817 if(a.local_size() > 0) c.data()( Slice(ma,-1), _ ) = b.data()(___);
818
819 return c;
820 }
821}
822
823#endif
This header should include pretty much everything needed for the parallel runtime.
Definition test_ar.cc:118
Definition distributed_matrix.h:68
World & get_world() const
Returns the associated world.
Definition distributed_matrix.h:344
int64_t jlo
Definition distributed_matrix.h:86
bool is_row_distributed() const
Returns true if the matrix is row distributed (i.e., column dimension not distributed)
Definition distributed_matrix.h:356
int64_t Prow
Row of processor grid for this processor.
Definition distributed_matrix.h:84
virtual ~DistributedMatrixDistribution()
Definition distributed_matrix.h:371
int64_t local_ilow() const
Returns the first column index on this processor (0 if no data present)
Definition distributed_matrix.h:265
int64_t m
Row dimension of A(n,m)
Definition distributed_matrix.h:78
int64_t tilem
Tile size for row.
Definition distributed_matrix.h:80
int64_t local_jhigh() const
Returns the last row index on this processor (0 if no data present)
Definition distributed_matrix.h:282
void get_colrange(int p, int64_t &ilow, int64_t &ihigh) const
Returns the inclusive range of column indices on processor p.
Definition distributed_matrix.h:319
ProcessID owner(int64_t i, int64_t j) const
Returns the number of the process that owns element (i,j)
Definition distributed_matrix.h:364
void local_colrange(int64_t &ilow, int64_t &ihigh) const
Returns the inclusive range of column indices on this processor.
Definition distributed_matrix.h:247
friend DistributedMatrixDistribution column_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t coltile)
Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition distributed_matrix.h:660
DistributedMatrixDistribution()
Default constructor makes an invalid distribution.
Definition distributed_matrix.h:132
World * pworld
Definition distributed_matrix.h:74
void get_rowrange(int p, int64_t &jlow, int64_t &jhigh) const
Returns the inclusive range of row indices on processor p.
Definition distributed_matrix.h:333
int64_t local_rowdim() const
Returns the no. of row elements stored on this processor.
Definition distributed_matrix.h:240
void clear()
Resets state to same as default constructor.
Definition distributed_matrix.h:154
int64_t rowdim() const
Returns the row dimension of the matrix ... i.e., m for A(n,m)
Definition distributed_matrix.h:192
int64_t ilo
Definition distributed_matrix.h:85
int64_t coltile() const
Returns the column tile size.
Definition distributed_matrix.h:200
int64_t jhi
Range of row indices on this processor.
Definition distributed_matrix.h:86
int64_t jdim
Dimension of data on this processor.
Definition distributed_matrix.h:87
int64_t P
No. of processors.
Definition distributed_matrix.h:75
int64_t process_coldim() const
Returns the no. of processors in the column dimension.
Definition distributed_matrix.h:216
int64_t idim
Definition distributed_matrix.h:87
friend DistributedMatrixDistribution row_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t rowtile)
Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed)
Definition distributed_matrix.h:691
int64_t local_coldim() const
Returns the no. of column elements stored on this processor.
Definition distributed_matrix.h:234
int64_t local_size() const
Returns the total no. of elements stored on this processor.
Definition distributed_matrix.h:228
int64_t Prowdim
Row dimension of processor grid.
Definition distributed_matrix.h:82
bool operator==(const DistributedMatrixDistribution &d) const
Definition distributed_matrix.h:159
void local_rowrange(int64_t &jlow, int64_t &jhigh) const
Returns the inclusive range of row indices on this processor.
Definition distributed_matrix.h:258
int64_t tilen
Tile size for column.
Definition distributed_matrix.h:79
bool is_column_distributed() const
Returns true if the matrix is column distributed (i.e., row dimension not distributed)
Definition distributed_matrix.h:350
int64_t n
Column dimension of A(n,m)
Definition distributed_matrix.h:77
DistributedMatrixDistribution(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs distribution and size info for a matrix (for use by factory functions only)
Definition distributed_matrix.h:103
int64_t Pcol
Column of processor grid for this processor.
Definition distributed_matrix.h:83
int64_t local_jlow() const
Returns the first row index on this processor (0 if no data present)
Definition distributed_matrix.h:276
ProcessID rank
My processor rank.
Definition distributed_matrix.h:76
const DistributedMatrixDistribution & distribution() const
Returns the distribution (aka *this)
Definition distributed_matrix.h:360
int64_t rowtile() const
Returns the row tile size.
Definition distributed_matrix.h:208
int64_t ihi
Range of column indices on this processor.
Definition distributed_matrix.h:85
int64_t coldim() const
Returns the column dimension of the matrix ... i.e., n for A(n,m)
Definition distributed_matrix.h:184
int64_t local_ihigh() const
Returns the last column index on this processor (-1 if no data present)
Definition distributed_matrix.h:270
void get_range(int p, int64_t &ilow, int64_t &ihigh, int64_t &jlow, int64_t &jhigh) const
Returns the inclusive ranges of column and row indicies on processor p.
Definition distributed_matrix.h:295
int64_t process_rowdim() const
Returns the no. of processors in the row dimension.
Definition distributed_matrix.h:222
int64_t Pcoldim
Column dimension of processor grid.
Definition distributed_matrix.h:81
Manages data associated with a row/column/block distributed array.
Definition distributed_matrix.h:388
Tensor< T > & data()
Returns reference to the local data.
Definition distributed_matrix.h:496
DistributedMatrix< T > & operator=(const DistributedMatrix< T > &A)
Assigment copies dimensions, distribution, and shallow copy of content.
Definition distributed_matrix.h:439
void extract_columns(int64_t jlow, int64_t jhigh, DistributedMatrix< T > &U) const
Definition distributed_matrix.h:573
virtual ~DistributedMatrix()
Definition distributed_matrix.h:447
DistributedMatrix(const DistributedMatrix< T > &A, bool deepcopy=false)
Copy constructor copies dimensions, distribution, and shallow copy of content (unless deepcopy=true)
Definition distributed_matrix.h:432
DistributedMatrix< T > & operator*=(const T s)
Inplace scale by a constant.
Definition distributed_matrix.h:622
void set(int64_t i, int64_t j, const T x)
Sets element (i,j) to v if (i,j) is local, otherwise throws MadnessException.
Definition distributed_matrix.h:628
void fill_identity()
Definition distributed_matrix.h:478
void copy_to_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, Tensor< T > &s) const
Copy from distributed matrix into replicated patch (inclusive index range; collective call)
Definition distributed_matrix.h:560
void copy_from_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, const Tensor< T > &s)
Copy from replicated patch (inclusive index range) into the distributed matrix.
Definition distributed_matrix.h:541
void clear()
Frees memory and resets state to same as default constructor.
Definition distributed_matrix.h:451
void copy_from_replicated(const Tensor< T > &s)
Copy from the replicated (m,n) matrix into the distributed matrix.
Definition distributed_matrix.h:517
bool has_same_dimension_and_distribution(const DistributedMatrix< R > &A)
Definition distributed_matrix.h:593
void copy_to_replicated(Tensor< T > &s) const
Copy from the distributed (m,n) matrix into the replicated matrix (collective call)
Definition distributed_matrix.h:528
DistributedMatrix< T > operator+(const DistributedMatrix< T > &A) const
Out of place addition — dimensions and distribution must be identical.
Definition distributed_matrix.h:612
Tensor< T > t
The data.
Definition distributed_matrix.h:392
void fill(T value)
Fills the matrix with a scalar.
Definition distributed_matrix.h:473
DistributedMatrix< T > & operator+=(const DistributedMatrix< T > &A)
Inplace addition — dimensions and distribution must be identical.
Definition distributed_matrix.h:601
DistributedMatrix(const DistributedMatrixDistribution &d)
Constructs a distributed matrix with given distribution info.
Definition distributed_matrix.h:424
T get(int64_t i, int64_t j) const
Gets element (i,j) if (i,j) is local, otherwise throws MadnessException.
Definition distributed_matrix.h:634
void fill(const funcT &f)
Fills the matrix with the provided function of the indices.
Definition distributed_matrix.h:461
static T idij(const int64_t i, const int64_t j)
Definition distributed_matrix.h:394
DistributedMatrix()
Default constructor makes an empty matrix that cannot be used except as a target for assignemnt.
Definition distributed_matrix.h:417
DistributedMatrix(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs a distributed matrix dimension (n,m) with specified tile sizes and initialized to zero.
Definition distributed_matrix.h:408
const Tensor< T > & data() const
Returns const reference to data.
Definition distributed_matrix.h:511
A slice defines a sub-range or patch of a dimension.
Definition slice.h:103
A tensor is a multidimension array.
Definition tensor.h:317
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition worldgop.h:870
A parallel world class.
Definition world.h:132
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:318
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:328
WorldGopInterface & gop
Global operations.
Definition world.h:205
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition derivatives.cc:72
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
#define max(a, b)
Definition lda.h:51
#define MADNESS_CHECK(condition)
Check a condition — even in a release build the condition is always evaluated so it can have side eff...
Definition madness_exception.h:182
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
DistributedMatrix< T > concatenate_columns(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a row-distributed matrix with rows of a and b contatenated.
Definition distributed_matrix.h:808
static const std::vector< Slice > ___
Entire dimension.
Definition slice.h:128
DistributedMatrix< T > interleave_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a distributed matrix with rows of a and b interleaved.
Definition distributed_matrix.h:723
static DistributedMatrixDistribution column_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t coltile=0)
Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition distributed_matrix.h:660
static const Slice _(0,-1, 1)
NDIM & f
Definition mra.h:2416
static DistributedMatrixDistribution row_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t rowtile=0)
Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed)
Definition distributed_matrix.h:691
DistributedMatrix< T > column_distributed_matrix(World &world, int64_t n, int64_t m, int64_t coltile=0)
Generates an (n,m) matrix distributed by columns (row dimension is not distributed)
Definition distributed_matrix.h:678
DistributedMatrix< T > concatenate_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a column-distributed matrix with rows of a and b contatenated.
Definition distributed_matrix.h:744
DistributedMatrix< T > row_distributed_matrix(World &world, int64_t n, int64_t m, int64_t rowtile=0)
Generates an (n,m) matrix distributed by rows (column dimension is not distributed)
Definition distributed_matrix.h:706
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition mra.h:2002
Definition mraimpl.h:50
static const double b
Definition nonlinschro.cc:119
static const double d
Definition nonlinschro.cc:121
static const double a
Definition nonlinschro.cc:118
static const double c
Definition relops.cc:10
static const double m
Definition relops.cc:9
Defines and implements most of Tensor.
static const double pi
Definition testcosine.cc:6
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43