/*
  This file is part of MADNESS.

  Copyright (C) 2007,2010 Oak Ridge National Laboratory

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  For more information please contact:

  Robert J. Harrison
  Oak Ridge National Laboratory
  One Bethel Valley Road
  P.O. Box 2008, MS-6367

  email: harrisonrj@ornl.gov
  tel:   865-241-3937
  fax:   865-572-0680


  $Id$
*/


#ifndef MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED
#define MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED

/*!
 \file tensor_macros.h
 \brief Macros for easy and efficient iteration over tensors.

 \ingroup tensor

Several different macros have been defined to make it
easy to iterate over expressions involving tensors. They
vary in their generality, ease of use, and efficiency.
The most general and easiest to use, but also the most inefficient
and least safe, is
\code
ITERATOR(t, expression)
\endcode
where \c t is a Tensor of any type, size or dimension that is used to
define the range of the loop indices, and \c expression can be nearly
anything, including multi-line expressions performing arbitrary
operations on multiple tensors. The loop indices, going
from left to right in the dimensions, are
\code
_i, _j, _k, ...
\endcode

E.g., to add two matrices together (there are more efficient ways
to do this, such as \c a+=b )
\code
Tensor<long> a(4,2), b(4,2);
ITERATOR(a, a(_i,_j) += b(_i,_j));
\endcode

E.g., to print out the indices of all elements of a matrix
greater than 0.5:
\code
Tensor<float> m(5,5);
m.fillrandom();
ITERATOR(m, if (m(_i,_j) > 0.5) {
    cout << _i << " " << _j << endl;
});
\endcode

To make it possible to index tensors of arbitrary dimension, the macro
\c IND has been defined as the indices for the highest supported
dimension. E.g., to elementwise divide the contents of two tensors of
unknown dimension
\code
ITERATOR(x, x(IND)/y(IND));
\endcode

Note that using \c IND employs bounds checking whereas direct indexing
with \c _i , etc., does not.

The generality of these macros is offset by their inefficiency and
lack of safety. The inefficiency is twofold. First, the \c ITERATOR
macro generates a separate block of code for each possible dimension.
This could cause code bloat and increased compilation time. To
solve this problem, the macros \c ITERATOR1 , \c ITERATOR2 , etc., have
been defined, with the corresponding \c IND1 , \c IND2 , etc. These
macros may be applied to tensor expressions of the appropriate
dimension.
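
E.g., a minimal sketch using the 2-d specific macro and its index
list (here \c a is assumed to be a \c Tensor<double> purely for
illustration)
\code
Tensor<double> a(3,3);
ITERATOR2(a, a(IND2) *= 2.0);  // expands to a(_i,_j) *= 2.0
\endcode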

The second inefficiency is at runtime, due to the explicit indexing of
all the tensor expressions and the inability to optimize the order
in which memory is traversed. The lack of safety is the inability
to check that the tensors in the expression conform and that the
indices are not out of bounds.

The safety and cost of explicit indexing are addressed by the macros
\c UNARYITERATOR , \c BINARYITERATOR , and \c TERNARYITERATOR , along
with their specializations to specific numbers of dimensions (again by
appending the dimension number to the name of the macro). These
macros are safer since you have to explicitly name the tensors you are
iterating over, so that the macro can check that the input tensors
conform. The cost of looping is reduced by replacing explicit
indexing with pointer arithmetic. These macros still define the loop
indices \c _i , \c _j , etc., but also define \c _p0 , \c _p1 , etc.,
as pointers to the current elements of tensor argument 0, tensor
argument 1, etc.

E.g., to set the elements of a 3-d tensor, \c t , of type \c double to a
function of the indices
\code
UNARYITERATOR(double, t, *_p0 = 1.0/(_i + _j + _k + 1.0));
\endcode
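
E.g., a minimal sketch of an elementwise copy of one \c double tensor
into another using \c BINARYITERATOR , which also verifies that the
two tensors conform
\code
BINARYITERATOR(double, a, double, b, *_p0 = *_p1);
\endcode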

E.g., to merge two \c double tensors as the real and imaginary parts
of a complex tensor of any dimension
\code
TERNARYITERATOR(double_complex, c, double, r, double, i,
                *_p0 = double_complex(*_p1, *_p2));
\endcode

However, we still have the problems that if the dimensions of a tensor
have been reordered, the loops will go through memory inefficiently,
and the dimension-independent macros still generate redundant code
blocks. Also, the innermost loop might not be very long and will
be inefficient.

The most general, efficient and code-compact macros internally use the
\c TensorIterator , which you could also use directly. Since there is
no nest of explicit loops, the tensor indices are no longer available
as \c _i , \c _j , etc. Furthermore, the \c TensorIterator can
reorder the loops to optimize the memory traversal, and fuse
dimensions to make the innermost loop longer for better vectorization
and reduced loop overhead.
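
E.g., a minimal sketch of using the \c TensorIterator directly,
modeled on the non-contiguous branch of the optimized macros defined
below (assuming \c t is a \c Tensor<double> )
\code
for (TensorIterator<double> iter=t.unary_iterator(1); iter._p0; ++iter) {
    double* p = iter._p0;
    for (long j=0; j<iter.dimj; ++j, p+=iter._s0) *p = 0.0;
}
\endcode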

The most efficient macros for iteration are \c UNARY_OPTIMIZED_ITERATOR ,
\c BINARY_OPTIMIZED_ITERATOR , and \c TERNARY_OPTIMIZED_ITERATOR .
As before, these define the pointers \c _p0 , \c _p1 , \c _p2 , which
point to the current (and corresponding) element of each argument
tensor. However, unlike the previous macros, there is no guarantee
that the elements are looped through in the order expected by a
simple nest of loops. Furthermore, the indices are completely
unavailable. In addition to using the iterators for optimal
traversal, these macros attempt to use a single loop
for optimal vector performance.

E.g., the most efficient and safe way to perform the previous example
of merging two \c double tensors as the real and imaginary parts
of a complex tensor of any dimension
\code
TERNARY_OPTIMIZED_ITERATOR(double_complex, c, double, r, double, i,
                           *_p0 = double_complex(*_p1, *_p2));
\endcode
This is precisely how most internal operations are implemented.

In some situations it is necessary to preserve the expected
order of loops and to not fuse dimensions. The macros
\c UNARY_UNOPTIMIZED_ITERATOR , \c BINARY_UNOPTIMIZED_ITERATOR ,
and \c TERNARY_UNOPTIMIZED_ITERATOR use the \c TensorIterator
but disable loop reordering and fusing. Once these optimizations
have been turned off, the loop indices are available, if needed,
from the \c ind[] member of the iterator (which is named
\c iter ).

E.g., the fillindex() method is implemented as follows
\code
long count = 0;
UNARY_UNOPTIMIZED_ITERATOR(T, (*this), *_p0 = (T) count++);
\endcode

\em NB: None of the above iterator macros can be nested ... use the
actual tensor iterator, or the \c UNARY_UNOPTIMIZED_ITERATOR_NESTED
macro sketched below, to do this.
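
E.g., a sketch of nesting using \c UNARY_UNOPTIMIZED_ITERATOR_NESTED ,
which renames its iterator and pointer ( \c iter2 and \c _q0 ) so that
it can appear inside another iterator macro (here \c x and \c y are
assumed to be \c double tensors)
\code
UNARY_UNOPTIMIZED_ITERATOR(double, x,
    UNARY_UNOPTIMIZED_ITERATOR_NESTED(double, y, *_q0 += *_p0));
\endcode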

Recommendation --- for both efficiency and safety, use the optimized
macros (\c UNARY_OPTIMIZED_ITERATOR , etc.), unless it is necessary to
preserve loop order, in which case use the unoptimized versions. If
you need the loop indices, use the macros \c UNARYITERATOR , etc.,
unless you have a very general expression that they cannot handle. In
this last instance, or for ease of rapid implementation, use the general
\c ITERATOR macro first described.

*/

// don't change this without changing the iterator macros
#define TENSOR_MAXDIM 6

/// Macros IND1, ..., IND6, and IND are a convenience for indexing in macro iterators.

#define IND1 _i
#define IND2 _i,_j
#define IND3 _i,_j,_k
#define IND4 _i,_j,_k,_l
#define IND5 _i,_j,_k,_l,_m
#define IND6 _i,_j,_k,_l,_m,_n
#define IND IND6

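// Each ITERATORn macro also defines _index, the ordinal position
// (0,1,2,...) of the current element in iteration order, incremented
// once per evaluation of the expression.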
#define ITERATOR1(t,exp) do { \
  long __xd0=t.dim(0), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) {exp;_index++;} } while (0)

#define ITERATOR2(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) {exp;_index++;} } } while (0)

#define ITERATOR3(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) {exp;_index++;} } } } while (0)

#define ITERATOR4(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) {exp;_index++;} } } } } while (0)

#define ITERATOR5(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), __xd4=t.dim(4), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) { \
          for (long _m=0; _m<__xd4; ++_m) {exp;_index++;} } } } } } while (0)

#define ITERATOR6(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), __xd4=t.dim(4), __xd5=t.dim(5), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) { \
          for (long _m=0; _m<__xd4; ++_m) { \
            for (long _n=0; _n<__xd5; ++_n) {exp;_index++;} } } } } } } while(0)

#define ITERATOR(t,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (t.ndim() == 1) {ITERATOR1(t,exp);} \
  else if (t.ndim() == 2) {ITERATOR2(t,exp);} \
  else if (t.ndim() == 3) {ITERATOR3(t,exp);} \
  else if (t.ndim() == 4) {ITERATOR4(t,exp);} \
  else if (t.ndim() == 5) {ITERATOR5(t,exp);} \
  else if (t.ndim() == 6) {ITERATOR6(t,exp);} \
  else {TENSOR_ASSERT(t.ndim() <= 6,"ndim confused?",t.ndim(),&t);} \
  } while(0)

// Inside the iterator, access the pointer to the current element as _p0
// (pointer to argument number 0); _i, _j, _k, ... are also defined.
#define UNARYITERATOR1(X,x,exp) do { \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  X* MADNESS_RESTRICT _p0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,_p0+=__xs0) { \
    exp; \
  } } while(0)

#define UNARYITERATOR2(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT _p0=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1) { \
      exp; \
    } } } while(0)

#define UNARYITERATOR3(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* MADNESS_RESTRICT _p0=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2) { \
        exp; \
      } } } } while(0)

#define UNARYITERATOR4(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* MADNESS_RESTRICT _p0=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3) { \
          exp; \
        } } } } } while(0)

#define UNARYITERATOR5(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3) { \
          X* MADNESS_RESTRICT _p0=__xp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4) { \
            exp; \
          } } } } } } while(0)

#define UNARYITERATOR6(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3) { \
          X* MADNESS_RESTRICT __xp4=__xp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4) { \
            X* MADNESS_RESTRICT _p0=__xp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5) { \
              exp; \
            } } } } } } } while(0)

#define UNARYITERATOR(X,x,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) UNARYITERATOR1(X,x,exp); \
  else if (x.ndim() == 2) UNARYITERATOR2(X,x,exp); \
  else if (x.ndim() == 3) UNARYITERATOR3(X,x,exp); \
  else if (x.ndim() == 4) UNARYITERATOR4(X,x,exp); \
  else if (x.ndim() == 5) UNARYITERATOR5(X,x,exp); \
  else if (x.ndim() == 6) UNARYITERATOR6(X,x,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} } while(0)
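
// Example (a sketch, assuming t is a 2-d Tensor<double>): set t to the
// identity matrix.
//   UNARYITERATOR(double, t, *_p0 = (_i == _j) ? 1.0 : 0.0);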

// Inside the iterator, access pointers to the current elements as _p0 and _p1;
// _i, _j, _k, ... are also defined.
#define BINARYITERATOR1(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  long __ys0=y.stride(0); \
  X* MADNESS_RESTRICT _p0=x.ptr(); \
  Y* MADNESS_RESTRICT _p1=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, _p0+=__xs0, _p1+=__ys0) { \
    exp; \
  } } while(0)

#define BINARYITERATOR2(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  long __ys0=y.stride(0), __ys1=y.stride(1); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT _p0=__xp0; \
    Y* MADNESS_RESTRICT _p1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1, _p1+=__ys1) { \
      exp; \
    } } } while(0)

#define BINARYITERATOR3(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* MADNESS_RESTRICT _p0=__xp1; \
      Y* MADNESS_RESTRICT _p1=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2, _p1+=__ys2) { \
        exp; \
      } } } } while(0)

#define BINARYITERATOR4(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* MADNESS_RESTRICT _p0=__xp2; \
        Y* MADNESS_RESTRICT _p1=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3, _p1+=__ys3) { \
          exp; \
        } } } } } while(0)

#define BINARYITERATOR5(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        Y* MADNESS_RESTRICT __yp3=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3) { \
          X* MADNESS_RESTRICT _p0=__xp3; \
          Y* MADNESS_RESTRICT _p1=__yp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4, _p1+=__ys4) { \
            exp; \
          } } } } } } while(0)

#define BINARYITERATOR6(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4), __ys5=y.stride(5); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        Y* MADNESS_RESTRICT __yp3=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3) { \
          X* MADNESS_RESTRICT __xp4=__xp3; \
          Y* MADNESS_RESTRICT __yp4=__yp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4, __yp4+=__ys4) { \
            X* MADNESS_RESTRICT _p0=__xp4; \
            Y* MADNESS_RESTRICT _p1=__yp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5, _p1+=__ys5) { \
              exp; \
            } } } } } } } while(0)

#define BINARYITERATOR(X,x,Y,y,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) BINARYITERATOR1(X,x,Y,y,exp); \
  else if (x.ndim() == 2) BINARYITERATOR2(X,x,Y,y,exp); \
  else if (x.ndim() == 3) BINARYITERATOR3(X,x,Y,y,exp); \
  else if (x.ndim() == 4) BINARYITERATOR4(X,x,Y,y,exp); \
  else if (x.ndim() == 5) BINARYITERATOR5(X,x,Y,y,exp); \
  else if (x.ndim() == 6) BINARYITERATOR6(X,x,Y,y,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} \
} while(0)
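
// Example (a sketch, assuming a and b are conforming Tensor<double>s):
// accumulate a scaled copy of b into a.
//   BINARYITERATOR(double, a, double, b, *_p0 += 2.0*(*_p1));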

// Inside the iterator, access pointers to the current elements as _p0, _p1, _p2;
// _i, _j, _k, ... are also defined.
#define TERNARYITERATOR1(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  long __ys0=y.stride(0); \
  long __zs0=z.stride(0); \
  X* MADNESS_RESTRICT _p0=x.ptr(); \
  Y* MADNESS_RESTRICT _p1=y.ptr(); \
  Z* MADNESS_RESTRICT _p2=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, _p0+=__xs0, _p1+=__ys0, _p2+=__zs0) { \
    exp; \
  } } while(0)

#define TERNARYITERATOR2(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  long __ys0=y.stride(0), __ys1=y.stride(1); \
  long __zs0=z.stride(0), __zs1=z.stride(1); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT _p0=__xp0; \
    Y* MADNESS_RESTRICT _p1=__yp0; \
    Z* MADNESS_RESTRICT _p2=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1, _p1+=__ys1, _p2+=__zs1) { \
      exp; \
    } } } while(0)

#define TERNARYITERATOR3(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    Z* MADNESS_RESTRICT __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* MADNESS_RESTRICT _p0=__xp1; \
      Y* MADNESS_RESTRICT _p1=__yp1; \
      Z* MADNESS_RESTRICT _p2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2, _p1+=__ys2, _p2+=__zs2) { \
        exp; \
      } } } } while(0)

#define TERNARYITERATOR4(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    Z* MADNESS_RESTRICT __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      Z* MADNESS_RESTRICT __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* MADNESS_RESTRICT _p0=__xp2; \
        Y* MADNESS_RESTRICT _p1=__yp2; \
        Z* MADNESS_RESTRICT _p2=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3, _p1+=__ys3, _p2+=__zs3) { \
          exp; \
        } } } } } while(0)

#define TERNARYITERATOR5(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3), __zs4=z.stride(4); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    Z* MADNESS_RESTRICT __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      Z* MADNESS_RESTRICT __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        Y* MADNESS_RESTRICT __yp3=__yp2; \
        Z* MADNESS_RESTRICT __zp3=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3, __zp3+=__zs3) { \
          X* MADNESS_RESTRICT _p0=__xp3; \
          Y* MADNESS_RESTRICT _p1=__yp3; \
          Z* MADNESS_RESTRICT _p2=__zp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4, _p1+=__ys4, _p2+=__zs4) { \
            exp; \
          } } } } } } while(0)

#define TERNARYITERATOR6(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4), __ys5=y.stride(5); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3), __zs4=z.stride(4), __zs5=z.stride(5); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    Z* MADNESS_RESTRICT __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      Z* MADNESS_RESTRICT __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        Y* MADNESS_RESTRICT __yp3=__yp2; \
        Z* MADNESS_RESTRICT __zp3=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3, __zp3+=__zs3) { \
          X* MADNESS_RESTRICT __xp4=__xp3; \
          Y* MADNESS_RESTRICT __yp4=__yp3; \
          Z* MADNESS_RESTRICT __zp4=__zp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4, __yp4+=__ys4, __zp4+=__zs4) { \
            X* MADNESS_RESTRICT _p0=__xp4; \
            Y* MADNESS_RESTRICT _p1=__yp4; \
            Z* MADNESS_RESTRICT _p2=__zp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5, _p1+=__ys5, _p2+=__zs5) { \
              exp; \
            } } } } } } } while(0)

#define TERNARYITERATOR(X,x,Y,y,Z,z,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) TERNARYITERATOR1(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 2) TERNARYITERATOR2(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 3) TERNARYITERATOR3(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 4) TERNARYITERATOR4(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 5) TERNARYITERATOR5(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 6) TERNARYITERATOR6(X,x,Y,y,Z,z,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} \
} while(0)
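
// Example (a sketch, assuming a, b, and c are conforming Tensor<double>s):
// elementwise multiply b and c into a.
//   TERNARYITERATOR(double, a, double, b, double, c, *_p0 = (*_p1)*(*_p2));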

#define UNARY_OPTIMIZED_ITERATOR(X,x,exp) do { \
  if (x.iscontiguous()) { \
    X* MADNESS_RESTRICT _p0 = x.ptr(); \
    for (long _j=0; _j<x.size(); ++_j,++_p0) {exp;} \
  } \
  else { \
    for (TensorIterator<REMCONST(X)> iter=x.unary_iterator(1); iter._p0; ++iter) { \
      long _dimj = iter.dimj; \
      X* MADNESS_RESTRICT _p0 = iter._p0; \
      long _s0 = iter._s0; \
      for (long _j=0; _j<_dimj; ++_j, _p0+=_s0) { \
        exp; \
      } \
    } \
  } \
} while(0)
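
// Example (a sketch, assuming t is a Tensor<double>): scale t in place.
// Note that the indices _i, _j, ... are NOT available here.
//   UNARY_OPTIMIZED_ITERATOR(double, t, *_p0 *= 2.0);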


// Can optimize these by moving the definition of the stride out of the loop.

#define UNARY_UNOPTIMIZED_ITERATOR(X,x,exp) do { \
  for (TensorIterator<REMCONST(X)> iter=x.unary_iterator(1,false,false); iter._p0; ++iter) { \
    long _dimj = iter.dimj; \
    X* MADNESS_RESTRICT _p0 = iter._p0; \
    long _s0 = iter._s0; \
    for (long _j=0; _j<_dimj; ++_j, _p0+=_s0) { \
      exp; \
    } \
  } } while(0)

// Use this inside another iterator macro ... pointers are _q* instead of _p*.
// The iterator is named iter2.

#define UNARY_UNOPTIMIZED_ITERATOR_NESTED(X,x,exp) do { \
  for (TensorIterator<REMCONST(X)> iter2=x.unary_iterator(1,false,false); iter2._p0; ++iter2) { \
    long _dimj2 = iter2.dimj; \
    X* MADNESS_RESTRICT _q0 = iter2._p0; \
    long _s20 = iter2._s0; \
    for (long _j2=0; _j2<_dimj2; ++_j2, _q0+=_s20) { \
      exp; \
    } \
  } } while(0)

#define BINARY_OPTIMIZED_ITERATOR(X,x,Y,y,exp) do { \
  if (x.iscontiguous() && y.iscontiguous() && x.size()==y.size()) { \
    X* MADNESS_RESTRICT _p0 = x.ptr(); \
    Y* MADNESS_RESTRICT _p1 = y.ptr(); \
    for (long _j=0; _j<x.size(); ++_j,++_p0,++_p1) {exp;} \
  } \
  else { \
    for (TensorIterator<REMCONST(X),REMCONST(Y)> iter=x.binary_iterator(y,1); iter._p0; ++iter) { \
      long _dimj = iter.dimj; \
      X* MADNESS_RESTRICT _p0 = iter._p0; \
      Y* MADNESS_RESTRICT _p1 = iter._p1; \
      long _s0 = iter._s0; \
      long _s1 = iter._s1; \
      for (long _j=0; _j<_dimj; ++_j, _p0+=_s0, _p1+=_s1) { \
        exp; \
      } \
    } } } while(0)
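
// Example (a sketch, assuming a and b are conforming Tensor<double>s):
// copy b into a regardless of memory layout.
//   BINARY_OPTIMIZED_ITERATOR(double, a, double, b, *_p0 = *_p1);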

#define TERNARY_OPTIMIZED_ITERATOR(X,x,Y,y,Z,z,exp) do { \
  if (x.iscontiguous() && y.iscontiguous() && z.iscontiguous() && x.size()==y.size() && x.size()==z.size()) { \
    X* MADNESS_RESTRICT _p0 = x.ptr(); \
    Y* MADNESS_RESTRICT _p1 = y.ptr(); \
    Z* MADNESS_RESTRICT _p2 = z.ptr(); \
    for (long _j=0; _j<x.size(); ++_j,++_p0,++_p1,++_p2) {exp;} \
  } \
  else { \
    for (TensorIterator<REMCONST(X),REMCONST(Y),REMCONST(Z)> iter=x.ternary_iterator(y,z,1); iter._p0; ++iter) { \
      long _dimj = iter.dimj; \
      X* MADNESS_RESTRICT _p0 = iter._p0; \
      Y* MADNESS_RESTRICT _p1 = iter._p1; \
      Z* MADNESS_RESTRICT _p2 = iter._p2; \
      long _s0 = iter._s0; \
      long _s1 = iter._s1; \
      long _s2 = iter._s2; \
      for (long _j=0; _j<_dimj; ++_j, _p0+=_s0, _p1+=_s1, _p2+=_s2) { \
        exp; \
      } \
    } } } while(0)

#endif // MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED