MADNESS 0.10.1
tensor_macros.h
Go to the documentation of this file.
1/*
2 This file is part of MADNESS.
3
4 Copyright (C) 2007,2010 Oak Ridge National Laboratory
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 For more information please contact:
21
22 Robert J. Harrison
23 Oak Ridge National Laboratory
24 One Bethel Valley Road
25 P.O. Box 2008, MS-6367
26
27 email: harrisonrj@ornl.gov
28 tel: 865-241-3937
29 fax: 865-572-0680
30
31
32 $Id$
33*/
34
35
36#ifndef MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED
37#define MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED
38
39/*!
40 \file tensor_macros.h
41 \brief Macros for easy and efficient iteration over tensors.
42
43 \ingroup tensor
44
45Several different macros have been defined to make it
46easy to iterate over expressions involving tensors. They
47vary in their generality, ease of use, and efficiency.
48
49The most general, most easy to use, but also most inefficient,
50and least safe, is
51\code
52ITERATOR(t, expression)
53\endcode
54where \c t is a Tensor of any type, size or dimension that is used to
55define the range of the loop indices, and expression can be nearly
56anything, including multi-line expressions performing arbitrary
57operations on multiple tensors. The loop indices, going
58from left to right in the dimensions, are
59\code
60_i, _j, _k, ...
61\endcode
62
63E.g., to add two matrices together (there are more efficient ways
64to do this, such as \c a+=b )
65\code
66Tensor<long> a(4,2), b(4,2);
67ITERATOR(a, a(_i,_j) += b(_i,_j));
68\endcode
69
70E.g., to print out the indices of all elements of a matrix
71greater than 0.5;
72\code
73Tensor<float> m(5,5);
74m.fillrandom();
75ITERATOR(m, if (m(_i,_j) > 0.5) {
76 cout << _i << " " << _j << endl;
77 });
78\endcode
79
80To make it possible to index arbitrary dimension tensors, the macro
81\c IND has been defined as the indices for the highest supported
82dimension. E.g., to elementwise divide the contents of two tensors of
83unknown dimension
84\code
85ITERATOR(x, x(IND)/y(IND));
86\endcode
87
Note that using \c IND employs bounds checking whereas direct indexing
89with \c _i , etc., does not.
90
91The generality of these macros is offset by their inefficiency and
92lack of safety. The inefficiency is twofold. First, the \c ITERATOR
93macro generates a separate block of code for each possible dimension.
94This could cause code bloat and increased compilation time. To
95solve this problem, the macros \c ITERATOR1 , \c ITERATOR2, etc., have
96been defined, with the corresponding \c IND1 , \c IND2 , etc. These
97macros may be applied to tensor expressions of the appropriate
98dimension.
99
100The second inefficiency is at runtime, due to the explicit indexing of
101all the tensor expressions and the inability to optimize the order
102in which memory is traversed. The lack of safety is the inability
103to check that the tensors in the expression conform and that the
104indices are not out of bounds.
105
106The safety and cost of explicit indexing are addressed by the macros
107\c UNARYITERATOR , \c BINARYITERATOR , and \c TERNARYITERATOR , along
108with their specialization to specific numbers of dimensions (again by
109appending the dimension number to the name of the macro). These
110macros are safer since you have to explicitly name the tensors you are
111iterating over, so that the macro can now check that the input tensors
112conform. The cost of looping is reduced by replacing explicit
113indexing with pointer arithmetic. These macros still define the loop
114indices \c _i , \c _j , etc., but also define \c _p0 , \c _p1 , etc.,
115as pointers to the current elements of tensor argument 0, tensor
116argument 1, etc..
117
118E.g., set elements of a 3-d tensor, \c t , of type \c double to a
119function of the indices
120\code
121UNARYITERATOR(double, t, *_p0 = 1.0/(_i + _j + 1.0));
122\endcode
123
124E.g., to merge two \c double tensors as real and imaginary parts
125of complex tensor of any dimension
126\code
127TERNARYITERATOR(double_complex, c, double, r, double, i,
                *_p0 = double_complex(*_p1, *_p2));
129\endcode
130
131However, we still have the problems that if the dimensions of a tensor
132have been reordered, the loops will go through memory inefficiently,
133and the dimension independent macros still generate redundant code
134blocks. Also, the innermost loop might not be very long and will
135be inefficient.
136
137The most general, efficient and code-compact macros internally use the
138\c TensorIterator , which you could also use directly. Since there is
139no nest of explicit loops, the tensor indices are no longer available
140as \c _i , \c _j , etc.. Furthermore, the \c TensorIterator can
141reorder the loops to optimize the memory traversal, and fuse
142dimensions to make the innermost loop longer for better vectorization
143and reduced loop overhead.
144
145The most efficient macros for iteration are \c UNARY_OPTIMIZED_ITERATOR ,
146\c BINARY_OPTIMIZED_ITERATOR , and \c TERNARY_OPTIMIZED_ITERATOR .
147As before, these define the pointers \c _p0 , \c _p1, \c _p2 , which
148point to the current (and corresponding) element of each argument
149tensor. However, unlike the previous macros there is no guarantee
150that the elements are looped thru in the order expected by a
151simple nest of loops. Furthermore, the indices are completely
unavailable. In addition to using the iterators for optimal
153traversal, these macros attempt to use a single loop
154for optimal vector performance.
155
156E.g., the most efficient and safe way to perform the previous example
157of merging two \c double tensors as real and imaginary parts
158of a complex tensor of any dimension
159\code
160TERNARY_OPTIMIZED_ITERATOR(double_complex, c, double, r, double, i,
                           *_p0 = double_complex(*_p1, *_p2));
162\endcode
163This is precisely how most internal operations are implemented.
164
165In some situations it is necessary to preserve the expected
166order of loops and to not fuse dimensions. The macros
167\c UNARY_UNOPTIMIZED_ITERATOR , \c BINARY_UNOPTIMIZED_ITERATOR ,
168and \c TERNARY_UNOPTIMIZED_ITERATOR use the \c TensorIterator
169but disable loop reordering and fusing. Once these optimizations
have been turned off, the loop indices are available, if needed,
171from the \c ind[] member of the iterator (which is named
\c iter ).
173
174E.g., the fillindex() method is implemented as follows
175\code
176long count = 0;
177UNARY_UNOPTIMIZED_ITERATOR(T, (*this), *_p0 = (T) count++);
178\endcode
179
180\em NB: None of the above iterator macros can be nested ... use the
181actual tensor iterator to do this.
182
183Recommendation --- for both efficiency and safety, use the optimized
macros (\c UNARY_OPTIMIZED_ITERATOR , etc.), unless it is necessary to
preserve loop order, in which case use the unoptimized versions. If
you need the loop indices, use the macros \c UNARYITERATOR , etc.,
187unless you have a very general expression that they cannot handle. In
188this last instance, or for ease of rapid implementation, use the general
189\c ITERATOR macro first described.
190
191*/
192
// Maximum tensor rank supported by the iterator macros below.
// Don't change this without changing the iterator macros — they are
// hand-unrolled for exactly this many dimensions.
#define TENSOR_MAXDIM 6
195
/// Macros IND1, ..., IND6, and IND are a convenience for indexing in macro iterators.
/// INDn expands to the first n loop indices (_i,_j,...) for use as a tensor
/// subscript list inside ITERATORn; IND is the alias for the highest
/// supported dimension (TENSOR_MAXDIM == 6).

#define IND1 _i
#define IND2 _i,_j
#define IND3 _i,_j,_k
#define IND4 _i,_j,_k,_l
#define IND5 _i,_j,_k,_l,_m
#define IND6 _i,_j,_k,_l,_m,_n
#define IND IND6
205
206
// The ITERATORn macros generate one explicit nested loop per dimension of
// tensor t and evaluate exp at every element.  The loop indices are _i, _j,
// _k, _l, _m, _n (leftmost dimension first), and the flat element counter
// _index is also available inside exp.  Elements must be addressed by
// explicit indexing inside exp, so these are the most general but slowest
// iteration macros.
#define ITERATOR1(t,exp) do { \
  long __xd0=t.dim(0), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) {exp;_index++;} } while (0)

#define ITERATOR2(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) {exp;_index++;} } } while (0)

#define ITERATOR3(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) {exp;_index++;} } } } while (0)

#define ITERATOR4(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) {exp;_index++;} } } } } while (0)

#define ITERATOR5(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), __xd4=t.dim(4), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) { \
          for (long _m=0; _m<__xd4; ++_m) {exp;_index++;} } } } } } while (0)

#define ITERATOR6(t,exp) do { \
  long __xd0=t.dim(0), __xd1=t.dim(1), __xd2=t.dim(2), \
       __xd3=t.dim(3), __xd4=t.dim(4), __xd5=t.dim(5), _index=0; \
  for (long _i=0; _i<__xd0; ++_i) { \
    for (long _j=0; _j<__xd1; ++_j) { \
      for (long _k=0; _k<__xd2; ++_k) { \
        for (long _l=0; _l<__xd3; ++_l) { \
          for (long _m=0; _m<__xd4; ++_m) { \
            for (long _n=0; _n<__xd5; ++_n) {exp;_index++;} } } } } } } while(0)

// Rank-dispatching form: selects ITERATORn from t.ndim() at runtime.  The
// higher indices (_j.._n) are predeclared to zero so exp may reference them
// even when the actual rank is lower; the inner macros shadow them as needed.
// Asserts (TENSOR_ASSERT) if the rank exceeds TENSOR_MAXDIM.
#define ITERATOR(t,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (t.ndim() == 1) {ITERATOR1(t,exp);} \
  else if (t.ndim() == 2) {ITERATOR2(t,exp);} \
  else if (t.ndim() == 3) {ITERATOR3(t,exp);} \
  else if (t.ndim() == 4) {ITERATOR4(t,exp);} \
  else if (t.ndim() == 5) {ITERATOR5(t,exp);} \
  else if (t.ndim() == 6) {ITERATOR6(t,exp);} \
  else {TENSOR_ASSERT(t.ndim() <= 6,"ndim confused?",t.ndim(),&t);} \
  } while(0)
259
// Inside iterator access pointer to current element as _p0 (pointer to
// argument number 0). _i, _j, _k, ..., also defined.
// The UNARYITERATORn macros walk a single tensor x (element type X) using
// per-dimension stride arithmetic instead of explicit indexing: __xpK holds
// the base pointer for loop level K, and _p0 points at the current element
// when exp is evaluated.
#define UNARYITERATOR1(X,x,exp) do { \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  X* MADNESS_RESTRICT _p0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,_p0+=__xs0) { \
    exp; \
  } } while(0)

#define UNARYITERATOR2(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT _p0=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1) { \
      exp; \
    } } } while(0)

#define UNARYITERATOR3(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* MADNESS_RESTRICT _p0=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2) { \
        exp; \
      } } } } while(0)

#define UNARYITERATOR4(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* MADNESS_RESTRICT _p0=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3) { \
          exp; \
        } } } } } while(0)

#define UNARYITERATOR5(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3) { \
          X* MADNESS_RESTRICT _p0 =__xp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4) { \
            exp; \
          } } } } } } while(0)

#define UNARYITERATOR6(X,x,exp) do { \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  for (long _i=0; _i<__xd0; ++_i,__xp0+=__xs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3) { \
          X* MADNESS_RESTRICT __xp4=__xp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4) { \
            X* MADNESS_RESTRICT _p0=__xp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5) { \
              exp; \
            } } } } } } } while(0)

// Rank-dispatching form: selects UNARYITERATORn from x.ndim() at runtime.
// Higher indices are predeclared to zero so exp may reference them at any rank.
#define UNARYITERATOR(X,x,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) UNARYITERATOR1(X,x,exp); \
  else if (x.ndim() == 2) UNARYITERATOR2(X,x,exp); \
  else if (x.ndim() == 3) UNARYITERATOR3(X,x,exp); \
  else if (x.ndim() == 4) UNARYITERATOR4(X,x,exp); \
  else if (x.ndim() == 5) UNARYITERATOR5(X,x,exp); \
  else if (x.ndim() == 6) UNARYITERATOR6(X,x,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} } while(0)
355
// Inside iterator access pointers to current elements as _p0 & _p1
// _i, _j, _k, ... also defined.
// The BINARYITERATORn macros walk two conforming tensors x (type X) and
// y (type Y) in lockstep using per-dimension stride arithmetic; _p0/_p1
// point at the current element of x/y when exp is evaluated.  Conformance
// is checked up front with TENSOR_ASSERT.
#define BINARYITERATOR1(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  long __ys0=y.stride(0); \
  X* MADNESS_RESTRICT _p0=x.ptr(); \
  Y* MADNESS_RESTRICT _p1=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, _p0+=__xs0, _p1+=__ys0) { \
    exp; \
  } } while(0)

#define BINARYITERATOR2(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  long __ys0=y.stride(0), __ys1=y.stride(1); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT _p0=__xp0; \
    Y* MADNESS_RESTRICT _p1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1, _p1+=__ys1) { \
      exp; \
    } } } while(0)

#define BINARYITERATOR3(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* MADNESS_RESTRICT _p0=__xp1; \
      Y* MADNESS_RESTRICT _p1=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2, _p1+=__ys2) { \
        exp; \
      } } } } while(0)

#define BINARYITERATOR4(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* MADNESS_RESTRICT _p0=__xp2; \
        Y* MADNESS_RESTRICT _p1=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3, _p1+=__ys3) { \
          exp; \
        } } } } } while(0)

#define BINARYITERATOR5(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        Y* MADNESS_RESTRICT __yp3=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3) { \
          X* MADNESS_RESTRICT _p0=__xp3; \
          Y* MADNESS_RESTRICT _p1=__yp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4, _p1+=__ys4) { \
            exp; \
          } } } } } } while(0)

#define BINARYITERATOR6(X,x,Y,y,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4), __ys5=y.stride(5); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        Y* MADNESS_RESTRICT __yp3=__yp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3) { \
          X* MADNESS_RESTRICT __xp4=__xp3; \
          Y* MADNESS_RESTRICT __yp4=__yp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4, __yp4+=__ys4) { \
            X* MADNESS_RESTRICT _p0=__xp4; \
            Y* MADNESS_RESTRICT _p1=__yp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5, _p1+=__ys5) { \
              exp; \
            } } } } } } } while(0)

// Rank-dispatching form: selects BINARYITERATORn from x.ndim() at runtime.
#define BINARYITERATOR(X,x,Y,y,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) BINARYITERATOR1(X,x,Y,y,exp); \
  else if (x.ndim() == 2) BINARYITERATOR2(X,x,Y,y,exp); \
  else if (x.ndim() == 3) BINARYITERATOR3(X,x,Y,y,exp); \
  else if (x.ndim() == 4) BINARYITERATOR4(X,x,Y,y,exp); \
  else if (x.ndim() == 5) BINARYITERATOR5(X,x,Y,y,exp); \
  else if (x.ndim() == 6) BINARYITERATOR6(X,x,Y,y,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} \
} while(0)
488
// Inside iterator access pointers to current elements as _p0, _p1, _p2
// _i, _j, _k, ... also defined.
// The TERNARYITERATORn macros walk three conforming tensors x (type X),
// y (type Y), and z (type Z) in lockstep using per-dimension stride
// arithmetic; _p0/_p1/_p2 point at the current elements when exp is
// evaluated.  Conformance of y and z with x is checked up front.
#define TERNARYITERATOR1(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0); \
  long __xs0=x.stride(0); \
  long __ys0=y.stride(0); \
  long __zs0=z.stride(0); \
  X* MADNESS_RESTRICT _p0=x.ptr(); \
  Y* MADNESS_RESTRICT _p1=y.ptr(); \
  Z* MADNESS_RESTRICT _p2=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, _p0+=__xs0, _p1+=__ys0, _p2+=__zs0) { \
    exp; \
  } } while(0)

#define TERNARYITERATOR2(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1); \
  long __xs0=x.stride(0), __xs1=x.stride(1); \
  long __ys0=y.stride(0), __ys1=y.stride(1); \
  long __zs0=z.stride(0), __zs1=z.stride(1); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT _p0=__xp0; \
    Y* MADNESS_RESTRICT _p1=__yp0; \
    Z* MADNESS_RESTRICT _p2=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, _p0+=__xs1, _p1+=__ys1, _p2+=__zs1) { \
      exp; \
    } } } while(0)

#define TERNARYITERATOR3(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    Z* MADNESS_RESTRICT __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* MADNESS_RESTRICT _p0=__xp1; \
      Y* MADNESS_RESTRICT _p1=__yp1; \
      Z* MADNESS_RESTRICT _p2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, _p0+=__xs2, _p1+=__ys2, _p2+=__zs2) { \
        exp; \
      } } } } while(0)

#define TERNARYITERATOR4(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    Z* MADNESS_RESTRICT __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      Z* MADNESS_RESTRICT __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* MADNESS_RESTRICT _p0=__xp2; \
        Y* MADNESS_RESTRICT _p1=__yp2; \
        Z* MADNESS_RESTRICT _p2=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, _p0+=__xs3, _p1+=__ys3, _p2+=__zs3) { \
          exp; \
        } } } } } while(0)

#define TERNARYITERATOR5(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3), __zs4=z.stride(4); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    Z* MADNESS_RESTRICT __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      Z* MADNESS_RESTRICT __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        Y* MADNESS_RESTRICT __yp3=__yp2; \
        Z* MADNESS_RESTRICT __zp3=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3, __zp3+=__zs3) { \
          X* MADNESS_RESTRICT _p0=__xp3; \
          Y* MADNESS_RESTRICT _p1=__yp3; \
          Z* MADNESS_RESTRICT _p2=__zp3; \
          for (long _m=0; _m<__xd4; ++_m, _p0+=__xs4, _p1+=__ys4, _p2+=__zs4) { \
            exp; \
          } } } } } } while(0)

#define TERNARYITERATOR6(X,x,Y,y,Z,z,exp) do { \
  TENSOR_ASSERT(x.conforms(y),"first and second tensors do not conform",0,&x); \
  TENSOR_ASSERT(x.conforms(z),"first and third tensors do not conform",0,&x); \
  long __xd0=x.dim(0), __xd1=x.dim(1), __xd2=x.dim(2), \
       __xd3=x.dim(3), __xd4=x.dim(4), __xd5=x.dim(5); \
  long __xs0=x.stride(0), __xs1=x.stride(1), __xs2=x.stride(2), \
       __xs3=x.stride(3), __xs4=x.stride(4), __xs5=x.stride(5); \
  long __ys0=y.stride(0), __ys1=y.stride(1), __ys2=y.stride(2), \
       __ys3=y.stride(3), __ys4=y.stride(4), __ys5=y.stride(5); \
  long __zs0=z.stride(0), __zs1=z.stride(1), __zs2=z.stride(2), \
       __zs3=z.stride(3), __zs4=z.stride(4), __zs5=z.stride(5); \
  X* MADNESS_RESTRICT __xp0=x.ptr(); \
  Y* MADNESS_RESTRICT __yp0=y.ptr(); \
  Z* MADNESS_RESTRICT __zp0=z.ptr(); \
  for (long _i=0; _i<__xd0; ++_i, __xp0+=__xs0, __yp0+=__ys0, __zp0+=__zs0) { \
    X* MADNESS_RESTRICT __xp1=__xp0; \
    Y* MADNESS_RESTRICT __yp1=__yp0; \
    Z* MADNESS_RESTRICT __zp1=__zp0; \
    for (long _j=0; _j<__xd1; ++_j, __xp1+=__xs1, __yp1+=__ys1, __zp1+=__zs1) { \
      X* MADNESS_RESTRICT __xp2=__xp1; \
      Y* MADNESS_RESTRICT __yp2=__yp1; \
      Z* MADNESS_RESTRICT __zp2=__zp1; \
      for (long _k=0; _k<__xd2; ++_k, __xp2+=__xs2, __yp2+=__ys2, __zp2+=__zs2) { \
        X* MADNESS_RESTRICT __xp3=__xp2; \
        Y* MADNESS_RESTRICT __yp3=__yp2; \
        Z* MADNESS_RESTRICT __zp3=__zp2; \
        for (long _l=0; _l<__xd3; ++_l, __xp3+=__xs3, __yp3+=__ys3, __zp3+=__zs3) { \
          X* MADNESS_RESTRICT __xp4=__xp3; \
          Y* MADNESS_RESTRICT __yp4=__yp3; \
          Z* MADNESS_RESTRICT __zp4=__zp3; \
          for (long _m=0; _m<__xd4; ++_m, __xp4+=__xs4, __yp4+=__ys4, __zp4+=__zs4) { \
            X* MADNESS_RESTRICT _p0=__xp4; \
            Y* MADNESS_RESTRICT _p1=__yp4; \
            Z* MADNESS_RESTRICT _p2=__zp4; \
            for (long _n=0; _n<__xd5; ++_n, _p0+=__xs5, _p1+=__ys5, _p2+=__zs5) { \
              exp; \
            } } } } } } } while(0)

// Rank-dispatching form: selects TERNARYITERATORn from x.ndim() at runtime.
#define TERNARYITERATOR(X,x,Y,y,Z,z,exp) do { \
  long _j=0, _k=0, _l=0, _m=0, _n=0; \
  if (x.ndim() == 1) TERNARYITERATOR1(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 2) TERNARYITERATOR2(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 3) TERNARYITERATOR3(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 4) TERNARYITERATOR4(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 5) TERNARYITERATOR5(X,x,Y,y,Z,z,exp); \
  else if (x.ndim() == 6) TERNARYITERATOR6(X,x,Y,y,Z,z,exp); \
  else {TENSOR_ASSERT(x.ndim() <= 6,"ndim confused?",x.ndim(),&x);} \
} while(0)
657
// Optimized unary iteration over tensor x (element type X).  If x is
// contiguous a single flat loop over all elements is used (best for
// vectorization); otherwise a TensorIterator (with loop reordering and
// dimension fusing enabled) supplies the innermost dimension.  Only _p0 is
// meaningful inside exp — the logical indices are NOT available and the
// traversal order is unspecified.
#define UNARY_OPTIMIZED_ITERATOR(X,x,exp) do { \
  if (x.iscontiguous()) { \
    X* MADNESS_RESTRICT _p0 = x.ptr(); \
    for (long _j=0; _j<x.size(); ++_j,++_p0) {exp;} \
  } \
  else { \
    for (TensorIterator<REMCONST(X)> iter=x.unary_iterator(1); iter._p0; ++iter) { \
      long _dimj = iter.dimj; \
      X* MADNESS_RESTRICT _p0 = iter._p0; \
      long _s0 = iter._s0; \
      for (long _j=0; _j<_dimj; ++_j, _p0+=_s0) { \
        exp; \
      } \
    } \
  } \
} while(0)
674
675
// Can optimize these by moving definition of stride out of loop.
// Unoptimized unary iteration: uses the TensorIterator with loop reordering
// and dimension fusing disabled, so elements are visited in the natural
// logical order.  The current indices are available from the iterator's
// ind[] member (the iterator variable is named iter).
#define UNARY_UNOPTIMIZED_ITERATOR(X,x,exp) do { \
  for (TensorIterator<REMCONST(X)> iter=x.unary_iterator(1,false,false); iter._p0; ++iter) { \
    long _dimj = iter.dimj; \
    X* MADNESS_RESTRICT _p0 = iter._p0; \
    long _s0 = iter._s0; \
    for (long _j=0; _j<_dimj; ++_j, _p0+=_s0) { \
      exp; \
    } \
  } } while(0)
687
// Use this inside another iterator macro ... pointers are _q* instead of _p*
// Iterator is iter2.  Identical to UNARY_UNOPTIMIZED_ITERATOR except every
// internal name is suffixed/renamed (_q0, _j2, _s20, _dimj2, iter2) so the
// two macros can be nested without shadowing each other's variables.
#define UNARY_UNOPTIMIZED_ITERATOR_NESTED(X,x,exp) do { \
  for (TensorIterator<REMCONST(X)> iter2=x.unary_iterator(1,false,false); iter2._p0; ++iter2) { \
    long _dimj2 = iter2.dimj; \
    X* MADNESS_RESTRICT _q0 = iter2._p0; \
    long _s20 = iter2._s0; \
    for (long _j2=0; _j2<_dimj2; ++_j2, _q0+=_s20) { \
      exp; \
    } \
  } } while(0)
700
// Optimized binary iteration over tensors x (type X) and y (type Y).  When
// both are contiguous and the same size a single flat lockstep loop is used;
// otherwise a fused/reordered TensorIterator drives the innermost dimension.
// _p0 and _p1 point at corresponding elements inside exp; indices are not
// available and traversal order is unspecified.
#define BINARY_OPTIMIZED_ITERATOR(X,x,Y,y,exp) do { \
  if (x.iscontiguous() && y.iscontiguous() && x.size()==y.size()) { \
    X* MADNESS_RESTRICT _p0 = x.ptr(); \
    Y* MADNESS_RESTRICT _p1 = y.ptr(); \
    for (long _j=0; _j<x.size(); ++_j,++_p0,++_p1) {exp;} \
  } \
  else { \
    for (TensorIterator<REMCONST(X),REMCONST(Y)> iter=x.binary_iterator(y,1); iter._p0; ++iter) { \
      long _dimj = iter.dimj; \
      X* MADNESS_RESTRICT _p0 = iter._p0; \
      Y* MADNESS_RESTRICT _p1 = iter._p1; \
      long _s0 = iter._s0; \
      long _s1 = iter._s1; \
      for (long _j=0; _j<_dimj; ++_j, _p0+=_s0, _p1+=_s1) { \
        exp; \
      } \
    } } } while(0)
718
// Optimized ternary iteration over tensors x, y, z (types X, Y, Z).  When
// all three are contiguous and the same size a single flat lockstep loop is
// used; otherwise a fused/reordered TensorIterator drives the innermost
// dimension.  _p0, _p1, _p2 point at corresponding elements inside exp;
// indices are not available and traversal order is unspecified.
#define TERNARY_OPTIMIZED_ITERATOR(X,x,Y,y,Z,z,exp) do { \
  if (x.iscontiguous() && y.iscontiguous() && z.iscontiguous() && x.size()==y.size() && x.size()==z.size()) { \
    X* MADNESS_RESTRICT _p0 = x.ptr(); \
    Y* MADNESS_RESTRICT _p1 = y.ptr(); \
    Z* MADNESS_RESTRICT _p2 = z.ptr(); \
    for (long _j=0; _j<x.size(); ++_j,++_p0,++_p1,++_p2) {exp;} \
  } \
  else { \
    for (TensorIterator<REMCONST(X),REMCONST(Y),REMCONST(Z)> iter=x.ternary_iterator(y,z,1); iter._p0; ++iter) { \
      long _dimj = iter.dimj; \
      X* MADNESS_RESTRICT _p0 = iter._p0; \
      Y* MADNESS_RESTRICT _p1 = iter._p1; \
      Z* MADNESS_RESTRICT _p2 = iter._p2; \
      long _s0 = iter._s0; \
      long _s1 = iter._s1; \
      long _s2 = iter._s2; \
      for (long _j=0; _j<_dimj; ++_j, _p0+=_s0, _p1+=_s1, _p2+=_s2) { \
        exp; \
      } \
    } } } while(0)
739
740#endif // MADNESS_TENSOR_TENSOR_MACROS_H__INCLUDED