MADNESS 0.10.1
funcimpl.h
Go to the documentation of this file.
1/*
2 This file is part of MADNESS.
3
4 Copyright (C) 2007,2010 Oak Ridge National Laboratory
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 For more information please contact:
21
22 Robert J. Harrison
23 Oak Ridge National Laboratory
24 One Bethel Valley Road
25 P.O. Box 2008, MS-6367
26
27 email: harrisonrj@ornl.gov
28 tel: 865-241-3937
29 fax: 865-572-0680
30*/
31
32#ifndef MADNESS_MRA_FUNCIMPL_H__INCLUDED
33#define MADNESS_MRA_FUNCIMPL_H__INCLUDED
34
35/// \file funcimpl.h
36/// \brief Provides FunctionCommonData, FunctionImpl and FunctionFactory
37
39#include <madness/world/print.h>
40#include <madness/misc/misc.h>
43
45#include <madness/mra/indexit.h>
46#include <madness/mra/key.h>
50
51#include <madness/mra/leafop.h>
52
53#include <array>
54#include <iostream>
55#include <type_traits>
56
57namespace madness {
58 template <typename T, std::size_t NDIM>
59 class DerivativeBase;
60
61 template<typename T, std::size_t NDIM>
62 class FunctionImpl;
63
64 template<typename T, std::size_t NDIM>
65 class FunctionNode;
66
67 template<typename T, std::size_t NDIM>
68 class Function;
69
70 template<typename T, std::size_t NDIM>
71 class FunctionFactory;
72
73 template<typename T, std::size_t NDIM, std::size_t MDIM>
74 class CompositeFunctorInterface;
75
76 template<int D>
78
79}
80
81namespace madness {
82
83
84 /// A simple process map
85 template<typename keyT>
86 class SimplePmap : public WorldDCPmapInterface<keyT> {
87 private:
88 const int nproc;
90
91 public:
92 SimplePmap(World& world) : nproc(world.nproc()), me(world.rank())
93 { }
94
95 ProcessID owner(const keyT& key) const {
96 if (key.level() == 0)
97 return 0;
98 else
99 return key.hash() % nproc;
100 }
101 };
102
103 /// A pmap that locates children on odd levels with their even level parents
104 template <typename keyT>
105 class LevelPmap : public WorldDCPmapInterface<keyT> {
106 private:
107 const int nproc;
108 public:
109 LevelPmap() : nproc(0) {};
110
111 LevelPmap(World& world) : nproc(world.nproc()) {}
112
113 /// Find the owner of a given key
114 ProcessID owner(const keyT& key) const {
115 Level n = key.level();
116 if (n == 0) return 0;
117 hashT hash;
118 if (n <= 3 || (n&0x1)) hash = key.hash();
119 else hash = key.parent().hash();
120 return hash%nproc;
121 }
122 };
123
124
125 /// FunctionNode holds the coefficients, etc., at each node of the 2^NDIM-tree
126 template<typename T, std::size_t NDIM>
128 public:
131 private:
132 // Should compile OK with these volatile but there should
133 // be no need to set as volatile since the container internally
134 // stores the entire entry as volatile
135
136 coeffT _coeffs; ///< The coefficients, if any
137 double _norm_tree; ///< After norm_tree will contain norm of coefficients summed up tree
138 bool _has_children; ///< True if there are children
139 coeffT buffer; ///< The coefficients, if any
140 double dnorm=-1.0; ///< norm of the d coefficients, also defined if there are no d coefficients
141 double snorm=-1.0; ///< norm of the s coefficients
142
143 public:
144 typedef WorldContainer<Key<NDIM> , FunctionNode<T, NDIM> > dcT; ///< Type of container holding the nodes
145 /// Default constructor makes node without coeff or children
147 _coeffs(), _norm_tree(1e300), _has_children(false) {
148 }
149
150 /// Constructor from given coefficients with optional children
151
152 /// Note that only a shallow copy of the coeff are taken so
153 /// you should pass in a deep copy if you want the node to
154 /// take ownership.
155 explicit
159
160 explicit
164
165 explicit
169
172 dnorm(other.dnorm), snorm(other.snorm) {
173 }
174
177 if (this != &other) {
178 coeff() = copy(other.coeff());
179 _norm_tree = other._norm_tree;
181 dnorm=other.dnorm;
182 snorm=other.snorm;
184 }
185 return *this;
186 }
187
188 /// Copy with possible type conversion of coefficients, copying all other state
189
190 /// Choose to not overload copy and type conversion operators
191 /// so there are no automatic type conversions.
192 template<typename Q>
194 convert() const {
195 return FunctionNode<Q, NDIM> (madness::convert<Q,T>(coeff()), _norm_tree, snorm, dnorm, _has_children);
196 }
197
198 /// Returns true if there are coefficients in this node
199 bool
200 has_coeff() const {
201 return _coeffs.has_data();
202 }
203
204
205 /// Returns true if this node has children
206 bool
207 has_children() const {
208 return _has_children;
209 }
210
211 /// Returns true if this does not have children
212 bool
213 is_leaf() const {
214 return !_has_children;
215 }
216
217 /// Returns true if this node is invalid (no coeffs and no children)
218 bool
219 is_invalid() const {
220 return !(has_coeff() || has_children());
221 }
222
223 /// Returns a non-const reference to the tensor containing the coeffs
224
225 /// Returns an empty tensor if there are no coefficients.
226 coeffT&
228 MADNESS_ASSERT(_coeffs.ndim() == -1 || (_coeffs.dim(0) <= 2
229 * MAXK && _coeffs.dim(0) >= 0));
230 return const_cast<coeffT&>(_coeffs);
231 }
232
233 /// Returns a const reference to the tensor containing the coeffs
234
235 /// Returns an empty tensor if there are no coefficeints.
236 const coeffT&
237 coeff() const {
238 return const_cast<const coeffT&>(_coeffs);
239 }
240
241 /// Returns the number of coefficients in this node
242 size_t size() const {
243 return _coeffs.size();
244 }
245
246 public:
247
248 /// reduces the rank of the coefficients (if applicable)
249 void reduceRank(const double& eps) {
250 _coeffs.reduce_rank(eps);
251 }
252
253 /// Sets \c has_children attribute to value of \c flag.
254 void set_has_children(bool flag) {
255 _has_children = flag;
256 }
257
258 /// Sets \c has_children attribute to true recurring up to ensure connected
260 //madness::print(" set_chi_recu: ", key, *this);
261 //PROFILE_MEMBER_FUNC(FunctionNode); // Too fine grain for routine profiling
262 if (!(has_children() || has_coeff() || key.level()==0)) {
263 // If node already knows it has children or it has
264 // coefficients then it must already be connected to
265 // its parent. If not, the node was probably just
266 // created for this operation and must be connected to
267 // its parent.
268 Key<NDIM> parent = key.parent();
269 // Task on next line used to be TaskAttributes::hipri()) ... but deferring execution of this
270 // makes sense since it is not urgent and lazy connection will likely mean that less forwarding
271 // will happen since the upper level task will have already made the connection.
272 const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
273 //const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
274 //madness::print(" set_chi_recu: forwarding",key,parent);
275 }
276 _has_children = true;
277 }
278
279 /// Sets \c has_children attribute to value of \c !flag
280 void set_is_leaf(bool flag) {
281 _has_children = !flag;
282 }
283
284 /// Takes a \em shallow copy of the coeff --- same as \c this->coeff()=coeff
285 void set_coeff(const coeffT& coeffs) {
286 coeff() = coeffs;
287 if ((_coeffs.has_data()) and ((_coeffs.dim(0) < 0) || (_coeffs.dim(0)>2*MAXK))) {
288 print("set_coeff: may have a problem");
289 print("set_coeff: coeff.dim[0] =", coeffs.dim(0), ", 2* MAXK =", 2*MAXK);
290 }
291 MADNESS_ASSERT(coeffs.dim(0)<=2*MAXK && coeffs.dim(0)>=0);
292 }
293
294 /// Clears the coefficients (has_coeff() will subsequently return false)
295 void clear_coeff() {
296 coeff()=coeffT();
297 }
298
299 /// Scale the coefficients of this node
300 template <typename Q>
301 void scale(Q a) {
302 _coeffs.scale(a);
303 }
304
305 /// Sets the value of norm_tree
308 }
309
310 /// Gets the value of norm_tree
311 double get_norm_tree() const {
312 return _norm_tree;
313 }
314
315 /// return the precomputed norm of the (virtual) d coefficients
316 double get_dnorm() const {
317 return dnorm;
318 }
319
320 /// set the precomputed norm of the (virtual) s coefficients
321 void set_snorm(const double sn) {
322 snorm=sn;
323 }
324
325 /// set the precomputed norm of the (virtual) d coefficients
326 void set_dnorm(const double dn) {
327 dnorm=dn;
328 }
329
330 /// get the precomputed norm of the (virtual) s coefficients
331 double get_snorm() const {
332 return snorm;
333 }
334
336 snorm = 0.0;
337 dnorm = 0.0;
338 if (coeff().size() == 0) { ;
339 } else if (coeff().dim(0) == cdata.vk[0]) {
340 snorm = coeff().normf();
341
342 } else if (coeff().is_full_tensor()) {
343 Tensor<T> c = copy(coeff().get_tensor());
344 snorm = c(cdata.s0).normf();
345 c(cdata.s0) = 0.0;
346 dnorm = c.normf();
347
348 } else if (coeff().is_svd_tensor()) {
349 coeffT c= coeff()(cdata.s0);
350 snorm = c.normf();
351 double norm = coeff().normf();
352 dnorm = sqrt(norm * norm - snorm * snorm);
353
354 } else {
355 MADNESS_EXCEPTION("cannot use compute_dnorm", 1);
356 }
357 }
358
359
360 /// General bi-linear operation --- this = this*alpha + other*beta
361
362 /// This/other may not have coefficients. Has_children will be
363 /// true in the result if either this/other have children.
364 template <typename Q, typename R>
365 void gaxpy_inplace(const T& alpha, const FunctionNode<Q,NDIM>& other, const R& beta) {
366 //PROFILE_MEMBER_FUNC(FuncNode); // Too fine grain for routine profiling
367 if (other.has_children())
368 _has_children = true;
369 if (has_coeff()) {
370 if (other.has_coeff()) {
371 coeff().gaxpy(alpha,other.coeff(),beta);
372 }
373 else {
374 coeff().scale(alpha);
375 }
376 }
377 else if (other.has_coeff()) {
378 coeff() = other.coeff()*beta; //? Is this the correct type conversion?
379 }
380 }
381
382 /// Accumulate inplace and if necessary connect node to parent
383 void accumulate2(const tensorT& t, const typename FunctionNode<T,NDIM>::dcT& c,
384 const Key<NDIM>& key) {
385 // double cpu0=cpu_time();
386 if (has_coeff()) {
387 MADNESS_ASSERT(coeff().is_full_tensor());
388 // if (coeff().type==TT_FULL) {
389 coeff() += coeffT(t,-1.0,TT_FULL);
390 // } else {
391 // tensorT cc=coeff().full_tensor_copy();;
392 // cc += t;
393 // coeff()=coeffT(cc,args);
394 // }
395 }
396 else {
397 // No coeff and no children means the node is newly
398 // created for this operation and therefore we must
399 // tell its parent that it exists.
400 coeff() = coeffT(t,-1.0,TT_FULL);
401 // coeff() = copy(t);
402 // coeff() = coeffT(t,args);
403 if ((!_has_children) && key.level()> 0) {
404 Key<NDIM> parent = key.parent();
405 if (c.is_local(parent))
406 const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
407 else
408 const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
409 }
410 }
411 //double cpu1=cpu_time();
412 }
413
414
415 /// Accumulate inplace and if necessary connect node to parent
416 void accumulate(const coeffT& t, const typename FunctionNode<T,NDIM>::dcT& c,
417 const Key<NDIM>& key, const TensorArgs& args) {
418 if (has_coeff()) {
419 coeff().add_SVD(t,args.thresh);
420 if (buffer.rank()<coeff().rank()) {
421 if (buffer.has_data()) {
422 buffer.add_SVD(coeff(),args.thresh);
423 } else {
424 buffer=copy(coeff());
425 }
426 coeff()=coeffT();
427 }
428
429 } else {
430 // No coeff and no children means the node is newly
431 // created for this operation and therefore we must
432 // tell its parent that it exists.
433 coeff() = copy(t);
434 if ((!_has_children) && key.level()> 0) {
435 Key<NDIM> parent = key.parent();
436 if (c.is_local(parent))
437 const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
438 else
439 const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
440 }
441 }
442 }
443
444 void consolidate_buffer(const TensorArgs& args) {
445 if ((coeff().has_data()) and (buffer.has_data())) {
446 coeff().add_SVD(buffer,args.thresh);
447 } else if (buffer.has_data()) {
448 coeff()=buffer;
449 }
450 buffer=coeffT();
451 }
452
454 return this->_coeffs.trace_conj((rhs._coeffs));
455 }
456
457 template <typename Archive>
458 void serialize(Archive& ar) {
460 }
461
462 /// like operator<<(ostream&, const FunctionNode<T,NDIM>&) but
463 /// produces a sequence JSON-formatted key-value pairs
464 /// @warning enclose the output in curly braces to make
465 /// a valid JSON object
466 void print_json(std::ostream& s) const {
467 s << "\"has_coeff\":" << this->has_coeff()
468 << ",\"has_children\":" << this->has_children() << ",\"norm\":";
469 double norm = this->has_coeff() ? this->coeff().normf() : 0.0;
470 if (norm < 1e-12)
471 norm = 0.0;
472 double nt = this->get_norm_tree();
473 if (nt == 1e300)
474 nt = 0.0;
475 s << norm << ",\"norm_tree\":" << nt << ",\"snorm\":"
476 << this->get_snorm() << ",\"dnorm\":" << this->get_dnorm()
477 << ",\"rank\":" << this->coeff().rank();
478 if (this->coeff().is_assigned())
479 s << ",\"dim\":" << this->coeff().dim(0);
480 }
481
482 };
483
484 template <typename T, std::size_t NDIM>
485 std::ostream& operator<<(std::ostream& s, const FunctionNode<T,NDIM>& node) {
486 s << "(has_coeff=" << node.has_coeff() << ", has_children=" << node.has_children() << ", norm=";
487 double norm = node.has_coeff() ? node.coeff().normf() : 0.0;
488 if (norm < 1e-12)
489 norm = 0.0;
490 double nt = node.get_norm_tree();
491 if (nt == 1e300) nt = 0.0;
492 s << norm << ", norm_tree, s/dnorm =" << nt << ", " << node.get_snorm() << " " << node.get_dnorm() << "), rank="<< node.coeff().rank()<<")";
493 if (node.coeff().is_assigned()) s << " dim " << node.coeff().dim(0) << " ";
494 return s;
495 }
496
497
498 /// returns true if the result of a hartree_product is a leaf node (compute norm & error)
499 template<typename T, size_t NDIM>
501
504 long k;
505 bool do_error_leaf_op() const {return false;}
506
507 hartree_leaf_op() = default;
508 hartree_leaf_op(const implT* f, const long& k) : f(f), k(k) {}
509
510 /// no pre-determination
511 bool operator()(const Key<NDIM>& key) const {return false;}
512
513 /// no post-determination
514 bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const {
515 MADNESS_EXCEPTION("no post-determination in hartree_leaf_op",1);
516 return true;
517 }
518
519 /// post-determination: true if f is a leaf and the result is well-represented
520
521 /// @param[in] key the hi-dimensional key (breaks into keys for f and g)
522 /// @param[in] fcoeff coefficients of f of its appropriate key in NS form
523 /// @param[in] gcoeff coefficients of g of its appropriate key in NS form
524 bool operator()(const Key<NDIM>& key, const Tensor<T>& fcoeff, const Tensor<T>& gcoeff) const {
525
526 if (key.level()<2) return false;
527 Slice s = Slice(0,k-1);
528 std::vector<Slice> s0(NDIM/2,s);
529
530 const double tol=f->get_thresh();
531 const double thresh=f->truncate_tol(tol, key)*0.3; // custom factor to "ensure" accuracy
532 // include the wavelets in the norm, makes it much more accurate
533 const double fnorm=fcoeff.normf();
534 const double gnorm=gcoeff.normf();
535
536 // if the final norm is small, perform the hartree product and return
537 const double norm=fnorm*gnorm; // computing the outer product
538 if (norm < thresh) return true;
539
540 // norm of the scaling function coefficients
541 const double sfnorm=fcoeff(s0).normf();
542 const double sgnorm=gcoeff(s0).normf();
543
544 // get the error of both functions and of the pair function;
545 // need the abs for numerics: sfnorm might be equal fnorm.
546 const double ferror=sqrt(std::abs(fnorm*fnorm-sfnorm*sfnorm));
547 const double gerror=sqrt(std::abs(gnorm*gnorm-sgnorm*sgnorm));
548
549 // if the expected error is small, perform the hartree product and return
550 const double error=fnorm*gerror + ferror*gnorm + ferror*gerror;
551 // const double error=sqrt(fnorm*fnorm*gnorm*gnorm - sfnorm*sfnorm*sgnorm*sgnorm);
552
553 if (error < thresh) return true;
554 return false;
555 }
556 template <typename Archive> void serialize (Archive& ar) {
557 ar & f & k;
558 }
559 };
560
561 /// returns true if the result of the convolution operator op with some provided
562 /// coefficients will be small
563 template<typename T, size_t NDIM, typename opT>
564 struct op_leaf_op {
566
567 const opT* op; ///< the convolution operator
568 const implT* f; ///< the source or result function, needed for truncate_tol
569 bool do_error_leaf_op() const {return true;}
570
571 op_leaf_op() = default;
572 op_leaf_op(const opT* op, const implT* f) : op(op), f(f) {}
573
574 /// pre-determination: we can't know if this will be a leaf node before we got the final coeffs
575 bool operator()(const Key<NDIM>& key) const {return true;}
576
577 /// post-determination: return true if operator and coefficient norms are small
578 bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const {
579 if (key.level()<2) return false;
580 const double cnorm=coeff.normf();
581 return this->operator()(key,cnorm);
582 }
583
584 /// post-determination: return true if operator and coefficient norms are small
585 bool operator()(const Key<NDIM>& key, const double& cnorm) const {
586 if (key.level()<2) return false;
587
588 typedef Key<opT::opdim> opkeyT;
589 const opkeyT source=op->get_source_key(key);
590
591 const double thresh=f->truncate_tol(f->get_thresh(),key);
592 const std::vector<opkeyT>& disp = op->get_disp(key.level());
593 const opkeyT& d = *disp.begin(); // use the zero-displacement for screening
594 const double opnorm = op->norm(key.level(), d, source);
595 const double norm=opnorm*cnorm;
596 return norm<thresh;
597
598 }
599
600 template <typename Archive> void serialize (Archive& ar) {
601 ar & op & f;
602 }
603
604 };
605
606
607 /// returns true if the result of a hartree_product is a leaf node
608 /// criteria are error, norm and its effect on a convolution operator
609 template<typename T, size_t NDIM, size_t LDIM, typename opT>
611
614
616 const implL* g; // for use of its cdata only
617 const opT* op;
618 bool do_error_leaf_op() const {return false;}
619
621 hartree_convolute_leaf_op(const implT* f, const implL* g, const opT* op)
622 : f(f), g(g), op(op) {}
623
624 /// no pre-determination
625 bool operator()(const Key<NDIM>& key) const {return true;}
626
627 /// no post-determination
628 bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const {
629 MADNESS_EXCEPTION("no post-determination in hartree_convolute_leaf_op",1);
630 return true;
631 }
632
633 /// post-determination: true if f is a leaf and the result is well-represented
634
635 /// @param[in] key the hi-dimensional key (breaks into keys for f and g)
636 /// @param[in] fcoeff coefficients of f of its appropriate key in NS form
637 /// @param[in] gcoeff coefficients of g of its appropriate key in NS form
638 bool operator()(const Key<NDIM>& key, const Tensor<T>& fcoeff, const Tensor<T>& gcoeff) const {
639 // bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const {
640
641 if (key.level()<2) return false;
642
643 const double tol=f->get_thresh();
644 const double thresh=f->truncate_tol(tol, key);
645 // include the wavelets in the norm, makes it much more accurate
646 const double fnorm=fcoeff.normf();
647 const double gnorm=gcoeff.normf();
648
649 // norm of the scaling function coefficients
650 const double sfnorm=fcoeff(g->get_cdata().s0).normf();
651 const double sgnorm=gcoeff(g->get_cdata().s0).normf();
652
653 // if the final norm is small, perform the hartree product and return
654 const double norm=fnorm*gnorm; // computing the outer product
655 if (norm < thresh) return true;
656
657 // get the error of both functions and of the pair function
658 const double ferror=sqrt(fnorm*fnorm-sfnorm*sfnorm);
659 const double gerror=sqrt(gnorm*gnorm-sgnorm*sgnorm);
660
661 // if the expected error is small, perform the hartree product and return
662 const double error=fnorm*gerror + ferror*gnorm + ferror*gerror;
663 if (error < thresh) return true;
664
665 // now check if the norm of this and the norm of the operator are significant
666 const std::vector<Key<NDIM> >& disp = op->get_disp(key.level());
667 const Key<NDIM>& d = *disp.begin(); // use the zero-displacement for screening
668 const double opnorm = op->norm(key.level(), d, key);
669 const double final_norm=opnorm*sfnorm*sgnorm;
670 if (final_norm < thresh) return true;
671
672 return false;
673 }
674 template <typename Archive> void serialize (Archive& ar) {
675 ar & f & op;
676 }
677 };
678
679 template<typename T, size_t NDIM>
680 struct noop {
681 void operator()(const Key<NDIM>& key, const GenTensor<T>& coeff, const bool& is_leaf) const {}
682 bool operator()(const Key<NDIM>& key, const GenTensor<T>& fcoeff, const GenTensor<T>& gcoeff) const {
683 MADNESS_EXCEPTION("in noop::operator()",1);
684 return true;
685 }
686 template <typename Archive> void serialize (Archive& ar) {}
687
688 };
689
690 /// insert/replaces the coefficients into the function
691 template<typename T, std::size_t NDIM>
692 struct insert_op {
697
701 insert_op(const insert_op& other) : impl(other.impl) {}
702 void operator()(const keyT& key, const coeffT& coeff, const bool& is_leaf) const {
704 impl->get_coeffs().replace(key,nodeT(coeff,not is_leaf));
705 }
706 template <typename Archive> void serialize (Archive& ar) {
707 ar & impl;
708 }
709
710 };
711
712 /// inserts/accumulates coefficients into impl's tree
713
714 /// NOTE: will use buffer and will need consolidation after operation ended !! NOTE !!
715 template<typename T, std::size_t NDIM>
719
721 accumulate_op() = default;
723 accumulate_op(const accumulate_op& other) = default;
724 void operator()(const Key<NDIM>& key, const coeffT& coeff, const bool& is_leaf) const {
725 if (coeff.has_data())
726 impl->get_coeffs().task(key, &nodeT::accumulate, coeff, impl->get_coeffs(), key, impl->get_tensor_args());
727 }
728 template <typename Archive> void serialize (Archive& ar) {
729 ar & impl;
730 }
731
732 };
733
734
735template<size_t NDIM>
736 struct true_op {
737
738 template<typename T>
739 bool operator()(const Key<NDIM>& key, const T& t) const {return true;}
740
741 template<typename T, typename R>
742 bool operator()(const Key<NDIM>& key, const T& t, const R& r) const {return true;}
743 template <typename Archive> void serialize (Archive& ar) {}
744
745 };
746
747 /// shallow-copy, pared-down version of FunctionNode, for special purpose only
748 template<typename T, std::size_t NDIM>
749 struct ShallowNode {
753 double dnorm=-1.0;
756 : _coeffs(node.coeff()), _has_children(node.has_children()),
757 dnorm(node.get_dnorm()) {}
759 : _coeffs(node.coeff()), _has_children(node._has_children),
760 dnorm(node.dnorm) {}
761
762 const coeffT& coeff() const {return _coeffs;}
763 coeffT& coeff() {return _coeffs;}
764 bool has_children() const {return _has_children;}
765 bool is_leaf() const {return not _has_children;}
766 template <typename Archive>
767 void serialize(Archive& ar) {
768 ar & coeff() & _has_children & dnorm;
769 }
770 };
771
772
773 /// a class to track where relevant (parent) coeffs are
774
775 /// E.g. if a 6D function is composed of two 3D functions their coefficients must be tracked.
776 /// We might need coeffs from a box that does not exist, and to avoid searching for
777 /// parents we track which are their required respective boxes.
778 /// - CoeffTracker will refer either to a requested key, if it exists, or to its
779 /// outermost parent.
780 /// - Children must be made in sequential order to be able to track correctly.
781 ///
782 /// Usage: 1. make the child of a given CoeffTracker.
783 /// If the parent CoeffTracker refers to a leaf node (flag is_leaf)
784 /// the child will refer to the same node. Otherwise it will refer
785 /// to the child node.
786 /// 2. retrieve its coefficients (possible communication/ returns a Future).
787 /// Member variable key always refers to an existing node,
788 /// so we can fetch it. Once we have the node we can determine
789 /// if it has children which allows us to make a child (see 1. )
790 template<typename T, size_t NDIM>
792
796 typedef std::pair<Key<NDIM>,ShallowNode<T,NDIM> > datumT;
798
799 /// the funcimpl that has the coeffs
800 const implT* impl;
801 /// the current key, which must exist in impl
803 /// flag if key is a leaf node
805 /// the coefficients belonging to key
807 /// norm of d coefficients corresponding to key
808 double dnorm_=-1.0;
809
810 public:
811
812 /// default ctor
813 CoeffTracker() : impl(), key_(0), is_leaf_(unknown), coeff_() {} // Initialize key to avoid warnings of possible unititialied use
814
815 /// the initial ctor making the root key
817 if (impl) key_=impl->get_cdata().key0;
818 }
819
820 /// ctor with a pair<keyT,nodeT>
821 explicit CoeffTracker(const CoeffTracker& other, const datumT& datum)
822 : impl(other.impl), key_(other.key_), coeff_(datum.second.coeff()),
823 dnorm_(datum.second.dnorm) {
824 if (datum.second.is_leaf()) is_leaf_=yes;
825 else is_leaf_=no;
826 }
827
828 /// copy ctor
829 CoeffTracker(const CoeffTracker& other) : impl(other.impl), key_(other.key_),
830 is_leaf_(other.is_leaf_), coeff_(other.coeff_), dnorm_(other.dnorm_) {};
831
832 /// const reference to impl
833 const implT* get_impl() const {return impl;}
834
835 /// const reference to the coeffs
836 const coeffT& coeff() const {return coeff_;}
837
838 /// const reference to the key
839 const keyT& key() const {return key_;}
840
841 /// return the coefficients belonging to the passed-in key
842
843 /// if key equals tracked key just return the coeffs, otherwise
844 /// make the child coefficients.
845 /// @param[in] key return coeffs corresponding to this key
846 /// @return coefficients belonging to key
854
855 /// return the s and dnorm belonging to the passed-in key
856 double dnorm(const keyT& key) const {
857 if (key==key_) return dnorm_;
858 MADNESS_ASSERT(key.is_child_of(key_));
859 return 0.0;
860 }
861
862 /// const reference to is_leaf flag
863 const LeafStatus& is_leaf() const {return is_leaf_;}
864
865 /// make a child of this, ignoring the coeffs
866 CoeffTracker make_child(const keyT& child) const {
867
868 // fast return
869 if ((not impl) or impl->is_on_demand()) return CoeffTracker(*this);
870
871 // can't make a child without knowing if this is a leaf -- activate first
873
874 CoeffTracker result;
875 if (impl) {
876 result.impl=impl;
877 if (is_leaf_==yes) result.key_=key_;
878 if (is_leaf_==no) {
879 result.key_=child;
880 // check if child is direct descendent of this, but root node is special case
881 if (child.level()>0) MADNESS_ASSERT(result.key().level()==key().level()+1);
882 }
883 result.is_leaf_=unknown;
884 }
885 return result;
886 }
887
888 /// find the coefficients
889
890 /// this involves communication to a remote node
891 /// @return a Future<CoeffTracker> with the coefficients that key refers to
893
894 // fast return
895 if (not impl) return Future<CoeffTracker>(CoeffTracker());
897
898 // this will return a <keyT,nodeT> from a remote node
901
902 // construct a new CoeffTracker locally
903 return impl->world.taskq.add(*const_cast<CoeffTracker*> (this),
904 &CoeffTracker::forward_ctor,*this,datum1);
905 }
906
907 private:
908 /// taskq-compatible forwarding to the ctor
909 CoeffTracker forward_ctor(const CoeffTracker& other, const datumT& datum) const {
910 return CoeffTracker(other,datum);
911 }
912
913 public:
914 /// serialization
915 template <typename Archive> void serialize(const Archive& ar) {
916 int il=int(is_leaf_);
917 ar & impl & key_ & il & coeff_ & dnorm_;
919 }
920 };
921
922 template<typename T, std::size_t NDIM>
923 std::ostream&
924 operator<<(std::ostream& s, const CoeffTracker<T,NDIM>& ct) {
925 s << ct.key() << ct.is_leaf() << " " << ct.get_impl();
926 return s;
927 }
928
929 /// FunctionImpl holds all Function state to facilitate shallow copy semantics
930
931 /// Since Function assignment and copy constructors are shallow it
932 /// greatly simplifies maintaining consistent state to have all
933 /// (permanent) state encapsulated in a single class. The state
934 /// is shared between instances using a shared_ptr<FunctionImpl>.
935 ///
936 /// The FunctionImpl inherits all of the functionality of WorldContainer
937 /// (to store the coefficients) and WorldObject<WorldContainer> (used
938 /// for RMI and for its unique id).
939 ///
940 /// The class methods are public to avoid painful multiple friend template
941 /// declarations for Function and FunctionImpl ... but this trust should not be
942 /// abused ... NOTHING except FunctionImpl methods should mess with FunctionImplData.
943 /// The LB stuff might have to be an exception.
944 template <typename T, std::size_t NDIM>
945 class FunctionImpl : public WorldObject< FunctionImpl<T,NDIM> > {
946 private:
947 typedef WorldObject< FunctionImpl<T,NDIM> > woT; ///< Base class world object type
948 public:
949 typedef T typeT;
950 typedef FunctionImpl<T,NDIM> implT; ///< Type of this class (implementation)
951 typedef std::shared_ptr< FunctionImpl<T,NDIM> > pimplT; ///< pointer to this class
952 typedef Tensor<T> tensorT; ///< Type of tensor for anything but to hold coeffs
953 typedef Vector<Translation,NDIM> tranT; ///< Type of array holding translation
954 typedef Key<NDIM> keyT; ///< Type of key
955 typedef FunctionNode<T,NDIM> nodeT; ///< Type of node
956 typedef GenTensor<T> coeffT; ///< Type of tensor used to hold coeffs
957 typedef WorldContainer<keyT,nodeT> dcT; ///< Type of container holding the coefficients
958 typedef std::pair<const keyT,nodeT> datumT; ///< Type of entry in container
959 typedef Vector<double,NDIM> coordT; ///< Type of vector holding coordinates
960
961 //template <typename Q, int D> friend class Function;
962 template <typename Q, std::size_t D> friend class FunctionImpl;
963
965
966 /// getter
969 const std::vector<Vector<double,NDIM> >& get_special_points()const{return special_points;}
970
971 private:
972 int k; ///< Wavelet order
973 double thresh; ///< Screening threshold
974 int initial_level; ///< Initial level for refinement
975 int special_level; ///< Minimium level for refinement on special points
976 std::vector<Vector<double,NDIM> > special_points; ///< special points for further refinement (needed for composite functions or multiplication)
977 const Tensor<double> cell; ///< the size of the root cell in each dimension, unchangeable
978 int max_refine_level; ///< Do not refine below this level
979 int truncate_mode; ///< 0=default=(|d|<thresh), 1=(|d|<thresh/2^n), 2=(|d|<thresh/4^n);
980 bool autorefine; ///< If true, autorefine where appropriate
981 bool truncate_on_project; ///< If true projection inserts at level n-1 not n
982 TensorArgs targs; ///< type of tensor to be used in the FunctionNodes
983
985
986 std::shared_ptr< FunctionFunctorInterface<T,NDIM> > functor;
988
989 dcT coeffs; ///< The coefficients
990
991 // Disable the default copy constructor
993
994 public:
1003
1004 /// Initialize function impl from data in factory
1006 : WorldObject<implT>(factory._world)
1007 , world(factory._world)
1008 , k(factory._k)
1009 , thresh(factory._thresh)
1010 , initial_level(factory._initial_level)
1011 , special_level(factory._special_level)
1012 , special_points(factory._special_points)
1014 , max_refine_level(factory._max_refine_level)
1015 , truncate_mode(factory._truncate_mode)
1016 , autorefine(factory._autorefine)
1017 , truncate_on_project(factory._truncate_on_project)
1018 , targs(factory._thresh,FunctionDefaults<NDIM>::get_tensor_type())
1019 , cdata(FunctionCommonData<T,NDIM>::get(k))
1020 , functor(factory.get_functor())
1021 , tree_state(factory._tree_state)
1022 , coeffs(world,factory._pmap,false)
1023 //, bc(factory._bc)
1024 {
1025 // PROFILE_MEMBER_FUNC(FunctionImpl); // No need to profile this
1026 // !!! Ensure that all local state is correctly formed
1027 // before invoking process_pending for the coeffs and
1028 // for this. Otherwise, there is a race condition.
1029 MADNESS_ASSERT(k>0 && k<=MAXK);
1030
1031 bool empty = (factory._empty or is_on_demand());
1032 bool do_refine = factory._refine;
1033
1034 if (do_refine)
1035 initial_level = std::max(0,initial_level - 1);
1036
1037 if (empty) { // Do not set any coefficients at all
1038 // additional functors are only evaluated on-demand
1039 } else if (functor) { // Project function and optionally refine
1041 // set the union of the special points of functor and the ones explicitly given to FunctionFactory
1042 std::vector<coordT> functor_special_points=functor->special_points();
1043 if (!functor_special_points.empty()) special_points.insert(special_points.end(), functor_special_points.begin(), functor_special_points.end());
1044 // near special points refine as deeply as requested by the factory AND the functor
1045 special_level = std::max(special_level, functor->special_level());
1046
1047 typename dcT::const_iterator end = coeffs.end();
1048 for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
1049 if (it->second.is_leaf())
1050 woT::task(coeffs.owner(it->first), &implT::project_refine_op, it->first, do_refine,
1052 }
1053 }
1054 else { // Set as if a zero function
1055 initial_level = 1;
1057 }
1058
1060 this->process_pending();
1061 if (factory._fence && (functor || !empty)) world.gop.fence();
1062 }
1063
1064 /// Copy constructor
1065
1066 /// Allocates a \em new function in preparation for a deep copy
1067 ///
1068 /// By default takes pmap from other but can also specify a different pmap.
1069 /// Does \em not copy the coefficients ... creates an empty container.
1070 template <typename Q>
1072 const std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >& pmap,
1073 bool dozero) : FunctionImpl(other.world, other, pmap, dozero) {
1074 }
1075
1076 /// Copy constructor
1077
1078 /// Allocates a \em new function in preparation for a deep copy
1079 ///
1080 /// By default takes pmap from other but can also specify a different pmap.
1081 /// Does \em not copy the coefficients ... creates an empty container.
1082 ///
1083 /// uses a different world for the new function
1084 template <typename Q>
1086 const FunctionImpl<Q,NDIM>& other,
1087 const std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >& pmap,
1088 bool dozero)
1090 , world(world)
1091 , k(other.k)
1092 , thresh(other.thresh)
1096 , cell(other.cell)
1099 , autorefine(other.autorefine)
1101 , targs(other.targs)
1102 , cdata(FunctionCommonData<T,NDIM>::get(k))
1103 , functor()
1104 , tree_state(other.tree_state)
1105 , coeffs(world, pmap ? pmap : other.coeffs.get_pmap())
1106 {
1107 if (dozero) {
1108 initial_level = 1;
1110 //world.gop.fence(); <<<<<<<<<<<<<<<<<<<<<< needs a fence argument
1111 }
1113 this->process_pending();
1114 }
1115
1116 virtual ~FunctionImpl() { }
1117
1118 const std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >& get_pmap() const;
1119
1120 void replicate(bool fence=true) {
1121 coeffs.replicate(fence);
1122 }
1123
1124 void replicate_on_hosts(bool fence=true) {
1126 }
1127
1128 // remove all coeffs that are not local according to pmap
1129 void undo_replicate(bool fence=true) {
1130 std::list<keyT> keys;
1131 for (const auto& [key, node] : coeffs) if (not coeffs.is_local(key)) keys.push_back(key);
1132 for (const auto& key : keys) coeffs.erase(key);
1133 if (fence) world.gop.fence();
1134 }
1135
1136 void distribute(std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > > newmap) const {
1137 auto currentmap=coeffs.get_pmap();
1138 currentmap->redistribute(world,newmap);
1139 }
1140
1141 /// Copy coeffs from other into self
1142
1143 /// this and other might live in different worlds
     ///
     /// Dispatches on whether both impls report the same world id.
     /// @param[in] other the impl whose coefficients are copied
     /// @param[in] fence if true, fence this impl's world once at the end
1144 template <typename Q>
1145 void copy_coeffs(const FunctionImpl<Q,NDIM>& other, bool fence) {
1146 if (world.id()==other.world.id())
1147 copy_coeffs_same_world(other,false); // no fence here, the single fence below covers it
1148 else
1150 if (fence) world.gop.fence();
1151 }
1152
1153 /// Copy coefficients from other funcimpl with possibly different world and on a different node
1154 template<typename Q>
1156
1157 // copy coeffs from (a subset of) other's world
1158
1159 // if other's data is distributed, we need to fetch from all ranks
1160 if (other.get_coeffs().is_distributed()) {
1161 for (ProcessID pid=0; pid<other.world.size(); ++pid) {
1162 copy_remote_coeffs_from_pid<Q>(pid, other);
1163 }
1164
1165 // if other's data is replicated, all coeffs are on the rank that owns key0
1166 } else if (other.get_coeffs().is_replicated() or other.get_coeffs().is_host_replicated()) {
1167 auto key0=other.cdata.key0;
1168 copy_remote_coeffs_from_pid<Q>(other.get_pmap()->owner(key0), other);
1169 }
1170 }
1171
1172 /// Copy coefficients from other funcimpl with possibly different world and on a different node
1173 /// to this
1174 template <typename Q>
1176 typedef FunctionImpl<Q,NDIM> implQ; ///< Type of this class (implementation)
1177 // std::vector<unsigned char> v=other.task(pid, &implQ::serialize_remote_coeffs).get();
1178 auto v=other.task(pid, &implQ::serialize_remote_coeffs);
1180 }
1181
1182 /// invoked by copy_remote_coeffs_from_pid to serialize *local* coeffs
1183 std::vector<unsigned char> serialize_remote_coeffs() {
1184 std::vector<unsigned char> v;
1186 ar & get_coeffs();
1187 return v;
1188 }
1189
1190 /// insert coeffs from vector archive into this
1191 void insert_serialized_coeffs(std::vector<unsigned char>& v) {
1193 ar & get_coeffs();
1194 }
1195
1196 /// Copy coeffs from other into self
1197 template <typename Q>
1198 void copy_coeffs_same_world(const FunctionImpl<Q,NDIM>& other, bool fence) {
1199 for (const auto& [key, node] : other.coeffs) { // iterate over all entries in other
1200 coeffs.replace(key,node. template convert<T>());
1201 }
1202 if (fence)
1203 world.gop.fence();
1204 }
1205
1206 /// perform inplace gaxpy: this = alpha*this + beta*other
1207 /// @param[in] alpha prefactor for this
1208 /// @param[in] beta prefactor for other
1209 /// @param[in] g the other function, reconstructed
1210 /// @return *this = alpha*this + beta*other, in either reconstructed or redundant_after_merge state
1211 template<typename Q, typename R>
1212 void gaxpy_inplace_reconstructed(const T& alpha, const FunctionImpl<Q,NDIM>& g, const R& beta, const bool fence) {
1213 // merge g's tree into this' tree
1214 gaxpy_inplace(alpha,g,beta,fence);
1216 // this->merge_trees(beta,g,alpha,fence);
1217 // tree is now redundant_after_merge
1218 // sum down the sum coeffs into the leafs if possible to keep the state most clean
1219 if (fence) sum_down(fence);
1220 }
1221
1222 /// merge the trees of this and other, while multiplying them with the alpha or beta, resp
1223
1224 /// first step in an inplace gaxpy operation for reconstructed functions; assuming the same
1225 /// distribution for this and other
1226
1227 /// on output, *this = alpha* *this + beta * other
1228 /// @param[in] alpha prefactor for this
1229 /// @param[in] beta prefactor for other
1230 /// @param[in] other the other function, reconstructed
1231 template<typename Q, typename R>
1232 void merge_trees(const T alpha, const FunctionImpl<Q,NDIM>& other, const R beta, const bool fence=true) {
1233 MADNESS_ASSERT(get_pmap() == other.get_pmap());
1236 }
1237
1238 /// merge the trees of this and other, while multiplying them with the alpha or beta, resp
1239
1240 /// result and rhs do not have to have the same distribution or live in the same world
1241 /// result+=alpha* this
1242 /// @param[in] alpha prefactor for this
1243 template<typename Q, typename R>
1244 void accumulate_trees(FunctionImpl<Q,NDIM>& result, const R alpha, const bool fence=true) const {
1246 }
1247
1248 /// perform: this= alpha*f + beta*g, invoked by result
1249
1250 /// f and g are reconstructed, so we can save on the compress operation,
1251 /// walk down the joint tree, and add leaf coefficients; effectively refines
1252 /// to common finest level.
1253
1254 /// nothing returned, but leaves this's tree reconstructed and as sum of f and g
1255 /// @param[in] alpha prefactor for f
1256 /// @param[in] f first addend
1257 /// @param[in] beta prefactor for g
1258 /// @param[in] g second addend
1259 void gaxpy_oop_reconstructed(const double alpha, const implT& f,
1260 const double beta, const implT& g, const bool fence);
1261
1262 /// functor for the gaxpy_inplace method
     ///
     /// Applied to each (key,node) entry of the other function: sends a
     /// node-level gaxpy_inplace to the node with the same key in f's container.
1263 template <typename Q, typename R>
1266 FunctionImpl<T,NDIM>* f; ///< the current function impl (the one being updated)
1267 T alpha; ///< prefactor for current function impl
1268 R beta; ///< prefactor for other function impl
1269 do_gaxpy_inplace() = default;
1271 bool operator()(typename rangeT::iterator& it) const {
1272 const keyT& key = it->first;
1273 const FunctionNode<Q,NDIM>& other_node = it->second;
1274 // Use send to get write accessor and automated construction if missing
1275 f->coeffs.send(key, &nodeT:: template gaxpy_inplace<Q,R>, alpha, other_node, beta);
1276 return true;
1277 }
1278 template <typename Archive>
1279 void serialize(Archive& ar) {
1280 ar & f & alpha & beta;
1281 }
1282 };
1283
1284 /// Inplace general bilinear operation
1285
1286 /// this's world can differ from other's world
1287 /// this = alpha * this + beta * other
1288 /// @param[in] alpha prefactor for the current function impl
1289 /// @param[in] other the other function impl
1290 /// @param[in] beta prefactor for other
     /// @param[in] fence if true, fence other's world after the tasks were submitted
1291 template <typename Q, typename R>
1292 void gaxpy_inplace(const T& alpha,const FunctionImpl<Q,NDIM>& other, const R& beta, bool fence) {
1293// MADNESS_ASSERT(get_pmap() == other.get_pmap());
     // scale this up front (without fencing) so the per-node update below can use
     // a unit prefactor for this: opT is constructed with T(1.0) instead of alpha
1294 if (alpha != T(1.0)) scale_inplace(alpha,false);
1296 typedef do_gaxpy_inplace<Q,R> opT;
     // the loop runs over other's coefficients on other's task queue ...
1297 other.world.taskq. template for_each<rangeT,opT>(rangeT(other.coeffs.begin(), other.coeffs.end()), opT(this, T(1.0), beta));
     // ... hence the fence is on other's world, not on this->world
1298 if (fence)
1299 other.world.gop.fence();
1300 }
1301
1302 // loads a function impl from persistence
1303 // @param[in] ar the archive where the function impl is stored
1304 template <typename Archive>
1305 void load(Archive& ar) {
1306 // WE RELY ON K BEING STORED FIRST
1307 int kk = 0;
1308 ar & kk;
1309
1310 MADNESS_ASSERT(kk==k);
1311
1312 // note that functor should not be (re)stored
1314 & autorefine & truncate_on_project & tree_state;//nonstandard & compressed ; //& bc;
1315
1316 ar & coeffs;
1317 world.gop.fence();
1318 }
1319
1320 // saves a function impl to persistence
1321 // @param[in] ar the archive where the function impl is to be stored
1322 template <typename Archive>
1323 void store(Archive& ar) {
1324 // WE RELY ON K BEING STORED FIRST
1325
1326 // note that functor should not be (re)stored
1328 & autorefine & truncate_on_project & tree_state;//nonstandard & compressed ; //& bc;
1329
1330 ar & coeffs;
1331 world.gop.fence();
1332 }
1333
1334 /// Returns true if the function is compressed.
1335 bool is_compressed() const;
1336
1337 /// Returns true if the function is reconstructed.
1338 bool is_reconstructed() const;
1339
1340 /// Returns true if the function is redundant.
1341 bool is_redundant() const;
1342
1343 /// Returns true if the function is redundant_after_merge.
1344 bool is_redundant_after_merge() const;
1345
1346 bool is_nonstandard() const;
1347
1348 bool is_nonstandard_with_leaves() const;
1349
1350 bool is_on_demand() const;
1351
1352 bool has_leaves() const;
1353
1354 void set_tree_state(const TreeState& state) {
1355 tree_state=state;
1356 }
1357
1359
1360 void set_functor(const std::shared_ptr<FunctionFunctorInterface<T,NDIM> > functor1);
1361
1362 std::shared_ptr<FunctionFunctorInterface<T,NDIM> > get_functor();
1363
1364 std::shared_ptr<FunctionFunctorInterface<T,NDIM> > get_functor() const;
1365
1366 void unset_functor();
1367
1368
1370
1372 void set_tensor_args(const TensorArgs& t);
1373
1374 double get_thresh() const;
1375
1376 /// return the simulation cell
1377 const Tensor<double>& get_cell() const { return cell; }
1378
1379 void set_thresh(double value);
1380
1381 bool get_autorefine() const;
1382
1383 void set_autorefine(bool value);
1384
1385 int get_k() const;
1386
1387 const dcT& get_coeffs() const;
1388
1389 dcT& get_coeffs();
1390
1392
1393 void accumulate_timer(const double time) const; // !!!!!!!!!!!! REDUNDANT !!!!!!!!!!!!!!!
1394
1395 void print_timer() const;
1396
1397 void reset_timer();
1398
1399 /// Adds a constant to the function. Local operation, optional fence
1400
1401 /// In scaling function basis must add value to first polyn in
1402 /// each box with appropriate scaling for level. In wavelet basis
1403 /// need only add at level zero.
1404 /// @param[in] t the scalar to be added
1405 void add_scalar_inplace(T t, bool fence);
1406
1407 /// Initialize nodes to zero function at initial_level of refinement.
1408
1409 /// Works for either basis. No communication.
1410 void insert_zero_down_to_initial_level(const keyT& key);
1411
1412 /// Truncate according to the threshold with optional global fence
1413
1414 /// If thresh<=0 the default value of this->thresh is used
1415 /// @param[in] tol the truncation tolerance
1416 void truncate(double tol, bool fence);
1417
1418 /// Returns true if after truncation this node has coefficients
1419
1420 /// Assumed to be invoked on process owning key. Possible non-blocking
1421 /// communication.
1422 /// @param[in] key the key of the current function node
1423 Future<bool> truncate_spawn(const keyT& key, double tol);
1424
1425 /// Actually do the truncate operation
1426 /// @param[in] key the key to the current function node being evaluated for truncation
1427 /// @param[in] tol the tolerance for thresholding
1428 /// @param[in] v vector of Future<bool>'s that specify whether the current nodes children have coeffs
1429 bool truncate_op(const keyT& key, double tol, const std::vector< Future<bool> >& v);
1430
1431 /// Evaluate function at quadrature points in the specified box
1432
1433 /// @param[in] key the key indicating where the quadrature points are located
1434 /// @param[in] f the interface to the elementary function
1435 /// @param[in] qx quadrature points on a level=0 box
1436 /// @param[out] fval values
1437 void fcube(const keyT& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx, tensorT& fval) const;
1438
1439 /// Evaluate function at quadrature points in the specified box
1440
1441 /// @param[in] key the key indicating where the quadrature points are located
1442 /// @param[in] f the interface to the elementary function
1443 /// @param[in] qx quadrature points on a level=0 box
1444 /// @param[out] fval values
1445 void fcube(const keyT& key, T (*f)(const coordT&), const Tensor<double>& qx, tensorT& fval) const;
1446
1447 /// Returns cdata.key0
1448 const keyT& key0() const;
1449
1450 /// Prints the coeffs tree of the current function impl
1451 /// @param[in] maxlevel the maximum level of the tree for printing
1452 /// @param[out] os the ostream to where the output is sent
1453 void print_tree(std::ostream& os = std::cout, Level maxlevel = 10000) const;
1454
1455 /// Functor for the print_tree method
1456 void do_print_tree(const keyT& key, std::ostream& os, Level maxlevel) const;
1457
1458 /// Prints the coeffs tree of the current function impl (using GraphViz)
1459 /// @param[in] maxlevel the maximum level of the tree for printing
1460 /// @param[out] os the ostream to where the output is sent
1461 void print_tree_graphviz(std::ostream& os = std::cout, Level maxlevel = 10000) const;
1462
1463 /// Functor for the print_tree_graphviz method (using GraphViz)
1464 void do_print_tree_graphviz(const keyT& key, std::ostream& os, Level maxlevel) const;
1465
1466 /// Same as print_tree() but in JSON format
1467 /// @param[out] os the ostream to where the output is sent
1468 /// @param[in] maxlevel the maximum level of the tree for printing
1469 void print_tree_json(std::ostream& os = std::cout, Level maxlevel = 10000) const;
1470
1471 /// Functor for the print_tree_json method
1472 void do_print_tree_json(const keyT& key, std::multimap<Level, std::tuple<tranT, std::string>>& data, Level maxlevel) const;
1473
1474 /// convert a number [0,limit] to a hue color code [blue,red],
1475 /// or, if log is set, a number [1.e-10,limit]
1477 double limit;
1478 bool log;
1479 static double lower() {return 1.e-10;};
1481 do_convert_to_color(const double limit, const bool log) : limit(limit), log(log) {}
1482 double operator()(double val) const {
1483 double color=0.0;
1484
1485 if (log) {
1486 double val2=log10(val) - log10(lower()); // will yield >0.0
1487 double upper=log10(limit) -log10(lower());
1488 val2=0.7-(0.7/upper)*val2;
1489 color= std::max(0.0,val2);
1490 color= std::min(0.7,color);
1491 } else {
1492 double hue=0.7-(0.7/limit)*(val);
1493 color= std::max(0.0,hue);
1494 }
1495 return color;
1496 }
1497 };
1498
1499
1500 /// Print a plane ("xy", "xz", or "yz") containing the point x to file
1501
1502 /// works for all dimensions; we walk through the tree, and if a leaf node
1503 /// inside the sub-cell touches the plane we print it in pstricks format
1504 void print_plane(const std::string filename, const int xaxis, const int yaxis, const coordT& el2);
1505
1506 /// collect the data for a plot of the MRA structure locally on each node
1507
1508 /// @param[in] xaxis the x-axis in the plot (can be any axis of the MRA box)
1509 /// @param[in] yaxis the y-axis in the plot (can be any axis of the MRA box)
1510 /// @param[in] el2 needs a description
1511 /// \todo Provide a description for el2
1512 Tensor<double> print_plane_local(const int xaxis, const int yaxis, const coordT& el2);
1513
1514 /// Functor for the print_plane method
1515 /// @param[in] filename the filename for the output
1516 /// @param[in] plotinfo plotting parameters
1517 /// @param[in] xaxis the x-axis in the plot (can be any axis of the MRA box)
1518 /// @param[in] yaxis the y-axis in the plot (can be any axis of the MRA box)
1519 void do_print_plane(const std::string filename, std::vector<Tensor<double> > plotinfo,
1520 const int xaxis, const int yaxis, const coordT el2);
1521
1522 /// print the grid (the roots of the quadrature of each leaf box)
1523 /// of this function in user xyz coordinates
1524 /// @param[in] filename the filename for the output
1525 void print_grid(const std::string filename) const;
1526
1527 /// return the keys of the local leaf boxes
1528 std::vector<keyT> local_leaf_keys() const;
1529
1530 /// print the grid in xyz format
1531
1532 /// the quadrature points and the key information will be written to file,
1533 /// @param[in] filename where the quadrature points will be written to
1534 /// @param[in] keys all leaf keys
1535 void do_print_grid(const std::string filename, const std::vector<keyT>& keys) const;
1536
1537 /// read data from a grid
1538
     /// Reads two files in lockstep: for every box a key header line is taken from
     /// keyfile, followed by one line per quadrature point from both files. The
     /// coordinates in both files are checked against the quadrature points computed
     /// here (tolerance 1.e-4), the values are converted to coefficients and stored
     /// as a leaf node under the key.
     ///
     /// NOTE(review): the key is parsed as Key<3> and any NDIM other than 3 ends in
     /// an exception, so this is effectively a 3-dimensional routine.
     /// @tparam FDIM must be equal to NDIM (enforced through enable_if)
1539 /// @param[in] keyfile file with keys and grid points for each key
1540 /// @param[in] gridfile file with grid points, w/o key, but with same ordering
1541 /// @param[in] vnuc_functor subtract the values of this functor if regularization is needed
1542 template<size_t FDIM>
1543 typename std::enable_if<NDIM==FDIM>::type
1544 read_grid(const std::string keyfile, const std::string gridfile,
1545 std::shared_ptr< FunctionFunctorInterface<double,NDIM> > vnuc_functor) {
1546
     // the streams are not tested after opening; a file that cannot be opened
     // shows up as a failure to read its first line below
1547 std::ifstream kfile(keyfile.c_str());
1548 std::ifstream gfile(gridfile.c_str());
1549 std::string line;
1550
     // first line of each file: the number of data points, must agree in both files
     // NOTE(review): the two messages say "k", but the value parsed is ndata/ndata1
1551 long ndata,ndata1;
1552 if (not (std::getline(kfile,line))) MADNESS_EXCEPTION("failed reading 1st line of key data",0);
1553 if (not (std::istringstream(line) >> ndata)) MADNESS_EXCEPTION("failed reading k",0);
1554 if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 1st line of grid data",0);
1555 if (not (std::istringstream(line) >> ndata1)) MADNESS_EXCEPTION("failed reading k",0);
1556 MADNESS_CHECK(ndata==ndata1);
     // second line of each file is read and discarded
1557 if (not (std::getline(kfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of key data",0);
1558 if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of grid data",0);
1559
1560 // the quadrature points in simulation coordinates of the root node
1561 const Tensor<double> qx=cdata.quad_x;
1562 const size_t npt = qx.dim(0);
1563
1564 // the number of coordinates (grid point tuples) per box ({x1},{x2},{x3},..,{xNDIM})
1565 long npoints=power<NDIM>(npt);
1566 // the number of boxes
1567 long nboxes=ndata/npoints;
     // the data must consist of whole boxes
1568 MADNESS_ASSERT(nboxes*npoints==ndata);
1569 print("reading ",nboxes,"boxes from file",gridfile,keyfile);
1570
1571 // these will be the data
1572 Tensor<T> values(cdata.vk,false);
1573
     // ii counts the boxes read; compared against nboxes at the end
1574 int ii=0;
1575 std::string gline,kline;
1576 // while (1) {
     // each pass of this loop handles one box: kline holds the key header line
1577 while (std::getline(kfile,kline)) {
1578
1579 double x,y,z,x1,y1,z1,val;
1580
1581 // get the key
1582 long nn;
1583 Translation l1,l2,l3;
1584 // line looks like: # key: n l1 l2 l3
     // drop the first 7 characters (the "# key: " prefix), then parse level and translations
1585 kline.erase(0,7);
1586 std::stringstream(kline) >> nn >> l1 >> l2 >> l3;
1587 // kfile >> s >> nn >> l1 >> l2 >> l3;
1588 const Vector<Translation,3> ll{ l1,l2,l3 };
1589 Key<3> key(nn,ll);
1590
1591 // this is borrowed from fcube
1592 const Vector<Translation,3>& l = key.translation();
1593 const Level n = key.level();
     // h = 2^-n, the box width at level n relative to the root box
1594 const double h = std::pow(0.5,double(n));
1595 coordT c; // will hold the point in user coordinates
1598
1599
1600 if (NDIM == 3) {
     // one line per quadrature point is consumed from each file, in (i,j,k) order
1601 for (size_t i=0; i<npt; ++i) {
1602 c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
1603 for (size_t j=0; j<npt; ++j) {
1604 c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
1605 for (size_t k=0; k<npt; ++k) {
1606 c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
1607 // fprintf(pFile,"%18.12f %18.12f %18.12f\n",c[0],c[1],c[2]);
1608 auto& success1 = std::getline(gfile,gline); MADNESS_CHECK(success1);
1609 auto& success2 = std::getline(kfile,kline); MADNESS_CHECK(success2);
     // grid file line: x y z value; key file line: x y z
1610 std::istringstream(gline) >> x >> y >> z >> val;
1611 std::istringstream(kline) >> x1 >> y1 >> z1;
     // both files must list the point computed above
1612 MADNESS_CHECK(std::fabs(x-c[0])<1.e-4);
1613 MADNESS_CHECK(std::fabs(x1-c[0])<1.e-4);
1614 MADNESS_CHECK(std::fabs(y-c[1])<1.e-4);
1615 MADNESS_CHECK(std::fabs(y1-c[1])<1.e-4);
1616 MADNESS_CHECK(std::fabs(z-c[2])<1.e-4);
1617 MADNESS_CHECK(std::fabs(z1-c[2])<1.e-4);
1618
1619 // regularize if a functor is given
1620 if (vnuc_functor) val-=(*vnuc_functor)(c);
1621 values(i,j,k)=val;
1622 }
1623 }
1624 }
1625 } else {
     // NOTE(review): the message names print_grid although this is read_grid
1626 MADNESS_EXCEPTION("only NDIM=3 in print_grid",0);
1627 }
1628
1629 // insert the new leaf node
1630 const bool has_children=false;
1631 coeffT coeff=coeffT(this->values2coeffs(key,values),targs);
1632 nodeT node(coeff,has_children);
1633 coeffs.replace(key,node);
1635 ii++;
1636 }
1637
1638 kfile.close();
1639 gfile.close();
     // the number of boxes read must match the count derived from the header
1640 MADNESS_CHECK(ii==nboxes);
1641
1642 }
1643
1644
1645 /// read data from a grid
1646
     /// Single-file variant of read_grid: key header lines and the point lines
     /// (x y z value) come from the same file. The coordinates are checked against
     /// the quadrature points computed here (tolerance 1.e-4), the values are
     /// converted to coefficients and stored as a leaf node under the key.
     ///
     /// NOTE(review): the key is parsed as Key<3> and any NDIM other than 3 ends in
     /// an exception, so this is effectively a 3-dimensional routine.
     /// @tparam FDIM must be equal to NDIM (enforced through enable_if)
1647 /// @param[in] gridfile file with keys and grid points and values for each key
1648 /// @param[in] vnuc_functor subtract the values of this functor if regularization is needed
1649 template<size_t FDIM>
1650 typename std::enable_if<NDIM==FDIM>::type
1651 read_grid2(const std::string gridfile,
1652 std::shared_ptr< FunctionFunctorInterface<double,NDIM> > vnuc_functor) {
1653
     // the stream is not tested after opening; a file that cannot be opened
     // shows up as a failure to read its first line below
1654 std::ifstream gfile(gridfile.c_str());
1655 std::string line;
1656
     // first line: the number of data points; second line is read and discarded
     // NOTE(review): the message says "k", but the value parsed is ndata
1657 long ndata;
1658 if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 1st line of grid data",0);
1659 if (not (std::istringstream(line) >> ndata)) MADNESS_EXCEPTION("failed reading k",0);
1660 if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of grid data",0);
1661
1662 // the quadrature points in simulation coordinates of the root node
1663 const Tensor<double> qx=cdata.quad_x;
1664 const size_t npt = qx.dim(0);
1665
1666 // the number of coordinates (grid point tuples) per box ({x1},{x2},{x3},..,{xNDIM})
1667 long npoints=power<NDIM>(npt);
1668 // the number of boxes
1669 long nboxes=ndata/npoints;
     // the data must consist of whole boxes
1670 MADNESS_CHECK(nboxes*npoints==ndata);
1671 print("reading ",nboxes,"boxes from file",gridfile);
1672
1673 // these will be the data
1674 Tensor<T> values(cdata.vk,false);
1675
     // ii counts the boxes read; compared against nboxes at the end
1676 int ii=0;
1677 std::string gline;
1678 // while (1) {
     // each pass of this loop handles one box: gline holds the key header line
1679 while (std::getline(gfile,gline)) {
1680
1681 double x1,y1,z1,val;
1682
1683 // get the key
1684 long nn;
1685 Translation l1,l2,l3;
1686 // line looks like: # key: n l1 l2 l3
     // drop the first 7 characters (the "# key: " prefix), then parse level and translations
1687 gline.erase(0,7);
1688 std::stringstream(gline) >> nn >> l1 >> l2 >> l3;
1689 const Vector<Translation,3> ll{ l1,l2,l3 };
1690 Key<3> key(nn,ll);
1691
1692 // this is borrowed from fcube
1693 const Vector<Translation,3>& l = key.translation();
1694 const Level n = key.level();
     // h = 2^-n, the box width at level n relative to the root box
1695 const double h = std::pow(0.5,double(n));
1696 coordT c; // will hold the point in user coordinates
1699
1700
1701 if (NDIM == 3) {
     // one line per quadrature point is consumed, in (i,j,k) order
     // NOTE(review): int loop counters are compared against the size_t npt
     // (signed/unsigned comparison); read_grid uses size_t counters here
1702 for (int i=0; i<npt; ++i) {
1703 c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
1704 for (int j=0; j<npt; ++j) {
1705 c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
1706 for (int k=0; k<npt; ++k) {
1707 c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
1708
1709 auto& success = std::getline(gfile,gline);
1710 MADNESS_CHECK(success);
     // point line: x y z value; the coordinates must match the point computed above
1711 std::istringstream(gline) >> x1 >> y1 >> z1 >> val;
1712 MADNESS_CHECK(std::fabs(x1-c[0])<1.e-4);
1713 MADNESS_CHECK(std::fabs(y1-c[1])<1.e-4);
1714 MADNESS_CHECK(std::fabs(z1-c[2])<1.e-4);
1715
1716 // regularize if a functor is given
1717 if (vnuc_functor) val-=(*vnuc_functor)(c);
1718 values(i,j,k)=val;
1719 }
1720 }
1721 }
1722 } else {
     // NOTE(review): the message names print_grid although this is read_grid2
1723 MADNESS_EXCEPTION("only NDIM=3 in print_grid",0);
1724 }
1725
1726 // insert the new leaf node
1727 const bool has_children=false;
1728 coeffT coeff=coeffT(this->values2coeffs(key,values),targs);
1729 nodeT node(coeff,has_children);
1730 coeffs.replace(key,node);
     // additionally send an update to the node of the parent key
1731 const_cast<dcT&>(coeffs).send(key.parent(),
1733 coeffs, key.parent());
1734 ii++;
1735 }
1736
1737 gfile.close();
     // the number of boxes read must match the count derived from the header
1738 MADNESS_CHECK(ii==nboxes);
1739
1740 }
1741
1742
1743 /// Compute by projection the scaling function coeffs in specified box
1744 /// @param[in] key the key to the current function node (box)
1745 tensorT project(const keyT& key) const;
1746
1747 /// Returns the truncation threshold according to truncate_mode
1748
1749 /// here is our handwaving argument:
1750 /// this threshold will give each FunctionNode an error of less than tol. The
1751 /// total error can then be as high as sqrt(#nodes) * tol. Therefore in order
1752 /// to account for higher dimensions: divide tol by about the root of number
1753 /// of siblings (2^NDIM) that have a large error when we refine along a deep
1754 /// branch of the tree.
1755 double truncate_tol(double tol, const keyT& key) const;
1756
1757 int get_truncate_mode() const { return truncate_mode; };
1758
1759
1760 /// Returns patch referring to coeffs of child in parent box
1761 /// @param[in] child the key to the child function node (box)
1762 std::vector<Slice> child_patch(const keyT& child) const;
1763
1764 /// Projection with optional refinement w/ special points
1765 /// @param[in] key the key to the current function node (box)
1766 /// @param[in] do_refine should we continue refinement?
1767 /// @param[in] specialpts vector of special points in the function where we need
1768 /// to refine at a much finer level
1769 void project_refine_op(const keyT& key, bool do_refine,
1770 const std::vector<Vector<double,NDIM> >& specialpts);
1771
1772 /// Compute the Legendre scaling functions for multiplication
1773
1774 /// Evaluate parent polyn at quadrature points of a child. The prefactor of
1775 /// 2^n/2 is included. The tensor must be preallocated as phi(k,npt).
1776 /// Refer to the implementation notes for more info.
1777 /// @todo Robert please verify this comment. I don't understand this method.
1778 /// @param[in] np level of the parent function node (box)
1779 /// @param[in] nc level of the child function node (box)
1780 /// @param[in] lp translation of the parent function node (box)
1781 /// @param[in] lc translation of the child function node (box)
1782 /// @param[out] phi tensor of the legendre scaling functions
1783 void phi_for_mul(Level np, Translation lp, Level nc, Translation lc, Tensor<double>& phi) const;
1784
1785 /// Directly project parent coeffs to child coeffs
1786
1787 /// Currently used by diff, but other uses can be anticipated
1788
1789 /// @todo is this documentation correct?
1790 /// @param[in] child the key whose coeffs we are requesting
1791 /// @param[in] parent the (leaf) key of our function
1792 /// @param[in] s the (leaf) coeffs belonging to parent
1793 /// @return coeffs
1794 const coeffT parent_to_child(const coeffT& s, const keyT& parent, const keyT& child) const;
1795
1796 /// Directly project parent NS coeffs to child NS coeffs
1797
1798 /// return the NS coefficients if parent and child are the same,
1799 /// or construct sum coeffs from the parents and "add" zero wavelet coeffs
1800 /// @param[in] child the key whose coeffs we are requesting
1801 /// @param[in] parent the (leaf) key of our function
1802 /// @param[in] coeff the (leaf) coeffs belonging to parent
1803 /// @return coeffs in NS form
1804 coeffT parent_to_child_NS(const keyT& child, const keyT& parent,
1805 const coeffT& coeff) const;
1806
1807 /// Return the values when given the coeffs in scaling function basis
1808 /// @param[in] key the key of the function node (box)
1809 /// @param[in] coeff the tensor of scaling function coefficients for function node (box)
1810 /// @return function values for function node (box)
1811 template <typename Q>
1812 GenTensor<Q> coeffs2values(const keyT& key, const GenTensor<Q>& coeff) const {
1813 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1814 double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1815 return transform(coeff,cdata.quad_phit).scale(scale);
1816 }
1817
1818 /// convert S or NS coeffs to values on a 2k grid of the children
1819
1820 /// equivalent to unfiltering the NS coeffs and then converting all child S-coeffs
1821 /// to values in their respective boxes. If only S coeffs are provided d coeffs are
1822 /// assumed to be zero. Reverse operation to values2NScoeffs().
1823 /// @param[in] key the key of the current S or NS coeffs, level n
1824 /// @param[in] coeff coeffs in S or NS form; if S then d coeffs are assumed zero
1825 /// @param[in] s_only sanity check to avoid unintended discard of d coeffs
1826 /// @return function values on the quadrature points of the children of child (!)
1827 template <typename Q>
1829 const bool s_only) const {
1830 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1831
1832 // sanity checks
1833 MADNESS_ASSERT((coeff.dim(0)==this->get_k()) == s_only);
1834 MADNESS_ASSERT((coeff.dim(0)==this->get_k()) or (coeff.dim(0)==2*this->get_k()));
1835
1836 // this is a block-diagonal matrix with the quadrature points on the diagonal
1837 Tensor<double> quad_phit_2k(2*cdata.k,2*cdata.npt);
1838 quad_phit_2k(cdata.s[0],cdata.s[0])=cdata.quad_phit;
1839 quad_phit_2k(cdata.s[1],cdata.s[1])=cdata.quad_phit;
1840
1841 // the transformation matrix unfilters (cdata.hg) and transforms to values in one step
1842 const Tensor<double> transf = (s_only)
1843 ? inner(cdata.hg(Slice(0,k-1),_),quad_phit_2k) // S coeffs
1844 : inner(cdata.hg,quad_phit_2k); // NS coeffs
1845
1846 // increment the level since the coeffs2values part happens on level n+1
1847 const double scale = pow(2.0,0.5*NDIM*(key.level()+1))/
1849
1850 return transform(coeff,transf).scale(scale);
1851 }
1852
1853 /// Compute the function values for multiplication
1854
1855 /// Given S or NS coefficients from a parent cell, compute the value of
1856 /// the functions at the quadrature points of a child
1857 /// currently restricted to special cases
1858 /// @param[in] child key of the box in which we compute values
1859 /// @param[in] parent key of the parent box holding the coeffs
1860 /// @param[in] coeff coeffs of the parent box
1861 /// @param[in] s_only sanity check to avoid unintended discard of d coeffs
1862 /// @return function values on the quadrature points of the children of child (!)
1863 template <typename Q>
1864 GenTensor<Q> NS_fcube_for_mul(const keyT& child, const keyT& parent,
1865 const GenTensor<Q>& coeff, const bool s_only) const {
1866 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1867
1868 // sanity checks
1869 MADNESS_ASSERT((coeff.dim(0)==this->get_k()) == s_only);
1870 MADNESS_ASSERT((coeff.dim(0)==this->get_k()) or (coeff.dim(0)==2*this->get_k()));
1871
1872 // fast return if possible
1873 // if (child.level()==parent.level()) return NScoeffs2values(child,coeff,s_only);
1874
1875 if (s_only) {
1876
1877 Tensor<double> quad_phi[NDIM];
1878 // tmp tensor
1879 Tensor<double> phi1(cdata.k,cdata.npt);
1880
1881 for (std::size_t d=0; d<NDIM; ++d) {
1882
1883 // input is S coeffs (dimension k), output is values on 2*npt grid points
1884 quad_phi[d]=Tensor<double>(cdata.k,2*cdata.npt);
1885
1886 // for both children of "child" evaluate the Legendre polynomials
1887 // first the left child on level n+1 and translations 2l
1888 phi_for_mul(parent.level(),parent.translation()[d],
1889 child.level()+1, 2*child.translation()[d], phi1);
1890 quad_phi[d](_,Slice(0,k-1))=phi1;
1891
1892 // next the right child on level n+1 and translations 2l+1
1893 phi_for_mul(parent.level(),parent.translation()[d],
1894 child.level()+1, 2*child.translation()[d]+1, phi1);
1895 quad_phi[d](_,Slice(k,2*k-1))=phi1;
1896 }
1897
1898 const double scale = 1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1899 return general_transform(coeff,quad_phi).scale(scale);
1900 }
1901 MADNESS_EXCEPTION("you should not be here in NS_fcube_for_mul",1);
1902 return GenTensor<Q>();
1903 }
1904
1905 /// Convert function values of a child generation directly to NS coeffs
1906
1907 /// Equivalent to converting the function values to 2^NDIM S coeffs and then
1908 /// filtering them to NS coeffs. Reverse operation to NScoeffs2values().
1909 /// @param[in] key key of the parent of the generation
1910 /// @param[in] values tensor holding function values of the 2^NDIM children of key
1911 /// @return NS coeffs belonging to key
1912 template <typename Q>
1913 GenTensor<Q> values2NScoeffs(const keyT& key, const GenTensor<Q>& values) const {
1914 //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1915
1916 // sanity check: the leading dimension of values must be 2k
1917 MADNESS_ASSERT(values.dim(0)==2*this->get_k());
1918
1919 // block-diagonal (2npt x 2k) matrix carrying cdata.quad_phiw in both diagonal blocks
1920 Tensor<double> quad_phit_2k(2*cdata.npt,2*cdata.k);
1921 quad_phit_2k(cdata.s[0],cdata.s[0])=cdata.quad_phiw;
1922 quad_phit_2k(cdata.s[1],cdata.s[1])=cdata.quad_phiw;
1923
1924 // the transformation matrix converts values to coeffs (quad_phiw blocks) and applies cdata.hgT in one step
1925 const Tensor<double> transf=inner(quad_phit_2k,cdata.hgT);
1926
1927 // increment the level since the values2coeffs part happens on level n+1
1928 const double scale = pow(0.5,0.5*NDIM*(key.level()+1))
1930
1931 return transform(values,transf).scale(scale);
1932 }
1933
1934 /// Return the scaling function coeffs when given the function values at the quadrature points
1935 /// @param[in] key the key of the function node (box)
1936 /// @return function values for function node (box)
1937 template <typename Q>
1938 Tensor<Q> coeffs2values(const keyT& key, const Tensor<Q>& coeff) const {
1939 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1940 double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1941 return transform(coeff,cdata.quad_phit).scale(scale);
1942 }
1943
1944 template <typename Q>
1945 GenTensor<Q> values2coeffs(const keyT& key, const GenTensor<Q>& values) const {
1946 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1947 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1948 return transform(values,cdata.quad_phiw).scale(scale);
1949 }
1950
1951 template <typename Q>
1952 Tensor<Q> values2coeffs(const keyT& key, const Tensor<Q>& values) const {
1953 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1954 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1955 return transform(values,cdata.quad_phiw).scale(scale);
1956 }
1957
1958 /// Compute the function values for multiplication
1959
1960 /// Given coefficients from a parent cell, compute the value of
1961 /// the functions at the quadrature points of a child
1962 /// @param[in] child the key for the child function node (box)
1963 /// @param[in] parent the key for the parent function node (box)
1964 /// @param[in] coeff the coefficients of scaling function basis of the parent box
1965 template <typename Q>
1966 Tensor<Q> fcube_for_mul(const keyT& child, const keyT& parent, const Tensor<Q>& coeff) const {
1967 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1968 if (child.level() == parent.level()) {
1969 return coeffs2values(parent, coeff);
1970 }
1971 else if (child.level() < parent.level()) {
1972 MADNESS_EXCEPTION("FunctionImpl: fcube_for_mul: child-parent relationship bad?",0);
1973 }
1974 else {
1975 Tensor<double> phi[NDIM];
1976 for (std::size_t d=0; d<NDIM; ++d) {
1977 phi[d] = Tensor<double>(cdata.k,cdata.npt);
1978 phi_for_mul(parent.level(),parent.translation()[d],
1979 child.level(), child.translation()[d], phi[d]);
1980 }
1981 return general_transform(coeff,phi).scale(1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume()));;
1982 }
1983 }
1984
1985
1986 /// Compute the function values for multiplication
1987
1988 /// Given coefficients from a parent cell, compute the value of
1989 /// the functions at the quadrature points of a child
1990 /// @param[in] child the key for the child function node (box)
1991 /// @param[in] parent the key for the parent function node (box)
1992 /// @param[in] coeff the coefficients of scaling function basis of the parent box
1993 template <typename Q>
1994 GenTensor<Q> fcube_for_mul(const keyT& child, const keyT& parent, const GenTensor<Q>& coeff) const {
1995 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1996 if (child.level() == parent.level()) {
1997 return coeffs2values(parent, coeff);
1998 }
1999 else if (child.level() < parent.level()) {
2000 MADNESS_EXCEPTION("FunctionImpl: fcube_for_mul: child-parent relationship bad?",0);
2001 }
2002 else {
2003 Tensor<double> phi[NDIM];
2004 for (size_t d=0; d<NDIM; d++) {
2005 phi[d] = Tensor<double>(cdata.k,cdata.npt);
2006 phi_for_mul(parent.level(),parent.translation()[d],
2007 child.level(), child.translation()[d], phi[d]);
2008 }
2009 return general_transform(coeff,phi).scale(1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume()));
2010 }
2011 }
2012
2013
2014 /// Functor for the mul method
2015 template <typename L, typename R>
2016 void do_mul(const keyT& key, const Tensor<L>& left, const std::pair< keyT, Tensor<R> >& arg) {
2017 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
2018 const keyT& rkey = arg.first;
2019 const Tensor<R>& rcoeff = arg.second;
2020 //madness::print("do_mul: r", rkey, rcoeff.size());
2021 Tensor<R> rcube = fcube_for_mul(key, rkey, rcoeff);
2022 //madness::print("do_mul: l", key, left.size());
2023 Tensor<L> lcube = fcube_for_mul(key, key, left);
2024
2025 Tensor<T> tcube(cdata.vk,false);
2026 TERNARY_OPTIMIZED_ITERATOR(T, tcube, L, lcube, R, rcube, *_p0 = *_p1 * *_p2;);
2027 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
2028 tcube = transform(tcube,cdata.quad_phiw).scale(scale);
2029 coeffs.replace(key, nodeT(coeffT(tcube,targs),false));
2030 }
2031
2032
2033 /// Multiply the values of two coefficient tensors using a custom number of grid points
2034
2035 /// Note: both coefficient tensors have to refer to the same key!
2036 /// @param[in] c1 a tensor holding coefficients
2037 /// @param[in] c2 another tensor holding coeffs
2038 /// @param[in] npt number of grid points (optional, default is cdata.npt -- NOTE(review): no default is visible in this signature fragment; confirm)
2039 /// @return coefficient tensor holding the product of the values of c1 and c2
// @param[in] key presumably the box both c1 and c2 belong to; only its level enters the scaling below
2040 template<typename R>
2042 const int npt, const keyT& key) const {
2043 typedef TENSOR_RESULT_TYPE(T,R) resultT;
2044
2046
2047 // construct a tensor with the npt coeffs: embed c1 and c2 into the s0 block of larger tensors of size cdata2.vk
2048 Tensor<T> c11(cdata2.vk), c22(cdata2.vk);
2049 c11(this->cdata.s0)=c1;
2050 c22(this->cdata.s0)=c2;
2051
2052 // it's sufficient to scale once: each coeffs->values transform would carry this factor and the
2053 double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
// values->coeffs transform its inverse, so a single factor remains for the product
2054 Tensor<T> c1value=transform(c11,cdata2.quad_phit).scale(scale);
2055 Tensor<R> c2value=transform(c22,cdata2.quad_phit);
2056 Tensor<resultT> resultvalue(cdata2.vk,false);
2057 TERNARY_OPTIMIZED_ITERATOR(resultT, resultvalue, T, c1value, R, c2value, *_p0 = *_p1 * *_p2;);
2058
// transform the product back to coefficients on the cdata2 grid
2059 Tensor<resultT> result=transform(resultvalue,cdata2.quad_phiw);
2060
2061 // return a copy of the slice to have the tensor contiguous
2062 return copy(result(this->cdata.s0));
2063 }
2064
2065
2066 /// Functor for the binary_op method
2067 template <typename L, typename R, typename opT>
2068 void do_binary_op(const keyT& key, const Tensor<L>& left,
2069 const std::pair< keyT, Tensor<R> >& arg,
2070 const opT& op) {
2071 //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
2072 const keyT& rkey = arg.first;
2073 const Tensor<R>& rcoeff = arg.second;
2074 Tensor<R> rcube = fcube_for_mul(key, rkey, rcoeff);
2075 Tensor<L> lcube = fcube_for_mul(key, key, left);
2076
2077 Tensor<T> tcube(cdata.vk,false);
2078 op(key, tcube, lcube, rcube);
2079 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
2080 tcube = transform(tcube,cdata.quad_phiw).scale(scale);
2081 coeffs.replace(key, nodeT(coeffT(tcube,targs),false));
2082 }
2083
2084 /// Invoked by result to perform result += alpha*left+beta*right in wavelet basis
2085
2086 /// Does not assume that any of result, left, right have the same distribution.
2087 /// For most purposes result will start out empty, so this actually implements an
2088 /// out of place gaxpy. If all functions have the same distribution there is
2089 /// no communication except for the optional fence.
2090 template <typename L, typename R>
2092 T beta, const FunctionImpl<R,NDIM>& right, bool fence) {
2093 // Loop over local nodes in both functions. Add in alpha*left and beta*right.
2094 // Not that efficient in terms of memory bandwidth but ensures we do
2095 // not miss any nodes.
2096 typename FunctionImpl<L,NDIM>::dcT::const_iterator left_end = left.coeffs.end();
2098 it!=left_end;
2099 ++it) {
2100 const keyT& key = it->first;
2101 const typename FunctionImpl<L,NDIM>::nodeT& other_node = it->second;
// node(key) = 1.0*node(key) + alpha*other_node, executed where key lives in this tree
2102 coeffs.send(key, &nodeT:: template gaxpy_inplace<T,L>, 1.0, other_node, alpha);
2103 }
2104 typename FunctionImpl<R,NDIM>::dcT::const_iterator right_end = right.coeffs.end();
2106 it!=right_end;
2107 ++it) {
2108 const keyT& key = it->first;
// NOTE(review): the node type below is spelled FunctionImpl<L,NDIM>::nodeT although the node comes
// from right (a FunctionImpl<R,NDIM>); this looks like a copy-paste slip that only matters when L != R -- confirm
2109 const typename FunctionImpl<L,NDIM>::nodeT& other_node = it->second;
// node(key) = 1.0*node(key) + beta*other_node
2110 coeffs.send(key, &nodeT:: template gaxpy_inplace<T,R>, 1.0, other_node, beta);
2111 }
2112 if (fence)
2113 world.gop.fence();
2114 }
2115
2116 /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence
2117 /// @param[in] op the unary operator for the coefficients, invoked as op(key, full_tensor)
// @param[in] fence if true, fence on the world after the local loop
2118 template <typename opT>
2119 void unary_op_coeff_inplace(const opT& op, bool fence) {
2120 typename dcT::iterator end = coeffs.end();
2121 for (typename dcT::iterator it=coeffs.begin(); it!=end; ++it) {
2122 const keyT& parent = it->first;
2123 nodeT& node = it->second;
// nodes without coefficients are left untouched
2124 if (node.has_coeff()) {
2125 // op(parent, node.coeff());
// convert the coefficients to full tensor representation so op can work on the full tensor
2126 TensorArgs full(-1.0,TT_FULL);
2127 change_tensor_type(node.coeff(),full);
2128 op(parent, node.coeff().full_tensor());
2130 // op(parent,node);
2131 }
2132 }
2133 if (fence)
2134 world.gop.fence();
2135 }
2136
2137 /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence
2138 /// @param[in] op the unary operator for the coefficients
2139 template <typename opT>
2140 void unary_op_node_inplace(const opT& op, bool fence) {
2141 typename dcT::iterator end = coeffs.end();
2142 for (typename dcT::iterator it=coeffs.begin(); it!=end; ++it) {
2143 const keyT& parent = it->first;
2144 nodeT& node = it->second;
2145 op(parent, node);
2146 }
2147 if (fence)
2148 world.gop.fence();
2149 }
2150
2151 /// Integrate over one particle of a two particle function and get a one particle function
2152 /// bsp \int g(1,2) \delta(2-1) d2 = f(1)
2153 /// The overall dimension of g should be even
2154
2155 /// The operator
2156 template<std::size_t LDIM>
2157 void dirac_convolution_op(const keyT &key, const nodeT &node, FunctionImpl<T,LDIM>* f) const {
2158 // fast return if the node has children (not a leaf node)
2159 if(node.has_children()) return;
2160
2161 const implT* g=this;
2162
2163 // break the 6D key into two 3D keys (may also work for every even dimension)
2164 Key<LDIM> key1, key2;
2165 key.break_apart(key1,key2);
2166
2167 // get the coefficients of the 6D function g
2168 const coeffT& g_coeff = node.coeff();
2169
2170 // get the values of the 6D function g
2171 coeffT g_values = g->coeffs2values(key,g_coeff);
2172
2173 // Determine rank and k
2174 const long rank=g_values.rank();
2175 const long maxk=f->get_k();
2176 MADNESS_ASSERT(maxk==g_coeff.dim(0));
2177
2178 // get tensors for particle 1 and 2 (U and V in SVD)
2179 tensorT vec1=copy(g_values.get_svdtensor().ref_vector(0).reshape(rank,maxk,maxk,maxk));
2180 tensorT vec2=g_values.get_svdtensor().ref_vector(1).reshape(rank,maxk,maxk,maxk);
2181 tensorT result(maxk,maxk,maxk); // should give zero tensor
2182 // Multiply the values of each U and V vector
2183 for (long i=0; i<rank; ++i) {
2184 tensorT c1=vec1(Slice(i,i),_,_,_); // shallow copy (!)
2185 tensorT c2=vec2(Slice(i,i),_,_,_);
2186 c1.emul(c2); // this changes vec1 because of shallow copy, but not the g function because of the deep copy made above
2187 double singular_value_i = g_values.get_svdtensor().weights(i);
2188 result += (singular_value_i*c1);
2189 }
2190
2191 // accumulate coefficients (since only diagonal boxes are used the coefficients get just replaced, but accumulate is needed to create the right tree structure
2192 tensorT f_coeff = f->values2coeffs(key1,result);
2193 f->coeffs.task(key1, &FunctionNode<T,LDIM>::accumulate2, f_coeff, f->coeffs, key1, TaskAttributes::hipri());
2194// coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri());
2195
2196
2197 return;
2198 }
2199
2200
// Driver for dirac_convolution_op: loops over the local leaf nodes of this (NDIM) function and spawns
// dirac_convolution_op for every diagonal box (both particle keys equal); the result is collected in f.
2201 template<std::size_t LDIM>
2203 typename dcT::const_iterator end = this->coeffs.end();
2204 for (typename dcT::const_iterator it=this->coeffs.begin(); it!=end; ++it) {
2205 // looping through all the leaf(!) coefficients in the NDIM function ("this")
2206 const keyT& key = it->first;
2207 const FunctionNode<T,NDIM>& node = it->second;
2208 if (node.is_leaf()) {
2209 // only process the diagonal boxes
2210 Key<LDIM> key1, key2;
2211 key.break_apart(key1,key2);
2212 if(key1 == key2){
// run the per-node operator on the owner of key
2213 ProcessID p = coeffs.owner(key);
2214 woT::task(p, &implT:: template dirac_convolution_op<LDIM>, key, node, f);
2215 }
2216 }
2217 }
2218 world.gop.fence(); // fence is necessary if trickle down is used afterwards
2219 // trickle down and undo redundant shouldn't change anything if only the diagonal elements are considered above -> check this
2220 f->trickle_down(true); // fence must be true otherwise undo_redundant will have trouble
2221// f->undo_redundant(true);
2222 f->verify_tree();
2223 //if (fence) world.gop.fence(); // unnecessary, fence is activated in undo_redundant
2224
2225 }
2226
2227
2228 /// Unary operation applied inplace to the nodes WITHOUT refinement, optional fence
2229 /// @param[in] op the unary operator (NOTE(review): the lines applying op are not visible here; presumably applied per node -- confirm)
// @param[in] fence if true, fence on the world before returning
2230 template <typename opT>
2231 void flo_unary_op_node_inplace(const opT& op, bool fence) {
2233// typedef do_unary_op_value_inplace<opT> xopT;
2235 if (fence) world.gop.fence();
2236 }
2237
2238 /// Unary operation applied inplace to the nodes WITHOUT refinement, optional fence (const overload)
2239 /// @param[in] op the unary operator (NOTE(review): the lines applying op are not visible here; presumably applied per node -- confirm)
// @param[in] fence if true, fence on the world before returning
2240 template <typename opT>
2241 void flo_unary_op_node_inplace(const opT& op, bool fence) const {
2243// typedef do_unary_op_value_inplace<opT> xopT;
2245 if (fence)
2246 world.gop.fence();
2247 }
2248
2249 /// Truncate the tree at a certain level
2250 /// @param[in] max_level truncate tree below this level
2251 void erase(const Level& max_level);
2252
2253 /// Returns some asymmetry measure of the local part of the function ... no comms
2254 double check_symmetry_local() const;
2255
2256 /// Given an NS tree resulting from a convolution, truncate leafs if appropriate
// A leaf is reduced to its s0 block if the norm of everything outside that block is below the truncation tolerance.
2259 const implT* f; // for calling its member functions
2260
2262
2263 bool operator()(typename rangeT::iterator& it) const {
2264
2265 const keyT& key = it->first;
2266 nodeT& node = it->second;
2267
2268 if (node.is_leaf() and node.coeff().has_data()) {
// work on a deep copy and zero the s0 block, so only the remaining part contributes to the norm
2269 coeffT d = copy(node.coeff());
2270 d(f->cdata.s0)=0.0;
2271 const double error=d.normf();
2272 const double tol=f->truncate_tol(f->get_thresh(),key);
// small remainder: keep only the s0 block of the coefficients
2273 if (error<tol) node.coeff()=copy(node.coeff()(f->cdata.s0));
2274 }
2275 return true;
2276 }
// nothing is archived
2277 template <typename Archive> void serialize(const Archive& ar) {}
2278
2279 };
2280
2281 /// Remove all coefficients of internal nodes
2284
2285 /// constructor need impl for cdata
2287
// clear the coefficients of every node that has children; leaf nodes are left untouched
2288 bool operator()(typename rangeT::iterator& it) const {
2289
2290 nodeT& node = it->second;
2291 if (node.has_children()) node.clear_coeff();
2292 return true;
2293 }
// nothing is archived
2294 template <typename Archive> void serialize(const Archive& ar) {}
2295
2296 };
2297
2298 /// Remove all coefficients of leaf nodes
2301
2302 /// constructor need impl for cdata
2304
// clear the coefficients of every node without children; internal nodes are left untouched
2305 bool operator()(typename rangeT::iterator& it) const {
2306 nodeT& node = it->second;
2307 if (not node.has_children()) node.clear_coeff();
2308 return true;
2309 }
// nothing is archived
2310 template <typename Archive> void serialize(const Archive& ar) {}
2311
2312 };
2313
2314
2315 /// Keep only the sum coefficients in each node
2319
2320 /// constructor need impl for cdata
2322
// replace the coefficients of each node by a contiguous copy of their s0 block
2323 bool operator()(typename rangeT::iterator& it) const {
2324
2325 nodeT& node = it->second;
2326 coeffT s=copy(node.coeff()(impl->cdata.s0));
2327 node.coeff()=s;
2328 return true;
2329 }
// nothing is archived
2330 template <typename Archive> void serialize(const Archive& ar) {}
2331
2332 };
2333
2334
2335 /// Reduce the rank of the nodes, optional fence
2338
2339 // threshold for rank reduction / SVD truncation
2341
2342 // constructor takes target precision
2343 do_reduce_rank() = default;
2345 do_reduce_rank(const double& thresh) {
2347 }
2348
2349 // reduce the rank of the node's coefficients using the stored threshold (args.thresh)
2350 bool operator()(typename rangeT::iterator& it) const {
2351
2352 nodeT& node = it->second;
2353 node.reduceRank(args.thresh);
2354 return true;
2355 }
// nothing is archived
2356 template <typename Archive> void serialize(const Archive& ar) {}
2357 };
2358
2359
2360
2361 /// Check symmetry wrt particle exchange
// Reduction functor: the iterator overload gives a per-node contribution, the (double,double) overload sums them.
2364 const implT* f;
2367
2368 /// return the squared norm of the difference of this node and its "mirror" node
2369 double operator()(typename rangeT::iterator& it) const {
2370
2371 // Temporary fix to GCC whining about out of range access for NDIM!=6
2372 if constexpr(NDIM==6) {
2373 const keyT& key = it->first;
2374 const nodeT& fnode = it->second;
2375
2376 // skip internal nodes
2377 if (fnode.has_children()) return 0.0;
2378
// no contribution if world.size()>1 -- presumably because the mirror node might not be local; confirm
2379 if (f->world.size()>1) return 0.0;
2380
2381 // exchange particles: dimensions (0,1,2) <-> (3,4,5)
2382 std::vector<long> map(NDIM);
2383 map[0]=3; map[1]=4; map[2]=5;
2384 map[3]=0; map[4]=1; map[5]=2;
2385
2386 // make mapped key: same level, translations permuted according to map
2388 for (std::size_t i=0; i<NDIM; ++i) l[map[i]] = key.translation()[i];
2389 const keyT mapkey(key.level(),l);
2390
2391 double norm=0.0;
2392
2393
2394 // hope it's local
2395 if (f->get_coeffs().probe(mapkey)) {
2396 MADNESS_ASSERT(f->get_coeffs().probe(mapkey));
2397 const nodeT& mapnode=f->get_coeffs().find(mapkey).get()->second;
2398
2399// bool have_c1=fnode.coeff().has_data() and fnode.coeff().config().has_data();
2400// bool have_c2=mapnode.coeff().has_data() and mapnode.coeff().config().has_data();
2401 bool have_c1=fnode.coeff().has_data();
2402 bool have_c2=mapnode.coeff().has_data();
2403
// compare this node with the dimension-permuted mirror node; a missing partner counts with its full norm
2404 if (have_c1 and have_c2) {
2405 tensorT c1=fnode.coeff().full_tensor_copy();
2406 tensorT c2=mapnode.coeff().full_tensor_copy();
2407 c2 = copy(c2.mapdim(map));
2408 norm=(c1-c2).normf();
2409 } else if (have_c1) {
2410 tensorT c1=fnode.coeff().full_tensor_copy();
2411 norm=c1.normf();
2412 } else if (have_c2) {
2413 tensorT c2=mapnode.coeff().full_tensor_copy();
2414 norm=c2.normf();
2415 } else {
2416 norm=0.0;
2417 }
2418 } else {
// mirror node does not exist: the whole node counts as asymmetry
2419 norm=fnode.coeff().normf();
2420 }
2421 return norm*norm;
2422 }
2423 else {
2424 MADNESS_EXCEPTION("ONLY FOR DIM 6!", 1);
2425 }
2426 }
2427
// reduction step: add up the per-node contributions
2428 double operator()(double a, double b) const {
2429 return (a+b);
2430 }
2431
// serialization is not supported and raises an exception
2432 template <typename Archive> void serialize(const Archive& ar) {
2433 MADNESS_EXCEPTION("no serialization of do_check_symmetry yet",1);
2434 }
2435
2436
2437 };
2438
2439 /// Merge the coefficient boxes of this into result's tree
2440
2441 /// result+= alpha*this
2442 /// this and result don't have to have the same distribution or live in the same world
2443 /// no comm, and the tree should be in a consistent state by virtue
2444 template<typename Q, typename R>
2448 T alpha=T(1.0);
2452
2453 /// for every node of this holding coefficients, schedule the accumulation of alpha*coeff into result at the same key
2454 bool operator()(typename rangeT::iterator& it) const {
2455
2456 const keyT& key = it->first;
2457 const nodeT& node = it->second;
2458 if (node.has_coeff()) result->get_coeffs().task(key, &nodeT::accumulate,
2459 alpha*node.coeff(), result->get_coeffs(), key, result->targs);
2460 return true;
2461 }
2462
// serialization is not supported and raises an exception
2463 template <typename Archive> void serialize(const Archive& ar) {
2464 MADNESS_EXCEPTION("no serialization of do_accumulate_trees",1);
2465 }
2466 };
2467
2468
2469 /// Merge the coefficient boxes of this into other's tree
2470
2471 /// no comm, and the tree should be in a consistent state by virtue
2472 /// of FunctionNode::gaxpy_inplace
2473 template<typename Q, typename R>
2482
2483 /// combine the node of this with the node of other at the same key, or insert a scaled copy if other has none
2484 bool operator()(typename rangeT::iterator& it) const {
2485
2486 const keyT& key = it->first;
2487 const nodeT& fnode = it->second;
2488
2489 // if other's node exists: add this' coeffs to it
2490 // otherwise insert this' node into other's tree
2491 typename dcT::accessor acc;
2492 if (other->get_coeffs().find(acc,key)) {
2493 nodeT& gnode=acc->second;
// existing node: gnode.gaxpy_inplace(beta,fnode,alpha), presumably gnode = beta*gnode + alpha*fnode
2494 gnode.gaxpy_inplace(beta,fnode,alpha);
2495 } else {
// no node yet: insert a copy of this' node scaled by alpha
2496 nodeT gnode=fnode;
2497 gnode.scale(alpha);
2498 other->get_coeffs().replace(key,gnode);
2499 }
2500 return true;
2501 }
2502
// serialization is not supported and raises an exception
2503 template <typename Archive> void serialize(const Archive& ar) {
2504 MADNESS_EXCEPTION("no serialization of do_merge_trees",1);
2505 }
2506 };
2507
2508
2509 /// Map (permute) the dimensions of this, write the result on f
2510 struct do_mapdim {
2512
// permutation of the dimensions: old dimension i becomes new dimension map[i]
2513 std::vector<long> map;
2515
2516 do_mapdim() : f(0) {};
2517 do_mapdim(const std::vector<long> map, implT& f) : map(map), f(&f) {}
2518
// permute key and coefficients of one node and store the result in f
2519 bool operator()(typename rangeT::iterator& it) const {
2520
2521 const keyT& key = it->first;
2522 const nodeT& node = it->second;
2523
// permuted translation: component i of the old key goes to component map[i]
2525 for (std::size_t i=0; i<NDIM; ++i) l[map[i]] = key.translation()[i];
// permute the dimensions of the reconstructed coefficient tensor (if there are any coefficients)
2526 tensorT c = node.coeff().reconstruct_tensor();
2527 if (c.size()) c = copy(c.mapdim(map));
2529 f->get_coeffs().replace(keyT(key.level(),l), nodeT(cc,node.has_children()));
2530
2531 return true;
2532 }
// serialization is not supported and raises an exception
2533 template <typename Archive> void serialize(const Archive& ar) {
2534 MADNESS_EXCEPTION("no serialization of do_mapdim",1);
2535 }
2536
2537 };
2538
2539 /// Mirror dimensions of this, write result on f
2540 struct do_mirror {
2542
// per dimension flag: an entry of -1 marks a dimension to be mirrored
2543 std::vector<long> mirror;
2545
2546 do_mirror() : f(0) {};
2547 do_mirror(const std::vector<long> mirror, implT& f) : mirror(mirror), f(&f) {}
2548
// mirror key and coefficients of one node and store the result in f
2549 bool operator()(typename rangeT::iterator& it) const {
2550
2551 const keyT& key = it->first;
2552 const nodeT& node = it->second;
2553
2554 // mirror translation index: l_new + l_old = l_max
// with l_max = 2^level - 1, the largest translation on this level
2556 Translation lmax = (Translation(1)<<key.level()) - 1;
2557 for (std::size_t i=0; i<NDIM; ++i) {
2558 if (mirror[i]==-1) l[i]= lmax - key.translation()[i];
2559 }
2560
2561 // mirror coefficients: multiply all odd-k slices with -1
2562 tensorT c = node.coeff().full_tensor_copy();
2563 if (c.size()) {
2564 std::vector<Slice> s(___);
2565
2566 // loop over dimensions and over k
2567 for (size_t i=0; i<NDIM; ++i) {
2568 std::size_t kmax=c.dim(i);
2569 if (mirror[i]==-1) {
// flip the sign of every odd index along dimension i
2570 for (size_t k=1; k<kmax; k+=2) {
2571 s[i]=Slice(k,k,1);
2572 c(s)*=(-1.0);
2573 }
// reset to the full range before moving on to the next dimension
2574 s[i]=_;
2575 }
2576 }
2577 }
2579 f->get_coeffs().replace(keyT(key.level(),l), nodeT(cc,node.has_children()));
2580
2581 return true;
2582 }
// serialization is not supported and raises an exception
2583 template <typename Archive> void serialize(const Archive& ar) {
2584 MADNESS_EXCEPTION("no serialization of do_mirror",1);
2585 }
2586
2587 };
2588
2589 /// Map (permute) and mirror dimensions of this, write result on f
2592
// map: permutation of the dimensions (may be empty); mirror: an entry of -1 marks a dimension to be mirrored (may be empty)
2593 std::vector<long> map,mirror;
2595
2597 do_map_and_mirror(const std::vector<long> map, const std::vector<long> mirror, implT& f)
2598 : map(map), mirror(mirror), f(&f) {}
2599
// permute, then mirror, key and coefficients of one node and store the result in f
2600 bool operator()(typename rangeT::iterator& it) const {
2601
2602 const keyT& key = it->first;
2603 const nodeT& node = it->second;
2604
2605 tensorT c = node.coeff().full_tensor_copy();
2607
2608 // do the mapping first (if present)
2609 if (map.size()>0) {
// component i of the current translation goes to component map[i]
2611 for (std::size_t i=0; i<NDIM; ++i) l1[map[i]] = l[i];
2612 std::swap(l,l1);
2613 if (c.size()) c = copy(c.mapdim(map));
2614 }
2615
2616 if (mirror.size()>0) {
2617 // mirror translation index: l_new + l_old = l_max
// with l_max = 2^level - 1, the largest translation on this level
2619 Translation lmax = (Translation(1)<<key.level()) - 1;
2620 for (std::size_t i=0; i<NDIM; ++i) {
2621 if (mirror[i]==-1) l1[i]= lmax - l[i];
2622 }
2623 std::swap(l,l1);
2624
2625 // mirror coefficients: multiply all odd-k slices with -1
2626 if (c.size()) {
2627 std::vector<Slice> s(___);
2628
2629 // loop over dimensions and over k
2630 for (size_t i=0; i<NDIM; ++i) {
2631 std::size_t kmax=c.dim(i);
2632 if (mirror[i]==-1) {
// flip the sign of every odd index along dimension i
2633 for (size_t k=1; k<kmax; k+=2) {
2634 s[i]=Slice(k,k,1);
2635 c(s)*=(-1.0);
2636 }
// reset to the full range before moving on to the next dimension
2637 s[i]=_;
2638 }
2639 }
2640 }
2641 }
2642
2644 f->get_coeffs().replace(keyT(key.level(),l), nodeT(cc,node.has_children()));
2645 return true;
2646 }
// serialization is not supported and raises an exception
// NOTE(review): the message below names do_mirror, not do_map_and_mirror -- looks like a copy-paste leftover
2647 template <typename Archive> void serialize(const Archive& ar) {
2648 MADNESS_EXCEPTION("no serialization of do_mirror",1);
2649 }
2650
2651 };
2652
2653
2654
2655 /// "put" this on g
// Coefficients of this are added to existing coefficients of g, or the node is inserted if g has no node at that key.
2656 struct do_average {
2658
2660
2661 do_average() : g(0) {}
2663
2664 /// iterator it points to this
2665 bool operator()(typename rangeT::iterator& it) const {
2666
2667 const keyT& key = it->first;
2668 const nodeT& fnode = it->second;
2669
2670 // fast return if rhs has no coeff here
2671 if (fnode.has_coeff()) {
2672
2673 // check if there is a node already existing
2674 typename dcT::accessor acc;
2675 if (g->get_coeffs().find(acc,key)) {
2676 nodeT& gnode=acc->second;
// add only if g's node already holds coefficients; otherwise g's node is left as it is
2677 if (gnode.has_coeff()) gnode.coeff()+=fnode.coeff();
2678 } else {
// g has no node at this key: insert a copy of this' node
2679 g->get_coeffs().replace(key,fnode);
2680 }
2681 }
2682
2683 return true;
2684 }
// nothing is archived
2685 template <typename Archive> void serialize(const Archive& ar) {}
2686 };
2687
2688 /// Change representation of nodes' coeffs to low rank, optional fence
2691
2692 // threshold for rank reduction / SVD truncation
2695
2696 // constructor takes target precision
2698 // do_change_tensor_type(const TensorArgs& targs) : targs(targs) {}
2700
2701 // per-node operation, bracketed by two cpu_time() samples
2702 bool operator()(typename rangeT::iterator& it) const {
2703
2704 double cpu0=cpu_time();
2705 nodeT& node = it->second;
2707 double cpu1=cpu_time();
2709
2710 return true;
2711
2712 }
// nothing is archived
2713 template <typename Archive> void serialize(const Archive& ar) {}
2714 };
2715
// Functor calling consolidate_buffer(targs) on every node
2718
2719 // threshold for rank reduction / SVD truncation
2721
2722 // constructor takes target precision
// consolidate the buffer of the node using the stored tensor arguments
2725 bool operator()(typename rangeT::iterator& it) const {
2726 it->second.consolidate_buffer(targs);
2727 return true;
2728 }
// nothing is archived
2729 template <typename Archive> void serialize(const Archive& ar) {}
2730 };
2731
2732
2733
// Functor applying a unary operation to the function values of every node holding coefficients:
// coefficients -> values on the quadrature points -> op(key, values) -> coefficients.
2734 template <typename opT>
// the unary operation, invoked as op(key, values)
2738 opT op;
2740 bool operator()(typename rangeT::iterator& it) const {
2741 const keyT& key = it->first;
2742 nodeT& node = it->second;
2743 if (node.has_coeff()) {
// switch to full tensor representation to get at the plain coefficient tensor
2744 const TensorArgs full_args(-1.0,TT_FULL);
2745 change_tensor_type(node.coeff(),full_args);
2746 tensorT& t= node.coeff().full_tensor();
2747 //double before = t.normf();
// function values of this box (child and parent key are identical)
2748 tensorT values = impl->fcube_for_mul(key, key, t);
2749 op(key, values);
// transform the modified values back to coefficients
2750 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
2751 t = transform(values,impl->cdata.quad_phiw).scale(scale);
// store the result using the tensor arguments of impl
2752 node.coeff()=coeffT(t,impl->get_tensor_args());
2753 //double after = t.normf();
2754 //madness::print("XOP:", key, before, after);
2755 }
2756 return true;
2757 }
// nothing is archived
2758 template <typename Archive> void serialize(const Archive& ar) {}
2759 };
2760
2761 template <typename Q, typename R>
2762 /// @todo I don't know what this does other than a trasform
2763 void vtransform_doit(const std::shared_ptr< FunctionImpl<R,NDIM> >& right,
2764 const Tensor<Q>& c,
2765 const std::vector< std::shared_ptr< FunctionImpl<T,NDIM> > >& vleft,
2766 double tol) {
2767 // To reduce crunch on vectors being transformed each task
2768 // does them in a random order
2769 std::vector<unsigned int> ind(vleft.size());
2770 for (unsigned int i=0; i<vleft.size(); ++i) {
2771 ind[i] = i;
2772 }
2773 for (unsigned int i=0; i<vleft.size(); ++i) {
2774 unsigned int j = RandomValue<int>()%vleft.size();
2775 std::swap(ind[i],ind[j]);
2776 }
2777
2778 typename FunctionImpl<R,NDIM>::dcT::const_iterator end = right->coeffs.end();
2779 for (typename FunctionImpl<R,NDIM>::dcT::const_iterator it=right->coeffs.begin(); it != end; ++it) {
2780 if (it->second.has_coeff()) {
2781 const Key<NDIM>& key = it->first;
2782 const GenTensor<R>& r = it->second.coeff();
2783 double norm = r.normf();
2784 double keytol = truncate_tol(tol,key);
2785
2786 for (unsigned int j=0; j<vleft.size(); ++j) {
2787 unsigned int i = ind[j]; // Random permutation
2788 if (std::abs(norm*c(i)) > keytol) {
2789 implT* left = vleft[i].get();
2790 typename dcT::accessor acc;
2791 bool newnode = left->coeffs.insert(acc,key);
2792 if (newnode && key.level()>0) {
2793 Key<NDIM> parent = key.parent();
2794 if (left->coeffs.is_local(parent))
2795 left->coeffs.send(parent, &nodeT::set_has_children_recursive, left->coeffs, parent);
2796 else
2797 left->coeffs.task(parent, &nodeT::set_has_children_recursive, left->coeffs, parent);
2798
2799 }
2800 nodeT& node = acc->second;
2801 if (!node.has_coeff())
2802 node.set_coeff(coeffT(cdata.v2k,targs));
2803 coeffT& t = node.coeff();
2804 t.gaxpy(1.0, r, c(i));
2805 }
2806 }
2807 }
2808 }
2809 }
2810
2811 /// Refine multiple functions down to the same finest level
2812
2813 /// @param[in] v the vector of functions we are refining.
2814 /// @param[in] key the current node.
2815 /// @param[in] c the vector of coefficients passed from above.
2816 void refine_to_common_level(const std::vector<FunctionImpl<T,NDIM>*>& v,
2817 const std::vector<tensorT>& c,
2818 const keyT key);
2819
2820 /// Inplace operate on many functions (impl's) with an operator within a certain box
2821 /// @param[in] key the key of the current function node (box)
2822 /// @param[in] op the operator
2823 /// @param[in] v the vector of function impl's on which to be operated
2824 template <typename opT>
2825 void multiop_values_doit(const keyT& key, const opT& op, const std::vector<implT*>& v) {
2826 std::vector<tensorT> c(v.size());
2827 for (unsigned int i=0; i<v.size(); i++) {
2828 if (v[i]) {
2829 coeffT cc = coeffs2values(key, v[i]->coeffs.find(key).get()->second.coeff());
2830 c[i]=cc.full_tensor();
2831 }
2832 }
2833 tensorT r = op(key, c);
2834 coeffs.replace(key, nodeT(coeffT(values2coeffs(key, r),targs),false));
2835 }
2836
2837 /// Inplace operate on many functions (impl's) with an operator within a certain box
2838 /// Assumes all functions have been refined down to the same level
2839 /// @param[in] op the operator
2840 /// @param[in] v the vector of function impl's on which to be operated
2841 template <typename opT>
2842 void multiop_values(const opT& op, const std::vector<implT*>& v) {
2843 // rough check on refinement level (ignore non-initialized functions
2844 for (std::size_t i=1; i<v.size(); ++i) {
2845 if (v[i] and v[i-1]) {
2846 MADNESS_ASSERT(v[i]->coeffs.size()==v[i-1]->coeffs.size());
2847 }
2848 }
2849 typename dcT::iterator end = v[0]->coeffs.end();
2850 for (typename dcT::iterator it=v[0]->coeffs.begin(); it!=end; ++it) {
2851 const keyT& key = it->first;
2852 if (it->second.has_coeff())
2853 world.taskq.add(*this, &implT:: template multiop_values_doit<opT>, key, op, v);
2854 else
2855 coeffs.replace(key, nodeT(coeffT(),true));
2856 }
2857 world.gop.fence();
2858 }
2859
2860 /// Inplace operate on many functions (impl's) with an operator within a certain box
2861
2862 /// @param[in] key the key of the current function node (box)
2863 /// @param[in] op the operator
2864 /// @param[in] vin the vector of function impl's on which to be operated
2865 /// @param[out] vout the resulting vector of function impl's
2866 template <typename opT>
2867 void multi_to_multi_op_values_doit(const keyT& key, const opT& op,
2868 const std::vector<implT*>& vin, std::vector<implT*>& vout) {
2869 std::vector<tensorT> c(vin.size());
2870 for (unsigned int i=0; i<vin.size(); i++) {
2871 if (vin[i]) {
2872 coeffT cc = coeffs2values(key, vin[i]->coeffs.find(key).get()->second.coeff());
2873 c[i]=cc.full_tensor();
2874 }
2875 }
2876 std::vector<tensorT> r = op(key, c);
2877 MADNESS_ASSERT(r.size()==vout.size());
2878 for (std::size_t i=0; i<vout.size(); ++i) {
2879 vout[i]->coeffs.replace(key, nodeT(coeffT(values2coeffs(key, r[i]),targs),false));
2880 }
2881 }
2882
2883 /// Inplace operate on many functions (impl's) with an operator within a certain box
2884
2885 /// Assumes all functions have been refined down to the same level
2886 /// @param[in] op the operator
2887 /// @param[in] vin the vector of function impl's on which to be operated
2888 /// @param[out] vout the resulting vector of function impl's
2889 template <typename opT>
2890 void multi_to_multi_op_values(const opT& op, const std::vector<implT*>& vin,
2891 std::vector<implT*>& vout, const bool fence=true) {
2892 // rough check on refinement level (ignore non-initialized functions
2893 for (std::size_t i=1; i<vin.size(); ++i) {
2894 if (vin[i] and vin[i-1]) {
2895 MADNESS_ASSERT(vin[i]->coeffs.size()==vin[i-1]->coeffs.size());
2896 }
2897 }
2898 typename dcT::iterator end = vin[0]->coeffs.end();
2899 for (typename dcT::iterator it=vin[0]->coeffs.begin(); it!=end; ++it) {
2900 const keyT& key = it->first;
2901 if (it->second.has_coeff())
2902 world.taskq.add(*this, &implT:: template multi_to_multi_op_values_doit<opT>,
2903 key, op, vin, vout);
2904 else {
2905 // fill result functions with empty box in this key
2906 for (implT* it2 : vout) {
2907 it2->coeffs.replace(key, nodeT(coeffT(),true));
2908 }
2909 }
2910 }
2911 if (fence) world.gop.fence();
2912 }
2913
2914 /// Transforms a vector of functions left[i] = sum[j] right[j]*c[j,i] using sparsity
2915 /// @param[in] vright vector of functions (impl's) on which to be transformed
2916 /// @param[in] c the tensor (matrix) transformer
2917 /// @param[in] vleft vector of of the *newly* transformed functions (impl's)
2918 template <typename Q, typename R>
2919 void vtransform(const std::vector< std::shared_ptr< FunctionImpl<R,NDIM> > >& vright,
2920 const Tensor<Q>& c,
2921 const std::vector< std::shared_ptr< FunctionImpl<T,NDIM> > >& vleft,
2922 double tol,
2923 bool fence) {
2924 for (unsigned int j=0; j<vright.size(); ++j) {
2925 world.taskq.add(*this, &implT:: template vtransform_doit<Q,R>, vright[j], copy(c(j,_)), vleft, tol);
2926 }
2927 if (fence)
2928 world.gop.fence();
2929 }
2930
2931 /// Unary operation applied inplace to the values with optional refinement and fence
/// Runs a do_unary_op_value_inplace<opT> functor over all local nodes
/// (coeffs.begin() to coeffs.end()) through the task queue's for_each.
2932 /// @param[in] op the unary operator for the values
/// @param[in] fence if true, fence globally after the for_each has been issued
2933 template <typename opT>
2934 void unary_op_value_inplace(const opT& op, bool fence) {
// NOTE(review): rangeT is not declared in the visible lines (the listing
// skips a line here); confirm its definition against the full header.
2936 typedef do_unary_op_value_inplace<opT> xopT;
2937 world.taskq.for_each<rangeT,xopT>(rangeT(coeffs.begin(), coeffs.end()), xopT(this,op));
2938 if (fence)
2939 world.gop.fence();
2940 }
2941
2942 /// Multiplication assuming same distribution and recursive descent
2943 /// Both left and right functions are in the scaling function basis
///
/// For each right function one of three things happens in this box: if both
/// factors have coefficients a do_mul task is queued on the result; else if
/// the norm product is below the truncation tolerance a zero leaf is stored;
/// otherwise the result gets an interior node and the pair is carried on to
/// the children, with any available coefficients unfiltered to the children.
2944 /// @param[in] key the key to the current function node (box)
2945 /// @param[in] left the function impl associated with the left function
2946 /// @param[in] lcin the scaling function coefficients associated with the
2947 /// current box in the left function
///                 (empty means: look the node up in left's tree)
2948 /// @param[in] vrightin the vector of function impl's associated with
2949 /// the vector of right functions
2950 /// @param[in] vrcin the vector scaling function coefficients associated with the
2951 /// current box in the right functions
///                 (an empty entry means: look the node up in that tree)
2952 /// @param[out] vresultin the vector of resulting functions (impl's)
/// @param[in] tol screening tolerance; a value of zero disables the screening
2953 template <typename L, typename R>
2954 void mulXXveca(const keyT& key,
2955 const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin,
2956 const std::vector<const FunctionImpl<R,NDIM>*> vrightin,
2957 const std::vector< Tensor<R> >& vrcin,
2958 const std::vector<FunctionImpl<T,NDIM>*> vresultin,
2959 double tol) {
2960 typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;
2961 typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT;
2962
// NOTE(review): lnorm keeps this 1e99 sentinel whenever lcin is non-empty,
// so the norm screening further down cannot trigger in that case. mulXXa
// refreshes lnorm from lc.normf() instead -- confirm the asymmetry is intended.
2963 double lnorm = 1e99;
2964 Tensor<L> lc = lcin;
2965 if (lc.size() == 0) {
2966 literT it = left->coeffs.find(key).get();
2967 MADNESS_ASSERT(it != left->coeffs.end());
2968 lnorm = it->second.get_norm_tree();
2969 if (it->second.has_coeff())
2970 lc = it->second.coeff().full_tensor_copy();
2971 }
2972
2973 // Loop thru RHS functions seeing if anything can be multiplied
// vresult/vright/vrc collect the pairs that must be continued in the children
2974 std::vector<FunctionImpl<T,NDIM>*> vresult;
2975 std::vector<const FunctionImpl<R,NDIM>*> vright;
2976 std::vector< Tensor<R> > vrc;
2977 vresult.reserve(vrightin.size());
2978 vright.reserve(vrightin.size());
2979 vrc.reserve(vrightin.size());
2980
2981 for (unsigned int i=0; i<vrightin.size(); ++i) {
2982 FunctionImpl<T,NDIM>* result = vresultin[i];
2983 const FunctionImpl<R,NDIM>* right = vrightin[i];
2984 Tensor<R> rc = vrcin[i];
// rnorm is assigned on both branches below before it is read
2985 double rnorm;
2986 if (rc.size() == 0) {
2987 riterT it = right->coeffs.find(key).get();
2988 MADNESS_ASSERT(it != right->coeffs.end());
2989 rnorm = it->second.get_norm_tree();
2990 if (it->second.has_coeff())
2991 rc = it->second.coeff().full_tensor_copy();
2992 }
2993 else {
2994 rnorm = rc.normf();
2995 }
2996
2997 if (rc.size() && lc.size()) { // Yipee!
2998 result->task(world.rank(), &implT:: template do_mul<L,R>, key, lc, std::make_pair(key,rc));
2999 }
3000 else if (tol && lnorm*rnorm < truncate_tol(tol, key)) {
3001 result->coeffs.replace(key, nodeT(coeffT(cdata.vk,targs),false)); // Zero leaf
3002 }
3003 else { // Interior node
3004 result->coeffs.replace(key, nodeT(coeffT(),true));
3005 vresult.push_back(result);
3006 vright.push_back(right);
3007 vrc.push_back(rc);
3008 }
3009 }
3010
// Recur only if at least one product is still unresolved in this box
3011 if (vresult.size()) {
3012 Tensor<L> lss;
3013 if (lc.size()) {
// embed lc in the s0 block of a v2k-sized tensor and unfilter it
3014 Tensor<L> ld(cdata.v2k);
3015 ld(cdata.s0) = lc(___);
3016 lss = left->unfilter(ld);
3017 }
3018
3019 std::vector< Tensor<R> > vrss(vresult.size());
3020 for (unsigned int i=0; i<vresult.size(); ++i) {
3021 if (vrc[i].size()) {
3022 Tensor<R> rd(cdata.v2k);
3023 rd(cdata.s0) = vrc[i](___);
3024 vrss[i] = vright[i]->unfilter(rd);
3025 }
3026 }
3027
3028 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
3029 const keyT& child = kit.key();
3030 Tensor<L> ll;
3031
3032 std::vector<Slice> cp = child_patch(child);
3033
3034 if (lc.size())
3035 ll = copy(lss(cp));
3036
3037 std::vector< Tensor<R> > vv(vresult.size());
3038 for (unsigned int i=0; i<vresult.size(); ++i) {
3039 if (vrc[i].size())
3040 vv[i] = copy(vrss[i](cp));
3041 }
3042
// continue on the process that owns the child in this function's container
3043 woT::task(coeffs.owner(child), &implT:: template mulXXveca<L,R>, child, left, ll, vright, vv, vresult, tol);
3044 }
3045 }
3046 }
3047
3048 /// Multiplication using recursive descent and assuming same distribution
3049 /// Both left and right functions are in the scaling function basis
3050 /// @param[in] key the key to the current function node (box)
3051 /// @param[in] left the function impl associated with the left function
3052 /// @param[in] lcin the scaling function coefficients associated with the
3053 /// current box in the left function
3054 /// @param[in] right the function impl associated with the right function
3055 /// @param[in] rcin the scaling function coefficients associated with the
3056 /// current box in the right function
3057 template <typename L, typename R>
3058 void mulXXa(const keyT& key,
3059 const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin,
3060 const FunctionImpl<R,NDIM>* right,const Tensor<R>& rcin,
3061 double tol) {
3062 typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;
3063 typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT;
3064
3065 double lnorm=1e99, rnorm=1e99;
3066
3067 Tensor<L> lc = lcin;
3068 if (lc.size() == 0) {
3069 literT it = left->coeffs.find(key).get();
3070 MADNESS_ASSERT(it != left->coeffs.end());
3071 lnorm = it->second.get_norm_tree();
3072 if (it->second.has_coeff())
3073 lc = it->second.coeff().reconstruct_tensor();
3074 }
3075
3076 Tensor<R> rc = rcin;
3077 if (rc.size() == 0) {
3078 riterT it = right->coeffs.find(key).get();
3079 MADNESS_ASSERT(it != right->coeffs.end());
3080 rnorm = it->second.get_norm_tree();
3081 if (it->second.has_coeff())
3082 rc = it->second.coeff().reconstruct_tensor();
3083 }
3084
3085 // both nodes are leaf nodes: multiply and return
3086 if (rc.size() && lc.size()) { // Yipee!
3087 do_mul<L,R>(key, lc, std::make_pair(key,rc));
3088 return;
3089 }
3090
3091 if (tol) {
3092 if (lc.size())
3093 lnorm = lc.normf(); // Otherwise got from norm tree above
3094 if (rc.size())
3095 rnorm = rc.normf();
3096 if (lnorm*rnorm < truncate_tol(tol, key)) {
3097 coeffs.replace(key, nodeT(coeffT(cdata.vk,targs),false)); // Zero leaf node
3098 return;
3099 }
3100 }
3101
3102 // Recur down
3103 coeffs.replace(key, nodeT(coeffT(),true)); // Interior node
3104
3105 Tensor<L> lss;
3106 if (lc.size()) {
3107 Tensor<L> ld(cdata.v2k);
3108 ld(cdata.s0) = lc(___);
3109 lss = left->unfilter(ld);
3110 }
3111
3112 Tensor<R> rss;
3113 if (rc.size()) {
3114 Tensor<R> rd(cdata.v2k);
3115 rd(cdata.s0) = rc(___);
3116 rss = right->unfilter(rd);
3117 }
3118
3119 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
3120 const keyT& child = kit.key();
3121 Tensor<L> ll;
3122 Tensor<R> rr;
3123 if (lc.size())
3124 ll = copy(lss(child_patch(child)));
3125 if (rc.size())
3126 rr = copy(rss(child_patch(child)));
3127
3128 woT::task(coeffs.owner(child), &implT:: template mulXXa<L,R>, child, left, ll, right, rr, tol);
3129 }
3130 }
3131
3132
3133 // Binary operation on values using recursive descent and assuming same distribution
3134 /// Both left and right functions are in the scaling function basis
3135 /// @param[in] key the key to the current function node (box)
3136 /// @param[in] left the function impl associated with the left function
3137 /// @param[in] lcin the scaling function coefficients associated with the
3138 /// current box in the left function
3139 /// @param[in] right the function impl associated with the right function
3140 /// @param[in] rcin the scaling function coefficients associated with the
3141 /// current box in the right function
3142 /// @param[in] op the binary operator
3143 template <typename L, typename R, typename opT>
3144 void binaryXXa(const keyT& key,
3145 const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin,
3146 const FunctionImpl<R,NDIM>* right,const Tensor<R>& rcin,
3147 const opT& op) {
3148 typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;
3149 typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT;
3150
3151 Tensor<L> lc = lcin;
3152 if (lc.size() == 0) {
3153 literT it = left->coeffs.find(key).get();
3154 MADNESS_ASSERT(it != left->coeffs.end());
3155 if (it->second.has_coeff())
3156 lc = it->second.coeff().reconstruct_tensor();
3157 }
3158
3159 Tensor<R> rc = rcin;
3160 if (rc.size() == 0) {
3161 riterT it = right->coeffs.find(key).get();
3162 MADNESS_ASSERT(it != right->coeffs.end());
3163 if (it->second.has_coeff())
3164 rc = it->second.coeff().reconstruct_tensor();
3165 }
3166
3167 if (rc.size() && lc.size()) { // Yipee!
3168 do_binary_op<L,R>(key, lc, std::make_pair(key,rc), op);
3169 return;
3170 }
3171
3172 // Recur down
3173 coeffs.replace(key, nodeT(coeffT(),true)); // Interior node
3174
3175 Tensor<L> lss;
3176 if (lc.size()) {
3177 Tensor<L> ld(cdata.v2k);
3178 ld(cdata.s0) = lc(___);
3179 lss = left->unfilter(ld);
3180 }
3181
3182 Tensor<R> rss;
3183 if (rc.size()) {
3184 Tensor<R> rd(cdata.v2k);
3185 rd(cdata.s0) = rc(___);
3186 rss = right->unfilter(rd);
3187 }
3188
3189 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
3190 const keyT& child = kit.key();
3191 Tensor<L> ll;
3192 Tensor<R> rr;
3193 if (lc.size())
3194 ll = copy(lss(child_patch(child)));
3195 if (rc.size())
3196 rr = copy(rss(child_patch(child)));
3197
3198 woT::task(coeffs.owner(child), &implT:: template binaryXXa<L,R,opT>, child, left, ll, right, rr, op);
3199 }
3200 }
3201
/// Adaptor that lets an operator acting on function values be applied to coefficients
/// NOTE(review): the listing omits several lines of this definition (the
/// numbering jumps): the struct header, the impl_func member and the
/// constructors are not visible here -- confirm against the full header.
3202 template <typename Q, typename opT>
3204 typedef typename opT::resultT resultT;
3206 opT op;
3207
3212
/// Convert coefficients to values, apply op to the values, convert back
/// @param[in] key the box the coefficients belong to
/// @param[in] t the coefficients of that box
/// @return the coefficients of op applied to the values
3213 Tensor<resultT> operator()(const Key<NDIM>& key, const Tensor<Q>& t) const {
3214 Tensor<Q> invalues = impl_func->coeffs2values(key, t);
3215
3216 Tensor<resultT> outvalues = op(key, invalues);
3217
3218 return impl_func->values2coeffs(key, outvalues);
3219 }
3220
/// Serializes impl_func and op, in that order
3221 template <typename Archive>
3222 void serialize(Archive& ar) {
3223 ar & impl_func & op;
3224 }
3225 };
3226
3227 /// Out of place unary operation on function impl
3228 /// The skeleton algorithm should resemble something like
3229 ///
3230 /// *this = op(*func)
3231 ///
3232 /// @param[in] key the key of the current function node (box)
3233 /// @param[in] func the function impl on which to be operated
3234 /// @param[in] op the unary operator
3235 template <typename Q, typename opT>
3236 void unaryXXa(const keyT& key,
3237 const FunctionImpl<Q,NDIM>* func, const opT& op) {
3238
3239 // const Tensor<Q>& fc = func->coeffs.find(key).get()->second.full_tensor_copy();
3240 const Tensor<Q> fc = func->coeffs.find(key).get()->second.coeff().reconstruct_tensor();
3241
3242 if (fc.size() == 0) {
3243 // Recur down
3244 coeffs.replace(key, nodeT(coeffT(),true)); // Interior node
3245 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
3246 const keyT& child = kit.key();
3247 woT::task(coeffs.owner(child), &implT:: template unaryXXa<Q,opT>, child, func, op);
3248 }
3249 }
3250 else {
3251 tensorT t=op(key,fc);
3252 coeffs.replace(key, nodeT(coeffT(t,targs),false)); // Leaf node
3253 }
3254 }
3255
3256 /// Multiplies two functions (impl's) together. Delegates to the mulXXa() method
3257 /// @param[in] left pointer to the left function impl
3258 /// @param[in] right pointer to the right function impl
3259 /// @param[in] tol numerical tolerance
3260 template <typename L, typename R>
3261 void mulXX(const FunctionImpl<L,NDIM>* left, const FunctionImpl<R,NDIM>* right, double tol, bool fence) {
3262 if (world.rank() == coeffs.owner(cdata.key0))
3263 mulXXa(cdata.key0, left, Tensor<L>(), right, Tensor<R>(), tol);
3264 if (fence)
3265 world.gop.fence();
3266
3267 //verify_tree();
3268 }
3269
3270 /// Performs binary operation on two functions (impl's). Delegates to the binaryXXa() method
/// Only the process owning the root key starts the recursion, with empty
/// coefficient tensors for both operands.
3271 /// @param[in] left pointer to the left function impl
3272 /// @param[in] right pointer to the right function impl
3273 /// @param[in] op the binary operator
/// @param[in] fence if true, fence globally before returning
/// NOTE(review): the first line of the signature is missing from this
/// listing (the numbering jumps) -- confirm against the full header.
3274 template <typename L, typename R, typename opT>
3276 const opT& op, bool fence) {
3277 if (world.rank() == coeffs.owner(cdata.key0))
3278 binaryXXa(cdata.key0, left, Tensor<L>(), right, Tensor<R>(), op);
3279 if (fence)
3280 world.gop.fence();
3281
3282 //verify_tree();
3283 }
3284
3285 /// Performs unary operation on function impl. Delegates to the unaryXXa() method
3286 /// @param[in] func function impl of the operand
3287 /// @param[in] op the unary operator
3288 template <typename Q, typename opT>
3289 void unaryXX(const FunctionImpl<Q,NDIM>* func, const opT& op, bool fence) {
3290 if (world.rank() == coeffs.owner(cdata.key0))
3291 unaryXXa(cdata.key0, func, op);
3292 if (fence)
3293 world.gop.fence();
3294
3295 //verify_tree();
3296 }
3297
3298 /// Performs unary operation on function impl. Delegates to the unaryXXa() method
3299 /// @param[in] func function impl of the operand
3300 /// @param[in] op the unary operator
/// @param[in] fence if true, fence globally before returning
3301 template <typename Q, typename opT>
3302 void unaryXXvalues(const FunctionImpl<Q,NDIM>* func, const opT& op, bool fence) {
// NOTE(review): the statement guarded by this condition is missing from the
// listing (the numbering jumps); presumably it is the unaryXXa() call that
// the header comment mentions -- confirm against the full header.
3303 if (world.rank() == coeffs.owner(cdata.key0))
3305 if (fence)
3306 world.gop.fence();
3307
3308 //verify_tree();
3309 }
3310
3311 /// Multiplies a function (impl) with a vector of functions (impl's). Delegates to the
3312 /// mulXXveca() method.
/// Only the process owning the root key starts the recursion, passing empty
/// coefficient tensors for the left function and for every right function.
3313 /// @param[in] left pointer to the left function impl
3314 /// @param[in] vright vector of pointers to the right function impl's
3315 /// @param[in] tol numerical tolerance
3316 /// @param[out] vresult vector of pointers to the resulting function impl's
/// @param[in] fence if true, fence globally before returning
/// NOTE(review): the first line of the signature is missing from this
/// listing (the numbering jumps) -- confirm against the full header.
3317 template <typename L, typename R>
3319 const std::vector<const FunctionImpl<R,NDIM>*>& vright,
3320 const std::vector<FunctionImpl<T,NDIM>*>& vresult,
3321 double tol,
3322 bool fence) {
// one default-constructed (empty) coefficient tensor per right function
3323 std::vector< Tensor<R> > vr(vright.size());
3324 if (world.rank() == coeffs.owner(cdata.key0))
3325 mulXXveca(cdata.key0, left, Tensor<L>(), vright, vr, vresult, tol);
3326 if (fence)
3327 world.gop.fence();
3328 }
3329
3331
// Fixed-size scratch arrays, mutable so that const members can update them.
// NOTE(review): presumably per-process leaf / interior node counts filled by
// put_in_box() (indexed by process rank, capped at 1000) -- confirm.
3332 mutable long box_leaf[1000];
3333 mutable long box_interior[1000];
3334
3335 // horrifically non-scalable
// NOTE(review): presumably records the counts nl (leaf) and ni (interior)
// reported by process `from` -- confirm against the implementation.
3336 void put_in_box(ProcessID from, long nl, long ni) const;
3337
3338 /// Prints summary of data distribution
3339 void print_info() const;
3340
3341 /// Verify tree is properly constructed ... global synchronization involved
3342
3343 /// If an inconsistency is detected, prints a message describing the error and
3344 /// then throws a madness exception.
3345 ///
3346 /// This is a reasonably quick and scalable operation that is
3347 /// useful for debugging and paranoia.
3348 void verify_tree() const;
3349
3350 /// Check that parents and children are consistent
3351
3352 /// Will not check proper size of coefficients.
3353 /// Involves global communication.
/// @return presumably true if the tree is consistent -- confirm against the implementation
3354 bool verify_parents_and_children() const;
3355
3356 /// Check that the tree state and the coeffs are consistent
3357
3358 /// Will not check existence of children and/or parents.
3359 /// Involves no communication.
/// @return presumably true if the local data is consistent -- confirm against the implementation
3360 bool verify_tree_state_local() const;
3361
3362 /// Walk up the tree returning pair(key,node) for first node with coefficients
3363
3364 /// Three possibilities.
3365 ///
3366 /// 1) The coeffs are present and returned with the key of the containing node.
3367 ///
3368 /// 2) The coeffs are further up the tree ... the request is forwarded up.
3369 ///
3370 /// 3) The coeffs are further down the tree ... an empty tensor is returned.
3371 ///
3372 /// !! This routine is crying out for an optimization to
3373 /// manage the number of messages being sent ... presently
3374 /// each parent is fetched 2^(n*d) times where n is the no. of
3375 /// levels between the level of evaluation and the parent.
3376 /// Alternatively, reimplement multiply as a downward tree
3377 /// walk and just pass the parent down. Slightly less
3378 /// parallelism but much less communication.
/// @param[in] key the key at which the search starts
/// @param[in] ref remote reference to the future that receives the pair(key,coeffs)
3379 /// @todo Robert .... help!
3380 void sock_it_to_me(const keyT& key,
3381 const RemoteReference< FutureImpl< std::pair<keyT,coeffT> > >& ref) const;
3382 /// As above, except
3383 /// 3) The coeffs are constructed from the avg of nodes further down the tree
3384 /// @todo Robert .... help!
3385 void sock_it_to_me_too(const keyT& key,
3386 const RemoteReference< FutureImpl< std::pair<keyT,coeffT> > >& ref) const;
3387
3388 /// @todo help!
/// NOTE(review): the first line of this declaration (return type and name)
/// is missing from the listing (the numbering jumps) -- confirm against the
/// full header before documenting the parameters.
3390 const keyT& key,
3391 const coordT& plotlo, const coordT& plothi, const std::vector<long>& npt,
3392 bool eval_refine) const;
3393
3394
3395 /// Evaluate a cube/slice of points ... plotlo and plothi are already in simulation coordinates
3396 /// No communications
3397 /// @param[in] plotlo the coordinate of the starting point
3398 /// @param[in] plothi the coordinate of the ending point
3399 /// @param[in] npt the number of points in each dimension
/// @param[in] eval_refine defaults to false; its effect is not visible here
///            (NOTE(review): confirm against the implementation)
/// @return a tensor of values (presumably one per grid point -- confirm)
3400 Tensor<T> eval_plot_cube(const coordT& plotlo,
3401 const coordT& plothi,
3402 const std::vector<long>& npt,
3403 const bool eval_refine = false) const;
3404
3405
3406 /// Evaluate function only if point is local returning (true,value); otherwise return (false,0.0)
3407
3408 /// maxlevel is the maximum depth to search down to --- the max local depth can be
3409 /// computed with max_local_depth();
/// @param[in] xin the point (presumably in simulation coordinates, as for eval() -- confirm)
/// @param[in] maxlevel the maximum depth to search down to
3410 std::pair<bool,T> eval_local_only(const Vector<double,NDIM>& xin, Level maxlevel) ;
3411
3412
3413 /// Evaluate the function at a point in \em simulation coordinates
3414
3415 /// Only the invoking process will get the result via the
3416 /// remote reference to a future. Active messages may be sent
3417 /// to other nodes.
/// @param[in] xin the point in simulation coordinates
/// @param[in] keyin the key at which the evaluation starts
/// @param[in] ref remote reference to the future that receives the value
3418 void eval(const Vector<double,NDIM>& xin,
3419 const keyT& keyin,
3420 const typename Future<T>::remote_refT& ref);
3421
3422 /// Get the depth of the tree at a point in \em simulation coordinates
3423
3424 /// Only the invoking process will get the result via the
3425 /// remote reference to a future. Active messages may be sent
3426 /// to other nodes.
3427 ///
3428 /// This function is a minimally-modified version of eval()
/// @param[in] xin the point in simulation coordinates
/// @param[in] keyin the key at which the search starts
/// @param[in] ref remote reference to the future that receives the depth
3429 void evaldepthpt(const Vector<double,NDIM>& xin,
3430 const keyT& keyin,
3431 const typename Future<Level>::remote_refT& ref);
3432
3433 /// Get the rank of leaf box of the tree at a point in \em simulation coordinates
3434
3435 /// Only the invoking process will get the result via the
3436 /// remote reference to a future. Active messages may be sent
3437 /// to other nodes.
3438 ///
3439 /// This function is a minimally-modified version of eval()
/// @param[in] xin the point in simulation coordinates
/// @param[in] keyin the key at which the search starts
/// @param[in] ref remote reference to the future that receives the rank
3440 void evalR(const Vector<double,NDIM>& xin,
3441 const keyT& keyin,
3442 const typename Future<long>::remote_refT& ref);
3443
3444
3445 /// Computes norm of low/high-order polyn. coeffs for autorefinement test
3446
3447 /// t is a k^d tensor. In order to screen the autorefinement
3448 /// during multiplication compute the norms of
3449 /// ... lo ... the block of t for all polynomials of order < k/2
3450 /// ... hi ... the block of t for all polynomials of order >= k/2
3451 ///
3452 /// k=5 0,1,2,3,4 --> 0,1,2 ... 3,4
3453 /// k=6 0,1,2,3,4,5 --> 0,1,2 ... 3,4,5
3454 ///
3455 /// k=number of wavelets, so k=5 means max order is 4, so max exactly
3456 /// representable squarable polynomial is of order 2.
///
/// NOTE(review): with integer division "order < k/2" would give 0,1 for k=5,
/// which disagrees with the example above (0,1,2); the example suggests the
/// split is at (k-1)/2 -- confirm against the implementation.
/// @param[in] t the coefficient tensor
/// @param[out] lo receives the norm of the low-order block
/// @param[out] hi receives the norm of the high-order block
3457 void static tnorm(const tensorT& t, double* lo, double* hi);
3458
/// As above, for coefficients held as a GenTensor
3459 void static tnorm(const GenTensor<T>& t, double* lo, double* hi);
3460
/// As above, for coefficients held as an SVDTensor
/// @param[in] particle selects a particle of the tensor (presumably 1 or 2 -- confirm)
3461 void static tnorm(const SVDTensor<T>& t, double* lo, double* hi, const int particle);
3462
3463 /// This invoked if node has not been autorefined
/// @param[in] key the node to be squared
3464 void do_square_inplace(const keyT& key);
3465
3466 /// This invoked if node has been autorefined
/// @param[in] parent the key of the parent node
/// @param[in] child the key of the child node
/// @param[in] parent_coeff coefficients of the parent node
3467 void do_square_inplace2(const keyT& parent, const keyT& child, const tensorT& parent_coeff);
3468
3469 /// Always returns false (for when autorefine is not wanted)
3470 bool noautorefine(const keyT& key, const tensorT& t) const;
3471
3472 /// Returns true if this block of coeffs needs autorefining
3473 bool autorefine_square_test(const keyT& key, const nodeT& t) const;
3474
3475 /// Pointwise squaring of function with optional global fence
3476
3477 /// If not autorefining, local computation only if not fencing.
3478 /// If autorefining, may result in asynchronous communication.
/// @param[in] fence if true, fence globally
3479 void square_inplace(bool fence);
/// NOTE(review): presumably the pointwise absolute value, analogous to
/// square_inplace() -- confirm against the implementation.
3480 void abs_inplace(bool fence);
/// NOTE(review): presumably the pointwise squared absolute value, analogous
/// to square_inplace() -- confirm against the implementation.
3481 void abs_square_inplace(bool fence);
3482
3483 /// is this the same as trickle_down() ?
/// @param[in] key the current node
/// @param[in] s coefficients handed down from above (presumably the running sum -- confirm)
3484 void sum_down_spawn(const keyT& key, const coeffT& s);
3485
3486 /// After 1d push operator must sum coeffs down the tree to restore correct scaling function coefficients
/// @param[in] fence if true, fence globally
3487 void sum_down(bool fence);
3488
3489 /// perform this multiplication: h(1,2) = f(1,2) * g(1)
/// NOTE(review): several lines of this definition are missing from the
/// listing (the numbering jumps): the struct header, the typedefs ctT, ctL
/// and this_type, the members f and g, and the signature of activate() are
/// not visible -- confirm against the full header.
3490 template<size_t LDIM>
3492
3493 static bool randomize() {return false;}
3497
3498 implT* h; ///< the result function h(1,2) = f(1,2) * g(1)
3501 int particle; ///< if g is g(1) or g(2)
3502
/// default ctor: value-initializes h, f and g, and sets particle to 1
3503 multiply_op() : h(), f(), g(), particle(1) {}
3504
/// ctor taking the result function, the two coefficient trackers and the particle
3505 multiply_op(implT* h1, const ctT& f1, const ctL& g1, const int particle1)
3506 : h(h1), f(f1), g(g1), particle(particle1) {};
3507
3508 /// return true if this will be a leaf node
3509
3510 /// use generalization of tnorm for a GenTensor
/// @param[in] fcoeff coefficients of f in this box; asserted to be an SVD tensor
/// @param[in] gcoeff coefficients of g in the matching low-dimensional box
/// @param[in] key the box under consideration
3511 bool screen(const coeffT& fcoeff, const coeffT& gcoeff, const keyT& key) const {
3513 MADNESS_ASSERT(fcoeff.is_svd_tensor());
3516
3517 double glo=0.0, ghi=0.0, flo=0.0, fhi=0.0;
3518 g.get_impl()->tnorm(gcoeff.get_tensor(), &glo, &ghi);
3519 g.get_impl()->tnorm(fcoeff.get_svdtensor(),&flo,&fhi,particle);
3520
// every term of the product that involves a high-order block
3521 double total_hi=glo*fhi + ghi*flo + fhi*ghi;
3522 return (total_hi<h->truncate_tol(h->get_thresh(),key));
3523
3524 }
3525
3526 /// apply this on a FunctionNode of f and g of Key key
3527
3528 /// @param[in] key key for FunctionNode in f and g, (g: broken into particles)
3529 /// @return <this node is a leaf, coefficients of this node>
3530 std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const {
3531
3532 // bool is_leaf=(not fdatum.second.has_children());
3533 // if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT());
3534
3535 // break key into particles (these are the child keys, with f/gdatum come the parent keys)
3536 Key<LDIM> key1,key2;
3537 key.break_apart(key1,key2);
3538 const Key<LDIM> gkey= (particle==1) ? key1 : key2;
3539
3540 // get coefficients of the actual FunctionNode
3541 coeffT coeff1=f.get_impl()->parent_to_child(f.coeff(),f.key(),key);
3542 coeff1.normalize();
3543 const coeffT coeff2=g.get_impl()->parent_to_child(g.coeff(),g.key(),gkey);
3544
3545 // multiplication is done in TT_2D
3546 coeffT coeff1_2D=coeff1.convert(TensorArgs(h->get_thresh(),TT_2D));
3547 coeff1_2D.normalize();
3548
3549 bool is_leaf=screen(coeff1_2D,coeff2,key);
// boxes above level 2 are never accepted as leaves
3550 if (key.level()<2) is_leaf=false;
3551
// hcoeff stays empty unless this box is a leaf
3552 coeffT hcoeff;
3553 if (is_leaf) {
3554
3555 // convert coefficients to values
3556 coeffT hvalues=f.get_impl()->coeffs2values(key,coeff1_2D);
3557 coeffT gvalues=g.get_impl()->coeffs2values(gkey,coeff2);
3558
3559 // perform multiplication
// note the shift: particle is 1 or 2 here, multiply() receives particle-1
3560 coeffT result_val=h->multiply(hvalues,gvalues,particle-1);
3561
3562 hcoeff=h->values2coeffs(key,result_val);
3563
3564 // conversion on coeffs, not on values, because it implies truncation!
3565 if (not hcoeff.is_of_tensortype(h->get_tensor_type()))
3566 hcoeff=hcoeff.convert(h->get_tensor_args());
3567 }
3568
3569 return std::pair<bool,coeffT> (is_leaf,hcoeff);
3570 }
3571
/// build the operator for a child box: f follows the full child key, g the
/// key of the particle it acts on
3572 this_type make_child(const keyT& child) const {
3573
3574 // break key into particles
3575 Key<LDIM> key1, key2;
3576 child.break_apart(key1,key2);
3577 const Key<LDIM> gkey= (particle==1) ? key1 : key2;
3578
3579 return this_type(h,f.make_child(child),g.make_child(gkey),particle);
3580 }
3581
// NOTE(review): the signature of this member and the declaration of g1 are
// missing from the listing; the visible part activates f and queues
// forward_ctor on h's task queue.
3583 Future<ctT> f1=f.activate();
3585 return h->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
3586 &this_type::forward_ctor),h,f1,g1,particle);
3587 }
3588
/// taskq-compatible ctor: simply forwards its arguments to the constructor
3589 this_type forward_ctor(implT* h1, const ctT& f1, const ctL& g1, const int particle) {
3590 return this_type(h1,f1,g1,particle);
3591 }
3592
/// serializes h, f, g and particle, in that order
3593 template <typename Archive> void serialize(const Archive& ar) {
3594 ar & h & f & g & particle;
3595 }
3596 };
3597
3598
3599 /// add two functions f and g: result=alpha * f + beta * g
/// NOTE(review): several lines of this definition are missing from the
/// listing (the numbering jumps): the typedefs ctT and this_type, the
/// members f and g, and parts of activate() are not visible -- confirm
/// against the full header.
3600 struct add_op {
3601
3604
3605 bool randomize() const {return false;}
3606
3607 /// tracking coeffs of first and second addend
3609 /// prefactor for f, g
3610 double alpha, beta;
3611
// NOTE(review): the defaulted ctor gives alpha and beta no initializer here
3612 add_op() = default;
3613 add_op(const ctT& f, const ctT& g, const double alpha, const double beta)
3614 : f(f), g(g), alpha(alpha), beta(beta){}
3615
3616 /// if we are at the bottom of the trees, return the sum of the coeffs
/// @param[in] key the box under consideration
/// @return <this node is a leaf, coefficients of this node>; the
///         coefficients are empty unless both f and g are at a leaf
3617 std::pair<bool,coeffT> operator()(const keyT& key) const {
3618
3619 bool is_leaf=(f.is_leaf() and g.is_leaf());
3620 if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT());
3621
3622 coeffT fcoeff=f.get_impl()->parent_to_child(f.coeff(),f.key(),key);
3623 coeffT gcoeff=g.get_impl()->parent_to_child(g.coeff(),g.key(),key);
// work on a copy so that the coefficients obtained for f are left untouched
3624 coeffT hcoeff=copy(fcoeff);
3625 hcoeff.gaxpy(alpha,gcoeff,beta);
3626 hcoeff.reduce_rank(f.get_impl()->get_tensor_args().thresh);
3627 return std::pair<bool,coeffT> (is_leaf,hcoeff);
3628 }
3629
/// build the operator for a child box; the prefactors are carried over
3630 this_type make_child(const keyT& child) const {
3631 return this_type(f.make_child(child),g.make_child(child),alpha,beta);
3632 }
3633
3634 /// retrieve the coefficients (parent coeffs might be remote)
3636 Future<ctT> f1=f.activate();
3637 Future<ctT> g1=g.activate();
3638 return f.get_impl()->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
3640 }
3641
3642 /// taskq-compatible ctor
3643 this_type forward_ctor(const ctT& f1, const ctT& g1, const double alpha, const double beta) {
3644 return this_type(f1,g1,alpha,beta);
3645 }
3646
/// serializes f, g, alpha and beta, in that order
3647 template <typename Archive> void serialize(const Archive& ar) {
3648 ar & f & g & alpha & beta;
3649 }
3650
3651 };
3652
3653 /// multiply f (a pair function of NDIM) with an orbital g (LDIM=NDIM/2)
3654
3655 /// as in (with h(1,2)=*this) : h(1,2) = g(1) * f(1,2)
3656 /// use tnorm as a measure to determine if f (=*this) must be refined
/// NOTE(review): the two lines above disagree on whether *this is h or f;
/// the code below passes `this` as the result function of multiply_op.
3657 /// @param[in] f the NDIM function f=f(1,2)
3658 /// @param[in] g the LDIM function g(1) (or g(2))
3659 /// @param[in] particle 1 or 2, as in g(1) or g(2)
3660 template<size_t LDIM>
3661 void multiply(const implT* f, const FunctionImpl<T,LDIM>* g, const int particle) {
3662
// NOTE(review): the declarations of ff, gg and p, and the last statement of
// this function, are missing from the listing (the numbering jumps) --
// confirm against the full header.
3665
3666 typedef multiply_op<LDIM> coeff_opT;
3667 coeff_opT coeff_op(this,ff,gg,particle);
3668
// the apply operator is constructed on this (the result) function
3669 typedef insert_op<T,NDIM> apply_opT;
3670 apply_opT apply_op(this);
3671
// only the owner of the root key starts the traversal
3672 keyT key0=f->cdata.key0;
3673 if (world.rank() == coeffs.owner(key0)) {
3675 woT::task(p, &implT:: template forward_traverse<coeff_opT,apply_opT>, coeff_op, apply_op, key0);
3676 }
3677
3679 }
3680
3681 /// Hartree product of two LDIM functions to yield a NDIM = 2*LDIM function
/// NOTE(review): several lines of this definition are missing from the
/// listing (the numbering jumps): the typedefs this_type and ctL, one
/// constructor line and the signature of activate() are not visible --
/// confirm against the full header.
3682 template<size_t LDIM, typename leaf_opT>
3683 struct hartree_op {
3684 bool randomize() const {return false;}
3685
3688
3689 implT* result; ///< where to construct the pair function
3690 ctL p1, p2; ///< tracking coeffs of the two lo-dim functions
3691 leaf_opT leaf_op; ///< determine if a given node will be a leaf node
3692
3693 // ctor
3695 hartree_op(implT* result, const ctL& p11, const ctL& p22, const leaf_opT& leaf_op)
3696 : result(result), p1(p11), p2(p22), leaf_op(leaf_op) {
// the two factors must together span exactly the dimensions of the result
3697 MADNESS_ASSERT(LDIM+LDIM==NDIM);
3698 }
3699
/// compute the coefficients of the product in one box
/// @param[in] key the NDIM box under consideration
/// @return <this node is a leaf, coefficients of this node>; the
///         coefficients are empty if leaf_op rejects the box
3700 std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const {
3701
3702 // break key into particles (these are the child keys, with datum1/2 come the parent keys)
3703 Key<LDIM> key1,key2;
3704 key.break_apart(key1,key2);
3705
3706 // this returns the appropriate NS coeffs for key1 and key2 resp.
3707 const coeffT fcoeff=p1.coeff(key1);
3708 const coeffT gcoeff=p2.coeff(key2);
3709 bool is_leaf=leaf_op(key,fcoeff.full_tensor(),gcoeff.full_tensor());
3710 if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT());
3711
3712 // extract the sum coeffs from the NS coeffs
3713 const coeffT s1=fcoeff(p1.get_impl()->cdata.s0);
3714 const coeffT s2=gcoeff(p2.get_impl()->cdata.s0);
3715
3716 // new coeffs are simply the hartree/kronecker/outer product --
3717 coeffT coeff=outer(s1,s2,result->get_tensor_args());
3718 // no post-determination
3719 // is_leaf=leaf_op(key,coeff);
3720 return std::pair<bool,coeffT>(is_leaf,coeff);
3721 }
3722
/// build the operator for a child box: each factor follows its own particle key
3723 this_type make_child(const keyT& child) const {
3724
3725 // break key into particles
3726 Key<LDIM> key1, key2;
3727 child.break_apart(key1,key2);
3728
3729 return this_type(result,p1.make_child(key1),p2.make_child(key2),leaf_op);
3730 }
3731
// activate both factors and queue forward_ctor on the result's task queue
3733 Future<ctL> p11=p1.activate();
3734 Future<ctL> p22=p2.activate();
3735 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
3736 &this_type::forward_ctor),result,p11,p22,leaf_op);
3737 }
3738
/// taskq-compatible ctor: simply forwards its arguments to the constructor
3739 this_type forward_ctor(implT* result1, const ctL& p11, const ctL& p22, const leaf_opT& leaf_op) {
3740 return this_type(result1,p11,p22,leaf_op);
3741 }
3742
/// serializes result, p1, p2 and leaf_op, in that order
3743 template <typename Archive> void serialize(const Archive& ar) {
3744 ar & result & p1 & p2 & leaf_op;
3745 }
3746 };
3747
3748 /// traverse a non-existing tree
3749
3750 /// part II: activate coeff_op, i.e. retrieve all the necessary remote boxes (communication)
     /// and then schedule traverse_tree on this rank with the activated operator
3751 /// @param[in] coeff_op operator making the coefficients that needs activation
3752 /// @param[in] apply_op just passing thru
3753 /// @param[in] key the key we are working on
3754 template<typename coeff_opT, typename apply_opT>
3755 void forward_traverse(const coeff_opT& coeff_op, const apply_opT& apply_op, const keyT& key) const {
     // NOTE(review): listing line 3756 is missing from this excerpt
     // activate() returns a Future; it is passed to the task as the coeff_op argument of traverse_tree
3757 Future<coeff_opT> active_coeff=coeff_op.activate();
3758 woT::task(world.rank(), &implT:: template traverse_tree<coeff_opT,apply_opT>, active_coeff, apply_op, key);
3759 }
3760
3761
3762 /// traverse a non-existing tree
3763
3764 /// part I: make the coefficients, process them and continue the recursion if necessary
     ///
     /// coeff_op(key) returns (is_leaf, coeff). apply_op is always called with that result;
     /// if is_leaf is false, forward_traverse is scheduled for every child of key on the
     /// rank owning that child.
3765 /// @param[in] coeff_op operator making the coefficients and determining them being leaves
3766 /// @param[in] apply_op operator processing the coefficients
3767 /// @param[in] key the key we are currently working on
3768 template<typename coeff_opT, typename apply_opT>
3769 void traverse_tree(const coeff_opT& coeff_op, const apply_opT& apply_op, const keyT& key) const {
     // NOTE(review): listing line 3770 is missing from this excerpt
3771
     // arg.first = is_leaf, arg.second = coefficients for key
3772 typedef typename std::pair<bool,coeffT> argT;
3773 const argT arg=coeff_op(key);
3774 apply_op.operator()(key,arg.second,arg.first);
3775
3776 const bool has_children=(not arg.first);
3777 if (has_children) {
3778 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
3779 const keyT& child=kit.key();
3780 coeff_opT child_op=coeff_op.make_child(child);
3781 // spawn activation where child is local
3782 ProcessID p=coeffs.owner(child);
3783
     // explicit member-function-pointer type selects the forward_traverse instantiation to run
3784 void (implT::*ft)(const coeff_opT&, const apply_opT&, const keyT&) const = &implT::forward_traverse<coeff_opT,apply_opT>;
3785
3786 woT::task(p, ft, child_op, apply_op, child);
3787 }
3788 }
3789 }
3790
3791
3792 /// given two functions of LDIM, perform the Hartree/Kronecker/outer product
3793
3794 /// |Phi(1,2)> = |phi(1)> x |phi(2)>
     /// One traversal is started per pair (p1[i], p2[i]); all of them use an
     /// accumulate_op constructed on this object.
3795 /// @param[in] p1 vector of FunctionImpls of particle 1
3796 /// @param[in] p2 vector of FunctionImpls of particle 2 (must have the same size as p1)
3797 /// @param[in] leaf_op operator determining if a given box will be a leaf
     /// @param[in] fence if true, a global fence is issued at the end
3798 template<std::size_t LDIM, typename leaf_opT>
     // NOTE(review): p1 and p2 are taken by value, which copies both vectors of shared_ptr
     // on every call; const references would avoid that -- confirm no caller relies on the copy.
3799 void hartree_product(const std::vector<std::shared_ptr<FunctionImpl<T,LDIM>>> p1,
3800 const std::vector<std::shared_ptr<FunctionImpl<T,LDIM>>> p2,
3801 const leaf_opT& leaf_op, bool fence) {
     // every input must be in nonstandard form (with or without leaves)
3802 MADNESS_CHECK_THROW(p1.size()==p2.size(),"hartree_product: p1 and p2 must have the same size");
3803 for (auto& p : p1) MADNESS_CHECK(p->is_nonstandard() or p->is_nonstandard_with_leaves());
3804 for (auto& p : p2) MADNESS_CHECK(p->is_nonstandard() or p->is_nonstandard_with_leaves());
3805
3806 const keyT key0=cdata.key0;
3807
3808 for (std::size_t i=0; i<p1.size(); ++i) {
     // only the rank owning the root key starts the traversal
3809 if (world.rank() == this->get_coeffs().owner(key0)) {
3810
3811 // prepare the CoeffTracker
3812 CoeffTracker<T,LDIM> iap1(p1[i].get());
3813 CoeffTracker<T,LDIM> iap2(p2[i].get());
3814
3815 // the operator making the coefficients
3816 typedef hartree_op<LDIM,leaf_opT> coeff_opT;
3817 coeff_opT coeff_op(this,iap1,iap2,leaf_op);
3818
3819 // this operator processes the coeffs for this' tree: accumulate_op is used, insert_op is disabled
3820// typedef insert_op<T,NDIM> apply_opT;
3821 typedef accumulate_op<T,NDIM> apply_opT;
3822 apply_opT apply_op(this);
3823
3824 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
3825 coeff_op, apply_op, cdata.key0);
3826
3827 }
3828 }
3829
     // NOTE(review): listing line 3830 is missing from this excerpt
3831 if (fence) world.gop.fence();
3832 }
3833
3834
3835 template <typename opT, typename R>
3836 void
     // NOTE(review): the line carrying the function name and parameter list (listing line 3837)
     // is missing from this excerpt. apply_1d_realspace_push schedules
     // apply_1d_realspace_push_op<opT,R>, so this is presumably that function; the body
     // uses pop, key, c and axis, which must come from that parameter list.
     //
     // For displacements s=0,1,2,... along axis (both directions), the block r of the operator
     // is applied to c with transform_dir and the result is accumulated into the node at the
     // displaced key. The loop stops at a hard-zero operator block or after four successive
     // neglected/out-of-range blocks.
3838 const opT* op = pop.ptr;
3839 const Level n = key.level();
3840 const double cnorm = c.normf();
3841 const double tol = truncate_tol(thresh, key)*0.1; // ??? why this value????
3842
     // NOTE(review): lnew is not declared in the visible lines (listing line 3843 is missing)
3844 const Translation lold = lnew[axis];
3845 const Translation maxs = Translation(1)<<n;
3846
3847 int nsmall = 0; // Counts neglected blocks to terminate s loop
3848 for (Translation s=0; s<maxs; ++s) {
     // s==0 is visited once (direction -1 only); s>0 is visited for direction -1 and +1
3849 int maxdir = s ? 1 : -1;
3850 for (int direction=-1; direction<=maxdir; direction+=2) {
3851 lnew[axis] = lold + direction*s;
3852 if (lnew[axis] >= 0 && lnew[axis] < maxs) { // NON-ZERO BOUNDARY CONDITIONS IGNORED HERE !!!!!!!!!!!!!!!!!!!!
3853 const Tensor<typename opT::opT>& r = op->rnlij(n, s*direction, true);
3854 double Rnorm = r.normf();
3855
3856 if (Rnorm == 0.0) {
3857 return; // Hard zero means finished!
3858 }
3859
     // NOTE(review): r.normf() is evaluated again here although Rnorm already holds it
3860 if (s <= 1 || r.normf()*cnorm > tol) { // Always do kernel and neighbor
3861 nsmall = 0;
3862 tensorT result = transform_dir(c,r,axis);
3863
     // contributions below 0.3*tol are dropped without being sent
3864 if (result.normf() > tol*0.3) {
3865 Key<NDIM> dest(n,lnew);
3866 coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri());
3867 }
3868 }
3869 else {
3870 ++nsmall;
3871 }
3872 }
3873 else {
     // out-of-range displacements count as neglected blocks as well
3874 ++nsmall;
3875 }
3876 }
3877 if (nsmall >= 4) {
3878 // If have two negligible blocks in
3879 // succession in each direction interpret
3880 // this as the operator being zero beyond
3881 break;
3882 }
3883 }
3884 }
3885
     /// For every local node of f that has coefficients, schedule
     /// apply_1d_realspace_push_op on this rank with a full-tensor copy of those coefficients.
     ///
     /// @param[in] op the operator to apply
     /// @param[in] f the source function; asserted not to be compressed
     /// @param[in] axis the axis along which the operator is applied
     /// @param[in] fence if true, a global fence is issued after all tasks are scheduled
3886 template <typename opT, typename R>
3887 void
3888 apply_1d_realspace_push(const opT& op, const FunctionImpl<R,NDIM>* f, int axis, bool fence) {
3889 MADNESS_ASSERT(!f->is_compressed());
3890
3891 typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator fiterT;
3892 typedef FunctionNode<R,NDIM> fnodeT;
3893 fiterT end = f->coeffs.end();
3894 ProcessID me = world.rank();
3895 for (fiterT it=f->coeffs.begin(); it!=end; ++it) {
3896 const fnodeT& node = it->second;
3897 if (node.has_coeff()) {
3898 const keyT& key = it->first;
     // the const reference binds to the temporary returned by full_tensor_copy()
3899 const Tensor<R>& c = node.coeff().full_tensor_copy();
     // NOTE(review): the remaining task arguments (listing line 3901) are missing from this excerpt
3900 woT::task(me, &implT:: template apply_1d_realspace_push_op<opT,R>,
3902 }
3903 }
3904 if (fence) world.gop.fence();
3905 }
3906
3908 const implT* f,
3909 const keyT& key,
3910 const std::pair<keyT,coeffT>& left,
3911 const std::pair<keyT,coeffT>& center,
3912 const std::pair<keyT,coeffT>& right);
3913
3914 void do_diff1(const DerivativeBase<T,NDIM>* D,
3915 const implT* f,
3916 const keyT& key,
3917 const std::pair<keyT,coeffT>& left,
3918 const std::pair<keyT,coeffT>& center,
3919 const std::pair<keyT,coeffT>& right);
3920
3921 // Called by result function to differentiate f
3922 void diff(const DerivativeBase<T,NDIM>* D, const implT* f, bool fence);
3923
3924 /// Returns key of general neighbor enforcing BC
3925
3926 /// Out of volume keys are mapped to enforce the BC as follows.
3927 /// * Periodic BC map back into the volume and return the correct key
3928 /// * non-periodic BC - returns invalid() to indicate out of volume
3929 keyT neighbor(const keyT& key, const keyT& disp, const array_of_bools<NDIM>& is_periodic) const;
3930
3931 /// Returns key of general neighbor that resides in-volume
3932
3933 /// Out of volume keys are mapped to invalid()
3934 keyT neighbor_in_volume(const keyT& key, const keyT& disp) const;
3935
3936 /// find_me. Called by diff_bdry to get coefficients of boundary function
3937 Future< std::pair<keyT,coeffT> > find_me(const keyT& key) const;
3938
3939 /// return a std::pair<key, node>, which MUST exist
3940 std::pair<Key<NDIM>,ShallowNode<T,NDIM> > find_datum(keyT key) const;
3941
3942 /// multiply the ket with a one-electron potential rr(1,2)= f(1,2)*g(1)
3943
3944 /// @param[in] val_ket function values of f(1,2)
3945 /// @param[in] val_pot function values of g(1)
3946 /// @param[in] particle if 0 then g(1), if 1 then g(2)
3947 /// @return the resulting function values
3948 coeffT multiply(const coeffT& val_ket, const coeffT& val_pot, int particle) const;
3949
3950
3951 /// given several coefficient tensors, assemble a result tensor
3952
3953 /// the result looks like: (v(1,2) + v(1) + v(2)) |ket(1,2)>
3954 /// or (v(1,2) + v(1) + v(2)) |p(1) p(2)>
3955 /// i.e. coefficients for the ket and coefficients for the two particles are
3956 /// mutually exclusive. All potential terms are optional, just pass in empty coeffs.
3957 /// @param[in] key the key of the FunctionNode to which these coeffs belong
3958 /// @param[in] coeff_ket coefficients of the ket
3959 /// @param[in] vpotential1 function values of the potential for particle 1
3960 /// @param[in] vpotential2 function values of the potential for particle 2
3961 /// @param[in] veri function values for the 2-particle potential
3962 coeffT assemble_coefficients(const keyT& key, const coeffT& coeff_ket,
3963 const coeffT& vpotential1, const coeffT& vpotential2,
3964 const tensorT& veri) const;
3965
3966
3967
3968 template<std::size_t LDIM>
3972 double error=0.0;
3973 double lo=0.0, hi=0.0, lo1=0.0, hi1=0.0, lo2=0.0, hi2=0.0;
3974
3976 pointwise_multiplier(const Key<NDIM> key, const coeffT& clhs) : coeff_lhs(clhs) {
     // Keeps the lhs coefficients in coeff_lhs and caches their function values in val_lhs.
     // NOTE(review): fcf is not declared in the visible lines (listing line 3977 is missing).
3978 val_lhs=fcf.coeffs2values(key,coeff_lhs);
3979 error=0.0;
     // NOTE(review): the body of this branch (listing lines 3982-3983) is missing from this excerpt
3981 if (coeff_lhs.is_svd_tensor()) {
3984 }
3985 }
3986
3987 /// multiply values of rhs and lhs, result on rhs, rhs and lhs are of the same dimensions
     ///
     /// The product coefficients are returned; coeff_rhs itself is const and not modified.
     /// Side effect: the member `error` is overwritten with the error estimate of this product.
     /// @param[in] key the key the coefficients belong to
     /// @param[in] coeff_rhs coefficients of the rhs; dim(0) must equal that of coeff_lhs
     /// @return coefficients of the product
3988 tensorT operator()(const Key<NDIM> key, const tensorT& coeff_rhs) {
3989
3990 MADNESS_ASSERT(coeff_rhs.dim(0)==coeff_lhs.dim(0));
     // NOTE(review): listing line 3991 is missing; fcf used below is not declared in the visible lines
3992
3993 // the tnorm estimate is not tight enough to be efficient, better use oversampling
     // use_tnorm is hard-wired to false, so the first branch below is never taken
3994 bool use_tnorm=false;
3995 if (use_tnorm) {
3996 double rlo, rhi;
3997 implT::tnorm(coeff_rhs,&rlo,&rhi);
3998 error = hi*rlo + rhi*lo + rhi*hi;
3999 tensorT val_rhs=fcf.coeffs2values(key, coeff_rhs);
4000 val_rhs.emul(val_lhs.full_tensor());
4001 return fcf.values2coeffs(key,val_rhs);
4002 } else { // use quadrature of order k+oversampling
4003
4004 auto& cdata=FunctionCommonData<T,NDIM>::get(coeff_rhs.dim(0)); // order k of the input
4005 auto& cdata_npt=FunctionCommonData<T,NDIM>::get(coeff_rhs.dim(0)+oversampling); // npt=k+oversampling
4006 FunctionCommonFunctionality<T,NDIM> fcf_hi_npt(cdata_npt);
4007
4008 // coeffs2values for rhs: k -> npt (coeffs are zero-padded into the larger tensor)
4009 tensorT coeff1(cdata_npt.vk);
4010 coeff1(cdata.s0)=coeff_rhs; // s0 is smaller than vk!
4011 tensorT val_rhs_k1=fcf_hi_npt.coeffs2values(key,coeff1);
4012
4013 // coeffs2values for lhs: k -> npt
4014 tensorT coeff_lhs_k1(cdata_npt.vk);
4015 coeff_lhs_k1(cdata.s0)=std::as_const(coeff_lhs).full_tensor();
4016 tensorT val_lhs_k1=fcf_hi_npt.coeffs2values(key,coeff_lhs_k1);
4017
4018 // multiply
4019 val_lhs_k1.emul(val_rhs_k1);
4020
4021 // values2coeffs: npt -> k
4022 tensorT result1=fcf_hi_npt.values2coeffs(key,val_lhs_k1);
4023
4024 // extract coeffs up to k
4025 tensorT result=copy(result1(cdata.s0));
     // the error estimate is the norm of the coefficients outside the order-k block
4026 result1(cdata.s0)=0.0;
4027 error=result1.normf();
4028 return result;
4029 }
4030 }
4031
4032 /// multiply values of rhs and lhs, result on rhs, rhs and lhs are of different dimensions
     ///
     /// The LDIM rhs is expanded to NDIM as an outer product with a tensor of ones, on the
     /// side selected by particle, and multiplied with the values of coeff_lhs.
     /// Side effect: the member `error` is overwritten with the norm of the product
     /// coefficients that fall outside the s0 block.
     /// @param[in] key the NDIM key the lhs coefficients belong to
     /// @param[in] coeff_rhs LDIM coefficients of the rhs
     /// @param[in] particle 1: rhs acts on the first LDIM dimensions, 2: on the last
     /// @return coefficients of the product
4033 coeffT operator()(const Key<NDIM> key, const tensorT& coeff_rhs, const int particle) {
4034 Key<LDIM> key1, key2;
4035 key.break_apart(key1,key2);
4036 const long k=coeff_rhs.dim(0);
     // NOTE(review): listing lines 4037 and 4040-4042 are missing; cdata, fcf_hi, fcf_lo_npt
     // and fcf_hi_npt used below are not declared in the visible lines.
4038 auto& cdata_lowdim=FunctionCommonData<T,LDIM>::get(k);
4039 FunctionCommonFunctionality<T,LDIM> fcf_lo(cdata_lowdim);
4043
4044
4045 // make hi-dim values from lo-dim coeff_rhs on npt grid points
4046 tensorT ones=tensorT(fcf_lo_npt.cdata.vk);
4047 ones=1.0;
4048
4049 tensorT coeff_rhs_npt1(fcf_lo_npt.cdata.vk);
4050 coeff_rhs_npt1(fcf_lo.cdata.s0)=coeff_rhs;
     // NOTE(review): key1 is used for the rhs values regardless of particle -- confirm this is intended
4051 tensorT val_rhs_npt1=fcf_lo_npt.coeffs2values(key1,coeff_rhs_npt1);
4052
4053 TensorArgs targs(-1.0,TT_2D);
4054 coeffT val_rhs;
     // NOTE(review): for any particle other than 1 or 2, val_rhs stays default-constructed
4055 if (particle==1) val_rhs=outer(val_rhs_npt1,ones,targs);
4056 if (particle==2) val_rhs=outer(ones,val_rhs_npt1,targs);
4057
4058 // make values from hi-dim coeff_lhs on npt grid points
4059 coeffT coeff_lhs_k1(fcf_hi_npt.cdata.vk,coeff_lhs.tensor_type());
4060 coeff_lhs_k1(fcf_hi.cdata.s0)+=coeff_lhs;
4061 coeffT val_lhs_npt=fcf_hi_npt.coeffs2values(key,coeff_lhs_k1);
4062
4063 // multiply
4064 val_lhs_npt.emul(val_rhs);
4065
4066 // values2coeffs: npt -> k
4067 coeffT result1=fcf_hi_npt.values2coeffs(key,val_lhs_npt);
4068
4069 // extract coeffs up to k
4070 coeffT result=copy(result1(cdata.s0));
4071 result1(cdata.s0)=0.0;
4072 error=result1.normf();
4073 return result;
4074 }
4075
4076 template <typename Archive> void serialize(const Archive& ar) {
     // pass the error estimate, the lo/hi members, val_lhs and coeff_lhs through the archive
4077 ar & error & lo & lo1 & lo2 & hi & hi1& hi2 & val_lhs & coeff_lhs;
4078 }
4079
4080
4081 };
4082
4083 /// given a ket and the 1- and 2-electron potentials, construct the function V phi
4084
4085 /// small memory footprint version of Vphi_op: use the NS form to have information
4086 /// about parent and children to determine if a box is a leaf. This will require
4087 /// compression of the constituent functions, which will lead to more memory usage
4088 /// there, but will avoid oversampling of the result function.
4089 template<typename opT, size_t LDIM>
4090 struct Vphi_op_NS {
4091
4092 bool randomize() const {return true;}
4093
4097
4098 implT* result; ///< where to construct Vphi, no need to track parents
4099 opT leaf_op; ///< deciding if a given FunctionNode will be a leaf node
4100 ctT iaket; ///< the ket of a pair function (exclusive with p1, p2)
4101 ctL iap1, iap2; ///< the particles 1 and 2 (exclusive with ket)
4102 ctL iav1, iav2; ///< potentials for particles 1 and 2
4103 const implT* eri; ///< 2-particle potential, must be on-demand
4104
4105 bool have_ket() const {return iaket.get_impl();} // true if the ket tracker holds an impl
4106 bool have_v1() const {return iav1.get_impl();} // true if the tracker of potential 1 holds an impl
4107 bool have_v2() const {return iav2.get_impl();} // true if the tracker of potential 2 holds an impl
4108 bool have_eri() const {return eri;} // true if the eri pointer is non-null
4109
4110 void accumulate_into_result(const Key<NDIM>& key, const coeffT& coeff) const {
     // NOTE(review): the body (listing line 4111) is missing from this excerpt
4112 }
4113
4114 // ctor
     // NOTE(review): listing lines 4115, 4119 and 4123 are missing from this excerpt, i.e. the
     // first part of the member initializer list and the statement following the comment below.
4116 Vphi_op_NS(implT* result, const opT& leaf_op, const ctT& iaket,
4117 const ctL& iap1, const ctL& iap2, const ctL& iav1, const ctL& iav2,
4118 const implT* eri)
4120 , iav1(iav1), iav2(iav2), eri(eri) {
4121
4122 // 2-particle potential must be on-demand
4124 }
4125
4126 /// make and insert the coefficients into result's tree
     ///
     /// Either accumulates the sum coefficients for key into result, or spawns the recursion
     /// for all children via continue_recursion. Every return path hands back
     /// (true, empty coeffT), either directly or via continue_recursion.
     /// @param[in] key the key to work on
4127 std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const {
4128
     // NOTE(review): listing line 4129 is missing from this excerpt
4130 if(leaf_op.do_pre_screening()){
4131 // this means that we only construct the boxes which are leaf boxes from the other function in the leaf_op
4132 if(leaf_op.pre_screening(key)){
4133 // construct sum_coefficients, insert them and leave
     // (the error estimate returned alongside is not used on this path)
4134 auto [sum_coeff, error]=make_sum_coeffs(key);
4135 accumulate_into_result(key,sum_coeff);
4136 return std::pair<bool,coeffT> (true,coeffT());
4137 }else{
     // not a leaf: recurse into all 2^NDIM children, none of them pre-marked as leaf
4138 return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key);
4139 }
4140 }
4141
4142 // this means that the function has to be completely constructed and not mirrored by another function
4143
4144 // if the initial level is not reached then this must not be a leaf box
4145 size_t il = result->get_initial_level();
     // NOTE(review): listing line 4146 is missing from this excerpt
4147 if(key.level()<int(il)){
4148 return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key);
4149 }
4150 // if further refinement is needed (because we are at a special box, special point)
4151 // and the special_level is not reached then this must not be a leaf box
4152 if(key.level()<result->get_special_level() and leaf_op.special_refinement_needed(key)){
4153 return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key);
4154 }
4155
4156 auto [sum_coeff,error]=make_sum_coeffs(key);
4157
4158 // coeffs are leaf (for whatever reason), insert into tree and stop recursion
4159 if(leaf_op.post_screening(key,sum_coeff)){
4160 accumulate_into_result(key,sum_coeff);
4161 return std::pair<bool,coeffT> (true,coeffT());
4162 }
4163
4164 // coeffs are accurate, insert into tree and stop recursion
4165 if(error<result->truncate_tol(result->get_thresh(),key)){
4166 accumulate_into_result(key,sum_coeff);
4167 return std::pair<bool,coeffT> (true,coeffT());
4168 }
4169
4170 // coeffs are inaccurate, continue recursion
4171 std::vector<bool> child_is_leaf(1<<NDIM,false);
4172 return continue_recursion(child_is_leaf,tensorT(),key);
4173 }
4174
4175
4176 /// loop over all children and either insert their sum coeffs or continue the recursion
4177
4178 /// @param[in] child_is_leaf for each child: is it a leaf?
4179 /// @param[in] coeffs coefficient tensor with 2^N sum coeffs (=unfiltered NS coeffs)
4180 /// @param[in] key the key for the NS coeffs (=parent key of the children)
4181 /// @return to avoid recursion outside this return: std::pair<is_leaf,coeff> = true,coeffT()
     // NOTE(review): child_is_leaf is taken by value (a const copy of the vector) -- a const
     // reference would avoid the copy; confirm before changing the signature.
4182 std::pair<bool,coeffT> continue_recursion(const std::vector<bool> child_is_leaf,
4183 const tensorT& coeffs, const keyT& key) const {
     // i runs in step with the child iterator and indexes child_is_leaf
4184 std::size_t i=0;
4185 for (KeyChildIterator<NDIM> kit(key); kit; ++kit, ++i) {
4186 keyT child=kit.key();
4187 bool is_leaf=child_is_leaf[i];
4188
4189 if (is_leaf) {
4190 // insert the sum coeffs
     // NOTE(review): iop is not declared in the visible lines (listing line 4191 is missing)
4192 iop(child,coeffT(copy(coeffs(result->child_patch(child))),result->get_tensor_args()),is_leaf);
4193 } else {
4194 this_type child_op=this->make_child(child);
4195 noop<T,NDIM> no;
4196 // spawn activation where child is local
4197 ProcessID p=result->get_coeffs().owner(child);
4198
4199 void (implT::*ft)(const Vphi_op_NS<opT,LDIM>&, const noop<T,NDIM>&, const keyT&) const = &implT:: template forward_traverse< Vphi_op_NS<opT,LDIM>, noop<T,NDIM> >;
4200 result->task(p, ft, child_op, no, child);
4201 }
4202 }
4203 // return empty sum coeffs; also return always is_leaf=true:
4204 // the recursion is continued within this struct, not outside in traverse_tree!
4205 return std::pair<bool,coeffT> (true,coeffT());
4206 }
4207
     /// coefficients of the 2-particle potential for key, taken from eri's functor
     ///
     /// If the functor provides coefficients they are used directly (as full tensor);
     /// otherwise the functor is evaluated with fcube on eri's quadrature points (quad_x)
     /// and the values are transformed to coefficients.
4208 tensorT eri_coeffs(const keyT& key) const {
     // NOTE(review): listing lines 4209-4210 are missing from this excerpt
4211 if (eri->get_functor()->provides_coeff()) {
4212 return eri->get_functor()->coeff(key).full_tensor();
4213 } else {
4214 tensorT val_eri(eri->cdata.vk);
4215 eri->fcube(key,*(eri->get_functor()),eri->cdata.quad_x,val_eri);
4216 return eri->values2coeffs(key,val_eri);
4217 }
4218 }
4219
4220 /// the error is computed from the d coefficients of the constituent functions
4221
4222 /// the result is h_n = P_n(f g), computed as h_n \approx Pn(f_n g_n)
4223 /// its error is therefore
4224 /// h_n = (f g)_n = (Pn(f) + Qn(f)) (Pn(g) + Qn(g))
4225 /// = Pn(fn gn) + Qn(fn gn) + Pn(f) Qn(g) + Qn(f) Pn(g) + Qn(f) Qn(g)
4226 /// the first term is what we compute, the second term is estimated by tnorm (in another function),
4227 /// the third to last terms are estimated in this function by e.g.: Qn(f)Pn(g) < ||Qn(f)|| ||Pn(g)||
     ///
     /// For each potential present (v1, v2, eri) the estimate
     /// snorm*dnorm_ket + dnorm*snorm_ket + dnorm*dnorm_ket is added; without any potential
     /// the result is dnorm_ket.
     // NOTE(review): the first line of the signature (listing line 4228) is missing from this
     // excerpt; make_sum_coeffs calls compute_error_from_inaccurate_refinement(key,ceri).
4229 const tensorT& ceri) const {
4230 double error = 0.0;
4231 Key<LDIM> key1, key2;
4232 key.break_apart(key1,key2);
4233
4234 PROFILE_BLOCK(compute_error);
     // s and d norms of the ket, either taken directly or built from the two particle functions
4235 double dnorm_ket, snorm_ket;
4236 if (have_ket()) {
4237 snorm_ket=iaket.coeff(key).normf();
4238 dnorm_ket=iaket.dnorm(key);
4239 } else {
4240 double s1=iap1.coeff(key1).normf();
4241 double s2=iap2.coeff(key2).normf();
4242 double d1=iap1.dnorm(key1);
4243 double d2=iap2.dnorm(key2);
4244 snorm_ket=s1*s2;
4245 dnorm_ket=s1*d2 + s2*d1 + d1*d2;
4246 }
4247
4248 if (have_v1()) {
4249 double snorm=iav1.coeff(key1).normf();
4250 double dnorm=iav1.dnorm(key1);
4251 error+=snorm*dnorm_ket + dnorm*snorm_ket + dnorm*dnorm_ket;
4252 }
4253 if (have_v2()) {
4254 double snorm=iav2.coeff(key2).normf();
4255 double dnorm=iav2.dnorm(key2);
4256 error+=snorm*dnorm_ket + dnorm*snorm_ket + dnorm*dnorm_ket;
4257 }
4258 if (have_eri()) {
     // split ceri into its s0 block (snorm) and everything outside of it (dnorm)
4259 tensorT s_coeffs=ceri(result->cdata.s0);
4260 double snorm=s_coeffs.normf();
4261 tensorT d=copy(ceri);
4262 d(result->cdata.s0)=0.0;
4263 double dnorm=d.normf();
4264 error+=snorm*dnorm_ket + dnorm*snorm_ket + dnorm*dnorm_ket;
4265 }
4266
4267 bool no_potential=not ((have_v1() or have_v2() or have_eri()));
4268 if (no_potential) {
4269 error=dnorm_ket;
4270 }
4271 return error;
4272 }
4273
4274 /// make the sum coeffs for key
     ///
     /// @param[in] key the key to work on
     /// @return pair of (coefficients of V*ket for key, error estimate); the error is the
     ///         refinement error plus pointwise_multiplier::error of every product performed.
     ///         Without any potential the ket coefficients themselves are returned.
4275 std::pair<coeffT,double> make_sum_coeffs(const keyT& key) const {
     // NOTE(review): listing line 4276 is missing from this excerpt
4277 // break key into particles
4278 Key<LDIM> key1, key2;
4279 key.break_apart(key1,key2);
4280
4281 // bool printme=(int(key.translation()[0])==int(std::pow(key.level(),2)/2)) and
4282 // (int(key.translation()[1])==int(std::pow(key.level(),2)/2)) and
4283 // (int(key.translation()[2])==int(std::pow(key.level(),2)/2));
4284
4285// printme=false;
4286
4287 // get/make all coefficients
     // the ket is either tracked directly or built as outer product of the two particle functions
4288 const coeffT coeff_ket = (iaket.get_impl()) ? iaket.coeff(key)
4289 : outer(iap1.coeff(key1),iap2.coeff(key2),result->get_tensor_args());
4290 const coeffT cpot1 = (have_v1()) ? iav1.coeff(key1) : coeffT();
4291 const coeffT cpot2 = (have_v2()) ? iav2.coeff(key2) : coeffT();
4292 const tensorT ceri = (have_eri()) ? eri_coeffs(key) : tensorT();
4293
4294 // compute first part of the total error
4295 double refine_error=compute_error_from_inaccurate_refinement(key,ceri);
4296 double error=refine_error;
4297
4298 // prepare the multiplication
4299 pointwise_multiplier<LDIM> pm(key,coeff_ket);
4300
4301 // perform the multiplication, compute tnorm part of the total error
     // pm.error is overwritten by every call of pm, hence it is added right after each product
4302 coeffT cresult(result->cdata.vk,result->get_tensor_args());
4303 if (have_v1()) {
4304 cresult+=pm(key,cpot1.get_tensor(),1);
4305 error+=pm.error;
4306 }
4307 if (have_v2()) {
4308 cresult+=pm(key,cpot2.get_tensor(),2);
4309 error+=pm.error;
4310 }
4311
4312 if (have_eri()) {
     // the eri product is done on full tensors and converted back with result's tensor args
4313 tensorT result1=cresult.full_tensor_copy();
4314 result1+=pm(key,copy(ceri(result->cdata.s0)));
4315 cresult=coeffT(result1,result->get_tensor_args());
4316 error+=pm.error;
4317 } else {
     // NOTE(review): the body of this else branch (listing line 4318) is missing from this excerpt
4319 }
4320 if ((not have_v1()) and (not have_v2()) and (not have_eri())) {
4321 cresult=coeff_ket;
4322 }
4323
4324 return std::make_pair(cresult,error);
4325 }
4326
4327 this_type make_child(const keyT& child) const {
     // Build the op for a child key: the ket tracker descends to child, the particle and
     // potential trackers descend to the respective half of child; result, leaf_op and eri
     // are passed on unchanged.
4328
4329 // break key into particles
4330 Key<LDIM> key1, key2;
4331 child.break_apart(key1,key2);
4332
4333 return this_type(result,leaf_op,iaket.make_child(child),
4334 iap1.make_child(key1),iap2.make_child(key2),
4335 iav1.make_child(key1),iav2.make_child(key2),eri);
4336 }
4337
4339 Future<ctT> iaket1=iaket.activate();
     // NOTE(review): the signature line of this member (listing line 4338) is missing from this
     // excerpt; forward_traverse calls coeff_op.activate(), so this is presumably activate().
     // All five trackers are activated and forward_ctor is queued with the resulting futures.
4340 Future<ctL> iap11=iap1.activate();
4341 Future<ctL> iap21=iap2.activate();
4342 Future<ctL> iav11=iav1.activate();
4343 Future<ctL> iav21=iav2.activate();
4344 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
4345 &this_type::forward_ctor),result,leaf_op,
4346 iaket1,iap11,iap21,iav11,iav21,eri);
4347 }
4348
4349 this_type forward_ctor(implT* result1, const opT& leaf_op, const ctT& iaket1,
4350 const ctL& iap11, const ctL& iap21, const ctL& iav11, const ctL& iav21,
4351 const implT* eri1) {
     // construct a new op from the arguments; queued as a task once the tracker futures are ready
4352 return this_type(result1,leaf_op,iaket1,iap11,iap21,iav11,iav21,eri1);
4353 }
4354
4355 /// serialize this (needed for use in recursive_op)
4356 template <typename Archive> void serialize(const Archive& ar) {
     // archive order: ket tracker, eri, result, leaf_op, then the particle and potential trackers
4357 ar & iaket & eri & result & leaf_op & iap1 & iap2 & iav1 & iav2;
4358 }
4359 };
4360
4361 /// assemble the function V*phi using V and phi given from the functor
4362
4363 /// this function must have been constructed using the CompositeFunctorInterface.
4364 /// The interface provides one- and two-electron potentials, and the ket, which are
4365 /// assembled to give V*phi.
     /// make_Vphi_only is called (without fence) once per entry of impl_ket_vector and once
     /// per pair (impl_p1_vector[i], impl_p2_vector[i]) of the functor.
4366 /// @param[in] leaf_op operator to decide if a given node is a leaf node
4367 /// @param[in] fence global fence; if true the functor's functions are made redundant first
     ///            and the result is fenced and summed down at the end
4368 template<typename opT>
4369 void make_Vphi(const opT& leaf_op, const bool fence=true) {
4370
4371 constexpr size_t LDIM=NDIM/2;
4372 MADNESS_CHECK_THROW(NDIM==LDIM*2,"make_Vphi only works for even dimensions");
4373
4374
4375 // keep the functor available, but remove it from the result
4376 // result will return false upon is_on_demand(), which is necessary for the
4377 // CoeffTracker to track the parent coeffs correctly for error_leaf_op
4378 std::shared_ptr< FunctionFunctorInterface<T,NDIM> > func2(this->get_functor());
4379 this->unset_functor();
4380
     // NOTE(review): listing lines 4381 and 4383 are missing from this excerpt; the line below
     // is the tail of the statement that presumably initializes func (used below) -- confirm.
4382 dynamic_cast<CompositeFunctorInterface<T,NDIM,LDIM>* >(&(*func2));
4384
4385 // with a fence the functions are made redundant here; without it they must already be redundant
4386 if (fence) func->make_redundant(true); // no-op if already redundant
4387 MADNESS_CHECK_THROW(func->check_redundant(),"make_Vphi requires redundant functions");
4388
4389 // loop over all functions in the functor (either ket or particles)
4390 for (auto& ket : func->impl_ket_vector) {
4391 FunctionImpl<T,NDIM>* eri=func->impl_eri.get();
4392 FunctionImpl<T,LDIM>* v1=func->impl_m1.get();
4393 FunctionImpl<T,LDIM>* v2=func->impl_m2.get();
4394 FunctionImpl<T,LDIM>* p1=nullptr;
4395 FunctionImpl<T,LDIM>* p2=nullptr;
4396 make_Vphi_only(leaf_op,ket.get(),v1,v2,p1,p2,eri,false);
4397 }
4398
     // same as above, but with the ket given as a pair of particle functions
4399 for (std::size_t i=0; i<func->impl_p1_vector.size(); ++i) {
4400 FunctionImpl<T,NDIM>* ket=nullptr;
4401 FunctionImpl<T,NDIM>* eri=func->impl_eri.get();
4402 FunctionImpl<T,LDIM>* v1=func->impl_m1.get();
4403 FunctionImpl<T,LDIM>* v2=func->impl_m2.get();
4404 FunctionImpl<T,LDIM>* p1=func->impl_p1_vector[i].get();
4405 FunctionImpl<T,LDIM>* p2=func->impl_p2_vector[i].get();
4406 make_Vphi_only(leaf_op,ket,v1,v2,p1,p2,eri,false);
4407 }
4408
4409 // some post-processing:
4410 // - FunctionNode::accumulate() uses buffer -> add the buffer contents to the actual coefficients
4411 // - the operation constructs sum coefficients on all scales -> sum down to get a well-defined tree-state
4412 if (fence) {
4413 world.gop.fence();
     // NOTE(review): listing lines 4414 and 4416 are missing from this excerpt
4415 sum_down(true);
4417 }
4418
4419
4420 }
4421
4422 /// assemble the function V*phi using V and phi given from the functor
4423
4424 /// this function must have been constructed using the CompositeFunctorInterface.
4425 /// The interface provides one- and two-electron potentials, and the ket, which are
4426 /// assembled to give V*phi.
     /// Builds the coefficient trackers and a Vphi_op_NS and starts forward_traverse on the
     /// root key, on the rank that owns it.
4427 /// @param[in] leaf_op operator to decide if a given node is a leaf node
4428 /// @param[in] fence global fence
4429 template<typename opT, std::size_t LDIM>
     // NOTE(review): the first lines of the signature (listing lines 4430-4433) are missing from
     // this excerpt; make_Vphi calls make_Vphi_only(leaf_op,ket,v1,v2,p1,p2,eri,false).
4434 const bool fence=true) {
4435
4436 // prepare the CoeffTracker
4437 CoeffTracker<T,NDIM> iaket(ket);
4438 CoeffTracker<T,LDIM> iap1(p1);
4439 CoeffTracker<T,LDIM> iap2(p2);
4440 CoeffTracker<T,LDIM> iav1(v1);
4441 CoeffTracker<T,LDIM> iav2(v2);
4442
4443 // the operator making the coefficients
4444 typedef Vphi_op_NS<opT,LDIM> coeff_opT;
4445 coeff_opT coeff_op(this,leaf_op,iaket,iap1,iap2,iav1,iav2,eri);
4446
4447 // apply_op is a noop: Vphi_op_NS::operator() calls accumulate_into_result itself and returns empty coeffs
4448 typedef noop<T,NDIM> apply_opT;
4449 apply_opT apply_op;
4450
4451 if (world.rank() == coeffs.owner(cdata.key0)) {
4452 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
4453 coeff_op, apply_op, cdata.key0);
4454 }
4455
     // NOTE(review): listing line 4456 is missing from this excerpt
4457 if (fence) world.gop.fence();
4458
4459 }
4460
4461 /// Permute the dimensions of f according to map, result on this
4462 void mapdim(const implT& f, const std::vector<long>& map, bool fence);
4463
4464 /// mirror the dimensions of f according to map, result on this
4465 void mirror(const implT& f, const std::vector<long>& mirror, bool fence);
4466
4467 /// map and mirror the translation index and the coefficients, result on this
4468
4469 /// first map the dimensions, then mirror!
4470 /// this = mirror(map(f))
4471 void map_and_mirror(const implT& f, const std::vector<long>& map,
4472 const std::vector<long>& mirror, bool fence);
4473
4474 /// take the average of two functions, similar to: this=0.5*(this+rhs)
4475
4476 /// works in either basis and also in nonstandard form
4477 void average(const implT& rhs);
4478
4479 /// change the tensor type of the coefficients in the FunctionNode
4480
4481 /// @param[in] targs target tensor arguments (threshold and full/low rank)
4482 void change_tensor_type1(const TensorArgs& targs, bool fence);
4483
4484 /// reduce the rank of the coefficients tensors
4485
4486 /// @param[in] thresh threshold for the rank reduction
4487 void reduce_rank(const double thresh, bool fence);
4488
4489
4490 /// remove all nodes with level higher than n
4491 void chop_at_level(const int n, const bool fence=true);
4492
4493 /// compute norm of s and d coefficients for all nodes
4494 void compute_snorm_and_dnorm(bool fence=true);
4495
4496 /// compute the norm of the wavelet coefficients
     // NOTE(review): the struct header and its typedefs/members (listing lines 4497-4498 and
     // 4500-4502) are missing from this excerpt; only operator() and the closing brace remain.
4499
4503
     // called per element of the iterated range: recomputes snorm and dnorm of the node
     // using cdata; always returns true
4504 bool operator()(typename rangeT::iterator& it) const {
4505 auto& node=it->second;
4506 node.recompute_snorm_and_dnorm(cdata);
4507 return true;
4508 }
4509 };
4510
4511
4512 T eval_cube(Level n, coordT& x, const tensorT& c) const;
4513
4514 /// Transform sum coefficients at level n to sums+differences at level n-1
4515
4516 /// Given scaling function coefficients s[n][l][i] and s[n][l+1][i]
4517 /// return the scaling function and wavelet coefficients at the
4518 /// coarser level. I.e., decompose Vn using Vn = Vn-1 + Wn-1.
4519 /// \code
4520 /// s_i = sum(j) h0_ij*s0_j + h1_ij*s1_j
4521 /// d_i = sum(j) g0_ij*s0_j + g1_ij*s1_j
4522 /// \endcode
4523 /// Returns a new tensor and has no side effects. Works for any
4524 /// number of dimensions.
4525 ///
4526 /// No communication involved.
4527 tensorT filter(const tensorT& s) const;
4528
4529 coeffT filter(const coeffT& s) const;
4530
4531 /// Transform sums+differences at level n to sum coefficients at level n+1
4532
4533 /// Given scaling function and wavelet coefficients (s and d)
4534 /// returns the scaling function coefficients at the next finer
4535 /// level. I.e., reconstruct Vn using Vn = Vn-1 + Wn-1.
4536 /// \code
4537 /// s0 = sum(j) h0_ji*s_j + g0_ji*d_j
4538 /// s1 = sum(j) h1_ji*s_j + g1_ji*d_j
4539 /// \endcode
4540 /// Returns a new tensor and has no side effects
4541 ///
4542 /// If (sonly) ... then ss is only the scaling function coeff (and
4543 /// assume the d are zero). Works for any number of dimensions.
4544 ///
4545 /// No communication involved.
4546 tensorT unfilter(const tensorT& s) const;
4547
4548 coeffT unfilter(const coeffT& s) const;
4549
4550 /// downsample the sum coefficients of level n+1 to sum coeffs on level n
4551
4552 /// specialization of the filter method, will yield only the sum coefficients
4553 /// @param[in] key key of level n
4554 /// @param[in] v vector of sum coefficients of level n+1
4555 /// @return sum coefficients on level n in full tensor format
4556 tensorT downsample(const keyT& key, const std::vector< Future<coeffT > >& v) const;
4557
4558 /// upsample the sum coefficients of level n to sum coeffs on level n+1
4559
4560 /// specialization of the unfilter method, will transform only the sum coefficients
4561 /// @param[in] key key of level n+1
4562 /// @param[in] coeff sum coefficients of level n (does NOT belong to key!!)
4563 /// @return sum coefficients on level n+1
4564 coeffT upsample(const keyT& key, const coeffT& coeff) const;
4565
4566 /// Projects old function into new basis (only in reconstructed form)
4567 void project(const implT& old, bool fence);
4568
4570 bool operator()(const implT* f, const keyT& key, const nodeT& t) const {
     // NOTE(review): the struct header (listing line 4569) is missing from this excerpt.
     // Predicate with the call signature refine_op expects from its op; it ignores all
     // arguments and always answers true.
4571 return true;
4572 }
     // stateless: nothing to archive
4573 template <typename Archive> void serialize(Archive& ar) {}
4574 };
4575
     /// Refine the node at key by one level if it has coefficients, lies above
     /// max_refine_level and op(this,key,node) returns true.
     ///
     /// The node's coefficients are placed in the s0 block of a 2k tensor and unfiltered;
     /// the node is then cleared and marked as having children, and each child is replaced
     /// by a node holding its patch of the unfiltered tensor.
     /// @param[in] op refinement criterion, called as op(this, key, node)
     /// @param[in] key key of the node to refine; it must exist locally (checked)
4576 template <typename opT>
4577 void refine_op(const opT& op, const keyT& key) {
4578 // Must allow for someone already having autorefined the coeffs
4579 // and we get a write accessor just in case they are already executing
4580 typename dcT::accessor acc;
4581 const auto found = coeffs.find(acc,key);
4582 MADNESS_CHECK(found);
4583 nodeT& node = acc->second;
4584 if (node.has_coeff() && key.level() < max_refine_level && op(this, key, node)) {
4585 coeffT d(cdata.v2k,targs);
4586 d(cdata.s0) += copy(node.coeff());
4587 d = unfilter(d);
4588 node.clear_coeff();
4589 node.set_has_children(true);
4590 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
4591 const keyT& child = kit.key();
4592 coeffT ss = copy(d(child_patch(child)));
     // NOTE(review): listing line 4593 is missing from this excerpt
4594 // coeffs.replace(child,nodeT(ss,-1.0,false).node_to_low_rank());
4595 coeffs.replace(child,nodeT(ss,-1.0,false));
4596 // Note value -1.0 for norm tree to indicate result of refinement
4597 }
4598 }
4599 }
4600
4601 template <typename opT>
4602 void refine_spawn(const opT& op, const keyT& key) {
4603 nodeT& node = coeffs.find(key).get()->second;
4604 if (node.has_children()) {
4605 for (KeyChildIterator<NDIM> kit(key); kit; ++kit)
4606 woT::task(coeffs.owner(kit.key()), &implT:: template refine_spawn<opT>, op, kit.key(), TaskAttributes::hipri());
4607 }
4608 else {
4609 woT::task(coeffs.owner(key), &implT:: template refine_op<opT>, op, key);
4610 }
4611 }
4612
4613 // Refine in real space according to local user-defined criterion
4614 template <typename opT>
4615 void refine(const opT& op, bool fence) {
4616 if (world.rank() == coeffs.owner(cdata.key0))
4617 woT::task(coeffs.owner(cdata.key0), &implT:: template refine_spawn<opT>, op, cdata.key0, TaskAttributes::hipri());
4618 if (fence)
4619 world.gop.fence();
4620 }
4621
4622 bool exists_and_has_children(const keyT& key) const;
4623
4624 bool exists_and_is_leaf(const keyT& key) const;
4625
4626
4627 void broaden_op(const keyT& key, const std::vector< Future <bool> >& v);
4628
4629 // For each local node sets value of norm tree, snorm and dnorm to 0.0
4630 void zero_norm_tree();
4631
4632 // Broaden tree
4633 void broaden(const array_of_bools<NDIM>& is_periodic, bool fence);
4634
4635 /// sum all the contributions from all scales after applying an operator in mod-NS form
4636 void trickle_down(bool fence);
4637
4638 /// sum all the contributions from all scales after applying an operator in mod-NS form
4639
4640 /// cf reconstruct_op
4641 void trickle_down_op(const keyT& key, const coeffT& s);
4642
4643 /// reconstruct this tree -- respects fence
4644 void reconstruct(bool fence);
4645
4646 void change_tree_state(const TreeState finalstate, bool fence=true);
4647
4648 // Invoked on node where key is local
4649 // void reconstruct_op(const keyT& key, const tensorT& s);
4650 void reconstruct_op(const keyT& key, const coeffT& s, const bool accumulate_NS=true);
4651
4652 /// compress the wave function
4653
4654 /// after application there will be sum coefficients at the root level,
4655 /// and difference coefficients at all other levels; furthermore:
4656 /// @param[in] nonstandard keep sum coeffs at all other levels, except leaves
4657 /// @param[in] keepleaves keep sum coeffs (but no diff coeffs) at leaves
4658 /// @param[in] redundant keep only sum coeffs at all levels, discard difference coeffs
4659// void compress(bool nonstandard, bool keepleaves, bool redundant, bool fence);
4660 void compress(const TreeState newstate, bool fence);
4661
4662 /// Invoked on node where key is local
4663 Future<std::pair<coeffT,double> > compress_spawn(const keyT& key, bool nonstandard, bool keepleaves,
4664 bool redundant1);
4665
4666 private:
4667 /// convert this to redundant, i.e. have sum coefficients on all levels
4668 void make_redundant(const bool fence);
4669 public:
4670
4671 /// convert this from redundant to standard reconstructed form
4672 void undo_redundant(const bool fence);
4673
4674 void remove_internal_coefficients(const bool fence);
4675 void remove_leaf_coefficients(const bool fence);
4676
4677
4678 /// compute for each FunctionNode the norm of the function inside that node
4679 void norm_tree(bool fence);
4680
4681 double norm_tree_op(const keyT& key, const std::vector< Future<double> >& v);
4682
4684
4685 /// truncate using a tree in reconstructed form
4686
4687 /// must be invoked where key is local
4688 Future<coeffT> truncate_reconstructed_spawn(const keyT& key, const double tol);
4689
4690 /// given the sum coefficients of all children, truncate or not
4691
4692 /// @return new sum coefficients (empty if internal, not empty, if new leaf); might delete its children
4693 coeffT truncate_reconstructed_op(const keyT& key, const std::vector< Future<coeffT > >& v, const double tol);
4694
4695 /// calculate the wavelet coefficients using the sum coefficients of all child nodes
4696
4697 /// also compute the norm tree for all nodes
4698 /// @param[in] key this's key
4699 /// @param[in] v sum coefficients of the child nodes
4700 /// @param[in] nonstandard keep the sum coefficients with the wavelet coefficients
4701 /// @param[in] redundant keep only the sum coefficients, discard the wavelet coefficients
4702 /// @return the sum coefficients
4703 std::pair<coeffT,double> compress_op(const keyT& key, const std::vector< Future<std::pair<coeffT,double>> >& v, bool nonstandard);
4704
4705
4706 /// similar to compress_op, but insert only the sum coefficients in the tree
4707
4708 /// also compute the norm tree for all nodes
4709 /// @param[in] key this's key
4710 /// @param[in] v sum coefficients of the child nodes
4711 /// @return the sum coefficients
4712 std::pair<coeffT,double> make_redundant_op(const keyT& key,const std::vector< Future<std::pair<coeffT,double> > >& v);
4713
4714 /// Changes non-standard compressed form to standard compressed form
4715 void standard(bool fence);
4716
4717 /// Changes non-standard compressed form to standard compressed form
4720
4721 // threshold for rank reduction / SVD truncation
4723
4724 // constructor takes target precision
4725 do_standard() = default;
4727
4728 //
4729 bool operator()(typename rangeT::iterator& it) const {
4730
4731 const keyT& key = it->first;
4732 nodeT& node = it->second;
4733 if (key.level()> 0 && node.has_coeff()) {
4734 if (node.has_children()) {
4735 // Zero out scaling coeffs
4736 MADNESS_ASSERT(node.coeff().dim(0)==2*impl->get_k());
4737 node.coeff()(impl->cdata.s0)=0.0;
4738 node.reduceRank(impl->targs.thresh);
4739 } else {
4740 // Deleting both scaling and wavelet coeffs
4741 node.clear_coeff();
4742 }
4743 }
4744 return true;
4745 }
4746 template <typename Archive> void serialize(const Archive& ar) {
4747 MADNESS_EXCEPTION("no serialization of do_standard",1);
4748 }
4749 };
4750
4751
4752 /// laziness: bundles the arguments that are handed to the do_apply_kernel* functions
     ///
     /// Holds the operator-space key and displacement (\c key, \c d), the destination
     /// key in this function's tree (\c dest), and the screening quantities
     /// \c tol, \c fac and \c cnorm, which the kernels combine as tol/fac and tol/fac/cnorm.
4753 template<size_t OPDIM>
4754 struct do_op_args {
4757 double tol, fac, cnorm;
4758
4759 do_op_args() = default;
     /// store all six values unchanged
4760 do_op_args(const Key<OPDIM>& key, const Key<OPDIM>& d, const keyT& dest, double tol, double fac, double cnorm)
4761 : key(key), d(d), dest(dest), tol(tol), fac(fac), cnorm(cnorm) {}
     /// (de)serialize the whole object through archive::wrap_opaque
     /// NOTE(review): presumably a raw byte-wise copy of *this -- confirm all members are trivially copyable
4762 template <class Archive>
4763 void serialize(Archive& ar) {
4764 ar & archive::wrap_opaque(this,1);
4765 }
4766 };
4767
4768 /// for fine-grain parallelism: call the apply method of an operator in a separate task
4769
     /// The operator is applied with tolerance tol/fac/cnorm; the result is only
     /// passed on for accumulation if its Frobenius norm exceeds 0.3*tol/fac.
4770 /// @param[in] op the operator working on our function
4771 /// @param[in] c full rank tensor holding the NS coefficients
4772 /// @param[in] args laziness holding norm of the coefficients, displacement, destination, ..
4773 template <typename opT, typename R, size_t OPDIM>
4774 void do_apply_kernel(const opT* op, const Tensor<R>& c, const do_op_args<OPDIM>& args) {
4775
4776 tensorT result = op->apply(args.key, args.d, c, args.tol/args.fac/args.cnorm);
4777
4778 // Screen here to reduce communication cost of negligible data
4779 // and also to ensure we don't needlessly widen the tree when
4780 // applying the operator
4781 if (result.normf()> 0.3*args.tol/args.fac) {
4783 //woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());
4784 // UGLY BUT ADDED THE OPTIMIZATION BACK IN HERE EXPLICITLY/
     // NOTE(review): args.dest is constructed from a keyT, yet it is compared with world.rank() here;
     // a locality test on the owner of args.dest was presumably intended -- confirm
4785 if (args.dest == world.rank()) {
4786 coeffs.send(args.dest, &nodeT::accumulate, result, coeffs, args.dest);
4787 }
4788 else {
4790 }
4791 }
4792 }
4793
4794 /// same as do_apply_kernel, but use full rank tensors as input and low rank tensors as output
4795
4796 /// @param[in] op the operator working on our function
4797 /// @param[in] c full rank tensor holding the NS coefficients
4798 /// @param[in] args laziness holding norm of the coefficients, displacement, destination, ..
4799 /// @param[in] apply_targs TensorArgs with tightened threshold for accumulation
4800 /// @return Frobenius norm of the full-rank result; the result is accumulated into the destination node only if that norm exceeds 0.3*tol/fac
4801 template <typename opT, typename R, size_t OPDIM>
4802 double do_apply_kernel2(const opT* op, const Tensor<R>& c, const do_op_args<OPDIM>& args,
4803 const TensorArgs& apply_targs) {
4804
4805 tensorT result_full = op->apply(args.key, args.d, c, args.tol/args.fac/args.cnorm);
4806 const double norm=result_full.normf();
4807
4808 // Screen here to reduce communication cost of negligible data
4809 // and also to ensure we don't needlessly widen the tree when
4810 // applying the operator
4811 // OPTIMIZATION NEEDED HERE ... CHANGING THIS TO TASK NOT SEND REMOVED
4812 // BUILTIN OPTIMIZATION TO SHORTCIRCUIT MSG IF DATA IS LOCAL
4813 if (norm > 0.3*args.tol/args.fac) {
4814
4815 small++;
4816 //double cpu0=cpu_time();
     // convert the full-rank result into a coeffT using the tightened accumulation threshold
4817 coeffT result=coeffT(result_full,apply_targs);
4818 MADNESS_ASSERT(result.is_full_tensor() or result.is_svd_tensor());
4819 //double cpu1=cpu_time();
4820 //timer_lr_result.accumulate(cpu1-cpu0);
4821
4822 coeffs.task(args.dest, &nodeT::accumulate, result, coeffs, args.dest, apply_targs,
4824
4825 //woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());
4826 }
4827 return norm;
4828 }
4829
4830
4831
4832 /// same as do_apply_kernel2, but use low rank tensors as input and low rank tensors as output
4833
4834 /// @param[in] op the operator working on our function
4835 /// @param[in] coeff low rank tensor (GenTensor) holding the NS coefficients
4836 /// @param[in] args laziness holding norm of the coefficients, displacement, destination, ..
4837 /// @param[in] apply_targs TensorArgs with tightened threshold for accumulation
4838 /// @return norm (svd_normf) of the result; the result is accumulated into the destination node only if that norm exceeds 0.3*tol/fac
4839 template <typename opT, typename R, size_t OPDIM>
4840 double do_apply_kernel3(const opT* op, const GenTensor<R>& coeff, const do_op_args<OPDIM>& args,
4841 const TensorArgs& apply_targs) {
4842
     // NOTE(review): if neither 2*OPDIM==NDIM nor OPDIM==NDIM holds, result stays default-constructed
4843 coeffT result;
4844 if (2*OPDIM==NDIM) result= op->apply2_lowdim(args.key, args.d, coeff,
4845 args.tol/args.fac/args.cnorm, args.tol/args.fac);
4846 if (OPDIM==NDIM) result = op->apply2(args.key, args.d, coeff,
4847 args.tol/args.fac/args.cnorm, args.tol/args.fac);
4848
4849 const double result_norm=result.svd_normf();
4850
4851 if (result_norm> 0.3*args.tol/args.fac) {
4852 small++;
4853
     // time the conversion of the result to this function's tensor type (targs.tt)
4854 double cpu0=cpu_time();
4855 if (not result.is_of_tensortype(targs.tt)) result=result.convert(targs);
4856 double cpu1=cpu_time();
4857 timer_lr_result.accumulate(cpu1-cpu0);
4858
4859 // accumulate also expects result in SVD form
4860 coeffs.task(args.dest, &nodeT::accumulate, result, coeffs, args.dest, apply_targs,
4862// woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());
4863
4864 }
4865 return result_norm;
4866
4867 }
4868
4869 // volume of n-dimensional sphere of radius R
4870 double vol_nsphere(int n, double R) {
4871 return std::pow(madness::constants::pi,n*0.5)*std::pow(R,n)/std::tgamma(1+0.5*n);
4872 }
4873
4874
4875 /// apply an operator on the coeffs c (at node key)
4876
4877 /// the result is accumulated inplace to this's tree at various FunctionNodes
     ///
     /// Two passes are made over displacements: first the operator's standard list
     /// (op->get_disp), screened shell by shell, and then -- for range-restricted
     /// operators at level >= 1 -- the displacements on the boundary of the kernel range.
4878 /// @param[in] op the operator to act on the source function
4879 /// @param[in] key key of the source FunctionNode of f which is processed
4880 /// @param[in] c coeffs of the FunctionNode of f which is processed
4881 template <typename opT, typename R>
4882 void do_apply(const opT* op, const keyT& key, const Tensor<R>& c) {
4884
4885 // working assumption here WAS that the operator is
4886 // isotropic and monotonically decreasing with distance
4887 // ... however, now we are using derivative Gaussian
4888 // expansions (and also non-cubic boxes) isotropic is
4889 // violated. While not strictly monotonically decreasing,
4890 // the derivative gaussian is still such that once it
4891 // becomes negligible we are in the asymptotic region.
4892
4893 typedef typename opT::keyT opkeyT;
4894 constexpr auto opdim = opT::opdim;
4895 const opkeyT source = op->get_source_key(key);
4896
4897 // Tuning here is based on observation that with
4898 // sufficiently high-order wavelet relative to the
4899 // precision, that only nearest neighbor boxes contribute,
4900 // whereas for low-order wavelets more neighbors will
4901 // contribute. Sufficiently high is picked as
4902 // k>=2-log10(eps) which is our empirical rule for
4903 // efficiency/accuracy and code instrumentation has
4904 // previously indicated that (in 3D) just unit
4905 // displacements are invoked. The error decays as R^-(k+1),
4906 // and the number of boxes increases as R^d.
4907 //
4908 // Fac is the expected number of contributions to a given
4909 // box, so the error permitted per contribution will be
4910 // tol/fac
4911
4912 // radius of shell (nearest neighbor is diameter of 3 boxes, so radius=1.5)
4913 double radius = 1.5 + 0.33 * std::max(0.0, 2 - std::log10(thresh) -
4914 k); // 0.33 was 0.5
4915 //double radius = 2.5;
4916 double fac = vol_nsphere(NDIM, radius);
4917 // previously fac=10.0 selected empirically constrained by qmprop
4918
4919 double cnorm = c.normf();
4920
4921 // BC handling:
4922 // - if operator is lattice-summed then treat this as nonperiodic (i.e. tell neighbor() to stay in simulation cell)
4923 // - if operator is NOT lattice-summed then obey BC (i.e. tell neighbor() to go outside the simulation cell along periodic dimensions)
4924 // - BUT user can force operator to treat its arguments as non-periodic (`op.set_domain_periodicity({true,true,true})`) so ... which dimensions of this function are treated as periodic by op?
4925 const array_of_bools<NDIM> func_is_treated_by_op_as_periodic =
4926 (op->particle() == 1)
4927 ? array_of_bools<NDIM>{false}.or_front(
4928 op->func_domain_is_periodic())
4929 : array_of_bools<NDIM>{false}.or_back(
4930 op->func_domain_is_periodic());
4931
     // default distance measures and skip predicate used for the standard displacement list
4932 const auto default_real_distance_squared = [&](const auto &displacement)
4933 -> double {
4934 return displacement.real_distsq_bc(op->lattice_summed(), FunctionDefaults<NDIM>::get_cell_width());
4935 };
4936 const auto default_lattice_distance_squared = [&](const auto &displacement)
4937 -> std::uint64_t {
4938 return displacement.distsq_bc(op->lattice_summed());
4939 };
4940 const auto default_skip_predicate = [&](const auto &displacement)
4941 -> bool {
4942 return false;
4943 };
     // applies the operator for every displacement in `displacements`, with shell-wise screening;
     // returns the real distance squared of the last shell entered, or an empty optional if the
     // whole range was skipped by the probing-displacement test or no displacement was processed
4944 const auto for_each = [&](const auto &displacements,
4945 const auto &real_distance_squared,
4946 const auto &lattice_distance_squared,
4947 const auto &skip_predicate) -> std::optional<double> {
4948
4949 // used to screen estimated and actual contributions
4950 //const double tol = truncate_tol(thresh, key);
4951 //const double tol = 0.1*truncate_tol(thresh, key);
4952 const double tol = truncate_tol(thresh, key);
4953
4954 // assume isotropic decaying kernel, screen in shell-wise fashion by
4955 // monitoring the decay of magnitude of contribution norms with the
4956 // distance ... as soon as we find a shell of displacements at least
4957 // one of each in simulation domain (see neighbor()) and
4958 // all in-domain shells produce negligible contributions, stop.
4959 // a displacement is significant (i.e. NOT negligible) if ||op|| * ||c|| > tol / fac
4960 // where fac is the expected number of contributions to a given box (see above)
4961 int nvalid = 1; // Counts #valid at each distance
4962 int nused = 1; // Counts #used at each distance
4963 std::optional<double> real_last_distsq;
4964 std::optional<std::uint64_t> lattice_last_distsq;
4965
4966 // displacements to the kernel range boundary are typically same magnitude (modulo variation)
4967 // estimate the norm of the resulting contributions and skip all if one is too small
4968 if constexpr (std::is_same_v<std::decay_t<decltype(displacements)>,BoxSurfaceDisplacementRange<opdim>>) {
4969 const auto &probing_displacement =
4970 displacements.probing_displacement();
4971 const double opnorm =
4972 op->norm(key.level(), probing_displacement, source);
4973 if (cnorm * opnorm <= tol / fac) {
4974 return {};
4975 }
4976 }
4977
4978 for (const auto& displacement: displacements) {
4979 if (skip_predicate(displacement)) continue;
4980
     // extend the operator-space displacement to a full NDIM displacement,
     // padding the dimensions of the other particle with a zero key
4981 keyT d;
4982 Key<NDIM - opdim> nullkey(key.level());
4983 MADNESS_ASSERT(op->particle() == 1 || op->particle() == 2);
4984 if (op->particle() == 1)
4985 d = displacement.merge_with(nullkey);
4986 else
4987 d = nullkey.merge_with(displacement);
4988
4989 // Screen out shells. We assume displacements are grouped into shells so that the operator decays with shell index.
4990 // Shells are indexed by least distance from box to the central box.
4991 // Cells touching so much as a corner of the central box are further grouped by their lattice distance.
4992 // N.B. lattice-summed decaying kernel is periodic (i.e. does not decay monotonically w.r.t. r), so loop over shells of displacements sorted by distances modulated by periodicity (Key::distsq_bc)
4993 const auto real_distsq = real_distance_squared(displacement);
4994 const std::uint64_t lattice_distsq = real_distsq ? 0 : lattice_distance_squared(displacement);
4995 if (!real_last_distsq.has_value() ||
4996 !nearlyEqual(real_distsq, *real_last_distsq) || (nearlyEqual(*real_last_distsq, 0) && lattice_distsq != *lattice_last_distsq)) { // Moved to next shell of neighbors
4997 if (nvalid > 0 && nused == 0 && (real_distsq > 0 || lattice_distsq > 1)) {
4998 // Have at least done the input box and all first
4999 // nearest neighbors, and none of the last set
5000 // of neighbors made significant contributions. Thus,
5001 // assuming monotonic decrease, we are done.
5002 break;
5003 }
5004 nused = 0;
5005 nvalid = 0;
5006 real_last_distsq = real_distsq;
5007 // After real_last_distsq > 0, we stop caring about keeping lattice_last_distsq up-to-date.
5008 lattice_last_distsq = real_distsq ? std::optional<std::uint64_t>{} : lattice_distsq;
5009 }
5010
5011 keyT dest = neighbor(key, d, func_is_treated_by_op_as_periodic);
5012 if (dest.is_valid()) {
5013 nvalid++;
5014 const double opnorm = op->norm(key.level(), displacement, source);
5015
     // first screen on the norm estimate, then on the norm of the actual result
5016 if (cnorm * opnorm > tol / fac) {
5017 tensorT result =
5018 op->apply(source, displacement, c, tol / fac / cnorm);
5019 if (result.normf() > 0.3 * tol / fac) {
5020 if (coeffs.is_local(dest))
5021 coeffs.send(dest, &nodeT::accumulate2, result, coeffs,
5022 dest);
5023 else
5024 coeffs.task(dest, &nodeT::accumulate2, result, coeffs,
5025 dest);
5026 nused++;
5027 }
5028 }
5029 }
5030 }
5031
5032 return real_last_distsq;
5033 };
5034
5035 // process "standard" displacements, screening assumes monotonic decay of the kernel
5036 // list of displacements sorted in order of increasing distance
5037 // N.B. if op is lattice-summed use periodic displacements, else use
5038 // non-periodic even if op treats any modes of this as periodic
5039 const std::vector<opkeyT> &disp = op->get_disp(key.level());
5040 const auto max_distsq_reached = for_each(disp, default_real_distance_squared, default_lattice_distance_squared, default_skip_predicate);
5041
5042 // for range-restricted kernels displacements to the boundary of the kernel range also need to be included
5043 // N.B. hard range restriction will result in slow decay of operator matrix elements for the displacements
5044 // to the range boundary, should use soft restriction or sacrifice precision
5045 if (op->range_restricted() && key.level() >= 1) {
5046
     // per-dimension radius of the range box and thickness of its surface layer;
     // entries stay empty for dimensions whose range[d] tests false
5047 std::array<std::optional<std::int64_t>, opdim> box_radius;
5048 std::array<std::optional<std::int64_t>, opdim> surface_thickness;
5049 auto &range = op->get_range();
5050 for (int d = 0; d != opdim; ++d) {
5051 if (range[d]) {
5052 box_radius[d] = range[d].N();
5053 surface_thickness[d] = range[d].finite_soft() ? 1 : 0;
5054 }
5055 }
5056
5058 // skip surface displacements that take us outside of the domain and/or were included in regular displacements
5059 // N.B. for lattice-summed axes the "filter" also maps the displacement back into the simulation cell
5060 if (max_distsq_reached)
5061 validator = BoxSurfaceDisplacementValidator<opdim>(/* is_infinite_domain= */ op->func_domain_is_periodic(), /* is_lattice_summed= */ op->lattice_summed(), range, default_real_distance_squared, *max_distsq_reached);
5062
5063 // this range iterates over the entire surface layer(s), and provides a probing displacement that can be used to screen out the entire box
     // NOTE(review): both branches of the conditional below call extract_front<opdim>(); for
     // particle()==2 the trailing dimensions of key were presumably intended -- confirm
5064 auto opkey = op->particle() == 1 ? key.template extract_front<opdim>() : key.template extract_front<opdim>();
5066 range_boundary_face_displacements(opkey, box_radius,
5067 surface_thickness,
5068 op->lattice_summed(),
5069 validator);
5070 for_each(
5071 range_boundary_face_displacements,
5072 // surface displacements are not screened, all are included
5073 [](const auto &displacement) -> double { return 0; },
5074 [](const auto &displacement) -> std::uint64_t { return 0; },
5075 default_skip_predicate);
5076 }
5077 }
5078
5079
5080 /// apply an operator on f to return this
     ///
     /// Loops over all local nodes of f that carry coefficients and submits one
     /// do_apply task per node, passing the node's coefficients as a reconstructed tensor.
     /// @param[in] op the operator; must not be in "modified" form (asserted)
     /// @param[in] f the source function
     /// @param[in] fence if true, call world.gop.fence() after submitting the tasks
5081 template <typename opT, typename R>
5082 void apply(opT& op, const FunctionImpl<R,NDIM>& f, bool fence) {
5084 MADNESS_ASSERT(!op.modified());
5085 for (const auto& [key, node]: f.coeffs) {
5086 if (node.has_coeff()) {
     // nodes whose first dimension equals k are only processed when op.doleaves is set
5087 if (node.coeff().dim(0) != k /* i.e. not a leaf */ || op.doleaves) {
5089// woT::task(p, &implT:: template do_apply<opT,R>, &op, key, node.coeff()); //.full_tensor_copy() ????? why copy ????
5090 woT::task(p, &implT:: template do_apply<opT,R>, &op, key, node.coeff().reconstruct_tensor());
5091 }
5092 }
5093 }
5094 if (fence)
5095 world.gop.fence();
5096
5098// this->compressed=true;
5099// this->nonstandard=true;
5100// this->redundant=false;
5101
5102 }
5103
5104
5105
5106 /// apply an operator on the coeffs c (at node key)
5107
5108 /// invoked by result; the result is accumulated inplace to this's tree at various FunctionNodes
5109 /// @param[in] op the operator to act on the source function
5110 /// @param[in] key key of the source FunctionNode of f which is processed (see "source")
5111 /// @param[in] coeff coeffs of FunctionNode being processed
5112 /// @param[in] do_kernel true: do the 0-disp only; false: do everything but the kernel
5113 /// @return max norm, and will modify or include new nodes in this' tree
     /// NOTE(review): template parameter R does not appear to be referenced in the body -- confirm
5114 template <typename opT, typename R>
5115 double do_apply_directed_screening(const opT* op, const keyT& key, const coeffT& coeff,
5116 const bool& do_kernel) {
5118 // insert timer here
5119 typedef typename opT::keyT opkeyT;
5120
5121 // screening: contains all displacement keys that had small result norms
5122 std::list<opkeyT> blacklist;
5123
5124 constexpr auto opdim=opT::opdim;
5125 Key<NDIM-opdim> nullkey(key.level());
5126
5127 // source is that part of key that corresponds to those dimensions being processed
5128 const opkeyT source=op->get_source_key(key);
5129
5130 const double tol = truncate_tol(thresh, key);
5131
5132 // fac is the root of the number of contributing neighbors (1st shell)
5133 double fac=std::pow(3,NDIM*0.5);
5134 double cnorm = coeff.normf();
5135
5136 // for accumulation: keep slightly tighter TensorArgs
5137 TensorArgs apply_targs(targs);
5138 apply_targs.thresh=tol/fac*0.03;
5139
5140 double maxnorm=0.0;
5141
5142 // for the kernel it may be more efficient to do the convolution in full rank
     // (coeff_full is filled lazily, the first time a full-rank application is chosen)
5143 tensorT coeff_full;
5144 // for partial application (exchange operator) it's more efficient to
5145 // do SVD tensors instead of tensortrains, because addition in apply
5146 // can be done in full form for the specific particle
5147 coeffT coeff_SVD=coeff.convert(TensorArgs(-1.0,TT_2D));
5148#ifdef HAVE_GENTENSOR
5149 coeff_SVD.get_svdtensor().orthonormalize(tol*GenTensor<T>::fac_reduce());
5150#endif
5151
5152 // list of displacements sorted in order of increasing distance
5153 // N.B. if op is lattice-summed gives periodic displacements, else uses
5154 // non-periodic even if op treats any modes of this as periodic
5155 const std::vector<opkeyT>& disp = Displacements<opdim>().get_disp(key.level(), op->lattice_summed());
5156
5157 for (const auto& d: disp) {
     // shell index 0 is the zero displacement, i.e. "the kernel" in the sense of do_kernel
5158 const int shell=d.distsq_bc(op->lattice_summed());
5159 if (do_kernel and (shell>0)) break;
5160 if ((not do_kernel) and (shell==0)) continue;
5161
5162 keyT disp1;
5163 if (op->particle()==1) disp1=d.merge_with(nullkey);
5164 else if (op->particle()==2) disp1=nullkey.merge_with(d);
5165 else {
5166 MADNESS_EXCEPTION("confused particle in operator??",1);
5167 }
5168
5169 keyT dest = neighbor_in_volume(key, disp1);
5170
5171 if (not dest.is_valid()) continue;
5172
5173 // directed screening
5174 // working assumption here is that the operator is isotropic and
5175 // monotonically decreasing with distance
     // a displacement is skipped if it lies farther out than any blacklisted displacement
5176 bool screened=false;
5177 typename std::list<opkeyT>::const_iterator it2;
5178 for (it2=blacklist.begin(); it2!=blacklist.end(); it2++) {
5179 if (d.is_farther_out_than(*it2)) {
5180 screened=true;
5181 break;
5182 }
5183 }
5184 if (not screened) {
5185
5186 double opnorm = op->norm(key.level(), d, source);
5187 double norm=0.0;
5188
5189 if (cnorm*opnorm> tol/fac) {
5190
5191 double cost_ratio=op->estimate_costs(source, d, coeff_SVD, tol/fac/cnorm, tol/fac);
5192 // cost_ratio=1.5; // force low rank
5193 // cost_ratio=0.5; // force full rank
5194
     // cost_ratio <= 0: nothing is applied; < 1: full-rank application; otherwise low-rank application
5195 if (cost_ratio>0.0) {
5196
5197 do_op_args<opdim> args(source, d, dest, tol, fac, cnorm);
5198 norm=0.0;
5199 if (cost_ratio<1.0) {
5200 if (not coeff_full.has_data()) coeff_full=coeff.full_tensor_copy();
5201 norm=do_apply_kernel2(op, coeff_full,args,apply_targs);
5202 } else {
5203 if (2*opdim==NDIM) { // apply operator on one particle only
5204 norm=do_apply_kernel3(op,coeff_SVD,args,apply_targs);
5205 } else {
5206 norm=do_apply_kernel3(op,coeff,args,apply_targs);
5207 }
5208 }
5209 maxnorm=std::max(norm,maxnorm);
5210 }
5211
5212 } else if (shell >= 12) {
5213 break; // Assumes monotonic decay beyond nearest neighbor
5214 }
     // remember displacements with a small (or no) result so that farther ones can be screened
5215 if (norm<0.3*tol/fac) blacklist.push_back(d);
5216 }
5217 }
5218 return maxnorm;
5219 }
5220
5221
5222 /// similar to apply, but for low rank coeffs
     ///
     /// For every local node of f whose coefficients have data and non-zero rank, two
     /// do_apply_directed_screening tasks are submitted: one with do_kernel=true and
     /// one with do_kernel=false.
     /// @param[in] op the operator; must not be in "modified" form (asserted)
     /// @param[in] f the source function
     /// @param[in] fence if true, call world.gop.fence() after submitting the tasks
5223 template <typename opT, typename R>
5224 void apply_source_driven(opT& op, const FunctionImpl<R,NDIM>& f, bool fence) {
5226
5227 MADNESS_ASSERT(not op.modified());
5228 // looping through all the coefficients of the source f
5229 typename dcT::const_iterator end = f.get_coeffs().end();
5230 for (typename dcT::const_iterator it=f.get_coeffs().begin(); it!=end; ++it) {
5231
5232 const keyT& key = it->first;
5233 const coeffT& coeff = it->second.coeff();
5234
5235 if (coeff.has_data() and (coeff.rank()!=0)) {
5237 woT::task(p, &implT:: template do_apply_directed_screening<opT,R>, &op, key, coeff, true);
5238 woT::task(p, &implT:: template do_apply_directed_screening<opT,R>, &op, key, coeff, false);
5239 }
5240 }
5241 if (fence) world.gop.fence();
5243 }
5244
5245 /// after apply we need to do some cleanup;
5246
5247 /// forces fence
5248 double finalize_apply();
5249
5250 /// after summing up we need to do some cleanup;
5251
5252 /// forces fence
5253 void finalize_sum();
5254
5255 /// traverse a non-existing tree, make its coeffs and apply an operator
5256
5257 /// invoked by result
5258 /// here we use the fact that the hi-dim NS coefficients on all scales are exactly
5259 /// the outer product of the underlying low-dim functions (also in NS form),
5260 /// so we don't need to construct the full hi-dim tree and then turn it into NS form.
5261 /// @param[in] apply_op the operator acting on the NS tree
5262 /// @param[in] fimpl the funcimpl of the function of particle 1
5263 /// @param[in] gimpl the funcimpl of the function of particle 2
     /// @param[in] fence if true, call world.gop.fence() before returning
5264 template<typename opT, std::size_t LDIM>
5265 void recursive_apply(opT& apply_op, const FunctionImpl<T,LDIM>* fimpl,
5266 const FunctionImpl<T,LDIM>* gimpl, const bool fence) {
5267
5268 //print("IN RECUR2");
5269 const keyT& key0=cdata.key0;
5270
     // only the owner of the root key starts the traversal
5271 if (world.rank() == coeffs.owner(key0)) {
5272
5273 CoeffTracker<T,LDIM> ff(fimpl);
5274 CoeffTracker<T,LDIM> gg(gimpl);
5275
5276 typedef recursive_apply_op<opT,LDIM> coeff_opT;
5277 coeff_opT coeff_op(this,ff,gg,&apply_op);
5278
     // the local apply_op below shadows the function parameter; coeff_op above
     // has already been given the address of the parameter
5279 typedef noop<T,NDIM> apply_opT;
5280 apply_opT apply_op;
5281
5283 woT::task(p, &implT:: template forward_traverse<coeff_opT,apply_opT>, coeff_op, apply_op, key0);
5284
5285 }
5286 if (fence) world.gop.fence();
5288 }
5289
5290 /// recursive part of recursive_apply
5291 template<typename opT, std::size_t LDIM>
5293 bool randomize() const {return true;}
5294
5296
5301
5302 // ctor
5306 const opT* apply_op) : result(result), iaf(iaf), iag(iag), apply_op(apply_op)
5307 {
5308 MADNESS_ASSERT(LDIM+LDIM==NDIM);
5309 }
5311 iag(other.iag), apply_op(other.apply_op) {}
5312
5313
5314 /// make the NS-coefficients and send off the application of the operator
5315
5316 /// @return a std::pair<bool,coeffT>: (is_leaf,coeffT()) if leaf_op flags the node as a leaf, otherwise the result of finalize()
5317 std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const {
5318
5319 // World& world=result->world;
5320 // break key into particles (these are the child keys, with datum1/2 come the parent keys)
5321 Key<LDIM> key1,key2;
5322 key.break_apart(key1,key2);
5323
5324 // the lo-dim functions should be in full tensor form
5325 const tensorT fcoeff=iaf.coeff(key1).full_tensor();
5326 const tensorT gcoeff=iag.coeff(key2).full_tensor();
5327
5328 // would this be a leaf node? If so, then its sum coeffs have already been
5329 // processed by the parent node's wavelet coeffs. Therefore we won't
5330 // process it any more.
5332 bool is_leaf=leaf_op(key,fcoeff,gcoeff);
5333
5334 if (not is_leaf) {
5335 // new coeffs are simply the hartree/kronecker/outer product --
     // for a "modified" operator only the s0 blocks of both factors enter the product
5336 const std::vector<Slice>& s0=iaf.get_impl()->cdata.s0;
5337 const coeffT coeff = (apply_op->modified())
5338 ? outer(copy(fcoeff(s0)),copy(gcoeff(s0)),result->targs)
5339 : outer(fcoeff,gcoeff,result->targs);
5340
5341 // now send off the application
     // NOTE(review): coeff_full appears to be unused in this function -- confirm
5342 tensorT coeff_full;
     // the zero displacement (do_kernel=true) is applied right here, all other
     // displacements (do_kernel=false) are submitted as a task
5344 double norm0=result->do_apply_directed_screening<opT,T>(apply_op, key, coeff, true);
5345
5346 result->task(p,&implT:: template do_apply_directed_screening<opT,T>,
5347 apply_op,key,coeff,false);
5348
5349 return finalize(norm0,key,coeff);
5350
5351 } else {
5352 return std::pair<bool,coeffT> (is_leaf,coeffT());
5353 }
5354 }
5355
5356 /// sole purpose is to wait for the kernel norm, wrap it and send it back to caller
5357 std::pair<bool,coeffT> finalize(const double kernel_norm, const keyT& key,
5358 const coeffT& coeff) const {
5359 const double thresh=result->get_thresh()*0.1;
5360 bool is_leaf=(kernel_norm<result->truncate_tol(thresh,key));
5361 if (key.level()<2) is_leaf=false;
5362 return std::pair<bool,coeffT> (is_leaf,coeff);
5363 }
5364
5365
5366 this_type make_child(const keyT& child) const {
5367
5368 // break key into particles
5369 Key<LDIM> key1, key2;
5370 child.break_apart(key1,key2);
5371
5372 return this_type(result,iaf.make_child(key1),iag.make_child(key2),apply_op);
5373 }
5374
5378 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
5379 &this_type::forward_ctor),result,f1,g1,apply_op);
5380 }
5381
5383 const opT* apply_op1) {
5384 return this_type(r,f1,g1,apply_op1);
5385 }
5386
5387 template <typename Archive> void serialize(const Archive& ar) {
5388 ar & result & iaf & iag & apply_op;
5389 }
5390 };
5391
5392 /// traverse an existing tree and apply an operator
5393
5394 /// invoked by result
5395 /// @param[in] apply_op the operator acting on the NS tree
5396 /// @param[in] fimpl the funcimpl of the source function
5397 /// @param[in] rimpl a dummy function for recursive_op to insert data
     /// @param[in] fence if true, call world.gop.fence() before returning
     /// NOTE(review): rimpl does not appear to be referenced in the body -- confirm
5398 template<typename opT>
5399 void recursive_apply(opT& apply_op, const implT* fimpl, implT* rimpl, const bool fence) {
5400
     // NOTE(review): unconditional debug output; the LDIM overload has the equivalent print commented out
5401 print("IN RECUR1");
5402
5403 const keyT& key0=cdata.key0;
5404
     // only the owner of the root key starts the traversal
5405 if (world.rank() == coeffs.owner(key0)) {
5406
5407 typedef recursive_apply_op2<opT> coeff_opT;
5408 coeff_opT coeff_op(this,fimpl,&apply_op);
5409
     // the local apply_op below shadows the function parameter; coeff_op above
     // has already been given the address of the parameter
5410 typedef noop<T,NDIM> apply_opT;
5411 apply_opT apply_op;
5412
5413 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
5414 coeff_op, apply_op, cdata.key0);
5415
5416 }
5417 if (fence) world.gop.fence();
5419 }
5420
5421 /// recursive part of recursive_apply
///
/// NOTE(review): the struct header, some typedefs and the constructor signatures are not
/// part of this listing; the comments below describe only the visible lines.
5422 template<typename opT>
5424 bool randomize() const {return true;}
5425
// return type of operator() and finalize(): (is_leaf, coefficients)
5428 typedef std::pair<bool,coeffT> argT;
5429
// data members; all three are archived by serialize()
5430 mutable implT* result;
5431 ctT iaf; /// need this for randomization
5432 const opT* apply_op;
5433
5434 // ctor
5438
// tail of a constructor that copies iaf and apply_op from `other`
5440 iaf(other.iaf), apply_op(other.apply_op) {}
5441
5442
5443 /// send off the application of the operator
5444
5445 /// the first (core) neighbor (ie. the box itself) is processed
5446 /// immediately, all other ones are shoved into the taskq
5447 /// @return (true, empty coeffT) if the node holds no data; (true, coeff) if iaf is a leaf; otherwise the result of finalize()
5448 argT operator()(const Key<NDIM>& key) const {
5449
5450 const coeffT& coeff=iaf.coeff();
5451
5452 if (coeff.has_data()) {
5453
5454 // now send off the application for all neighbor boxes
// NOTE(review): the definition of p (the task's target) is not part of this listing
5456 result->task(p,&implT:: template do_apply_directed_screening<opT,T>,
5457 apply_op, key, coeff, false);
5458
5459 // process the core box
5460 double norm0=result->do_apply_directed_screening<opT,T>(apply_op,key,coeff,true);
5461
5462 if (iaf.is_leaf()) return argT(true,coeff);
5463 return finalize(norm0,key,coeff,result);
5464
5465 } else {
5466 const bool is_leaf=true;
5467 return argT(is_leaf,coeffT());
5468 }
5469 }
5470
5471 /// sole purpose is to wait for the kernel norm, wrap it and send it back to caller
///
/// is_leaf is true when kernel_norm is below r->truncate_tol(0.1*r->get_thresh(), key),
/// and always false for key.level()<2; coeff is passed through unchanged.
5472 argT finalize(const double kernel_norm, const keyT& key,
5473 const coeffT& coeff, const implT* r) const {
5474 const double thresh=r->get_thresh()*0.1;
5475 bool is_leaf=(kernel_norm<r->truncate_tol(thresh,key));
5476 if (key.level()<2) is_leaf=false;
5477 return argT(is_leaf,coeff);
5478 }
5479
5480
/// operator for the child box: iaf descends to `child`, result and apply_op are kept
5481 this_type make_child(const keyT& child) const {
5482 return this_type(result,iaf.make_child(child),apply_op);
5483 }
5484
5485 /// retrieve the coefficients (parent coeffs might be remote)
// NOTE(review): the signature line is not part of this listing. The body activates iaf
// and queues forward_ctor(result,f1,apply_op) on the task queue of result's world.
5487 Future<ctT> f1=iaf.activate();
5488
5489// Future<ctL> g1=g.activate();
5490// return h->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
5491// &this_type::forward_ctor),h,f1,g1,particle);
5492
5493 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
5494 &this_type::forward_ctor),result,f1,apply_op);
5495 }
5496
5497 /// taskq-compatible ctor
5498 this_type forward_ctor(implT* result1, const ctT& iaf1, const opT* apply_op1) {
5499 return this_type(result1,iaf1,apply_op1);
5500 }
5501
/// archive the data members in the order result, iaf, apply_op
5502 template <typename Archive> void serialize(const Archive& ar) {
5503 ar & result & iaf & apply_op;
5504 }
5505 };
5506
5507 /// Returns the square of the error norm in the box labeled by key
5508
5509 /// Assumed to be invoked locally but it would be easy to eliminate
5510 /// this assumption
/// @param[in] key the box in which the error is measured
/// @param[in] node the node whose coefficients are compared against func
/// @param[in] func the function/functor evaluated by fcube
/// @param[in] npt number of quadrature points per dimension
/// @param[in] qx quadrature points handed to fcube
/// @param[in] quad_phit not referenced in this body
/// @param[in] quad_phiw matrix used by fast_transform to go from values to coefficients
/// @return square of the Frobenius norm (normf) of the difference
5511 template <typename opT>
5512 double err_box(const keyT& key, const nodeT& node, const opT& func,
5513 int npt, const Tensor<double>& qx, const Tensor<double>& quad_phit,
5514 const Tensor<double>& quad_phiw) const {
5515
// all work tensors have npt entries in each of the NDIM dimensions
5516 std::vector<long> vq(NDIM);
5517 for (std::size_t i=0; i<NDIM; ++i)
5518 vq[i] = npt;
5519 tensorT fval(vq,false), work(vq,false), result(vq,false);
5520
5521 // Compute the "exact" function in this volume at npt points
5522 // where npt is usually this->npt+1.
5523 fcube(key, func, qx, fval);
5524
5525 // Transform into the scaling function basis of order npt
5526 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
5527 fval = fast_transform(fval,quad_phiw,result,work).scale(scale);
5528
5529 // Subtract to get the error ... the original coeffs are in the order k
5530 // basis but we just computed the coeffs in the order npt(=k+1) basis
5531 // so we can either use slices or an iterator macro.
5532 const tensorT coeff = node.coeff().full_tensor();
5533 ITERATOR(coeff,fval(IND)-=coeff(IND););
5534 // flo note: we do want to keep a full tensor here!
5535
5536 // Compute the norm of what remains
5537 double err = fval.normf();
5538 return err*err;
5539 }
5540
/// Functor that sums err_box() over a range of nodes (passed to taskq.reduce in errsq_local).
///
/// NOTE(review): the struct header, the tensor members (qx, quad_phit, quad_phiw) and the
/// non-default constructors are not part of this listing.
5541 template <typename opT>
5543 const implT* impl;
5544 const opT* func;
5545 int npt;
5549 public:
5550 do_err_box() = default;
5551
5555
5558
/// squared error of one node; nodes without coefficients contribute 0
5559 double operator()(typename dcT::const_iterator& it) const {
5560 const keyT& key = it->first;
5561 const nodeT& node = it->second;
5562 if (node.has_coeff())
5563 return impl->err_box(key, node, *func, npt, qx, quad_phit, quad_phiw);
5564 else
5565 return 0.0;
5566 }
5567
/// combine two partial sums
5568 double operator()(double a, double b) const {
5569 return a+b;
5570 }
5571
/// serialization is not supported: always raises MADNESS_EXCEPTION
5572 template <typename Archive>
5573 void serialize(const Archive& ar) {
5574 MADNESS_EXCEPTION("not yet", 1);
5575 }
5576 };
5577
5578 /// Returns the sum of squares of errors from local info ... no comms
///
/// Sets up a quadrature rule with cdata.npt+1 points and reduces do_err_box over `range`.
/// NOTE(review): the lines defining rangeT and range are not part of this listing.
/// @param[in] func the function/functor the tree is compared against
5579 template <typename opT>
5580 double errsq_local(const opT& func) const {
5582 // Make quadrature rule of higher order
5583 const int npt = cdata.npt + 1;
5584 Tensor<double> qx, qw, quad_phi, quad_phiw, quad_phit;
5585 FunctionCommonData<T,NDIM>::_init_quadrature(k+1, npt, qx, qw, quad_phi, quad_phiw, quad_phit);
5586
5589 return world.taskq.reduce< double,rangeT,do_err_box<opT> >(range,
5590 do_err_box<opT>(this, &func, npt, qx, quad_phit, quad_phiw));
5591 }
5592
5593 /// Returns \c int(f(x),x) in local volume
5594 T trace_local() const;
5595
// NOTE(review): the struct header is not part of this listing; presumably this is the
// reduction functor used by norm2sq_local() - confirm against the full file.
/// squared norm (normf) of one node's coefficients; nodes without coefficients contribute 0
5597 double operator()(typename dcT::const_iterator& it) const {
5598 const nodeT& node = it->second;
5599 if (node.has_coeff()) {
5600 double norm = node.coeff().normf();
5601 return norm*norm;
5602 }
5603 else {
5604 return 0.0;
5605 }
5606 }
5607
/// combine two partial sums
5608 double operator()(double a, double b) const {
5609 return (a+b);
5610 }
5611
/// serialization is not supported: always raises MADNESS_EXCEPTION
5612 template <typename Archive> void serialize(const Archive& ar) {
5613 MADNESS_EXCEPTION("NOT IMPLEMENTED", 1);
5614 }
5615 };
5616
5617
5618 /// Returns the square of the local norm ... no comms
5619 double norm2sq_local() const;
5620
5621 /// compute the inner product of this range with other
///
/// NOTE(review): the struct header, the members (other, leaves_only), the constructor and
/// the declaration of `sum` are not part of this listing.
5622 template<typename R>
5626 typedef TENSOR_RESULT_TYPE(T,R) resultT;
5627
/// contribution of one node: trace_conj of this node's coefficients with the coefficients
/// of the node with the same key in `other`, provided both exist and hold coefficients
5630 resultT operator()(typename dcT::const_iterator& it) const {
5631
5633 const keyT& key=it->first;
5634 const nodeT& fnode = it->second;
5635 if (fnode.has_coeff()) {
5636 if (other->coeffs.probe(it->first)) {
5637 const FunctionNode<R,NDIM>& gnode = other->coeffs.find(key).get()->second;
5638 if (gnode.has_coeff()) {
// differing leading dimensions are reported and treated as a fatal error
5639 if (gnode.coeff().dim(0) != fnode.coeff().dim(0)) {
5640 madness::print("INNER", it->first, gnode.coeff().dim(0),fnode.coeff().dim(0));
5641 MADNESS_EXCEPTION("functions have different k or compress/reconstruct error", 0);
5642 }
// with leaves_only set, a pair of nodes counts only if at least one of them is a leaf
5643 if (leaves_only) {
5644 if (gnode.is_leaf() or fnode.is_leaf()) {
5645 sum += fnode.coeff().trace_conj(gnode.coeff());
5646 }
5647 } else {
5648 sum += fnode.coeff().trace_conj(gnode.coeff());
5649 }
5650 }
5651 }
5652 }
5653 return sum;
5654 }
5655
/// combine two partial sums
5656 resultT operator()(resultT a, resultT b) const {
5657 return (a+b);
5658 }
5659
/// serialization is not supported: always raises MADNESS_EXCEPTION
5660 template <typename Archive> void serialize(const Archive& ar) {
5661 MADNESS_EXCEPTION("NOT IMPLEMENTED", 1);
5662 }
5663 };
5664
5665 /// Returns the inner product ASSUMING same distribution
5666
5667 /// handles compressed and redundant form
///
/// NOTE(review): the signature, the state checks, the range definition and the arguments
/// of the reduce call are not part of this listing; only the reduction over
/// do_inner_local<R> is visible.
5668 template <typename R>
5672 typedef TENSOR_RESULT_TYPE(T,R) resultT;
5673
5674 // make sure the states of the trees are consistent
5677 return world.taskq.reduce<resultT,rangeT,do_inner_local<R> >
5679 }
5680
5681
5682 /// compute the inner product of this range with other
///
/// The ket is not taken from a stored tree: it is assembled box by box from the pieces
/// held by the CompositeFunctorInterface of `ket`.
/// NOTE(review): the struct header, the members (bra, ket) and the constructor are not
/// part of this listing.
5683 template<typename R>
5687 bool leaves_only=true;
5688 typedef TENSOR_RESULT_TYPE(T,R) resultT;
5689
/// contribution of one node of the bra tree; nodes without coefficients contribute 0
5693 resultT operator()(typename dcT::const_iterator& it) const {
5694
// dimension of one particle: half of NDIM, but at least 1
5695 constexpr std::size_t LDIM=std::max(NDIM/2,std::size_t(1));
5696
5697 const keyT& key=it->first;
5698 const nodeT& fnode = it->second;
5699 if (not fnode.has_coeff()) return resultT(0.0); // probably internal nodes
5700
5701 // assuming all boxes (esp the low-dim ones) are local, i.e. the functions are replicated
// recursive lambda: walk up the tree until a key is found that exists in impl's container
5702 auto find_valid_parent = [](auto& key, auto& impl, auto&& find_valid_parent) {
5703 MADNESS_CHECK(impl->get_coeffs().owner(key)==impl->world.rank()); // make sure everything is local!
5704 if (impl->get_coeffs().probe(key)) return key;
5705 auto parentkey=key.parent();
5706 return find_valid_parent(parentkey, impl, find_valid_parent);
5707 };
5708
5709 // returns coefficients, empty if no functor present
// if the first entry of v_impl is set: take the coefficients of the closest existing
// ancestor of key and pass them through parent_to_child for the box `key`
5710 auto get_coeff = [&find_valid_parent](const auto& key, const auto& v_impl) {
5711 if ((v_impl.size()>0) and v_impl.front().get()) {
5712 auto impl=v_impl.front();
5713
5714// bool have_impl=impl.get();
5715// if (have_impl) {
5716 auto parentkey = find_valid_parent(key, impl, find_valid_parent);
5717 MADNESS_CHECK(impl->get_coeffs().probe(parentkey));
5718 typename decltype(impl->coeffs)::accessor acc;
5719 impl->get_coeffs().find(acc,parentkey);
5720 auto parentcoeff=acc->second.coeff();
5721 auto coeff=impl->parent_to_child(parentcoeff, parentkey, key);
5722 return coeff;
5723 } else {
5724 // get type of vector elements
5725 typedef typename std::decay_t<decltype(v_impl)>::value_type::element_type::typeT S;
5726// typedef typename std::decay_t<decltype(v_impl)>::value_type S;
5727 return GenTensor<S>();
5728// return GenTensor<typename std::decay_t<decltype(*impl)>::typeT>();
5729 }
5730 };
5731
// wrap a single object in a one-element vector so that get_coeff can be reused
5732 auto make_vector = [](auto& arg) {
5733 return std::vector<std::decay_t<decltype(arg)>>(1,arg);
5734 };
5735
5736
5737 Key<LDIM> key1,key2;
5738 key.break_apart(key1,key2);
5739
// NOTE(review): func is dereferenced below; one line following the cast is not part of
// this listing, so it is not visible whether a failed dynamic_cast is checked
5740 auto func=dynamic_cast<CompositeFunctorInterface<R,NDIM,LDIM>* >(ket->functor.get());
5742
5743 MADNESS_CHECK_THROW(func->impl_ket_vector.size()==0 or func->impl_ket_vector.size()==1,
5744 "only one ket function supported in inner_on_demand");
5745 MADNESS_CHECK_THROW(func->impl_p1_vector.size()==0 or func->impl_p1_vector.size()==1,
5746 "only one p1 function supported in inner_on_demand");
5747 MADNESS_CHECK_THROW(func->impl_p2_vector.size()==0 or func->impl_p2_vector.size()==1,
5748 "only one p2 function supported in inner_on_demand");
5749 auto coeff_bra=fnode.coeff();
5750 auto coeff_ket=get_coeff(key,func->impl_ket_vector);
5751 auto coeff_v1=get_coeff(key1,make_vector(func->impl_m1));
5752 auto coeff_v2=get_coeff(key2,make_vector(func->impl_m2));
5753 auto coeff_p1=get_coeff(key1,func->impl_p1_vector);
5754 auto coeff_p2=get_coeff(key2,func->impl_p2_vector);
5755
5756 // construct |ket(1,2)> or |p(1)p(2)> or |p(1)p(2) ket(1,2)>
// `error` accumulates pm.error of every pointwise multiplication done for this box
5757 double error=0.0;
5758 if (coeff_ket.has_data() and coeff_p1.has_data()) {
5759 pointwise_multiplier<LDIM> pm(key,coeff_ket);
5760 coeff_ket=pm(key,outer(coeff_p1,coeff_p2,TensorArgs(TT_FULL,-1.0)).full_tensor());
5761 error+=pm.error;
5762 } else if (coeff_ket.has_data() or coeff_p1.has_data()) {
5763 coeff_ket = (coeff_ket.has_data()) ? coeff_ket : outer(coeff_p1,coeff_p2);
5764 } else { // not ket and no p1p2
5765 MADNESS_EXCEPTION("confused ket/p1p2 in do_inner_local_on_demand",1);
5766 }
5767
5768 // construct (v(1) + v(2)) |ket(1,2)>
// NOTE(review): only coeff_v1 is tested for data; coeff_v2 is used unconditionally
// inside the branch - confirm that both potentials are always set together
5769 coeffT v1v2ket;
5770 if (coeff_v1.has_data()) {
5771 pointwise_multiplier<LDIM> pm(key,coeff_ket);
5772 v1v2ket = pm(key,coeff_v1.full_tensor(), 1);
5773 error+=pm.error;
5774 v1v2ket+= pm(key,coeff_v2.full_tensor(), 2);
5775 error+=pm.error;
5776 } else {
5777 v1v2ket = coeff_ket;
5778 }
5779
5780 resultT result;
5781 if (func->impl_eri) { // project bra*ket onto eri, avoid multiplication with eri
5782 MADNESS_CHECK(func->impl_eri->get_functor()->provides_coeff());
5783 coeffT coeff_eri=func->impl_eri->get_functor()->coeff(key).full_tensor();
5784 pointwise_multiplier<LDIM> pm(key,v1v2ket);
5785 tensorT braket=pm(key,coeff_bra.full_tensor_copy().conj());
5786 error+=pm.error;
// accumulated multiplication errors above 1.e-3 are only reported, not treated as failure
5787 if (error>1.e-3) print("error in key",key,error);
5788 result=coeff_eri.full_tensor().trace(braket);
5789
5790 } else { // no eri, project ket onto bra
5791 result=coeff_bra.full_tensor_copy().trace_conj(v1v2ket.full_tensor_copy());
5792 }
5793 return result;
5794 }
5795
/// combine two partial sums
5796 resultT operator()(resultT a, resultT b) const {
5797 return (a+b);
5798 }
5799
/// serialization is not supported: always raises MADNESS_EXCEPTION
5800 template <typename Archive> void serialize(const Archive& ar) {
5801 MADNESS_EXCEPTION("NOT IMPLEMENTED", 1);
5802 }
5803 };
5804
5805 /// Returns the inner product of this with function g constructed on-the-fly
5806
5807 /// the leaf boxes of this' MRA tree defines the inner product
///
/// NOTE(review): most of the body (state checks, range definition, start of the reduce
/// call) is not part of this listing; the visible tail passes
/// do_inner_local_on_demand<R>(this, &gimpl) as the reduction functor.
/// @param[in] gimpl the function whose functor provides the ket on demand
5808 template <typename R>
5809 TENSOR_RESULT_TYPE(T,R) inner_local_on_demand(const FunctionImpl<R,NDIM>& gimpl) const {
5812
5816 do_inner_local_on_demand<R>(this, &gimpl));
5817 }
5818
5819 /// compute the inner product of this range with other
///
/// Same structure as the inner-product functor, but the contribution of a node pair is
/// full_tensor().trace(...) instead of trace_conj(...).
/// NOTE(review): the struct header, the members (other, leaves_only), the constructor and
/// the declaration of `sum` are not part of this listing.
5820 template<typename R>
5824 typedef TENSOR_RESULT_TYPE(T,R) resultT;
5825
/// contribution of one node, provided a node with the same key exists in `other` and
/// both hold coefficients
5828 resultT operator()(typename dcT::const_iterator& it) const {
5829
5831 const keyT& key=it->first;
5832 const nodeT& fnode = it->second;
5833 if (fnode.has_coeff()) {
5834 if (other->coeffs.probe(it->first)) {
5835 const FunctionNode<R,NDIM>& gnode = other->coeffs.find(key).get()->second;
5836 if (gnode.has_coeff()) {
// differing leading dimensions are reported and treated as a fatal error
5837 if (gnode.coeff().dim(0) != fnode.coeff().dim(0)) {
5838 madness::print("DOT", it->first, gnode.coeff().dim(0),fnode.coeff().dim(0));
5839 MADNESS_EXCEPTION("functions have different k or compress/reconstruct error", 0);
5840 }
// with leaves_only set, a pair of nodes counts only if at least one of them is a leaf
5841 if (leaves_only) {
5842 if (gnode.is_leaf() or fnode.is_leaf()) {
5843 sum += fnode.coeff().full_tensor().trace(gnode.coeff().full_tensor());
5844 }
5845 } else {
5846 sum += fnode.coeff().full_tensor().trace(gnode.coeff().full_tensor());
5847 }
5848 }
5849 }
5850 }
5851 return sum;
5852 }
5853
/// combine two partial sums
5854 resultT operator()(resultT a, resultT b) const {
5855 return (a+b);
5856 }
5857
/// serialization is not supported: always raises MADNESS_EXCEPTION
5858 template <typename Archive> void serialize(const Archive& ar) {
5859 MADNESS_EXCEPTION("NOT IMPLEMENTED", 1);
5860 }
5861 };
5862
5863 /// Returns the dot product ASSUMING same distribution
5864
5865 /// handles compressed and redundant form
///
/// NOTE(review): the signature, the state check, the range definition and the arguments
/// of the reduce call are not part of this listing.
5866 template <typename R>
5870 typedef TENSOR_RESULT_TYPE(T,R) resultT;
5871
5872 // make sure the states of the trees are consistent
// only leaf contributions are counted when this tree is in redundant form
5874 bool leaves_only=(this->is_redundant());
5875 return world.taskq.reduce<resultT,rangeT,do_dot_local<R> >
5877 }
5878
5879 /// Type of the entry in the map returned by make_key_vec_map
5880 typedef std::vector< std::pair<int,const coeffT*> > mapvecT;
5881
5882 /// Type of the map returned by make_key_vec_map
5884
5885 /// Adds keys to union of local keys with specified index
///
/// For every local node that holds coefficients, the pair (index, pointer to the node's
/// coefficients) is appended to the map entry of the node's key. The map stores pointers
/// into this tree, not copies of the coefficients.
/// @param[in,out] map the map that is extended
/// @param[in] index the number under which this function is recorded
5886 void add_keys_to_map(mapT* map, int index) const {
5887 typename dcT::const_iterator end = coeffs.end();
5888 for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
5889 typename mapT::accessor acc;
5890 const keyT& key = it->first;
5891 const FunctionNode<T,NDIM>& node = it->second;
5892 if (node.has_coeff()) {
// the entry is reached through the accessor whether or not the key was newly inserted
5893 [[maybe_unused]] auto inserted = map->insert(acc,key);
5894 acc->second.push_back(std::make_pair(index,&(node.coeff())));
5895 }
5896 }
5897 }
5898
5899 /// Returns map of union of local keys to vector of indexes of functions containing that key
5900
5901 /// Local concurrency and synchronization only; no communication
///
/// One add_keys_to_map task is queued per function, with the function's position in v as
/// its index; the task queue of v[0]'s world is fenced before the map is returned.
/// @param[in] v the functions whose local keys are collected
5902 static
5903 mapT
5904 make_key_vec_map(const std::vector<const FunctionImpl<T,NDIM>*>& v) {
// 100000 is the constructor argument of the map (presumably a size hint - TODO confirm)
5905 mapT map(100000);
5906 // This loop must be parallelized
5907 for (unsigned int i=0; i<v.size(); i++) {
5908 //v[i]->add_keys_to_map(&map,i);
5909 v[i]->world.taskq.add(*(v[i]), &FunctionImpl<T,NDIM>::add_keys_to_map, &map, int(i));
5910 }
// nothing was queued for an empty v, so there is nothing to wait for
5911 if (v.size()) v[0]->world.taskq.fence();
5912 return map;
5913 }
5914
// Two implementations of do_inner_localX: the first (#if 0) branch is disabled and kept
// for reference, the #else branch is the one that is compiled.
5915#if 0
5916// Original
5917 template <typename R>
5918 static void do_inner_localX(const typename mapT::iterator lstart,
5919 const typename mapT::iterator lend,
5920 typename FunctionImpl<R,NDIM>::mapT* rmap_ptr,
5921 const bool sym,
5922 Tensor< TENSOR_RESULT_TYPE(T,R) >* result_ptr,
5923 Mutex* mutex) {
5924 Tensor< TENSOR_RESULT_TYPE(T,R) >& result = *result_ptr;
5925 Tensor< TENSOR_RESULT_TYPE(T,R) > r(result.dim(0),result.dim(1));
5926 for (typename mapT::iterator lit=lstart; lit!=lend; ++lit) {
5927 const keyT& key = lit->first;
5928 typename FunctionImpl<R,NDIM>::mapT::iterator rit=rmap_ptr->find(key);
5929 if (rit != rmap_ptr->end()) {
5930 const mapvecT& leftv = lit->second;
5931 const typename FunctionImpl<R,NDIM>::mapvecT& rightv =rit->second;
5932 const int nleft = leftv.size();
5933 const int nright= rightv.size();
5934
5935 for (int iv=0; iv<nleft; iv++) {
5936 const int i = leftv[iv].first;
5937 const GenTensor<T>* iptr = leftv[iv].second;
5938
5939 for (int jv=0; jv<nright; jv++) {
5940 const int j = rightv[jv].first;
5941 const GenTensor<R>* jptr = rightv[jv].second;
5942
5943 if (!sym || (sym && i<=j))
5944 r(i,j) += iptr->trace_conj(*jptr);
5945 }
5946 }
5947 }
5948 }
5949 mutex->lock();
5950 result += r;
5951 mutex->unlock();
5952 }
5953#else
/// Accumulate the contributions of the keys in [lstart,lend) to the matrix *result_ptr.
///
/// For every key that also exists in *rmap_ptr, the coefficients of all left and right
/// functions stored under that key are packed row-wise into two dense matrices, multiplied
/// with mxmT, and the products are added to result(i,j) under the mutex.
/// @param[in] lstart first entry of the left map to process
/// @param[in] lend one-past-the-last entry of the left map to process
/// @param[in] rmap_ptr map of the right-hand functions
/// @param[in] sym if true only elements with i<=j are accumulated
/// @param[in,out] result_ptr the shared result matrix
/// @param[in] mutex protects the updates of *result_ptr
5954 template <typename R>
5955 static void do_inner_localX(const typename mapT::iterator lstart,
5956 const typename mapT::iterator lend,
5957 typename FunctionImpl<R,NDIM>::mapT* rmap_ptr,
5958 const bool sym,
5959 Tensor< TENSOR_RESULT_TYPE(T,R) >* result_ptr,
5960 Mutex* mutex) {
5961 Tensor< TENSOR_RESULT_TYPE(T,R) >& result = *result_ptr;
5962 //Tensor< TENSOR_RESULT_TYPE(T,R) > r(result.dim(0),result.dim(1));
5963 for (typename mapT::iterator lit=lstart; lit!=lend; ++lit) {
5964 const keyT& key = lit->first;
5965 typename FunctionImpl<R,NDIM>::mapT::iterator rit=rmap_ptr->find(key);
5966 if (rit != rmap_ptr->end()) {
5967 const mapvecT& leftv = lit->second;
5968 const typename FunctionImpl<R,NDIM>::mapvecT& rightv =rit->second;
5969 const size_t nleft = leftv.size();
5970 const size_t nright= rightv.size();
5971
// NOTE(review): the row length is taken from the first left tensor only; this assumes
// all tensors stored under this key have the same number of elements - confirm
5972 unsigned int size = leftv[0].second->size();
5973 Tensor<T> Left(nleft, size);
5974 Tensor<R> Right(nright, size);
5975 Tensor< TENSOR_RESULT_TYPE(T,R)> r(nleft, nright);
5976 for(unsigned int iv = 0; iv < nleft; ++iv) Left(iv,_) = (*(leftv[iv].second)).full_tensor();
5977 for(unsigned int jv = 0; jv < nright; ++jv) Right(jv,_) = (*(rightv[jv].second)).full_tensor();
5978 // call mxmT from mxm.h in tensor
5979 if(TensorTypeData<T>::iscomplex) Left = Left.conj(); // Should handle complex case and leave real case alone
5980 mxmT(nleft, nright, size, r.ptr(), Left.ptr(), Right.ptr());
// rows/columns of r are positions within leftv/rightv; translate them to function indices
5981 mutex->lock();
5982 for(unsigned int iv = 0; iv < nleft; ++iv) {
5983 const int i = leftv[iv].first;
5984 for(unsigned int jv = 0; jv < nright; ++jv) {
5985 const int j = rightv[jv].first;
5986 if (!sym || (sym && i<=j)) result(i,j) += r(iv,jv);
5987 }
5988 }
5989 mutex->unlock();
5990 }
5991 }
5992 }
5993#endif
5994
// Two implementations of do_dot_localX: the first (#if 0) branch is disabled and kept
// for reference, the #else branch is the one that is compiled.
// NOTE(review): the disabled branch accumulates trace_conj and rejects complex T, the
// active branch multiplies without any conjugation.
5995#if 0
5996// Original
5997 template <typename R, typename = std::enable_if_t<std::is_floating_point_v<R>>>
5998 static void do_dot_localX(const typename mapT::iterator lstart,
5999 const typename mapT::iterator lend,
6000 typename FunctionImpl<R, NDIM>::mapT* rmap_ptr,
6001 const bool sym,
6002 Tensor<TENSOR_RESULT_TYPE(T, R)>* result_ptr,
6003 Mutex* mutex) {
6004 if (TensorTypeData<T>::iscomplex) MADNESS_EXCEPTION("no complex trace in LowRankTensor, sorry", 1);
6005 Tensor<TENSOR_RESULT_TYPE(T, R)>& result = *result_ptr;
6006 Tensor<TENSOR_RESULT_TYPE(T, R)> r(result.dim(0), result.dim(1));
6007 for (typename mapT::iterator lit = lstart; lit != lend; ++lit) {
6008 const keyT& key = lit->first;
6009 typename FunctionImpl<R, NDIM>::mapT::iterator rit = rmap_ptr->find(key);
6010 if (rit != rmap_ptr->end()) {
6011 const mapvecT& leftv = lit->second;
6012 const typename FunctionImpl<R, NDIM>::mapvecT& rightv = rit->second;
6013 const int nleft = leftv.size();
6014 const int nright = rightv.size();
6015
6016 for (int iv = 0; iv < nleft; iv++) {
6017 const int i = leftv[iv].first;
6018 const GenTensor<T>* iptr = leftv[iv].second;
6019
6020 for (int jv = 0; jv < nright; jv++) {
6021 const int j = rightv[jv].first;
6022 const GenTensor<R>* jptr = rightv[jv].second;
6023
6024 if (!sym || (sym && i <= j))
6025 r(i, j) += iptr->trace_conj(*jptr);
6026 }
6027 }
6028 }
6029 }
6030 mutex->lock();
6031 result += r;
6032 mutex->unlock();
6033 }
6034#else
/// Accumulate the contributions of the keys in [lstart,lend) to the matrix *result_ptr.
///
/// For every key that also exists in *rmap_ptr, the coefficients of all left and right
/// functions stored under that key are packed row-wise into two dense matrices, multiplied
/// with mxmT (no conjugation), and the products are added to result(i,j) under the mutex.
/// @param[in] lstart first entry of the left map to process
/// @param[in] lend one-past-the-last entry of the left map to process
/// @param[in] rmap_ptr map of the right-hand functions
/// @param[in] sym if true only elements with i<=j are accumulated
/// @param[in,out] result_ptr the shared result matrix
/// @param[in] mutex protects the updates of *result_ptr
6035 template <typename R>
6036 static void do_dot_localX(const typename mapT::iterator lstart,
6037 const typename mapT::iterator lend,
6038 typename FunctionImpl<R, NDIM>::mapT* rmap_ptr,
6039 const bool sym,
6040 Tensor<TENSOR_RESULT_TYPE(T, R)>* result_ptr,
6041 Mutex* mutex) {
6042 Tensor<TENSOR_RESULT_TYPE(T, R)>& result = *result_ptr;
6043 // Tensor<TENSOR_RESULT_TYPE(T, R)> r(result.dim(0), result.dim(1));
6044 for (typename mapT::iterator lit = lstart; lit != lend; ++lit) {
6045 const keyT& key = lit->first;
6046 typename FunctionImpl<R, NDIM>::mapT::iterator rit = rmap_ptr->find(key);
6047 if (rit != rmap_ptr->end()) {
6048 const mapvecT& leftv = lit->second;
6049 const typename FunctionImpl<R, NDIM>::mapvecT& rightv = rit->second;
6050 const size_t nleft = leftv.size();
6051 const size_t nright= rightv.size();
6052
// NOTE(review): the row length is taken from the first left tensor only; this assumes
// all tensors stored under this key have the same number of elements - confirm
6053 unsigned int size = leftv[0].second->size();
6054 Tensor<T> Left(nleft, size);
6055 Tensor<R> Right(nright, size);
6056 Tensor< TENSOR_RESULT_TYPE(T, R)> r(nleft, nright);
6057 for(unsigned int iv = 0; iv < nleft; ++iv) Left(iv, _) = (*(leftv[iv].second)).full_tensor();
6058 for(unsigned int jv = 0; jv < nright; ++jv) Right(jv, _) = (*(rightv[jv].second)).full_tensor();
6059 // call mxmT from mxm.h in tensor
6060 mxmT(nleft, nright, size, r.ptr(), Left.ptr(), Right.ptr());
// rows/columns of r are positions within leftv/rightv; translate them to function indices
6061 mutex->lock();
6062 for(unsigned int iv = 0; iv < nleft; ++iv) {
6063 const int i = leftv[iv].first;
6064 for(unsigned int jv = 0; jv < nright; ++jv) {
6065 const int j = rightv[jv].first;
6066 if (!sym || (sym && i <= j)) result(i, j) += r(iv, jv);
6067 }
6068 }
6069 mutex->unlock();
6070 }
6071 }
6072 }
6073#endif
6074
/// conj overload for floating-point types: returns the argument unchanged, so that generic
/// code can call conj() for real and complex values alike
6075 template <typename Real>
6076 static std::enable_if_t<std::is_floating_point_v<Real>, Real> conj(const Real x) {
6077 return x;
6078 }
6079
/// conj overload for std::complex: forwards to std::conj
6080 template <typename Real>
6081 static std::complex<Real> conj(const std::complex<Real>& x) {
6082 return std::conj(x);
6083 }
6084
/// Matrix of inner products between two sets of functions, from local data only.
///
/// @param[in] left the functions indexing the rows of the result
/// @param[in] right the functions indexing the columns of the result
/// @param[in] sym if true only elements with i<=j are computed by the tasks and the
///            remaining ones are filled in from their transposed partner afterwards
/// @return tensor of dimension left.size() x right.size()
/// NOTE(review): left[0] is dereferenced unconditionally, so `left` must not be empty.
6085 template <typename R>
6086 static Tensor< TENSOR_RESULT_TYPE(T,R) >
6087 inner_local(const std::vector<const FunctionImpl<T,NDIM>*>& left,
6088 const std::vector<const FunctionImpl<R,NDIM>*>& right,
6089 bool sym) {
6090
6091 // This is basically a sparse matrix^T * matrix product
6092 // Rij = sum(k) Aki * Bkj
6093 // where i and j index functions and k index the wavelet coeffs
6094 // eventually the goal is this structure (don't have jtile yet)
6095 //
6096 // do in parallel tiles of k (tensors of coeffs)
6097 // do tiles of j
6098 // do i
6099 // do j in jtile
6100 // do k in ktile
6101 // Rij += Aki*Bkj
6102
6103 mapT lmap = make_key_vec_map(left);
6104 typename FunctionImpl<R,NDIM>::mapT rmap;
// by default the right map aliases the left map; a separate map is used only when left
// and right are different vector objects (address comparison)
// NOTE(review): one line inside the if-branch (presumably the one filling rmap) is not
// part of this listing
6105 auto* rmap_ptr = (typename FunctionImpl<R,NDIM>::mapT*)(&lmap);
6106 if ((std::vector<const FunctionImpl<R,NDIM>*>*)(&left) != &right) {
6108 rmap_ptr = &rmap;
6109 }
6110
// number of map entries handled by one task
6111 size_t chunk = (lmap.size()-1)/(3*4*5)+1;
6112
6113 Tensor< TENSOR_RESULT_TYPE(T,R) > r(left.size(), right.size());
6114 Mutex mutex;
6115
// one task per chunk of the left map; all tasks add into r, guarded by mutex
6116 typename mapT::iterator lstart=lmap.begin();
6117 while (lstart != lmap.end()) {
6118 typename mapT::iterator lend = lstart;
6119 advance(lend,chunk);
6120 left[0]->world.taskq.add(&FunctionImpl<T,NDIM>::do_inner_localX<R>, lstart, lend, rmap_ptr, sym, &r, &mutex);
6121 lstart = lend;
6122 }
6123 left[0]->world.taskq.fence();
6124
// fill the strictly lower triangle from the upper one: r(i,j) += conj(r(j,i))
6125 if (sym) {
6126 for (long i=0; i<r.dim(0); i++) {
6127 for (long j=0; j<i; j++) {
6128 TENSOR_RESULT_TYPE(T,R) sum = r(i,j)+conj(r(j,i));
6129 r(i,j) = sum;
6130 r(j,i) = conj(sum);
6131 }
6132 }
6133 }
6134 return r;
6135 }
6136
/// Matrix of dot products between two sets of functions, from local data only.
///
/// @param[in] left the functions indexing the rows of the result
/// @param[in] right the functions indexing the columns of the result
/// @param[in] sym if true only elements with i<=j are computed by the tasks and the
///            remaining ones are filled in from their transposed partner afterwards
/// @return tensor of dimension left.size() x right.size()
/// NOTE(review): left[0] is dereferenced unconditionally, so `left` must not be empty.
6137 template <typename R>
6138 static Tensor<TENSOR_RESULT_TYPE(T, R)>
6139 dot_local(const std::vector<const FunctionImpl<T, NDIM>*>& left,
6140 const std::vector<const FunctionImpl<R, NDIM>*>& right,
6141 bool sym) {
6142
6143 // This is basically a sparse matrix * matrix product
6144 // Rij = sum(k) Aik * Bkj
6145 // where i and j index functions and k index the wavelet coeffs
6146 // eventually the goal is this structure (don't have jtile yet)
6147 //
6148 // do in parallel tiles of k (tensors of coeffs)
6149 // do tiles of j
6150 // do i
6151 // do j in jtile
6152 // do k in ktile
6153 // Rij += Aik*Bkj
6154
6155 mapT lmap = make_key_vec_map(left);
6156 typename FunctionImpl<R, NDIM>::mapT rmap;
// by default the right map aliases the left map; a separate map is used only when left
// and right are different vector objects (address comparison)
// NOTE(review): one line inside the if-branch (presumably the one filling rmap) is not
// part of this listing
6157 auto* rmap_ptr = (typename FunctionImpl<R, NDIM>::mapT*)(&lmap);
6158 if ((std::vector<const FunctionImpl<R, NDIM>*>*)(&left) != &right) {
6160 rmap_ptr = &rmap;
6161 }
6162
// number of map entries handled by one task
6163 size_t chunk = (lmap.size() - 1) / (3 * 4 * 5) + 1;
6164
6165 Tensor<TENSOR_RESULT_TYPE(T, R)> r(left.size(), right.size());
6166 Mutex mutex;
6167
// one task per chunk of the left map; all tasks add into r, guarded by mutex
6168 typename mapT::iterator lstart=lmap.begin();
6169 while (lstart != lmap.end()) {
6170 typename mapT::iterator lend = lstart;
6171 advance(lend, chunk);
6172 left[0]->world.taskq.add(&FunctionImpl<T, NDIM>::do_dot_localX<R>, lstart, lend, rmap_ptr, sym, &r, &mutex);
6173 lstart = lend;
6174 }
6175 left[0]->world.taskq.fence();
6176
6177 // sym is for hermiticity
// NOTE(review): the lower triangle is filled with conj(r(j,i)) although the tasks compute
// an unconjugated product - confirm that this is intended for complex types
6178 if (sym) {
6179 for (long i = 0; i < r.dim(0); i++) {
6180 for (long j = 0; j < i; j++) {
6181 TENSOR_RESULT_TYPE(T, R) sum = r(i, j) + conj(r(j, i));
6182 r(i, j) = sum;
6183 r(j, i) = conj(sum);
6184 }
6185 }
6186 }
6187 return r;
6188 }
6189
// NOTE(review): the signature line of this helper is not part of this listing.
// The asserted condition (R is not int AND R is int) can never hold, so every
// instantiation fails to compile; judging by the message this is a debugging aid
// for making the compiler reveal the type R.
6190 template <typename R>
6192 {
6193 static_assert(!std::is_same<R, int>::value &&
6194 std::is_same<R, int>::value,
6195 "Compilation failed because you wanted to know the type; see below:");
6196 }
6197
6198 /// invoked by result
6199
6200 /// contract 2 functions f(x,z) = \int g(x,y) * h(y,z) dy
///
/// Works in two phases: first, scale by scale, a map from result keys to the lists of
/// contraction keys that contribute to them is assembled; then one
/// partial_inner_contract task per result key is sent to the owner of that key.
/// NOTE(review): the signature line and the definitions of nmax and thresh are not part
/// of this listing.
6201 /// @tparam CDIM: the dimension of the contraction variable (y)
6202 /// @tparam NDIM: the dimension of the result (x,z)
6203 /// @tparam LDIM: the dimension of g(x,y)
6204 /// @tparam KDIM: the dimension of h(y,z)
6205 template<typename Q, std::size_t LDIM, typename R, std::size_t KDIM,
6206 std::size_t CDIM = (KDIM + LDIM - NDIM) / 2>
6208 const std::array<int, CDIM> v1, const std::array<int, CDIM> v2) {
6209
6210 typedef std::multimap<Key<NDIM>, std::list<Key<CDIM>>> contractionmapT;
6211 //double wall_get_lists=0.0;
6212 //double wall_recur=0.0;
6213 //double wall_contract=0.0;
6216
6217 // auto print_map = [](const auto& map) {
6218 // for (const auto& kv : map) print(kv.first,"--",kv.second);
6219 // };
6220 // logical constness, not bitwise constness
6221 FunctionImpl<Q,LDIM>& g_nc=const_cast<FunctionImpl<Q,LDIM>&>(g);
6222 FunctionImpl<R,KDIM>& h_nc=const_cast<FunctionImpl<R,KDIM>&>(h);
6223
// phase 1: one contraction map per scale n, collected in all_contraction_maps
6224 std::list<contractionmapT> all_contraction_maps;
6225 for (std::size_t n=0; n<nmax; ++n) {
6226
6227 // list of nodes with d coefficients (and their parents)
6228 //double wall0 = wall_time();
6229 auto [g_ijlist, g_jlist] = g.get_contraction_node_lists(n, v1);
6230 auto [h_ijlist, h_jlist] = h.get_contraction_node_lists(n, v2);
// stop as soon as neither function has nodes with d coefficients on this scale
6231 if ((g_ijlist.size() == 0) and (h_ijlist.size() == 0)) break;
6232 //double wall1 = wall_time();
6233 //wall_get_lists += (wall1 - wall0);
6234 //wall0 = wall1;
6235// print("g_jlist");
6236// for (const auto& kv : g_jlist) print(kv.first,kv.second);
6237// print("h_jlist");
6238// for (const auto& kv : h_jlist) print(kv.first,kv.second);
6239
6240 // next lines will insert s nodes into g and h -> possible race condition!
6241 bool this_first = true; // are the remaining indices of g before those of h: f(x,z) = g(x,y) h(y,z)
6242 // CDIM, NDIM, KDIM
6243 contractionmapT contraction_map = g_nc.recur_down_for_contraction_map(
6244 g_nc.key0(), g_nc.get_coeffs().find(g_nc.key0()).get()->second, v1, v2,
6245 h_ijlist, h_jlist, this_first, thresh);
6246
6247 this_first = false;
6248 // CDIM, NDIM, LDIM
6249 auto hnode0=h_nc.get_coeffs().find(h_nc.key0()).get()->second;
6250 contractionmapT contraction_map1 = h_nc.recur_down_for_contraction_map(
6251 h_nc.key0(), hnode0, v2, v1,
6252 g_ijlist, g_jlist, this_first, thresh);
6253
6254 // will contain duplicate entries
6255 contraction_map.merge(contraction_map1);
6256 // turn multimap into a map of list
// for each run of equal keys: splice all lists into the first entry and erase the others
6257 auto it = contraction_map.begin();
6258 while (it != contraction_map.end()) {
6259 auto it_end = contraction_map.upper_bound(it->first);
6260 auto it2 = it;
6261 it2++;
6262 while (it2 != it_end) {
6263 it->second.splice(it->second.end(), it2->second);
6264 it2 = contraction_map.erase(it2);
6265 }
6266 it = it_end;
6267 }
6268// print("thresh ",thresh);
6269// print("contraction list size",contraction_map.size());
6270
6271 // remove all double entries
6272 for (auto& elem: contraction_map) {
6273 elem.second.sort();
6274 elem.second.unique();
6275 }
6276 //wall1 = wall_time();
6277 //wall_recur += (wall1 - wall0);
6278// if (n==2) {
6279// print("contraction map for n=", n);
6280// print_map(contraction_map);
6281// }
6282 all_contraction_maps.push_back(contraction_map);
6283
// an empty map for this scale ends the loop over scales
6284 long mapsize=contraction_map.size();
6285 if (mapsize==0) break;
6286 }
6287
6288
6289 // finally do the contraction
// phase 2: one task per result key, executed on the process owning that key
6290 for (const auto& contraction_map : all_contraction_maps) {
6291 for (const auto& key_list : contraction_map) {
6292 const Key<NDIM>& key=key_list.first;
6293 const std::list<Key<CDIM>>& list=key_list.second;
6294 woT::task(coeffs.owner(key), &implT:: template partial_inner_contract<Q,LDIM,R,KDIM>,
6295 &g,&h,v1,v2,key,list);
6296 }
6297 }
6298 }
6299
6300 /// for contraction two functions f(x,z) = \int g(x,y) h(y,z) dy
6301
6302 /// find all nodes with d coefficients and return a list of complete keys and of
6303 /// keys holding only the y dimension, also the maximum norm of all d for the j dimension
///
/// Only nodes of the local container on level n are considered. All ancestors of a j key
/// are added to the j list as well, with the value 1.0.
/// NOTE(review): the declaration of j_trans is not part of this listing.
6304 /// @param[in] n the scale
6305 /// @param[in] v array holding the indices of the integration variable
6306 /// @return ijlist: list of all nodes with d coeffs; jlist: j-part of ij list only
6307 template<std::size_t CDIM>
6308 std::tuple<std::set<Key<NDIM>>, std::map<Key<CDIM>,double>>
6309 get_contraction_node_lists(const std::size_t n, const std::array<int, CDIM>& v) const {
6310
6311 const auto& cdata=get_cdata();
// a node counts as holding d coefficients if its tensor is non-empty and has leading dimension 2k
6312 auto has_d_coeffs = [&cdata](const coeffT& coeff) {
6313 if (coeff.has_no_data()) return false;
6314 return (coeff.dim(0)==2*cdata.k);
6315 };
6316
6317 // keys to be contracted in g
6318 std::set<Key<NDIM>> ij_list; // full key
6319 std::map<Key<CDIM>,double> j_list; // only that dimension that will be contracted
6320
6321 for (auto it=get_coeffs().begin(); it!=get_coeffs().end(); ++it) {
6322 const Key<NDIM>& key=it->first;
6323 const FunctionNode<T,NDIM>& node=it->second;
6324 if ((key.level()==int(n)) and (has_d_coeffs(node.coeff()))) {
6325 ij_list.insert(key);
// project the translation of key onto the contracted dimensions listed in v
6327 for (std::size_t i=0; i<CDIM; ++i) j_trans[i]=key.translation()[v[i]];
6328 Key<CDIM> jkey(n,j_trans);
// operator[] creates the entry with value 0.0 if jkey is new, so the max below is well defined
6329 const double max_d_norm=j_list[jkey];
6330 j_list.insert_or_assign(jkey,std::max(max_d_norm,node.get_dnorm()));
// register the ancestors of jkey until one is found that is already in the list
// NOTE(review): termination at the top of the tree relies on the behaviour of
// Key::parent() for the root key, which is not visible here
6331 Key<CDIM> parent_jkey=jkey.parent();
6332 while (j_list.count(parent_jkey)==0) {
6333 j_list.insert({parent_jkey,1.0});
6334 parent_jkey=parent_jkey.parent();
6335 }
6336 }
6337 }
6338 return std::make_tuple(ij_list,j_list);
6339 }
6340
6341 /// make a map of all nodes that will contribute to a partial inner product
6342
6343 /// given the list of d coefficient-holding nodes of the other function:
6344 /// recur down h if snorm * dnorm > tol and key n−jx ∈ other−ij-list. Make s
6345 /// coefficients if necessary. Make list of nodes n − ijk as map(n-ik, list(j)).
6346 ///
6347 /// !! WILL ADD NEW S NODES TO THIS TREE THAT MUST BE REMOVED TO AVOID INCONSISTENT TREE STRUCTURE !!
6348 ///
6349 /// @param[in] key for recursion
6350 /// @param[in] node corresponds to key
6351 /// @param[in] v_this this' dimension that are contracted
6352 /// @param[in] v_other other's dimension that are contracted
6353 /// @param[in] ij_other_list list of nodes of the other function that will be contracted (and their parents)
6354 /// @param[in] j_other_list list of column nodes of the other function that will be contracted (and their parents)
6355 /// @param[in] max_d_norm max d coeff norm of the nodes in j_list
6356 /// @param[in] this_first are the remaining coeffs of this functions first or last in the result function
6357 /// @param[in] thresh threshold for including nodes in the contraction: snorm*dnorm > thresh
6358 /// @tparam CDIM dimension to be contracted
6359 /// @tparam ODIM dimensions of the other function
6360 /// @tparam FDIM dimensions of the final function
6361 template<std::size_t CDIM, std::size_t ODIM, std::size_t FDIM=NDIM+ODIM-2*CDIM>
6362 std::multimap<Key<FDIM>, std::list<Key<CDIM>>> recur_down_for_contraction_map(
6363 const keyT& key, const nodeT& node,
6364 const std::array<int,CDIM>& v_this,
6365 const std::array<int,CDIM>& v_other,
6366 const std::set<Key<ODIM>>& ij_other_list,
6367 const std::map<Key<CDIM>,double>& j_other_list,
6368 bool this_first, const double thresh) {
6369
6370 std::multimap<Key<FDIM>, std::list<Key<CDIM>>> contraction_map;
6371
6372 // fast return if the other function has no d coeffs
6373 if (j_other_list.empty()) return contraction_map;
6374
6375 // continue recursion if this node may be contracted with the j column
6376 // extract relevant node translations from this node
6377 const auto j_this_key=key.extract_key(v_this);
6378
6379// print("\nkey, j_this_key", key, j_this_key);
6380 const double max_d_norm=j_other_list.find(j_this_key)->second;
6381 const bool sd_norm_product_large = node.get_snorm() * max_d_norm > truncate_tol(thresh,key);
6382// print("sd_product_norm",node.get_snorm() * max_d_norm, thresh);
6383
6384 // end recursion if we have reached the final scale n
6385 // with which nodes from other will this node be contracted?
6386 bool final_scale=key.level()==ij_other_list.begin()->level();
6387 if (final_scale and sd_norm_product_large) {
6388 for (auto& other_key : ij_other_list) {
6389 const auto j_other_key=other_key.extract_key(v_other);
6390 if (j_this_key != j_other_key) continue;
6391 auto i_key=key.extract_complement_key(v_this);
6392 auto k_key=other_key.extract_complement_key(v_other);
6393// print("key, ij_other_key",key,other_key);
6394// print("i, k, j key",i_key, k_key, j_this_key);
6395 Key<FDIM> ik_key=(this_first) ? i_key.merge_with(k_key) : k_key.merge_with(i_key);
6396// print("ik_key",ik_key);
6397// MADNESS_CHECK(contraction_map.count(ik_key)==0);
6398 contraction_map.insert(std::make_pair(ik_key,std::list<Key<CDIM>>{j_this_key}));
6399 }
6400 return contraction_map;
6401 }
6402
6403 bool continue_recursion = (j_other_list.count(j_this_key)==1);
6404 if (not continue_recursion) return contraction_map;
6405
6406
6407 // continue recursion if norms are large
6408 continue_recursion = (node.has_children() or sd_norm_product_large);
6409
6410 if (continue_recursion) {
6411 // in case we need to compute children's coefficients: unfilter only once
6412 bool compute_child_s_coeffs=true;
6413 coeffT d = node.coeff();
6414// print("continuing recursion from key",key);
6415
6416 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
6417 keyT child=kit.key();
6418 typename dcT::accessor acc;
6419
6420 // make child's s coeffs if it doesn't exist or if is has no s coeffs
6421 bool childnode_exists=get_coeffs().find(acc,child);
6422 bool need_s_coeffs= childnode_exists ? (acc->second.get_snorm()<=0.0) : true;
6423
6424 coeffT child_s_coeffs;
6425 if (need_s_coeffs) {
6426 if (compute_child_s_coeffs) {
6427 if (d.dim(0)==cdata.vk[0]) { // s coeffs only in this node
6428 coeffT d1(cdata.v2k,get_tensor_args());
6429 d1(cdata.s0)+=d;
6430 d=d1;
6431 }
6432 d = unfilter(d);
6433 compute_child_s_coeffs=false;
6434 }
6435 child_s_coeffs=copy(d(child_patch(child)));
6436 child_s_coeffs.reduce_rank(thresh);
6437 }
6438
6439 if (not childnode_exists) {
6440 get_coeffs().replace(child,nodeT(child_s_coeffs,false));
6441 get_coeffs().find(acc,child);
6442 } else if (childnode_exists and need_s_coeffs) {
6443 acc->second.coeff()=child_s_coeffs;
6444 }
6445 bool exists= get_coeffs().find(acc,child);
6446 MADNESS_CHECK(exists);
6447 nodeT& childnode = acc->second;
6448 if (need_s_coeffs) childnode.recompute_snorm_and_dnorm(get_cdata());
6449// print("recurring down to",child);
6450 contraction_map.merge(recur_down_for_contraction_map(child,childnode, v_this, v_other,
6451 ij_other_list, j_other_list, this_first, thresh));
6452// print("contraction_map.size()",contraction_map.size());
6453 }
6454
6455 }
6456
6457 return contraction_map;
6458 }
6459
6460
6461 /// tensor contraction part of partial_inner
6462
/// For every contraction key j in j_key_list the coefficients of g at (i,j) and of
/// h at (j,k) are looked up, contracted over the j dimensions and summed up. The
/// sum is rank-reduced with get_thresh() and accumulated into this' node at key.
/// On levels > 0 the contribution of the s0 (scaling) block is subtracted again in
/// every branch below.
///
/// NOTE(review): the declaration line naming this function and its g/h arguments
/// is not present in this listing.
///
6463 /// @param[in] g rhs of the inner product
6464 /// @param[in] h lhs of the inner product
6465 /// @param[in] v1 dimensions of g to be contracted
6466 /// @param[in] v2 dimensions of h to be contracted
6467 /// @param[in] key key of result's (this) FunctionNode
6468 /// @param[in] j_key_list list of contraction index-j keys contributing to this' node
6469 template<typename Q, std::size_t LDIM, typename R, std::size_t KDIM,
6470 std::size_t CDIM = (KDIM + LDIM - NDIM) / 2>
6472 const std::array<int, CDIM> v1, const std::array<int, CDIM> v2,
6473 const Key<NDIM>& key, const std::list<Key<CDIM>>& j_key_list) {
6474
// split the result key into the part belonging to g (i) and the part belonging to h (k)
6475 Key<LDIM - CDIM> i_key;
6476 Key<KDIM - CDIM> k_key;
6477 key.break_apart(i_key, k_key);
6478
6479 coeffT result_coeff(get_cdata().v2k, get_tensor_type());
6480 for (const auto& j_key: j_key_list) {
6481
// indices of the dimensions that are not in v: the VCDIM indices following v.back(), modulo VDIM+VCDIM
6482 auto v_complement = [](const auto& v, const auto& vc) {
6483 constexpr std::size_t VDIM = std::tuple_size<std::decay_t<decltype(v)>>::value;
6484 constexpr std::size_t VCDIM = std::tuple_size<std::decay_t<decltype(vc)>>::value;
6485 std::array<int, VCDIM> result;
6486 for (std::size_t i = 0; i < VCDIM; i++) result[i] = (v.back() + i + 1) % (VDIM + VCDIM);
6487 return result;
6488 };
// assemble a full key from the non-contracted part (i_key) and the contracted part (j_key),
// placing the j translations at the positions listed in v
6489 auto make_ij_key = [&v_complement](const auto i_key, const auto j_key, const auto& v) {
6490 constexpr std::size_t IDIM = std::decay_t<decltype(i_key)>::static_size;
6491 constexpr std::size_t JDIM = std::decay_t<decltype(j_key)>::static_size;
6492 static_assert(JDIM == std::tuple_size<std::decay_t<decltype(v)>>::value);
6493
6495 for (std::size_t i = 0; i < v.size(); ++i) l[v[i]] = j_key.translation()[i];
6496 std::array<int, IDIM> vc1;
6497 auto vc = v_complement(v, vc1);
6498 for (std::size_t i = 0; i < vc.size(); ++i) l[vc[i]] = i_key.translation()[i];
6499
6500 return Key<IDIM + JDIM>(i_key.level(), l);
6501 };
6502
6503 Key<LDIM> ij_key = make_ij_key(i_key, j_key, v1);
6504 Key<KDIM> jk_key = make_ij_key(k_key, j_key, v2);
6505
// both source nodes must exist in their respective trees
6506 MADNESS_CHECK(g->get_coeffs().probe(ij_key));
6507 MADNESS_CHECK(h->get_coeffs().probe(jk_key));
6508 const coeffT& gcoeff = g->get_coeffs().find(ij_key).get()->second.coeff();
6509 const coeffT& hcoeff = h->get_coeffs().find(jk_key).get()->second.coeff();
// gcoeff1/hcoeff1: the coefficients embedded into a v2k-sized tensor (s0 block), if they are only k-sized
6510 coeffT gcoeff1, hcoeff1;
6511 if (gcoeff.dim(0) == g->get_cdata().k) {
6512 gcoeff1 = coeffT(g->get_cdata().v2k, g->get_tensor_args());
6513 gcoeff1(g->get_cdata().s0) += gcoeff;
6514 } else {
6515 gcoeff1 = gcoeff;
6516 }
// NOTE(review): the size of hcoeff is compared with g's k, while the branch body uses
// h's cdata -- this looks like it should read h->get_cdata().k; confirm (only matters
// if g and h have different k).
6517 if (hcoeff.dim(0) == g->get_cdata().k) {
6518 hcoeff1 = coeffT(h->get_cdata().v2k, h->get_tensor_args());
6519 hcoeff1(h->get_cdata().s0) += hcoeff;
6520 } else {
6521 hcoeff1 = hcoeff;
6522 }
6523
6524 // offset: 0 for full tensor, 1 for svd representation with rank being the first dimension (r,d1,d2,d3) -> (r,d1*d2*d3)
// fuse the CDIM contraction dimensions into a single one: fusedim(v[0]+offset) is applied CDIM-1 times
6525 auto fuse = [](Tensor<T> tensor, const std::array<int, CDIM>& v, int offset) {
6526 for (std::size_t i = 0; i < CDIM - 1; ++i) {
6527 MADNESS_CHECK((v[i] + 1) == v[i + 1]); // make sure v is contiguous and ascending
6528 tensor = tensor.fusedim(v[0]+offset);
6529 }
6530 return tensor;
6531 };
6532
6533 // use case: partial_projection of 2-electron functions in svd representation f(1) = \int g(2) h(1,2) d2
6534 // c_i = \sum_j a_j b_ij = \sum_jr a_j b_rj b'_rj
6535 // = \sum_jr ( a_j b_rj) b'_rj )
// contract the full tensor `tensor` with the vectors of `particle` of the svd tensor `svdcoeff`;
// raises MADNESS_EXCEPTION if compiled without HAVE_GENTENSOR
6536 auto contract2 = [](const auto& svdcoeff, const auto& tensor, const int particle) {
6537#if HAVE_GENTENSOR
6538 const int spectator_particle=(particle+1)%2;
6539 Tensor<Q> gtensor = svdcoeff.get_svdtensor().make_vector_with_weights(particle);
6540 gtensor=gtensor.reshape(svdcoeff.rank(),gtensor.size()/svdcoeff.rank());
6541 MADNESS_CHECK(gtensor.ndim()==2);
6542 Tensor<Q> gtensor_other = svdcoeff.get_svdtensor().ref_vector(spectator_particle);
6543 Tensor<T> tmp1=inner(gtensor,tensor.flat(),1,0); // tmp1(r) = sum_j a'_(r,j) b(j)
6544 MADNESS_CHECK(tmp1.ndim()==1);
6545 Tensor<T> tmp2=inner(gtensor_other,tmp1,0,0); // tmp2(i) = sum_r a_(r,i) tmp1(r)
6546 return tmp2;
6547#else
6548 MADNESS_EXCEPTION("no partial_inner using svd without GenTensor",1);
6549 return Tensor<T>();
6550#endif
6551 };
6552
// case 1: everything is a full tensor
6553 if (gcoeff.is_full_tensor() and hcoeff.is_full_tensor() and result_coeff.is_full_tensor()) {
6554 // merge multiple contraction dimensions into one
6555 int offset = 0;
6556 Tensor<Q> gtensor = fuse(gcoeff1.full_tensor(), v1, offset);
6557 Tensor<R> htensor = fuse(hcoeff1.full_tensor(), v2, offset);
6558 result_coeff.full_tensor() += inner(gtensor, htensor, v1[0], v2[0]);
// remove the product of the s0 blocks again on all levels but the root level
6559 if (key.level() > 0) {
6560 gtensor = copy(gcoeff1.full_tensor()(g->get_cdata().s0));
6561 htensor = copy(hcoeff1.full_tensor()(h->get_cdata().s0));
6562 gtensor = fuse(gtensor, v1, offset);
6563 htensor = fuse(htensor, v2, offset);
6564 result_coeff.full_tensor()(get_cdata().s0) -= inner(gtensor, htensor, v1[0], v2[0]);
6565 }
6566 }
6567
6568
6569 // use case: 2-electron functions in svd representation f(1,3) = \int g(1,2) h(2,3) d2
6570 // c_ik = \sum_j a_ij b_jk = \sum_jrr' a_ri a'_rj b_r'j b_r'k
6571 // = \sum_jrr' ( a_ri (a'_rj b_r'j) ) b_r'k
6572 // = \sum_jrr' c_r'i b_r'k
6573 else if (gcoeff.is_svd_tensor() and hcoeff.is_svd_tensor() and result_coeff.is_svd_tensor()) {
// the contracted dimensions must be the leading or the trailing ones, i.e. one whole "particle"
6574 MADNESS_CHECK(v1[0]==0 or v1[CDIM-1]==LDIM-1);
6575 MADNESS_CHECK(v2[0]==0 or v2[CDIM-1]==KDIM-1);
6576 int gparticle= v1[0]==0 ? 0 : 1; // which particle to integrate over
6577 int hparticle= v2[0]==0 ? 0 : 1; // which particle to integrate over
6578 // merge multiple contraction dimensions into one
6579 Tensor<Q> gtensor = gcoeff1.get_svdtensor().flat_vector_with_weights(gparticle);
6580 Tensor<Q> gtensor_other = gcoeff1.get_svdtensor().flat_vector((gparticle+1)%2);
6581 Tensor<R> htensor = hcoeff1.get_svdtensor().flat_vector_with_weights(hparticle);
6582 Tensor<R> htensor_other = hcoeff1.get_svdtensor().flat_vector((hparticle+1)%2);
6583 Tensor<T> tmp1=inner(gtensor,htensor,1,1); // tmp1(r,r') = sum_j b(r,j) a(r',j)
6584 Tensor<T> tmp2=inner(tmp1,gtensor_other,0,0); // tmp2(r',i) = sum_r tmp1(r,r') a(r,i)
// NOTE(review): the declaration of the weights w is not present in this listing
6586 MADNESS_CHECK(tmp2.dim(0)==htensor_other.dim(0));
6587 w=1.0;
6588 coeffT result_tmp(get_cdata().v2k, get_tensor_type());
6589 result_tmp.get_svdtensor().set_vectors_and_weights(w,tmp2,htensor_other);
// same contraction for the s0 blocks only, subtracted from the s0 block of the result
6590 if (key.level() > 0) {
6591 GenTensor<Q> gcoeff2 = copy(gcoeff1(g->get_cdata().s0));
6592 GenTensor<R> hcoeff2 = copy(hcoeff1(h->get_cdata().s0));
6593 Tensor<Q> gtensor = gcoeff2.get_svdtensor().flat_vector_with_weights(gparticle);
6594 Tensor<Q> gtensor_other = gcoeff2.get_svdtensor().flat_vector((gparticle+1)%2);
6595 Tensor<R> htensor = hcoeff2.get_svdtensor().flat_vector_with_weights(hparticle);
6596 Tensor<R> htensor_other = hcoeff2.get_svdtensor().flat_vector((hparticle+1)%2);
6597 Tensor<T> tmp1=inner(gtensor,htensor,1,1); // tmp1(r,r') = sum_j b(r,j) a(r',j)
6598 Tensor<T> tmp2=inner(tmp1,gtensor_other,0,0); // tmp2(r',i) = sum_r tmp1(r,r') a(r,i)
6600 MADNESS_CHECK(tmp2.dim(0)==htensor_other.dim(0));
6601 w=1.0;
6602 coeffT result_coeff1(get_cdata().vk, get_tensor_type());
6603 result_coeff1.get_svdtensor().set_vectors_and_weights(w,tmp2,htensor_other);
6604 result_tmp(get_cdata().s0)-=result_coeff1;
6605 }
6606 result_coeff+=result_tmp;
6607 }
6608
6609 // use case: partial_projection of 2-electron functions in svd representation f(1) = \int g(2) h(1,2) d2
6610 // c_i = \sum_j a_j b_ij = \sum_jr a_j b_rj b'_rj
6611 // = \sum_jr ( a_j b_rj) b'_rj )
6612 else if (gcoeff.is_full_tensor() and hcoeff.is_svd_tensor() and result_coeff.is_full_tensor()) {
// g is contracted over all of its dimensions
6613 MADNESS_CHECK(v1[0]==0 and v1[CDIM-1]==LDIM-1);
6614 MADNESS_CHECK(v2[0]==0 or v2[CDIM-1]==KDIM-1);
6615 MADNESS_CHECK(LDIM==CDIM);
6616 int hparticle= v2[0]==0 ? 0 : 1; // which particle to integrate over
6617
6618 Tensor<T> r=contract2(hcoeff1,gcoeff1.full_tensor(),hparticle);
// NOTE(review): the s0 correction slices the original gcoeff, not the padded gcoeff1 used one line above -- confirm this is intended
6619 if (key.level()>0) r(get_cdata().s0)-=contract2(copy(hcoeff1(h->get_cdata().s0)),copy(gcoeff.full_tensor()(g->get_cdata().s0)),hparticle);
6620 result_coeff.full_tensor()+=r;
6621 }
6622 // use case: partial_projection of 2-electron functions in svd representation f(1) = \int g(1,2) h(2) d2
6623 // c_i = \sum_j a_ij b_j = \sum_jr a_ri a'_rj b_j
6624 // = \sum_jr ( a_ri (a'_rj b_j) )
6625 else if (gcoeff.is_svd_tensor() and hcoeff.is_full_tensor() and result_coeff.is_full_tensor()) {
// h is contracted over all of its dimensions
6626 MADNESS_CHECK(v1[0]==0 or v1[CDIM-1]==LDIM-1);
6627 MADNESS_CHECK(v2[0]==0 and v2[CDIM-1]==KDIM-1);
6628 MADNESS_CHECK(KDIM==CDIM);
6629 int gparticle= v1[0]==0 ? 0 : 1; // which particle to integrate over
6630
6631 Tensor<T> r=contract2(gcoeff1,hcoeff1.full_tensor(),gparticle);
// NOTE(review): the s0 correction slices the original hcoeff, not the padded hcoeff1 used one line above -- confirm this is intended
6632 if (key.level()>0) r(get_cdata().s0)-=contract2(copy(gcoeff1(g->get_cdata().s0)),copy(hcoeff.full_tensor()(h->get_cdata().s0)),gparticle);
6633 result_coeff.full_tensor()+=r;
6634
6635 } else {
// any other combination of tensor representations is not handled
6636 MADNESS_EXCEPTION("unknown case in partial_inner_contract",1);
6637 }
6638 }
6639
6640 MADNESS_CHECK(result_coeff.is_assigned());
6641 result_coeff.reduce_rank(get_thresh());
6642
// accumulate the contribution into this' node at key
// NOTE(review): the statement of the else branch is not present in this listing
6643 if (coeffs.is_local(key))
6644 coeffs.send(key, &nodeT::accumulate, result_coeff, coeffs, key, get_tensor_args());
6645 else
6647 }
6648
6649 /// Return the inner product with an external function on a specified function node.
6650
6651 /// @param[in] key Key of the function node to compute the inner product on. (the domain of integration)
6652 /// @param[in] c Tensor of coefficients for the function at the function node given by key
6653 /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
6654 /// @return Returns the inner product over the domain of a single function node, no guarantee of accuracy.
6655 T inner_ext_node(keyT key, tensorT c, const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f) const {
6656 tensorT fvals = tensorT(this->cdata.vk);
6657 // Compute the value of the external function at the quadrature points.
6658 fcube(key, *(f), cdata.quad_x, fvals);
6659 // Convert quadrature point values to scaling coefficients.
6660 tensorT fc = tensorT(values2coeffs(key, fvals));
6661 // Return the inner product of the two functions' scaling coefficients.
6662 return c.trace_conj(fc);
6663 }
6664
6665 /// Call inner_ext_node recursively until convergence.
6666 /// @param[in] key Key of the function node on which to compute inner product (the domain of integration)
6667 /// @param[in] c coeffs for the function at the node given by key
6668 /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
6669 /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
6670 /// @param[in] old_inner the inner product on the parent function node
6671 /// @return Returns the inner product over the domain of a single function, checks for convergence.
6672 T inner_ext_recursive(keyT key, tensorT c, const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine, T old_inner=T(0)) const {
6673 int i = 0;
6674 tensorT c_child, inner_child;
6675 T new_inner, result = 0.0;
6676
6677 c_child = tensorT(cdata.v2k); // tensor of child coeffs
6678 inner_child = Tensor<double>(pow(2, NDIM)); // child inner products
6679
6680 // If old_inner is default value, assume this is the first call
6681 // and compute inner product on this node.
6682 if (old_inner == T(0)) {
6683 old_inner = inner_ext_node(key, c, f);
6684 }
6685
6686 if (coeffs.find(key).get()->second.has_children()) {
6687 // Since the key has children and we know the func is redundant,
6688 // Iterate over all children of this compute node, computing
6689 // the inner product on each child node. new_inner will store
6690 // the sum of these, yielding a more accurate inner product.
6691 for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
6692 const keyT& child = it.key();
6693 tensorT cc = coeffs.find(child).get()->second.coeff().full_tensor_copy();
6694 inner_child(i) = inner_ext_node(child, cc, f);
6695 }
6696 new_inner = inner_child.sum();
6697 } else if (leaf_refine) {
6698 // We need the scaling coefficients of the numerical function
6699 // at each of the children nodes. We can't use project because
6700 // there is no guarantee that the numerical function will have
6701 // a functor. Instead, since we know we are at or below the
6702 // leaf nodes, the wavelet coefficients are zero (to within the
6703 // truncate tolerance). Thus, we can use unfilter() to
6704 // get the scaling coefficients at the next level.
6705 tensorT d = tensorT(cdata.v2k);
6706 d = T(0);
6707 d(cdata.s0) = copy(c);
6708 c_child = unfilter(d);
6709
6710 // Iterate over all children of this compute node, computing
6711 // the inner product on each child node. new_inner will store
6712 // the sum of these, yielding a more accurate inner product.
6713 for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
6714 const keyT& child = it.key();
6715 tensorT cc = tensorT(c_child(child_patch(child)));
6716 inner_child(i) = inner_ext_node(child, cc, f);
6717 }
6718 new_inner = inner_child.sum();
6719 } else {
6720 // If we get to here, we are at the leaf nodes and the user has
6721 // specified that they do not want refinement past leaf nodes.
6722 new_inner = old_inner;
6723 }
6724
6725 // Check for convergence. If converged...yay, we're done. If not,
6726 // call inner_ext_node_recursive on each child node and accumulate
6727 // the inner product in result.
6728 // if (std::abs(new_inner - old_inner) <= truncate_tol(thresh, key)) {
6729 if (std::abs(new_inner - old_inner) <= thresh) {
6730 result = new_inner;
6731 } else {
6732 i = 0;
6733 for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
6734 const keyT& child = it.key();
6735 tensorT cc = tensorT(c_child(child_patch(child)));
6736 result += inner_ext_recursive(child, cc, f, leaf_refine, inner_child(i));
6737 }
6738 }
6739
6740 return result;
6741 }
6742
// Functor for a local reduction: operator()(iterator) yields the contribution of one
// node, operator()(T,T) combines two contributions.
// NOTE(review): the line opening this definition and the line naming the constructor
// are not present in this listing; presumably this is the do_inner_ext_local_ffi
// functor constructed in inner_ext_local / inner_adaptive_local -- confirm.
6744 const std::shared_ptr< FunctionFunctorInterface<T, NDIM> > fref; ///< the external function
6745 const implT * impl; ///< the FunctionImpl whose nodes are visited
6746 const bool leaf_refine; ///< passed on to the recursive inner product routines
6747 const bool do_leaves; ///< start with leaf nodes instead of initial_level
6748
// constructor: stores all four arguments
6750 const implT * impl, const bool leaf_refine, const bool do_leaves)
6751 : fref(f), impl(impl), leaf_refine(leaf_refine), do_leaves(do_leaves) {};
6752
/// contribution of a single node

/// @param[in] it iterator pointing to the (key,node) pair
/// @return if do_leaves: the result of inner_adaptive_recursive for leaf nodes;
///         otherwise: the result of inner_ext_recursive for nodes on impl->initial_level;
///         0.0 for all other nodes
6753 T operator()(typename dcT::const_iterator& it) const {
6754 if (do_leaves and it->second.is_leaf()) {
6755 tensorT cc = it->second.coeff().full_tensor();
6756 return impl->inner_adaptive_recursive(it->first, cc, fref, leaf_refine, T(0));
6757 } else if ((not do_leaves) and (it->first.level() == impl->initial_level)) {
6758 tensorT cc = it->second.coeff().full_tensor();
6759 return impl->inner_ext_recursive(it->first, cc, fref, leaf_refine, T(0));
6760 } else {
// this node is not a starting point of a recursion
6761 return 0.0;
6762 }
6763 }
6764
/// combine two contributions: plain sum
6765 T operator()(T a, T b) const {
6766 return (a + b);
6767 }
6768
/// serialization is not supported: always raises MADNESS_EXCEPTION
6769 template <typename Archive> void serialize(const Archive& ar) {
6770 MADNESS_EXCEPTION("NOT IMPLEMENTED", 1);
6771 }
6772 };
6773
6774 /// Return the local part of inner product with external function ... no communication.
///
/// Uses a do_inner_ext_local_ffi functor constructed with do_leaves=false.
/// NOTE(review): the lines that set up the range and start the reduction are not
/// present in this listing; only the functor construction is visible.
6775 /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
6776 /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
6777 /// @return Returns local part of the inner product, i.e. over the domain of all function nodes on this compute node.
6778 T inner_ext_local(const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine) const {
6780
// last argument: do_leaves=false
6782 do_inner_ext_local_ffi(f, this, leaf_refine, false));
6783 }
6784
6785 /// Return the local part of inner product with external function ... no communication.
///
/// Same as inner_ext_local, but the do_inner_ext_local_ffi functor is constructed
/// with do_leaves=true.
/// NOTE(review): the lines that set up the range and start the reduction are not
/// present in this listing; only the functor construction is visible.
6786 /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
6787 /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
6788 /// @return Returns local part of the inner product, i.e. over the domain of all function nodes on this compute node.
6789 T inner_adaptive_local(const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine) const {
6791
// last argument: do_leaves=true
6793 do_inner_ext_local_ffi(f, this, leaf_refine, true));
6794 }
6795
6796 /// Call inner_ext_node recursively until convergence.
///
/// The inner product on this node is compared with the sum of the inner products on
/// its children, where the children's s coefficients are obtained by unfiltering c
/// with zero d coefficients. If leaf_refine is set and the two differ by more than
/// truncate_tol(thresh,key), the children are treated recursively; otherwise the sum
/// over the children is returned.
/// NOTE(review): the first line of the declaration (function name, key and c
/// arguments) is not present in this listing.
6797 /// @param[in] key Key of the function node on which to compute inner product (the domain of integration)
6798 /// @param[in] c coeffs for the function at the node given by key
6799 /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
6800 /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
6801 /// @param[in] old_inner not used as input: it is overwritten with the inner product on this node
6802 /// @return Returns the inner product over the domain of a single function, checks for convergence.
6804 const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f,
6805 const bool leaf_refine, T old_inner=T(0)) const {
6806
6807 // the inner product in the current node
// (always recomputed, whatever the caller passed in)
6808 old_inner = inner_ext_node(key, c, f);
6809 T result=0.0;
6810
6811 // the inner product in the child nodes
6812
6813 // compute the sum coefficients of the MRA function
// c goes into the s0 block, all d coefficients are zero
6814 tensorT d = tensorT(cdata.v2k);
6815 d = T(0);
6816 d(cdata.s0) = copy(c);
6817 tensorT c_child = unfilter(d);
6818
6819 // compute the inner product in the child nodes
6820 T new_inner=0.0; // child inner products
6821 for (KeyChildIterator<NDIM> it(key); it; ++it) {
6822 const keyT& child = it.key();
6823 tensorT cc = tensorT(c_child(child_patch(child)));
6824 new_inner+= inner_ext_node(child, cc, f);
6825 }
6826
6827 // continue recursion if needed
6828 const double tol=truncate_tol(thresh,key);
6829 if (leaf_refine and (std::abs(new_inner - old_inner) > tol)) {
// not converged: sum up the recursively refined results of all children
6830 for (KeyChildIterator<NDIM> it(key); it; ++it) {
6831 const keyT& child = it.key();
6832 tensorT cc = tensorT(c_child(child_patch(child)));
6833 result += inner_adaptive_recursive(child, cc, f, leaf_refine, T(0));
6834 }
6835 } else {
// converged, or no refinement requested: take the sum over the children
6836 result = new_inner;
6837 }
6838 return result;
6839
6840 }
6841
6842
6843 /// Return the gaxpy product with an external function on a specified
6844 /// function node.
6845 /// @param[in] key Key of the function node on which to compute gaxpy
6846 /// @param[in] lc Tensor of coefficients for the function at the
6847 /// function node given by key
6848 /// @param[in] f Pointer to function of type T that takes coordT
6849 /// arguments. This is the externally provided function and
6850 /// the right argument of gaxpy.
6851 /// @param[in] alpha prefactor of c Tensor for gaxpy
6852 /// @param[in] beta prefactor of fcoeffs for gaxpy
6853 /// @return Returns coefficient tensor of the gaxpy product at specified
6854 /// key, no guarantee of accuracy.
6855 template <typename L>
6856 tensorT gaxpy_ext_node(keyT key, Tensor<L> lc, T (*f)(const coordT&), T alpha, T beta) const {
6857 // Compute the value of external function at the quadrature points.
6858 tensorT fvals = madness::fcube(key, f, cdata.quad_x);
6859 // Convert quadrature point values to scaling coefficients.
6860 tensorT fcoeffs = values2coeffs(key, fvals);
6861 // Return the inner product of the two functions' scaling coeffs.
6862 tensorT c2 = copy(lc);
6863 c2.gaxpy(alpha, fcoeffs, beta);
6864 return c2;
6865 }
6866
6867 /// Return out of place gaxpy using recursive descent.
6868 /// @param[in] key Key of the function node on which to compute gaxpy
6869 /// @param[in] left FunctionImpl, left argument of gaxpy
6870 /// @param[in] lcin coefficients of left at this node
6871 /// @param[in] c coefficients of gaxpy product at this node
6872 /// @param[in] f pointer to function of type T that takes coordT
6873 /// arguments. This is the externally provided function and
6874 /// the right argument of gaxpy.
6875 /// @param[in] alpha prefactor of left argument for gaxpy
6876 /// @param[in] beta prefactor of right argument for gaxpy
6877 /// @param[in] tol convergence tolerance...when the norm of the gaxpy's
6878 /// difference coefficients is less than tol, we are done.
6879 template <typename L>
6880 void gaxpy_ext_recursive(const keyT& key, const FunctionImpl<L,NDIM>* left,
6881 Tensor<L> lcin, tensorT c, T (*f)(const coordT&),
6882 T alpha, T beta, double tol, bool below_leaf) {
6883 typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;
6884
6885 // If we haven't yet reached the leaf level, check whether the
6886 // current key is a leaf node of left. If so, set below_leaf to true
6887 // and continue. If not, make this a parent, recur down, return.
6888 if (not below_leaf) {
6889 bool left_leaf = left->coeffs.find(key).get()->second.is_leaf();
6890 if (left_leaf) {
6891 below_leaf = true;
6892 } else {
6893 this->coeffs.replace(key, nodeT(coeffT(), true));
6894 for (KeyChildIterator<NDIM> it(key); it; ++it) {
6895 const keyT& child = it.key();
6896 woT::task(left->coeffs.owner(child), &implT:: template gaxpy_ext_recursive<L>,
6897 child, left, Tensor<L>(), tensorT(), f, alpha, beta, tol, below_leaf);
6898 }
6899 return;
6900 }
6901 }
6902
6903 // Compute left's coefficients if not provided
6904 Tensor<L> lc = lcin;
6905 if (lc.size() == 0) {
6906 literT it = left->coeffs.find(key).get();
6907 MADNESS_ASSERT(it != left->coeffs.end());
6908 if (it->second.has_coeff())
6909 lc = it->second.coeff().reconstruct_tensor();
6910 }
6911
6912 // Compute this node's coefficients if not provided in function call
6913 if (c.size() == 0) {
6914 c = gaxpy_ext_node(key, lc, f, alpha, beta);
6915 }
6916
6917 // We need the scaling coefficients of the numerical function at
6918 // each of the children nodes. We can't use project because there
6919 // is no guarantee that the numerical function will have a functor.
6920 // Instead, since we know we are at or below the leaf nodes, the
6921 // wavelet coefficients are zero (to within the truncate tolerance).
6922 // Thus, we can use unfilter() to get the scaling coefficients at
6923 // the next level.
6924 Tensor<L> lc_child = Tensor<L>(cdata.v2k); // left's child coeffs
6925 Tensor<L> ld = Tensor<L>(cdata.v2k);
6926 ld = L(0);
6927 ld(cdata.s0) = copy(lc);
6928 lc_child = unfilter(ld);
6929
6930 // Iterate over children of this node,
6931 // storing the gaxpy coeffs in c_child
6932 tensorT c_child = tensorT(cdata.v2k); // tensor of child coeffs
6933 for (KeyChildIterator<NDIM> it(key); it; ++it) {
6934 const keyT& child = it.key();
6935 tensorT lcoeff = tensorT(lc_child(child_patch(child)));
6936 c_child(child_patch(child)) = gaxpy_ext_node(child, lcoeff, f, alpha, beta);
6937 }
6938
6939 // Compute the difference coefficients to test for convergence.
6940 tensorT d = tensorT(cdata.v2k);
6941 d = filter(c_child);
6942 // Filter returns both s and d coefficients, so set scaling
6943 // coefficient part of d to 0 so that we take only the
6944 // norm of the difference coefficients.
6945 d(cdata.s0) = T(0);
6946 double dnorm = d.normf();
6947
6948 // Small d.normf means we've reached a good level of resolution
6949 // Store the coefficients and return.
6950 if (dnorm <= truncate_tol(tol,key)) {
6951 this->coeffs.replace(key, nodeT(coeffT(c,targs), false));
6952 } else {
6953 // Otherwise, make this a parent node and recur down
6954 this->coeffs.replace(key, nodeT(coeffT(), true)); // Interior node
6955
6956 for (KeyChildIterator<NDIM> it(key); it; ++it) {
6957 const keyT& child = it.key();
6958 tensorT child_coeff = tensorT(c_child(child_patch(child)));
6959 tensorT left_coeff = tensorT(lc_child(child_patch(child)));
6960 woT::task(left->coeffs.owner(child), &implT:: template gaxpy_ext_recursive<L>,
6961 child, left, left_coeff, child_coeff, f, alpha, beta, tol, below_leaf);
6962 }
6963 }
6964 }
6965
6966 template <typename L>
6967 void gaxpy_ext(const FunctionImpl<L,NDIM>* left, T (*f)(const coordT&), T alpha, T beta, double tol, bool fence) {
6968 if (world.rank() == coeffs.owner(cdata.key0))
6969 gaxpy_ext_recursive<L> (cdata.key0, left, Tensor<L>(), tensorT(), f, alpha, beta, tol, false);
6970 if (fence)
6971 world.gop.fence();
6972 }
6973
6974 /// project the low-dim function g on the hi-dim function f: result(x) = <this(x,y) | g(y)>
6975
6976 /// invoked by the hi-dim function, a function of NDIM+LDIM
6977
/// The process owning the root key starts a forward_traverse task with a
/// project_out_op as coefficient operation and a noop as apply operation.
/// NOTE(review): the first line of the declaration (function name, result and
/// gimpl arguments) is not present in this listing.
6978 /// Upon return, result matches this, with contributions on all scales
6979 /// @param[in] result lo-dim function of NDIM-LDIM \todo Should this be param[out]?
6980 /// @param[in] gimpl lo-dim function of LDIM
6981 /// @param[in] dim over which dimensions to be integrated: 0..LDIM or LDIM..LDIM+NDIM-1
/// @param[in] fence if true, fence before returning
6982 template<size_t LDIM>
6984 const int dim, const bool fence) {
6985
6986 const keyT& key0=cdata.key0;
6987
// only the owner of the root node starts the traversal
6988 if (world.rank() == coeffs.owner(key0)) {
6989
6990 // coeff_op will accumulate the result
6991 typedef project_out_op<LDIM> coeff_opT;
6992 coeff_opT coeff_op(this,result,CoeffTracker<T,LDIM>(gimpl),dim);
6993
6994 // don't do anything on this -- coeff_op will accumulate into result
6995 typedef noop<T,NDIM> apply_opT;
6996 apply_opT apply_op;
6997
// the traversal starts at the root key, as a task on this process
6998 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
6999 coeff_op, apply_op, cdata.key0);
7000
7001 }
7002 if (fence) world.gop.fence();
7003
7004 }
7005
7006
7007 /// project the low-dim function g on the hi-dim function f: result(x) = <f(x,y) | g(y)>
7008 template<size_t LDIM>
7010 bool randomize() const {return false;}
7011
7014 typedef FunctionImpl<T,NDIM-LDIM> implL1;
7015 typedef std::pair<bool,coeffT> argT;
7016
7017 const implT* fimpl; ///< the hi dim function f
7018 mutable implL1* result; ///< the low dim result function
7019 ctL iag; ///< the low dim function g
7020 int dim; ///< 0: project 0..LDIM-1, 1: project LDIM..NDIM-1
7021
7022 // ctor
7023 project_out_op() = default;
7024 project_out_op(const implT* fimpl, implL1* result, const ctL& iag, const int dim)
7025 : fimpl(fimpl), result(result), iag(iag), dim(dim) {}
7027 : fimpl(other.fimpl), result(other.result), iag(other.iag), dim(other.dim) {}
7028
7029
7030 /// do the actual contraction
///
/// Contracts the coefficients of g (held by the tracker iag) with one particle of the
/// node of f at key, schedules the accumulation of the result on the tree of result and
/// returns a future holding the pair (fnode.is_leaf(), empty coeffT).
/// NOTE(review): the signature line (listing line 7031) is absent from this extract.
7032
7033 Key<LDIM> key1,key2,dest;
// split the hi-dim key into its two LDIM particle keys
7034 key.break_apart(key1,key2);
7035
7036 // make the right coefficients
// dim==0: g is brought to key1 and the result goes to key2; dim==1: the other way round.
// NOTE(review): for any other dim neither branch runs, so gcoeff and dest keep their
// default-constructed values
7037 coeffT gcoeff;
7038 if (dim==0) {
7039 gcoeff=iag.get_impl()->parent_to_child(iag.coeff(),iag.key(),key1);
7040 dest=key2;
7041 }
7042 if (dim==1) {
7043 gcoeff=iag.get_impl()->parent_to_child(iag.coeff(),iag.key(),key2);
7044 dest=key1;
7045 }
7046
7047 MADNESS_ASSERT(fimpl->get_coeffs().probe(key)); // must be local!
7048 const nodeT& fnode=fimpl->get_coeffs().find(key).get()->second;
7049 const coeffT& fcoeff=fnode.coeff();
7050
7051 // fast return if possible
// nothing is accumulated if either factor carries no data
7052 if (fcoeff.has_no_data() or gcoeff.has_no_data())
7053 return Future<argT> (argT(fnode.is_leaf(),coeffT()));;
7054
7055 MADNESS_CHECK(gcoeff.is_full_tensor());
7056 tensorT final(result->cdata.vk);
7057 const int k=fcoeff.dim(0);
// k^LDIM; std::pow returns a floating-point value that is converted to int here
7058 const int k_ldim=std::pow(k,LDIM);
// target shape of the result: LDIM dimensions of extent k
7059 std::vector<long> shape(LDIM, k);
7060
7061 if (fcoeff.is_full_tensor()) {
7062 // result_i = \sum_j g_j f_ji
// g is flattened to length k_ldim and f viewed as a k_ldim x k_ldim matrix before
// inner() is called with the index arguments 0 and dim
7063 const tensorT gtensor = gcoeff.full_tensor().reshape(k_ldim);
7064 const tensorT ftensor = fcoeff.full_tensor().reshape(k_ldim,k_ldim);
7065 final=inner(gtensor,ftensor,0,dim).reshape(shape);
7066
7067 } else if (fcoeff.is_svd_tensor()) {
// for rank 0 nothing is added and final keeps the value it was constructed with
7068 if (fcoeff.rank()>0) {
7069
7070 // result_i = \sum_jr g_j a_rj w_r b_ri
7071 const int otherdim = (dim + 1) % 2;
7072 const tensorT gtensor = gcoeff.full_tensor().flat();
7073 const tensorT atensor = fcoeff.get_svdtensor().flat_vector(dim); // a_rj
7074 const tensorT btensor = fcoeff.get_svdtensor().flat_vector(otherdim);
7075 const tensorT gatensor = inner(gtensor, atensor, 0, 1); // ga_r
// work on a copy of the weights so the coefficients of f are not modified
7076 tensorT weights = copy(fcoeff.get_svdtensor().weights_);
7077 weights.emul(gatensor); // ga_r * w_r
7078 // sum over all ranks of b, include new weights:
7079 // result_i = \sum_r ga_r * w_r * b_ri
7080 for (int r = 0; r < fcoeff.rank(); ++r) final += weights(r) * btensor(r, _);
7081 final = final.reshape(shape);
7082 }
7083
7084 } else {
7085 MADNESS_EXCEPTION("unsupported tensor type in project_out_op",1);
7086 }
7087
7088 // accumulate the result
// schedules FunctionNode<T,LDIM>::accumulate2 for the node dest of result->coeffs
// as a high-priority task
7089 result->coeffs.task(dest, &FunctionNode<T,LDIM>::accumulate2, final, result->coeffs, dest, TaskAttributes::hipri());
7090
// same return value as on the fast-return path: leaf flag of f's node, no coefficients
7091 return Future<argT> (argT(fnode.is_leaf(),coeffT()));
7092 }
7093
7094 this_type make_child(const keyT& child) const {
7095 Key<LDIM> key1,key2;
7096 child.break_apart(key1,key2);
7097 const Key<LDIM> gkey = (dim==0) ? key1 : key2;
7098
7099 return this_type(fimpl,result,iag.make_child(gkey),dim);
7100 }
7101
7102 /// retrieve the coefficients (parent coeffs might be remote)
///
/// NOTE(review): the signature and the statement that defines g1 (listing lines
/// 7103-7104) are absent from this extract.
// hand forward_ctor to the task queue of result's world; the task rebuilds the operator
// from fimpl, result, g1 and dim. forward_ctor is a non-const member, hence the
// const_cast applied to this.
7105 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
7106 &this_type::forward_ctor),fimpl,result,g1,dim);
7107 }
7108
7109 /// taskq-compatible ctor
7110 this_type forward_ctor(const implT* fimpl1, implL1* result1, const ctL& iag1, const int dim1) {
7111 return this_type(fimpl1,result1,iag1,dim1);
7112 }
7113
7114 template <typename Archive> void serialize(const Archive& ar) {
7115 ar & result & iag & fimpl & dim;
7116 }
7117
7118 };
7119
7120
7121 /// project the low-dim function g on the hi-dim function f: this(x) = <f(x,y) | g(y)>
7122
7123 /// invoked by result, a function of NDIM
7124
7125 /// @param[in] f hi-dim function of LDIM+NDIM
7126 /// @param[in] g lo-dim function of LDIM
7127 /// @param[in] dim which of the two keys produced by break_apart is contracted with g: 0 (first key) or 1 (second key); any other value raises an exception
7128 template<size_t LDIM>
7129 void project_out2(const FunctionImpl<T,LDIM+NDIM>* f, const FunctionImpl<T,LDIM>* g, const int dim) {
7130
7131 typedef std::pair< keyT,coeffT > pairT;
7132 typedef typename FunctionImpl<T,NDIM+LDIM>::dcT::const_iterator fiterator;
7133
7134 // loop over all nodes of hi-dim f, compute the inner products with all
7135 // appropriate nodes of g, and accumulate in result
7136 fiterator end = f->get_coeffs().end();
7137 for (fiterator it=f->get_coeffs().begin(); it!=end; ++it) {
7138 const Key<LDIM+NDIM> key=it->first;
// note: key and node are copied out of the container here; fcoeff refers to the copy
7139 const FunctionNode<T,LDIM+NDIM> fnode=it->second;
7140 const coeffT& fcoeff=fnode.coeff();
7141
// only leaf nodes of f that carry coefficients contribute
7142 if (fnode.is_leaf() and fcoeff.has_data()) {
7143
7144 // break key into particle: over key1 will be summed, over key2 will be
7145 // accumulated, or vice versa, depending on dim
7146 if (dim==0) {
7147 Key<NDIM> key1;
7148 Key<LDIM> key2;
7149 key.break_apart(key1,key2);
7150
// future that receives the (key, coeffs) pair answered by g's sock_it_to_me
7151 Future<pairT> result;
7152 // sock_it_to_me(key1, result.remote_ref(world));
// NOTE(review): the owner is looked up in this->coeffs while the task is sent to g, and
// the member pointer is taken from implT; presumably this relies on g having the same
// dimension and process map as this -- confirm
7153 g->task(coeffs.owner(key1), &implT::sock_it_to_me, key1, result.remote_ref(world), TaskAttributes::hipri());
// local task: contract fcoeff with the coefficients delivered in result, destination key2
7154 woT::task(world.rank(),&implT:: template do_project_out<LDIM>,fcoeff,result,key1,key2,dim);
7155
7156 } else if (dim==1) {
7157 Key<LDIM> key1;
7158 Key<NDIM> key2;
7159 key.break_apart(key1,key2);
7160
7161 Future<pairT> result;
7162 // sock_it_to_me(key2, result.remote_ref(world));
// same as the dim==0 branch with the roles of key1 and key2 exchanged
7163 g->task(coeffs.owner(key2), &implT::sock_it_to_me, key2, result.remote_ref(world), TaskAttributes::hipri());
7164 woT::task(world.rank(),&implT:: template do_project_out<LDIM>,fcoeff,result,key2,key1,dim);
7165
7166 } else {
7167 MADNESS_EXCEPTION("confused dim in project_out",1);
7168 }
7169 }
7170 }
// NOTE(review): listing line 7171 is absent from this extract
7172// this->compressed=false;
7173// this->nonstandard=false;
7174// this->redundant=true;
7175 }
7176
7177
7178 /// compute the inner product of two nodes of only some dimensions and accumulate on result
7179
7180 /// invoked by result
7181 /// @param[in] fcoeff coefficients of high dimension LDIM+NDIM
7182 /// @param[in] gpair key and coeffs of low dimension LDIM (possibly a parent node)
7183 /// @param[in] gkey key of actual low dim node (possibly the same as gpair.first, iff gnode exists)
7184 /// @param[in] dest destination node for the result
7185 /// @param[in] dim which dimensions should be contracted: 0..LDIM-1 or LDIM..NDIM+LDIM-1
7186 template<size_t LDIM>
7187 void do_project_out(const coeffT& fcoeff, const std::pair<keyT,coeffT> gpair, const keyT& gkey,
7188 const Key<NDIM>& dest, const int dim) const {
7189
// bring the coefficients delivered in gpair from the node gpair.first to gkey
7190 const coeffT gcoeff=parent_to_child(gpair.second,gpair.first,gkey);
7191
7192 // fast return if possible
7193 if (fcoeff.has_no_data() or gcoeff.has_no_data()) return;
7194
7195 // let's specialize for the time being on SVD tensors for f and full tensors of half dim for g
// NOTE(review): listing line 7196 is absent from this extract
7197 MADNESS_ASSERT(fcoeff.tensor_type()==TT_2D);
7198 const tensorT gtensor=gcoeff.full_tensor();
7199 tensorT result(cdata.vk);
7200
// index of the vector set of f that is not contracted
7201 const int otherdim=(dim+1)%2;
7202 const int k=fcoeff.dim(0);
// slice template: entry 0 is set to the rank index in the loop, all other entries take everything
7203 std::vector<Slice> s(fcoeff.config().dim_per_vector()+1,_);
7204
7205 // do the actual contraction
// per rank r: overlap of g with the contracted vector, scaled by the weight, times the other vector.
// NOTE(review): reshape(k,k,k) fixes three dimensions per particle and ovlp/fac are stored
// as double -- confirm this is intended for all instantiations of T and NDIM
7206 for (int r=0; r<fcoeff.rank(); ++r) {
7207 s[0]=Slice(r,r);
7208 const tensorT contracted_tensor=fcoeff.config().ref_vector(dim)(s).reshape(k,k,k);
7209 const tensorT other_tensor=fcoeff.config().ref_vector(otherdim)(s).reshape(k,k,k);
7210 const double ovlp= gtensor.trace_conj(contracted_tensor);
7211 const double fac=ovlp * fcoeff.config().weights(r);
7212 result+=fac*other_tensor;
7213 }
7214
7215 // accumulate the result
// schedules nodeT::accumulate2 for the node dest of this->coeffs as a high-priority task
7216 coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri());
7217 }
7218
7219
7220
7221
7222 /// Returns the maximum local depth of the tree ... no communications.
7223 std::size_t max_local_depth() const;
7224
7225
7226 /// Returns the maximum depth of the tree ... collective ... global sum/broadcast
7227 std::size_t max_depth() const;
7228
7229 /// Returns the max number of nodes on a processor
7230 std::size_t max_nodes() const;
7231
7232 /// Returns the min number of nodes on a processor
7233 std::size_t min_nodes() const;
7234
7235 /// Returns the size of the tree structure of the function ... collective global sum
7236 std::size_t tree_size() const;
7237
7238 /// Returns the number of coefficients in the function for each rank
/// NOTE(review): presumably the purely local counterpart of size() -- confirm
7239 std::size_t size_local() const;
7240
7241 /// Returns the number of coefficients in the function ... collective global sum
7242 std::size_t size() const;
7243
7244 /// Returns the number of coefficients in the function for this MPI rank
/// NOTE(review): described almost identically to size_local(); confirm how the two counts differ
7245 std::size_t nCoeff_local() const;
7246
7247 /// Returns the number of coefficients in the function ... collective global sum
/// NOTE(review): same description as size(); confirm how the two counts differ
7248 std::size_t nCoeff() const;
7249
7250 /// Returns the number of coefficients in the function ... collective global sum
/// NOTE(review): same description as size() and nCoeff(); confirm what real_size() actually counts
7251 std::size_t real_size() const;
7252
7253 /// print tree size and size
/// @param[in] name presumably a label for the printed output -- confirm against the definition
7254 void print_size(const std::string name) const;
7255
7256 /// print the number of configurations per node
7257 void print_stats() const;
7258
7259 /// In-place scale by a constant
/// @param[in] q the scaling factor
/// @param[in] fence presumably: fence after the operation if true -- confirm against the definition
7260 void scale_inplace(const T q, bool fence);
7261
7262 /// Out-of-place scale by a constant
7263 template <typename Q, typename F>
7264 void scale_oop(const Q q, const FunctionImpl<F,NDIM>& f, bool fence) {
7265 typedef typename FunctionImpl<F,NDIM>::nodeT fnodeT;
7266 typedef typename FunctionImpl<F,NDIM>::dcT fdcT;
7267 typename fdcT::const_iterator end = f.coeffs.end();
7268 for (typename fdcT::const_iterator it=f.coeffs.begin(); it!=end; ++it) {
7269 const keyT& key = it->first;
7270 const fnodeT& node = it->second;
7271
7272 if (node.has_coeff()) {
7273 coeffs.replace(key,nodeT(node.coeff()*q,node.has_children()));
7274 }
7275 else {
7276 coeffs.replace(key,nodeT(coeffT(),node.has_children()));
7277 }
7278 }
7279 if (fence)
7280 world.gop.fence();
7281 }
7282
7283 /// Hash a pointer to \c FunctionImpl
7284
7285 /// \param[in] impl pointer to a FunctionImpl
7286 /// \return The hash.
7287 inline friend hashT hash_value(const FunctionImpl<T,NDIM>* pimpl) {
7288 hashT seed = hash_value(pimpl->id().get_world_id());
7289 detail::combine_hash(seed, hash_value(pimpl->id().get_obj_id()));
7290 return seed;
7291 }
7292
7293 /// Hash a shared_ptr to \c FunctionImpl
7294
7295 /// \param[in] impl pointer to a FunctionImpl
7296 /// \return The hash.
7297 inline friend hashT hash_value(const std::shared_ptr<FunctionImpl<T,NDIM>> impl) {
7298 return hash_value(impl.get());
7299 }
7300 };
7301
7302 namespace archive {
7303 template <class Archive, class T, std::size_t NDIM>
7304 struct ArchiveLoadImpl<Archive,const FunctionImpl<T,NDIM>*> {
7305 static void load(const Archive& ar, const FunctionImpl<T,NDIM>*& ptr) {
7306 bool exists=false;
7307 ar & exists;
7308 if (exists) {
7309 uniqueidT id;
7310 ar & id;
7311 World* world = World::world_from_id(id.get_world_id());
7312 MADNESS_ASSERT(world);
7313 auto ptr_opt = world->ptr_from_id< WorldObject< FunctionImpl<T,NDIM> > >(id);
7314 if (!ptr_opt)
7315 MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use a locally uninitialized object",0);
7316 ptr = static_cast< const FunctionImpl<T,NDIM>*>(*ptr_opt);
7317 if (!ptr)
7318 MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use an unregistered object",0);
7319 } else {
7320 ptr=nullptr;
7321 }
7322 }
7323 };
7324
7325 template <class Archive, class T, std::size_t NDIM>
7326 struct ArchiveStoreImpl<Archive,const FunctionImpl<T,NDIM>*> {
7327 static void store(const Archive& ar, const FunctionImpl<T,NDIM>*const& ptr) {
7328 bool exists=(ptr) ? true : false;
7329 ar & exists;
7330 if (exists) ar & ptr->id();
7331 }
7332 };
7333
7334 template <class Archive, class T, std::size_t NDIM>
7335 struct ArchiveLoadImpl<Archive, FunctionImpl<T,NDIM>*> {
7336 static void load(const Archive& ar, FunctionImpl<T,NDIM>*& ptr) {
7337 bool exists=false;
7338 ar & exists;
7339 if (exists) {
7340 uniqueidT id;
7341 ar & id;
7342 World* world = World::world_from_id(id.get_world_id());
7343 MADNESS_ASSERT(world);
7344 auto ptr_opt = world->ptr_from_id< WorldObject< FunctionImpl<T,NDIM> > >(id);
7345 if (!ptr_opt)
7346 MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use a locally uninitialized object",0);
7347 ptr = static_cast< FunctionImpl<T,NDIM>*>(*ptr_opt);
7348 if (!ptr) {
7349 auto ids=world->get_object_ids();
7350 print(world->get_world_ids());
7351 MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use an unregistered object",0);
7352 }
7353 } else {
7354 ptr=nullptr;
7355 }
7356 }
7357 };
7358
/// Store a pointer to a (non-const) FunctionImpl: an existence flag, followed by the
/// unique id of the object for non-null pointers.
/// NOTE(review): the struct header (listing line 7360) is absent from this extract.
7359 template <class Archive, class T, std::size_t NDIM>
7361 static void store(const Archive& ar, FunctionImpl<T,NDIM>*const& ptr) {
// flag telling the reader whether an id follows
7362 bool exists=(ptr) ? true : false;
7363 ar & exists;
// the id is written for non-null pointers only
7364 if (exists) ar & ptr->id();
7365 // ar & ptr->id();
7366 }
7367 };
7368
/// Load a shared_ptr to a const FunctionImpl; the resulting shared_ptr never deletes the object
7369 template <class Archive, class T, std::size_t NDIM>
7370 struct ArchiveLoadImpl<Archive, std::shared_ptr<const FunctionImpl<T,NDIM> > > {
7371 static void load(const Archive& ar, std::shared_ptr<const FunctionImpl<T,NDIM> >& ptr) {
7372 const FunctionImpl<T,NDIM>* f = nullptr;
// NOTE(review): the statement that fills f from the archive (listing line 7373) is
// absent from this extract
// the deleter is an empty lambda, so ptr does not take ownership of f
7374 ptr.reset(f, [] (const FunctionImpl<T,NDIM> *p_) -> void {});
7375 }
7376 };
7377
/// Store a shared_ptr to a const FunctionImpl
/// NOTE(review): the only statement of store() (listing line 7381) is absent from this
/// extract; as shown here the body is empty
7378 template <class Archive, class T, std::size_t NDIM>
7379 struct ArchiveStoreImpl<Archive, std::shared_ptr<const FunctionImpl<T,NDIM> > > {
7380 static void store(const Archive& ar, const std::shared_ptr<const FunctionImpl<T,NDIM> >& ptr) {
7382 }
7383 };
7384
/// Load a shared_ptr to a (non-const) FunctionImpl; the resulting shared_ptr never deletes the object
7385 template <class Archive, class T, std::size_t NDIM>
7386 struct ArchiveLoadImpl<Archive, std::shared_ptr<FunctionImpl<T,NDIM> > > {
7387 static void load(const Archive& ar, std::shared_ptr<FunctionImpl<T,NDIM> >& ptr) {
7388 FunctionImpl<T,NDIM>* f = nullptr;
// NOTE(review): the statement that fills f from the archive (listing line 7389) is
// absent from this extract
// the deleter is an empty lambda, so ptr does not take ownership of f
7390 ptr.reset(f, [] (FunctionImpl<T,NDIM> *p_) -> void {});
7391 }
7392 };
7393
/// Store a shared_ptr to a (non-const) FunctionImpl
/// NOTE(review): the only statement of store() (listing line 7397) is absent from this
/// extract; as shown here the body is empty
7394 template <class Archive, class T, std::size_t NDIM>
7395 struct ArchiveStoreImpl<Archive, std::shared_ptr<FunctionImpl<T,NDIM> > > {
7396 static void store(const Archive& ar, const std::shared_ptr<FunctionImpl<T,NDIM> >& ptr) {
7398 }
7399 };
7400 }
7401
7402}
7403
7404#endif // MADNESS_MRA_FUNCIMPL_H__INCLUDED
double w(double t, double eps)
Definition DKops.h:22
double q(double t)
Definition DKops.h:18
This header should include pretty much everything needed for the parallel runtime.
An integer with atomic set, get, read+increment, read+decrement, and decrement+test operations.
Definition atomicint.h:126
long dim(int i) const
Returns the size of dimension i.
Definition basetensor.h:147
long ndim() const
Returns the number of dimensions in the tensor.
Definition basetensor.h:144
long size() const
Returns the number of elements in the tensor.
Definition basetensor.h:138
Definition displacements.h:332
std::function< bool(Level, const PointPattern &, std::optional< Displacement > &)> Validator
Definition displacements.h:340
Definition displacements.h:784
a class to track where relevant (parent) coeffs are
Definition funcimpl.h:791
const keyT & key() const
const reference to the key
Definition funcimpl.h:839
CoeffTracker(const CoeffTracker &other, const datumT &datum)
ctor with a pair<keyT,nodeT>
Definition funcimpl.h:821
const LeafStatus & is_leaf() const
const reference to is_leaf flag
Definition funcimpl.h:863
const implT * impl
the funcimpl that has the coeffs
Definition funcimpl.h:800
LeafStatus
Definition funcimpl.h:797
@ yes
Definition funcimpl.h:797
@ no
Definition funcimpl.h:797
@ unknown
Definition funcimpl.h:797
CoeffTracker(const CoeffTracker &other)
copy ctor
Definition funcimpl.h:829
double dnorm(const keyT &key) const
return the s and dnorm belonging to the passed-in key
Definition funcimpl.h:856
coeffT coeff_
the coefficients belonging to key
Definition funcimpl.h:806
const implT * get_impl() const
const reference to impl
Definition funcimpl.h:833
const coeffT & coeff() const
const reference to the coeffs
Definition funcimpl.h:836
keyT key_
the current key, which must exist in impl
Definition funcimpl.h:802
double dnorm_
norm of d coefficients corresponding to key
Definition funcimpl.h:808
CoeffTracker(const implT *impl)
the initial ctor making the root key
Definition funcimpl.h:816
void serialize(const Archive &ar)
serialization
Definition funcimpl.h:915
Future< CoeffTracker > activate() const
find the coefficients
Definition funcimpl.h:892
CoeffTracker()
default ctor
Definition funcimpl.h:813
GenTensor< T > coeffT
Definition funcimpl.h:795
CoeffTracker make_child(const keyT &child) const
make a child of this, ignoring the coeffs
Definition funcimpl.h:866
FunctionImpl< T, NDIM > implT
Definition funcimpl.h:793
std::pair< Key< NDIM >, ShallowNode< T, NDIM > > datumT
Definition funcimpl.h:796
CoeffTracker forward_ctor(const CoeffTracker &other, const datumT &datum) const
taskq-compatible forwarding to the ctor
Definition funcimpl.h:909
LeafStatus is_leaf_
flag if key is a leaf node
Definition funcimpl.h:804
coeffT coeff(const keyT &key) const
return the coefficients belonging to the passed-in key
Definition funcimpl.h:847
Key< NDIM > keyT
Definition funcimpl.h:794
CompositeFunctorInterface implements a wrapper of holding several functions and functors.
Definition function_interface.h:172
Definition worldhashmap.h:396
Tri-diagonal operator traversing tree primarily for derivative operator.
Definition derivative.h:73
Holds displacements for applying operators to avoid replicating for all operators.
Definition displacements.h:65
const std::vector< Key< NDIM > > & get_disp(Level n, const array_of_bools< NDIM > &kernel_lattice_sum_axes)
Definition displacements.h:236
FunctionCommonData holds all Function data common for given k.
Definition function_common_data.h:52
Tensor< double > quad_phit
transpose of quad_phi
Definition function_common_data.h:102
Tensor< double > quad_phiw
quad_phiw(i,j) = at x[i] value of w[i]*phi[j]
Definition function_common_data.h:103
std::vector< long > vk
(k,...) used to initialize Tensors
Definition function_common_data.h:93
std::vector< Slice > s0
s[0] in each dimension to get scaling coeff
Definition function_common_data.h:91
static const FunctionCommonData< T, NDIM > & get(int k)
Definition function_common_data.h:111
static void _init_quadrature(int k, int npt, Tensor< double > &quad_x, Tensor< double > &quad_w, Tensor< double > &quad_phi, Tensor< double > &quad_phiw, Tensor< double > &quad_phit)
Initialize the quadrature information.
Definition mraimpl.h:91
collects common functionality that does not need to be a member function of funcimpl
Definition function_common_data.h:135
const FunctionCommonData< T, NDIM > & cdata
Definition function_common_data.h:138
GenTensor< T > coeffs2values(const Key< NDIM > &key, const GenTensor< T > &coeff) const
Definition function_common_data.h:142
Tensor< T > values2coeffs(const Key< NDIM > &key, const Tensor< T > &values) const
Definition function_common_data.h:155
FunctionDefaults holds default parameters as static class members.
Definition funcdefaults.h:100
static const double & get_thresh()
Returns the default threshold.
Definition funcdefaults.h:177
static int get_max_refine_level()
Gets the default maximum adaptive refinement level.
Definition funcdefaults.h:214
static const Tensor< double > & get_cell_width()
Returns the width of each user cell dimension.
Definition funcdefaults.h:381
static bool get_apply_randomize()
Gets the random load balancing for integral operators flag.
Definition funcdefaults.h:290
static const Tensor< double > & get_cell()
Gets the user cell for the simulation.
Definition funcdefaults.h:348
FunctionFactory implements the named-parameter idiom for Function.
Definition function_factory.h:86
bool _refine
Definition function_factory.h:99
bool _empty
Definition function_factory.h:100
bool _fence
Definition function_factory.h:103
Abstract base class interface required for functors used as input to Functions.
Definition function_interface.h:68
Definition funcimpl.h:5542
double operator()(double a, double b) const
Definition funcimpl.h:5568
const opT * func
Definition funcimpl.h:5544
Tensor< double > qx
Definition funcimpl.h:5546
double operator()(typename dcT::const_iterator &it) const
Definition funcimpl.h:5559
void serialize(const Archive &ar)
Definition funcimpl.h:5573
do_err_box(const implT *impl, const opT *func, int npt, const Tensor< double > &qx, const Tensor< double > &quad_phit, const Tensor< double > &quad_phiw)
Definition funcimpl.h:5552
int npt
Definition funcimpl.h:5545
Tensor< double > quad_phiw
Definition funcimpl.h:5548
const implT * impl
Definition funcimpl.h:5543
Tensor< double > quad_phit
Definition funcimpl.h:5547
do_err_box(const do_err_box &e)
Definition funcimpl.h:5556
FunctionImpl holds all Function state to facilitate shallow copy semantics.
Definition funcimpl.h:945
std::tuple< std::set< Key< NDIM > >, std::map< Key< CDIM >, double > > get_contraction_node_lists(const std::size_t n, const std::array< int, CDIM > &v) const
for contraction two functions f(x,z) = \int g(x,y) h(y,z) dy
Definition funcimpl.h:6309
void copy_coeffs(const FunctionImpl< Q, NDIM > &other, bool fence)
Copy coeffs from other into self.
Definition funcimpl.h:1145
bool is_nonstandard() const
Definition mraimpl.h:273
void insert_serialized_coeffs(std::vector< unsigned char > &v)
insert coeffs from vector archive into this
Definition funcimpl.h:1191
T eval_cube(Level n, coordT &x, const tensorT &c) const
Definition mraimpl.h:2025
void partial_inner_contract(const FunctionImpl< Q, LDIM > *g, const FunctionImpl< R, KDIM > *h, const std::array< int, CDIM > v1, const std::array< int, CDIM > v2, const Key< NDIM > &key, const std::list< Key< CDIM > > &j_key_list)
tensor contraction part of partial_inner
Definition funcimpl.h:6471
AtomicInt large
Definition funcimpl.h:1002
Timer timer_target_driven
Definition funcimpl.h:1000
void binaryXX(const FunctionImpl< L, NDIM > *left, const FunctionImpl< R, NDIM > *right, const opT &op, bool fence)
Definition funcimpl.h:3275
void do_apply(const opT *op, const keyT &key, const Tensor< R > &c)
apply an operator on the coeffs c (at node key)
Definition funcimpl.h:4882
void do_print_tree_graphviz(const keyT &key, std::ostream &os, Level maxlevel) const
Functor for the do_print_tree method (using GraphViz)
Definition mraimpl.h:2763
void add_keys_to_map(mapT *map, int index) const
Adds keys to union of local keys with specified index.
Definition funcimpl.h:5886
void change_tensor_type1(const TensorArgs &targs, bool fence)
change the tensor type of the coefficients in the FunctionNode
Definition mraimpl.h:1099
void gaxpy_ext_recursive(const keyT &key, const FunctionImpl< L, NDIM > *left, Tensor< L > lcin, tensorT c, T(*f)(const coordT &), T alpha, T beta, double tol, bool below_leaf)
Definition funcimpl.h:6880
int initial_level
Initial level for refinement.
Definition funcimpl.h:974
int max_refine_level
Do not refine below this level.
Definition funcimpl.h:978
double do_apply_kernel3(const opT *op, const GenTensor< R > &coeff, const do_op_args< OPDIM > &args, const TensorArgs &apply_targs)
same as do_apply_kernel2, but use low rank tensors as input and low rank tensors as output
Definition funcimpl.h:4840
void hartree_product(const std::vector< std::shared_ptr< FunctionImpl< T, LDIM > > > p1, const std::vector< std::shared_ptr< FunctionImpl< T, LDIM > > > p2, const leaf_opT &leaf_op, bool fence)
given two functions of LDIM, perform the Hartree/Kronecker/outer product
Definition funcimpl.h:3799
void traverse_tree(const coeff_opT &coeff_op, const apply_opT &apply_op, const keyT &key) const
traverse a non-existing tree
Definition funcimpl.h:3769
void do_square_inplace(const keyT &key)
int special_level
Minimum level for refinement on special points.
Definition funcimpl.h:975
void do_apply_kernel(const opT *op, const Tensor< R > &c, const do_op_args< OPDIM > &args)
for fine-grain parallelism: call the apply method of an operator in a separate task
Definition funcimpl.h:4774
double errsq_local(const opT &func) const
Returns the sum of squares of errors from local info ... no comms.
Definition funcimpl.h:5580
WorldContainer< keyT, nodeT > dcT
Type of container holding the coefficients.
Definition funcimpl.h:957
void evaldepthpt(const Vector< double, NDIM > &xin, const keyT &keyin, const typename Future< Level >::remote_refT &ref)
Get the depth of the tree at a point in simulation coordinates.
Definition mraimpl.h:2944
void scale_inplace(const T q, bool fence)
In-place scale by a constant.
Definition mraimpl.h:3115
void gaxpy_oop_reconstructed(const double alpha, const implT &f, const double beta, const implT &g, const bool fence)
perform: this= alpha*f + beta*g, invoked by result
Definition mraimpl.h:223
void unary_op_coeff_inplace(const opT &op, bool fence)
Definition funcimpl.h:2119
World & world
Definition funcimpl.h:964
void apply_1d_realspace_push_op(const archive::archive_ptr< const opT > &pop, int axis, const keyT &key, const Tensor< R > &c)
Definition funcimpl.h:3837
bool is_redundant() const
Returns true if the function is redundant.
Definition mraimpl.h:262
FunctionNode< T, NDIM > nodeT
Type of node.
Definition funcimpl.h:955
std::size_t nCoeff_local() const
Returns the number of coefficients in the function for this MPI rank.
Definition mraimpl.h:1922
void print_size(const std::string name) const
print tree size and size
Definition mraimpl.h:1941
FunctionImpl(const FunctionImpl< T, NDIM > &p)
void print_info() const
Prints summary of data distribution.
Definition mraimpl.h:833
void abs_inplace(bool fence)
Definition mraimpl.h:3127
void binaryXXa(const keyT &key, const FunctionImpl< L, NDIM > *left, const Tensor< L > &lcin, const FunctionImpl< R, NDIM > *right, const Tensor< R > &rcin, const opT &op)
Definition funcimpl.h:3144
void print_timer() const
Definition mraimpl.h:357
void evalR(const Vector< double, NDIM > &xin, const keyT &keyin, const typename Future< long >::remote_refT &ref)
Get the rank of leaf box of the tree at a point in simulation coordinates.
Definition mraimpl.h:2986
const FunctionCommonData< T, NDIM > & cdata
Definition funcimpl.h:984
void do_print_grid(const std::string filename, const std::vector< keyT > &keys) const
print the grid in xyz format
Definition mraimpl.h:584
void mulXXa(const keyT &key, const FunctionImpl< L, NDIM > *left, const Tensor< L > &lcin, const FunctionImpl< R, NDIM > *right, const Tensor< R > &rcin, double tol)
Definition funcimpl.h:3058
int get_truncate_mode() const
Definition funcimpl.h:1757
const std::vector< Vector< double, NDIM > > & get_special_points() const
Definition funcimpl.h:969
std::size_t nCoeff() const
Returns the number of coefficients in the function ... collective global sum.
Definition mraimpl.h:1932
double vol_nsphere(int n, double R)
Definition funcimpl.h:4870
keyT neighbor_in_volume(const keyT &key, const keyT &disp) const
Returns key of general neighbor that resides in-volume.
Definition mraimpl.h:3240
void compress(const TreeState newstate, bool fence)
compress the wave function
Definition mraimpl.h:1500
void do_dirac_convolution(FunctionImpl< T, LDIM > *f, bool fence) const
Definition funcimpl.h:2202
std::pair< coeffT, double > compress_op(const keyT &key, const std::vector< Future< std::pair< coeffT, double > > > &v, bool nonstandard)
calculate the wavelet coefficients using the sum coefficients of all child nodes
Definition mraimpl.h:1668
Future< bool > truncate_spawn(const keyT &key, double tol)
Returns true if after truncation this node has coefficients.
Definition mraimpl.h:2608
void print_type_in_compilation_error(R &&)
Definition funcimpl.h:6191
Future< double > norm_tree_spawn(const keyT &key)
Definition mraimpl.h:1570
std::vector< keyT > local_leaf_keys() const
return the keys of the local leaf boxes
Definition mraimpl.h:558
MADNESS_ASSERT(this->is_redundant()==g.is_redundant())
void do_print_tree(const keyT &key, std::ostream &os, Level maxlevel) const
Functor for the do_print_tree method.
Definition mraimpl.h:2681
void vtransform(const std::vector< std::shared_ptr< FunctionImpl< R, NDIM > > > &vright, const Tensor< Q > &c, const std::vector< std::shared_ptr< FunctionImpl< T, NDIM > > > &vleft, double tol, bool fence)
Definition funcimpl.h:2919
void unset_functor()
Definition mraimpl.h:312
void refine_spawn(const opT &op, const keyT &key)
Definition funcimpl.h:4602
void apply_1d_realspace_push(const opT &op, const FunctionImpl< R, NDIM > *f, int axis, bool fence)
Definition funcimpl.h:3888
void do_print_plane(const std::string filename, std::vector< Tensor< double > > plotinfo, const int xaxis, const int yaxis, const coordT el2)
print the MRA structure
Definition mraimpl.h:499
std::pair< Key< NDIM >, ShallowNode< T, NDIM > > find_datum(keyT key) const
return a std::pair<key, node>, which MUST exist
Definition mraimpl.h:965
void set_functor(const std::shared_ptr< FunctionFunctorInterface< T, NDIM > > functor1)
Definition mraimpl.h:293
std::enable_if< NDIM==FDIM >::type read_grid2(const std::string gridfile, std::shared_ptr< FunctionFunctorInterface< double, NDIM > > vnuc_functor)
read data from a grid
Definition funcimpl.h:1651
bool verify_tree_state_local() const
check that the tree state and the coeffs are consistent
Definition mraimpl.h:169
const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > & get_pmap() const
Definition mraimpl.h:207
Tensor< Q > fcube_for_mul(const keyT &child, const keyT &parent, const Tensor< Q > &coeff) const
Compute the function values for multiplication.
Definition funcimpl.h:1966
Timer timer_filter
Definition funcimpl.h:998
void sock_it_to_me(const keyT &key, const RemoteReference< FutureImpl< std::pair< keyT, coeffT > > > &ref) const
Walk up the tree returning pair(key,node) for first node with coefficients.
Definition mraimpl.h:2821
void recursive_apply(opT &apply_op, const implT *fimpl, implT *rimpl, const bool fence)
traverse an existing tree and apply an operator
Definition funcimpl.h:5399
double get_thresh() const
Definition mraimpl.h:328
void trickle_down(bool fence)
sum all the contributions from all scales after applying an operator in mod-NS form
Definition mraimpl.h:1354
bool autorefine
If true, autorefine where appropriate.
Definition funcimpl.h:980
std::pair< coeffT, double > make_redundant_op(const keyT &key, const std::vector< Future< std::pair< coeffT, double > > > &v)
similar to compress_op, but insert only the sum coefficients in the tree
Definition mraimpl.h:1728
void set_autorefine(bool value)
Definition mraimpl.h:337
tensorT filter(const tensorT &s) const
Transform sum coefficients at level n to sums+differences at level n-1.
Definition mraimpl.h:1152
void chop_at_level(const int n, const bool fence=true)
remove all nodes with level higher than n
Definition mraimpl.h:1115
void unaryXXvalues(const FunctionImpl< Q, NDIM > *func, const opT &op, bool fence)
Definition funcimpl.h:3302
void partial_inner(const FunctionImpl< Q, LDIM > &g, const FunctionImpl< R, KDIM > &h, const std::array< int, CDIM > v1, const std::array< int, CDIM > v2)
invoked by result
Definition funcimpl.h:6207
TreeState tree_state
Definition funcimpl.h:987
void print_tree_json(std::ostream &os=std::cout, Level maxlevel=10000) const
Definition mraimpl.h:2701
coeffT parent_to_child_NS(const keyT &child, const keyT &parent, const coeffT &coeff) const
Directly project parent NS coeffs to child NS coeffs.
Definition mraimpl.h:707
void copy_coeffs_different_world(const FunctionImpl< Q, NDIM > &other)
Copy coefficients from other funcimpl with possibly different world and on a different node.
Definition funcimpl.h:1155
void mapdim(const implT &f, const std::vector< long > &map, bool fence)
Permute the dimensions of f according to map, result on this.
Definition mraimpl.h:1057
bool is_compressed() const
Returns true if the function is compressed.
Definition mraimpl.h:250
Vector< double, NDIM > coordT
Type of vector holding coordinates.
Definition funcimpl.h:959
void apply(opT &op, const FunctionImpl< R, NDIM > &f, bool fence)
apply an operator on f to return this
Definition funcimpl.h:5082
Tensor< T > tensorT
Type of tensor for anything but to hold coeffs.
Definition funcimpl.h:952
void mirror(const implT &f, const std::vector< long > &mirror, bool fence)
mirror the dimensions of f according to map, result on this
Definition mraimpl.h:1066
T inner_adaptive_recursive(keyT key, const tensorT &c, const std::shared_ptr< FunctionFunctorInterface< T, NDIM > > f, const bool leaf_refine, T old_inner=T(0)) const
Definition funcimpl.h:6803
void store(Archive &ar)
Definition funcimpl.h:1323
void do_binary_op(const keyT &key, const Tensor< L > &left, const std::pair< keyT, Tensor< R > > &arg, const opT &op)
Functor for the binary_op method.
Definition funcimpl.h:2068
void gaxpy_ext(const FunctionImpl< L, NDIM > *left, T(*f)(const coordT &), T alpha, T beta, double tol, bool fence)
Definition funcimpl.h:6967
void accumulate_trees(FunctionImpl< Q, NDIM > &result, const R alpha, const bool fence=true) const
merge the trees of this and other, while multiplying them with the alpha or beta, resp
Definition funcimpl.h:1244
void print_stats() const
print the number of configurations per node
Definition mraimpl.h:1969
void broaden(const array_of_bools< NDIM > &is_periodic, bool fence)
Definition mraimpl.h:1303
coeffT truncate_reconstructed_op(const keyT &key, const std::vector< Future< coeffT > > &v, const double tol)
given the sum coefficients of all children, truncate or not
Definition mraimpl.h:1617
void refine_op(const opT &op, const keyT &key)
Definition funcimpl.h:4577
static Tensor< TENSOR_RESULT_TYPE(T, R) > inner_local(const std::vector< const FunctionImpl< T, NDIM > * > &left, const std::vector< const FunctionImpl< R, NDIM > * > &right, bool sym)
Definition funcimpl.h:6087
void fcube(const keyT &key, const FunctionFunctorInterface< T, NDIM > &f, const Tensor< double > &qx, tensorT &fval) const
Evaluate function at quadrature points in the specified box.
Definition mraimpl.h:2446
Timer timer_change_tensor_type
Definition funcimpl.h:996
void forward_do_diff1(const DerivativeBase< T, NDIM > *D, const implT *f, const keyT &key, const std::pair< keyT, coeffT > &left, const std::pair< keyT, coeffT > &center, const std::pair< keyT, coeffT > &right)
Definition mraimpl.h:923
std::vector< Slice > child_patch(const keyT &child) const
Returns patch referring to coeffs of child in parent box.
Definition mraimpl.h:696
void print_tree_graphviz(std::ostream &os=std::cout, Level maxlevel=10000) const
Definition mraimpl.h:2754
void set_tree_state(const TreeState &state)
Definition funcimpl.h:1354
std::size_t min_nodes() const
Returns the min number of nodes on a processor.
Definition mraimpl.h:1873
void copy_coeffs_same_world(const FunctionImpl< Q, NDIM > &other, bool fence)
Copy coeffs from other into self.
Definition funcimpl.h:1198
std::shared_ptr< FunctionFunctorInterface< T, NDIM > > functor
Definition funcimpl.h:986
Timer timer_compress_svd
Definition funcimpl.h:999
Tensor< TENSOR_RESULT_TYPE(T, R)> mul(const Tensor< T > &c1, const Tensor< R > &c2, const int npt, const keyT &key) const
multiply the values of two coefficient tensors using a custom number of grid points
Definition funcimpl.h:2041
void make_redundant(const bool fence)
convert this to redundant, i.e. have sum coefficients on all levels
Definition mraimpl.h:1528
void load(Archive &ar)
Definition funcimpl.h:1305
std::size_t max_nodes() const
Returns the max number of nodes on a processor.
Definition mraimpl.h:1864
T inner_ext_local(const std::shared_ptr< FunctionFunctorInterface< T, NDIM > > f, const bool leaf_refine) const
Definition funcimpl.h:6778
coeffT upsample(const keyT &key, const coeffT &coeff) const
upsample the sum coefficients of level n to sum coeffs on level n+1
Definition mraimpl.h:1231
TensorArgs targs
type of tensor to be used in the FunctionNodes
Definition funcimpl.h:982
void flo_unary_op_node_inplace(const opT &op, bool fence)
Definition funcimpl.h:2231
std::size_t size_local() const
Returns the number of coefficients in the function for each rank.
Definition mraimpl.h:1891
GenTensor< Q > values2coeffs(const keyT &key, const GenTensor< Q > &values) const
Definition funcimpl.h:1945
void plot_cube_kernel(archive::archive_ptr< Tensor< T > > ptr, const keyT &key, const coordT &plotlo, const coordT &plothi, const std::vector< long > &npt, bool eval_refine) const
Definition mraimpl.h:3331
T trace_local() const
Returns int(f(x),x) in local volume.
Definition mraimpl.h:3169
void print_grid(const std::string filename) const
Definition mraimpl.h:542
Future< std::pair< coeffT, double > > compress_spawn(const keyT &key, bool nonstandard, bool keepleaves, bool redundant1)
Invoked on node where key is local.
Definition mraimpl.h:3268
void replicate_on_hosts(bool fence=true)
Definition funcimpl.h:1124
bool get_autorefine() const
Definition mraimpl.h:334
int k
Wavelet order.
Definition funcimpl.h:972
void vtransform_doit(const std::shared_ptr< FunctionImpl< R, NDIM > > &right, const Tensor< Q > &c, const std::vector< std::shared_ptr< FunctionImpl< T, NDIM > > > &vleft, double tol)
Definition funcimpl.h:2763
MADNESS_CHECK(this->is_reconstructed())
void phi_for_mul(Level np, Translation lp, Level nc, Translation lc, Tensor< double > &phi) const
Compute the Legendre scaling functions for multiplication.
Definition mraimpl.h:3137
Future< std::pair< keyT, coeffT > > find_me(const keyT &key) const
find_me. Called by diff_bdry to get coefficients of boundary function
Definition mraimpl.h:3255
TensorType get_tensor_type() const
Definition mraimpl.h:319
void do_project_out(const coeffT &fcoeff, const std::pair< keyT, coeffT > gpair, const keyT &gkey, const Key< NDIM > &dest, const int dim) const
compute the inner product of two nodes of only some dimensions and accumulate on result
Definition funcimpl.h:7187
void remove_leaf_coefficients(const bool fence)
Definition mraimpl.h:1522
void insert_zero_down_to_initial_level(const keyT &key)
Initialize nodes to zero function at initial_level of refinement.
Definition mraimpl.h:2577
void do_diff1(const DerivativeBase< T, NDIM > *D, const implT *f, const keyT &key, const std::pair< keyT, coeffT > &left, const std::pair< keyT, coeffT > &center, const std::pair< keyT, coeffT > &right)
Definition mraimpl.h:934
typedef TENSOR_RESULT_TYPE(T, R) resultT
void unary_op_node_inplace(const opT &op, bool fence)
Definition funcimpl.h:2140
T inner_adaptive_local(const std::shared_ptr< FunctionFunctorInterface< T, NDIM > > f, const bool leaf_refine) const
Definition funcimpl.h:6789
void do_print_tree_json(const keyT &key, std::multimap< Level, std::tuple< tranT, std::string > > &data, Level maxlevel) const
Functor for the do_print_tree_json method.
Definition mraimpl.h:2732
std::multimap< Key< FDIM >, std::list< Key< CDIM > > > recur_down_for_contraction_map(const keyT &key, const nodeT &node, const std::array< int, CDIM > &v_this, const std::array< int, CDIM > &v_other, const std::set< Key< ODIM > > &ij_other_list, const std::map< Key< CDIM >, double > &j_other_list, bool this_first, const double thresh)
make a map of all nodes that will contribute to a partial inner product
Definition funcimpl.h:6362
std::shared_ptr< FunctionImpl< T, NDIM > > pimplT
pointer to this class
Definition funcimpl.h:951
TENSOR_RESULT_TYPE(T, R) dot_local(const FunctionImpl< R
Returns the dot product ASSUMING same distribution.
void finalize_sum()
After summing up we need to do some cleanup.
Definition mraimpl.h:1821
std::enable_if< NDIM==FDIM >::type read_grid(const std::string keyfile, const std::string gridfile, std::shared_ptr< FunctionFunctorInterface< double, NDIM > > vnuc_functor)
read data from a grid
Definition funcimpl.h:1544
dcT coeffs
The coefficients.
Definition funcimpl.h:989
bool exists_and_is_leaf(const keyT &key) const
Definition mraimpl.h:1275
static std::complex< Real > conj(const std::complex< Real > &x)
Definition funcimpl.h:6081
void make_Vphi(const opT &leaf_op, const bool fence=true)
assemble the function V*phi using V and phi given from the functor
Definition funcimpl.h:4369
void unaryXX(const FunctionImpl< Q, NDIM > *func, const opT &op, bool fence)
Definition funcimpl.h:3289
std::vector< std::pair< int, const coeffT * > > mapvecT
Type of the entry in the map returned by make_key_vec_map.
Definition funcimpl.h:5880
void project_out(FunctionImpl< T, NDIM-LDIM > *result, const FunctionImpl< T, LDIM > *gimpl, const int dim, const bool fence)
project the low-dim function g on the hi-dim function f: result(x) = <this(x,y) | g(y)>
Definition funcimpl.h:6983
void verify_tree() const
Verify tree is properly constructed ... global synchronization involved.
Definition mraimpl.h:111
void do_square_inplace2(const keyT &parent, const keyT &child, const tensorT &parent_coeff)
void gaxpy_inplace_reconstructed(const T &alpha, const FunctionImpl< Q, NDIM > &g, const R &beta, const bool fence)
Definition funcimpl.h:1212
void undo_replicate(bool fence=true)
Definition funcimpl.h:1129
void set_tensor_args(const TensorArgs &t)
Definition mraimpl.h:325
GenTensor< Q > fcube_for_mul(const keyT &child, const keyT &parent, const GenTensor< Q > &coeff) const
Compute the function values for multiplication.
Definition funcimpl.h:1994
Range< typename dcT::const_iterator > rangeT
Definition funcimpl.h:5671
std::size_t real_size() const
Returns the number of coefficients in the function ... collective global sum.
Definition mraimpl.h:1909
bool exists_and_has_children(const keyT &key) const
Definition mraimpl.h:1270
void sum_down_spawn(const keyT &key, const coeffT &s)
is this the same as trickle_down() ?
Definition mraimpl.h:876
void multi_to_multi_op_values(const opT &op, const std::vector< implT * > &vin, std::vector< implT * > &vout, const bool fence=true)
Inplace operate on many functions (impl's) with an operator within a certain box.
Definition funcimpl.h:2890
long box_interior[1000]
Definition funcimpl.h:3333
keyT neighbor(const keyT &key, const keyT &disp, const array_of_bools< NDIM > &is_periodic) const
Returns key of general neighbor enforcing BC.
Definition mraimpl.h:3225
GenTensor< Q > NS_fcube_for_mul(const keyT &child, const keyT &parent, const GenTensor< Q > &coeff, const bool s_only) const
Compute the function values for multiplication.
Definition funcimpl.h:1864
rangeT range(coeffs.begin(), coeffs.end())
void norm_tree(bool fence)
compute for each FunctionNode the norm of the function inside that node
Definition mraimpl.h:1547
void gaxpy_inplace(const T &alpha, const FunctionImpl< Q, NDIM > &other, const R &beta, bool fence)
Inplace general bilinear operation.
Definition funcimpl.h:1292
const Tensor< double > cell
the size of the root cell in each dimension, unchangeable
Definition funcimpl.h:977
bool has_leaves() const
Definition mraimpl.h:288
bool verify_parents_and_children() const
check that parents and children are consistent
Definition mraimpl.h:119
void apply_source_driven(opT &op, const FunctionImpl< R, NDIM > &f, bool fence)
similar to apply, but for low rank coeffs
Definition funcimpl.h:5224
void distribute(std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > newmap) const
Definition funcimpl.h:1136
int get_special_level() const
Definition funcimpl.h:968
void reconstruct_op(const keyT &key, const coeffT &s, const bool accumulate_NS=true)
Definition mraimpl.h:2079
tensorT gaxpy_ext_node(keyT key, Tensor< L > lc, T(*f)(const coordT &), T alpha, T beta) const
Definition funcimpl.h:6856
const coeffT parent_to_child(const coeffT &s, const keyT &parent, const keyT &child) const
Directly project parent coeffs to child coeffs.
Definition mraimpl.h:3152
WorldObject< FunctionImpl< T, NDIM > > woT
Base class world object type.
Definition funcimpl.h:947
void undo_redundant(const bool fence)
convert this from redundant to standard reconstructed form
Definition mraimpl.h:1538
GenTensor< T > coeffT
Type of tensor used to hold coeffs.
Definition funcimpl.h:956
const keyT & key0() const
Returns cdata.key0.
Definition mraimpl.h:394
double finalize_apply()
After apply we need to do some cleanup.
Definition mraimpl.h:1778
bool leaves_only
Definition funcimpl.h:5676
friend hashT hash_value(const FunctionImpl< T, NDIM > *pimpl)
Hash a pointer to FunctionImpl.
Definition funcimpl.h:7287
const dcT & get_coeffs() const
Definition mraimpl.h:343
FunctionImpl(World &world, const FunctionImpl< Q, NDIM > &other, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool dozero)
Copy constructor.
Definition funcimpl.h:1085
T inner_ext_node(keyT key, tensorT c, const std::shared_ptr< FunctionFunctorInterface< T, NDIM > > f) const
Return the inner product with an external function on a specified function node.
Definition funcimpl.h:6655
double norm2sq_local() const
Returns the square of the local norm ... no comms.
Definition mraimpl.h:1830
const FunctionCommonData< T, NDIM > & get_cdata() const
Definition mraimpl.h:349
void sum_down(bool fence)
After 1d push operator must sum coeffs down the tree to restore correct scaling function coefficients...
Definition mraimpl.h:915
T inner_ext_recursive(keyT key, tensorT c, const std::shared_ptr< FunctionFunctorInterface< T, NDIM > > f, const bool leaf_refine, T old_inner=T(0)) const
Definition funcimpl.h:6672
bool noautorefine(const keyT &key, const tensorT &t) const
Always returns false (for when autorefine is not wanted)
Definition mraimpl.h:859
double truncate_tol(double tol, const keyT &key) const
Returns the truncation threshold according to truncate_method.
Definition mraimpl.h:649
void flo_unary_op_node_inplace(const opT &op, bool fence) const
Definition funcimpl.h:2241
bool autorefine_square_test(const keyT &key, const nodeT &t) const
Returns true if this block of coeffs needs autorefining.
Definition mraimpl.h:865
void erase(const Level &max_level)
truncate tree at a certain level
Definition mraimpl.h:739
void mulXX(const FunctionImpl< L, NDIM > *left, const FunctionImpl< R, NDIM > *right, double tol, bool fence)
Definition funcimpl.h:3261
void reconstruct(bool fence)
reconstruct this tree – respects fence
Definition mraimpl.h:1468
void multiply(const implT *f, const FunctionImpl< T, LDIM > *g, const int particle)
multiply f (a pair function of NDIM) with an orbital g (LDIM=NDIM/2)
Definition funcimpl.h:3661
coeffT assemble_coefficients(const keyT &key, const coeffT &coeff_ket, const coeffT &vpotential1, const coeffT &vpotential2, const tensorT &veri) const
given several coefficient tensors, assemble a result tensor
Definition mraimpl.h:1013
static void tnorm(const tensorT &t, double *lo, double *hi)
Computes norm of low/high-order polyn. coeffs for autorefinement test.
Definition mraimpl.h:3029
std::pair< bool, T > eval_local_only(const Vector< double, NDIM > &xin, Level maxlevel)
Evaluate function only if point is local returning (true,value); otherwise return (false,...
Definition mraimpl.h:2915
std::size_t max_depth() const
Returns the maximum depth of the tree ... collective ... global sum/broadcast.
Definition mraimpl.h:1856
std::size_t size() const
Returns the number of coefficients in the function ... collective global sum.
Definition mraimpl.h:1901
void reduce_rank(const double thresh, bool fence)
reduce the rank of the coefficients tensors
Definition mraimpl.h:1107
TreeState get_tree_state() const
Definition funcimpl.h:1358
void merge_trees(const T alpha, const FunctionImpl< Q, NDIM > &other, const R beta, const bool fence=true)
merge the trees of this and other, while multiplying them with the alpha or beta, resp
Definition funcimpl.h:1232
const Tensor< double > & get_cell() const
return the simulation cell
Definition funcimpl.h:1377
std::shared_ptr< FunctionFunctorInterface< T, NDIM > > get_functor()
Definition mraimpl.h:300
double do_apply_directed_screening(const opT *op, const keyT &key, const coeffT &coeff, const bool &do_kernel)
apply an operator on the coeffs c (at node key)
Definition funcimpl.h:5115
tensorT unfilter(const tensorT &s) const
Transform sums+differences at level n to sum coefficients at level n+1.
Definition mraimpl.h:1181
int get_initial_level() const
getter
Definition funcimpl.h:967
Tensor< T > eval_plot_cube(const coordT &plotlo, const coordT &plothi, const std::vector< long > &npt, const bool eval_refine=false) const
Definition mraimpl.h:3423
virtual ~FunctionImpl()
Definition funcimpl.h:1116
Vector< Translation, NDIM > tranT
Type of array holding translation.
Definition funcimpl.h:953
void change_tree_state(const TreeState finalstate, bool fence=true)
change the tree state of this function, might or might not respect fence!
Definition mraimpl.h:1407
Future< coeffT > truncate_reconstructed_spawn(const keyT &key, const double tol)
truncate using a tree in reconstructed form
Definition mraimpl.h:1593
GenTensor< Q > coeffs2values(const keyT &key, const GenTensor< Q > &coeff) const
Definition funcimpl.h:1812
FunctionImpl(const FunctionFactory< T, NDIM > &factory)
Initialize function impl from data in factory.
Definition funcimpl.h:1005
void map_and_mirror(const implT &f, const std::vector< long > &map, const std::vector< long > &mirror, bool fence)
map and mirror the translation index and the coefficients, result on this
Definition mraimpl.h:1076
Timer timer_lr_result
Definition funcimpl.h:997
void gaxpy(T alpha, const FunctionImpl< L, NDIM > &left, T beta, const FunctionImpl< R, NDIM > &right, bool fence)
Invoked by result to perform result += alpha*left+beta*right in wavelet basis.
Definition funcimpl.h:2091
void truncate(double tol, bool fence)
Truncate according to the threshold with optional global fence.
Definition mraimpl.h:378
void do_mul(const keyT &key, const Tensor< L > &left, const std::pair< keyT, Tensor< R > > &arg)
Functor for the mul method.
Definition funcimpl.h:2016
void copy_remote_coeffs_from_pid(const ProcessID pid, const FunctionImpl< Q, NDIM > &other)
Definition funcimpl.h:1175
void project_out2(const FunctionImpl< T, LDIM+NDIM > *f, const FunctionImpl< T, LDIM > *g, const int dim)
project the low-dim function g on the hi-dim function f: this(x) = <f(x,y) | g(y)>
Definition funcimpl.h:7129
double do_apply_kernel2(const opT *op, const Tensor< R > &c, const do_op_args< OPDIM > &args, const TensorArgs &apply_targs)
same as do_apply_kernel, but use full rank tensors as input and low rank tensors as output
Definition funcimpl.h:4802
static Tensor< TENSOR_RESULT_TYPE(T, R)> dot_local(const std::vector< const FunctionImpl< T, NDIM > * > &left, const std::vector< const FunctionImpl< R, NDIM > * > &right, bool sym)
Definition funcimpl.h:6139
Tensor< Q > coeffs2values(const keyT &key, const Tensor< Q > &coeff) const
Definition funcimpl.h:1938
Tensor< Q > values2coeffs(const keyT &key, const Tensor< Q > &values) const
Definition funcimpl.h:1952
void multi_to_multi_op_values_doit(const keyT &key, const opT &op, const std::vector< implT * > &vin, std::vector< implT * > &vout)
Inplace operate on many functions (impl's) with an operator within a certain box.
Definition funcimpl.h:2867
bool is_reconstructed() const
Returns true if the function is reconstructed.
Definition mraimpl.h:256
void replicate(bool fence=true)
Definition funcimpl.h:1120
double norm_tree_op(const keyT &key, const std::vector< Future< double > > &v)
Definition mraimpl.h:1555
void reset_timer()
Definition mraimpl.h:366
void refine_to_common_level(const std::vector< FunctionImpl< T, NDIM > * > &v, const std::vector< tensorT > &c, const keyT key)
Refine multiple functions down to the same finest level.
Definition mraimpl.h:769
int get_k() const
Definition mraimpl.h:340
void dirac_convolution_op(const keyT &key, const nodeT &node, FunctionImpl< T, LDIM > *f) const
The operator.
Definition funcimpl.h:2157
FunctionImpl< T, NDIM > implT
Type of this class (implementation)
Definition funcimpl.h:950
void eval(const Vector< double, NDIM > &xin, const keyT &keyin, const typename Future< T >::remote_refT &ref)
Evaluate the function at a point in simulation coordinates.
Definition mraimpl.h:2871
bool truncate_op(const keyT &key, double tol, const std::vector< Future< bool > > &v)
Definition mraimpl.h:2644
void zero_norm_tree()
Definition mraimpl.h:1292
std::size_t max_local_depth() const
Returns the maximum local depth of the tree ... no communications.
Definition mraimpl.h:1842
tensorT project(const keyT &key) const
Definition mraimpl.h:2789
double thresh
Screening threshold.
Definition funcimpl.h:973
double check_symmetry_local() const
Returns some asymmetry measure ... no comms.
Definition mraimpl.h:755
Future< double > get_norm_tree_recursive(const keyT &key) const
Definition mraimpl.h:2810
bool is_redundant_after_merge() const
Returns true if the function is redundant_after_merge.
Definition mraimpl.h:268
void mulXXvec(const FunctionImpl< L, NDIM > *left, const std::vector< const FunctionImpl< R, NDIM > * > &vright, const std::vector< FunctionImpl< T, NDIM > * > &vresult, double tol, bool fence)
Definition funcimpl.h:3318
Key< NDIM > keyT
Type of key.
Definition funcimpl.h:954
friend hashT hash_value(const std::shared_ptr< FunctionImpl< T, NDIM > > impl)
Hash a shared_ptr to FunctionImpl.
Definition funcimpl.h:7297
std::vector< Vector< double, NDIM > > special_points
special points for further refinement (needed for composite functions or multiplication)
Definition funcimpl.h:976
bool truncate_on_project
If true projection inserts at level n-1 not n.
Definition funcimpl.h:981
AtomicInt small
Definition funcimpl.h:1001
static void do_dot_localX(const typename mapT::iterator lstart, const typename mapT::iterator lend, typename FunctionImpl< R, NDIM >::mapT *rmap_ptr, const bool sym, Tensor< TENSOR_RESULT_TYPE(T, R)> *result_ptr, Mutex *mutex)
Definition funcimpl.h:6036
bool is_on_demand() const
Definition mraimpl.h:283
double err_box(const keyT &key, const nodeT &node, const opT &func, int npt, const Tensor< double > &qx, const Tensor< double > &quad_phit, const Tensor< double > &quad_phiw) const
Returns the square of the error norm in the box labeled by key.
Definition funcimpl.h:5512
void accumulate_timer(const double time) const
Definition mraimpl.h:352
void trickle_down_op(const keyT &key, const coeffT &s)
sum all the contributions from all scales after applying an operator in mod-NS form
Definition mraimpl.h:1365
static void do_inner_localX(const typename mapT::iterator lstart, const typename mapT::iterator lend, typename FunctionImpl< R, NDIM >::mapT *rmap_ptr, const bool sym, Tensor< TENSOR_RESULT_TYPE(T, R) > *result_ptr, Mutex *mutex)
Definition funcimpl.h:5955
void mulXXveca(const keyT &key, const FunctionImpl< L, NDIM > *left, const Tensor< L > &lcin, const std::vector< const FunctionImpl< R, NDIM > * > vrightin, const std::vector< Tensor< R > > &vrcin, const std::vector< FunctionImpl< T, NDIM > * > vresultin, double tol)
Definition funcimpl.h:2954
void set_thresh(double value)
Definition mraimpl.h:331
Tensor< double > print_plane_local(const int xaxis, const int yaxis, const coordT &el2)
collect the data for a plot of the MRA structure locally on each node
Definition mraimpl.h:423
void sock_it_to_me_too(const keyT &key, const RemoteReference< FutureImpl< std::pair< keyT, coeffT > > > &ref) const
Definition mraimpl.h:2849
void broaden_op(const keyT &key, const std::vector< Future< bool > > &v)
Definition mraimpl.h:1281
void print_plane(const std::string filename, const int xaxis, const int yaxis, const coordT &el2)
Print a plane ("xy", "xz", or "yz") containing the point x to file.
Definition mraimpl.h:403
void print_tree(std::ostream &os=std::cout, Level maxlevel=10000) const
Definition mraimpl.h:2672
void project_refine_op(const keyT &key, bool do_refine, const std::vector< Vector< double, NDIM > > &specialpts)
Definition mraimpl.h:2458
void scale_oop(const Q q, const FunctionImpl< F, NDIM > &f, bool fence)
Out-of-place scale by a constant.
Definition funcimpl.h:7264
T typeT
Definition funcimpl.h:949
std::size_t tree_size() const
Returns the size of the tree structure of the function ... collective global sum.
Definition mraimpl.h:1882
ConcurrentHashMap< keyT, mapvecT > mapT
Type of the map returned by make_key_vec_map.
Definition funcimpl.h:5883
void add_scalar_inplace(T t, bool fence)
Adds a constant to the function. Local operation, optional fence.
Definition mraimpl.h:2536
void forward_traverse(const coeff_opT &coeff_op, const apply_opT &apply_op, const keyT &key) const
traverse a non-existing tree
Definition funcimpl.h:3755
tensorT downsample(const keyT &key, const std::vector< Future< coeffT > > &v) const
downsample the sum coefficients of level n+1 to sum coeffs on level n
Definition mraimpl.h:1201
void abs_square_inplace(bool fence)
Definition mraimpl.h:3132
FunctionImpl(const FunctionImpl< Q, NDIM > &other, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool dozero)
Copy constructor.
Definition funcimpl.h:1071
void refine(const opT &op, bool fence)
Definition funcimpl.h:4615
static mapT make_key_vec_map(const std::vector< const FunctionImpl< T, NDIM > * > &v)
Returns map of union of local keys to vector of indexes of functions containing that key.
Definition funcimpl.h:5904
void put_in_box(ProcessID from, long nl, long ni) const
Definition mraimpl.h:824
void unary_op_value_inplace(const opT &op, bool fence)
Definition funcimpl.h:2934
std::pair< const keyT, nodeT > datumT
Type of entry in container.
Definition funcimpl.h:958
Timer timer_accumulate
Definition funcimpl.h:995
TensorArgs get_tensor_args() const
Definition mraimpl.h:322
void unaryXXa(const keyT &key, const FunctionImpl< Q, NDIM > *func, const opT &op)
Definition funcimpl.h:3236
void make_Vphi_only(const opT &leaf_op, FunctionImpl< T, NDIM > *ket, FunctionImpl< T, LDIM > *v1, FunctionImpl< T, LDIM > *v2, FunctionImpl< T, LDIM > *p1, FunctionImpl< T, LDIM > *p2, FunctionImpl< T, NDIM > *eri, const bool fence=true)
assemble the function V*phi using V and phi given from the functor
Definition funcimpl.h:4430
void average(const implT &rhs)
take the average of two functions, similar to: this=0.5*(this+rhs)
Definition mraimpl.h:1088
void recursive_apply(opT &apply_op, const FunctionImpl< T, LDIM > *fimpl, const FunctionImpl< T, LDIM > *gimpl, const bool fence)
traverse a non-existing tree, make its coeffs and apply an operator
Definition funcimpl.h:5265
void diff(const DerivativeBase< T, NDIM > *D, const implT *f, bool fence)
Definition mraimpl.h:946
void square_inplace(bool fence)
Pointwise squaring of function with optional global fence.
Definition mraimpl.h:3121
void remove_internal_coefficients(const bool fence)
Definition mraimpl.h:1517
void compute_snorm_and_dnorm(bool fence=true)
compute norm of s and d coefficients for all nodes
Definition mraimpl.h:1131
std::vector< unsigned char > serialize_remote_coeffs()
invoked by copy_remote_coeffs_from_pid to serialize local coeffs
Definition funcimpl.h:1183
long box_leaf[1000]
Definition funcimpl.h:3332
void standard(bool fence)
Changes non-standard compressed form to standard compressed form.
Definition mraimpl.h:1765
void multiop_values_doit(const keyT &key, const opT &op, const std::vector< implT * > &v)
Definition funcimpl.h:2825
bool is_nonstandard_with_leaves() const
Definition mraimpl.h:278
GenTensor< Q > values2NScoeffs(const keyT &key, const GenTensor< Q > &values) const
convert function values of a child generation directly to NS coeffs
Definition funcimpl.h:1913
int truncate_mode
0=default=(|d|<thresh), 1=(|d|<thresh/2^n), 2=(|d|<thresh/4^n);
Definition funcimpl.h:979
void multiop_values(const opT &op, const std::vector< implT * > &v)
Definition funcimpl.h:2842
GenTensor< Q > NScoeffs2values(const keyT &key, const GenTensor< Q > &coeff, const bool s_only) const
convert S or NS coeffs to values on a 2k grid of the children
Definition funcimpl.h:1828
static std::enable_if_t< std::is_floating_point_v< Real >, Real > conj(const Real x)
Definition funcimpl.h:6076
FunctionNode holds the coefficients, etc., at each node of the 2^NDIM-tree.
Definition funcimpl.h:127
FunctionNode< Q, NDIM > convert() const
Copy with possible type conversion of coefficients, copying all other state.
Definition funcimpl.h:194
GenTensor< T > coeffT
Definition funcimpl.h:129
bool has_coeff() const
Returns true if there are coefficients in this node.
Definition funcimpl.h:200
void recompute_snorm_and_dnorm(const FunctionCommonData< T, NDIM > &cdata)
Definition funcimpl.h:335
FunctionNode(const coeffT &coeff, bool has_children=false)
Constructor from given coefficients with optional children.
Definition funcimpl.h:156
FunctionNode()
Default constructor makes node without coeff or children.
Definition funcimpl.h:146
void serialize(Archive &ar)
Definition funcimpl.h:458
void consolidate_buffer(const TensorArgs &args)
Definition funcimpl.h:444
double get_dnorm() const
return the precomputed norm of the (virtual) d coefficients
Definition funcimpl.h:316
size_t size() const
Returns the number of coefficients in this node.
Definition funcimpl.h:242
void set_has_children_recursive(const typename FunctionNode< T, NDIM >::dcT &c, const Key< NDIM > &key)
Sets has_children attribute to true, recursing up the tree to ensure the node is connected.
Definition funcimpl.h:259
FunctionNode< T, NDIM > & operator=(const FunctionNode< T, NDIM > &other)
Definition funcimpl.h:176
double snorm
norm of the s coefficients
Definition funcimpl.h:141
void clear_coeff()
Clears the coefficients (has_coeff() will subsequently return false)
Definition funcimpl.h:295
Tensor< T > tensorT
Definition funcimpl.h:130
coeffT buffer
The coefficients, if any.
Definition funcimpl.h:139
T trace_conj(const FunctionNode< T, NDIM > &rhs) const
Definition funcimpl.h:453
void scale(Q a)
Scale the coefficients of this node.
Definition funcimpl.h:301
bool is_leaf() const
Returns true if this does not have children.
Definition funcimpl.h:213
void set_has_children(bool flag)
Sets has_children attribute to value of flag.
Definition funcimpl.h:254
void accumulate(const coeffT &t, const typename FunctionNode< T, NDIM >::dcT &c, const Key< NDIM > &key, const TensorArgs &args)
Accumulate inplace and if necessary connect node to parent.
Definition funcimpl.h:416
double get_norm_tree() const
Gets the value of norm_tree.
Definition funcimpl.h:311
bool _has_children
True if there are children.
Definition funcimpl.h:138
FunctionNode(const coeffT &coeff, double norm_tree, double snorm, double dnorm, bool has_children)
Definition funcimpl.h:166
void set_snorm(const double sn)
set the precomputed norm of the (virtual) s coefficients
Definition funcimpl.h:321
coeffT _coeffs
The coefficients, if any.
Definition funcimpl.h:136
void accumulate2(const tensorT &t, const typename FunctionNode< T, NDIM >::dcT &c, const Key< NDIM > &key)
Accumulate inplace and if necessary connect node to parent.
Definition funcimpl.h:383
void reduceRank(const double &eps)
reduces the rank of the coefficients (if applicable)
Definition funcimpl.h:249
WorldContainer< Key< NDIM >, FunctionNode< T, NDIM > > dcT
Definition funcimpl.h:144
void gaxpy_inplace(const T &alpha, const FunctionNode< Q, NDIM > &other, const R &beta)
General bi-linear operation — this = this*alpha + other*beta.
Definition funcimpl.h:365
double _norm_tree
After norm_tree will contain norm of coefficients summed up tree.
Definition funcimpl.h:137
void set_is_leaf(bool flag)
Sets has_children attribute to value of !flag.
Definition funcimpl.h:280
void print_json(std::ostream &s) const
Definition funcimpl.h:466
double get_snorm() const
get the precomputed norm of the (virtual) s coefficients
Definition funcimpl.h:331
const coeffT & coeff() const
Returns a const reference to the tensor containing the coeffs.
Definition funcimpl.h:237
FunctionNode(const coeffT &coeff, double norm_tree, bool has_children)
Definition funcimpl.h:161
bool has_children() const
Returns true if this node has children.
Definition funcimpl.h:207
void set_coeff(const coeffT &coeffs)
Takes a shallow copy of the coeff — same as this->coeff()=coeff.
Definition funcimpl.h:285
void set_dnorm(const double dn)
set the precomputed norm of the (virtual) d coefficients
Definition funcimpl.h:326
double dnorm
norm of the d coefficients, also defined if there are no d coefficients
Definition funcimpl.h:140
bool is_invalid() const
Returns true if this node is invalid (no coeffs and no children)
Definition funcimpl.h:219
FunctionNode(const FunctionNode< T, NDIM > &other)
Definition funcimpl.h:170
coeffT & coeff()
Returns a non-const reference to the tensor containing the coeffs.
Definition funcimpl.h:227
void set_norm_tree(double norm_tree)
Sets the value of norm_tree.
Definition funcimpl.h:306
Implements the functionality of futures.
Definition future.h:74
A future is a possibly yet unevaluated value.
Definition future.h:369
remote_refT remote_ref(World &world) const
Returns a structure used to pass references to another process.
Definition future.h:671
RemoteReference< FutureImpl< T > > remote_refT
Definition future.h:394
Definition lowranktensor.h:59
bool is_of_tensortype(const TensorType &tt) const
Definition gentensor.h:225
GenTensor convert(const TensorArgs &targs) const
Definition gentensor.h:198
long dim(const int i) const
return the number of entries in dimension i
Definition lowranktensor.h:391
Tensor< T > full_tensor_copy() const
Definition gentensor.h:206
long ndim() const
Definition lowranktensor.h:386
void add_SVD(const GenTensor< T > &rhs, const double &eps)
Definition gentensor.h:235
constexpr bool is_full_tensor() const
Definition gentensor.h:224
const Tensor< T > & get_tensor() const
Definition gentensor.h:203
bool has_no_data() const
Definition gentensor.h:211
void normalize()
Definition gentensor.h:218
GenTensor< T > & emul(const GenTensor< T > &other)
Inplace multiply by corresponding elements of argument Tensor.
Definition lowranktensor.h:637
float_scalar_type normf() const
Definition lowranktensor.h:406
double svd_normf() const
Definition gentensor.h:213
SRConf< T > config() const
Definition gentensor.h:237
void reduce_rank(const double &eps)
Definition gentensor.h:217
long rank() const
Definition gentensor.h:212
const Tensor< T > & full_tensor() const
Definition gentensor.h:200
long size() const
Definition lowranktensor.h:488
SVDTensor< T > & get_svdtensor()
Definition gentensor.h:228
TensorType tensor_type() const
Definition gentensor.h:221
bool has_data() const
Definition gentensor.h:210
Tensor< T > reconstruct_tensor() const
Definition gentensor.h:199
GenTensor & gaxpy(const T alpha, const GenTensor &other, const T beta)
Definition lowranktensor.h:586
bool is_assigned() const
Definition gentensor.h:209
IsSupported< TensorTypeData< Q >, GenTensor< T > & >::type scale(Q fac)
Inplace multiplication by scalar of supported type (legacy name)
Definition lowranktensor.h:426
constexpr bool is_svd_tensor() const
Definition gentensor.h:222
Iterates in lexical order through all children of a key.
Definition key.h:505
Key is the index for a node of the 2^NDIM-tree.
Definition key.h:70
Key< NDIM+LDIM > merge_with(const Key< LDIM > &rhs) const
merge with other key (i.e. concatenate), use level of rhs, not of this
Definition key.h:444
Level level() const
Definition key.h:169
bool is_valid() const
Checks if a key is valid.
Definition key.h:124
hashT hash() const
Definition key.h:158
Key< NDIM-VDIM > extract_complement_key(const std::array< int, VDIM > &v) const
extract a new key with the Translations complementary to the ones indicated in the v array
Definition key.h:430
Key< VDIM > extract_key(const std::array< int, VDIM > &v) const
extract a new key with the Translations indicated in the v array
Definition key.h:422
Key parent(int generation=1) const
Returns the key of the parent.
Definition key.h:290
const Vector< Translation, NDIM > & translation() const
Definition key.h:174
void break_apart(Key< LDIM > &key1, Key< KDIM > &key2) const
break key into two low-dimensional keys
Definition key.h:381
A pmap that locates children on odd levels with their even level parents.
Definition funcimpl.h:105
LevelPmap(World &world)
Definition funcimpl.h:111
const int nproc
Definition funcimpl.h:107
LevelPmap()
Definition funcimpl.h:109
ProcessID owner(const keyT &key) const
Find the owner of a given key.
Definition funcimpl.h:114
Definition funcimpl.h:77
Mutex using pthread mutex operations.
Definition worldmutex.h:131
void unlock() const
Free a mutex owned by this thread.
Definition worldmutex.h:165
void lock() const
Acquire the mutex waiting if necessary.
Definition worldmutex.h:155
Range, vaguely a la Intel TBB, to encapsulate a random-access, STL-like start and end iterator with c...
Definition range.h:64
Simple structure used to manage references/pointers to remote instances.
Definition worldref.h:395
Definition SVDTensor.h:42
A simple process map.
Definition funcimpl.h:86
SimplePmap(World &world)
Definition funcimpl.h:92
const int nproc
Definition funcimpl.h:88
const ProcessID me
Definition funcimpl.h:89
ProcessID owner(const keyT &key) const
Maps key to processor.
Definition funcimpl.h:95
A slice defines a sub-range or patch of a dimension.
Definition slice.h:103
static TaskAttributes hipri()
Definition thread.h:456
Traits class to specify support of numeric types.
Definition type_data.h:56
A tensor is a multidimensional array.
Definition tensor.h:317
float_scalar_type normf() const
Returns the Frobenius norm of the tensor.
Definition tensor.h:1726
Tensor< T > & gaxpy(T alpha, const Tensor< T > &other, T beta)
Inplace generalized saxpy ... this = this*alpha + other*beta.
Definition tensor.h:1805
T sum() const
Returns the sum of all elements of the tensor.
Definition tensor.h:1662
Tensor< T > reshape(int ndimnew, const long *d)
Returns new view/tensor reshaping size/number of dimensions to conforming tensor.
Definition tensor.h:1384
T * ptr()
Returns a pointer to the internal data.
Definition tensor.h:1840
Tensor< T > mapdim(const std::vector< long > &map)
Returns new view/tensor permuting the dimensions.
Definition tensor.h:1624
IsSupported< TensorTypeData< Q >, Tensor< T > & >::type scale(Q x)
Inplace multiplication by scalar of supported type (legacy name)
Definition tensor.h:686
Tensor< T > & emul(const Tensor< T > &t)
Inplace multiply by corresponding elements of argument Tensor.
Definition tensor.h:1799
bool has_data() const
Definition tensor.h:1902
Tensor< T > fusedim(long i)
Returns new view/tensor fusing contiguous dimensions i and i+1.
Definition tensor.h:1587
Tensor< T > flat()
Returns new view/tensor reshaping to flat (1-d) tensor.
Definition tensor.h:1555
Tensor< T > & conj()
Inplace complex conjugate.
Definition tensor.h:716
Definition function_common_data.h:169
void accumulate(const double time) const
accumulate timer
Definition function_common_data.h:183
A simple, fixed dimension vector.
Definition vector.h:64
Makes a distributed container with specified attributes.
Definition worlddc.h:1127
void process_pending()
Process pending messages.
Definition worlddc.h:1453
bool find(accessor &acc, const keyT &key)
Write access to LOCAL value by key. Returns true if found, false otherwise (always false for remote).
Definition worlddc.h:1274
bool probe(const keyT &key) const
Returns true if local data is immediately available (no communication)
Definition worlddc.h:1311
iterator begin()
Returns an iterator to the beginning of the local data (no communication)
Definition worlddc.h:1357
bool is_replicated() const
Definition worlddc.h:1227
ProcessID owner(const keyT &key) const
Returns processor that logically owns key (no communication)
Definition worlddc.h:1321
implT::const_iterator const_iterator
Definition worlddc.h:1135
void replicate(bool fence=true)
replicates this WorldContainer on all ProcessIDs
Definition worlddc.h:1249
void erase(const keyT &key)
Erases entry from container (non-blocking comm if remote)
Definition worlddc.h:1392
void replace(const pairT &datum)
Inserts/replaces key+value pair (non-blocking communication if key not local)
Definition worlddc.h:1261
iterator end()
Returns an iterator past the end of the local data (no communication)
Definition worlddc.h:1371
const std::shared_ptr< WorldDCPmapInterface< keyT > > & get_pmap() const
Returns shared pointer to the process mapping.
Definition worlddc.h:1429
bool insert(accessor &acc, const keyT &key)
Write access to LOCAL value by key. Returns true if inserted, false if already exists (throws if remo...
Definition worlddc.h:1288
bool is_distributed() const
Definition worlddc.h:1223
implT::iterator iterator
Definition worlddc.h:1134
std::size_t size() const
Returns the number of local entries (no communication)
Definition worlddc.h:1422
Future< REMFUTURE(MEMFUN_RETURNT(memfunT))> task(const keyT &key, memfunT memfun, const TaskAttributes &attr=TaskAttributes())
Adds task "resultT memfun()" in process owning item (non-blocking comm if remote)
Definition worlddc.h:1713
bool is_local(const keyT &key) const
Returns true if the key maps to the local processor (no communication)
Definition worlddc.h:1328
bool is_host_replicated() const
Definition worlddc.h:1231
Future< MEMFUN_RETURNT(memfunT)> send(const keyT &key, memfunT memfun)
Sends message "resultT memfun()" to item (non-blocking comm if remote)
Definition worlddc.h:1470
void replicate_on_hosts(bool fence=true)
replicates this WorldContainer on all hosts (one PID per host)
Definition worlddc.h:1255
implT::accessor accessor
Definition worlddc.h:1136
Interface to be provided by any process map.
Definition worlddc.h:122
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition worldgop.cc:161
Implements most parts of a globally addressable object (via unique ID).
Definition world_object.h:366
const uniqueidT & id() const
Returns the globally unique object ID.
Definition world_object.h:713
void process_pending()
To be called from derived constructor to process pending messages.
Definition world_object.h:658
ProcessID me
Rank of self.
Definition world_object.h:387
detail::task_result_type< memfnT >::futureT send(ProcessID dest, memfnT memfn) const
Definition world_object.h:733
detail::task_result_type< memfnT >::futureT task(ProcessID dest, memfnT memfn, const TaskAttributes &attr=TaskAttributes()) const
Sends task to derived class method returnT (this->*memfn)().
Definition world_object.h:1007
Future< bool > for_each(const rangeT &range, const opT &op)
Apply op(item) on all items in range.
Definition world_task_queue.h:572
void add(TaskInterface *t)
Add a new local task, taking ownership of the pointer.
Definition world_task_queue.h:466
Future< resultT > reduce(const rangeT &range, const opT &op)
Reduce op(item) for all items in range using op(sum,op(item)).
Definition world_task_queue.h:527
A parallel world class.
Definition world.h:132
static World * world_from_id(std::uint64_t id)
Convert a World ID to a World pointer.
Definition world.h:492
WorldTaskQueue & taskq
Task queue.
Definition world.h:206
std::vector< uniqueidT > get_object_ids() const
Returns a vector of all unique IDs in this World.
Definition world.h:468
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:320
static std::vector< unsigned long > get_world_ids()
return a vector containing all world ids
Definition world.h:476
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:330
unsigned long id() const
Definition world.h:315
WorldGopInterface & gop
Global operations.
Definition world.h:207
std::optional< T * > ptr_from_id(uniqueidT id) const
Look up a local pointer from a world-wide unique ID.
Definition world.h:416
ProcessID random_proc()
Returns a random process number; that is, an integer in [0,world.size()).
Definition world.h:591
Wraps an archive around an STL vector for input.
Definition vector_archive.h:101
Wraps an archive around an STL vector for output.
Definition vector_archive.h:55
Wrapper for an opaque pointer for serialization purposes.
Definition archive.h:851
syntactic sugar for std::array<bool, N>
Definition array_of_bools.h:19
Class for unique global IDs.
Definition uniqueid.h:53
unsigned long get_obj_id() const
Access the object ID.
Definition uniqueid.h:97
unsigned long get_world_id() const
Access the World ID.
Definition uniqueid.h:90
static const double R
Definition csqrt.cc:46
double(* f1)(const coord_3d &)
Definition derivatives.cc:55
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition derivatives.cc:72
static double lo
Definition dirac-hatom.cc:23
@ upper
Definition dirac-hatom.cc:15
Provides FunctionDefaults and utilities for coordinate transformation.
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:28
archive_array< unsigned char > wrap_opaque(const T *, unsigned int)
Factory function to wrap a pointer to contiguous data as an opaque (uchar) archive_array.
Definition archive.h:926
Tensor< typename Tensor< T >::scalar_type > arg(const Tensor< T > &t)
Return a new tensor holding the argument of each element of t (complex types only)
Definition tensor.h:2518
Tensor< TENSOR_RESULT_TYPE(T, Q) > & fast_transform(const Tensor< T > &t, const Tensor< Q > &c, Tensor< TENSOR_RESULT_TYPE(T, Q) > &result, Tensor< TENSOR_RESULT_TYPE(T, Q) > &workspace)
Restricted but heavily optimized form of transform()
Definition tensor.h:2459
const double beta
Definition gygi_soltion.cc:62
static const double v
Definition hatom_sf_dirac.cc:20
Provides IndexIterator.
Tensor< double > op(const Tensor< double > &x)
Definition kain.cc:508
Multidimension Key for MRA tree and associated iterators.
static double pow(const double *a, const double *b)
Definition lda.h:74
#define MADNESS_CHECK(condition)
Check a condition — even in a release build the condition is always evaluated so it can have side eff...
Definition madness_exception.h:182
#define MADNESS_EXCEPTION(msg, value)
Macro for throwing a MADNESS exception.
Definition madness_exception.h:119
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
#define MADNESS_CHECK_THROW(condition, msg)
Check a condition — even in a release build the condition is always evaluated so it can have side eff...
Definition madness_exception.h:207
Header to declare stuff which has not yet found a home.
constexpr double pi
Mathematical constant π.
Definition constants.h:48
MemFuncWrapper< objT *, memfnT, typename result_of< memfnT >::type > wrap_mem_fn(objT &obj, memfnT memfn)
Create a member function wrapper (MemFuncWrapper) from an object and a member function pointer.
Definition mem_func_wrapper.h:251
void combine_hash(hashT &seed, hashT hash)
Internal use only.
Definition worldhash.h:248
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
std::ostream & operator<<(std::ostream &os, const particle< PDIM > &p)
Definition lowrankfunction.h:401
static const char * filename
Definition legendre.cc:96
static const std::vector< Slice > ___
Entire dimension.
Definition slice.h:128
static double cpu_time()
Returns the cpu time in seconds relative to an arbitrary origin.
Definition timers.h:127
GenTensor< TENSOR_RESULT_TYPE(R, Q)> general_transform(const GenTensor< R > &t, const Tensor< Q > c[])
Definition gentensor.h:274
response_space scale(response_space a, double b)
bool nearlyEqual(double a, double b, double epsilon=1e-9)
Definition numerics.cc:11
void finalize()
Call this once at the very end of your main program instead of MPI_Finalize().
Definition world.cc:235
void norm_tree(World &world, const std::vector< Function< T, NDIM > > &v, bool fence=true)
Makes the norm tree for all functions in a vector.
Definition vmra.h:1205
std::vector< Function< TENSOR_RESULT_TYPE(T, R), NDIM > > transform(World &world, const std::vector< Function< T, NDIM > > &v, const Tensor< R > &c, bool fence=true)
Transforms a vector of functions according to new[i] = sum[j] old[j]*c[j,i].
Definition vmra.h:731
TreeState
Definition funcdefaults.h:59
@ nonstandard_after_apply
s and d coeffs, state after operator application
Definition funcdefaults.h:64
@ redundant_after_merge
s coeffs everywhere, must be summed up to yield the result
Definition funcdefaults.h:66
@ reconstructed
s coeffs at the leaves only
Definition funcdefaults.h:60
@ nonstandard
s and d coeffs in internal nodes
Definition funcdefaults.h:62
@ redundant
s coeffs everywhere
Definition funcdefaults.h:65
static Tensor< double > weights[max_npt+1]
Definition legendre.cc:99
int64_t Translation
Definition key.h:58
Key< NDIM > displacement(const Key< NDIM > &source, const Key< NDIM > &target)
given a source and a target, return the displacement in translation
Definition key.h:490
static const Slice _(0,-1, 1)
std::shared_ptr< FunctionFunctorInterface< double, 3 > > func(new opT(g))
void change_tensor_type(GenTensor< T > &t, const TensorArgs &targs)
change representation to targ.tt
Definition gentensor.h:284
int Level
Definition key.h:59
std::enable_if< std::is_base_of< ProjectorBase, projT >::value, OuterProjector< projT, projQ > >::type outer(const projT &p0, const projQ &p1)
Definition projector.h:457
int RandomValue< int >()
Random int.
Definition ran.cc:250
static double pop(std::vector< double > &v)
Definition SCF.cc:115
void print(const T &t, const Ts &... ts)
Print items to std::cout (items separated by spaces) and terminate with a new line.
Definition print.h:226
Tensor< T > fcube(const Key< NDIM > &, T(*f)(const Vector< double, NDIM > &), const Tensor< double > &)
Definition mraimpl.h:2133
TensorType
low rank representations of tensors (see gentensor.h)
Definition gentensor.h:120
@ TT_2D
Definition gentensor.h:120
@ TT_FULL
Definition gentensor.h:120
NDIM & f
Definition mra.h:2543
void error(const char *msg)
Definition world.cc:142
NDIM const Function< R, NDIM > & g
Definition mra.h:2543
std::size_t hashT
The hash value type.
Definition worldhash.h:145
static const int kmax
Definition twoscale.cc:52
double inner(response_space &a, response_space &b)
Definition response_functions.h:639
GenTensor< TENSOR_RESULT_TYPE(R, Q)> transform_dir(const GenTensor< R > &t, const Tensor< Q > &c, const int axis)
Definition lowranktensor.h:1106
std::string name(const FuncType &type, const int ex=-1)
Definition ccpairfunction.h:28
void mxmT(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b)
Matrix += Matrix * matrix transpose ... MKL interface version.
Definition mxm.h:225
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition mra.h:2111
static const int MAXK
The maximum wavelet order presently supported.
Definition funcdefaults.h:54
Definition mraimpl.h:51
static long abs(long a)
Definition tensor.h:218
const double cc
Definition navstokes_cosines.cc:107
static const double b
Definition nonlinschro.cc:119
static const double d
Definition nonlinschro.cc:121
static const double a
Definition nonlinschro.cc:118
Defines simple templates for printing to std::cout "a la Python".
double Q(double a)
Definition relops.cc:20
static const double c
Definition relops.cc:10
static const double L
Definition rk.cc:46
static const double thresh
Definition rk.cc:45
Definition test_ar.cc:204
Definition test_dc.cc:47
Key parent() const
Definition test_tree.cc:68
hashT hash() const
Definition test_dc.cc:54
Definition test_ccpairfunction.cc:22
given a ket and the 1- and 2-electron potentials, construct the function V phi
Definition funcimpl.h:4090
implT * result
where to construct Vphi, no need to track parents
Definition funcimpl.h:4098
bool have_v2() const
Definition funcimpl.h:4107
ctL iav1
Definition funcimpl.h:4102
Vphi_op_NS(implT *result, const opT &leaf_op, const ctT &iaket, const ctL &iap1, const ctL &iap2, const ctL &iav1, const ctL &iav2, const implT *eri)
Definition funcimpl.h:4116
ctL iap1
Definition funcimpl.h:4101
bool have_v1() const
Definition funcimpl.h:4106
std::pair< bool, coeffT > continue_recursion(const std::vector< bool > child_is_leaf, const tensorT &coeffs, const keyT &key) const
loop over all children and either insert their sum coeffs or continue the recursion
Definition funcimpl.h:4182
opT leaf_op
deciding if a given FunctionNode will be a leaf node
Definition funcimpl.h:4099
std::pair< coeffT, double > make_sum_coeffs(const keyT &key) const
make the sum coeffs for key
Definition funcimpl.h:4275
CoeffTracker< T, NDIM > ctT
Definition funcimpl.h:4095
ctL iap2
the particles 1 and 2 (exclusive with ket)
Definition funcimpl.h:4101
bool have_ket() const
Definition funcimpl.h:4105
const implT * eri
2-particle potential, must be on-demand
Definition funcimpl.h:4103
CoeffTracker< T, LDIM > ctL
Definition funcimpl.h:4096
std::pair< bool, coeffT > operator()(const Key< NDIM > &key) const
make and insert the coefficients into result's tree
Definition funcimpl.h:4127
void serialize(const Archive &ar)
serialize this (needed for use in recursive_op)
Definition funcimpl.h:4356
Vphi_op_NS< opT, LDIM > this_type
Definition funcimpl.h:4094
ctT iaket
the ket of a pair function (exclusive with p1, p2)
Definition funcimpl.h:4100
double compute_error_from_inaccurate_refinement(const keyT &key, const tensorT &ceri) const
the error is computed from the d coefficients of the constituent functions
Definition funcimpl.h:4228
void accumulate_into_result(const Key< NDIM > &key, const coeffT &coeff) const
Definition funcimpl.h:4110
this_type make_child(const keyT &child) const
Definition funcimpl.h:4327
tensorT eri_coeffs(const keyT &key) const
Definition funcimpl.h:4208
ctL iav2
potentials for particles 1 and 2
Definition funcimpl.h:4102
bool have_eri() const
Definition funcimpl.h:4108
this_type forward_ctor(implT *result1, const opT &leaf_op, const ctT &iaket1, const ctL &iap11, const ctL &iap21, const ctL &iav11, const ctL &iav21, const implT *eri1)
Definition funcimpl.h:4349
Vphi_op_NS()
Definition funcimpl.h:4115
Future< this_type > activate() const
Definition funcimpl.h:4338
bool randomize() const
Definition funcimpl.h:4092
add two functions f and g: result=alpha * f + beta * g
Definition funcimpl.h:3600
bool randomize() const
Definition funcimpl.h:3605
Future< this_type > activate() const
retrieve the coefficients (parent coeffs might be remote)
Definition funcimpl.h:3635
add_op(const ctT &f, const ctT &g, const double alpha, const double beta)
Definition funcimpl.h:3613
ctT f
tracking coeffs of first and second addend
Definition funcimpl.h:3608
double alpha
prefactor for f, g
Definition funcimpl.h:3610
add_op this_type
Definition funcimpl.h:3603
CoeffTracker< T, NDIM > ctT
Definition funcimpl.h:3602
void serialize(const Archive &ar)
Definition funcimpl.h:3647
ctT g
Definition funcimpl.h:3608
std::pair< bool, coeffT > operator()(const keyT &key) const
if we are at the bottom of the trees, return the sum of the coeffs
Definition funcimpl.h:3617
double beta
Definition funcimpl.h:3610
this_type make_child(const keyT &child) const
Definition funcimpl.h:3630
this_type forward_ctor(const ctT &f1, const ctT &g1, const double alpha, const double beta)
taskq-compatible ctor
Definition funcimpl.h:3643
opT op
Definition funcimpl.h:3206
opT::resultT resultT
Definition funcimpl.h:3204
Tensor< resultT > operator()(const Key< NDIM > &key, const Tensor< Q > &t) const
Definition funcimpl.h:3213
coeff_value_adaptor(const FunctionImpl< Q, NDIM > *impl_func, const opT &op)
Definition funcimpl.h:3209
const FunctionImpl< Q, NDIM > * impl_func
Definition funcimpl.h:3205
void serialize(Archive &ar)
Definition funcimpl.h:3222
merge the coefficient boxes of this into result's tree
Definition funcimpl.h:2445
Range< typename dcT::const_iterator > rangeT
Definition funcimpl.h:2446
void serialize(const Archive &ar)
Definition funcimpl.h:2463
FunctionImpl< Q, NDIM > * result
Definition funcimpl.h:2447
do_accumulate_trees(FunctionImpl< Q, NDIM > &result, const T alpha)
Definition funcimpl.h:2450
T alpha
Definition funcimpl.h:2448
bool operator()(typename rangeT::iterator &it) const
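return the norm of the difference of this node and its "mirror" node (NOTE(review): this brief matches do_check_symmetry_local::operator() verbatim, but this operator returns bool while accumulating into result's tree — likely a copy-paste; confirm against the source)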
Definition funcimpl.h:2454
"put" this on g
Definition funcimpl.h:2656
Range< typename dcT::const_iterator > rangeT
Definition funcimpl.h:2657
void serialize(const Archive &ar)
Definition funcimpl.h:2685
implT * g
Definition funcimpl.h:2659
do_average()
Definition funcimpl.h:2661
bool operator()(typename rangeT::iterator &it) const
iterator it points to this
Definition funcimpl.h:2665
do_average(implT &g)
Definition funcimpl.h:2662
change representation of nodes' coeffs to low rank, optional fence
Definition funcimpl.h:2689
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2690
void serialize(const Archive &ar)
Definition funcimpl.h:2713
TensorArgs targs
Definition funcimpl.h:2693
do_change_tensor_type(const TensorArgs &targs, implT &g)
Definition funcimpl.h:2699
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2702
implT * f
Definition funcimpl.h:2694
check symmetry wrt particle exchange
Definition funcimpl.h:2362
Range< typename dcT::const_iterator > rangeT
Definition funcimpl.h:2363
double operator()(typename rangeT::iterator &it) const
return the norm of the difference of this node and its "mirror" node
Definition funcimpl.h:2369
do_check_symmetry_local()
Definition funcimpl.h:2365
void serialize(const Archive &ar)
Definition funcimpl.h:2432
double operator()(double a, double b) const
Definition funcimpl.h:2428
do_check_symmetry_local(const implT &f)
Definition funcimpl.h:2366
const implT * f
Definition funcimpl.h:2364
compute the norm of the wavelet coefficients
Definition funcimpl.h:4497
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:4498
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:4504
do_compute_snorm_and_dnorm(const FunctionCommonData< T, NDIM > &cdata)
Definition funcimpl.h:4501
const FunctionCommonData< T, NDIM > & cdata
Definition funcimpl.h:4500
TensorArgs targs
Definition funcimpl.h:2720
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2725
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2717
do_consolidate_buffer(const TensorArgs &targs)
Definition funcimpl.h:2724
void serialize(const Archive &ar)
Definition funcimpl.h:2729
double operator()(double val) const
Definition funcimpl.h:1482
double limit
Definition funcimpl.h:1477
do_convert_to_color(const double limit, const bool log)
Definition funcimpl.h:1481
bool log
Definition funcimpl.h:1478
static double lower()
Definition funcimpl.h:1479
compute the inner product of this range with other
Definition funcimpl.h:5821
do_dot_local(const FunctionImpl< R, NDIM > *other, const bool leaves_only)
Definition funcimpl.h:5826
bool leaves_only
Definition funcimpl.h:5823
typedef TENSOR_RESULT_TYPE(T, R) resultT
resultT operator()(resultT a, resultT b) const
Definition funcimpl.h:5854
const FunctionImpl< R, NDIM > * other
Definition funcimpl.h:5822
void serialize(const Archive &ar)
Definition funcimpl.h:5858
resultT operator()(typename dcT::const_iterator &it) const
Definition funcimpl.h:5828
functor for the gaxpy_inplace method
Definition funcimpl.h:1264
FunctionImpl< T, NDIM > * f
the current function impl
Definition funcimpl.h:1266
do_gaxpy_inplace(FunctionImpl< T, NDIM > *f, T alpha, R beta)
Definition funcimpl.h:1270
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:1271
R beta
prefactor for other function impl
Definition funcimpl.h:1268
void serialize(Archive &ar)
Definition funcimpl.h:1279
Range< typename FunctionImpl< Q, NDIM >::dcT::const_iterator > rangeT
Definition funcimpl.h:1265
T alpha
prefactor for current function impl
Definition funcimpl.h:1267
const bool do_leaves
start with leaf nodes instead of initial_level
Definition funcimpl.h:6747
T operator()(T a, T b) const
Definition funcimpl.h:6765
do_inner_ext_local_ffi(const std::shared_ptr< FunctionFunctorInterface< T, NDIM > > f, const implT *impl, const bool leaf_refine, const bool do_leaves)
Definition funcimpl.h:6749
void serialize(const Archive &ar)
Definition funcimpl.h:6769
const bool leaf_refine
Definition funcimpl.h:6746
const std::shared_ptr< FunctionFunctorInterface< T, NDIM > > fref
Definition funcimpl.h:6744
T operator()(typename dcT::const_iterator &it) const
Definition funcimpl.h:6753
const implT * impl
Definition funcimpl.h:6745
compute the inner product of this range with other
Definition funcimpl.h:5684
const FunctionImpl< T, NDIM > * bra
Definition funcimpl.h:5685
void serialize(const Archive &ar)
Definition funcimpl.h:5800
const FunctionImpl< R, NDIM > * ket
Definition funcimpl.h:5686
bool leaves_only
Definition funcimpl.h:5687
do_inner_local_on_demand(const FunctionImpl< T, NDIM > *bra, const FunctionImpl< R, NDIM > *ket, const bool leaves_only=true)
Definition funcimpl.h:5690
resultT operator()(resultT a, resultT b) const
Definition funcimpl.h:5796
resultT operator()(typename dcT::const_iterator &it) const
Definition funcimpl.h:5693
compute the inner product of this range with other
Definition funcimpl.h:5623
resultT operator()(resultT a, resultT b) const
Definition funcimpl.h:5656
bool leaves_only
Definition funcimpl.h:5625
void serialize(const Archive &ar)
Definition funcimpl.h:5660
do_inner_local(const FunctionImpl< R, NDIM > *other, const bool leaves_only)
Definition funcimpl.h:5628
const FunctionImpl< R, NDIM > * other
Definition funcimpl.h:5624
resultT operator()(typename dcT::const_iterator &it) const
Definition funcimpl.h:5630
typedef TENSOR_RESULT_TYPE(T, R) resultT
keep only the sum coefficients in each node
Definition funcimpl.h:2316
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2317
do_keep_sum_coeffs(implT *impl)
constructor needs impl for cdata
Definition funcimpl.h:2321
implT * impl
Definition funcimpl.h:2318
void serialize(const Archive &ar)
Definition funcimpl.h:2330
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2323
mirror dimensions of this, write result on f
Definition funcimpl.h:2590
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2600
implT * f
Definition funcimpl.h:2594
std::vector< long > mirror
Definition funcimpl.h:2593
void serialize(const Archive &ar)
Definition funcimpl.h:2647
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2591
std::vector< long > map
Definition funcimpl.h:2593
do_map_and_mirror(const std::vector< long > map, const std::vector< long > mirror, implT &f)
Definition funcimpl.h:2597
map this on f
Definition funcimpl.h:2510
do_mapdim(const std::vector< long > map, implT &f)
Definition funcimpl.h:2517
void serialize(const Archive &ar)
Definition funcimpl.h:2533
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2511
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2519
std::vector< long > map
Definition funcimpl.h:2513
do_mapdim()
Definition funcimpl.h:2516
implT * f
Definition funcimpl.h:2514
merge the coefficient boxes of this into other's tree
Definition funcimpl.h:2474
bool operator()(typename rangeT::iterator &it) const
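return the norm of the difference of this node and its "mirror" node (NOTE(review): this brief matches do_check_symmetry_local::operator() verbatim, but this operator returns bool while merging into other's tree — likely a copy-paste; confirm against the source)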
Definition funcimpl.h:2484
Range< typename dcT::const_iterator > rangeT
Definition funcimpl.h:2475
FunctionImpl< Q, NDIM > * other
Definition funcimpl.h:2476
do_merge_trees(const T alpha, const R beta, FunctionImpl< Q, NDIM > &other)
Definition funcimpl.h:2480
T alpha
Definition funcimpl.h:2477
do_merge_trees()
Definition funcimpl.h:2479
R beta
Definition funcimpl.h:2478
void serialize(const Archive &ar)
Definition funcimpl.h:2503
mirror dimensions of this, write result on f
Definition funcimpl.h:2540
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2549
implT * f
Definition funcimpl.h:2544
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2541
do_mirror()
Definition funcimpl.h:2546
do_mirror(const std::vector< long > mirror, implT &f)
Definition funcimpl.h:2547
void serialize(const Archive &ar)
Definition funcimpl.h:2583
std::vector< long > mirror
Definition funcimpl.h:2543
Definition funcimpl.h:5596
double operator()(typename dcT::const_iterator &it) const
Definition funcimpl.h:5597
void serialize(const Archive &ar)
Definition funcimpl.h:5612
double operator()(double a, double b) const
Definition funcimpl.h:5608
laziness
Definition funcimpl.h:4754
void serialize(Archive &ar)
Definition funcimpl.h:4763
Key< OPDIM > d
Definition funcimpl.h:4755
Key< OPDIM > key
Definition funcimpl.h:4755
keyT dest
Definition funcimpl.h:4756
double fac
Definition funcimpl.h:4757
do_op_args(const Key< OPDIM > &key, const Key< OPDIM > &d, const keyT &dest, double tol, double fac, double cnorm)
Definition funcimpl.h:4760
double cnorm
Definition funcimpl.h:4757
double tol
Definition funcimpl.h:4757
reduce the rank of the nodes, optional fence
Definition funcimpl.h:2336
do_reduce_rank(const TensorArgs &targs)
Definition funcimpl.h:2344
TensorArgs args
Definition funcimpl.h:2340
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2350
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2337
do_reduce_rank(const double &thresh)
Definition funcimpl.h:2345
void serialize(const Archive &ar)
Definition funcimpl.h:2356
Changes non-standard compressed form to standard compressed form.
Definition funcimpl.h:4718
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:4729
do_standard(implT *impl)
Definition funcimpl.h:4726
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:4719
void serialize(const Archive &ar)
Definition funcimpl.h:4746
implT * impl
Definition funcimpl.h:4722
given an NS tree resulting from a convolution, truncate leafs if appropriate
Definition funcimpl.h:2257
void serialize(const Archive &ar)
Definition funcimpl.h:2277
const implT * f
Definition funcimpl.h:2259
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2263
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2258
do_truncate_NS_leafs(const implT *f)
Definition funcimpl.h:2261
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2736
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2740
implT * impl
Definition funcimpl.h:2737
void serialize(const Archive &ar)
Definition funcimpl.h:2758
do_unary_op_value_inplace(implT *impl, const opT &op)
Definition funcimpl.h:2739
Hartree product of two LDIM functions to yield a NDIM = 2*LDIM function.
Definition funcimpl.h:3683
this_type forward_ctor(implT *result1, const ctL &p11, const ctL &p22, const leaf_opT &leaf_op)
Definition funcimpl.h:3739
bool randomize() const
Definition funcimpl.h:3684
void serialize(const Archive &ar)
Definition funcimpl.h:3743
hartree_op(implT *result, const ctL &p11, const ctL &p22, const leaf_opT &leaf_op)
Definition funcimpl.h:3695
CoeffTracker< T, LDIM > ctL
Definition funcimpl.h:3687
ctL p2
tracking coeffs of the two lo-dim functions
Definition funcimpl.h:3690
leaf_opT leaf_op
determine if a given node will be a leaf node
Definition funcimpl.h:3691
hartree_op()
Definition funcimpl.h:3694
implT * result
where to construct the pair function
Definition funcimpl.h:3689
hartree_op< LDIM, leaf_opT > this_type
Definition funcimpl.h:3686
std::pair< bool, coeffT > operator()(const Key< NDIM > &key) const
Definition funcimpl.h:3700
ctL p1
Definition funcimpl.h:3690
this_type make_child(const keyT &child) const
Definition funcimpl.h:3723
Future< this_type > activate() const
Definition funcimpl.h:3732
perform this multiplication: h(1,2) = f(1,2) * g(1)
Definition funcimpl.h:3491
multiply_op()
Definition funcimpl.h:3503
ctL g
Definition funcimpl.h:3500
Future< this_type > activate() const
Definition funcimpl.h:3582
CoeffTracker< T, LDIM > ctL
Definition funcimpl.h:3495
implT * h
the result function h(1,2) = f(1,2) * g(1)
Definition funcimpl.h:3498
CoeffTracker< T, NDIM > ctT
Definition funcimpl.h:3494
std::pair< bool, coeffT > operator()(const Key< NDIM > &key) const
apply this on a FunctionNode of f and g of Key key
Definition funcimpl.h:3530
this_type forward_ctor(implT *h1, const ctT &f1, const ctL &g1, const int particle)
Definition funcimpl.h:3589
static bool randomize()
Definition funcimpl.h:3493
int particle
if g is g(1) or g(2)
Definition funcimpl.h:3501
ctT f
Definition funcimpl.h:3499
multiply_op< LDIM > this_type
Definition funcimpl.h:3496
multiply_op(implT *h1, const ctT &f1, const ctL &g1, const int particle1)
Definition funcimpl.h:3505
bool screen(const coeffT &fcoeff, const coeffT &gcoeff, const keyT &key) const
return true if this will be a leaf node
Definition funcimpl.h:3511
this_type make_child(const keyT &child) const
Definition funcimpl.h:3572
void serialize(const Archive &ar)
Definition funcimpl.h:3593
coeffT val_lhs
Definition funcimpl.h:3970
double lo
Definition funcimpl.h:3973
double lo1
Definition funcimpl.h:3973
long oversampling
Definition funcimpl.h:3971
double error
Definition funcimpl.h:3972
tensorT operator()(const Key< NDIM > key, const tensorT &coeff_rhs)
multiply values of rhs and lhs, result on rhs, rhs and lhs are of the same dimensions
Definition funcimpl.h:3988
coeffT coeff_lhs
Definition funcimpl.h:3970
void serialize(const Archive &ar)
Definition funcimpl.h:4076
double lo2
Definition funcimpl.h:3973
double hi1
Definition funcimpl.h:3973
pointwise_multiplier(const Key< NDIM > key, const coeffT &clhs)
Definition funcimpl.h:3976
coeffT operator()(const Key< NDIM > key, const tensorT &coeff_rhs, const int particle)
multiply values of rhs and lhs, result on rhs, rhs and lhs are of different dimensions
Definition funcimpl.h:4033
double hi2
Definition funcimpl.h:3973
double hi
Definition funcimpl.h:3973
project the low-dim function g on the hi-dim function f: result(x) = <f(x,y) | g(y)>
Definition funcimpl.h:7009
project_out_op(const implT *fimpl, implL1 *result, const ctL &iag, const int dim)
Definition funcimpl.h:7024
ctL iag
the low dim function g
Definition funcimpl.h:7019
FunctionImpl< T, NDIM-LDIM > implL1
Definition funcimpl.h:7014
Future< this_type > activate() const
retrieve the coefficients (parent coeffs might be remote)
Definition funcimpl.h:7103
std::pair< bool, coeffT > argT
Definition funcimpl.h:7015
const implT * fimpl
the hi dim function f
Definition funcimpl.h:7017
this_type forward_ctor(const implT *fimpl1, implL1 *result1, const ctL &iag1, const int dim1)
taskq-compatible ctor
Definition funcimpl.h:7110
this_type make_child(const keyT &child) const
Definition funcimpl.h:7094
project_out_op< LDIM > this_type
Definition funcimpl.h:7012
implL1 * result
the low dim result function
Definition funcimpl.h:7018
Future< argT > operator()(const Key< NDIM > &key) const
do the actual contraction
Definition funcimpl.h:7031
void serialize(const Archive &ar)
Definition funcimpl.h:7114
project_out_op(const project_out_op &other)
Definition funcimpl.h:7026
int dim
0: project 0..LDIM-1, 1: project LDIM..NDIM-1
Definition funcimpl.h:7020
bool randomize() const
Definition funcimpl.h:7010
CoeffTracker< T, LDIM > ctL
Definition funcimpl.h:7013
recursive part of recursive_apply
Definition funcimpl.h:5423
ctT iaf
Definition funcimpl.h:5431
recursive_apply_op2< opT > this_type
Definition funcimpl.h:5426
Future< this_type > activate() const
retrieve the coefficients (parent coeffs might be remote)
Definition funcimpl.h:5486
const opT * apply_op
need this for randomization
Definition funcimpl.h:5432
bool randomize() const
Definition funcimpl.h:5424
recursive_apply_op2(const recursive_apply_op2 &other)
Definition funcimpl.h:5439
void serialize(const Archive &ar)
Definition funcimpl.h:5502
argT finalize(const double kernel_norm, const keyT &key, const coeffT &coeff, const implT *r) const
sole purpose is to wait for the kernel norm, wrap it and send it back to caller
Definition funcimpl.h:5472
this_type make_child(const keyT &child) const
Definition funcimpl.h:5481
recursive_apply_op2(implT *result, const ctT &iaf, const opT *apply_op)
Definition funcimpl.h:5436
std::pair< bool, coeffT > argT
Definition funcimpl.h:5428
implT * result
Definition funcimpl.h:5430
CoeffTracker< T, NDIM > ctT
Definition funcimpl.h:5427
argT operator()(const Key< NDIM > &key) const
send off the application of the operator
Definition funcimpl.h:5448
this_type forward_ctor(implT *result1, const ctT &iaf1, const opT *apply_op1)
taskq-compatible ctor
Definition funcimpl.h:5498
recursive part of recursive_apply
Definition funcimpl.h:5292
std::pair< bool, coeffT > operator()(const Key< NDIM > &key) const
make the NS-coefficients and send off the application of the operator
Definition funcimpl.h:5317
this_type forward_ctor(implT *r, const CoeffTracker< T, LDIM > &f1, const CoeffTracker< T, LDIM > &g1, const opT *apply_op1)
Definition funcimpl.h:5382
opT * apply_op
Definition funcimpl.h:5300
recursive_apply_op(const recursive_apply_op &other)
Definition funcimpl.h:5310
recursive_apply_op< opT, LDIM > this_type
Definition funcimpl.h:5295
Future< this_type > activate() const
Definition funcimpl.h:5375
bool randomize() const
Definition funcimpl.h:5293
implT * result
Definition funcimpl.h:5297
CoeffTracker< T, LDIM > iaf
Definition funcimpl.h:5298
void serialize(const Archive &ar)
Definition funcimpl.h:5387
std::pair< bool, coeffT > finalize(const double kernel_norm, const keyT &key, const coeffT &coeff) const
sole purpose is to wait for the kernel norm, wrap it and send it back to caller
Definition funcimpl.h:5357
recursive_apply_op(implT *result, const CoeffTracker< T, LDIM > &iaf, const CoeffTracker< T, LDIM > &iag, const opT *apply_op)
Definition funcimpl.h:5304
this_type make_child(const keyT &child) const
Definition funcimpl.h:5366
CoeffTracker< T, LDIM > iag
Definition funcimpl.h:5299
remove all coefficients of internal nodes
Definition funcimpl.h:2282
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2283
remove_internal_coeffs()=default
constructor needs impl for cdata
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2288
void serialize(const Archive &ar)
Definition funcimpl.h:2294
remove all coefficients of leaf nodes
Definition funcimpl.h:2299
bool operator()(typename rangeT::iterator &it) const
Definition funcimpl.h:2305
remove_leaf_coeffs()=default
constructor needs impl for cdata
void serialize(const Archive &ar)
Definition funcimpl.h:2310
Range< typename dcT::iterator > rangeT
Definition funcimpl.h:2300
Definition funcimpl.h:4569
void serialize(Archive &ar)
Definition funcimpl.h:4573
bool operator()(const implT *f, const keyT &key, const nodeT &t) const
Definition funcimpl.h:4570
shallow-copy, pared-down version of FunctionNode, for special purpose only
Definition funcimpl.h:749
coeffT & coeff()
Definition funcimpl.h:763
GenTensor< T > coeffT
Definition funcimpl.h:750
bool is_leaf() const
Definition funcimpl.h:765
void serialize(Archive &ar)
Definition funcimpl.h:767
ShallowNode(const ShallowNode< T, NDIM > &node)
Definition funcimpl.h:758
ShallowNode(const FunctionNode< T, NDIM > &node)
Definition funcimpl.h:755
bool has_children() const
Definition funcimpl.h:764
ShallowNode()
Definition funcimpl.h:754
bool _has_children
Definition funcimpl.h:752
double dnorm
Definition funcimpl.h:753
const coeffT & coeff() const
Definition funcimpl.h:762
coeffT _coeffs
Definition funcimpl.h:751
TensorArgs holds the arguments for creating a LowRankTensor.
Definition gentensor.h:134
double thresh
Definition gentensor.h:135
TensorType tt
Definition gentensor.h:136
inserts/accumulates coefficients into impl's tree
Definition funcimpl.h:716
FunctionImpl< T, NDIM > * impl
Definition funcimpl.h:720
FunctionNode< T, NDIM > nodeT
Definition funcimpl.h:718
accumulate_op(const accumulate_op &other)=default
void operator()(const Key< NDIM > &key, const coeffT &coeff, const bool &is_leaf) const
Definition funcimpl.h:724
void serialize(Archive &ar)
Definition funcimpl.h:728
GenTensor< T > coeffT
Definition funcimpl.h:717
accumulate_op(FunctionImpl< T, NDIM > *f)
Definition funcimpl.h:722
static void load(const Archive &ar, FunctionImpl< T, NDIM > *&ptr)
Definition funcimpl.h:7336
static void load(const Archive &ar, const FunctionImpl< T, NDIM > *&ptr)
Definition funcimpl.h:7305
static void load(const Archive &ar, std::shared_ptr< FunctionImpl< T, NDIM > > &ptr)
Definition funcimpl.h:7387
static void load(const Archive &ar, std::shared_ptr< const FunctionImpl< T, NDIM > > &ptr)
Definition funcimpl.h:7371
Default load of an object via serialize(ar, t).
Definition archive.h:667
static void load(const A &ar, const U &t)
Load an object.
Definition archive.h:679
static void store(const Archive &ar, FunctionImpl< T, NDIM > *const &ptr)
Definition funcimpl.h:7361
static void store(const Archive &ar, const FunctionImpl< T, NDIM > *const &ptr)
Definition funcimpl.h:7327
static void store(const Archive &ar, const std::shared_ptr< FunctionImpl< T, NDIM > > &ptr)
Definition funcimpl.h:7396
static void store(const Archive &ar, const std::shared_ptr< const FunctionImpl< T, NDIM > > &ptr)
Definition funcimpl.h:7380
Default store of an object via serialize(ar, t).
Definition archive.h:612
static std::enable_if_t< is_output_archive_v< A > &&!std::is_function< U >::value &&(has_member_serialize_v< U, A >||has_nonmember_serialize_v< U, A >||has_freestanding_serialize_v< U, A >||has_freestanding_default_serialize_v< U, A >), void > store(const A &ar, const U &t)
Definition archive.h:622
Definition funcimpl.h:610
void serialize(Archive &ar)
Definition funcimpl.h:674
const opT * op
Definition funcimpl.h:617
hartree_convolute_leaf_op(const implT *f, const implL *g, const opT *op)
Definition funcimpl.h:621
bool operator()(const Key< NDIM > &key) const
no pre-determination
Definition funcimpl.h:625
bool operator()(const Key< NDIM > &key, const Tensor< T > &fcoeff, const Tensor< T > &gcoeff) const
post-determination: true if f is a leaf and the result is well-represented
Definition funcimpl.h:638
const implL * g
Definition funcimpl.h:616
const FunctionImpl< T, NDIM > * f
Definition funcimpl.h:615
FunctionImpl< T, LDIM > implL
Definition funcimpl.h:613
bool do_error_leaf_op() const
Definition funcimpl.h:618
FunctionImpl< T, NDIM > implT
Definition funcimpl.h:612
bool operator()(const Key< NDIM > &key, const GenTensor< T > &coeff) const
no post-determination
Definition funcimpl.h:628
returns true if the result of a hartree_product is a leaf node (compute norm & error)
Definition funcimpl.h:500
bool do_error_leaf_op() const
Definition funcimpl.h:505
const FunctionImpl< T, NDIM > * f
Definition funcimpl.h:503
hartree_leaf_op(const implT *f, const long &k)
Definition funcimpl.h:508
long k
Definition funcimpl.h:504
void serialize(Archive &ar)
Definition funcimpl.h:556
bool operator()(const Key< NDIM > &key, const GenTensor< T > &coeff) const
no post-determination
Definition funcimpl.h:514
bool operator()(const Key< NDIM > &key, const Tensor< T > &fcoeff, const Tensor< T > &gcoeff) const
post-determination: true if f is a leaf and the result is well-represented
Definition funcimpl.h:524
bool operator()(const Key< NDIM > &key) const
no pre-determination
Definition funcimpl.h:511
FunctionImpl< T, NDIM > implT
Definition funcimpl.h:502
insert/replaces the coefficients into the function
Definition funcimpl.h:692
insert_op()
Definition funcimpl.h:699
implT * impl
Definition funcimpl.h:698
void operator()(const keyT &key, const coeffT &coeff, const bool &is_leaf) const
Definition funcimpl.h:702
FunctionNode< T, NDIM > nodeT
Definition funcimpl.h:696
Key< NDIM > keyT
Definition funcimpl.h:694
insert_op(const insert_op &other)
Definition funcimpl.h:701
FunctionImpl< T, NDIM > implT
Definition funcimpl.h:693
GenTensor< T > coeffT
Definition funcimpl.h:695
insert_op(implT *f)
Definition funcimpl.h:700
void serialize(Archive &ar)
Definition funcimpl.h:706
Definition mra.h:112
Definition funcimpl.h:680
bool operator()(const Key< NDIM > &key, const GenTensor< T > &fcoeff, const GenTensor< T > &gcoeff) const
Definition funcimpl.h:682
void serialize(Archive &ar)
Definition funcimpl.h:686
void operator()(const Key< NDIM > &key, const GenTensor< T > &coeff, const bool &is_leaf) const
Definition funcimpl.h:681
Definition funcimpl.h:564
bool operator()(const Key< NDIM > &key, const double &cnorm) const
post-determination: return true if operator and coefficient norms are small
Definition funcimpl.h:585
void serialize(Archive &ar)
Definition funcimpl.h:600
const implT * f
the source or result function, needed for truncate_tol
Definition funcimpl.h:568
op_leaf_op(const opT *op, const implT *f)
Definition funcimpl.h:572
FunctionImpl< T, NDIM > implT
Definition funcimpl.h:565
const opT * op
the convolution operator
Definition funcimpl.h:567
bool do_error_leaf_op() const
Definition funcimpl.h:569
bool operator()(const Key< NDIM > &key) const
pre-determination: we can't know if this will be a leaf node before we have the final coeffs
Definition funcimpl.h:575
bool operator()(const Key< NDIM > &key, const GenTensor< T > &coeff) const
post-determination: return true if operator and coefficient norms are small
Definition funcimpl.h:578
Definition lowrankfunction.h:336
Definition funcimpl.h:736
void serialize(Archive &ar)
Definition funcimpl.h:743
bool operator()(const Key< NDIM > &key, const T &t, const R &r) const
Definition funcimpl.h:742
bool operator()(const Key< NDIM > &key, const T &t) const
Definition funcimpl.h:739
int np
Definition tdse1d.cc:165
static const double s0
Definition tdse4.cc:83
Defines and implements most of Tensor.
#define ITERATOR(t, exp)
Definition tensor_macros.h:249
#define IND
Definition tensor_macros.h:204
#define TERNARY_OPTIMIZED_ITERATOR(X, x, Y, y, Z, z, exp)
Definition tensor_macros.h:719
AtomicInt sum
Definition test_atomicint.cc:46
double norm(const T i1)
Definition test_cloud.cc:85
int task(int i)
Definition test_runtime.cpp:4
void e()
Definition test_sig.cc:75
static const double alpha
Definition testcosine.cc:10
const double offset
Definition testfuns.cc:143
constexpr std::size_t NDIM
Definition testgconv.cc:54
double h(const coord_1d &r)
Definition testgconv.cc:175
double g1(const coord_t &r)
Definition testgconv.cc:122
std::size_t axis
Definition testpdiff.cc:59
double source(const coordT &r)
Definition testperiodic.cc:48
#define TENSOR_RESULT_TYPE(L, R)
This macro simplifies access to TensorResultType.
Definition type_data.h:205
#define PROFILE_MEMBER_FUNC(classname)
Definition worldprofile.h:210
#define PROFILE_BLOCK(name)
Definition worldprofile.h:208
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43