34 #ifndef MADNESS_MRA_VMRA_H__INCLUDED
35 #define MADNESS_MRA_VMRA_H__INCLUDED
130 template <
typename T, std::
size_t NDIM>
137 bool must_fence =
false;
138 unsigned int vvsize =
v.size();
139 for (
unsigned int i=0; i<vvsize; i+= blk) {
140 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
141 if (!
v[j].is_compressed()) {
142 v[j].compress(
false);
146 if ( blk!=1 && must_fence && fence) world.
gop.
fence();
149 if (fence && must_fence) world.
gop.
fence();
154 template <
typename T, std::
size_t NDIM>
160 bool must_fence =
false;
161 unsigned int vvsize =
v.size();
162 for (
unsigned int i=0; i<vvsize; i+= blk) {
163 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
164 if (
v[j].is_compressed()) {
165 v[j].reconstruct(
false);
169 if ( blk!=1 && must_fence && fence) world.
gop.
fence();
172 if (fence && must_fence) world.
gop.
fence();
176 template <
typename T, std::
size_t NDIM>
182 unsigned int vvsize =
v.size();
184 for (
unsigned int i=0; i<vvsize; i+= blk) {
185 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
186 v[j].make_nonstandard(
false,
false);
188 if ( blk!=1 && fence) world.
gop.
fence();
196 template <
typename T, std::
size_t NDIM>
202 unsigned int vvsize =
v.size();
203 for (
unsigned int i=0; i<vvsize; i+= blk) {
204 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
205 v[j].standard(
false);
207 if ( blk!=1 && fence) world.
gop.
fence();
214 template <
typename T, std::
size_t NDIM>
224 unsigned int vvsize =
v.size();
225 for (
unsigned int i=0; i<vvsize; i+= blk) {
226 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
227 v[j].truncate(tol,
false);
229 if ( blk!=1 && fence) world.
gop.
fence();
236 template <
typename T, std::
size_t NDIM>
237 std::vector< Function<T,NDIM> >
241 const unsigned int blk=1,
242 const bool fence=
true)
245 std::vector< Function<T,NDIM> > df(
v.size());
247 unsigned int vvsize =
v.size();
249 for (
unsigned int i=0; i<vvsize; i+= blk) {
250 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
251 df[j] =
D(
v[j],
false);
253 if (blk!= 1 && fence) world.
gop.
fence();
262 template <
typename T, std::
size_t NDIM>
263 std::vector< Function<T,NDIM> >
266 std::vector< Function<T,NDIM> > r(n);
267 for (
int i=0; i<n; ++i)
279 template <
typename T,
typename R, std::
size_t NDIM>
286 const bool fence=
true){
292 unsigned int blk = min(blki, blkj);
293 unsigned int n =
v.size();
294 unsigned int m =
c.dim(1);
297 std::vector< Function<resultT,NDIM> > vc = zero_functions_compressed<resultT,NDIM>(world,
m);
300 for (
unsigned int i=0; i<
m; i+= blki) {
301 for (
unsigned int ii=i; ii<std::min(
m,(i+1)*blki); ii++) {
302 for (
unsigned int j=0; j<n; j+= blkj) {
303 for (
unsigned int jj=j; jj<std::min(n, (j+1)*blkj); jj++)
304 if (
c(jj,ii) !=
R(0.0)) vc[ii].
gaxpy(1.0,
v[jj],
c(jj,ii),
false);
305 if (fence && (blkj!=1)) world.
gop.
fence();
308 if (fence && (blki!=1)) world.
gop.
fence();
323 template <
typename L,
typename R, std::
size_t NDIM>
329 const unsigned int blki=1,
336 unsigned int m=
c.dim(1);
338 for (
unsigned int i=0; i<
m; i+= blki) {
339 for (
unsigned int ii=i; ii<std::min(
m,(i+1)*blki); ii++) {
342 if (fence && (blki!=1)) world.
gop.
fence();
350 compress(world, vresult, blki,
false);
352 vresult[0].vtransform(
v,
c, vresult, tol, fence);
358 template <
typename T,
typename Q, std::
size_t NDIM>
361 const std::vector<Q>& factors,
362 const unsigned int blk=1,
363 const bool fence=
true)
367 unsigned int vvsize =
v.size();
368 for (
unsigned int i=0; i<vvsize; i+= blk) {
369 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
370 v[j].scale(factors[j],
false);
372 if (fence && blk!=1 ) world.
gop.
fence();
379 template <
typename T,
typename Q, std::
size_t NDIM>
383 const unsigned int blk=1,
384 const bool fence=
true){
388 unsigned int vvsize =
v.size();
389 for (
unsigned int i=0; i<vvsize; i+= blk) {
390 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
391 v[j].scale(factor,
false);
393 if (fence && blk!=1 ) world.
gop.
fence();
399 template <
typename T, std::
size_t NDIM>
402 const unsigned int blk=1,
403 const bool fence=
true){
406 unsigned int vvsize =
v.size();
407 std::vector<double> norms(vvsize);
409 for (
unsigned int i=0; i<vvsize; i+= blk) {
410 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j) {
411 norms[j] =
v[j].norm2sq_local();
413 if (fence && (blk!=1)) world.
gop.
fence();
417 world.
gop.
sum(&norms[0], norms.size());
419 for (
unsigned int i=0; i<vvsize; i+= blk) {
420 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j)
421 norms[j] = sqrt(norms[j]);
422 if (fence && (blk!=1)) world.
gop.
fence();
432 template <
typename T, std::
size_t NDIM>
433 double norm2(World& world,
434 const std::vector< Function<T,NDIM> >&
v)
439 std::vector<double> norms(
v.size());
441 for (
unsigned int i=0; i<
v.size(); ++i)
442 norms[i] =
v[i].norm2sq_local();
444 world.gop.sum(&norms[0], norms.size());
446 for (
unsigned int i=1; i<
v.size(); ++i)
447 norms[0] += norms[i];
450 return sqrt(norms[0]);
453 inline double conj(
double x) {
457 inline double conj(
float x) {
462 template <
typename T,
typename R, std::
size_t NDIM>
466 const std::vector< Function<R,NDIM> >&
g;
476 for (
long j=0; j<
jtop; ++j) {
485 virtual void get_id(std::pair<void*,unsigned short>&
id)
const {
496 template <
typename T,
typename R, std::
size_t NDIM>
498 const std::vector< Function<T,NDIM> >&
f,
499 const std::vector< Function<R,NDIM> >&
g,
502 unsigned int n=
f.size(),
m=
g.size();
519 for (
unsigned int i=n-1; i>=0; --i) {
520 unsigned int jtop =
m;
522 world.taskq.add(
new MatrixInnerTask<T,R,NDIM>(r(i,
_),
f[i],
g, jtop));
525 world.gop.sum(r.ptr(),n*
m);
528 for (
unsigned int i=0; i<n; ++i) {
529 for (
unsigned int j=0; j<i; ++j) {
530 r(j,i) =
conj(r(i,j));
539 template <
typename T,
typename R, std::
size_t NDIM>
541 const std::vector< Function<T,NDIM> >&
f,
542 const std::vector< Function<R,NDIM> >&
g) {
544 long n=
f.size(),
m=
g.size();
551 for (
long i=0; i<n; ++i) {
552 r(i) =
f[i].inner_local(
g[i]);
556 world.gop.sum(r.ptr(),n);
564 template <
typename T,
typename R, std::
size_t NDIM>
566 const Function<T,NDIM>&
f,
567 const std::vector< Function<R,NDIM> >&
g) {
575 for (
long i=0; i<n; ++i) {
576 r(i) =
f.inner_local(
g[i]);
580 world.gop.sum(r.ptr(),n);
588 template <
typename T,
typename R, std::
size_t NDIM>
593 const unsigned int blk=1,
594 const bool fence=
true) {
597 a.reconstruct(
false);
604 template <
typename T,
typename R, std::
size_t NDIM>
610 const bool fence=
true,
611 const unsigned int blk=1)
614 a.reconstruct(
false);
618 unsigned int vvsize =
v.size();
619 for (
unsigned int i=0; i<vvsize; i+= blk) {
620 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j)
622 if ( fence && (blk == 1)) world.
gop.
fence();
629 template <
typename T, std::
size_t NDIM>
636 unsigned int vvsize =
v.size();
637 for (
unsigned int i=0; i<vvsize; i+= blk) {
638 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j)
640 if (fence && blk!=1 ) world.
gop.
fence();
647 template <
typename T,
typename R, std::
size_t NDIM>
661 unsigned int vvsize =
a.size();
662 for (
unsigned int i=0; i<vvsize; i+= blk) {
663 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j)
664 q[j] =
mul(
a[j],
b[j],
false);
665 if (fence && (blk !=1 )) world.
gop.
fence();
674 template <
typename T, std::
size_t NDIM>
675 std::vector< Function<T,NDIM> >
677 const std::vector< Function<T,NDIM> >&
v,
679 return mul<T,T,NDIM>(world,
v,
v, fence);
690 template <
typename T, std::
size_t NDIM>
692 std::vector< Function<T,NDIM> >&
v,
695 for (
unsigned int j=0; j<
v.size(); ++j) {
698 if (fence) world.gop.fence();
703 template <
typename T, std::
size_t NDIM>
704 std::vector< Function<T,NDIM> >
706 const std::vector< Function<T,NDIM> >&
v,
709 std::vector< Function<T,NDIM> > r =
copy(world,
v);
710 for (
unsigned int i=0; i<
v.size(); ++i) {
713 if (fence) world.gop.fence();
720 template <
typename T, std::
size_t NDIM>
721 std::vector< Function<T,NDIM> >
723 const std::vector< Function<T,NDIM> >&
v,
726 std::vector< Function<T,NDIM> > r(
v.size());
727 for (
unsigned int i=0; i<
v.size(); ++i) {
728 r[i] =
copy(
v[i],
false);
730 if (fence) world.gop.fence();
736 template <
typename T, std::
size_t NDIM>
737 std::vector< Function<T,NDIM> >
739 const Function<T,NDIM>&
v,
740 const unsigned int n,
743 std::vector< Function<T,NDIM> > r(n);
744 for (
unsigned int i=0; i<n; ++i) {
745 r[i] =
copy(
v,
false);
747 if (fence) world.gop.fence();
753 template <
typename T,
typename R, std::
size_t NDIM>
759 unsigned int blk=1) {
767 unsigned int vvsize =
a.size();
768 for (
unsigned int i=0; i<vvsize; i+= blk) {
769 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j)
770 r[j] =
add(
a[j],
b[j],
false);
771 if (fence && (blk !=1 )) world.
gop.
fence();
780 template <
typename T,
typename R, std::
size_t NDIM>
786 unsigned int blk=1) {
794 unsigned int vvsize =
b.size();
795 for (
unsigned int i=0; i<vvsize; i+= blk) {
796 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j)
797 r[j] =
add(
a,
b[j],
false);
798 if (fence && (blk !=1 )) world.
gop.
fence();
805 template <
typename T,
typename R, std::
size_t NDIM>
811 unsigned int blk=1) {
812 return add(world,
a,
b, fence, blk);
817 template <
typename T,
typename R, std::
size_t NDIM>
823 unsigned int blk=1) {
831 unsigned int vvsize =
a.size();
832 for (
unsigned int i=0; i<vvsize; i+= blk) {
833 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j)
834 r[j] =
sub(
a[j],
b[j],
false);
835 if (fence && (blk !=1 )) world.
gop.
fence();
844 template <
typename T,
typename Q,
typename R, std::
size_t NDIM>
857 unsigned int vvsize =
a.size();
859 for (
unsigned int i=0; i<vvsize; i+= blk) {
860 for (
unsigned int j=i; j<std::min(vvsize,(i+1)*blk); ++j)
863 if (fence && (blk !=1 )) world.
gop.
fence();
875 template <
typename opT,
typename R, std::
size_t NDIM>
878 const std::vector< std::shared_ptr<opT> >&
op,
880 const unsigned int blk=1){
885 std::vector< Function<R,NDIM> >&
ncf = *
const_cast< std::vector< Function<R,NDIM>
>* >(&
f);
891 unsigned int ff =
f.size();
893 for (
unsigned int i=0; i<ff; ++blk) {
894 for (
unsigned int j=i; j<std::min(ff,(i+1)*blk); ++j)
912 template <
typename T,
typename R, std::
size_t NDIM>
917 const unsigned int blk=1) {
920 std::vector< Function<R,NDIM> >&
ncf = *
const_cast< std::vector< Function<R,NDIM>
>* >(&
f);
927 unsigned int ff =
f.size();
928 for (
unsigned int i=0; i<ff; ++blk) {
929 for (
unsigned int j=i; j<std::min(ff,(i+1)*blk); ++j)
945 template <
typename T, std::
size_t NDIM>
947 std::vector< Function<T,NDIM> >&
v,
951 std::vector<double> nn =
norm2s(world,
v);
953 for (
unsigned int i=0; i<
v.size(); ++i)
954 v[i].
scale(1.0/nn[i],
false);
956 if (fence) world.gop.fence();
double q(double t)
Definition: DKops.h:18
long size() const
Returns the number of elements in the tensor.
Definition: basetensor.h:138
Implements derivatives operators with variety of boundary conditions on simulation domain.
Definition: derivative.h:266
FunctionFactory implements the named-parameter idiom for Function.
Definition: function_factory.h:86
A multiresolution adaptive numerical function.
Definition: mra.h:122
static std::enable_if< detail::function_traits< fnT >::value||detail::memfunc_traits< fnT >::value >::type make_id(std::pair< void *, unsigned short > &id, fnT fn)
Definition: thread.h:916
All world tasks must be derived from this public interface.
Definition: taskfn.h:69
volatile World * world
Definition: taskfn.h:72
A tensor is a multidimension array.
Definition: tensor.h:317
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition: worldgop.cc:161
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition: worldgop.h:870
A parallel world class.
Definition: world.h:132
WorldGopInterface & gop
Global operations.
Definition: world.h:205
static const double R
Definition: csqrt.cc:46
Declaration and initialization of tree traversal functions and generic derivative.
const double m
Definition: gfit.cc:199
auto T(World &world, response_space &f) -> response_space
Definition: global_functions.cc:34
const double beta
Definition: gygi_soltion.cc:62
static const double v
Definition: hatom_sf_dirac.cc:20
Tensor< double > op(const Tensor< double > &x)
Definition: kain.cc:508
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition: madness_exception.h:134
Main include file for MADNESS and defines Function interface.
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
Function< TENSOR_RESULT_TYPE(L, R), NDIM > sub(const Function< L, NDIM > &left, const Function< R, NDIM > &right, bool fence=true)
Same as operator- but with optional fence and no automatic compression.
Definition: mra.h:1971
Function< TENSOR_RESULT_TYPE(L, R), NDIM > add(const Function< L, NDIM > &left, const Function< R, NDIM > &right, bool fence=true)
Same as operator+ but with optional fence and no automatic compression.
Definition: mra.h:1927
Function< TENSOR_RESULT_TYPE(Q, T), NDIM > mul(const Q alpha, const Function< T, NDIM > &f, bool fence=true)
Returns new function equal to alpha*f(x) with optional fence.
Definition: mra.h:1701
response_space scale(response_space a, double b)
Function< T, NDIM > conj(const Function< T, NDIM > &f, bool fence=true)
Return the complex conjugate of the input function with the same distribution and optional fence.
Definition: mra.h:2046
response_space apply(World &world, std::vector< std::vector< std::shared_ptr< real_convolution_3d >>> &op, response_space &f)
Definition: basic_operators.cc:39
void truncate(World &world, response_space &v, double tol, bool fence)
Definition: basic_operators.cc:30
void norm_tree(World &world, const std::vector< Function< T, NDIM > > &v, bool fence=true)
Makes the norm tree for all functions in a vector.
Definition: vmra.h:1147
@ nonstandard
s and d coeffs in internal nodes
Definition: funcdefaults.h:61
std::vector< Function< T, NDIM > > zero_functions(World &world, int n, bool fence=true)
Generates a vector of zero functions (reconstructed)
Definition: vmra.h:395
void standard(World &world, std::vector< Function< T, NDIM > > &v, bool fence=true)
Generates standard form of a vector of functions.
Definition: vmra.h:260
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition: mra.h:2002
void compress(World &world, const std::vector< Function< T, NDIM > > &v, bool fence=true)
Compress a vector of functions.
Definition: vmra.h:133
Function< T, NDIM > square(const Function< T, NDIM > &f, bool fence=true)
Create a new function that is the square of f - global comm only if not reconstructed.
Definition: mra.h:2681
void set_thresh(World &world, std::vector< Function< T, NDIM > > &v, double thresh, bool fence=true)
Sets the threshold in a vector of functions.
Definition: vmra.h:1251
double norm2(World &world, const std::vector< Function< T, NDIM > > &v)
Computes the 2-norm of a vector of functions.
Definition: vmra.h:851
static const Slice _(0,-1, 1)
std::vector< Function< TENSOR_RESULT_TYPE(L, R), D > > vmulXX(const Function< L, D > &left, const std::vector< Function< R, D > > &vright, double tol, bool fence=true)
Use the vmra/mul(...) interface instead.
Definition: mra.h:1807
TENSOR_RESULT_TYPE(T, R) inner(const Function< T
Computes the scalar/inner product between two functions.
Definition: vmra.h:1059
NDIM & f
Definition: mra.h:2416
Function< TENSOR_RESULT_TYPE(L, R), NDIM > mul_sparse(const Function< L, NDIM > &left, const Function< R, NDIM > &right, double tol, bool fence=true)
Sparse multiplication — left and right must be reconstructed and if tol!=0 have tree of norms already...
Definition: mra.h:1742
NDIM const Function< R, NDIM > & g
Definition: mra.h:2416
double inner(response_space &a, response_space &b)
Definition: response_functions.h:442
void normalize(World &world, std::vector< Function< T, NDIM > > &v, bool fence=true)
Normalizes a vector of functions — v[i] = v[i].scale(1.0/v[i].norm2())
Definition: vmra.h:1614
Function< TENSOR_RESULT_TYPE(typename opT::opT, R), NDIM > apply_only(const opT &op, const Function< R, NDIM > &f, bool fence=true)
Apply operator ONLY in non-standard form - required other steps missing !!
Definition: mra.h:2120
std::vector< Function< TENSOR_RESULT_TYPE(T, R), NDIM > > transform(World &world, const std::vector< Function< T, NDIM > > &v, const Tensor< R > &c, bool fence=true)
Transforms a vector of functions according to new[i] = sum[j] old[j]*c[j,i].
Definition: vmra.h:689
void matrix_inner(DistributedMatrix< T > &A, const std::vector< Function< T, NDIM > > &f, const std::vector< Function< T, NDIM > > &g, bool sym=false)
Definition: distpm.cc:46
std::vector< double > norm2s(World &world, const std::vector< Function< T, NDIM > > &v)
Computes the 2-norms of a vector of functions.
Definition: vmra.h:827
void gaxpy(const double a, ScalarResult< T > &left, const double b, const T &right, const bool fence=true)
the result type of a macrotask must implement gaxpy
Definition: macrotaskq.h:140
const std::vector< Function< T, NDIM > > & reconstruct(const std::vector< Function< T, NDIM > > &v)
reconstruct a vector of functions
Definition: vmra.h:156
static const double b
Definition: nonlinschro.cc:119
static const double a
Definition: nonlinschro.cc:118
double Q(double a)
Definition: relops.cc:20
static const double c
Definition: relops.cc:10
static const double L
Definition: rk.cc:46
static const double thresh
Definition: rk.cc:45
Definition: test_ar.cc:204
virtual void get_id(std::pair< void *, unsigned short > &id) const
Get the task id.
Definition: vmra1.h:485
const std::vector< Function< R, NDIM > > & g
Definition: vmra1.h:466
MatrixInnerTask(const Tensor< TENSOR_RESULT_TYPE(T, R)> &result, const Function< T, NDIM > &f, const std::vector< Function< R, NDIM > > &g, long jtop)
Definition: vmra1.h:469
const Function< T, NDIM > & f
Definition: vmra1.h:465
Tensor< TENSOR_RESULT_TYPE(T, R)> result
Definition: vmra1.h:464
void run(World &world)
Runs a single-threaded task ... derived classes must implement this.
Definition: vmra1.h:475
long jtop
Definition: vmra1.h:467
Definition: dirac-hatom.cc:108
const double alpha
Definition: test_coulomb.cc:54
static const std::size_t NDIM
Definition: testpdiff.cc:42
#define PROFILE_BLOCK(name)
Definition: worldprofile.h:208