madness/api-doc/operator_8h_source.html

/*

  This file is part of MADNESS.


  Copyright (C) 2007,2010 Oak Ridge National Laboratory


  This program is free software; you can redistribute it and/or modify

  it under the terms of the GNU General Public License as published by

  the Free Software Foundation; either version 2 of the License, or

  (at your option) any later version.


  This program is distributed in the hope that it will be useful,

  but WITHOUT ANY WARRANTY; without even the implied warranty of

  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

  GNU General Public License for more details.


  You should have received a copy of the GNU General Public License

  along with this program; if not, write to the Free Software

  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


  For more information please contact:


  Robert J. Harrison

  Oak Ridge National Laboratory

  One Bethel Valley Road

  P.O. Box 2008, MS-6367


  email: harrisonrj@ornl.gov

  tel:   865-241-3937

  fax:   865-572-0680

*/


#ifndef MADNESS_MRA_OPERATOR_H__INCLUDED

#define MADNESS_MRA_OPERATOR_H__INCLUDED


/// \file mra/operator.h

/// \brief Implements most functionality of separated operators


/// \ingroup function


#include <type_traits>

#include <limits.h>

#include <madness/mra/adquad.h>

#include <madness/tensor/aligned.h>

#include <madness/tensor/tensor_lapack.h>

#include <madness/constants.h>


#include <madness/mra/simplecache.h>

#include <madness/mra/convolution1d.h>

#include <madness/mra/displacements.h>

#include <madness/mra/function_common_data.h>

#include <madness/mra/gfit.h>

#include <madness/mra/operatorinfo.h>


namespace madness {


    // Forward declaration of the Function class template, so it can be named
    // in the apply() prototypes below.
    template<typename T, std::size_t NDIM>

    class Function;


    // Forward declaration of the operator class template that is defined
    // further down in this header.
    template<typename T, std::size_t NDIM>

    class SeparatedConvolution;


    // Forward declaration of CCPairFunction, used only in the apply()
    // prototypes below.
    template<typename T, std::size_t NDIM>

    class CCPairFunction;


    // Prototype: apply an operator of dimension KDIM to a vector of
    // NDIM-dimensional functions. Note that the vector is taken by value.
    template <typename T, typename R, std::size_t NDIM, std::size_t KDIM>

    std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >

    apply(const SeparatedConvolution<T,KDIM>& op, const std::vector< Function<R,NDIM> > f);


    // Prototype: apply an NDIM-dimensional operator to a vector of
    // NDIM-dimensional CCPairFunctions.
    template<typename T, std::size_t NDIM>

    std::vector<CCPairFunction<T,NDIM>> apply(const SeparatedConvolution<T,NDIM>& op, const std::vector<CCPairFunction<T,NDIM>>& argument);


    // Prototype: apply an operator of half the dimension (NDIM/2) to a vector
    // of NDIM-dimensional CCPairFunctions.
    template<typename T, std::size_t NDIM>

    std::vector<CCPairFunction<T,NDIM>> apply(const SeparatedConvolution<T,NDIM/2>& op, const std::vector<CCPairFunction<T,NDIM>>& argument);


    // Prototype: single-argument variant of the NDIM-dimensional overload.
    template<typename T, std::size_t NDIM>

    CCPairFunction<T,NDIM> apply(const SeparatedConvolution<T,NDIM>& op, const CCPairFunction<T,NDIM>& argument);


    // Prototype: single-argument variant of the NDIM/2-dimensional overload.
    template<typename T, std::size_t NDIM>

    CCPairFunction<T,NDIM> apply(const SeparatedConvolution<T,NDIM/2>& op, const CCPairFunction<T,NDIM>& argument);


    /// SeparatedConvolutionInternal keeps data for 1 term and all dimensions and 1 displacement

    /// Why is this here?? Why don't you just use ConvolutionND in SeparatedConvolutionData??

    /// \tparam Q    scalar type of the operator
    /// \tparam NDIM number of dimensions (one 1-D operator pointer per dimension)
    template <typename Q, std::size_t NDIM>


    struct SeparatedConvolutionInternal {

        /// norm estimate for this term; SeparatedConvolution::getmuop fills it
        /// with munorm2(...) times the absolute value of the term's factor
        double norm;

        /// pointers to the 1-D operator blocks, one per dimension; they point
        /// to the same ConvolutionData1D objects that ConvolutionND refers to
        /// (presumably non-owning -- TODO confirm against ConvolutionData1D's cache)
        const ConvolutionData1D<Q>* ops[NDIM];

    };


    /// SeparatedConvolutionData keeps data for all terms, all dimensions


    /// this struct holds the per-displacement data that SeparatedConvolution caches (see its SimpleCache members)

    /// \tparam Q    scalar type of the operator
    /// \tparam NDIM number of dimensions
    template <typename Q, std::size_t NDIM>
    struct SeparatedConvolutionData {
        /// one entry per separated term (each entry covers all dimensions)
        std::vector< SeparatedConvolutionInternal<Q,NDIM> > muops;

        /// norm associated with the whole set of terms; starts at zero
        double norm;

        /// Reserve room for \c rank separated terms and reset the norm.
        SeparatedConvolutionData(int rank)
            : muops(rank)
            , norm(0.0)
        {}

        /// Member-wise copy of the term list and of the norm.
        SeparatedConvolutionData(const SeparatedConvolutionData<Q,NDIM>& q)
            : muops(q.muops)
            , norm(q.norm)
        {}
    };


    /// Convolutions in separated form (including Gaussian)


    /* this stuff is very confusing, poorly commented, and extremely poorly named!


    I think it works like this:

    We try to apply transition matrices to the compressed form of function coefficients.

    Most of the code is about caching these transition matrices. They are cached (key of the map is the displacement)

    in the SimpleCache "data", which is of type SeparatedConvolutionData, which keeps the matrices

    for all separated terms and dimensions. These SeparatedConvolutionData are constructed using

    ConvolutionND "ops", which is constructed at the construction of the SeparatedConvolution.


                        SeparatedConvolution (all terms, all dim, all displacements)


                    construction                                            storage


                                                                    SimpleCache<SeparatedConvolutionData>

                                                                        (all terms, all dim) / (all disp)

             vector<ConvolutionND>

             (1 term, all dim) / (all terms)

                                                                    vector<SeparatedConvolutionInternal>

                                                                        (1 term, all dim) / (all terms)


                                                                    vector<ConvolutionData1D>

                                                                        (1 term, 1 dim) / (all dim)


    ConvolutionND and SeparatedConvolutionInternal both point to the same data in ConvolutionData1D.

    Why we need SeparatedConvolutionInternal in the first place I have no idea. ConvolutionND has the global

    factor, and SeparatedConvolutionInternal has a norm.


    */


    template <typename Q, std::size_t NDIM>


    class SeparatedConvolution : public WorldObject< SeparatedConvolution<Q,NDIM> > {

    public:


        typedef Q opT;  ///< The apply function uses this to infer resultT=opT*inputT


        OperatorInfo info;


        bool doleaves;  ///< If should be applied to leaf coefficients ... false by default


      private:

        array_of_bools<NDIM>

            lattice_summed_;    ///< If lattice_summed_[d] is true, sum over lattice translations along axis d

                                ///< N.B. the resulting kernel can be non-zero at both ends of the simulation cell along that axis

        array_of_bools<NDIM> domain_is_periodic_{false};    ///< If domain_is_periodic_[d]==false and lattice_summed_[d]==false,

                                                            ///< ignore periodicity of BC when applying this to function

        std::array<KernelRange, NDIM> range;  ///< kernel range is along axis d is limited by range[d] if it's nonnull


      public:

        bool modified_=false;     ///< use modified NS form

        int particle_=1;        ///< must only be 1 or 2

        bool destructive_=false;    ///< destroy the argument or restore it (expensive for 6d functions)

        bool print_timings=false;


        typedef Key<NDIM> keyT;

        const static size_t opdim=NDIM;

        Timer timer_full;

        Timer timer_low_transf;

        Timer timer_low_accumulate;

        Timer timer_stats_accumulate;


    private:


        mutable std::vector< ConvolutionND<Q,NDIM> > ops;   ///< ConvolutionND keeps data for 1 term, all dimensions, 1 displacement

        const int k;

        const FunctionCommonData<Q,NDIM>& cdata;

        int rank;

        const std::vector<long> vk;

        const std::vector<long> v2k;

        const std::vector<Slice> s0;


        // SeparatedConvolutionData keeps data for all terms and all dimensions and 1 displacement

        mutable SimpleCache< SeparatedConvolutionData<Q,NDIM>, NDIM > data; ///< cache for all terms, dims and displacements

        mutable SimpleCache< SeparatedConvolutionData<Q,NDIM>, 2*NDIM > mod_data; ///< cache for all terms, dims and displacements


    public:


        /// Mutable access to the flag selecting the modified NS form.
        bool& modified() {return modified_;}

        /// Read-only access to the flag selecting the modified NS form.
        const bool& modified() const {return modified_;}


        /// Mutable access to the particle index; note that, unlike
        /// set_particle(), writing through this reference is not range-checked.
        int& particle() {return particle_;}

        /// Read-only access to the particle index.
        const int& particle() const {return particle_;}


        /// Select the particle this operator acts on.
        ///
        /// @param[in] p  particle index; only 1 and 2 are accepted
        /// @return       reference to this operator, so calls can be chained
        /// @throws std::runtime_error if \c p is neither 1 nor 2
        SeparatedConvolution<Q,NDIM>& set_particle(const int p) {
            const bool is_valid_particle = (p==1) or (p==2);
            if (not is_valid_particle) {
                throw std::runtime_error("particle must be 1 or 2");
            }
            particle_ = p;
            return *this;
        }


        /// Mutable access to the flag controlling whether the argument is destroyed.
        bool& destructive() {return destructive_;}

        /// Read-only access to the flag controlling whether the argument is destroyed.
        const bool& destructive() const {return destructive_;}


        /// Returns info.mu, i.e. the same value as mu().
        /// NOTE(review): presumably a legacy alias for mu() -- confirm that
        /// returning info.mu (and not a separate gamma) is intended.
        const double& gamma() const {return info.mu;}

        /// Returns the parameter mu stored in the OperatorInfo.
        const double& mu() const {return info.mu;}

        /// Returns the stored rank member.
        int get_rank() const { return rank; }

        /// Returns the stored k member.
        int get_k() const { return k; }

        /// Read-only access to the per-term ConvolutionND objects.
        const std::vector<ConvolutionND<Q,NDIM>>& get_ops() const { return ops; }

        /// Read-only access to the per-axis kernel ranges.
        const std::array<KernelRange, NDIM>& get_range() const { return range; }

        /// True if the kernel range reports finite() along at least one axis.
        bool range_restricted() const { return std::any_of(range.begin(), range.end(), [](const auto& v) { return v.finite(); }); }


    private:


        /// laziness for calling lists: which terms to apply


        struct ApplyTerms {
            /// request the block that muopxv_fast builds from the R matrices
            bool r_term = false;

            /// request the block that muopxv_fast builds from the T matrices
            bool t_term = false;

            /// both requests are off after default construction
            ApplyTerms() = default;

            /// true as soon as at least one of the two blocks is requested
            bool any_terms() const {
                return r_term || t_term;
            }
        };


        /// too lazy for extended calling lists


        /// Bundles what apply_transformation() needs for one dimension.
        /// The members are not initialized here; muopxv_fast() fills them
        /// before use.
        struct Transformation {

            long r;             // Effective rank of transformation

            const Q* U;         // Ptr to matrix

            const Q* VT;        // Ptr to second factor; muopxv_fast sets it to 0 when the
                                // full matrix is used, and apply_transformation tests it
                                // to decide between a second multiply and a transpose

        };


        /// Convenience overload: packs the scalar parameters into an OperatorInfo
        /// and forwards to the OperatorInfo-based overload, which performs the fit.
        ///
        /// @param[in] world           forwarded unchanged
        /// @param[in] mu              forwarded to the OperatorInfo constructor
        /// @param[in] lo              forwarded to the OperatorInfo constructor
        /// @param[in] eps             forwarded to the OperatorInfo constructor
        /// @param[in] type            forwarded to the OperatorInfo constructor
        /// @param[in] lattice_summed  per-axis lattice summation flags, forwarded unchanged
        /// @return pair (coefficients, exponents) as produced by the other overload
        static inline std::pair<Tensor<Q>,Tensor<Q>>
        make_coeff_for_operator(World& world, double mu, double lo, double eps, OpType type,
                                const array_of_bools<NDIM>& lattice_summed) {
            // the callee updates the info object, so it must be a named lvalue
            OperatorInfo fit_info(mu,lo,eps,type);
            return make_coeff_for_operator(world, fit_info, lattice_summed);
        }


        /// Fit the kernel described by \c info with GFit and return the
        /// coefficients and exponents of the fit.
        ///
        /// Side effects on \c info: \c info.hi is always overwritten, and
        /// \c info.truncate_lowexp_gaussians is set to true whenever the
        /// truncation branch is taken.
        ///
        /// @param[in]     world           not used in this function
        /// @param[in,out] info            operator description (see side effects above)
        /// @param[in]     lattice_summed  per-axis lattice summation flags
        /// @return pair (coefficients, exponents)
        static inline std::pair<Tensor<double>,Tensor<double>>
        make_coeff_for_operator(World& world, OperatorInfo& info,
                                const array_of_bools<NDIM>& lattice_summed) {

          const Tensor<double>& widths = FunctionDefaults<NDIM>::get_cell_width();

          // upper end of the fit range: starts from the norm of the cell widths
          double upper = widths.normf();

          // the range is stretched by a factor 100 if any axis is lattice summed,
          // or if any boundary is periodic (in case a periodic domain gets used)
          const auto any_axis_summed = lattice_summed.any();
          if (any_axis_summed || FunctionDefaults<NDIM>::get_bc().is_periodic_any()) {
            upper *= 100;
          }
          info.hi = upper;

          GFit<Q, NDIM> fit(info);
          Tensor<Q> coeff = fit.coeffs();
          Tensor<Q> expnt = fit.exponents();

          // an explicit setting in info wins; otherwise truncate only when lattice summing
          if (info.truncate_lowexp_gaussians.value_or(any_axis_summed)) {
            // convolution with Gaussians of exponents <= 0.25/(L^2) contributes only a
            // constant shift, so the longest lattice-summed cell edge determines the
            // smallest exponent that has to be kept
            double longest_summed_edge = 0;
            for (int axis=0; axis!=NDIM; ++axis) {
              if (lattice_summed[axis]) {
                longest_summed_edge = std::max(longest_summed_edge, widths(axis));
              }
            }

            // WARNING: discardG0 = true ignores the coefficients of truncated terms
            fit.truncate_periodic_expansion(coeff, expnt, longest_summed_edge,
                                            /* discardG0 = */ true);
            info.truncate_lowexp_gaussians = true;
          }

          return std::make_pair(coeff, expnt);
        }


//        /// return the right block of the upsampled operator (modified NS only)

//

//        /// unlike the operator matrices on the natural level the upsampled operator

//        /// matrices are not Toeplitz, so we need more information than just the displacement

//        ///.@param[in]  source  the source key

//        /// @param[in]  disp    the displacement

//        /// @param[in]  upop    the unfiltered operator matrix from scale n-1

//        /// @return     (k,k) patch of the upop(2k,2k) matrix

//        static Tensor<Q> operator_patch(const Translation& source, const Translation& disp, const Tensor<Q>& upop) {

//

//            // which of the 4 upsampled matrices do we need?

//            Translation sx=source%2;              // source offset

//            Translation tx=(source+disp)%2;       // target offset

//

//            Tensor<Q> rij(k,k);

//            // those two are equivalent:

///*

//            if (sx==0 and tx==0) copy_2d_patch(rij.ptr(),             k, upop.ptr(), 2*k, k, k);

//            if (sx==1 and tx==0) copy_2d_patch(rij.ptr() + k,         k, upop.ptr(), 2*k, k, k);

//            if (sx==0 and tx==1) copy_2d_patch(rij.ptr() + 2*k*k,     k, upop.ptr(), 2*k, k, k);

//            if (sx==1 and tx==1) copy_2d_patch(rij.ptr() + 2*k*k + k, k, upop.ptr(), 2*k, k, k);

//*/

//            Slice s0(0,k-1), s1(k,2*k-1);

//            if (sx==0 and tx==0) rij=Rm(s0,s0);

//            if (sx==1 and tx==0) rij=Rm(s1,s0);

//            if (sx==0 and tx==1) rij=Rm(s0,s1);

//            if (sx==1 and tx==1) rij=Rm(s1,s1);

//

//            return rij;

//        }


        /// accumulate into result

        /// Applies one matrix per dimension to the coefficients \c f and adds
        /// \c mufac times the outcome to \c result.
        ///
        /// @param[in]     dimk    extent of \c f along every dimension
        /// @param[in]     trans   per-dimension rank and matrix pointers (U and, if non-null, VT)
        /// @param[in]     f       input coefficients, read through f.ptr()
        /// @param[in,out] work1   scratch storage, used through work1.ptr()
        /// @param[in,out] work2   scratch storage, used through work2.ptr()
        /// @param[in]     mufac   scale factor used in the final accumulation
        /// @param[in,out] result  accumulated into through result.ptr()
        ///
        /// NOTE(review): the scratch tensors are presumably expected to hold at
        /// least dimk^NDIM elements -- confirm against the callers.
        template <typename T, typename R>


        void apply_transformation(long dimk,

                                  const Transformation trans[NDIM],

                                  const Tensor<T>& f,

                                  Tensor<R>& work1,

                                  Tensor<R>& work2,

                                  const Q mufac,

                                  Tensor<R>& result) const {


            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling

            // size = dimk^NDIM, the number of elements in f
            long size = 1;

            for (std::size_t i=0; i<NDIM; ++i) size *= dimk;

            // dimi = number of elements per slice along the dimension being transformed
            long dimi = size/dimk;


            // w1/w2 are swapped after every step, so w1 always holds the latest data
            R* MADNESS_RESTRICT w1=work1.ptr();

            R* MADNESS_RESTRICT w2=work2.ptr();


            // first dimension: read from f, write into w1
#ifdef HAVE_IBMBGQ

            mTxmq_padding(dimi, trans[0].r, dimk, dimk, w1, f.ptr(), trans[0].U);

#else

            mTxmq(dimi, trans[0].r, dimk, w1, f.ptr(), trans[0].U, dimk);

#endif


            // the transformed dimension now has extent trans[0].r instead of dimk
            size = trans[0].r * size / dimk;

            dimi = size/dimk;

            // remaining dimensions: read from w1, write into w2, then swap
            for (std::size_t d=1; d<NDIM; ++d) {

#ifdef HAVE_IBMBGQ

                mTxmq_padding(dimi, trans[d].r, dimk, dimk, w2, w1, trans[d].U);

#else

                mTxmq(dimi, trans[d].r, dimk, w2, w1, trans[d].U, dimk);

#endif

                size = trans[d].r * size / dimk;

                dimi = size/dimk;

                std::swap(w1,w2);

            }


            // If all blocks are full rank we can skip the transposes

            // doit becomes true as soon as one dimension carries a non-null VT
            bool doit = false;

            for (std::size_t d=0; d<NDIM; ++d) doit = doit || trans[d].VT;


            if (doit) {

                for (std::size_t d=0; d<NDIM; ++d) {

                    if (trans[d].VT) {

                        // second factor for this dimension: extent goes from trans[d].r back to dimk
                        dimi = size/trans[d].r;

#ifdef HAVE_IBMBGQ

                        mTxmq_padding(dimi, dimk, trans[d].r, dimk, w2, w1, trans[d].VT);

#else

                        mTxmq(dimi, dimk, trans[d].r, w2, w1, trans[d].VT);

#endif

                        size = dimk*size/trans[d].r;

                    }

                    else {

                        // no second factor for this dimension: only transpose;
                        // dimi keeps the value left by the preceding step
                        fast_transpose(dimk, dimi, w1, w2);

                    }

                    std::swap(w1,w2);

                }

            }

            // Assuming here that result is contiguous and aligned

            // accumulation step (presumably result += mufac*w1 over size elements)
            aligned_axpy(size, result.ptr(), w1, mufac);

        }


        /// accumulate into result

        template <typename T, typename R>


        void apply_transformation3(const Tensor<T> trans2[NDIM],

                                  const Tensor<T>& f,

                                  const Q mufac,

                                  Tensor<R>& result) const {


            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling


            Tensor<R> result2=general_transform(f,trans2);

            result2.scale(mufac);

            result+=result2;


        }


        /// don't accumulate, since we want to do this at apply()

        /// Transforms \c f with the per-dimension matrices via general_transform()
        /// and scales by \c mufac; \c result is overwritten, NOT accumulated into.
        ///
        /// Only \c trans2, \c f, \c mufac and \c result are used. The remaining
        /// parameters are kept so that existing callers keep compiling. The
        /// hand-written low-rank code path that used them was permanently
        /// disabled (it sat in the \c \#else branch of an \c \#if \c 1 and
        /// referred to a \c trans array that is not declared in this function),
        /// so it has been removed.
        ///
        /// @param[in]  n       unused
        /// @param[in]  dimk    unused
        /// @param[in]  tol     unused
        /// @param[in]  trans2  one transformation matrix per dimension
        /// @param[in]  f       input coefficients
        /// @param[in]  work1   unused
        /// @param[in]  work2   unused
        /// @param[in]  mufac   scale factor applied to the transformed tensor
        /// @param[out] result  receives the scaled, transformed tensor
        template <typename T, typename R>
        void apply_transformation2(Level n, long dimk,  double tol,
                                  const Tensor<T> trans2[NDIM],
                                  const GenTensor<T>& f,
                                  GenTensor<R>& work1,
                                  GenTensor<R>& work2,
                                  const Q mufac,
                                  GenTensor<R>& result) const {

            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling

            result=general_transform(f,trans2);
            result.scale(mufac);
        }


        /// Apply one of the separated terms, accumulating into the result

        /// Applies the requested blocks of one separated term and accumulates
        /// them into \c result (R block) and \c result0 (T block).
        ///
        /// For each block and each dimension the number of leading singular
        /// values that are not below the relative tolerance is counted. If that
        /// count reaches the break-even rank, the full matrix (R or T) is used;
        /// otherwise the low-rank factors (RU/RVT or TU/TVT) are used. If the
        /// count is zero for any dimension, the block is skipped entirely.
        ///
        /// @param[in]     at       selects which blocks to apply (r_term / t_term)
        /// @param[in]     ops_1d   1-D operator data, one pointer per dimension
        /// @param[in]     f        input for the R block
        /// @param[in]     f0       input for the T block
        /// @param[in,out] result   accumulates the R block, scaled by \c mufac
        /// @param[in,out] result0  accumulates the T block, scaled by \c -mufac
        /// @param[in]     tol      tolerance; divided by (norm product * NDIM) per block
        /// @param[in]     mufac    scale factor of this term
        /// @param[in,out] work1    scratch storage handed to apply_transformation
        /// @param[in,out] work2    scratch storage handed to apply_transformation
        template <typename T>
        void muopxv_fast(ApplyTerms at,
                         const ConvolutionData1D<Q>* const ops_1d[NDIM],
                         const Tensor<T>& f, const Tensor<T>& f0,
                         Tensor<TENSOR_RESULT_TYPE(T,Q)>& result,
                         Tensor<TENSOR_RESULT_TYPE(T,Q)>& result0,
                         const double tol,
                         const Q mufac,
                         Tensor<TENSOR_RESULT_TYPE(T,Q)>& work1,
                         Tensor<TENSOR_RESULT_TYPE(T,Q)>& work2) const {

            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling
            Transformation trans[NDIM];

            // Rank at or above which the full matrix is used instead of the
            // low-rank factors; the fraction of the matrix dimension depends on NDIM.
            const auto break_even_rank = [](const long dim) -> long {
                if (NDIM==1) return long(0.5*dim);
                if (NDIM==2) return long(0.6*dim);
                if (NDIM==3) return long(0.65*dim);
                return long(0.7*dim);
            };

            double Rnorm = 1.0;
            for (std::size_t d=0; d<NDIM; ++d) Rnorm *= ops_1d[d]->Rnorm;

            if (at.r_term and (Rnorm > 1.e-20)) {

                const auto tol_Rs = tol/(Rnorm*NDIM);  // Errors are relative within here

                // Determine rank of SVD to use or if to use the full matrix
                // (the modified NS form uses matrices of dimension k instead of 2k)
                const long twok = modified() ? k : 2*k;
                const long break_even = break_even_rank(twok);

                bool rank_is_zero = false;
                for (std::size_t d=0; d<NDIM; ++d) {
                    // count the leading singular values that are not below the tolerance
                    long r;
                    for (r=0; r<twok; ++r) {
                        if (ops_1d[d]->Rs[r] < tol_Rs) break;
                    }
                    if (r >= break_even) {
                        trans[d].r = twok;
                        trans[d].U = ops_1d[d]->R.ptr();
                        trans[d].VT = nullptr;
                    }
                    else {
#ifdef USE_GENTENSOR
                        r = std::max(2L,r+(r&1L)); // (needed for 6D == when GENTENSOR is on) NOLONGER NEED TO FORCE OPERATOR RANK TO BE EVEN
#endif
                        if (r == 0) {
                            // nothing significant along this dimension: skip the whole block
                            rank_is_zero = true;
                            break;
                        }
                        trans[d].r = r;
                        trans[d].U = ops_1d[d]->RU.ptr();
                        trans[d].VT = ops_1d[d]->RVT.ptr();
                    }
                }

                if (!rank_is_zero)
                    apply_transformation(twok, trans, f, work1, work2, mufac, result);
            }

            double Tnorm = 1.0;
            for (std::size_t d=0; d<NDIM; ++d) Tnorm *= ops_1d[d]->Tnorm;

            // NOTE(review): the R block is screened with Rnorm > 1.e-20 while the
            // T block uses Tnorm > 0.0 -- kept as found, confirm this is intended.
            if (at.t_term and (Tnorm>0.0)) {
                const auto tol_Ts = tol/(Tnorm*NDIM);  // Errors are relative within here

                const long break_even = break_even_rank(k);

                bool rank_is_zero = false;
                for (std::size_t d=0; d<NDIM; ++d) {
                    // count the leading singular values that are not below the tolerance
                    long r;
                    for (r=0; r<k; ++r) {
                        if (ops_1d[d]->Ts[r] < tol_Ts) break;
                    }
                    if (r >= break_even) {
                        trans[d].r = k;
                        trans[d].U = ops_1d[d]->T.ptr();
                        trans[d].VT = nullptr;
                    }
                    else {
#ifdef USE_GENTENSOR
                        r = std::max(2L,r+(r&1L)); // (needed for 6D == GENTENSOR is USED) NOLONGER NEED TO FORCE OPERATOR RANK TO BE EVEN
#endif
                        if (r == 0) {
                            // nothing significant along this dimension: skip the whole block
                            rank_is_zero = true;
                            break;
                        }
                        trans[d].r = r;
                        trans[d].U = ops_1d[d]->TU.ptr();
                        trans[d].VT = ops_1d[d]->TVT.ptr();
                    }
                }

                // the T block enters with the opposite sign
                if (!rank_is_zero)
                    apply_transformation(k, trans, f0, work1, work2, -mufac, result0);
            }
        }


        /// Apply one of the separated terms, accumulating into the result

        /// GenTensor variant: always uses the full matrices (R, then T) and
        /// hands them to apply_transformation2(), which overwrites its output.
        ///
        /// Returns immediately, leaving both outputs untouched, if the product
        /// of the per-dimension Rnorm values is exactly zero.
        ///
        /// @param[in]     n        level; the T block is only applied for n > 0
        /// @param[in]     ops_1d   1-D operator data, one pointer per dimension
        /// @param[in]     f        input for the R block
        /// @param[in]     f0       input for the T block
        /// @param[out]    result   receives the R block scaled by \c mufac
        /// @param[out]    result0  receives the T block scaled by \c -mufac
        /// @param[in]     tol      tolerance; rescaled by the R norm and passed on
        /// @param[in]     mufac    scale factor of this term
        /// @param[in,out] work1    passed through to apply_transformation2
        /// @param[in,out] work2    passed through to apply_transformation2
        template <typename T>
        void muopxv_fast2(Level n,
                         const ConvolutionData1D<Q>* const ops_1d[NDIM],
                         const GenTensor<T>& f, const GenTensor<T>& f0,
                         GenTensor<TENSOR_RESULT_TYPE(T,Q)>& result,
                         GenTensor<TENSOR_RESULT_TYPE(T,Q)>& result0,
                         double tol,
                         const Q mufac,
                         GenTensor<TENSOR_RESULT_TYPE(T,Q)>& work1,
                         GenTensor<TENSOR_RESULT_TYPE(T,Q)>& work2) const {

            PROFILE_MEMBER_FUNC(SeparatedConvolution);

            // per-dimension matrices handed to apply_transformation2
            Tensor<T> matrices[NDIM];

            double Rnorm = 1.0;
            for (std::size_t axis=0; axis<NDIM; ++axis) {
                Rnorm *= ops_1d[axis]->Rnorm;
            }
            if (Rnorm == 0.0) return;

            if (Rnorm > 1.e-20) {
                // Errors are relative within here; the rescaled value is also
                // the one passed along with the T block further down
                tol /= (Rnorm*NDIM);

                // the modified NS form works with k x k matrices instead of 2k x 2k
                const long twok = modified() ? k : 2*k;

                for (std::size_t axis=0; axis<NDIM; ++axis) {
                    matrices[axis] = ops_1d[axis]->R;
                }
                apply_transformation2(n, twok, tol, matrices, f, work1, work2, mufac, result);
            }

            double Tnorm = 1.0;
            for (std::size_t axis=0; axis<NDIM; ++axis) {
                Tnorm *= ops_1d[axis]->Tnorm;
            }

            if (n > 0 and (Tnorm>1.e-20)) {
                for (std::size_t axis=0; axis<NDIM; ++axis) {
                    matrices[axis] = ops_1d[axis]->T;
                }
                // the T block enters with the opposite sign
                apply_transformation2(n, k, tol, matrices, f0, work1, work2, -mufac, result0);
            }
        }


        /// Computes the Frobenius norm of one of the separated terms ... WITHOUT FACTOR INCLUDED

        /// compute for 1 term, all dim, 1 disp, essentially for SeparatedConvolutionInternal


        /// Dispatches on modified(): the modified-NS estimate if the flag is
        /// set, the plain NS estimate otherwise.
        ///
        /// @param[in] n    level, forwarded
        /// @param[in] ops  1-D operator data, one pointer per dimension, forwarded
        /// @return norm estimate of the term, without its factor
        double munorm2(Level n, const ConvolutionData1D<Q>* ops[]) const {
            return modified() ? munorm2_modified(n,ops)
                              : munorm2_ns(n,ops);
        }


        /// Computes the Frobenius norm of one of the separated terms for the NS form

        ///       ... WITHOUT FACTOR INCLUDED

        /// compute for 1 term, all dim, 1 disp, essentially for SeparatedConvolutionInternal


        /// Norm estimate built from the per-dimension NSnormf and Tnormf values.
        ///
        /// With a_d = min(NSnormf_d, Tnormf_d) and b_d = max(NSnormf_d, Tnormf_d)
        /// the return value is
        ///   - prod_d b_d                                   for n == 0
        ///   - (prod_d b_d) * sum_{d : b_d > 0} (a_d/b_d)   for n != 0
        ///
        /// An earlier estimate based on the products of Rnormf and Tnormf
        /// (sqrt(prodR^2 - prodT^2)) had already been disabled, and its inputs
        /// were still computed and then overwritten; that dead computation has
        /// been removed. The returned value is unchanged.
        ///
        /// @param[in] n    level; only whether it is zero matters
        /// @param[in] ops  1-D operator data, one pointer per dimension
        /// @return norm estimate of the term, without its factor
        double munorm2_ns(Level n, const ConvolutionData1D<Q>* ops[]) const {
            //PROFILE_MEMBER_FUNC(SeparatedConvolution);

            double prod=1.0, sum=0.0;
            for (std::size_t d=0; d<NDIM; ++d) {
                const double a = ops[d]->NSnormf;
                const double b = ops[d]->Tnormf;
                const double aa = std::min(a,b);
                const double bb = std::max(a,b);
                prod *= bb;
                // skip the ratio when both norms vanish, to avoid 0/0
                if (bb > 0.0) sum +=(aa/bb);
            }
            if (n) prod *= sum;

            return prod;
        }


        /// Computes the operator norm of one of the separated terms of the modified NS form

        ///    ... WITHOUT FACTOR INCLUDED

        /// compute for 1 term, all dim, 1 disp, essentially for SeparatedConvolutionInternal


        /// Combines the per-dimension N_diff, N_F and N_up values of the 1-D
        /// operators into one norm estimate for the term.
        ///
        /// @param[in] n       level; not used in the body
        /// @param[in] ops_1d  1-D operator data, one pointer per dimension
        /// @return (dff + udf + duu) / (factorial * NDIM), see the body for the pieces
        double munorm2_modified(Level n, const ConvolutionData1D<Q>* ops_1d[]) const {

            PROFILE_MEMBER_FUNC(SeparatedConvolution);


            // follows Eq. (21) ff of Beylkin 2008 (Beylkin Appl. Comput. Harmon. Anal. 24, pp 354)


            // we have all combinations of difference, upsampled, F terms (d, u, f),

            // with the constraint that d is in each term exactly once. In the mixed terms (udf)

            // we just get all possible combinations, in the pure terms (dff, duu) we have

            // to multiply each term (dff, fdf, ffd) with (NDIM-1)!, to get the right number.


            // accumulators for the three families of terms
            double dff = 0.0;

            double duu = 0.0;

            double udf = 0.0;


            // loop over d shifting over the dimensions dxx, xdx, xxd,

            for (size_t d=0; d<NDIM; ++d) {

                // every term starts with the difference norm of dimension d
                double dff_tmp = ops_1d[d]->N_diff;

                double duu_tmp = ops_1d[d]->N_diff;

                double udf_tmp = ops_1d[d]->N_diff;


                for (size_t dd=0; dd<NDIM; ++dd) {

                    if (dd!=d) {

                        dff_tmp *= ops_1d[dd]->N_F;

                        duu_tmp *= ops_1d[dd]->N_up;


                        // udf_tmp keeps growing over dd, so each contribution to udf
                        // below uses the partial product accumulated so far
                        udf_tmp *= ops_1d[dd]->N_F;

                        // NOTE(review): the test compares ddd with dd, not with d,
                        // so N_up of dimension d itself also enters -- confirm intended
                        for (size_t ddd=0; ddd<NDIM; ++ddd) {

                            if (ddd!=dd) udf += udf_tmp * ops_1d[ddd]->N_up;

                        }

                    }

                }


                dff+=dff_tmp;

                duu+=duu_tmp;

            }


            // finalize with the factorial

            // NOTE(review): the loop runs over i = 1 .. NDIM-2, i.e. it yields (NDIM-2)!,
            // whereas the comment above speaks of (NDIM-1)! -- confirm which is intended
            double factorial=1.0;

            for (int i=1; i<static_cast<int>(NDIM)-1; ++i) factorial*=double(i);

            dff*=factorial;

            duu*=factorial;


            // Eq. (23) of Beylkin 2008, for one separated term WITHOUT the factor

            double norm=(dff + udf + duu) /(factorial * double(NDIM));


//            // double check

//            if (NDIM==3) {

//                Tensor<Q> R_full=outer(ops_1d[0]->R,outer(ops_1d[1]->R,ops_1d[2]->R));

//                Tensor<Q> T_full=outer(ops_1d[0]->T,outer(ops_1d[1]->T,ops_1d[2]->T));

//                double n2=(R_full-T_full).normf();

////                print("norm estimate, norm",norm, n2, norm<n2);

//                norm=n2;

//            }


            return norm;


        }


        /// Collect the 1D operator blocks of one separated term for one displacement (NS form)

        /// For every dimension the nonstandard-form block of term \c mu is requested from the
        /// 1D convolution of that dimension; the norm of the term is the estimate returned by
        /// munorm2() multiplied by the modulus of the term's factor.
        /// @param[in]  mu      index of the separated term
        /// @param[in]  n       level (=scale)
        /// @param[in]  disp    displacement key
        /// @return     per-dimension operator data and norm estimate of term \c mu
        const SeparatedConvolutionInternal<Q,NDIM> getmuop(int mu, Level n, const Key<NDIM>& disp) const {
            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling
            const auto& term = ops[mu];
            const auto& shift = disp.translation();

            SeparatedConvolutionInternal<Q,NDIM> muop;
            for (std::size_t dim=0; dim<NDIM; ++dim) {
                muop.ops[dim] = term.getop(dim)->nonstandard(n, shift[dim]);
            }

            // NOTE: an empirical rescaling of this estimate (x0.5 per dimension with zero
            // displacement, x0.8 per dimension with |displacement|==1) was tried in the past
            // and is disabled; the plain munorm2 estimate is used.
            muop.norm = munorm2(n, muop.ops)*std::abs(term.getfac());
            return muop;
        }


        /// Collect the 1D operator blocks of one separated term (modified NS form)

        /// In the modified NS form the operator is not Toeplitz, so every 1D block is addressed
        /// by the pair (source translation, target translation) instead of the displacement alone.
        /// @param[in]  mu      index of the separated term
        /// @param[in]  n       level (=scale)
        /// @param[in]  disp    displacement key
        /// @param[in]  source  source key
        /// @return     per-dimension operator data and norm estimate of term \c mu
        const SeparatedConvolutionInternal<Q,NDIM>
        getmuop_modified(int mu, Level n, const Key<NDIM>& disp, const Key<NDIM>& source) const {
            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling

            // ops[mu] holds one term for all dimensions; muop.ops[dim] receives one term, one dimension
            const auto& term = ops[mu];
            SeparatedConvolutionInternal<Q,NDIM> muop;

            for (std::size_t dim=0; dim<NDIM; ++dim) {
                const Translation from = source.translation()[dim];            // source translation
                const Translation to = from + disp.translation()[dim];         // target translation

                Key<2> block_key(n,Vector<Translation,2>{from,to});
                muop.ops[dim] = term.getop(dim)->mod_nonstandard(block_key);
            }

            // the norm estimate is the same for the modified and the plain NS form
            muop.norm = munorm2(n, muop.ops)*std::abs(term.getfac());
            return muop;
        }


        /// Return the operator data (all terms, all dimensions) for one displacement

        /// Dispatches on the representation: in the NS form the operator depends only on the
        /// displacement, in the modified NS form the source key is needed as well.
        /// @param[in]  n       level (=scale)
        /// @param[in]  d       displacement key
        /// @param[in]  source  source key (used only in the modified NS form)
        /// @return     pointer to the cached operator data
        const SeparatedConvolutionData<Q,NDIM>* getop(Level n, const Key<NDIM>& d, const Key<NDIM>& source) const {
            return modified() ? getop_modified(n, d, source) : getop_ns(n, d);
        }


        /// Return the NS-form operator data (all terms, all dimensions) for one displacement

        /// The data is looked up in the cache \c data first; on a miss every separated term is
        /// assembled with getmuop(), the total norm is the root of the summed squared term norms,
        /// and the result is stored in the cache.
        /// @param[in]  n   level (=scale)
        /// @param[in]  d   displacement
        /// @return pointer to cached operator
        const SeparatedConvolutionData<Q,NDIM>* getop_ns(Level n, const Key<NDIM>& d) const {
            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling
            if (const SeparatedConvolutionData<Q,NDIM>* cached = data.getptr(n,d)) return cached;

            // build every term and accumulate the squared norms on the fly
            SeparatedConvolutionData<Q,NDIM> op(rank);
            double sumsq = 0.0;
            for (int mu=0; mu<rank; ++mu) {
                op.muops[mu] = getmuop(mu, n, d);
                const double term_norm = op.muops[mu].norm;
                sumsq += term_norm*term_norm;
            }
            op.norm = sqrt(sumsq);

            // hand out the cached copy, not the local one
            data.set(n, d, op);
            return data.getptr(n,d);
        }


        /// Return the modified-NS-form operator data (all terms, all dimensions) for one displacement

        /// The operator in the modified NS form is not Toeplitz, so the cache key combines the
        /// displacement with the parity (translation modulo 2) of the source key.
        /// @param[in]  n       level (=scale) (actually redundant, since included in source)
        /// @param[in]  disp    displacement key
        /// @param[in]  source  source key
        /// @return pointer to cached operator
        const SeparatedConvolutionData<Q,NDIM>* getop_modified(Level n, const Key<NDIM>& disp, const Key<NDIM>& source) const {
            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling

            // the upsampled part of the operator depends on the source translation modulo 2
            Vector<Translation,NDIM> parity=source.translation();
            for (size_t i=0; i<NDIM; ++i) parity[i]%=2;
            Key<2*NDIM> key=disp.merge_with(Key<NDIM>(source.level(),parity));

            if (const SeparatedConvolutionData<Q,NDIM>* cached = mod_data.getptr(n,key)) return cached;

            // build every term and accumulate the squared norms on the fly
            SeparatedConvolutionData<Q,NDIM> op(rank);
            double sumsq = 0.0;
            for (int mu=0; mu<rank; ++mu) {
                op.muops[mu] = getmuop_modified(mu, n, disp, source);
                const double term_norm = op.muops[mu].norm;
                sumsq += term_norm*term_norm;
            }
            op.norm = sqrt(sumsq);

            // hand out the cached copy, not the local one
            mod_data.set(n, key, op);
            return mod_data.getptr(n,key);
        }


        /// Check that the simulation cell has the same width in every dimension

        /// The widths are read from the global FunctionDefaults (not from this operator);
        /// MADNESS_CHECK fires if a width differs from the first one by a relative 1e-14 or more.
        void check_cubic() {
            const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width();

            // a cubic cell is currently assumed; compare every further width with the first
            for (std::size_t d=1; d<NDIM; ++d) {
                MADNESS_CHECK(fabs(cell_width(d)-cell_width(0L)) < 1e-14*cell_width(0L));
            }
        }


        /// Upsample one particle's dimensions of coeff to the child indicated by key

        /// The dimensions of the selected particle are transformed with the twoscale
        /// coefficients h0/h1, the remaining (spectator) dimensions with the identity.
        /// An empty coeff yields an empty tensor; a coeff that has only NDIM dimensions
        /// is returned unchanged (particle must be 1 in that case).
        /// @param[in]  key     the key indicating the child -- only some dimensions will be "reproductive"
        /// @param[in]  coeff   the coeffs of dim 2*NDIM that will be upsampled
        /// @param[in]  particle    if 0: upsample the first NDIM dimensions
        ///                         if 1: upsample the second NDIM dimensions
        /// @return     a partially upsampled coefficient tensor
        template<typename T, size_t FDIM>
        GenTensor<T> partial_upsample(const Key<FDIM>& key, const GenTensor<T>& coeff, const int particle) const {

            if (coeff.rank()==0) return GenTensor<T>();
            MADNESS_ASSERT(coeff.dim(0)==k);
            if (NDIM==coeff.ndim()) {
                MADNESS_ASSERT(particle==1);    // other particle, leave this particle unchanged
                return coeff;
            }

            MADNESS_ASSERT(coeff.ndim()==FDIM);
            MADNESS_ASSERT(particle==0 or (2*NDIM==FDIM));

            // twoscale coefficients for upsampling (h0/h1; see Alpert Eq (3.35a/b))
            // and the identity for the spectator dimensions
            const Tensor<T> h[2] = {cdata.h0, cdata.h1};
            Tensor<T> identity(k,k);
            for (int i=0; i<k; ++i) identity(i,i)=1.0;
            Tensor<T> matrices[2*NDIM];

            // one matrix per dimension: h0 or h1 (chosen by the child's parity) or the identity
            if (particle==0) {
                for (size_t ii=0; ii<NDIM; ++ii) {
                    matrices[ii]=h[key.translation()[ii]%2];
                    matrices[ii+NDIM]=identity;
                }
            } else if (particle==1) {
                for (size_t ii=0; ii<NDIM; ++ii) {
                    matrices[ii]=identity;
                    matrices[ii+NDIM]=h[key.translation()[ii+NDIM]%2];
                }
            } else {
                MADNESS_EXCEPTION("unknown particle",1);
            }

            return general_transform(coeff,matrices);
        }


        /// Upsample the sum coefficients of level n to sum coefficients on level n+1

        /// Specialization of the unfilter method that transforms only the sum coefficients:
        /// every dimension is transformed with h0 or h1, chosen by the parity of the child's
        /// translation in that dimension. There are no difference coefficients; use unfilter
        /// if those are needed.
        /// @param[in]  key     key of level n+1
        /// @param[in]  coeff   sum coefficients of level n (does NOT belong to key!!)
        /// @return     sum     coefficients on level n+1
        template<typename T, size_t FDIM>
        GenTensor<T> upsample(const Key<FDIM>& key, const GenTensor<T>& coeff) const {

            // twoscale coefficients for upsampling (h0/h1; see Alpert Eq (3.35a/b))
            const Tensor<T> h[2] = {cdata.h0, cdata.h1};

            // pick h0 or h1 per dimension according to the child's parity
            Tensor<T> matrices[FDIM];
            for (size_t dim=0; dim<FDIM; ++dim) {
                matrices[dim]=h[key.translation()[dim]%2];
            }

            return general_transform(coeff,matrices);
        }


        /// initializes range using range of ops[0]

        /// @pre `ops[i].range == ops[0].range`


        void init_range() {

          if (!ops.empty()) {

            for (int d = 0; d != NDIM; ++d) {

              for(const auto & op: ops) {

                MADNESS_ASSERT(op.getop(d)->range == ops[0].getop(d)->range);

              }

              range[d] = ops[0].getop(d)->range;

            }

          }

        }


        /// initializes lattice_sum using `ops[0].lattice_summed()`

        /// @pre `ops[i].lattice_summed() == ops[0].lattice_summed()`


        void init_lattice_summed() {

          if (!ops.empty()) {

            for (int d = 0; d != NDIM; ++d) {

              for (const auto &op : ops) {

                MADNESS_ASSERT(op.lattice_summed() ==

                               ops[0].lattice_summed());

              }

              lattice_summed_ = ops[0].lattice_summed();

            }

          }

        }


    public:


        /// Constructor for separated convolutions with the same 1D operator in each direction (isotropic)

        /// Every entry of \c argops becomes one separated term by wrapping it into a ConvolutionND.
        /// @param[in]  world       the world this operator lives in
        /// @param[in]  argops      one 1D convolution per separated term
        /// @param[in]  k           wavelet order (defaults to FunctionDefaults<NDIM>::get_k())
        /// @param[in]  doleaves    stored in the \c doleaves member
        SeparatedConvolution(World& world,
                             const std::vector< std::shared_ptr< Convolution1D<Q> > >& argops,
                             long k = FunctionDefaults<NDIM>::get_k(),
                             bool doleaves = false)
                : WorldObject< SeparatedConvolution<Q,NDIM> >(world)
                , info()
                , doleaves(doleaves)
                , lattice_summed_(false)  // this will be overridden by init_lattice_summed below
                , modified_(false)
                , particle_(1)
                , destructive_(false)
                , k(k)
                , cdata(FunctionCommonData<Q,NDIM>::get(k))
                , rank(argops.size())
                , vk(NDIM,k)
                , v2k(NDIM,2*k)
                , s0(std::max<std::size_t>(2,NDIM),Slice(0,k-1))
        {
            // one ConvolutionND term per supplied 1D operator
            for (const auto& argop : argops) {
              this->ops.push_back(ConvolutionND<Q,NDIM>(argop));
            }
            init_range();
            init_lattice_summed();

            // last construction step: WorldObject::process_pending()
            this->process_pending();
        }


        /// Constructor for general convolutions given as ready-made separated terms

        /// The terms are copied as they are; range and lattice summation flags are taken from them.
        /// @param[in]  world       the world this operator lives in
        /// @param[in]  argops      the separated terms (one ConvolutionND each)
        /// @param[in]  k           wavelet order (defaults to FunctionDefaults<NDIM>::get_k())
        /// @param[in]  doleaves    stored in the \c doleaves member
        SeparatedConvolution(World& world,
                             const std::vector< ConvolutionND<Q,NDIM> >& argops,
                             long k = FunctionDefaults<NDIM>::get_k(),
                             bool doleaves = false)
                : WorldObject< SeparatedConvolution<Q,NDIM> >(world)
                , info()
                , doleaves(doleaves)
                , lattice_summed_(false)  // this will be overridden by init_lattice_summed below
                , modified_(false)
                , particle_(1)
                , destructive_(false)
                , ops(argops)
                , k(k)
                , cdata(FunctionCommonData<Q,NDIM>::get(k))
                , rank(argops.size())
                , vk(NDIM,k)
                , v2k(NDIM,2*k)
                , s0(std::max<std::size_t>(2,NDIM),Slice(0,k-1))
        {
            // derive range and lattice summation flags from the supplied terms
            init_range();
            init_lattice_summed();

            // last construction step: WorldObject::process_pending()
            this->process_pending();
        }


        /// Constructor from an OperatorInfo description

        /// Delegates to the coefficient/exponent constructor with empty tensors, then copies
        /// type, truncate_lowexp_gaussians and range from \c info1, generates the Gaussian
        /// coefficients and exponents with make_coeff_for_operator() and initializes the terms.
        /// @param[in]  world           the world this operator lives in
        /// @param[in]  info1           operator description (lo, thresh, mu, type, range, ...)
        /// @param[in]  lattice_summed  per-axis lattice summation flags
        /// @param[in]  k               wavelet order (defaults to FunctionDefaults<NDIM>::get_k())
        /// @param[in]  doleaves        stored in the \c doleaves member
        SeparatedConvolution(World& world, const OperatorInfo info1,
                             const array_of_bools<NDIM>& lattice_summed = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                             int k=FunctionDefaults<NDIM>::get_k(),
                             bool doleaves = false)
               : SeparatedConvolution(world,Tensor<double>(0l),Tensor<double>(0l),info1.lo,info1.thresh,lattice_summed,k,doleaves,info1.mu) {
            // the delegated constructor set mu, lo and thresh; complete the description
            info.type=info1.type;
            info.truncate_lowexp_gaussians = info1.truncate_lowexp_gaussians;
            info.range = info1.range;

            // Gaussian expansion of the requested operator
            auto [gauss_coeff, gauss_expnt] = make_coeff_for_operator(world, info, lattice_summed);
            rank=gauss_coeff.dim(0);
            range = info.template range_as_array<NDIM>();

            // one term per Gaussian
            ops.resize(rank);
            initialize(gauss_coeff,gauss_expnt,range);
            init_lattice_summed();
        }


        /// Constructor for Gaussian convolutions (mostly for backward compatibility)

        /// Builds one separated term per entry of coeff/expnt via initialize() and then derives
        /// range and lattice summation flags from the terms. With empty coeff the operator has
        /// no terms (this is how the OperatorInfo constructor uses it).
        /// @param[in]  world           the world this operator lives in
        /// @param[in]  coeff           coefficients of the Gaussians
        /// @param[in]  expnt           exponents of the Gaussians
        /// @param[in]  lo              stored in \c info
        /// @param[in]  thresh          stored in \c info
        /// @param[in]  lattice_summed  per-axis lattice summation flags
        /// @param[in]  k               wavelet order (defaults to FunctionDefaults<NDIM>::get_k())
        /// @param[in]  doleaves        stored in the \c doleaves member
        /// @param[in]  mu              stored in \c info
        SeparatedConvolution(World& world,
                             const Tensor<Q>& coeff, const Tensor<double>& expnt,
                             double lo, double thresh,
                             const array_of_bools<NDIM>& lattice_summed = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                             int k=FunctionDefaults<NDIM>::get_k(),
                             bool doleaves = false,
                             double mu=0.0)
                : WorldObject< SeparatedConvolution<Q,NDIM> >(world)
                , info(mu,lo,thresh,OT_UNDEFINED)
                , doleaves(doleaves)
                , lattice_summed_(lattice_summed)
                // same explicit defaults as in the other constructors
                , modified_(false)
                , particle_(1)
                , destructive_(false)
                , ops(coeff.dim(0))
                , k(k)
                , cdata(FunctionCommonData<Q,NDIM>::get(k))
                , rank(coeff.dim(0))
                , vk(NDIM,k)
                , v2k(NDIM,2*k)
                , s0(std::max<std::size_t>(2,NDIM),Slice(0,k-1)) {
            initialize(coeff,expnt);
            init_range();
            init_lattice_summed();
        }


        /// Set factor and 1D Gaussian convolutions of every separated term

        /// The cached 1D operators are normalized, so the factor stored per term is the
        /// coefficient divided by the normalization (expnt/pi)^(NDIM/2). The exponent passed
        /// to the 1D cache is scaled by the squared cell width of the respective dimension.
        /// @param[in]  coeff   coefficients of the Gaussians
        /// @param[in]  expnt   exponents of the Gaussians
        /// @param[in]  range   per-dimension kernel range handed to the 1D cache
        void initialize(const Tensor<Q>& coeff, const Tensor<double>& expnt, std::array<KernelRange, NDIM> range = {}) {
            const Tensor<double>& width = FunctionDefaults<NDIM>::get_cell_width();

            for (int mu=0; mu<rank; ++mu) {
                const double alpha = expnt(mu);

                // normalization coeff; the stored factor recovers the requested coeff
                Q c = std::pow(sqrt(alpha/constants::pi),static_cast<int>(NDIM));
                ops[mu].setfac(coeff(mu)/c);

                for (std::size_t d=0; d<NDIM; ++d) {
                  ops[mu].setop(d,GaussianConvolution1DCache<Q>::get(k, alpha*width[d]*width[d], 0,
                                       lattice_summed_[d], 0., range[d]));
                }
            }
        }


        /// WSTHORNTON Constructor for Gaussian convolutions with an extra per-dimension argument

        /// Builds one separated term per entry of coeff/expnt; every 1D operator is a freshly
        /// created GaussianConvolution1D<double_complex> that receives \c args[d] as its last
        /// constructor argument.
        /// @param[in]  world           the world this operator lives in
        /// @param[in]  args            per-dimension value forwarded to the 1D convolutions
        /// @param[in]  coeff           coefficients of the Gaussians
        /// @param[in]  expnt           exponents of the Gaussians
        /// @param[in]  lattice_summed  per-axis lattice summation flags
        /// @param[in]  k               wavelet order (defaults to FunctionDefaults<NDIM>::get_k())
        /// @param[in]  doleaves        stored in the \c doleaves member
        SeparatedConvolution(World& world,
                             Vector<double,NDIM> args,
                             const Tensor<Q>& coeff, const Tensor<double>& expnt,
                             const array_of_bools<NDIM>& lattice_summed = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                             int k=FunctionDefaults<NDIM>::get_k(),
                             bool doleaves=false)
                : WorldObject< SeparatedConvolution<Q,NDIM> >(world)
                , info(0.0,0.0,0.0,OT_UNDEFINED)
                , doleaves(doleaves)
                , lattice_summed_(lattice_summed)
                , modified_(false)
                , particle_(1)
                , destructive_(false)
                , ops(coeff.dim(0))
                , k(k)
                , cdata(FunctionCommonData<Q,NDIM>::get(k))
                , rank(coeff.dim(0))
                , vk(NDIM,k)
                , v2k(NDIM,2*k)
                , s0(std::max<std::size_t>(2,NDIM),Slice(0,k-1))
        {
            using gaussT = GaussianConvolution1D<double_complex>;
            const Tensor<double>& width = FunctionDefaults<NDIM>::get_cell_width();

            for (int mu=0; mu<rank; ++mu) {
                // normalization coeff of the NDIM-dimensional Gaussian
                double c = std::pow(sqrt(expnt(mu)/madness::constants::pi),static_cast<int>(NDIM));
                ops[mu].setfac(coeff(mu)/c);

                for (std::size_t d=0; d<NDIM; ++d) {
                  // exponent in units of the cell width and the matching 1D coefficient
                  const double scaled_expnt = expnt(mu)*width[d]*width[d];
                  double c2 = sqrt(scaled_expnt/madness::constants::pi);
                  std::shared_ptr<gaussT>
                      gcptr(new gaussT(k, c2, scaled_expnt, 0, lattice_summed[d], args[d]));
                  ops[mu].setop(d,gcptr);
                }
            }
            init_lattice_summed();
        }


        /// Virtual destructor; there is nothing to release explicitly
        virtual ~SeparatedConvolution() = default;


        /// Print the three timers of this operator; only rank 0 of the world prints
        void print_timer() const {
            if (this->get_world().rank()!=0) return;

            timer_full.print("op full tensor       ");
            timer_low_transf.print("op low rank transform");
            timer_low_accumulate.print("op low rank addition ");
        }


        /// Reset the three timers of this operator; only rank 0 of the world resets them
        void reset_timer() const {
            if (this->get_world().rank()!=0) return;

            timer_full.reset();
            timer_low_transf.reset();
            timer_low_accumulate.reset();
        }


        /// Return the displacement keys for level n, given this operator's lattice summation flags

        /// @param[in]  n   level (=scale)
        /// @return     reference to the list of displacements provided by Displacements<NDIM>
        const std::vector< Key<NDIM> >& get_disp(Level n) const {
            const array_of_bools<NDIM>& summed = lattice_summed();
            return Displacements<NDIM>().get_disp(n, summed);
        }


        /// Access the per-axis lattice summation flags

        /// @return flag for each axis indicating whether lattice summation is performed in that direction
        const array_of_bools<NDIM>& lattice_summed() const {
            return lattice_summed_;
        }

        /// Access the per-axis domain periodicity flags

        /// @return flag for each axis indicating whether the domain is periodic in that direction (false by default)
        const array_of_bools<NDIM>& domain_is_periodic() const {
            return domain_is_periodic_;
        }

        /// Change the domain periodicity

        /// \param domain_is_periodic new flag for each axis indicating whether the domain is periodic in that direction
        void set_domain_periodicity(const array_of_bools<NDIM>& domain_is_periodic) {
            domain_is_periodic_ = domain_is_periodic;
        }


        /// Return the operator norm for all terms, all dimensions and 1 displacement

        /// The value is the norm stored with the (cached) operator data returned by getop().
        /// @param[in]  n           level (=scale)
        /// @param[in]  d           displacement key
        /// @param[in]  source_key  source key
        /// @return     norm of the operator for this displacement
        double norm(Level n, const Key<NDIM>& d, const Key<NDIM>& source_key) const {
            // SeparatedConvolutionData keeps data for all terms and all dimensions and 1 displacement
            const SeparatedConvolutionData<Q,NDIM>* op = getop(n, d, source_key);
            return op->norm;
        }


        /// Return that part of a hi-dim key that serves as the base for displacements of this operator

        /// Overload for a function of different dimension than the operator (e.g. in the
        /// exchange operator): the key is broken apart and the part belonging to the particle
        /// this operator works on is returned. If particle() is neither 1 nor 2 the returned
        /// key is default-constructed.
        /// @param[in]  key hi-dim key
        /// @return     a lo-dim part of key; first part for particle 1, second part for particle 2
        template<size_t FDIM>
        typename std::enable_if<FDIM!=NDIM, Key<NDIM> >::type
        get_source_key(const Key<FDIM> key) const {
            Key<NDIM> source;
            Key<FDIM-NDIM> other;   // part of the key this operator does not act on
            switch (particle()) {
            case 1:
                key.break_apart(source,other);
                break;
            case 2:
                key.break_apart(other,source);
                break;
            default:
                break;
            }
            return source;
        }


        /// Return the key that serves as the base for displacements of this operator

        /// Overload for a function of the same dimension as the operator: the key is
        /// returned unchanged.
        /// @param[in]  key key of the function
        /// @return     key itself
        template<size_t FDIM>
        typename std::enable_if<FDIM==NDIM, Key<NDIM> >::type
        get_source_key(const Key<FDIM> key) const {
            // function and operator have the same dimension: nothing to split off
            return key;
        }


        /// Apply this operator on a function f

        /// The operator does not need to have the same dimension as the function, e.g.
        /// the Poisson kernel for the exchange operator acts only on 1 electron of a
        /// given (pair) function. Forwards to madness::apply().
        /// @param[in]  f   a function of same or different dimension as this operator
        /// @return     the result function of the same dimensionality as the input function f
        template <typename T, size_t FDIM>
        Function<TENSOR_RESULT_TYPE(T,Q),FDIM>
        operator()(const Function<T,FDIM>& f) const {
            return madness::apply(*this, f);
        }


        /// Apply this operator on a vector of functions

        /// Forwards to madness::apply().
        /// @param[in]  f   the functions to apply this operator on
        /// @return     the vector of result functions
        template <typename T, size_t FDIM>
        std::vector<Function<TENSOR_RESULT_TYPE(T,Q),FDIM>>
        operator()(const std::vector<Function<T,FDIM>>& f) const {
            return madness::apply(*this, f);
        }


        /// Apply this operator on a separable function f(1,2) = f(1) f(2)

        /// Each argument is wrapped into a one-element vector and passed to madness::apply().
        /// NOTE(review): f1 is declared as Function<T,LDIM> but is stored in a vector of
        /// Function<Q,LDIM>, so this presumably only works when T and Q agree -- confirm intent.
        /// @param[in]  f1   a function of dim LDIM
        /// @param[in]  f2   a function of dim LDIM
        /// @return     the result function of dim NDIM=2*LDIM: g(1,2) = G(1,1',2,2') f(1',2')
        template <typename T, size_t LDIM>
        Function<TENSOR_RESULT_TYPE(T,Q),LDIM+LDIM>
        operator()(const Function<T,LDIM>& f1, const Function<Q,LDIM>& f2) const {
            return madness::apply(*this,
                                  std::vector<Function<Q,LDIM>>({f1}),
                                  std::vector<Function<Q,LDIM>>({f2}));
        }


        /// Apply this operator on a sum of separable functions f(1,2) = \sum_i f_i(1) f_i(2)

        /// Forwards to madness::apply().
        /// @param[in]  f1   the functions of dim LDIM of the first particle
        /// @param[in]  f2   the functions of dim LDIM of the second particle
        /// @return     the result function of dim NDIM=2*LDIM: g(1,2) = G(1,1',2,2') f(1',2')
        template <typename T, size_t LDIM>
        Function<TENSOR_RESULT_TYPE(T,Q),LDIM+LDIM>
        operator()(const std::vector<Function<T,LDIM>>& f1,
                   const std::vector<Function<Q,LDIM>>& f2) const {
            return madness::apply(*this, f1, f2);
        }


        /// Apply this operator onto another suitable argument, returning the same type

        /// Forwards to madness::apply().
        /// argT must implement argT::apply(const SeparatedConvolution& op, const argT& arg)
        /// @param[in]  argument    the object to apply this operator on
        /// @return     the result, of the same type as the argument
        template<typename argT>
        argT
        operator()(const argT& argument) const {
            return madness::apply(*this,argument);
        }


        /// Apply this operator on coefficients in full rank form

        /// All separated terms whose norm exceeds the per-term tolerance are applied with
        /// muopxv_fast() and accumulated; the time spent is added to \c timer_full.
        /// @param[in]  source  the source key
        /// @param[in]  shift   the displacement, where the source coeffs come from
        /// @param[in]  coeff   source coeffs in full rank
        /// @param[in]  tol     thresh/#neigh*cnorm
        /// @return     a tensor of full rank with the result op(coeff)
        template <typename T>
        Tensor<TENSOR_RESULT_TYPE(T,Q)> apply(const Key<NDIM>& source,
                                              const Key<NDIM>& shift,
                                              const Tensor<T>& coeff,
                                              double tol) const {
            //PROFILE_MEMBER_FUNC(SeparatedConvolution); // Too fine grain for routine profiling
            MADNESS_ASSERT(coeff.ndim()==NDIM);

            typedef TENSOR_RESULT_TYPE(T,Q) resultT;
            const double cpu0=cpu_time();
            const bool use_modified = modified();

            // In the NS form the operator acts on 2k coefficients per dimension. Leaf nodes
            // carry only the k scaling coefficients ... FuncImpl::apply by default does not
            // apply the operator to these since for smoothing operators it is not necessary.
            // It is necessary for operators such as differentiation and time evolution and
            // will also occur if the application of the operator widens the tree. Such input
            // is embedded into a zero-padded 2k tensor.
            const Tensor<T>* input = &coeff;
            Tensor<T> padded;
            if (not use_modified) {
                if (coeff.dim(0) == k) {
                    padded = Tensor<T>(v2k);
                    padded(s0) = coeff;
                    input = &padded;
                } else {
                    MADNESS_ASSERT(coeff.dim(0)==2*k);
                }
            }

            tol = 0.01*tol/rank; // Error is per separated term
            ApplyTerms at;
            at.r_term=true;
            at.t_term=(source.level()>0);

            // SeparatedConvolutionData keeps data for all terms and all dimensions and 1 displacement
            const SeparatedConvolutionData<Q,NDIM>* op = getop(source.level(), shift, source);

            //print("sepop",source,shift,op->norm,tol);

            // result and workspace have k entries per dimension in the modified NS form, 2k otherwise
            const auto& work_dims = use_modified ? vk : v2k;
            Tensor<resultT> r(work_dims), r0(vk);
            Tensor<resultT> work1(work_dims,false), work2(work_dims,false);

            const Tensor<T> f0 = copy(coeff(s0));
            for (int mu=0; mu<rank; ++mu) {
                // SeparatedConvolutionInternal keeps data for 1 term and all dimensions and 1 displacement
                const SeparatedConvolutionInternal<Q,NDIM>& muop =  op->muops[mu];

                // screen negligible terms
                if (not (muop.norm > tol)) continue;

                // ops is of ConvolutionND, returns data for 1 term and all dimensions
                Q fac = ops[mu].getfac();
                muopxv_fast(at, muop.ops, *input, f0, r, r0, tol/std::abs(fac), fac,
                            work1, work2);
            }

            r(s0).gaxpy(1.0,r0,1.0);
            timer_full.accumulate(cpu_time()-cpu0);

            return r;
        }


        /// Apply this operator on only 1 particle of the coefficients in low rank form

        /// Note the unfortunate mess with NDIM: here NDIM is the operator dimension, and the
        /// function has dimension 2*NDIM, whereas in the function we have OPDIM for the operator
        /// and NDIM for the function.
        ///
        /// The singular vectors belonging to particle() are transformed one by one with all
        /// separated terms; the vectors of the other particle are left unchanged. The time spent
        /// is added to \c timer_low_transf (transformation) and \c timer_low_accumulate
        /// (rank reduction and addition).
        /// @tparam T   the scalar type of the coefficients
        /// @param[in]  source  the source key in low dimensionality (NDIM)
        /// @param[in]  shift   the displacement in low dimensionality (NDIM)
        /// @param[in]  coeff   source coeffs in SVD (=optimal!) form, in high dimensionality (2*NDIM)
        /// @param[in]  tol     thresh/(#neigh*cnorm)
        /// @param[in]  tol2    thresh/#neigh
        /// @return     coeff result
        template<typename T>
        GenTensor<TENSOR_RESULT_TYPE(T,Q)> apply2_lowdim(const Key<NDIM>& source,
                const Key<NDIM>& shift, const GenTensor<T>& coeff, double tol, double tol2) const {

            typedef TENSOR_RESULT_TYPE(T,Q) resultT;

            // some checks -- done first, before the SVD representation of coeff is touched
            MADNESS_ASSERT(coeff.is_svd_tensor());           // for now
            MADNESS_ASSERT(not modified());
            MADNESS_ASSERT(not doleaves);
            MADNESS_ASSERT(coeff.dim(0)==2*k);
            MADNESS_ASSERT(2*NDIM==coeff.ndim());

            // prepare access to the singular vectors
            const SVDTensor<T>& svdcoeff=coeff.get_svdtensor();
            std::vector<Slice> s(svdcoeff.dim_per_vector(particle()-1)+1,_);
            // can't use predefined slices and vectors -- they have the wrong dimension
            const std::vector<Slice> s00(coeff.ndim(),Slice(0,k-1));

            double cpu0=cpu_time();
            const SeparatedConvolutionData<Q,NDIM>* op = getop(source.level(), shift, source);

            // some workspace
            Tensor<resultT> work1(v2k,false), work2(v2k,false);

            // sliced input and final result
            const GenTensor<T> f0 = copy(coeff(s00));
            GenTensor<resultT> final=copy(coeff);
            GenTensor<resultT> final0=copy(f0);

            tol = tol/rank*0.01; // Error is per separated term
            tol2= tol2/rank;

            // the operator norm is missing the identity working on the other particle
            // use as (muop.norm*exchange_norm < tol)
            // for some reason the screening is not working at all..
//            double exchange_norm=std::pow(2.0*k,1.5);

            for (int r=0; r<coeff.rank(); ++r) {

                // get the appropriate singular vector (left or right depends on particle)
                // and apply the full tensor muopxv_fast on it, term by term
                s[0]=Slice(r,r);
                const Tensor<T> chunk=svdcoeff.ref_vector(particle()-1)(s).reshape(v2k);
                const Tensor<T> chunk0=f0.get_svdtensor().ref_vector(particle()-1)(s).reshape(vk);

                // accumulate all terms of the operator for a specific term of the function
                Tensor<resultT> result(v2k), result0(vk);

                ApplyTerms at;
                at.r_term=true;
                at.t_term=source.level()>0;

                // this loop will return on result and result0 the terms [(P+Q) G (P+Q)]_1,
                // and [P G P]_1, respectively
                for (int mu=0; mu<rank; ++mu) {
                    const SeparatedConvolutionInternal<Q,NDIM>& muop =  op->muops[mu];
                    Q fac = ops[mu].getfac();
                    muopxv_fast(at, muop.ops, chunk, chunk0, result, result0,
                            tol/std::abs(fac), fac, work1, work2);
                }

                // reinsert the transformed terms into result, leaving the other particle unchanged
                MADNESS_ASSERT(final.get_svdtensor().has_structure());
                final.get_svdtensor().ref_vector(particle()-1)(s)=result;

                if (source.level()>0) {
                    final0.get_svdtensor().ref_vector(particle()-1)(s)=result0;
                } else {
                    // on level 0 this singular vector pair is zeroed for both particles
                    final0.get_svdtensor().ref_vector(0)(s)=0.0;
                    final0.get_svdtensor().ref_vector(1)(s)=0.0;
                }

            }
            double cpu1=cpu_time();
            timer_low_transf.accumulate(cpu1-cpu0);

            double cpu00=cpu_time();

            // compress both parts, add final0 into the s00 block of final, compress again
            final.reduce_rank(tol2*0.5);
            final0.reduce_rank(tol2*0.5);
            final(s00)+=final0;
            final.reduce_rank(tol2);

            double cpu11=cpu_time();
            timer_low_accumulate.accumulate(cpu11-cpu00);
            return final;
        }


        /// Apply this operator to source coefficients held in low-rank (SVD) form.
        ///
        /// Every separated term mu whose norm exceeds the per-term tolerance is applied
        /// with muopxv_fast2() to the leading singular configurations of the input. The
        /// per-term results are gathered in two lists (r_list / r0_list) and finally
        /// summed up and rank-reduced with reduce().
        ///
        /// @param[in]  source  key of the source box; its level and the shift select the operator data via getop()
        /// @param[in]  shift   displacement passed on to getop()
        /// @param[in]  coeff   source coeffs in SVD (=optimal!) form, with ndim()==NDIM
        /// @param[in]  tol     thresh/#neigh*cnorm
        /// @param[in]  tol2    thresh/#neigh
        /// @return     the accumulated result of all contributing operator terms

        template <typename T>


        GenTensor<TENSOR_RESULT_TYPE(T,Q)> apply2(const Key<NDIM>& source,

                                              const Key<NDIM>& shift,

                                              const GenTensor<T>& coeff,

                                              double tol, double tol2) const {

            PROFILE_MEMBER_FUNC(SeparatedConvolution);

            typedef TENSOR_RESULT_TYPE(T,Q) resultT;


            MADNESS_ASSERT(coeff.ndim()==NDIM);

            MADNESS_ASSERT(coeff.is_svd_tensor());  // we use the rank below

//            MADNESS_EXCEPTION("no apply2",1);

            const TensorType tt=TT_2D;


            // input points either to coeff itself or to a zero-padded copy (dummy)
            const GenTensor<T>* input = &coeff;

            GenTensor<T> dummy;


            if (not modified()) {

                if (coeff.dim(0) == k) {

                    // This processes leaf nodes with only scaling

                    // coefficients ... FuncImpl::apply by default does not

                    // apply the operator to these since for smoothing operators

                    // it is not necessary.  It is necessary for operators such

                    // as differentiation and time evolution and will also occur

                    // if the application of the operator widens the tree.

                    // embed the k-sized coefficients into the s0 slice of a v2k-sized tensor
                    dummy = GenTensor<T>(v2k,TT_2D);

                    dummy(s0) += coeff;

                    input = &dummy;

                }

                else {

                    MADNESS_ASSERT(coeff.dim(0)==2*k);

                }

            }


            tol = tol/rank; // Error is per separated term

            tol2= tol2/rank;


            const SeparatedConvolutionData<Q,NDIM>* op = getop(source.level(), shift, source);


            // r/r0 receive the output of a single operator term, work1/work2 are scratch space
            GenTensor<resultT> r, r0, result, result0;

            GenTensor<resultT> work1(v2k,tt), work2(v2k,tt);


            // in the modified case result and workspace are allocated with the vk dimensions
            if (modified()) {

                r=GenTensor<resultT>(vk,tt);

                work1=GenTensor<resultT>(vk,tt);

                work2=GenTensor<resultT>(vk,tt);

            }


            // collect the results of the individual operator terms

            // NOTE(review): the lists hold GenTensor<T> while r and r0 are GenTensor<resultT>;
            // this presumably relies on T and resultT being identical or convertible -- confirm
            std::list<GenTensor<T> > r_list;

            std::list<GenTensor<T> > r0_list;


//            const GenTensor<T> f0 = copy(coeff(s0));

            // f0 is a deep copy of the s0 slice of the (possibly padded) input
            const GenTensor<T> f0 = copy((*input)(s0));

            for (int mu=0; mu<rank; ++mu) {

                const SeparatedConvolutionInternal<Q,NDIM>& muop =  op->muops[mu];

                //print("muop",source, shift, mu, muop.norm);


                // delta(g)  <  delta(T) * || f ||

                // skip operator terms whose norm is below the per-term tolerance
                if (muop.norm > tol) {


                    // get maximum rank of coeff to contribute:

                    //  delta(g)  <  eps  <  || T || * delta(f)

                    //  delta(coeff) * || T || < tol2

                    // NOTE(review): r_max is determined from the rank and weights of coeff, but the
                    // chunks below are taken from *input, which may be the padded dummy -- confirm
                    // that both always carry the same singular values
                    const int r_max=SRConf<T>::max_sigma(tol2/muop.norm,coeff.rank(),coeff.get_svdtensor().weights_);

                    //                  print("r_max",coeff.config().weights(r_max));


                    // note that max_sigma is inclusive!

                    if (r_max>=0) {

                        // restrict input and f0 to the configurations 0..r_max
                        const GenTensor<resultT> chunk=SVDTensor<resultT>(input->get_svdtensor().get_configs(0,r_max));

                        const GenTensor<resultT> chunk0=SVDTensor<resultT>(f0.get_svdtensor().get_configs(0,r_max));


                        double cpu0=cpu_time();


                        // apply term mu; its coefficient fac is passed separately and also rescales the tolerance
                        Q fac = ops[mu].getfac();

                        muopxv_fast2(source.level(), muop.ops, chunk, chunk0, r, r0,

                                tol/std::abs(fac), fac, work1, work2);

                        double cpu1=cpu_time();

                        timer_low_transf.accumulate(cpu1-cpu0);


                        r_list.push_back(r);

                        r0_list.push_back(r0);

                    }

                }

            }


            // finally accumulate all the resultant terms into one tensor

            double cpu0=cpu_time();


            // tol2 was divided by rank above, so tol2*rank restores the caller's value
            result0=reduce(r0_list,tol2*rank);

            // add the summed r0 contributions into the s0 slice of the first r term before the final sum
            if (r_list.size()>0) r_list.front()(s0)+=result0;

            result=reduce(r_list,tol2*rank);

//            result.reduce_rank(tol2*rank);


            double cpu1=cpu_time();

            timer_low_accumulate.accumulate(cpu1-cpu0);

            // record the rank of the final result in the statistics timer
            timer_stats_accumulate.accumulate(result.rank());

            return result;

        }


        /// estimate the ratio of cost of full rank versus low rank


        /// @param[in]  source  source key

        /// @param[in]  shift   displacement

        /// @param[in]  tol     thresh/#neigh/cnorm

        /// @param[in]  tol2    thresh/#neigh

        /// @return cost_ratio  r=-1:   no terms left

        ///                     0<r<1:  better to do full rank

        ///                     1<r:    better to do low rank

        template<typename T>


        double estimate_costs(const Key<NDIM>& source,

                const Key<NDIM>& shift,

                const GenTensor<T>& coeff,

                double tol, double tol2) const {


            if (coeff.is_full_tensor()) return 0.5;

            if (2*NDIM==coeff.ndim()) return 1.5;

            MADNESS_ASSERT(NDIM==coeff.ndim());

            MADNESS_ASSERT(coeff.is_svd_tensor());


            const SeparatedConvolutionData<Q,NDIM>* op = getop(source.level(), shift, source);


            tol = tol/rank; // Error is per separated term

            tol2= tol2/rank;


            const double full_operator_cost=pow(coeff.dim(0),NDIM+1);

            const double low_operator_cost=pow(coeff.dim(0),NDIM/2+1);

            const double low_reduction_cost=pow(coeff.dim(0),NDIM/2);


            double full_cost=0.0;

            double low_cost=0.0;


            long initial_rank=0;

            long final_rank=sqrt(coeff.size())*0.05;    // size=ncol*nrow; final rank is 5% of max rank


            for (int mu=0; mu<rank; ++mu) {

                const SeparatedConvolutionInternal<Q,NDIM>& muop =  op->muops[mu];


                // delta(g)  <  delta(T) * || f ||

                if (muop.norm > tol) {

                    // note that max_sigma is inclusive: it returns a slice w(Slice(0,i))

                    long nterms=SRConf<T>::max_sigma(tol2/muop.norm,coeff.rank(),coeff.get_svdtensor().weights_)+1;


                    // take only the first overlap computation of rank reduction into account

//                    low_cost+=nterms*low_operator_cost + 2.0*nterms*nterms*low_reduction_cost;

                    initial_rank+=nterms;


                    full_cost+=full_operator_cost;

                }

            }

            low_cost=initial_rank*low_operator_cost + initial_rank*final_rank*low_reduction_cost;


            // include random empirical factor of 2

            double ratio=-1.0;

            if (low_cost>0.0) ratio=full_cost/low_cost;

//            print("nterms, full, low, full/low", full_cost, low_cost,shift.distsq(), ratio);

            return ratio;


        }


        /// Construct the tensor-train (TT) representation of the operator.
        ///
        /// The R and/or T matrices of the significant separated terms are written into
        /// NDIM TT cores, the coefficient of each term is multiplied into the last core,
        /// and the resulting train is truncated with the given threshold.
        ///
        /// @param[in]  source  source coefficient box
        /// @param[in]  shift   displacement
        /// @param[in]  tol     threshold for the TT truncation
        /// @param[in]  do_R    compute the R term of the operator (2k^d)
        /// @param[in]  do_T    compute the T term of the operator (k^d), including factor -1
        /// @return     the truncated tensor train, converted with make_operator()
        ///
        /// Both do_R and do_T may be used simultaneously, then the final
        /// operator will have dimensions (2k^d). Requesting neither raises a
        /// MADNESS_EXCEPTION.

        TensorTrain<double> make_tt_representation(const Key<NDIM>& source,

                const Key<NDIM>& shift, double tol, bool do_R, bool do_T) const {


            if (not (do_R or do_T)) {

                print("no operator requested in make_tt_representation??");

                MADNESS_EXCEPTION("you're sure you know what you're doing?",1);

            }


            const SeparatedConvolutionData<Q,NDIM>* op = getop(source.level(), shift, source);


            // check for significant ranks since the R/T matrices' construction

            // might have been omitted. Tnorm is always smaller than Rnorm

            // hi ends up as the last index with a product Rnorm above 1e-20, lo as the last
            // index below the threshold that precedes it; both are incremented afterwards so
            // that [lo,hi) is the range of terms copied into the cores.
            // NOTE(review): if term 0 is already significant lo stays 0 and becomes 1 after
            // lo++, so term 0 is not copied -- confirm this is intended.
            // NOTE(review): if no term is significant hi stays at rank and the fill loop
            // below would access muops[rank] -- confirm callers exclude this case.
            long lo=0,hi=rank;

            for (int mu=0; mu<rank; ++mu) {

                double Rnorm=1.0;

                for (std::size_t d=0; d<NDIM; ++d) Rnorm *= op->muops[mu].ops[d]->Rnorm;

                if (Rnorm>1.e-20) hi=mu;

                if ((Rnorm<1.e-20) and (mu<hi)) lo=mu;

            }

            hi++;lo++;


            // think about dimensions

            // one bond index per term; two (R and T) per term if both are requested
            long rank_eff=(hi-lo);    // R or T matrices

            long step=1;

            if (do_R and do_T) {        // R and T matrices

                rank_eff*=2;

                step*=2;

            }


            long k2k=k;             // T matrices

            if (do_R) k2k=2*k;      // R matrices


            // construct empty TT cores and fill them with the significant R/T matrices

            // interior cores are (rank_eff,k2k,k2k,rank_eff); the first and last core
            // carry only one bond index
            std::vector<Tensor<double> > cores(NDIM,Tensor<double>(rank_eff,k2k,k2k,rank_eff));

            cores[0]=Tensor<double>(k2k,k2k,rank_eff);

            cores[NDIM-1]=Tensor<double>(rank_eff,k2k,k2k);


            for (int mu=lo, r=0; mu<hi; ++mu, ++r) {

                const SeparatedConvolutionInternal<Q,NDIM>& muop =  op->muops[mu];

                const Q fac = ops[mu].getfac();

                // sr0 / sr1: bond index of the R and the T part of term r; they coincide
                // if only one of the two is requested (step==1)
                const Slice sr0(step*r,  step*r,  0);

                const Slice sr1(step*r+step-1,step*r+step-1,0);

                // s00 presumably addresses the leading k entries of a mode -- confirm Slice end convention
                const Slice s00(0,k-1,1);


                if (do_R) {

                    cores[0](_,  _  ,sr0)=muop.ops[0]->R;

                    for (std::size_t idim=1; idim<NDIM-1; ++idim) {

                          cores[idim](sr0,_  ,_  ,sr0)=muop.ops[idim]->R;

                    }

                    // the coefficient of the term is multiplied into the last core only
                    cores[NDIM-1](sr0,_  ,_  )=muop.ops[NDIM-1]->R*fac;

                }


                if (do_T) {

                    cores[0](s00,s00,sr1)=muop.ops[0]->T;

                    for (std::size_t idim=1; idim<NDIM-1; ++idim) {

                        cores[idim](sr1,s00,s00,sr1)=muop.ops[idim]->T;

                    }

                    // the T term enters with the opposite sign
                    cores[NDIM-1](sr1,s00,s00)=muop.ops[NDIM-1]->T*(-fac);

                }

            }


            // construct TT representation

            TensorTrain<double> tt(cores);


            // need to reshape for the TT truncation

            tt.make_tensor();

            tt.truncate(tol*GenTensor<double>::fac_reduce());

            tt.make_operator();


            return tt;

        }


        static bool can_combine(const SeparatedConvolution<Q,NDIM>& left, const SeparatedConvolution<Q,NDIM>& right) {

            return (combine_OT(left,right).type!=OT_UNDEFINED);

        }


        /// Return operator type and other info of the combined operator (e.g. fg = f(1,2) * g(1,2)).
        ///
        /// The result starts out as a copy of left.info and is replaced by right.info for
        /// most pairs; unknown pairs raise a MADNESS_EXCEPTION.
        /// @param[in]  left    left operator of the product
        /// @param[in]  right   right operator of the product
        /// @return     the OperatorInfo describing the product operator
        static OperatorInfo combine_OT(const SeparatedConvolution<Q,NDIM>& left, const SeparatedConvolution<Q,NDIM>& right) {
            const auto ltype=left.info.type;
            const auto rtype=right.info.type;

            // true if (left,right) carry exactly the given pair of operator types
            const auto is_pair=[ltype,rtype](decltype(ltype) l, decltype(rtype) r) {
                return (ltype==l) and (rtype==r);
            };

            OperatorInfo info=left.info;

            if (is_pair(OT_F12,OT_G12)) {
                info.type=OT_FG12;

            } else if (is_pair(OT_GAUSS,OT_GAUSS) or is_pair(OT_SLATER,OT_SLATER)) {
                // same kind on both sides: keep the kind, double the exponent of the right operator
                // NOTE(review): no check here that left and right share the same mu
                info=right.info;
                info.type=rtype;
                info.mu=2.0*right.info.mu;

            } else if (is_pair(OT_G12,OT_F12)) {
                info=right.info;
                info.type=OT_FG12;

            } else if (is_pair(OT_G12,OT_F212)) {
                info=right.info;
                info.type=OT_F2G12;

            } else if (is_pair(OT_F212,OT_G12) or is_pair(OT_F12,OT_FG12) or is_pair(OT_FG12,OT_F12)) {
                info=right.info;
                info.type=OT_F2G12;
                // both factors carry an exponent unless the right one is the Coulomb operator
                if (rtype!=OT_G12) {
                    MADNESS_CHECK(right.info.mu == left.info.mu);
                }

            } else if (is_pair(OT_F12,OT_F12)) {
                // keep the original gamma
                // (f12)^2 = (1/(2 gamma) (1 - slater12))^2 = 1/(4 gamma^2) (1 - 2 exp(-gamma) + exp(-2 gamma))
                info.type=OT_F212;
                MADNESS_CHECK(right.info.mu == left.info.mu);

            } else {
                MADNESS_EXCEPTION("unknown combination of SeparatedConvolutions: feel free to extend in operator.h",1);
            }

            return info;
        }


        /// Combine two convolution operators into one.
        ///
        /// Both operators must be combinable, live in the same world and agree on the
        /// lattice summation; the new operator uses the polynomial order of the left one.
        /// @param[in]  left    left operator of the product
        /// @param[in]  right   right operator of the product
        /// @return     a new operator built from the combined OperatorInfo
        static SeparatedConvolution<Q,NDIM> combine(const SeparatedConvolution<Q,NDIM>& left,
                                                    const SeparatedConvolution<Q,NDIM>& right) {
            // preconditions
            MADNESS_CHECK(can_combine(left,right));
            MADNESS_CHECK(left.get_world().id()==right.get_world().id());
            MADNESS_CHECK(left.lattice_summed() == right.lattice_summed());

            const OperatorInfo merged=combine_OT(left,right);
            return SeparatedConvolution<Q,NDIM>(left.get_world(),merged,left.lattice_summed(),left.k);
        }


        /// Combine two convolution operators, given as shared pointers, into one.
        ///
        /// If only one of the pointers is set a copy of that operator is returned;
        /// if both are empty a MADNESS_EXCEPTION is raised.
        /// @param[in]  left    left operator of the product, may be empty
        /// @param[in]  right   right operator of the product, may be empty
        /// @return     the combined operator, or a copy of the only operator present
        friend SeparatedConvolution<Q,NDIM> combine(const std::shared_ptr<SeparatedConvolution<Q,NDIM>> left,
                                                    const std::shared_ptr<SeparatedConvolution<Q,NDIM>> right) {
            SeparatedConvolution<Q,NDIM> result;

            if (left) {
                if (right) return combine(*left, *right);
                return *left;
            }
            if (right) return *right;

            // neither operator is present
            MADNESS_EXCEPTION("can't combine empty SeparatedConvolutions",1);
            return result;
        }


    };


    /// Factory function generating a complex-valued separated kernel for convolution with 1/r in 3D.
    ///
    /// The kernel is fitted with GFit::CoulombFit on [lo,hi], where hi is the diagonal
    /// width of the cell, enlarged by a factor 100 if any direction is lattice summed.
    /// In that case the Gaussian expansion is also truncated with
    /// truncate_periodic_expansion(). `args` is handed on to the operator constructor.
    static inline SeparatedConvolution<double_complex,3>
    PeriodicHFExchangeOperator(World& world,
                               Vector<double,3> args,
                               double lo,
                               double eps,
                               const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                               int k=FunctionDefaults<3>::get_k()) {
        const Tensor<double>& cell_width = FunctionDefaults<3>::get_cell_width();
        const auto sum_over_lattice = lattice_sum.any();

        // upper end of the fit range: diagonal width of the cell,
        // extended for the lattice summation
        double hi = cell_width.normf();
        if (sum_over_lattice) hi *= 100;

        GFit<double, 3> fit = GFit<double, 3>::CoulombFit(lo, hi, eps, false);
        Tensor<double> gauss_coeff = fit.coeffs();
        Tensor<double> gauss_expnt = fit.exponents();

        if (sum_over_lattice) {
            fit.truncate_periodic_expansion(gauss_coeff, gauss_expnt, cell_width.max(), true);
        }

        return SeparatedConvolution<double_complex, 3>(world, args, gauss_coeff, gauss_expnt,
                                                       lattice_sum, k, false);
    }


    /// Factory function generating separated kernel for convolution with 1/r in 3D.
    ///
    /// Builds the operator from an OperatorInfo of type OT_G12 with mu=0.
    static inline SeparatedConvolution<double,3>
    CoulombOperator(World& world, double lo, double eps,
                    const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                    int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo coulomb_info(0.0,lo,eps,OT_G12);
        return SeparatedConvolution<double,3>(world,coulomb_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with 1/r in 3D.
    ///
    /// Same as CoulombOperator, but the operator is allocated with `new` and
    /// returned as a raw pointer.
    static inline SeparatedConvolution<double,3>*
    CoulombOperatorPtr(World& world, double lo, double eps,
                       const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                       int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo coulomb_info(0.0,lo,eps,OT_G12);
        return new SeparatedConvolution<double,3>(world,coulomb_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with BSH kernel in general NDIM
    ///
    /// Values of eps larger than 1.e-4 are rejected: rank 0 prints a message and a
    /// MADNESS_EXCEPTION is raised.
    template <std::size_t NDIM>
    static inline SeparatedConvolution<double,NDIM>
    BSHOperator(World& world, double mu, double lo, double eps,
                const array_of_bools<NDIM>& lattice_sum = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                int k=FunctionDefaults<NDIM>::get_k()) {
        const bool eps_too_loose = (eps>1.e-4);
        if (eps_too_loose) {
            if (world.rank()==0) {
                print("the accuracy in BSHOperator is too small, tighten the threshold",eps);
            }
            MADNESS_EXCEPTION("0",1);
        }
        const OperatorInfo bsh_info(mu,lo,eps,OT_BSH);
        return SeparatedConvolution<double,NDIM>(world,bsh_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with BSH kernel in general NDIM
    ///
    /// Same as BSHOperator, but the operator is allocated with `new` and returned as
    /// a raw pointer. Values of eps larger than 1.e-4 are rejected with a
    /// MADNESS_EXCEPTION.
    template <std::size_t NDIM>
    static inline SeparatedConvolution<double,NDIM>*
    BSHOperatorPtr(World& world, double mu, double lo, double eps,
                   const array_of_bools<NDIM>& lattice_sum = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                   int k=FunctionDefaults<NDIM>::get_k()) {
        const bool eps_too_loose = (eps>1.e-4);
        if (eps_too_loose) {
            if (world.rank()==0) {
                print("the accuracy in BSHOperator is too small, tighten the threshold",eps);
            }
            MADNESS_EXCEPTION("0",1);
        }
        const OperatorInfo bsh_info(mu,lo,eps,OT_BSH);
        return new SeparatedConvolution<double,NDIM>(world,bsh_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with exp(-mu*r)/(4*pi*r) in 3D
    ///
    /// Builds the operator from an OperatorInfo of type OT_BSH; no check on eps is made here.
    static inline SeparatedConvolution<double,3>
    BSHOperator3D(World& world, double mu, double lo, double eps,
                  const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                  int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo bsh_info(mu,lo,eps,OT_BSH);
        return SeparatedConvolution<double,3>(world,bsh_info,lattice_sum,k);
    }


    /// Factory function generating a complex-valued separated kernel for convolution
    /// with exp(-mu*r)/(4*pi*r) in 3D
    ///
    /// The kernel is fitted with GFit::BSHFit on [lo,hi], where hi is the diagonal
    /// width of the cell, enlarged by a factor 100 if any direction is lattice summed.
    /// In that case the Gaussian expansion is also truncated with
    /// truncate_periodic_expansion(). `args` is handed on to the operator constructor.
    static inline SeparatedConvolution<double_complex,3>
    PeriodicBSHOperator3D(World& world,
                          Vector<double,3> args,
                          double mu,
                          double lo,
                          double eps,
                          const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                          int k=FunctionDefaults<3>::get_k())
    {
        const Tensor<double>& cell_width = FunctionDefaults<3>::get_cell_width();
        const auto sum_over_lattice = lattice_sum.any();

        // upper end of the fit range: diagonal width of the cell,
        // extended for the lattice summation
        double hi = cell_width.normf();
        if (sum_over_lattice) hi *= 100;

        GFit<double, 3> fit = GFit<double, 3>::BSHFit(mu, lo, hi, eps, false);
        Tensor<double> gauss_coeff = fit.coeffs();
        Tensor<double> gauss_expnt = fit.exponents();

        if (sum_over_lattice) {
            fit.truncate_periodic_expansion(gauss_coeff, gauss_expnt, cell_width.max(), false);
        }

        return SeparatedConvolution<double_complex, 3>(world, args, gauss_coeff, gauss_expnt,
                                                       lattice_sum, k);
    }


    /// Factory function generating a complex-valued separated kernel for convolution
    /// with exp(-mu*r)/(4*pi*r) in 3D
    ///
    /// Same construction as PeriodicBSHOperator3D (BSHFit on [lo,hi], fit range and
    /// expansion adapted if any direction is lattice summed), but the operator is
    /// allocated with `new` and returned as a raw pointer.
    static inline SeparatedConvolution<double_complex,3>*
    PeriodicBSHOperatorPtr3D(World& world,
                             Vector<double,3> args,
                             double mu,
                             double lo,
                             double eps,
                             const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                             int k=FunctionDefaults<3>::get_k())
    {
        const Tensor<double>& cell_width = FunctionDefaults<3>::get_cell_width();
        const auto sum_over_lattice = lattice_sum.any();

        // upper end of the fit range: diagonal width of the cell,
        // extended for the lattice summation
        double hi = cell_width.normf();
        if (sum_over_lattice) hi *= 100;

        GFit<double, 3> fit = GFit<double, 3>::BSHFit(mu, lo, hi, eps, false);
        Tensor<double> gauss_coeff = fit.coeffs();
        Tensor<double> gauss_expnt = fit.exponents();

        if (sum_over_lattice) {
            fit.truncate_periodic_expansion(gauss_coeff, gauss_expnt, cell_width.max(), false);
        }

        return new SeparatedConvolution<double_complex, 3>(world, args, gauss_coeff,
                                                           gauss_expnt, lattice_sum, k);
    }


    /// Factory function generating a separated kernel of operator type OT_F12 in 3D.
    static inline SeparatedConvolution<double,3>
    SlaterF12Operator(World& world, double mu, double lo, double eps,
                      const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                      int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo f12_info(mu,lo,eps,OT_F12);
        return SeparatedConvolution<double,3>(world,f12_info,lattice_sum,k);
    }


    /// Factory function generating a separated kernel of operator type OT_F212 in 3D.
    static inline SeparatedConvolution<double,3>
    SlaterF12sqOperator(World& world, double mu, double lo, double eps,
                        const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                        int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo f12sq_info(mu,lo,eps,OT_F212);
        return SeparatedConvolution<double,3>(world,f12sq_info,lattice_sum,k);
    }


    /// Factory function generating a separated kernel of operator type OT_F212 in 3D.
    ///
    /// The operator is allocated with `new` and returned as a raw pointer.
    static inline SeparatedConvolution<double,3>*
    SlaterF12sqOperatorPtr(World& world, double mu, double lo, double eps,
                           const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                           int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo f12sq_info(mu,lo,eps,OT_F212);
        return new SeparatedConvolution<double,3>(world,f12sq_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with exp(-mu*r)
    /// in NDIM dimensions (NDIM defaults to 3)
    template<std::size_t NDIM=3>
    static inline SeparatedConvolution<double,NDIM>
    SlaterOperator(World& world, double mu, double lo, double eps,
                   const array_of_bools<NDIM>& lattice_sum = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                   int k=FunctionDefaults<NDIM>::get_k()) {
        const OperatorInfo slater_info(mu,lo,eps,OT_SLATER);
        return SeparatedConvolution<double,NDIM>(world,slater_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with exp(-mu*r*r)
    ///
    /// lo and eps are not used here (they default to 0 and are only stored in the OperatorInfo)
    template<std::size_t NDIM>
    static inline SeparatedConvolution<double,NDIM>
    GaussOperator(World& world, double mu, double lo=0.0, double eps=0.0,
                  const array_of_bools<NDIM>& lattice_sum = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                  int k=FunctionDefaults<NDIM>::get_k()) {
        const OperatorInfo gauss_info(mu,lo,eps,OT_GAUSS);
        return SeparatedConvolution<double,NDIM>(world,gauss_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with exp(-mu*r*r)
    /// in NDIM dimensions
    ///
    /// lo and eps are not used here (they default to 0 and are only stored in the OperatorInfo).
    /// The operator is allocated with `new` and returned as a raw pointer.
    template<std::size_t NDIM>
    static inline SeparatedConvolution<double, NDIM>*
    GaussOperatorPtr(World& world, double mu, double lo=0.0, double eps=0.0,
                     const array_of_bools<NDIM>& lattice_sum = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                     int k = FunctionDefaults<NDIM>::get_k()) {
        const OperatorInfo gauss_info(mu,lo,eps,OT_GAUSS);
        return new SeparatedConvolution<double,NDIM>(world,gauss_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with exp(-mu*r)
    /// in NDIM dimensions
    ///
    /// Note that the 1/(2mu) factor of SlaterF12Operator is not included, this is just the exponential function.
    /// The operator is allocated with `new` and returned as a raw pointer.
    template<std::size_t NDIM>
    static inline SeparatedConvolution<double, NDIM>*
    SlaterOperatorPtr_ND(World& world, double mu, double lo, double eps,
                         const array_of_bools<NDIM>& lattice_sum = FunctionDefaults<NDIM>::get_bc().is_periodic(),
                         int k = FunctionDefaults<NDIM>::get_k()) {
        const OperatorInfo slater_info(mu,lo,eps,OT_SLATER);
        return new SeparatedConvolution<double,NDIM>(world,slater_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with exp(-mu*r) in 3D
    ///
    /// Note that the 1/(2mu) factor of SlaterF12Operator is not included, this is just the exponential function.
    /// The operator is allocated with `new` and returned as a raw pointer.
    static inline SeparatedConvolution<double, 3>*
    SlaterOperatorPtr(World& world, double mu, double lo, double eps,
                      const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                      int k = FunctionDefaults<3>::get_k()) {
        const OperatorInfo slater_info(mu,lo,eps,OT_SLATER);
        return new SeparatedConvolution<double,3>(world,slater_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with (1 - exp(-mu*r))/(2 mu) in 3D
    ///
    /// includes the factor 1/(2 mu).
    /// The operator is allocated with `new` and returned as a raw pointer.
    static inline SeparatedConvolution<double,3>*
    SlaterF12OperatorPtr(World& world, double mu, double lo, double eps,
                         const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                         int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo f12_info(mu,lo,eps,OT_F12);
        return new SeparatedConvolution<double,3>(world,f12_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with 1/(2 mu)*(1 - exp(-mu*r))/r in 3D
    ///
    /// fg = (1 - exp(-gamma r12))  / r12 = 1/r12 - exp(-gamma r12)/r12 = coulomb - bsh
    /// includes the factor 1/(2 mu)
    static inline SeparatedConvolution<double,3>
    FGOperator(World& world, double mu, double lo, double eps,
               const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
               int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo fg_info(mu,lo,eps,OT_FG12);
        return SeparatedConvolution<double,3>(world,fg_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with 1/(2 mu)*(1 - exp(-mu*r))/r in 3D
    ///
    /// fg = (1 - exp(-gamma r12))  / r12 = 1/r12 - exp(-gamma r12)/r12 = coulomb - bsh
    /// includes the factor 1/(2 mu).
    /// The operator is allocated with `new` and returned as a raw pointer.
    static inline SeparatedConvolution<double,3>*
    FGOperatorPtr(World& world, double mu, double lo, double eps,
                  const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                  int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo fg_info(mu,lo,eps,OT_FG12);
        return new SeparatedConvolution<double,3>(world,fg_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with (1/(2 mu)*(1 - exp(-mu*r)))^2/r in 3D
    ///
    /// f2g = (1/(2 gamma) (1 - exp(-gamma r12)))^2  / r12
    ///     = 1/(4 gamma^2) * [ 1/r12 - 2 exp(-gamma r12)/r12 + exp(-2 gamma r12)/r12 ]
    /// includes the factor 1/(2 mu)^2.
    /// The operator is allocated with `new` and returned as a raw pointer.
    static inline SeparatedConvolution<double,3>*
    F2GOperatorPtr(World& world, double mu, double lo, double eps,
                   const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                   int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo f2g_info(mu,lo,eps,OT_F2G12);
        return new SeparatedConvolution<double,3>(world,f2g_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with (1/(2 mu)*(1 - exp(-mu*r)))^2/r in 3D
    ///
    /// f2g = (1/(2 gamma) (1 - exp(-gamma r12)))^2  / r12
    ///     = 1/(4 gamma^2) * [ 1/r12 - 2 exp(-gamma r12)/r12 + exp(-2 gamma r12)/r12 ]
    /// includes the factor 1/(2 mu)^2
    static inline SeparatedConvolution<double,3>
    F2GOperator(World& world, double mu, double lo, double eps,
                const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                int k=FunctionDefaults<3>::get_k()) {
        const OperatorInfo f2g_info(mu,lo,eps,OT_F2G12);
        return SeparatedConvolution<double,3>(world,f2g_info,lattice_sum,k);
    }


    /// Factory function generating separated kernel for convolution with a normalized
    /// Gaussian (aka a widened delta function)

    /// The kernel consists of a single Gaussian term with exponent 1/(2 eps)
    /// and coefficient (exponent/pi)^(3/2).
    /// \param[in] world the World handed to the operator's constructor
    /// \param[in] eps sets the Gaussian exponent 1/(2 eps); also passed on as the constructor's \c thresh argument
    /// \param[in] lattice_sum per-axis lattice-summation flags; defaults to the periodicity of the default boundary conditions
    /// \param[in] k forwarded to the operator; defaults to FunctionDefaults<3>::get_k()
    /// \return the smoothing operator, returned by value
    static inline SeparatedConvolution<double,3> SmoothingOperator3D(World& world,
            double eps,
            const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
            int k=FunctionDefaults<3>::get_k()) {

        const double alpha = 1.0/(2.0*eps);

        // one-term expansion: a single coefficient/exponent pair
        Tensor<double> coeffs(1);
        Tensor<double> exponents(1);
        coeffs(0L) = pow(alpha/M_PI, 1.5);  // norm of the gaussian
        exponents(0L) = alpha;

        // 1.e-8 is passed as the constructor's lo argument, eps as its thresh
        return SeparatedConvolution<double,3>(world, coeffs, exponents, 1.e-8, eps, lattice_sum, k);
    }


    /// Factory function generating separated kernel for convolution with a normalized
    /// Gaussian (aka a widened delta function)

    /// Dimension-generic variant: the kernel consists of a single Gaussian term
    /// with exponent 1/(2 eps) and coefficient (exponent/pi)^(NDIM/2).
    /// \tparam NDIM number of dimensions of the operator
    /// \param[in] world the World handed to the operator's constructor
    /// \param[in] eps sets the Gaussian exponent 1/(2 eps); also passed on as the constructor's \c thresh argument
    /// \param[in] lattice_sum per-axis lattice-summation flags; defaults to the periodicity of the default boundary conditions
    /// \param[in] k forwarded to the operator; defaults to FunctionDefaults<NDIM>::get_k()
    /// \return the smoothing operator, returned by value
    template<std::size_t NDIM>
    static inline SeparatedConvolution<double,NDIM> SmoothingOperator(World& world,
            double eps,
            const array_of_bools<NDIM>& lattice_sum = FunctionDefaults<NDIM>::get_bc().is_periodic(),
            int k=FunctionDefaults<NDIM>::get_k()) {

        const double alpha = 1.0/(2.0*eps);

        // one-term expansion: a single coefficient/exponent pair
        Tensor<double> coeffs(1);
        Tensor<double> exponents(1);
        coeffs(0L) = pow(alpha/M_PI, 0.5*NDIM);  // norm of the gaussian
        exponents(0L) = alpha;

        // 1.e-8 is passed as the constructor's lo argument, eps as its thresh
        return SeparatedConvolution<double,NDIM>(world, coeffs, exponents, 1.e-8, eps, lattice_sum, k);
    }


    /// Factory function generating separated kernel for convolution with exp(-mu*r)/(4*pi*r) in 3D

    /// Fits the kernel with GFit::BSHFit over [lo, hi], where hi is the diagonal
    /// of the user cell (enlarged 100x if any axis is lattice summed or if the
    /// default boundary conditions are periodic along any axis). If any axis is
    /// lattice summed, the expansion is additionally truncated via
    /// GFit::truncate_periodic_expansion using the largest cell width among the
    /// lattice-summed axes.
    /// \param[in] world the World handed to the operator's constructor
    /// \param[in] mu BSH exponent, forwarded to the fit
    /// \param[in] lo lower end of the fit range; also forwarded to the operator
    /// \param[in] eps precision of the fit; also forwarded to the operator
    /// \param[in] lattice_summed per-axis lattice-summation flags; defaults to the periodicity of the default boundary conditions
    /// \param[in] k forwarded to the operator; defaults to FunctionDefaults<3>::get_k()
    /// \return pointer to an operator allocated with \c new ; nothing here releases it
    static
    inline
    SeparatedConvolution<double,3>* BSHOperatorPtr3D(World& world,
                                                     double mu,
                                                     double lo,
                                                     double eps,
                                                     const array_of_bools<3>& lattice_summed = FunctionDefaults<3>::get_bc().is_periodic(),
                                                     int k=FunctionDefaults<3>::get_k()) {
      const Tensor<double>& widths = FunctionDefaults<3>::get_cell_width();
      const bool any_axis_summed = lattice_summed.any();

      // Upper end of the fit range: diagonal width of the cell ...
      double hi = widths.normf();
      // ... extended for lattice summation.
      // N.B. with periodic boundaries the range is extended as well, just in
      // case the operator will be used on a periodic domain
      const bool extend_range =
          any_axis_summed || FunctionDefaults<3>::get_bc().is_periodic_any();
      if (extend_range) {
        hi *= 100;
      }

      auto fit = GFit<double, 3>::BSHFit(mu, lo, hi, eps, false);
      Tensor<double> coeff = fit.coeffs();
      Tensor<double> expnt = fit.exponents();

      if (any_axis_summed) {
        // Gaussians with exponents <= 0.25/(L^2) contribute only a constant
        // shift to the convolution, so the largest spacing along the lattice
        // summed axes fixes the smallest exponent that NEEDS to be kept
        double longest_summed_width = 0;
        for (int axis = 0; axis < 3; ++axis) {
          if (!lattice_summed[axis]) continue;
          longest_summed_width =
              std::max(longest_summed_width, widths(axis));
        }
        // WARNING: discardG0 = true ignores the coefficients of truncated
        //          terms
        fit.truncate_periodic_expansion(coeff, expnt, longest_summed_width,
                                        /* discardG0 = */ false);
      }

      return new SeparatedConvolution<double, 3>(world, coeff, expnt, lo, eps,
                                                 lattice_summed, k);
    }


    /// Factory function generating operator for convolution with grad(1/r) in 3D

    /// Returns a 3-vector containing the convolution operator for the
    /// x, y, and z components of grad(1/r)
    ///
    /// The Coulomb kernel is fitted with GFit::CoulombFit over [lo, hi], where
    /// hi is the diagonal of the user cell (enlarged 100x if any axis is lattice
    /// summed). Each term of the fit becomes a ConvolutionND that uses the
    /// first-derivative Gaussian along the gradient direction and the plain
    /// Gaussian along the other two axes.
    /// \param[in] world the World handed to the operators' constructors
    /// \param[in] lo lower end of the fit range
    /// \param[in] eps precision of the fit
    /// \param[in] lattice_sum per-axis lattice-summation flags; defaults to the periodicity of the default boundary conditions
    /// \param[in] k used for the 1D Gaussian convolutions and forwarded to the operators; defaults to FunctionDefaults<3>::get_k()
    /// \return vector of 3 shared pointers, one operator per Cartesian direction
    static
    inline
    std::vector< std::shared_ptr< SeparatedConvolution<double,3> > >
    GradCoulombOperator(World& world,
                        double lo,
                        double eps,
                        const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                        int k=FunctionDefaults<3>::get_k()) {
      typedef SeparatedConvolution<double, 3> real_convolution_3d;
      typedef std::shared_ptr<real_convolution_3d> real_convolution_3d_ptr;
      const double pi = constants::pi;
      const Tensor<double> width = FunctionDefaults<3>::get_cell_width();
      double hi = width.normf(); // Diagonal width of cell
      // Extend kernel range for lattice summation
      const auto lattice_sum_any = lattice_sum.any();
      if (lattice_sum_any) {
        hi *= 100;
      }

      GFit<double, 3> fit = GFit<double, 3>::CoulombFit(lo, hi, eps, false);
      Tensor<double> coeff = fit.coeffs();
      Tensor<double> expnt = fit.exponents();

      if (lattice_sum_any) {
        fit.truncate_periodic_expansion(coeff, expnt, width.max(),
                                        /* discardG0 = */ true);
      }

      const int rank = coeff.dim(0);

      std::vector<real_convolution_3d_ptr> gradG(3);

      for (int dir = 0; dir < 3; dir++) {
        std::vector<ConvolutionND<double, 3>> ops(rank);
        for (int term = 0; term < rank; term++) {
          // We cache the normalized operator so the factor is the value we must multiply by to recover the coeff we want.
          const double c = std::pow(sqrt(expnt(term) / pi), 3); // Normalization coeff
          ops[term].setfac(coeff(term) / c / width[dir]);

          // plain Gaussian (derivative order 0) along the two other axes
          for (int d = 0; d < 3; d++) {
            if (d != dir)
              ops[term].setop(d, GaussianConvolution1DCache<double>::get(
                                     k, expnt(term) * width[d] * width[d], 0,
                                     lattice_sum[d]));
          }
          // first-derivative Gaussian along the gradient direction
          ops[term].setop(dir, GaussianConvolution1DCache<double>::get(
                                   k, expnt(term) * width[dir] * width[dir], 1,
                                   lattice_sum[dir]));
        }
        // Forward k: the constructor would otherwise fall back to
        // FunctionDefaults<3>::get_k(), which need not match the k the 1D
        // convolutions above were built with.
        gradG[dir] = real_convolution_3d_ptr(
            new SeparatedConvolution<double, 3>(world, ops, k));
      }

      return gradG;
    }


    /// Factory function generating operator for convolution with grad(bsh) in 3D

    /// Returns a 3-vector containing the convolution operator for the
    /// x, y, and z components of grad(bsh)
    ///
    /// The BSH kernel is fitted with GFit::BSHFit over [lo, hi], where hi is the
    /// diagonal of the user cell (enlarged 100x if any axis is lattice summed).
    /// Each term of the fit becomes a ConvolutionND that uses the
    /// first-derivative Gaussian along the gradient direction and the plain
    /// Gaussian along the other two axes.
    /// \param[in] world the World handed to the operators' constructors
    /// \param[in] mu BSH exponent, forwarded to the fit
    /// \param[in] lo lower end of the fit range
    /// \param[in] eps precision of the fit
    /// \param[in] lattice_sum per-axis lattice-summation flags; defaults to the periodicity of the default boundary conditions
    /// \param[in] k used for the 1D Gaussian convolutions and forwarded to the operators; defaults to FunctionDefaults<3>::get_k()
    /// \return vector of 3 shared pointers, one operator per Cartesian direction
    static
    inline
    std::vector< std::shared_ptr< SeparatedConvolution<double,3> > >
    GradBSHOperator(World& world,
                        double mu,
                        double lo,
                        double eps,
                        const array_of_bools<3>& lattice_sum = FunctionDefaults<3>::get_bc().is_periodic(),
                        int k=FunctionDefaults<3>::get_k()) {
      typedef SeparatedConvolution<double, 3> real_convolution_3d;
      typedef std::shared_ptr<real_convolution_3d> real_convolution_3d_ptr;
      const double pi = constants::pi;
      const Tensor<double> width = FunctionDefaults<3>::get_cell_width();
      double hi = width.normf(); // Diagonal width of cell
      // Extend kernel range for lattice summation
      const auto lattice_sum_any = lattice_sum.any();
      if (lattice_sum_any) {
        hi *= 100;
      }

      GFit<double, 3> fit = GFit<double, 3>::BSHFit(mu, lo, hi, eps, false);
      Tensor<double> coeff = fit.coeffs();
      Tensor<double> expnt = fit.exponents();

      if (lattice_sum_any) {
        fit.truncate_periodic_expansion(coeff, expnt, width.max(),
                                        /* discardG0 = */ true);
      }

      const int rank = coeff.dim(0);

      std::vector<real_convolution_3d_ptr> gradG(3);

      for (int dir = 0; dir < 3; dir++) {
        std::vector<ConvolutionND<double, 3>> ops(rank);
        // the term index is deliberately not called mu, so that it does not
        // shadow the BSH exponent parameter
        for (int term = 0; term < rank; term++) {
          // We cache the normalized operator so the factor is the value we must multiply by to recover the coeff we want.
          const double c = std::pow(sqrt(expnt(term) / pi), 3); // Normalization coeff
          ops[term].setfac(coeff(term) / c / width[dir]);

          // plain Gaussian (derivative order 0) along the two other axes
          for (int d = 0; d < 3; d++) {
            if (d != dir)
              ops[term].setop(d, GaussianConvolution1DCache<double>::get(
                                     k, expnt(term) * width[d] * width[d], 0,
                                     lattice_sum[d]));
          }
          // first-derivative Gaussian along the gradient direction
          ops[term].setop(dir, GaussianConvolution1DCache<double>::get(
                                   k, expnt(term) * width[dir] * width[dir], 1,
                                   lattice_sum[dir]));
        }
        // Forward k: the constructor would otherwise fall back to
        // FunctionDefaults<3>::get_k(), which need not match the k the 1D
        // convolutions above were built with.
        gradG[dir] = real_convolution_3d_ptr(
            new SeparatedConvolution<double, 3>(world, ops, k));
      }

      return gradG;
    }


    namespace archive {

        template <class Archive, class T, std::size_t NDIM>


        struct ArchiveLoadImpl<Archive,const SeparatedConvolution<T,NDIM>*> {


            static inline void load(const Archive& ar, const SeparatedConvolution<T,NDIM>*& ptr) {

                WorldObject< SeparatedConvolution<T,NDIM> >* p = nullptr;

                ar & p;

                ptr = static_cast< const SeparatedConvolution<T,NDIM>* >(p);

            }


        };


        template <class Archive, class T, std::size_t NDIM>


        struct ArchiveStoreImpl<Archive,const SeparatedConvolution<T,NDIM>*> {


            static inline void store(const Archive& ar, const SeparatedConvolution<T,NDIM>*const& ptr) {

                ar & static_cast< const WorldObject< SeparatedConvolution<T,NDIM> >* >(ptr);

            }


        };


    }


}


#endif // MADNESS_MRA_OPERATOR_H__INCLUDED

q
double q(double t)
Definition DKops.h:18

adquad.h

aligned.h
Provides routines for internal use optimized for aligned data.

madness::BaseTensor::dim
long dim(int i) const
Returns the size of dimension i.
Definition basetensor.h:147

madness::BaseTensor::ndim
long ndim() const
Returns the number of dimensions in the tensor.
Definition basetensor.h:144

madness::Convolution1D
Provides the common functionality/interface of all 1D convolutions.
Definition convolution1d.h:258

madness::ConvolutionND
Array of 1D convolutions (one / dimension)
Definition convolution1d.h:584

madness::Displacements
Holds displacements for applying operators to avoid replicating for all operators.
Definition displacements.h:51

madness::Displacements::get_disp
const std::vector< Key< NDIM > > & get_disp(Level n, const array_of_bools< NDIM > &kernel_lattice_sum_axes)
Definition displacements.h:211

madness::FunctionCommonData
FunctionCommonData holds all Function data common for given k.
Definition function_common_data.h:52

madness::FunctionCommonData::h0
Tensor< double > h0
Definition function_common_data.h:105

madness::FunctionCommonData::h1
Tensor< double > h1
Definition function_common_data.h:105

madness::FunctionDefaults
FunctionDefaults holds default parameters as static class members.
Definition funcdefaults.h:100

madness::FunctionDefaults::get_cell_width
static const Tensor< double > & get_cell_width()
Returns the width of each user cell dimension.
Definition funcdefaults.h:369

madness::Function
A multiresolution adaptive numerical function.
Definition mra.h:139

madness::GFit
Definition gfit.h:57

madness::GFit::BSHFit
static GFit BSHFit(double mu, double lo, double hi, double eps, bool prnt=false)
return a fit for the bound-state Helmholtz function
Definition gfit.h:117

madness::GFit::CoulombFit
static GFit CoulombFit(double lo, double hi, double eps, bool prnt=false)
return a fit for the Coulomb function
Definition gfit.h:102

madness::GaussianConvolution1D
1D convolution with (derivative) Gaussian; coeff and expnt given in simulation coordinates [0,...
Definition convolution1d.h:734

madness::GenTensor
Definition lowranktensor.h:59

madness::GenTensor::dim
long dim(const int i) const
return the number of entries in dimension i
Definition lowranktensor.h:391

madness::GenTensor::ndim
long ndim() const
Definition lowranktensor.h:386

madness::GenTensor::is_full_tensor
constexpr bool is_full_tensor() const
Definition gentensor.h:224

madness::GenTensor::reduce_rank
void reduce_rank(const double &eps)
Definition gentensor.h:217

madness::GenTensor::rank
long rank() const
Definition gentensor.h:212

madness::GenTensor::size
long size() const
Definition lowranktensor.h:482

madness::GenTensor::get_svdtensor
SVDTensor< T > & get_svdtensor()
Definition gentensor.h:228

madness::GenTensor::ptr
const BaseTensor * ptr() const
might return a NULL pointer!
Definition lowranktensor.h:709

madness::GenTensor::scale
IsSupported< TensorTypeData< Q >, GenTensor< T > & >::type scale(Q fac)
Inplace multiplication by scalar of supported type (legacy name)
Definition lowranktensor.h:426

madness::GenTensor::is_svd_tensor
constexpr bool is_svd_tensor() const
Definition gentensor.h:222

madness::Key
Key is the index for a node of the 2^NDIM-tree.
Definition key.h:69

madness::Key::merge_with
Key< NDIM+LDIM > merge_with(const Key< LDIM > &rhs) const
merge with other key (ie concatenate), use level of rhs, not of this
Definition key.h:405

madness::Key::translation
const Vector< Translation, NDIM > & translation() const
Definition key.h:173

madness::Key::break_apart
void break_apart(Key< LDIM > &key1, Key< KDIM > &key2) const
break key into two low-dimensional keys
Definition key.h:343

madness::SRConf::ref_vector
Tensor< T > & ref_vector(const unsigned int &idim)
return reference to one of the vectors F
Definition srconf.h:530

madness::SRConf::dim_per_vector
int dim_per_vector(int idim) const
return the number of physical dimensions
Definition srconf.h:665

madness::SRConf::max_sigma
static int max_sigma(const double &thresh, const long &rank, const Tensor< double > &w)
Definition srconf.h:109

madness::SVDTensor
Definition SVDTensor.h:42

madness::SeparatedConvolution
Convolutions in separated form (including Gaussian)
Definition operator.h:139

madness::SeparatedConvolution::timer_low_transf
Timer timer_low_transf
Definition operator.h:165

madness::SeparatedConvolution::destructive_
bool destructive_
destroy the argument or restore it (expensive for 6d functions)
Definition operator.h:159

madness::SeparatedConvolution::apply2
GenTensor< TENSOR_RESULT_TYPE(T, Q)> apply2(const Key< NDIM > &source, const Key< NDIM > &shift, const GenTensor< T > &coeff, double tol, double tol2) const
apply this operator on coefficients in low rank form
Definition operator.h:1458

madness::SeparatedConvolution::range
std::array< KernelRange, NDIM > range
kernel range is along axis d is limited by range[d] if it's nonnull
Definition operator.h:154

madness::SeparatedConvolution::lattice_summed
const array_of_bools< NDIM > & lattice_summed() const
Definition operator.h:1178

madness::SeparatedConvolution::particle_
int particle_
must only be 1 or 2
Definition operator.h:158

madness::SeparatedConvolution::muopxv_fast2
void muopxv_fast2(Level n, const ConvolutionData1D< Q > *const ops_1d[NDIM], const GenTensor< T > &f, const GenTensor< T > &f0, GenTensor< TENSOR_RESULT_TYPE(T, Q)> &result, GenTensor< TENSOR_RESULT_TYPE(T, Q)> &result0, double tol, const Q mufac, GenTensor< TENSOR_RESULT_TYPE(T, Q)> &work1, GenTensor< TENSOR_RESULT_TYPE(T, Q)> &work2) const
Apply one of the separated terms, accumulating into the result.
Definition operator.h:574

madness::SeparatedConvolution::gamma
const double & gamma() const
Definition operator.h:200

madness::SeparatedConvolution::apply2_lowdim
GenTensor< TENSOR_RESULT_TYPE(T, Q)> apply2_lowdim(const Key< NDIM > &source, const Key< NDIM > &shift, const GenTensor< T > &coeff, double tol, double tol2) const
apply this operator on only 1 particle of the coefficients in low rank form
Definition operator.h:1362

madness::SeparatedConvolution::make_coeff_for_operator
static std::pair< Tensor< Q >, Tensor< Q > > make_coeff_for_operator(World &world, double mu, double lo, double eps, OpType type, const array_of_bools< NDIM > &lattice_summed)
Definition operator.h:226

madness::SeparatedConvolution::ops
std::vector< ConvolutionND< Q, NDIM > > ops
ConvolutionND keeps data for 1 term, all dimensions, 1 displacement.
Definition operator.h:172

madness::SeparatedConvolution::operator()
Function< TENSOR_RESULT_TYPE(T, Q), LDIM+LDIM > operator()(const Function< T, LDIM > &f1, const Function< Q, LDIM > &f2) const
apply this operator on a separable function f(1,2) = f(1) f(2)
Definition operator.h:1247

madness::SeparatedConvolution::getop
const SeparatedConvolutionData< Q, NDIM > * getop(Level n, const Key< NDIM > &d, const Key< NDIM > &source) const
get the data for all terms and all dimensions for one displacement
Definition operator.h:821

madness::SeparatedConvolution::set_domain_periodicity
void set_domain_periodicity(const array_of_bools< NDIM > &domain_is_periodic)
Definition operator.h:1183

madness::SeparatedConvolution::mu
const double & mu() const
Definition operator.h:201

madness::SeparatedConvolution::munorm2
double munorm2(Level n, const ConvolutionData1D< Q > *ops[]) const
Definition operator.h:661

madness::SeparatedConvolution::destructive
const bool & destructive() const
Definition operator.h:198

madness::SeparatedConvolution::timer_stats_accumulate
Timer timer_stats_accumulate
Definition operator.h:167

madness::SeparatedConvolution::data
SimpleCache< SeparatedConvolutionData< Q, NDIM >, NDIM > data
cache for all terms, dims and displacements
Definition operator.h:181

madness::SeparatedConvolution::get_source_key
std::enable_if< FDIM!=NDIM, Key< NDIM > >::type get_source_key(const Key< FDIM > key) const
return that part of a hi-dim key that serves as the base for displacements of this operator
Definition operator.h:1201

madness::SeparatedConvolution::munorm2_ns
double munorm2_ns(Level n, const ConvolutionData1D< Q > *ops[]) const
Definition operator.h:669

madness::SeparatedConvolution::get_source_key
std::enable_if< FDIM==NDIM, Key< NDIM > >::type get_source_key(const Key< FDIM > key) const
return that part of a hi-dim key that serves as the base for displacements of this operator
Definition operator.h:1218

madness::SeparatedConvolution::~SeparatedConvolution
virtual ~SeparatedConvolution()
Definition operator.h:1155

madness::SeparatedConvolution::muopxv_fast
void muopxv_fast(ApplyTerms at, const ConvolutionData1D< Q > *const ops_1d[NDIM], const Tensor< T > &f, const Tensor< T > &f0, Tensor< TENSOR_RESULT_TYPE(T, Q)> &result, Tensor< TENSOR_RESULT_TYPE(T, Q)> &result0, const double tol, const Q mufac, Tensor< TENSOR_RESULT_TYPE(T, Q)> &work1, Tensor< TENSOR_RESULT_TYPE(T, Q)> &work2) const
Apply one of the separated terms, accumulating into the result.
Definition operator.h:463

madness::SeparatedConvolution::mod_data
SimpleCache< SeparatedConvolutionData< Q, NDIM >, 2 *NDIM > mod_data
cache for all terms, dims and displacements
Definition operator.h:182

madness::SeparatedConvolution::domain_is_periodic
const array_of_bools< NDIM > & domain_is_periodic() const
Definition operator.h:1180

madness::SeparatedConvolution::operator()
argT operator()(const argT &argument) const
apply this onto another suitable argument, returning the same type
Definition operator.h:1267

madness::SeparatedConvolution::timer_full
Timer timer_full
Definition operator.h:164

madness::SeparatedConvolution::make_coeff_for_operator
static std::pair< Tensor< double >, Tensor< double > > make_coeff_for_operator(World &world, OperatorInfo &info, const array_of_bools< NDIM > &lattice_summed)
Definition operator.h:250

madness::SeparatedConvolution::set_particle
SeparatedConvolution< Q, NDIM > & set_particle(const int p)
Definition operator.h:191

madness::SeparatedConvolution::SeparatedConvolution
SeparatedConvolution(World &world, const std::vector< std::shared_ptr< Convolution1D< Q > > > &argops, long k=FunctionDefaults< NDIM >::get_k(), bool doleaves=false)
Definition operator.h:1004

madness::SeparatedConvolution::operator()
Function< TENSOR_RESULT_TYPE(T, Q), LDIM+LDIM > operator()(const std::vector< Function< T, LDIM > > &f1, const std::vector< Function< Q, LDIM > > &f2) const
apply this operator on a sum of separable functions f(1,2) = \sum_i f_i(1) f_i(2)
Definition operator.h:1259

madness::SeparatedConvolution::init_lattice_summed
void init_lattice_summed()
Definition operator.h:989

madness::SeparatedConvolution::opT
Q opT
The apply function uses this to infer resultT=opT*inputT.
Definition operator.h:142

madness::SeparatedConvolution::SeparatedConvolution
SeparatedConvolution(World &world, const OperatorInfo info1, const array_of_bools< NDIM > &lattice_summed=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k(), bool doleaves=false)
Constructor for Gaussian Convolutions (mostly for backward compatibility)
Definition operator.h:1058

madness::SeparatedConvolution::getmuop
const SeparatedConvolutionInternal< Q, NDIM > getmuop(int mu, Level n, const Key< NDIM > &disp) const
get the transformation matrices for 1 term and all dimensions and one displacement
Definition operator.h:765

madness::SeparatedConvolution::getop_ns
const SeparatedConvolutionData< Q, NDIM > * getop_ns(Level n, const Key< NDIM > &d) const
get the data for all terms and all dimensions for one displacement
Definition operator.h:835

madness::SeparatedConvolution::v2k
const std::vector< long > v2k
Definition operator.h:177

madness::SeparatedConvolution::get_range
const std::array< KernelRange, NDIM > & get_range() const
Definition operator.h:205

madness::SeparatedConvolution::get_rank
int get_rank() const
Definition operator.h:202

madness::SeparatedConvolution::operator()
Function< TENSOR_RESULT_TYPE(T, Q), FDIM > operator()(const Function< T, FDIM > &f) const
apply this operator on a function f
Definition operator.h:1230

madness::SeparatedConvolution::doleaves
bool doleaves
If should be applied to leaf coefficients ... false by default.
Definition operator.h:146

madness::SeparatedConvolution::print_timer
void print_timer() const
Definition operator.h:1157

madness::SeparatedConvolution::SeparatedConvolution
SeparatedConvolution(World &world, const std::vector< ConvolutionND< Q, NDIM > > &argops, long k=FunctionDefaults< NDIM >::get_k(), bool doleaves=false)
Definition operator.h:1033

madness::SeparatedConvolution::get_disp
const std::vector< Key< NDIM > > & get_disp(Level n) const
Definition operator.h:1173

madness::SeparatedConvolution::apply_transformation3
void apply_transformation3(const Tensor< T > trans2[NDIM], const Tensor< T > &f, const Q mufac, Tensor< R > &result) const
accumulate into result
Definition operator.h:386

madness::SeparatedConvolution::combine
static SeparatedConvolution< Q, NDIM > combine(const SeparatedConvolution< Q, NDIM > &left, const SeparatedConvolution< Q, NDIM > &right)
combine 2 convolution operators to one
Definition operator.h:1743

madness::SeparatedConvolution::vk
const std::vector< long > vk
Definition operator.h:176

madness::SeparatedConvolution::norm
double norm(Level n, const Key< NDIM > &d, const Key< NDIM > &source_key) const
return the operator norm for all terms, all dimensions and 1 displacement
Definition operator.h:1186

madness::SeparatedConvolution::make_tt_representation
TensorTrain< double > make_tt_representation(const Key< NDIM > &source, const Key< NDIM > &shift, double tol, bool do_R, bool do_T) const
construct the tensortrain representation of the operator
Definition operator.h:1625

madness::SeparatedConvolution::reset_timer
void reset_timer() const
Definition operator.h:1165

madness::SeparatedConvolution::combine_OT
static OperatorInfo combine_OT(const SeparatedConvolution< Q, NDIM > &left, const SeparatedConvolution< Q, NDIM > &right)
return operator type and other info of the combined operator (e.g. fg = f(1,2)* g(1,...
Definition operator.h:1706

madness::SeparatedConvolution::upsample
GenTensor< T > upsample(const Key< FDIM > &key, const GenTensor< T > &coeff) const
upsample the sum coefficients of level 1 to sum coeffs on level n+1
Definition operator.h:959

madness::SeparatedConvolution::print_timings
bool print_timings
Definition operator.h:160

madness::SeparatedConvolution::timer_low_accumulate
Timer timer_low_accumulate
Definition operator.h:166

madness::SeparatedConvolution::estimate_costs
double estimate_costs(const Key< NDIM > &source, const Key< NDIM > &shift, const GenTensor< T > &coeff, double tol, double tol2) const
estimate the ratio of cost of full rank versus low rank
Definition operator.h:1566

madness::SeparatedConvolution::range_restricted
bool range_restricted() const
Definition operator.h:206

madness::SeparatedConvolution::get_k
int get_k() const
Definition operator.h:203

madness::SeparatedConvolution::apply
Tensor< TENSOR_RESULT_TYPE(T, Q)> apply(const Key< NDIM > &source, const Key< NDIM > &shift, const Tensor< T > &coeff, double tol) const
apply this operator on coefficients in full rank form
Definition operator.h:1280

madness::SeparatedConvolution::s0
const std::vector< Slice > s0
Definition operator.h:178

madness::SeparatedConvolution::lattice_summed_
array_of_bools< NDIM > lattice_summed_
Definition operator.h:150

madness::SeparatedConvolution::apply_transformation2
void apply_transformation2(Level n, long dimk, double tol, const Tensor< T > trans2[NDIM], const GenTensor< T > &f, GenTensor< R > &work1, GenTensor< R > &work2, const Q mufac, GenTensor< R > &result) const
don't accumulate, since we want to do this at apply()
Definition operator.h:403

madness::SeparatedConvolution::particle
const int & particle() const
Definition operator.h:190

madness::SeparatedConvolution::operator()
std::vector< Function< TENSOR_RESULT_TYPE(T, Q), FDIM > > operator()(const std::vector< Function< T, FDIM > > &f) const
apply this on a vector of functions
Definition operator.h:1236

madness::SeparatedConvolution::modified
const bool & modified() const
Definition operator.h:187

madness::SeparatedConvolution::cdata
const FunctionCommonData< Q, NDIM > & cdata
Definition operator.h:174

madness::SeparatedConvolution::modified
bool & modified()
Definition operator.h:186

madness::SeparatedConvolution::k
const int k
Definition operator.h:173

madness::SeparatedConvolution::combine
friend SeparatedConvolution< Q, NDIM > combine(const std::shared_ptr< SeparatedConvolution< Q, NDIM > > left, const std::shared_ptr< SeparatedConvolution< Q, NDIM > > right)
combine 2 convolution operators to one
Definition operator.h:1754

madness::SeparatedConvolution::info
OperatorInfo info
Definition operator.h:144

madness::SeparatedConvolution::SeparatedConvolution
SeparatedConvolution(World &world, const Tensor< Q > &coeff, const Tensor< double > &expnt, double lo, double thresh, const array_of_bools< NDIM > &lattice_summed=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k(), bool doleaves=false, double mu=0.0)
Constructor for Gaussian Convolutions (mostly for backward compatibility)
Definition operator.h:1075

madness::SeparatedConvolution::domain_is_periodic_
array_of_bools< NDIM > domain_is_periodic_
Definition operator.h:152

madness::SeparatedConvolution::keyT
Key< NDIM > keyT
Definition operator.h:162

madness::SeparatedConvolution::init_range
void init_range()
Definition operator.h:976

madness::SeparatedConvolution::opdim
static const size_t opdim
Definition operator.h:163

madness::SeparatedConvolution::destructive
bool & destructive()
Definition operator.h:197

madness::SeparatedConvolution::apply_transformation
void apply_transformation(long dimk, const Transformation trans[NDIM], const Tensor< T > &f, Tensor< R > &work1, Tensor< R > &work2, const Q mufac, Tensor< R > &result) const
Definition operator.h:323

madness::SeparatedConvolution::initialize
void initialize(const Tensor< Q > &coeff, const Tensor< double > &expnt, std::array< KernelRange, NDIM > range={})
Definition operator.h:1099

madness::SeparatedConvolution::check_cubic
void check_cubic()
Definition operator.h:898

madness::SeparatedConvolution::modified_
bool modified_
use modified NS form
Definition operator.h:157

madness::SeparatedConvolution::rank
int rank
Definition operator.h:175

madness::SeparatedConvolution::partial_upsample
GenTensor< T > partial_upsample(const Key< FDIM > &key, const GenTensor< T > &coeff, const int particle) const
upsample some of the dimensions of coeff to its child indicated by key
Definition operator.h:916

madness::SeparatedConvolution::munorm2_modified
double munorm2_modified(Level n, const ConvolutionData1D< Q > *ops_1d[]) const
Definition operator.h:702

madness::SeparatedConvolution::particle
int & particle()
Definition operator.h:189

madness::SeparatedConvolution::SeparatedConvolution
SeparatedConvolution(World &world, Vector< double, NDIM > args, const Tensor< Q > &coeff, const Tensor< double > &expnt, const array_of_bools< NDIM > &lattice_summed=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k(), bool doleaves=false)
WSTHORNTON Constructor for Gaussian Convolutions (mostly for backward compatibility)
Definition operator.h:1118

madness::SeparatedConvolution::can_combine
static bool can_combine(const SeparatedConvolution< Q, NDIM > &left, const SeparatedConvolution< Q, NDIM > &right)
Definition operator.h:1701

madness::SeparatedConvolution::get_ops
const std::vector< ConvolutionND< Q, NDIM > > & get_ops() const
Definition operator.h:204

madness::SeparatedConvolution::getop_modified
const SeparatedConvolutionData< Q, NDIM > * getop_modified(Level n, const Key< NDIM > &disp, const Key< NDIM > &source) const
get the data for all terms and all dimensions for one displacement (modified NS form)
Definition operator.h:869

madness::SeparatedConvolution::getmuop_modified
const SeparatedConvolutionInternal< Q, NDIM > getmuop_modified(int mu, Level n, const Key< NDIM > &disp, const Key< NDIM > &source) const
get the transformation matrices for 1 term and all dimensions and one displacement
Definition operator.h:794

madness::SimpleCache
Simplified interface around hash_map to cache stuff for 1D.
Definition simplecache.h:46

madness::Slice
A slice defines a sub-range or patch of a dimension.
Definition slice.h:103

madness::TensorTrain
Definition tensortrain.h:123

madness::TensorTrain::truncate
std::enable_if<!std::is_arithmetic< R >::value, void >::type truncate(double eps)
recompress and truncate this TT representation
Definition tensortrain.h:883

madness::TensorTrain::make_operator
TensorTrain< T > & make_operator()
convert this into an operator representation (r,k',k,r)
Definition tensortrain.h:1188

madness::TensorTrain::make_tensor
TensorTrain< T > & make_tensor()
convert this into a tensor representation (r,k,r)
Definition tensortrain.h:1176

madness::Tensor
A tensor is a multidimensional array.
Definition tensor.h:317

madness::Tensor::normf
float_scalar_type normf() const
Returns the Frobenius norm of the tensor.
Definition tensor.h:1726

madness::Tensor::ptr
T * ptr()
Returns a pointer to the internal data.
Definition tensor.h:1825

madness::Tensor::scale
IsSupported< TensorTypeData< Q >, Tensor< T > & >::type scale(Q x)
Inplace multiplication by scalar of supported type (legacy name)
Definition tensor.h:686

madness::Tensor::max
T max(long *ind=0) const
Return the maximum value (and if ind is non-null, its index) in the Tensor.
Definition tensor.h:1703

madness::Timer
Definition function_common_data.h:169

madness::Timer::print
void print(std::string line="") const
print timer
Definition function_common_data.h:216

madness::Timer::accumulate
void accumulate(const double time) const
accumulate timer
Definition function_common_data.h:183

madness::Timer::reset
void reset() const
Definition function_common_data.h:210

madness::Vector
A simple, fixed dimension vector.
Definition vector.h:64

madness::WorldObject
Implements most parts of a globally addressable object (via unique ID).
Definition world_object.h:364

madness::WorldObject< SeparatedConvolution< Q, NDIM > >::get_world
World & get_world() const
Returns a reference to the world.
Definition world_object.h:717

madness::WorldObject< SeparatedConvolution< Q, NDIM > >::world
World & world
The World this object belongs to. (Think globally, act locally).
Definition world_object.h:381

madness::WorldObject< SeparatedConvolution< Q, NDIM > >::process_pending
void process_pending()
To be called from derived constructor to process pending messages.
Definition world_object.h:656

madness::World
A parallel world class.
Definition world.h:132

madness::World::rank
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:320

madness::World::id
unsigned long id() const
Definition world.h:315

madness::array_of_bools
syntactic sugar for std::array<bool, N>
Definition array_of_bools.h:19

madness::array_of_bools::any
bool any() const
Definition array_of_bools.h:38

constants.h
Defines common mathematical and physical constants.

convolution1d.h
Computes most matrix elements over 1D operators (including Gaussians)

R
static const double R
Definition csqrt.cc:46

f1
double(* f1)(const coord_3d &)
Definition derivatives.cc:55

p
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition derivatives.cc:72

f2
double(* f2)(const coord_3d &)
Definition derivatives.cc:56

lo
static double lo
Definition dirac-hatom.cc:23

shift
static double shift
Definition dirac-hatom.cc:19

displacements.h

function_common_data.h

gfit.h
fit isotropic functions to a set of Gaussians with controlled precision

T
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:28

v
static const double v
Definition hatom_sf_dirac.cc:20

op
Tensor< double > op(const Tensor< double > &x)
Definition kain.cc:508

pow
static double pow(const double *a, const double *b)
Definition lda.h:74

max
#define max(a, b)
Definition lda.h:51

MADNESS_CHECK
#define MADNESS_CHECK(condition)
Check a condition — even in a release build the condition is always evaluated, so it can have side effects.
Definition madness_exception.h:182

MADNESS_EXCEPTION
#define MADNESS_EXCEPTION(msg, value)
Macro for throwing a MADNESS exception.
Definition madness_exception.h:119

MADNESS_ASSERT
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134

madness::constants::pi
constexpr double pi
Mathematical constant π.
Definition constants.h:48

madness
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10

madness::lattice_sum
array_of_bools< NDIM > lattice_sum()
Definition bc.h:231

madness::BSHOperator3D
static SeparatedConvolution< double, 3 > BSHOperator3D(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with exp(-mu*r)/(4*pi*r) in 3D.
Definition operator.h:1859

madness::SlaterF12sqOperatorPtr
static SeparatedConvolution< double, 3 > * SlaterF12sqOperatorPtr(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Definition operator.h:1941

madness::GaussOperatorPtr
static SeparatedConvolution< double, NDIM > * GaussOperatorPtr(World &world, double mu, double lo=0.0, double eps=0.0, const array_of_bools< NDIM > &lattice_sum=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k())
Factory function generating separated kernel for convolution with exp(-mu*r*r) in general NDIM.
Definition operator.h:1972

madness::real_convolution_3d_ptr
std::shared_ptr< real_convolution_3d > real_convolution_3d_ptr
Definition functypedefs.h:150

madness::cpu_time
static double cpu_time()
Returns the cpu time in seconds relative to an arbitrary origin.
Definition timers.h:127

madness::BSHOperator
static SeparatedConvolution< double, NDIM > BSHOperator(World &world, double mu, double lo, double eps, const array_of_bools< NDIM > &lattice_sum=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k())
Factory function generating separated kernel for convolution with BSH kernel in general NDIM.
Definition operator.h:1832

madness::SlaterOperatorPtr
static SeparatedConvolution< double, 3 > * SlaterOperatorPtr(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Definition operator.h:1992

madness::SlaterF12OperatorPtr
static SeparatedConvolution< double, 3 > * SlaterF12OperatorPtr(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with (1 - exp(-mu*r))/(2 mu) in 3D.
Definition operator.h:2002

madness::real_convolution_3d
SeparatedConvolution< double, 3 > real_convolution_3d
Definition functypedefs.h:136

madness::general_transform
GenTensor< TENSOR_RESULT_TYPE(R, Q)> general_transform(const GenTensor< R > &t, const Tensor< Q > c[])
Definition gentensor.h:274

madness::CoulombOperatorPtr
static SeparatedConvolution< double, 3 > * CoulombOperatorPtr(World &world, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with 1/r in 3D.
Definition operator.h:1818

madness::PeriodicBSHOperator3D
static SeparatedConvolution< double_complex, 3 > PeriodicBSHOperator3D(World &world, Vector< double, 3 > args, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with exp(-mu*r)/(4*pi*r) in 3D.
Definition operator.h:1868

madness::GradCoulombOperator
static std::vector< std::shared_ptr< SeparatedConvolution< double, 3 > > > GradCoulombOperator(World &world, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating operator for convolution with grad(1/r) in 3D.
Definition operator.h:2134

madness::fast_transpose
void fast_transpose(long n, long m, const T *a, T *MADNESS_RESTRICT b)
a(n,m) --> b(m,n) ... optimized for smallish matrices
Definition convolution1d.h:70

madness::SlaterF12Operator
static SeparatedConvolution< double, 3 > SlaterF12Operator(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Definition operator.h:1928

madness::GaussOperator
static SeparatedConvolution< double, NDIM > GaussOperator(World &world, double mu, double lo=0.0, double eps=0.0, const array_of_bools< NDIM > &lattice_sum=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k())
Factory function generating separated kernel for convolution with exp(-mu*r*r)
Definition operator.h:1961

madness::Translation
int64_t Translation
Definition key.h:57

madness::CoulombOperator
static SeparatedConvolution< double, 3 > CoulombOperator(World &world, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with 1/r in 3D.
Definition operator.h:1805

madness::F2GOperator
static SeparatedConvolution< double, 3 > F2GOperator(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with (1/(2 mu)*(1 - exp(-mu*r)))^2/r in 3D.
Definition operator.h:2050

madness::GradBSHOperator
static std::vector< std::shared_ptr< SeparatedConvolution< double, 3 > > > GradBSHOperator(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating operator for convolution with grad(bsh) in 3D.
Definition operator.h:2193

madness::mTxmq_padding
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b, cT *c, const aT *a, const bT *b)
Definition mtxmq.h:96

madness::SmoothingOperator3D
static SeparatedConvolution< double, 3 > SmoothingOperator3D(World &world, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Definition operator.h:2059

madness::SmoothingOperator
static SeparatedConvolution< double, NDIM > SmoothingOperator(World &world, double eps, const array_of_bools< NDIM > &lattice_sum=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k())
Definition operator.h:2074

madness::_
static const Slice _(0,-1, 1)

madness::F2GOperatorPtr
static SeparatedConvolution< double, 3 > * F2GOperatorPtr(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with (1/(2 mu)*(1 - exp(-mu*r)))^2/r in 3D.
Definition operator.h:2038

madness::Level
int Level
Definition key.h:58

madness::OpType
OpType
operator types
Definition operatorinfo.h:11

madness::OT_FG12
@ OT_FG12
(1-exp(-r))/r
Definition operatorinfo.h:18

madness::OT_SLATER
@ OT_SLATER
exp(-r)
Definition operatorinfo.h:15

madness::OT_GAUSS
@ OT_GAUSS
exp(-r^2)
Definition operatorinfo.h:16

madness::OT_BSH
@ OT_BSH
exp(-r)/r
Definition operatorinfo.h:21

madness::OT_F12
@ OT_F12
1-exp(-r)
Definition operatorinfo.h:17

madness::OT_F212
@ OT_F212
(1-exp(-r))^2
Definition operatorinfo.h:19

madness::OT_UNDEFINED
@ OT_UNDEFINED
Definition operatorinfo.h:12

madness::OT_G12
@ OT_G12
1/r
Definition operatorinfo.h:14

madness::OT_F2G12
@ OT_F2G12
(1-exp(-r))^2/r = 1/r + exp(-2r)/r - 2 exp(-r)/r
Definition operatorinfo.h:20

madness::FGOperatorPtr
static SeparatedConvolution< double, 3 > * FGOperatorPtr(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with 1/(2 mu)*(1 - exp(-mu*r))/r in 3D.
Definition operator.h:2026

madness::SlaterF12sqOperator
static SeparatedConvolution< double, 3 > SlaterF12sqOperator(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Definition operator.h:1934

madness::print
void print(const T &t, const Ts &... ts)
Print items to std::cout (items separated by spaces) and terminate with a new line.
Definition print.h:225

madness::apply
response_space apply(World &world, std::vector< std::vector< std::shared_ptr< real_convolution_3d > > > &op, response_space &f)
Definition basic_operators.cc:43

madness::TensorType
TensorType
low rank representations of tensors (see gentensor.h)
Definition gentensor.h:120

madness::TT_2D
@ TT_2D
Definition gentensor.h:120

madness::f
NDIM & f
Definition mra.h:2481

madness::BSHOperatorPtr
static SeparatedConvolution< double, NDIM > * BSHOperatorPtr(World &world, double mu, double lo, double eps, const array_of_bools< NDIM > &lattice_sum=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k())
Factory function generating separated kernel for convolution with BSH kernel in general NDIM.
Definition operator.h:1846

madness::reduce
GenTensor< T > reduce(std::list< GenTensor< T > > &addends, double eps, bool are_optimal=false)
add all the GenTensors of a given list
Definition gentensor.h:246

madness::SlaterOperator
static SeparatedConvolution< double, NDIM > SlaterOperator(World &world, double mu, double lo, double eps, const array_of_bools< NDIM > &lattice_sum=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k())
Factory function generating separated kernel for convolution with exp(-mu*r) in general NDIM.
Definition operator.h:1950

madness::FGOperator
static SeparatedConvolution< double, 3 > FGOperator(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with 1/(2 mu)*(1 - exp(-mu*r))/r in 3D.
Definition operator.h:2015

madness::type
std::string type(const PairType &n)
Definition PNOParameters.h:18

madness::BSHOperatorPtr3D
static SeparatedConvolution< double, 3 > * BSHOperatorPtr3D(World &world, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_summed=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with exp(-mu*r)/(4*pi*r) in 3D.
Definition operator.h:2089

madness::PeriodicBSHOperatorPtr3D
static SeparatedConvolution< double_complex, 3 > * PeriodicBSHOperatorPtr3D(World &world, Vector< double, 3 > args, double mu, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with exp(-mu*r)/(4*pi*r) in 3D.
Definition operator.h:1898

madness::SlaterOperatorPtr_ND
static SeparatedConvolution< double, NDIM > * SlaterOperatorPtr_ND(World &world, double mu, double lo, double eps, const array_of_bools< NDIM > &lattice_sum=FunctionDefaults< NDIM >::get_bc().is_periodic(), int k=FunctionDefaults< NDIM >::get_k())
Definition operator.h:1983

madness::PeriodicHFExchangeOperator
static SeparatedConvolution< double_complex, 3 > PeriodicHFExchangeOperator(World &world, Vector< double, 3 > args, double lo, double eps, const array_of_bools< 3 > &lattice_sum=FunctionDefaults< 3 >::get_bc().is_periodic(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with 1/r in 3D.
Definition operator.h:1775

madness::copy
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition mra.h:2066

madness::mTxmq
void mTxmq(long dimi, long dimj, long dimk, T *MADNESS_RESTRICT c, const T *a, const T *b, long ldb=-1)
Matrix = Matrix transpose * matrix ... MKL interface version.
Definition mxm.h:257

madness::dir
static const string dir
Definition corepotential.cc:249

madness::aligned_axpy
static void aligned_axpy(long n, T *MADNESS_RESTRICT a, const T *MADNESS_RESTRICT b, Q s)
Definition aligned.h:75

std
Definition mraimpl.h:50

std::abs
static long abs(long a)
Definition tensor.h:218

mu
const double mu
Definition navstokes_cosines.cc:95

b
static const double b
Definition nonlinschro.cc:119

d
static const double d
Definition nonlinschro.cc:121

a
static const double a
Definition nonlinschro.cc:118

operatorinfo.h

Q
double Q(double a)
Definition relops.cc:20

c
static const double c
Definition relops.cc:10

L
static const double L
Definition rk.cc:46

thresh
static const double thresh
Definition rk.cc:45

k
static const long k
Definition rk.cc:44

simplecache.h

data
Definition test_ccpairfunction.cc:22

madness::ConvolutionData1D
!!! Note that if Rnormf is zero then ALL of the tensors are empty
Definition convolution1d.h:162

madness::ConvolutionData1D::N_up
double N_up
Definition convolution1d.h:173

madness::ConvolutionData1D::N_F
double N_F
the norms according to Beylkin 2008, Eq. (21) ff
Definition convolution1d.h:173

madness::ConvolutionData1D::N_diff
double N_diff
Definition convolution1d.h:173

madness::GaussianConvolution1DCache
Definition convolution1d.h:991

madness::OperatorInfo
Definition operatorinfo.h:58

madness::OperatorInfo::hi
double hi
Definition operatorinfo.h:67

madness::OperatorInfo::type
OpType type
introspection
Definition operatorinfo.h:66

madness::OperatorInfo::mu
double mu
some introspection
Definition operatorinfo.h:63

madness::OperatorInfo::range
std::vector< KernelRange > range
Definition operatorinfo.h:68

madness::OperatorInfo::truncate_lowexp_gaussians
std::optional< bool > truncate_lowexp_gaussians
Definition operatorinfo.h:70

madness::SeparatedConvolutionData
SeparatedConvolutionData keeps data for all terms, all dimensions.
Definition operator.h:93

madness::SeparatedConvolutionData::muops
std::vector< SeparatedConvolutionInternal< Q, NDIM > > muops
Definition operator.h:94

madness::SeparatedConvolutionData::SeparatedConvolutionData
SeparatedConvolutionData(int rank)
Definition operator.h:97

madness::SeparatedConvolutionData::norm
double norm
Definition operator.h:95

madness::SeparatedConvolutionData::SeparatedConvolutionData
SeparatedConvolutionData(const SeparatedConvolutionData< Q, NDIM > &q)
Definition operator.h:98

madness::SeparatedConvolutionInternal
Definition operator.h:84

madness::SeparatedConvolutionInternal::norm
double norm
Definition operator.h:85

madness::SeparatedConvolutionInternal::ops
const ConvolutionData1D< Q > * ops[NDIM]
Definition operator.h:86

madness::SeparatedConvolution::ApplyTerms
laziness for calling lists: which terms to apply
Definition operator.h:211

madness::SeparatedConvolution::ApplyTerms::t_term
bool t_term
Definition operator.h:214

madness::SeparatedConvolution::ApplyTerms::r_term
bool r_term
Definition operator.h:213

madness::SeparatedConvolution::ApplyTerms::any_terms
bool any_terms() const
Definition operator.h:215

madness::SeparatedConvolution::ApplyTerms::ApplyTerms
ApplyTerms()
Definition operator.h:212

madness::SeparatedConvolution::Transformation
too lazy for extended calling lists
Definition operator.h:219

madness::SeparatedConvolution::Transformation::VT
const Q * VT
Definition operator.h:222

madness::SeparatedConvolution::Transformation::U
const Q * U
Definition operator.h:221

madness::SeparatedConvolution::Transformation::r
long r
Definition operator.h:220

madness::archive::ArchiveLoadImpl< Archive, const SeparatedConvolution< T, NDIM > * >::load
static void load(const Archive &ar, const SeparatedConvolution< T, NDIM > *&ptr)
Definition operator.h:2251

madness::archive::ArchiveLoadImpl
Default load of an object via serialize(ar, t).
Definition archive.h:667

madness::archive::ArchiveStoreImpl< Archive, const SeparatedConvolution< T, NDIM > * >::store
static void store(const Archive &ar, const SeparatedConvolution< T, NDIM > *const &ptr)
Definition operator.h:2260

madness::archive::ArchiveStoreImpl
Default store of an object via serialize(ar, t).
Definition archive.h:612

madness::particle
Definition lowrankfunction.h:336

doit
void doit(World &world)
Definition tdse.cc:921

tensor_lapack.h
Prototypes for a partial interface from Tensor to LAPACK.

factorial
int factorial(int n)
Definition test_BSHApply.cc:14

sum
AtomicInt sum
Definition test_atomicint.cc:46

e
void e()
Definition test_sig.cc:75

aa
double aa
Definition testbsh.cc:68

pi
static const double pi
Definition testcosine.cc:6

fit
std::vector< double > fit(size_t m, size_t n, const std::vector< double > N, const std::vector< double > &f)
Definition testfuns.cc:36

NDIM
constexpr std::size_t NDIM
Definition testgconv.cc:54

h
double h(const coord_1d &r)
Definition testgconv.cc:175

source
double source(const coordT &r)
Definition testperiodic.cc:48

TENSOR_RESULT_TYPE
#define TENSOR_RESULT_TYPE(L, R)
This macro simplifies access to TensorResultType.
Definition type_data.h:205

PROFILE_MEMBER_FUNC
#define PROFILE_MEMBER_FUNC(classname)
Definition worldprofile.h:210