1#ifndef SRC_APPS_CHEM_EXCHANGEOPERATOR_H_
2#define SRC_APPS_CHEM_EXCHANGEOPERATOR_H_
16template<
typename T, std::
size_t NDIM>
32 double t1 = double(mul1_timer) * 0.001;
33 double t2 = double(apply_timer) * 0.001;
34 double t3 = double(mul2_timer) * 0.001;
38 if (world.
rank() == 0) {
39 printf(
" cpu time spent in multiply1 %8.2fs\n", t1);
40 printf(
" cpu time spent in apply %8.2fs\n", t2);
41 printf(
" cpu time spent in multiply2 %8.2fs\n", t3);
64 mo_bra =
copy(world, bra);
65 mo_ket =
copy(world, ket);
/// Returns the fixed string "K"; presumably the short label of this operator — confirm against callers.
68 std::string
info()
const {
return "K";}
106 vecfuncT K_macrotask_efficient(
const vecfuncT& vket,
const double mul_tol = 0.0)
const;
109 vecfuncT K_small_memory(
const vecfuncT& vket,
const double mul_tol = 0.0)
const;
112 vecfuncT K_large_memory(
const vecfuncT& vket,
const double mul_tol = 0.0)
const;
116 const vecfuncT& vket, std::shared_ptr<real_convolution_3d> poisson,
117 const bool symmetric,
const double mul_tol = 0.0);
125 bool symmetric_ =
false;
130 double mul_tol = 0.0;
136 double mul_tol = 1.e-7;
137 bool symmetric =
false;
149 bool symmetric =
false;
152 const std::string policy)
const override {
154 partitionT partition1 = do_1d_partition(vsize1, policy);
155 partitionT partition2 = do_1d_partition(vsize2, policy);
157 for (
auto i = partition1.begin(); i != partition1.end(); ++i) {
159 for (
auto j = i; j != partition1.end(); ++j) {
160 Batch batch(i->first.input[0], j->first.input[0], _);
161 double priority=compute_priority(batch);
162 result.push_back(std::make_pair(batch,priority));
165 for (
auto j = partition2.begin(); j != partition2.end(); ++j) {
166 Batch batch(i->first.input[0], j->first.input[0], _);
167 double priority=compute_priority(batch);
168 result.push_back(std::make_pair(batch,priority));
180 long nrow = batch.
input[0].size();
181 long ncol = batch.
input[1].size();
182 return double(nrow * ncol);
188 : nresult(nresult),
lo(
lo), mul_tol(mul_tol), symmetric(symmetric) {
194 typedef std::tuple<const std::vector<Function<T, NDIM>>&,
195 const std::vector<Function<T, NDIM>>&,
198 using resultT = std::vector<Function<T, NDIM>>;
203 std::size_t n = std::get<0>(argtuple).size();
204 resultT result = zero_functions_compressed<T, NDIM>(world, n);
208 std::vector<Function<T, NDIM>>
213 World& world = vf_batch.front().world();
214 resultT Kf = zero_functions_compressed<T, NDIM>(world, nresult);
216 bool diagonal_block = batch.input[0] == batch.input[1];
217 auto& bra_range = batch.input[1];
218 auto& vf_range = batch.input[0];
220 if (vf_range.is_full_size()) vf_range.end = vf_batch.size();
221 if (bra_range.is_full_size()) bra_range.end = bra_batch.size();
226 if (symmetric and diagonal_block) {
227 auto ket_batch = bra_range.copy_batch(vket);
228 vecfuncT resultcolumn = compute_diagonal_batch_in_symmetric_matrix(world, ket_batch, bra_batch,
231 for (
int i = vf_range.begin; i < vf_range.end; ++i)
232 Kf[i] += resultcolumn[i - vf_range.begin];
234 }
else if (symmetric and not diagonal_block) {
235 auto[resultcolumn, resultrow]=compute_offdiagonal_batch_in_symmetric_matrix(world, vket, bra_batch,
238 for (
int i = bra_range.begin; i < bra_range.end; ++i)
239 Kf[i] += resultcolumn[i - bra_range.begin];
240 for (
int i = vf_range.begin; i < vf_range.end; ++i)
241 Kf[i] += resultrow[i - vf_range.begin];
243 auto ket_batch = bra_range.copy_batch(vket);
244 vecfuncT resultcolumn = compute_batch_in_asymmetric_matrix(world, ket_batch, bra_batch, vf_batch);
245 for (
int i = vf_range.begin; i < vf_range.end; ++i)
246 Kf[i] += resultcolumn[i - vf_range.begin];
263 double mul_tol = 0.0;
264 double symmetric =
true;
281 double mul_tol = 0.0;
282 double symmetric =
false;
295 std::pair<vecfuncT, vecfuncT> compute_offdiagonal_batch_in_symmetric_matrix(
World& subworld,
Operators for the molecular HF and DFT code.
a batch consists of a 2D-input batch and a 1D-output batch: K-batch <- (I-batch, J-batch)
Definition macrotaskpartitioner.h:132
std::vector< Batch_1D > input
Definition macrotaskpartitioner.h:135
custom partitioning for the exchange operator in exchangeoperator.h
Definition exchangeoperator.h:143
double compute_priority(const Batch &batch) const override
compute the priority of this task for non-dumb scheduling
Definition exchangeoperator.h:178
MacroTaskPartitionerExchange(const bool symmetric)
Definition exchangeoperator.h:145
partitionT do_partitioning(const std::size_t &vsize1, const std::size_t &vsize2, const std::string policy) const override
override this if you want your own partitioning
Definition exchangeoperator.h:151
Definition exchangeoperator.h:132
vecfuncT compute_diagonal_batch_in_symmetric_matrix(World &subworld, const vecfuncT &ket_batch, const vecfuncT &bra_batch, const vecfuncT &vf_batch) const
compute a batch of the exchange matrix, with identical ranges, exploiting the matrix symmetry
Definition exchangeoperator.h:258
MacroTaskExchangeSimple(const long nresult, const double lo, const double mul_tol, const bool symmetric)
Definition exchangeoperator.h:187
long nresult
Definition exchangeoperator.h:134
std::vector< Function< T, NDIM > > resultT
Definition exchangeoperator.h:198
vecfuncT compute_batch_in_asymmetric_matrix(World &subworld, const vecfuncT &ket_batch, const vecfuncT &bra_batch, const vecfuncT &vf_batch) const
compute a batch of the exchange matrix, with non-identical ranges
Definition exchangeoperator.h:277
std::vector< Function< T, NDIM > > operator()(const std::vector< Function< T, NDIM > > &vf_batch, const std::vector< Function< T, NDIM > > &bra_batch, const std::vector< Function< T, NDIM > > &vket)
Definition exchangeoperator.h:209
std::tuple< const std::vector< Function< T, NDIM > > &, const std::vector< Function< T, NDIM > > &, const std::vector< Function< T, NDIM > > & > argtupleT
Definition exchangeoperator.h:196
resultT allocator(World &world, const argtupleT &argtuple) const
Definition exchangeoperator.h:202
Definition exchangeoperator.h:17
static std::atomic< long > mul1_timer
timing
Definition exchangeoperator.h:23
ExchangeImpl & symmetric(const bool flag)
Definition exchangeoperator.h:88
static void print_timer(World &world)
Definition exchangeoperator.h:31
ExchangeImpl & set_printlevel(const long &level)
Definition exchangeoperator.h:98
Exchange< T, NDIM >::Algorithm Algorithm
Definition exchangeoperator.h:47
static void reset_timer()
Definition exchangeoperator.h:25
vecfuncT mo_bra
is the exchange matrix symmetric? K phi_i = \sum_k \phi_k \int \phi_k \phi_i
Definition exchangeoperator.h:126
World & world
Definition exchangeoperator.h:123
std::shared_ptr< MacroTaskQ > taskq
Definition exchangeoperator.h:124
Function< T, NDIM > functionT
Definition exchangeoperator.h:18
bool is_symmetric() const
Definition exchangeoperator.h:81
ExchangeImpl & set_taskq(std::shared_ptr< MacroTaskQ > taskq1)
Definition exchangeoperator.h:83
std::vector< functionT > vecfuncT
Definition exchangeoperator.h:19
ExchangeImpl & set_algorithm(const Algorithm &alg)
Definition exchangeoperator.h:93
static std::atomic< long > apply_timer
Definition exchangeoperator.h:21
ExchangeImpl(World &world, const double lo, const double thresh)
default ctor
Definition exchangeoperator.h:51
std::string info() const
Definition exchangeoperator.h:68
bool do_print_timings() const
Definition exchangeoperator.h:119
bool printdebug() const
Definition exchangeoperator.h:121
static auto set_poisson(World &world, const double lo, const double econv=FunctionDefaults< 3 >::get_thresh())
Definition exchangeoperator.h:70
static std::atomic< long > mul2_timer
Definition exchangeoperator.h:22
void set_bra_and_ket(const vecfuncT &bra, const vecfuncT &ket)
set the bra and ket orbital spaces, and the occupation
Definition exchangeoperator.h:63
Definition SCFOperators.h:104
Algorithm
Definition SCFOperators.h:116
@ multiworld_efficient
Definition SCFOperators.h:117
Function< T, NDIM > operator()(const Function< T, NDIM > &ket) const
Definition SCFOperators.h:150
std::vector< functionT > vecfuncT
Definition SCFOperators.h:110
FunctionDefaults holds default parameters as static class members.
Definition funcdefaults.h:204
static const double & get_thresh()
Returns the default threshold.
Definition funcdefaults.h:279
A multiresolution adaptive numerical function.
Definition mra.h:122
Definition macrotaskq.h:716
partition one (two) vectors into 1D (2D) batches.
Definition macrotaskpartitioner.h:190
std::list< std::pair< Batch, double > > partitionT
Definition macrotaskpartitioner.h:194
The Nemo class.
Definition nemo.h:326
std::shared_ptr< MacroTaskQ > taskq
Definition SCFOperators.h:70
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition worldgop.h:870
A parallel world class.
Definition world.h:132
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:318
WorldGopInterface & gop
Global operations.
Definition world.h:205
Declares the Cloud class for storing data and transferring it between worlds.
static double lo
Definition dirac-hatom.cc:23
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
Declares the macrotaskq and MacroTaskBase classes.
General header file for using MADNESS.
#define MADNESS_CHECK(condition)
Check a condition — even in a release build the condition is always evaluated so it can have side effects.
Definition madness_exception.h:182
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
static SeparatedConvolution< double, 3 > * CoulombOperatorPtr(World &world, double lo, double eps, const BoundaryConditions< 3 > &bc=FunctionDefaults< 3 >::get_bc(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with 1/r in 3D.
Definition operator.h:1762
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition mra.h:2002
static const double thresh
Definition rk.cc:45
static const std::size_t NDIM
Definition testpdiff.cc:42