MADNESS 0.10.1
exchangeoperator.h
Go to the documentation of this file.
1#ifndef SRC_APPS_CHEM_EXCHANGEOPERATOR_H_
2#define SRC_APPS_CHEM_EXCHANGEOPERATOR_H_
3
4#include<madness.h>
8
9namespace madness {
10
11// forward declaration
12class SCF;
13class Nemo;
14
15
16template<typename T, std::size_t NDIM>
19 typedef std::vector<functionT> vecfuncT;
20
21 static inline std::atomic<long> apply_timer;
22 static inline std::atomic<long> mul2_timer;
23 static inline std::atomic<long> mul1_timer; ///< timing
24 static inline double elapsed_time;
25
26 static void reset_timer() {
27 mul1_timer = 0l;
28 mul2_timer = 0l;
29 apply_timer = 0l;
30 elapsed_time = 0.0;
31 }
32
33public:
34 nlohmann::json gather_timings(World& world) const {
35 double t1 = double(mul1_timer) * 0.001;
36 double t2 = double(apply_timer) * 0.001;
37 double t3 = double(mul2_timer) * 0.001;
38 world.gop.sum(t1);
39 world.gop.sum(t2);
40 world.gop.sum(t3);
41 nlohmann::json j;
42 j["multiply1"] = t1;
43 j["apply"] = t2;
44 j["multiply2"] = t3;
45 j["total"] = elapsed_time;
46 return j;
47 }
48
49 void print_timer(World& world) const {
50 auto timings= gather_timings(world);
51 if (world.rank() == 0) {
52 printf(" cpu time spent in multiply1 %8.2fs\n", timings["multiply1"].template get<double>());
53 printf(" cpu time spent in apply %8.2fs\n", timings["apply"].template get<double>());
54 printf(" cpu time spent in multiply2 %8.2fs\n", timings["multiply2"].template get<double>());
55 printf(" total wall time %8.2fs\n", timings["total"].template get<double>());
56 }
57 }
58
59
62 MacroTaskInfo macro_task_info = MacroTaskInfo::preset("default");
63
64 /// default ctor
65 ExchangeImpl(World& world, const double lo, const double thresh) : world(world), lo(lo), thresh(thresh) {}
66
67 /// ctor with a conventional calculation
68 ExchangeImpl(World& world, const SCF *calc, const int ispin) ;
69
70 /// ctor with a nemo calculation
71 ExchangeImpl(World& world, const Nemo *nemo, const int ispin);
72
73 /// set the bra and ket orbital spaces, and the occupation
74
75 /// @param[in] bra bra space, must be provided as complex conjugate
76 /// @param[in] ket ket space
77 void set_bra_and_ket(const vecfuncT& bra, const vecfuncT& ket) {
78 mo_bra = copy(world, bra);
79 mo_ket = copy(world, ket);
80 }
81
82 std::string info() const {return "K";}
83
84 static auto set_poisson(World& world, const double lo, const double econv = FunctionDefaults<3>::get_thresh()) {
85 return std::shared_ptr<real_convolution_3d>(CoulombOperatorPtr(world, lo, econv));
86 }
87
88 /// apply the exchange operator on a vector of functions
89
90 /// note that only one spin is used (either alpha or beta orbitals)
91 /// @param[in] vket the orbitals |i> that the operator is applied on
92 /// @return a vector of orbitals K| i>
93 vecfuncT operator()(const vecfuncT& vket) const;
94
95 bool is_symmetric() const { return symmetric_; }
96
97 ExchangeImpl& set_taskq(std::shared_ptr<MacroTaskQ> taskq1) {
98 this->taskq=taskq1;
99 return *this;
100 }
101
102 ExchangeImpl& symmetric(const bool flag) {
103 symmetric_ = flag;
104 return *this;
105 }
106
108 macro_task_info = info;
109 return *this;
110 }
111
112 ExchangeImpl& set_macro_task_info(const std::vector<std::string>& info) {
113 macro_task_info.from_vector_of_strings(info);
114 return *this;
115 }
116
118 algorithm_ = alg;
119 return *this;
120 }
121
122 ExchangeImpl& set_printlevel(const long& level) {
123 printlevel=level;
124 return *this;
125 }
126
127 std::shared_ptr<MacroTaskQ> get_taskq() const {return taskq;}
128
129 World& get_world() const {return world;}
130
131 nlohmann::json get_statistics() const {return statistics;}
132
133 /// return some statistics about the current settings
134 nlohmann::json gather_statistics() const {
135 nlohmann::json j;
136 j["symmetric"] = symmetric_;
137 j["lo"] = lo;
138 j["thresh"] = thresh;
139 j["mul_tol"] = mul_tol;
140 j["printlevel"] = printlevel;
141 j["algorithm"] = to_string(algorithm_);
142 j["macro_task_info"] = macro_task_info.to_json();
143 auto timings = gather_timings(world);
144 j.update(timings);
145 return j;
146 }
147
148private:
149
150 /// exchange using macrotasks, i.e. apply K on a function in individual worlds
151 vecfuncT K_macrotask_efficient(const vecfuncT& vket, const double mul_tol = 0.0) const;
152
153 /// exchange using macrotasks, i.e. apply K on a function in individual worlds row-wise
154 vecfuncT K_macrotask_efficient_row(const vecfuncT& vket, const double mul_tol = 0.0) const;
155
156 /// computing the full square of the double sum (over vket and the K orbitals)
157 vecfuncT K_small_memory(const vecfuncT& vket, const double mul_tol = 0.0) const;
158
159 /// computing the upper triangle of the double sum (over vket and the K orbitals)
160 vecfuncT K_large_memory(const vecfuncT& vket, const double mul_tol = 0.0) const;
161
162 /// computing the upper triangle of the double sum (over vket and the K orbitals)
163 static vecfuncT compute_K_tile(World& world, const vecfuncT& mo_bra, const vecfuncT& mo_ket,
164 const vecfuncT& vket, std::shared_ptr<real_convolution_3d> poisson,
165 const bool symmetric, const double mul_tol = 0.0);
166
167 inline bool printdebug() const {return printlevel >= 10; }
168 inline bool printprogress() const {return (printlevel>=4) and (not (printdebug()));}
169 inline bool printtimings() const {return printlevel>=3;}
170 inline bool printtimings_detail() const {return printlevel>=4;}
171
173 std::shared_ptr<MacroTaskQ> taskq;
174 bool symmetric_ = false; /// is the exchange matrix symmetric? K phi_i = \sum_k \phi_k \int \phi_k \phi_i
175 vecfuncT mo_bra, mo_ket; ///< MOs for bra and ket
176 double lo = 1.e-4;
178 long printlevel = 0;
180
181 mutable nlohmann::json statistics; ///< statistics of the Cloud (timings, memory) and of the parameters of this run
182
184
186 double lo = 1.e-4;
187 double mul_tol = 1.e-7;
188 bool symmetric = false;
189
190 /// custom partitioning for the exchange operator in exchangeoperator.h
191
192 /// arguments are: result[i] += sum_k vket[k] \int 1/r vbra[k] f[i]
193 /// with f and vbra being batched, result and vket being passed on as a whole
195 public:
196 MacroTaskPartitionerExchange(const bool symmetric) : symmetric(symmetric) {
197 max_batch_size=30;
198 }
199
200 bool symmetric = false;
201
202 partitionT do_partitioning(const std::size_t& vsize1, const std::size_t& vsize2,
203 const std::string policy) const override {
204
205 partitionT partition1 = do_1d_partition(vsize1, policy);
206 partitionT partition2 = do_1d_partition(vsize2, policy);
207 partitionT result;
208 for (auto i = partition1.begin(); i != partition1.end(); ++i) {
209 if (symmetric) {
210 for (auto j = i; j != partition1.end(); ++j) {
211 Batch batch(i->first.input[0], j->first.input[0], _);
212 double priority=compute_priority(batch);
213 result.push_back(std::make_pair(batch,priority));
214 }
215 } else {
216 for (auto j = partition2.begin(); j != partition2.end(); ++j) {
217 Batch batch(i->first.input[0], j->first.input[0], _);
218 double priority=compute_priority(batch);
219 result.push_back(std::make_pair(batch,priority));
220 }
221 }
222 }
223 return result;
224 }
225
226 /// compute the priority of this task for non-dumb scheduling
227
228 /// \return the priority as double number (no limits)
229 double compute_priority(const Batch& batch) const override {
230 MADNESS_CHECK(batch.input.size() == 2); // must be quadratic batches
231 long nrow = batch.input[0].size();
232 long ncol = batch.input[1].size();
233 return double(nrow * ncol);
234 }
235 };
236
237 public:
238 MacroTaskExchangeSimple(const long nresult, const double lo, const double mul_tol, const bool symmetric)
239 : nresult(nresult), lo(lo), mul_tol(mul_tol), symmetric(symmetric) {
240 partitioner.reset(new MacroTaskPartitionerExchange(symmetric));
241 }
242
243
244 // you need to define the exact argument(s) of operator() as tuple
245 typedef std::tuple<const std::vector<Function<T, NDIM>>&,
246 const std::vector<Function<T, NDIM>>&,
247 const std::vector<Function<T, NDIM>>&> argtupleT;
248
249 using resultT = std::vector<Function<T, NDIM>>;
250
251 // you need to define an empty constructor for the result
252 // resultT must implement operator+=(const resultT&)
253 resultT allocator(World& world, const argtupleT& argtuple) const {
254 std::size_t n = std::get<0>(argtuple).size();
255 resultT result = zero_functions_compressed<T, NDIM>(world, n);
256 return result;
257 }
258
259 std::vector<Function<T, NDIM>>
260 operator()(const std::vector<Function<T, NDIM>>& vf_batch, // will be batched (column)
261 const std::vector<Function<T, NDIM>>& bra_batch, // will be batched (row)
262 const std::vector<Function<T, NDIM>>& vket) { // will not be batched
263
264 World& world = vf_batch.front().world();
265 resultT Kf = zero_functions_compressed<T, NDIM>(world, nresult);
266
267 bool diagonal_block = batch.input[0] == batch.input[1];
268 auto& bra_range = batch.input[1]; // corresponds to vbra
269 auto& vf_range = batch.input[0]; // corresponds to vf_batch
270
271 if (vf_range.is_full_size()) vf_range.end = vf_batch.size();
272 if (bra_range.is_full_size()) bra_range.end = bra_batch.size();
273
274 MADNESS_CHECK(vf_range.end <= nresult);
275 if (symmetric) MADNESS_CHECK(bra_range.end <= nresult);
276
277 if (symmetric and diagonal_block) {
278 auto ket_batch = bra_range.copy_batch(vket);
279 vecfuncT resultcolumn = compute_diagonal_batch_in_symmetric_matrix(world, ket_batch, bra_batch,
280 vf_batch);
281
282 for (int i = vf_range.begin; i < vf_range.end; ++i){
283 Kf[i] += resultcolumn[i - vf_range.begin];}
284
285 } else if (symmetric and not diagonal_block) {
286 auto[resultcolumn, resultrow]=compute_offdiagonal_batch_in_symmetric_matrix(world, vket, bra_batch,
287 vf_batch);
288
289 for (int i = bra_range.begin; i < bra_range.end; ++i){
290 Kf[i] += resultcolumn[i - bra_range.begin];}
291 for (int i = vf_range.begin; i < vf_range.end; ++i){
292 Kf[i] += resultrow[i - vf_range.begin];}
293 } else {
294 auto ket_batch = bra_range.copy_batch(vket);
295 vecfuncT resultcolumn = compute_batch_in_asymmetric_matrix(world, ket_batch, bra_batch, vf_batch);
296 for (int i = vf_range.begin; i < vf_range.end; ++i)
297 Kf[i] += resultcolumn[i - vf_range.begin];
298 }
299 return Kf;
300 }
301
302 /// compute a batch of the exchange matrix, with identical ranges, exploiting the matrix symmetry
303
304 /// \param subworld the world we're computing in
305 /// \param cloud where to store the results
306 /// \param bra_batch the bra batch of orbitals (including the nuclear correlation factor square)
307 /// \param ket_batch the ket batch of orbitals, i.e. the orbitals to premultiply with
308 /// \param vf_batch the argument of the exchange operator
310 const vecfuncT& ket_batch, // is batched
311 const vecfuncT& bra_batch, // is batched
312 const vecfuncT& vf_batch // is batched
313 ) const {
314 double mul_tol = 0.0;
315 double symmetric = true;
316 auto poisson = Exchange<double, 3>::ExchangeImpl::set_poisson(subworld, lo);
317 return Exchange<T, NDIM>::ExchangeImpl::compute_K_tile(subworld, bra_batch, ket_batch, vf_batch, poisson, symmetric,
318 mul_tol);
319 }
320
321 /// compute a batch of the exchange matrix, with non-identical ranges
322
323 /// \param subworld the world we're computing in
324 /// \param cloud where to store the results
325 /// \param bra_batch the bra batch of orbitals (including the nuclear correlation factor square)
326 /// \param ket_batch the ket batch of orbitals, i.e. the orbitals to premultiply with
327 /// \param vf_batch the argument of the exchange operator
329 const vecfuncT& ket_batch,
330 const vecfuncT& bra_batch,
331 const vecfuncT& vf_batch) const {
332 double mul_tol = 0.0;
333 double symmetric = false;
334 auto poisson = Exchange<double, 3>::ExchangeImpl::set_poisson(subworld, lo);
335 return Exchange<T, NDIM>::ExchangeImpl::compute_K_tile(subworld, bra_batch, ket_batch, vf_batch, poisson, symmetric,
336 mul_tol);
337 }
338
339 /// compute a batch of the exchange matrix, with non-identical ranges
340
341 /// \param subworld the world we're computing in
342 /// \param cloud where to store the results
343 /// \param bra_batch the bra batch of orbitals (including the nuclear correlation factor square)
344 /// \param ket_batch the ket batch of orbitals, i.e. the orbitals to premultiply with
345 /// \param vf_batch the argument of the exchange operator
346 std::pair<vecfuncT, vecfuncT> compute_offdiagonal_batch_in_symmetric_matrix(World& subworld,
347 const vecfuncT& ket, // not batched
348 const vecfuncT& bra_batch, // batched
349 const vecfuncT& vf_batch) const; // batched
350
351 };
352
354
356 double lo = 1.e-4;
357 double mul_tol = 1.e-7;
358 bool symmetric = false;
360
361 /// custom partitioning for the exchange operator in exchangeoperator.h
363 public:
365 max_batch_size=1;
366 }
367 };
368
369 public:
370 MacroTaskExchangeRow(const long nresult, const double lo, const double mul_tol, const Algorithm algorithm)
371 : nresult(nresult), lo(lo), mul_tol(mul_tol), algorithm_(algorithm) {
372 partitioner.reset(new MacroTaskPartitionerRow());
373 name="MacroTaskExchangeRow";
374 }
375
376 // you need to define the exact argument(s) of operator() as tuple
377 typedef std::tuple<const std::vector<Function<T, NDIM>>&,
378 const std::vector<Function<T, NDIM>>&,
379 const std::vector<Function<T, NDIM>>&> argtupleT;
380
381 using resultT = std::vector<Function<T, NDIM>>;
382
383 // you need to define an empty constructor for the result
384 // resultT must implement operator+=(const resultT&)
385 resultT allocator(World& world, const argtupleT& argtuple) const {
386 std::size_t n = std::get<0>(argtuple).size();
387 resultT result = zero_functions_compressed<T, NDIM>(world, n);
388 return result;
389 }
390
391 /// compute exchange row-wise for a fixed orbital phi_i of vket
392
393 /// create 2 worlds: one fetches the function coefficients from the universe, the other
394 /// does the computation, then swap. The result is copied back to the universe
395 std::vector<Function<T, NDIM>>
396 operator()(const std::vector<Function<T, NDIM>>& vket,
397 const std::vector<Function<T, NDIM>>& mo_bra,
398 const std::vector<Function<T, NDIM>>& mo_ket) {
399 std::vector<Function<T,NDIM>> result;
400 if (algorithm_==fetch_compute) {
401 result=row_fetch_compute(vket,mo_bra,mo_ket);
402 } else if (algorithm_==multiworld_efficient_row) {
403 result=row(vket,mo_bra,mo_ket);
404 } else {
405 MADNESS_EXCEPTION("unknown algorithm in Exchange::MacroTaskExchangeRow::operator()",1);
406 }
407 return result;
408 }
409
410 std::vector<Function<T,NDIM>>
411 row(const std::vector<Function<T, NDIM>>& vket,
412 const std::vector<Function<T, NDIM>>& mo_bra,
413 const std::vector<Function<T, NDIM>>& mo_ket) {
414
415 double cpu0, cpu1;
416 World& world = vket.front().world();
417 mul_tol = 0.0;
418
419 resultT Kf = zero_functions_compressed<T, NDIM>(world, 1);
420 vecfuncT psif = zero_functions_compressed<T,NDIM>(world, mo_bra.size());
422
423 // !! NO !! vket is batched, starts at batch.input[0].begin
424 // auto& i = batch.input[0].begin;
425 long i=0;
426 MADNESS_CHECK_THROW(vket.size()==1,"out-of-bounds error in Exchange::MacroTaskExchangeRow::operator()");
427 size_t min_tile = 10;
428 size_t ntile = std::min(mo_bra.size(), min_tile);
429
430 for (size_t ilo=0; ilo<mo_bra.size(); ilo+=ntile){
431 cpu0 = cpu_time();
432 size_t iend = std::min(ilo+ntile,mo_bra.size());
433 vecfuncT tmp_mo_bra(mo_bra.begin()+ilo,mo_bra.begin()+iend);
434 auto tmp_psif = mul_sparse(world, vket[i], tmp_mo_bra, mul_tol);
435 truncate(world, tmp_psif);
436 cpu1 = cpu_time();
437 mul1_timer += long((cpu1 - cpu0) * 1000l);
438
439 cpu0 = cpu_time();
440 tmp_psif = apply(world, *poisson.get(), tmp_psif);
441 truncate(world, tmp_psif);
442 cpu1 = cpu_time();
443 apply_timer += long((cpu1 - cpu0) * 1000l);
444
445 cpu0 = cpu_time();
446 vecfuncT tmp_mo_ket(mo_ket.begin()+ilo,mo_ket.begin()+iend);
447 auto tmp_Kf = dot(world, tmp_mo_ket, tmp_psif);
448 cpu1 = cpu_time();
449 mul2_timer += long((cpu1 - cpu0) * 1000l);
450
451 Kf[0] += tmp_Kf;
452 truncate(world, Kf);
453 }
454
455 return Kf;
456 }
457
458 std::vector<Function<T,NDIM>>
459 row_fetch_compute(const std::vector<Function<T, NDIM>>& vket,
460 const std::vector<Function<T, NDIM>>& mo_bra,
461 const std::vector<Function<T, NDIM>>& mo_ket) {
462
464 double total_execution_time=0.0;
465 double total_fetch_time=0.0;
466 double total_fetch_spawn_time=0.0;
467
468 resultT Kf = zero_functions_compressed<T, NDIM>(*subworld_ptr, 1);
469 {
470 // create the two worlds that will be used for fetching and computing
471 // std::shared_ptr<World> executing_world(subworld_ptr);
472 double cpu0=cpu_time();
473 SafeMPI::Intracomm comm = subworld_ptr->mpi.comm();
474 std::shared_ptr<World> fetching_world(new World(comm.Clone()));
475 std::shared_ptr<World> executing_world(new World(comm.Clone()));
476 double cpu1=cpu_time();
477 print("time to create two worlds:",cpu1-cpu0,"seconds");
478 print("executing_world.id()",executing_world->id(),"fetching_world.id()",fetching_world->id(),"in MacroTaskExchangeRow");
479
480 {
481 auto poisson1 = Exchange<double, 3>::ExchangeImpl::set_poisson(*executing_world, lo);
482 auto poisson2 = Exchange<double, 3>::ExchangeImpl::set_poisson(*fetching_world, lo);
483
484 functionT phi1=copy(*executing_world,vket[0]);
485 functionT phi2=copy(*fetching_world,vket[0]);
486
487 // !! NO !! vket is batched, starts at batch.input[0].begin
488 // auto& i = batch.input[0].begin;
489 MADNESS_CHECK_THROW(vket.size()==1,"out-of-bounds error in Exchange::MacroTaskExchangeRow::operator()");
490 size_t min_tile = 10;
491 size_t ntile = std::min(mo_bra.size(), min_tile);
492
493 struct Tile {
494 size_t ilo;
495 size_t iend;
496 };
497
498
499 // copy the data from the universe bra and ket to subworld bra and ket
500 // returns a pair of vectors in the subworld which are still awaiting the function coefficients
501 auto fetch_data = [&](World& world, const Tile& tile) {
502 MADNESS_CHECK_THROW(mo_bra.size()==mo_ket.size(),
503 "bra and ket size mismatch in Exchange::MacroTaskExchangeRow::execute()");
504
505 std::size_t sz=tile.iend-tile.ilo;
506 vecfuncT subworld_bra(sz);
507 vecfuncT subworld_ket;
508 for (size_t i=tile.ilo; i<tile.iend; ++i) {
509 auto f=copy(world,mo_bra[i],false);
510 subworld_bra[i-tile.ilo]=f;
511 subworld_ket.push_back(copy(world, mo_ket[i],false));
512 }
513 return std::make_pair(subworld_bra,subworld_ket);
514 };
515
516 // apply the exchange operator on phi for a a tile of mo_bra and mo_ket
517 auto execute = [&](World& world, auto poisson, const functionT& phi, const vecfuncT& mo_bra, const vecfuncT& mo_ket) {
518 MADNESS_CHECK_THROW(mo_bra.size()==mo_ket.size(),
519 "bra and ket size mismatch in Exchange::MacroTaskExchangeRow::execute()");
520
521 auto world_id=world.id();
522 auto phi_id=phi.world().id();
523 auto bra_id=mo_bra.front().world().id();
524 auto ket_id=mo_ket.front().world().id();
525 std::string msg="world mismatch in Exchange::MacroTaskExchangeRow::execute(): ";
526 msg+="world.id()="+std::to_string(world_id)+", ";
527 msg+="phi.world().id()="+std::to_string(phi_id)+", ";
528 msg+="bra.world().id()="+std::to_string(bra_id)+", ";
529 msg+="ket.world().id()="+std::to_string(ket_id);
530 if (not (world_id==phi_id && world_id==bra_id && world_id==ket_id)) {
531 print(msg);
532 }
533 MADNESS_CHECK_THROW(world_id==phi_id && world_id==bra_id && world_id==ket_id,msg.c_str());
534
535 double cpu0 = cpu_time();
536 auto tmp_psif = mul_sparse(world, phi, mo_bra, mul_tol);
537 truncate(world, tmp_psif);
538 double cpu1 = cpu_time();
539 mul1_timer += long((cpu1 - cpu0) * 1000l);
540
541 cpu0 = cpu_time();
542 tmp_psif = apply(world, *poisson.get(), tmp_psif);
543 truncate(world, tmp_psif);
544 cpu1 = cpu_time();
545 apply_timer += long((cpu1 - cpu0) * 1000l);
546
547 cpu0 = cpu_time();
548 auto tmp_Kf = dot(world, mo_ket, tmp_psif);
549 cpu1 = cpu_time();
550 mul2_timer += long((cpu1 - cpu0) * 1000l);
551
552 return tmp_Kf.truncate();
553
554 };
555
556 std::vector<Tile> tiles;
557 for (size_t ilo=0; ilo<mo_bra.size(); ilo+=ntile) {
558 tiles.push_back(Tile{ilo,std::min(ilo+ntile,mo_bra.size())});
559 }
560
561 vecfuncT tmp_mo_bra1,tmp_mo_ket1;
562 vecfuncT tmp_mo_bra2,tmp_mo_ket2;
563
564 for (size_t itile=0; itile<tiles.size(); ++itile) {
565 Tile& tile = tiles[itile];
566
567 if (itile==0) {
568 double t0=cpu_time();
569 print("fetching tile",tile.ilo,"into world",executing_world->id());
570 std::tie(tmp_mo_bra1,tmp_mo_ket1)=fetch_data(*executing_world,tiles[itile]);
571 fetching_world->gop.set_forbid_fence(false);
572 double t2=cpu_time();
573 executing_world->gop.fence();
574 double t1=cpu_time();
575 total_fetch_time += (t1 - t0);
576 total_fetch_spawn_time += (t2 - t0);
577 }
578
579 if (itile>=0) {
580 double t0=cpu_time();
581 fetching_world->gop.set_forbid_fence(true);
582 if (itile<tiles.size()-1) {
583 // fetch data into fetching_world while computing in executing_world
584 print("fetching tile",tiles[itile+1].ilo,"into world",fetching_world->id()," at time ",wall_time());
585 std::tie(tmp_mo_bra2,tmp_mo_ket2)=fetch_data(*fetching_world,tiles[itile+1]);
586 }
587 fetching_world->gop.set_forbid_fence(false);
588 double t2=cpu_time();
589 // uncomment the next line to enforce that fetching is finished before executing
590 // fetching_world->gop.fence();
591 double t1=cpu_time();
592 total_fetch_time += (t1 - t0);
593 total_fetch_spawn_time += (t2 - t0);
594
595 print("executing tile",tile.ilo,"in world",executing_world->id());
596 double dpu0=cpu_time();
597 Kf[0]+=execute(*executing_world,poisson1,phi1,tmp_mo_bra1,tmp_mo_ket1);
598 double dpu1=cpu_time();
599 print("time to execute tile",tile.ilo,"in world",executing_world->id(),dpu1-dpu0,"seconds");
600 total_execution_time += dpu1-dpu0;
601
602 fetching_world->gop.fence();
603
604 // change roles of the two worlds
605 std::swap(poisson1,poisson2);
606 std::swap(phi1,phi2);
607 std::swap(tmp_mo_bra2,tmp_mo_bra1);
608 std::swap(tmp_mo_ket2,tmp_mo_ket1);
609 std::swap(executing_world,fetching_world);
610 }
611 }
612 } // objects living in the two worlds must be destroyed before the worlds are freed
613
614 // deferred destruction of WorldObjects happens here
615 fetching_world->gop.fence();
616 executing_world->gop.fence();
617 double cpu2=cpu_time();
618 print("overall time: ",cpu2-cpu0,"seconds");
619 print("total execution time:",total_execution_time,"seconds");
620 print("total fetch time:",total_fetch_time,"seconds");
621 print("total fetch spawn time:",total_fetch_spawn_time,"seconds");
622 } // worlds are destroyed here
623
624 return Kf;
625 }
626 };
627};
628
629} /* namespace madness */
630
631#endif /* SRC_APPS_CHEM_EXCHANGEOPERATOR_H_ */
Operators for the molecular HF and DFT code.
Wrapper around MPI_Comm. Has a shallow copy constructor; use Create(Get_group()) for deep copy.
Definition safempi.h:497
Intracomm Clone() const
Definition safempi.h:696
a batch consists of a 2D-input batch and a 1D-output batch: K-batch <- (I-batch, J-batch)
Definition macrotaskpartitioner.h:124
std::vector< Batch_1D > input
Definition macrotaskpartitioner.h:127
custom partitioning for the exchange operator in exchangeoperator.h
Definition exchangeoperator.h:362
resultT allocator(World &world, const argtupleT &argtuple) const
Definition exchangeoperator.h:385
std::vector< Function< T, NDIM > > row_fetch_compute(const std::vector< Function< T, NDIM > > &vket, const std::vector< Function< T, NDIM > > &mo_bra, const std::vector< Function< T, NDIM > > &mo_ket)
Definition exchangeoperator.h:459
std::vector< Function< T, NDIM > > operator()(const std::vector< Function< T, NDIM > > &vket, const std::vector< Function< T, NDIM > > &mo_bra, const std::vector< Function< T, NDIM > > &mo_ket)
compute exchange row-wise for a fixed orbital phi_i of vket
Definition exchangeoperator.h:396
long nresult
Definition exchangeoperator.h:355
std::tuple< const std::vector< Function< T, NDIM > > &, const std::vector< Function< T, NDIM > > &, const std::vector< Function< T, NDIM > > & > argtupleT
Definition exchangeoperator.h:379
std::vector< Function< T, NDIM > > row(const std::vector< Function< T, NDIM > > &vket, const std::vector< Function< T, NDIM > > &mo_bra, const std::vector< Function< T, NDIM > > &mo_ket)
Definition exchangeoperator.h:411
std::vector< Function< T, NDIM > > resultT
Definition exchangeoperator.h:381
Algorithm algorithm_
Definition exchangeoperator.h:359
MacroTaskExchangeRow(const long nresult, const double lo, const double mul_tol, const Algorithm algorithm)
Definition exchangeoperator.h:370
custom partitioning for the exchange operator in exchangeoperator.h
Definition exchangeoperator.h:194
double compute_priority(const Batch &batch) const override
compute the priority of this task for non-dumb scheduling
Definition exchangeoperator.h:229
MacroTaskPartitionerExchange(const bool symmetric)
Definition exchangeoperator.h:196
partitionT do_partitioning(const std::size_t &vsize1, const std::size_t &vsize2, const std::string policy) const override
override this if you want your own partitioning
Definition exchangeoperator.h:202
vecfuncT compute_diagonal_batch_in_symmetric_matrix(World &subworld, const vecfuncT &ket_batch, const vecfuncT &bra_batch, const vecfuncT &vf_batch) const
compute a batch of the exchange matrix, with identical ranges, exploiting the matrix symmetry
Definition exchangeoperator.h:309
MacroTaskExchangeSimple(const long nresult, const double lo, const double mul_tol, const bool symmetric)
Definition exchangeoperator.h:238
long nresult
Definition exchangeoperator.h:185
std::vector< Function< T, NDIM > > resultT
Definition exchangeoperator.h:249
vecfuncT compute_batch_in_asymmetric_matrix(World &subworld, const vecfuncT &ket_batch, const vecfuncT &bra_batch, const vecfuncT &vf_batch) const
compute a batch of the exchange matrix, with non-identical ranges
Definition exchangeoperator.h:328
std::vector< Function< T, NDIM > > operator()(const std::vector< Function< T, NDIM > > &vf_batch, const std::vector< Function< T, NDIM > > &bra_batch, const std::vector< Function< T, NDIM > > &vket)
Definition exchangeoperator.h:260
std::tuple< const std::vector< Function< T, NDIM > > &, const std::vector< Function< T, NDIM > > &, const std::vector< Function< T, NDIM > > & > argtupleT
Definition exchangeoperator.h:247
resultT allocator(World &world, const argtupleT &argtuple) const
Definition exchangeoperator.h:253
Definition exchangeoperator.h:17
static std::atomic< long > mul1_timer
timing
Definition exchangeoperator.h:23
Exchange< T, NDIM >::ExchangeAlgorithm Algorithm
Definition exchangeoperator.h:60
bool printtimings() const
Definition exchangeoperator.h:169
ExchangeImpl & symmetric(const bool flag)
Definition exchangeoperator.h:102
ExchangeImpl & set_printlevel(const long &level)
Definition exchangeoperator.h:122
nlohmann::json statistics
statistics of the Cloud (timings, memory) and of the parameters of this run
Definition exchangeoperator.h:181
static double elapsed_time
Definition exchangeoperator.h:24
void print_timer(World &world) const
Definition exchangeoperator.h:49
World & get_world() const
Definition exchangeoperator.h:129
ExchangeImpl & set_macro_task_info(const std::vector< std::string > &info)
Definition exchangeoperator.h:112
nlohmann::json get_statistics() const
Definition exchangeoperator.h:131
ExchangeImpl & set_macro_task_info(const MacroTaskInfo &info)
Definition exchangeoperator.h:107
static void reset_timer()
Definition exchangeoperator.h:26
vecfuncT mo_bra
is the exchange matrix symmetric? K phi_i = \sum_k \phi_k \int \phi_k \phi_i
Definition exchangeoperator.h:175
World & world
Definition exchangeoperator.h:172
std::shared_ptr< MacroTaskQ > taskq
Definition exchangeoperator.h:173
Function< T, NDIM > functionT
Definition exchangeoperator.h:18
nlohmann::json gather_statistics() const
return some statistics about the current settings
Definition exchangeoperator.h:134
bool is_symmetric() const
Definition exchangeoperator.h:95
ExchangeImpl & set_taskq(std::shared_ptr< MacroTaskQ > taskq1)
Definition exchangeoperator.h:97
std::vector< functionT > vecfuncT
Definition exchangeoperator.h:19
ExchangeImpl & set_algorithm(const Algorithm &alg)
Definition exchangeoperator.h:117
bool printprogress() const
Definition exchangeoperator.h:168
static std::atomic< long > apply_timer
Definition exchangeoperator.h:21
ExchangeImpl(World &world, const double lo, const double thresh)
default ctor
Definition exchangeoperator.h:65
std::string info() const
Definition exchangeoperator.h:82
nlohmann::json gather_timings(World &world) const
Definition exchangeoperator.h:34
bool printtimings_detail() const
Definition exchangeoperator.h:170
bool printdebug() const
Definition exchangeoperator.h:167
std::shared_ptr< MacroTaskQ > get_taskq() const
Definition exchangeoperator.h:127
static auto set_poisson(World &world, const double lo, const double econv=FunctionDefaults< 3 >::get_thresh())
Definition exchangeoperator.h:84
static std::atomic< long > mul2_timer
Definition exchangeoperator.h:22
void set_bra_and_ket(const vecfuncT &bra, const vecfuncT &ket)
set the bra and ket orbital spaces, and the occupation
Definition exchangeoperator.h:77
Definition SCFOperators.h:105
static std::string to_string(const ExchangeAlgorithm alg)
Definition SCFOperators.h:144
ExchangeAlgorithm
Definition SCFOperators.h:117
@ multiworld_efficient_row
Definition SCFOperators.h:118
Function< T, NDIM > operator()(const Function< T, NDIM > &ket) const
Definition SCFOperators.h:198
std::vector< functionT > vecfuncT
Definition SCFOperators.h:111
std::string info() const
print some information about this operator
Definition SCFOperators.h:173
FunctionDefaults holds default paramaters as static class members.
Definition funcdefaults.h:100
static const double & get_thresh()
Returns the default threshold.
Definition funcdefaults.h:177
A multiresolution adaptive numerical function.
Definition mra.h:139
Definition macrotaskq.h:1288
partition one (two) vectors into 1D (2D) batches.
Definition macrotaskpartitioner.h:182
std::list< std::pair< Batch, double > > partitionT
Definition macrotaskpartitioner.h:186
The Nemo class.
Definition nemo.h:326
nlohmann::json statistics
Definition SCFOperators.h:64
std::shared_ptr< MacroTaskQ > taskq
Definition SCFOperators.h:71
Definition SCF.h:190
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition worldgop.h:872
A parallel world class.
Definition world.h:132
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:320
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:330
unsigned long id() const
Definition world.h:315
WorldGopInterface & gop
Global operations.
Definition world.h:207
Declares the Cloud class for storing data and transfering them between worlds.
double(* f)(const coord_3d &)
Definition derivatives.cc:54
static double lo
Definition dirac-hatom.cc:23
std::vector< Spinor > truncate(std::vector< Spinor > arg)
Definition dirac-hatom.cc:503
Fcwf apply(World &world, real_convolution_3d &op, const Fcwf &psi)
Definition fcwf.cc:281
Fcwf copy(Fcwf psi)
Definition fcwf.cc:338
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:28
Declares the macrotaskq and MacroTaskBase classes.
General header file for using MADNESS.
#define MADNESS_CHECK(condition)
Check a condition — even in a release build the condition is always evaluated so it can have side eff...
Definition madness_exception.h:182
#define MADNESS_EXCEPTION(msg, value)
Macro for throwing a MADNESS exception.
Definition madness_exception.h:119
#define MADNESS_CHECK_THROW(condition, msg)
Check a condition — even in a release build the condition is always evaluated so it can have side eff...
Definition madness_exception.h:207
void print(const tensorT &t)
Definition mcpfit.cc:140
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
Function< TENSOR_RESULT_TYPE(L, R), NDIM > mul_sparse(const Function< L, NDIM > &left, const Function< R, NDIM > &right, double tol, bool fence=true)
Sparse multiplication — left and right must be reconstructed and if tol!=0 have tree of norms already...
Definition mra.h:1836
Function< TENSOR_RESULT_TYPE(T, R), NDIM > dot(World &world, const std::vector< Function< T, NDIM > > &a, const std::vector< Function< R, NDIM > > &b, bool fence=true)
Multiplies and sums two vectors of functions r = \sum_i a[i] * b[i].
Definition vmra.h:1589
double wall_time()
Returns the wall time in seconds relative to an arbitrary origin.
Definition timers.cc:48
static SeparatedConvolution< double, 3 > * CoulombOperatorPtr(World &world, double lo, double eps, const std::array< LatticeRange, 3 > &lattice_ranges=FunctionDefaults< 3 >::get_bc().lattice_range(), int k=FunctionDefaults< 3 >::get_k())
Factory function generating separated kernel for convolution with 1/r in 3D.
Definition operator.h:1759
std::string name(const FuncType &type, const int ex=-1)
Definition ccpairfunction.h:28
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition mra.h:2096
static const double thresh
Definition rk.cc:45
Definition macrotaskq.h:280
static MacroTaskInfo preset(const std::string name)
Definition macrotaskq.h:313
nlohmann::json to_json() const
Definition macrotaskq.h:448
void from_vector_of_strings(const std::vector< std::string > &vec)
set policy from a vector of strings, assuming the order is storage policy, cloud distribution policy,...
Definition macrotaskq.h:373
class to temporarily redirect output to cout
Definition print.h:277
double cpu_time()
Definition test_list.cc:43
constexpr std::size_t NDIM
Definition testgconv.cc:54