MADNESS 0.10.1
lbdeux.h
Go to the documentation of this file.
1/*
2 This file is part of MADNESS.
3
4 Copyright (C) 2007,2010 Oak Ridge National Laboratory
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 For more information please contact:
21
22 Robert J. Harrison
23 Oak Ridge National Laboratory
24 One Bethel Valley Road
25 P.O. Box 2008, MS-6367
26
27 email: harrisonrj@ornl.gov
28 tel: 865-241-3937
29 fax: 865-572-0680
30
31 $Id$
32*/
33#ifndef MADNESS_MRA_IBDEUX_H__INCLUDED
34#define MADNESS_MRA_IBDEUX_H__INCLUDED
35
37#include <map>
38#include <queue>
41
42#include <madness/mra/key.h>
44
45/// \file mra/lbdeux.h
46/// \brief Implements (2nd generation) static load/data balancing for functions
47/// \ingroup function
48
49namespace madness {
50
51 template<typename T, std::size_t NDIM>
52 class FunctionNode;
53
54 template<typename T, std::size_t NDIM>
55 class Function;
56
57 template <std::size_t NDIM>
58 class LBDeuxPmap : public WorldDCPmapInterface< Key<NDIM> > {
59 typedef Key<NDIM> keyT;
60 typedef std::pair<keyT,ProcessID> pairT;
61 typedef std::map<keyT,ProcessID> mapT;
63 typedef typename mapT::const_iterator iteratorT;
64
65 public:
66 LBDeuxPmap(const std::vector<pairT>& v) {
67 for (unsigned int i=0; i<v.size(); ++i) {
68 map.insert(v[i]);
69 }
70 }
71
72 // If level 0 is not entered as a node this will
73 // be an infinite loop.
74 ProcessID owner(const keyT& key) const {
75 while (key.level() >= 0) {
76 iteratorT it = map.find(key);
77 if (it == map.end()) {
78 return owner(key.parent());
79 }
80 else {
81 return it->second;
82 }
83 }
84 madness::print("Mon Dieux!", key);
85 throw "LBDeuxPmap: lookup failed";
86 }
87
88 void print() const {
89 madness::print("LBDeuxPmap");
90 }
91 };
92
93
94
95 template <std::size_t NDIM>
96 class LBNodeDeux {
97 static const int nchild = (1<<NDIM);
98 typedef Key<NDIM> keyT;
101
102 double child_cost[nchild]; // Removed volatile since all parallel updates will be under mutex from active message and no unprotected reads
103 double my_cost;
106
108
109 /// Computes index of child key in this node using last bit of translations
110 int index(const keyT& key) {
111 int ind = 0;
112 for (std::size_t d=0; d<NDIM; ++d) ind += ((key.translation()[d])&0x1) << d;
113 return ind;
114 }
115
116 public:
118 : my_cost(0.0), total_cost(0.0), gotkids(false) {
119 nsummed = 0;
120 for (int i=0; i<nchild; ++i)
121 child_cost[i] = 0.0;
122 }
123
125 my_cost(other.my_cost), total_cost(other.total_cost), gotkids(other.gotkids)
126 {
127 nsummed = other.nsummed;
128 for (int i=0; i<nchild; ++i)
129 child_cost[i] = other.child_cost[i];
130 }
131
133 for (int i=0; i<nchild; ++i)
134 child_cost[i] = other.child_cost[i];
135 my_cost = other.my_cost;
136 total_cost = other.total_cost;
137 gotkids = other.gotkids;
138 nsummed = other.nsummed;
139
140 return *this;
141 }
142
143 bool has_children() const {
144 return gotkids;
145 }
146
147 double get_total_cost() const {
148 return total_cost;
149 }
150
151 /// Accumulates cost into this node
152 void add(double cost, bool got_kids) {
153 total_cost = (my_cost += cost);
154 gotkids = gotkids || got_kids;
155 }
156
157 /// Accumulates cost up the tree from children
158 void sum(const treeT& tree, const keyT& child, double value) {
159 child_cost[index(child)] = value;
160 ++nsummed;
161 if (nsummed == nchild) {
162 for (int i=0; i<nchild; ++i) total_cost += child_cost[i];
163 if (child.level() > 1) {
164 keyT key = child.parent();
165 keyT parent = key.parent();
166 const_cast<treeT&>(tree).task(parent, &nodeT::sum, tree, key, double(total_cost));
167 }
168 }
169 }
170
171
172 /// Logically deletes this node by setting cost to -1
173
174 /// Cannot actually erase this node from the container since the send() handler
175 /// is holding an accessor to it.
176 void deleter(const treeT& tree, const keyT& key) {
177 total_cost = my_cost = -1.0;
178 if (has_children()) {
179 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
180 const keyT child = kit.key();
181 const_cast<treeT&>(tree).task(child, &nodeT::deleter, tree, child);
182 }
183 }
184 }
185
186 /// Descends tree deleting all except internal nodes and sub-tree parents
187 void partition(const treeT& tree, const keyT& key, double avg) {
188 if (has_children()) {
189 // Sort children in descending cost order
190 keyT keys[nchild];
191 double vals[nchild];
192 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
193 const keyT child = kit.key();
194 int ind = index(child);
195 keys[ind] = child;
196 vals[ind] = child_cost[ind];
197 }
198 for (int i=0; i<nchild; ++i) {
199 for (int j=i+1; j<nchild; ++j) {
200 if (vals[i] < vals[j]) {
201 std::swap(vals[i],vals[j]);
202 std::swap(keys[i],keys[j]);
203 }
204 }
205 }
206
207 // Split off subtrees in decreasing cost order
208 for (int i=0; i<nchild; ++i) {
209 if (total_cost <= avg) {
210 const_cast<treeT&>(tree).task(keys[i], &nodeT::deleter, tree, keys[i]);
211 }
212 else {
213 total_cost -= vals[i];
214 const_cast<treeT&>(tree).task(keys[i], &nodeT::partition, tree, keys[i], avg);
215 }
216 }
217 }
218 }
219
220 template <typename Archive>
221 void serialize(Archive& ar) {
222 ar & archive::wrap_opaque(this,1);
223 }
224 };
225
226
227 template <std::size_t NDIM>
232 typedef typename treeT::iterator iteratorT;
236
237
238 template <typename T, typename costT>
239 struct add_op {
241 const costT& costfn;
243 void operator()(const keyT& key, const FunctionNode<T,NDIM>& node) const {
244 if (lb->tree.is_local(key))
245 lb->tree.send(key, &nodeT::add, costfn(key,node), node.has_children());
246 else
247 lb->tree.task(key, &nodeT::add, costfn(key,node), node.has_children());
248 }
249 };
250
251 /// Sums costs up the tree returning to everyone the total cost
252 double sum() {
253 world.gop.fence();
254 const_iteratorT end = tree.end();
255 for (const_iteratorT it=tree.begin(); it!=end; ++it) {
256 const keyT& key = it->first;
257 const nodeT& node = it->second;
258 if (!node.has_children() && key.level() > 0) {
259 tree.task(key.parent(), &nodeT::sum, tree, key, node.get_total_cost());
260 }
261 }
262 world.gop.fence();
263 double total;
264 keyT key0(0);
265 if (world.rank() == tree.owner(key0)) {
266 total = tree.find(key0).get()->second.get_total_cost();
267 }
268 world.gop.broadcast(total, tree.owner(key0));
269 world.gop.fence();
270
271 return total;
272 }
273
274 /// Used to sort results into descending order
275 static bool compare(const std::pair<keyT,double>& a, const std::pair<keyT,double>& b) {
276 return a.second < b.second;
277 }
278
279
280 public:
282 : world(world)
283 , tree(world, FunctionDefaults<NDIM>::get_pmap()) {
284 world.gop.fence();
285 };
286
287 /// Accumulates cost from a function
288 template <typename T, typename costT>
289 void add_tree(const Function<T,NDIM>& f, const costT& costfn, bool fence=false) {
290 const_cast<Function<T,NDIM>&>(f).unaryop_node(add_op<T,costT>(this,costfn), fence);
291 }
292
293 /// Printing for the curious
294 void print_tree(const keyT& key = keyT(0)) {
295 Future<iteratorT> futit = tree.find(key);
296 iteratorT it = futit.get();
297 if (it != tree.end()) {
298 for (int i=0; i<key.level(); ++i) std::cout << " ";
299 print(key, it->second.get_total_cost());
300
301 if (it->second.has_children()) {
302 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
303 print_tree(kit.key());
304 }
305 }
306 }
307 }
308
309 struct CostPerProc {
310 double cost;
311 int proc;
312 CostPerProc() : cost(0.0), proc(0) {}
313 CostPerProc(double cost, int proc) : cost(cost), proc(proc) {}
314 bool operator<(const CostPerProc& other) const {
315 return cost > other.cost; // Want ascending order
316 }
317 };
318
319 /// Actually does the partitioning of the tree
320 std::shared_ptr< WorldDCPmapInterface<keyT> > load_balance(double fac = 1.0, bool printstuff=false) {
321 world.gop.fence();
322 // Compute full tree of costs
323 double avg = sum()/(world.size()*fac);
324 //if (world.rank() == 0) print_tree();
325 world.gop.fence();
326
327 // Create partitioning
328 keyT key0(0);
329 if (world.rank() == tree.owner(key0)) {
330 tree.send(key0, &nodeT::partition, tree, key0, avg*1.1);
331 }
332 world.gop.fence();
333
334 // Collect entire vector onto node0
335 std::vector< std::pair<keyT,double> > results;
336 const_iteratorT end = tree.end();
337 for (const_iteratorT it=tree.begin(); it!=end; ++it) {
338 if (it->second.get_total_cost() >= 0) {
339 results.push_back(std::make_pair(it->first,it->second.get_total_cost()));
340 }
341 }
342 results = world.gop.concat0(results, 128*1024*1024);
343 world.gop.fence();
344
345 std::vector< std::pair<keyT,ProcessID> > map;
346
347 if (world.rank() == 0) {
348
349 std::sort(results.begin(), results.end(), compare);
350 if (printstuff) {
351 print("THESE ARE THE INITIAL SUBTREES");
352 for (unsigned int i=0; i<results.size(); ++i) print(i,results[i]);
353 }
354
355 // Now use bin packing to cram the results together
356 map.reserve(results.size());
357
358 // Shove the first nproc entries directly into the queue
359 unsigned int nproc = world.size();
360 std::priority_queue<CostPerProc> costs;
361 for (unsigned int p=0; p<nproc && !results.empty(); ++p) {
362 const std::pair<keyT,double>& f = results.back();
363 costs.push(CostPerProc(f.second,p));
364 map.push_back(std::make_pair(f.first,p));
365 results.pop_back();
366 }
367
368 // Process the remainder using the sorting maintained by the priority queue
369 while (!results.empty()) {
370 const std::pair<keyT,double>& f = results.back();
371 CostPerProc top = costs.top();
372 costs.pop();
373 top.cost += f.second;
374 costs.push(top);
375 map.push_back(std::make_pair(f.first,top.proc));
376 results.pop_back();
377 }
378 if (printstuff) {
379 print("THIS IS THE MAP");
380 print(map);
381 print("THESE ARE THE COSTS PER PROCESSOR");
382 while (!costs.empty()) {
383 print(costs.top().proc,costs.top().cost);
384 costs.pop();
385 }
386 }
387 }
388
389 world.gop.fence();
391 world.gop.fence();
392
393 // Return the Procmap
394
395 return std::shared_ptr< WorldDCPmapInterface<keyT> >(new LBDeuxPmap<NDIM>(map));
396 }
397 };
398}
399
400
401#endif // MADNESS_MRA_IBDEUX_H__INCLUDED
402
Implements AtomicInt.
An integer with atomic set, get, read+increment, read+decrement, and decrement+test operations.
Definition atomicint.h:126
FunctionDefaults holds default paramaters as static class members.
Definition funcdefaults.h:204
FunctionNode holds the coefficients, etc., at each node of the 2^NDIM-tree.
Definition funcimpl.h:124
bool has_children() const
Returns true if this node has children.
Definition funcimpl.h:204
A multiresolution adaptive numerical function.
Definition mra.h:122
A future is a possibly yet unevaluated value.
Definition future.h:373
T & get(bool dowork=true) &
Gets the value, waiting if necessary.
Definition future.h:574
Iterates in lexical order thru all children of a key.
Definition key.h:374
Key is the index for a node of the 2^NDIM-tree.
Definition key.h:66
Level level() const
Definition key.h:159
Key parent(int generation=1) const
Returns the key of the parent.
Definition key.h:187
Definition lbdeux.h:58
ProcessID owner(const keyT &key) const
Maps key to processor.
Definition lbdeux.h:74
void print() const
Definition lbdeux.h:88
Key< NDIM > keyT
Definition lbdeux.h:59
mapT map
Definition lbdeux.h:62
std::map< keyT, ProcessID > mapT
Definition lbdeux.h:61
mapT::const_iterator iteratorT
Definition lbdeux.h:63
LBDeuxPmap(const std::vector< pairT > &v)
Definition lbdeux.h:66
std::pair< keyT, ProcessID > pairT
Definition lbdeux.h:60
Definition lbdeux.h:96
void add(double cost, bool got_kids)
Accumulates cost into this node.
Definition lbdeux.h:152
double get_total_cost() const
Definition lbdeux.h:147
LBNodeDeux()
Definition lbdeux.h:117
void serialize(Archive &ar)
Definition lbdeux.h:221
Key< NDIM > keyT
Definition lbdeux.h:98
void partition(const treeT &tree, const keyT &key, double avg)
Descends tree deleting all except internal nodes and sub-tree parents.
Definition lbdeux.h:187
bool gotkids
Definition lbdeux.h:105
AtomicInt nsummed
Definition lbdeux.h:107
LBNodeDeux< NDIM > & operator=(const LBNodeDeux< NDIM > &other)
Definition lbdeux.h:132
double total_cost
Definition lbdeux.h:104
static const int nchild
Definition lbdeux.h:97
bool has_children() const
Definition lbdeux.h:143
WorldContainer< keyT, nodeT > treeT
Definition lbdeux.h:100
int index(const keyT &key)
Computes index of child key in this node using last bit of translations.
Definition lbdeux.h:110
LBNodeDeux(const LBNodeDeux< NDIM > &other)
Definition lbdeux.h:124
double my_cost
Definition lbdeux.h:103
double child_cost[nchild]
Definition lbdeux.h:102
LBNodeDeux< NDIM > nodeT
Definition lbdeux.h:99
void deleter(const treeT &tree, const keyT &key)
Logically deletes this node by setting cost to -1.
Definition lbdeux.h:176
void sum(const treeT &tree, const keyT &child, double value)
Accumulates cost up the tree from children.
Definition lbdeux.h:158
Definition lbdeux.h:228
Key< NDIM > keyT
Definition lbdeux.h:229
World & world
Definition lbdeux.h:234
treeT::iterator iteratorT
Definition lbdeux.h:232
treeT tree
Definition lbdeux.h:235
LBNodeDeux< NDIM > nodeT
Definition lbdeux.h:230
std::shared_ptr< WorldDCPmapInterface< keyT > > load_balance(double fac=1.0, bool printstuff=false)
Actually does the partitioning of the tree.
Definition lbdeux.h:320
void print_tree(const keyT &key=keyT(0))
Printing for the curious.
Definition lbdeux.h:294
treeT::const_iterator const_iteratorT
Definition lbdeux.h:233
double sum()
Sums costs up the tree returning to everyone the total cost.
Definition lbdeux.h:252
static bool compare(const std::pair< keyT, double > &a, const std::pair< keyT, double > &b)
Used to sort results into descending order.
Definition lbdeux.h:275
void add_tree(const Function< T, NDIM > &f, const costT &costfn, bool fence=false)
Accumulates cost from a function.
Definition lbdeux.h:289
WorldContainer< keyT, nodeT > treeT
Definition lbdeux.h:231
LoadBalanceDeux(World &world)
Definition lbdeux.h:281
Makes a distributed container with specified attributes.
Definition worlddc.h:866
bool find(accessor &acc, const keyT &key)
Write access to LOCAL value by key. Returns true if found, false otherwise (always false for remote).
Definition worlddc.h:987
iterator begin()
Returns an iterator to the beginning of the local data (no communication)
Definition worlddc.h:1070
ProcessID owner(const keyT &key) const
Returns processor that logically owns key (no communication)
Definition worlddc.h:1034
implT::const_iterator const_iterator
Definition worlddc.h:872
iterator end()
Returns an iterator past the end of the local data (no communication)
Definition worlddc.h:1084
implT::iterator iterator
Definition worlddc.h:871
Future< REMFUTURE(MEMFUN_RETURNT(memfunT))> task(const keyT &key, memfunT memfun, const TaskAttributes &attr=TaskAttributes())
Adds task "resultT memfun()" in process owning item (non-blocking comm if remote)
Definition worlddc.h:1426
bool is_local(const keyT &key) const
Returns true if the key maps to the local processor (no communication)
Definition worlddc.h:1041
Future< MEMFUN_RETURNT(memfunT)> send(const keyT &key, memfunT memfun)
Sends message "resultT memfun()" to item (non-blocking comm if remote)
Definition worlddc.h:1183
Interface to be provided by any process map.
Definition worlddc.h:82
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition worldgop.h:754
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition worldgop.cc:161
void broadcast(void *buf, size_t nbyte, ProcessID root, bool dowork=true, Tag bcast_tag=-1)
Broadcasts bytes from process root while still processing AM & tasks.
Definition worldgop.cc:173
std::vector< T > concat0(const std::vector< T > &v, size_t bufsz=1024 *1024)
Concatenate an STL vector of serializable stuff onto node 0.
Definition worldgop.h:953
A parallel world class.
Definition world.h:132
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:318
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:328
WorldGopInterface & gop
Global operations.
Definition world.h:205
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition derivatives.cc:72
Provides FunctionDefaults and utilities for coordinate transformation.
archive_array< unsigned char > wrap_opaque(const T *, unsigned int)
Factory function to wrap a pointer to contiguous data as an opaque (uchar) archive_array.
Definition archive.h:925
static const double v
Definition hatom_sf_dirac.cc:20
Multidimension Key for MRA tree and associated iterators.
Macros and tools pertaining to the configuration of MADNESS.
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
void print(const T &t, const Ts &... ts)
Print items to std::cout (items separated by spaces) and terminate with a new line.
Definition print.h:225
NDIM & f
Definition mra.h:2416
static const double b
Definition nonlinschro.cc:119
static const double d
Definition nonlinschro.cc:121
static const double a
Definition nonlinschro.cc:118
CostPerProc(double cost, int proc)
Definition lbdeux.h:313
int proc
Definition lbdeux.h:311
bool operator<(const CostPerProc &other) const
Definition lbdeux.h:314
double cost
Definition lbdeux.h:310
CostPerProc()
Definition lbdeux.h:312
Definition lbdeux.h:239
void operator()(const keyT &key, const FunctionNode< T, NDIM > &node) const
Definition lbdeux.h:243
const costT & costfn
Definition lbdeux.h:241
LoadBalanceDeux * lb
Definition lbdeux.h:240
add_op(LoadBalanceDeux *lb, const costT &costfn)
Definition lbdeux.h:242
int task(int i)
Definition test_runtime.cpp:4
static const std::size_t NDIM
Definition testpdiff.cc:42
Implements WorldContainer.
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43