MADNESS 0.10.1
memory_measurement.h
Go to the documentation of this file.
1//
2// Created by Florian Bischoff on 2/16/25.
3//
4
5#ifndef MEMORY_MEASUREMENT_H
6#define MEMORY_MEASUREMENT_H
7
8
10#include<madness/mra/mra.h>
11
12namespace madness {
13 /// measure the memory usage of all FunctionImpl objects of all worlds
14
15 /// Assuming FunctionImpl are the largest objects in a calculation
16 /// data is kept as key-value pairs in a map: key=world_id,rank,hostname,NDIM, value=#functions,memory_GB
18 public:
19 /// forward declaration
20 struct MemKey;
21 struct MemInfo;
22
23 /// measure the memory usage of all objects of all worlds
// Convenience entry point: fences the given world, prints the memory map held by `mm`
// (print_memory_map reduces it first), fences again and returns `mm`'s map.
// NOTE(review): the embedded source numbering jumps 24->26 and 26->28, so two original lines
// are absent from this listing -- presumably the declaration of `mm` and the step that fills
// its map. Confirm against the real header before relying on this block.
24 static std::map<MemKey,MemInfo> measure_and_print(World& world) {
26 world.gop.fence();
28 world.gop.fence();
29 mm.print_memory_map(world); // will reduce memory_map to rank0 of universe
30 world.gop.fence();
// the map is deliberately left populated so that it can be returned below
31 // mm.clear_map();
32 return mm.world_memory_map; // on rank0 only!
33 }
34
35 private:
36 /// get the hostname of this machine, rank-local
/// @return the name reported by gethostname(), or "localhost" (the MemKey default) if the call fails
static std::string get_hostname() {
    char buffer[256];
    // gethostname() signals failure through a non-zero return value; without this check the
    // uninitialized buffer would be read below
    if (gethostname(buffer, sizeof(buffer)) != 0) return std::string("localhost");
    // a name that had to be truncated is not guaranteed to be null-terminated, so force it
    buffer[sizeof(buffer) - 1] = '\0';
    return std::string(buffer);
}
42
43 public:
/// Key of the memory map: one entry per (world id, rank, hostname, dimension) combination.
44 struct MemKey {
// id of the world the counted objects belong to
45 unsigned long world_id=1;
// rank within that world
46 unsigned long rank=0;
// name of the machine; stays "localhost" unless it is assigned elsewhere
47 std::string hostname="localhost";
// dimension NDIM of the counted FunctionImpl objects; 0 when built from a World only
48 std::size_t DIM=0;
49 MemKey() = default;
// Build a key from a world: stores world.id() and world.rank().
// NOTE(review): the embedded numbering jumps 50->52, so one line of this constructor body is
// missing from the listing -- presumably the hostname assignment; confirm against the real header.
50 explicit MemKey(World& world) : world_id(world.id()), rank(world.rank()) {
52 }
53
// Build a key from a FunctionImpl: delegates to MemKey(fimpl.world), then records NDIM as DIM.
54 template<typename T, std::size_t NDIM>
55 explicit MemKey(const FunctionImpl<T,NDIM>& fimpl) : MemKey(fimpl.world) {
56 DIM=NDIM;
57 }
58 MemKey(const MemKey& other) = default;
59
// Hand all four fields to the archive, in the order world_id, rank, hostname, DIM.
60 template<typename Archive>
61 void serialize(Archive& ar) const {
62 ar & world_id & rank & hostname & DIM;
63 }
64 };
65 friend bool operator<(const MemKey& lhs, const MemKey& other) {
66 if (lhs.hostname!=other.hostname) return lhs.hostname<other.hostname;
67 if (lhs.world_id!=other.world_id) return lhs.world_id<other.world_id;
68 if (lhs.rank!=other.rank) return lhs.rank<other.rank;
69 return lhs.DIM<other.DIM;
70 }
71
/// Value of the memory map: what has been accumulated for one MemKey.
// NOTE(review): the embedded numbering skips original lines 75 and 79. Other code in this file
// reads and writes a `num_functions` member of MemInfo, so line 75 presumably declares it, and
// line 79 presumably is the body of serialize(); confirm against the real header.
72 struct MemInfo {
73 MemInfo() =default;
74 MemInfo(const MemInfo& other) =default;
// accumulated size in GB
76 double memory_GB=0.0;
// serialization hook; its body is not visible in this listing (see note above)
77 template <typename Archive>
78 void serialize(Archive& ar) const {
80 }
81 };
82
83 typedef std::map<MemKey,MemInfo> MemInfoMapT;
84
85
86 private:
87 template<typename T, std::size_t NDIM>
89 World& world=*World::world_from_id(obj_id.get_world_id());
90 auto ptr_opt = world.ptr_from_id< WorldObject< FunctionImpl<T,NDIM> > >(obj_id);
91 if (!ptr_opt)
92 MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use a locally uninitialized object",0);
93 return (dynamic_cast< const FunctionImpl<T,NDIM>*>(*ptr_opt));
94 }
95
96 /// keeps track of the memory usage of all objects of one or many worlds **on this rank**
98 bool debug=false;
99
100 template<typename T, std::size_t NDIM>
102 const double toGB=double(sizeof(T))/(1024*1024*1024); // convert to GB
103 auto sz=f.nCoeff_local();
104 if (debug) print("funcimpl<T,",NDIM,"> id",f.id(), "rank",f.world.rank(),"size in GB",sz*toGB);
105
106 // accumulate the sizes into the world_memory_map
107 world_memory_map[MemKey(f)].num_functions++;
108 world_memory_map[MemKey(f)].memory_GB+=sz*toGB;
109 }
110
111 public:
112
113 /// add all FunctionImpl<double,NDIM> objects (NDIM=1,...,6) of the given world to the memory map
114 /// the memory map is a rank-local object
115 void search_world(World& world) {
116
117 auto all_objects=world.get_object_ids();
118 if (debug and (world.rank()==0)) print("objects in this world ",all_objects);
119
120 for (const auto& obj : all_objects) {
121 if (auto funcimpl=cast_to_funcimpl_ptr<double,1>(obj)) add_memory_to_map(*funcimpl);
122 if (auto funcimpl=cast_to_funcimpl_ptr<double,2>(obj)) add_memory_to_map(*funcimpl);
123 if (auto funcimpl=cast_to_funcimpl_ptr<double,3>(obj)) add_memory_to_map(*funcimpl);
124 if (auto funcimpl=cast_to_funcimpl_ptr<double,4>(obj)) add_memory_to_map(*funcimpl);
125 if (auto funcimpl=cast_to_funcimpl_ptr<double,5>(obj)) add_memory_to_map(*funcimpl);
126 if (auto funcimpl=cast_to_funcimpl_ptr<double,6>(obj)) add_memory_to_map(*funcimpl);
127 }
128 }
129
130 /// add all FunctionImpl<double,NDIM> objects (NDIM=1,...,6) **of all worlds** to the memory map
131 /// the memory map is a rank-local object
133 auto all_worlds=World::get_world_ids(); // all worlds but the default world
134 all_worlds.push_back(World::get_default().id()); // add the default world
135 if (debug) print("searching worlds",all_worlds);
136 for (auto world_id : all_worlds) {
137 if (debug) print("searching world",world_id);
138 World* thisworld=World::world_from_id(world_id);
139 if (World::exists(thisworld)) search_world(*thisworld);
140 }
141 }
142
143 /// reset the memory map
144 void clear_map() {
145 world_memory_map.clear();
146 }
147
148 /// gather all information of the map on rank 0 of the universe
149 void reduce_map(World& universe) {
150 // turn map into vector
151 std::vector<std::pair<MemKey,MemInfo>> memory_vec(world_memory_map.begin(),world_memory_map.end());
152 // gather all data on rank 0
153 memory_vec=universe.gop.concat0(memory_vec);
154 // turn back into map
155 clear_map();
156 for (const auto& [memkey,memval] : memory_vec) {
157 world_memory_map[memkey]=memval;
158 }
159 }
160
161
162 /// given the hostname, return number of ranks and total rss on that node
163 static std::map<std::string,std::pair<int,double>> host_to_nrank_and_rss_map(World& universe) {
164 auto accumulate_left =[](std::pair<int,double>& a, const std::pair<int,double>& b) {
165 a.first++;
166 a.second+=b.second;
167 };
168 auto rank_to_host=rank_to_host_and_rss_map(universe);
169 std::map<std::string,std::pair<int,double>> host_to_rank;
170 for (const auto& [rank,hostname_and_rss] : rank_to_host) {
171 accumulate_left(host_to_rank[hostname_and_rss.first],std::pair<int,double>(rank,hostname_and_rss.second));
172 }
173 return host_to_rank;
174 }
175
176
177 /// accumulate the memory usage of all objects of all worlds for this rank per host
178
179 /// integrate out world and dim from MemKey, result lives on rank 0 only
180 std::vector<std::pair<std::pair<std::string,long>,double>> memory_per_host_and_rank(World& world) const {
181
182 std::map<std::pair<std::string,long>,double> memory_per_host;
183 for (const auto& [memkey,memval] : world_memory_map) {
184 memory_per_host[{memkey.hostname,memkey.rank}]+=memval.memory_GB;
185 }
186
187 // turn map into vector and sort
188 std::vector<std::pair<std::pair<std::string,long>,double>> memory_per_host_vec(memory_per_host.begin(),memory_per_host.end());
189 std::sort(memory_per_host_vec.begin(),memory_per_host_vec.end(),[](const auto& a, const auto& b){return a.first<b.first;});
190
191 return memory_per_host_vec;
192 }
193
194 /// accumulate the memory usage of all objects of all worlds over all ranks per host
195
196 /// integrate out world, dim and rank, only hostname is left
197 std::vector<std::pair<std::string,double>> memory_per_host_all_ranks(
198 const std::vector<std::pair<std::pair<std::string,long>,double>>& mem_per_host_and_rank) const {
199 std::map<std::string,double> mem_per_host;
200 for (auto& [hostname_and_rank,memory] : mem_per_host_and_rank) {
201 auto hostname=hostname_and_rank.first;
202 mem_per_host[hostname]+=memory;
203 }
204 // turn map into vector
205 std::vector<std::pair<std::string,double>> mem_per_host_vec(mem_per_host.begin(),mem_per_host.end());
206 return mem_per_host_vec;
207 }
208
209 /// return the total memory usage over all hosts
210 static double total_memory(const MemInfoMapT& memmap) {
211 double total_memory=0.0;
212 for (const auto& [memkey,memval] : memmap) {
213 total_memory+=memval.memory_GB;
214 }
215 return total_memory;
216 }
217
218 /// return the largest memory_GB of any single entry of the map (0 for an empty map)
219 static double max_memory(const MemInfoMapT& memmap) {
220 double max_memory=0.0;
221 for (const auto& [memkey,memval] : memmap) {
222 if (memval.memory_GB>max_memory) max_memory=memval.memory_GB;
223 }
224 return max_memory;
225 }
226
227 /// return the smallest memory_GB of any single entry of the map (std::numeric_limits<double>::max() for an empty map)
228 static double min_memory(const MemInfoMapT& memmap) {
229 double min_memory=std::numeric_limits<double>::max();
230 for (const auto& [memkey,memval] : memmap) {
231 if (memval.memory_GB<min_memory) min_memory=memval.memory_GB;
232 }
233 return min_memory;
234 }
235
236 /// @param[in] msg a message to print before the memory map
237 /// @param[in] world the world over which the map is reduced and fenced; headers and per-host totals are printed on its rank 0
238 void print_memory_map(World& world, std::string msg="") {
239 reduce_map(world);
240 world.gop.fence();
241 if (world.rank()==0) {
242 print("final memory map:",msg);
243 print("hostname world rank DIM #funcs memory_GB");
244 }
245 constexpr std::size_t bufsize=256;
246 char line[bufsize];
247
248 // print all information
249 world.gop.fence();
250 // turn into vector
251 std::vector<std::pair<MemKey,MemInfo>> memory_vec(world_memory_map.begin(),world_memory_map.end());
252 std::sort(memory_vec.begin(),memory_vec.end(),[](const std::pair<MemKey,MemInfo>& a, const std::pair<MemKey,MemInfo>& b){return a.first<b.first;});
253 for (const auto& [memkey,memval] : memory_vec) {
254 snprintf(line, bufsize, "%20s %12lu %5lu %5lu %5lu %e", memkey.hostname.c_str(), memkey.world_id, memkey.rank, memkey.DIM, memval.num_functions, memval.memory_GB);
255 print(std::string(line));
256 }
257 snprintf(line, bufsize, "%20s %e", "all hosts", total_memory(world_memory_map));
258 world.gop.fence();
259
260
261 // print memory on each host
262 auto mem_per_host_and_rank=memory_per_host_and_rank(world);
263 auto host_to_nrank_and_rss=host_to_nrank_and_rss_map(world);
264 if (world.rank()==0) {
265 print("memory per host");
266 auto info=memory_per_host_all_ranks(mem_per_host_and_rank);
267 print("hostname memory_GB nrank(universe) rss_GB/host");
268 // print("hostname memory_GB");
269 for (const auto& [hostname,memory] : info) {
270 snprintf(line, bufsize, "%20s %e %d %e", hostname.c_str(), memory,
271 host_to_nrank_and_rss[hostname].first, host_to_nrank_and_rss[hostname].second);
272 print(std::string(line));
273 }
274 }
275 if (world.rank()==0) {
276 auto info=memory_per_host_all_ranks(mem_per_host_and_rank);
277 double total_mem=total_memory(world_memory_map);
278 double total_rss=0.0;
279 for (auto& [hostname,memory] : info) {
280 total_rss+=host_to_nrank_and_rss[hostname].second;
281 }
282 std::string word="all hosts";
283 snprintf(line, bufsize, "%20s %e %d %e",
284 word.c_str(), total_mem, world.size(), total_rss);
285 print(std::string(line));
286 }
287
288 }
289
290 };
291}
292
293#endif //MEMORY_MEASUREMENT_H
FunctionImpl holds all Function state to facilitate shallow copy semantics.
Definition funcimpl.h:945
measure the memory usage of all FunctionImpl objects of all worlds
Definition memory_measurement.h:17
static std::string get_hostname()
get the hostname of this machine, rank-local
Definition memory_measurement.h:37
const FunctionImpl< T, NDIM > * cast_to_funcimpl_ptr(const uniqueidT obj_id)
Definition memory_measurement.h:88
static double min_memory(const MemInfoMapT &memmap)
return the minimum memory usage over all hosts
Definition memory_measurement.h:228
static double total_memory(const MemInfoMapT &memmap)
return the total memory usage over all hosts
Definition memory_measurement.h:210
std::vector< std::pair< std::string, double > > memory_per_host_all_ranks(const std::vector< std::pair< std::pair< std::string, long >, double > > &mem_per_host_and_rank) const
accumulate the memory usage of all objects of all worlds over all ranks per host
Definition memory_measurement.h:197
void print_memory_map(World &world, std::string msg="")
Definition memory_measurement.h:238
std::map< MemKey, MemInfo > MemInfoMapT
Definition memory_measurement.h:83
MemInfoMapT world_memory_map
keeps track of the memory usage of all objects of one or many worlds on this rank
Definition memory_measurement.h:97
void reduce_map(World &universe)
gather all information of the map on rank 0 of the universe
Definition memory_measurement.h:149
friend bool operator<(const MemKey &lhs, const MemKey &other)
Definition memory_measurement.h:65
static std::map< MemKey, MemInfo > measure_and_print(World &world)
measure the memory usage of all objects of all worlds
Definition memory_measurement.h:24
void clear_map()
reset the memory map
Definition memory_measurement.h:144
void search_world(World &world)
Definition memory_measurement.h:115
void add_memory_to_map(const FunctionImpl< T, NDIM > &f)
Definition memory_measurement.h:101
static std::map< std::string, std::pair< int, double > > host_to_nrank_and_rss_map(World &universe)
given the hostname, return number of ranks and total rss on that node
Definition memory_measurement.h:163
static double max_memory(const MemInfoMapT &memmap)
return the maximum memory usage over all hosts
Definition memory_measurement.h:219
void search_all_worlds()
Definition memory_measurement.h:132
bool debug
Definition memory_measurement.h:98
std::vector< std::pair< std::pair< std::string, long >, double > > memory_per_host_and_rank(World &world) const
accumulate the memory usage of all objects of all worlds for this rank per host
Definition memory_measurement.h:180
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition worldgop.cc:161
std::vector< T > concat0(const std::vector< T > &v, size_t bufsz=1024 *1024)
Concatenate an STL vector of serializable stuff onto node 0.
Definition worldgop.h:955
Implements most parts of a globally addressable object (via unique ID).
Definition world_object.h:366
A parallel world class.
Definition world.h:132
static World & get_default()
Default World object accessor.
Definition world.h:260
static World * world_from_id(std::uint64_t id)
Convert a World ID to a World pointer.
Definition world.h:492
std::vector< uniqueidT > get_object_ids() const
Returns a vector of all unique IDs in this World.
Definition world.h:468
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:320
static std::vector< unsigned long > get_world_ids()
return a vector containing all world ids
Definition world.h:476
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:330
WorldGopInterface & gop
Global operations.
Definition world.h:207
static bool exists(World *world)
Check if the World exists in the registry.
Definition world.h:249
std::optional< T * > ptr_from_id(uniqueidT id) const
Look up a local pointer from a world-wide unique ID.
Definition world.h:416
Class for unique global IDs.
Definition uniqueid.h:53
unsigned long get_world_id() const
Access the World ID.
Definition uniqueid.h:90
const std::size_t bufsize
Definition derivatives.cc:16
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:28
#define MADNESS_EXCEPTION(msg, value)
Macro for throwing a MADNESS exception.
Definition madness_exception.h:119
Main include file for MADNESS and defines Function interface.
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
void print(const T &t, const Ts &... ts)
Print items to std::cout (items separated by spaces) and terminate with a new line.
Definition print.h:226
NDIM & f
Definition mra.h:2528
std::map< long, std::pair< std::string, double > > rank_to_host_and_rss_map(World &universe)
return a mapping rank to hostname, return value on rank 0 only
Definition ranks_and_hosts.cpp:36
static const double b
Definition nonlinschro.cc:119
static const double a
Definition nonlinschro.cc:118
Definition memory_measurement.h:72
long num_functions
Definition memory_measurement.h:75
void serialize(Archive &ar) const
Definition memory_measurement.h:78
double memory_GB
Definition memory_measurement.h:76
MemInfo(const MemInfo &other)=default
Definition memory_measurement.h:44
MemKey(World &world)
Definition memory_measurement.h:50
unsigned long rank
Definition memory_measurement.h:46
MemKey(const MemKey &other)=default
std::size_t DIM
Definition memory_measurement.h:48
void serialize(Archive &ar) const
Definition memory_measurement.h:61
MemKey(const FunctionImpl< T, NDIM > &fimpl)
Definition memory_measurement.h:55
unsigned long world_id
Definition memory_measurement.h:45
std::string hostname
Definition memory_measurement.h:47
constexpr std::size_t NDIM
Definition testgconv.cc:54