MADNESS  0.10.1
worldprofile.h
Go to the documentation of this file.
1 /*
2  This file is part of MADNESS.
3 
4  Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20  For more information please contact:
21 
22  Robert J. Harrison
23  Oak Ridge National Laboratory
24  One Bethel Valley Road
25  P.O. Box 2008, MS-6367
26 
27  email: harrisonrj@ornl.gov
28  tel: 865-241-3937
29  fax: 865-572-0680
30 
31  $Id$
32 */
33 #ifndef MADNESS_WORLD_WORLDPROFILE_H__INCLUDED
34 #define MADNESS_WORLD_WORLDPROFILE_H__INCLUDED
35 
36 #include <madness/madness_config.h>
37 #include <madness/world/worldrmi.h>
40 #include <string>
41 #include <vector>
42 
43 // NEED TO ADD ATTRIBUTION TO SHINY ON SOURCE FORGE
44 
45 namespace madness {
46 
47  class World;
48 
49  /// Simple container for parallel profile statistic
50  template <typename T>
51  struct ProfileStat {
52  T value, max, min, sum; // local value, parallel max, min, sum
53  ProcessID pmax, pmin; // processor with max, min values
54 
55  ProfileStat() = default;
56 
57  /// Copies local stats into parallel stats in prep for global reduction
59  max = min = sum = value;
60  pmax = pmin = me;
61  }
62 
63  /// Reduction of parallel data (max, min, sum)
64  void par_reduce(const ProfileStat<T>& other) {
65  if (other.max > max) {
66  max = other.max;
67  pmax = other.pmax;
68  }
69  if (other.min < min) {
70  min = other.min;
71  pmin = other.pmin;
72  }
73  sum += other.sum;
74  }
75 
76  /// Zeros all data
77  void clear() {
78  value = max = min = sum = 0;
79  pmax = pmin = 0;
80  }
81  }; // struct ProfileStat
82 
83  /// Used to store profiler info
84  struct WorldProfileEntry : public Spinlock {
85  std::string name; ///< name of the entry
86  static const int MAX_NTHREAD=64;
87  int depth[MAX_NTHREAD]; ///< depth of recursive calls by thread (0 if no active calls)
88 
89  ProfileStat<unsigned long> count; ///< count of times called
90  ProfileStat<double> xcpu; ///< exclusive cpu time (i.e., excluding calls)
91  ProfileStat<double> icpu; ///< inclusive cpu call (i.e., including calls)
92  ProfileStat<unsigned long> xnmsg_sent; ///< No. of active messages sent ... exclusive
93  ProfileStat<unsigned long> inmsg_sent; ///< No. of active messages sent ... inclusive
94  ProfileStat<unsigned long> xnmsg_recv; ///< No. of active messages recv ... exclusive
95  ProfileStat<unsigned long> inmsg_recv; ///< No. of active messages recv ... inclusive
96  ProfileStat<unsigned long> xnbyt_sent; ///< No. of bytes sent ... exclusive
97  ProfileStat<unsigned long> inbyt_sent; ///< No. of bytes sent ... inclusive
98  ProfileStat<unsigned long> xnbyt_recv; ///< No. of bytes recv ... exclusive
99  ProfileStat<unsigned long> inbyt_recv; ///< No. of bytes recv ... inclusive
100 
101  WorldProfileEntry(const char* name = "");
102 
103  WorldProfileEntry(const WorldProfileEntry& other);
104 
106 
107  static bool exclusivecmp(const WorldProfileEntry&a, const WorldProfileEntry& b);
108 
109  static bool inclusivecmp(const WorldProfileEntry&a, const WorldProfileEntry& b);
110 
111  static bool exclusivebytcmp(const WorldProfileEntry&a, const WorldProfileEntry& b);
112 
113  static bool inclusivebytcmp(const WorldProfileEntry&a, const WorldProfileEntry& b);
114 
116 
117  void par_reduce(const WorldProfileEntry& other);
118 
119  void clear();
120 
121  template <class Archive>
122  void serialize(const Archive& ar) {
124  }
125  }; // struct WorldProfileEntry
126 
127 
128  /// Singleton-like class for holding profiling data and functionality
129 
130  /// Use the macros PROFILE_FUNC, PROFILE_BLOCK, PROFILE_MEMBER_FUNC
131  class WorldProfile {
132  //static ConcurrentHashMap<std::string,WorldProfileEntry> items;
133  static std::vector<WorldProfileEntry> items;
134  static Spinlock mutex;
135  static double cpu_start;
136  static double wall_start;
137 
138  static std::vector<WorldProfileEntry>& nvitems();
139 
140 
141  /// Returns id of the entry associated with the name. Returns -1 if not found;
142  static int find(const std::string& name);
143 
144 
145  public:
146  /// Returns id for the name, registering if necessary.
147  static int register_id(const char* name);
148 
149  /// Returns id for the name, registering if necessary.
150  static int register_id(const char* classname, const char* function);
151 
152  /// Clears all profiling information
153  static void clear();
154 
155  /// Returns a reference to the specified entry. Throws if id is invalid.
156  static WorldProfileEntry& get_entry(int id);
157 
158  /// Prints global profiling information. Global fence involved. Implemented in worldstuff.cc
159  static void print(World& world);
160 
161  private:
162  /// Private. Accumlates data from process into parallel statistics. Implemented in worldstuff.cc
163  static void recv_stats(World& world, ProcessID p);
164  };
165 
166 
168  static thread_local WorldProfileObj* call_stack; ///< Current top of this thread's call stack
169  static thread_local int mythreadid; ///< My unique thread id
170  WorldProfileObj* const prev; ///< Pointer to the entry that called me
171  const int id; ///< My entry in the world profiler
172  const double cpu_base; ///< Time that I started executing
173  RMIStats stats_base; ///< Msg stats when I start executing
174  double cpu_start; ///< Time that I was at top of stack
175  RMIStats stats_start; ///< Msg stats when I was at top of stack;
176  public:
177 
178  WorldProfileObj(int id);
179 
180  /// Pause profiling while we are not executing ... accumulate time in self
181  void pause(double now, const RMIStats& stats);
182 
183  /// Resume profiling
184  void resume(double now, const RMIStats& stats);
185 
187  };
188 }
189 
// Profiling macros.
//
// With WORLD_PROFILE_ENABLE defined, each macro registers a profiler id once
// (function-local static) and then creates a madness::WorldProfileObj that
// lives until the end of the enclosing scope. Without it, all three macros
// expand to nothing.
#ifdef WORLD_PROFILE_ENABLE
# define PROFILE_STRINGIFY(s) #s

// Profiles the enclosing block under the label `name`. `name` must be a valid
// identifier: it is used both as the label text and as the name of the local
// profile object.
// The id variable pastes the macro parameter itself (name##_profile_id). An
// identifier such as `__name` is a single token, so the parameter would not be
// substituted inside it and every use would declare the same variable, making
// two PROFILE_BLOCKs in one scope a redefinition error.
# define PROFILE_BLOCK(name) \
    static const int name##_profile_id=madness::WorldProfile::register_id(PROFILE_STRINGIFY(name)); \
    madness::WorldProfileObj name(name##_profile_id)

// Profiles the enclosing free function, labelled with __FUNCTION__.
# define PROFILE_FUNC \
    static const int __profile_id=madness::WorldProfile::register_id(__FUNCTION__); \
    madness::WorldProfileObj __profile_obj(__profile_id)

// Profiles the enclosing member function, labelled with the class name and __FUNCTION__.
# define PROFILE_MEMBER_FUNC(classname) \
    static const int __profile_id=madness::WorldProfile::register_id(PROFILE_STRINGIFY(classname), __FUNCTION__); \
    madness::WorldProfileObj __profile_obj(__profile_id)


#else

# define PROFILE_BLOCK(name)
# define PROFILE_FUNC
# define PROFILE_MEMBER_FUNC(classname)

#endif
213 
214 #endif // MADNESS_WORLD_WORLDPROFILE_H__INCLUDED
Spinlock using pthread spinlock operations.
Definition: worldmutex.h:253
Definition: worldprofile.h:167
void resume(double now, const RMIStats &stats)
Resume profiling.
Definition: worldprofile.cc:431
double cpu_start
Time that I was at top of stack.
Definition: worldprofile.h:174
RMIStats stats_base
Msg stats when I start executing.
Definition: worldprofile.h:173
~WorldProfileObj()
Definition: worldprofile.cc:436
static thread_local WorldProfileObj * call_stack
Current top of this thread's call stack.
Definition: worldprofile.h:168
WorldProfileObj(int id)
Definition: worldprofile.cc:407
WorldProfileObj *const prev
Pointer to the entry that called me.
Definition: worldprofile.h:170
static thread_local int mythreadid
My unique thread id.
Definition: worldprofile.h:169
RMIStats stats_start
Msg stats when I was at top of stack.
Definition: worldprofile.h:175
const int id
My entry in the world profiler.
Definition: worldprofile.h:171
void pause(double now, const RMIStats &stats)
Pause profiling while we are not executing ... accumulate time in self.
Definition: worldprofile.cc:419
const double cpu_base
Time that I started executing.
Definition: worldprofile.h:172
Singleton-like class for holding profiling data and functionality.
Definition: worldprofile.h:131
static void recv_stats(World &world, ProcessID p)
Private. Accumulates data from process into parallel statistics. Implemented in worldstuff....
Definition: worldprofile.cc:388
static Spinlock mutex
Definition: worldprofile.h:134
static std::vector< WorldProfileEntry > items
Definition: worldprofile.h:133
static double wall_start
Definition: worldprofile.h:136
static int find(const std::string &name)
Returns id of the entry associated with the name. Returns -1 if not found.
Definition: worldprofile.cc:142
static void clear()
Clears all profiling information.
Definition: worldprofile.cc:180
static std::vector< WorldProfileEntry > & nvitems()
Definition: worldprofile.cc:136
static WorldProfileEntry & get_entry(int id)
Returns a reference to the specified entry. Throws if id is invalid.
Definition: worldprofile.cc:191
static double cpu_start
Definition: worldprofile.h:135
static int register_id(const char *name)
Returns id for the name, registering if necessary.
Definition: worldprofile.cc:155
static void print(World &world)
Prints global profiling information. Global fence involved. Implemented in worldstuff....
Definition: worldprofile.cc:295
A parallel world class.
Definition: world.h:132
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition: derivatives.cc:72
auto T(World &world, response_space &f) -> response_space
Definition: global_functions.cc:34
Macros and tools pertaining to the configuration of MADNESS.
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
std::string name(const FuncType &type, const int ex=-1)
Definition: ccpairfunction.h:28
static const double b
Definition: nonlinschro.cc:119
static const double a
Definition: nonlinschro.cc:118
Simple container for parallel profile statistic.
Definition: worldprofile.h:51
T min
Definition: worldprofile.h:52
T sum
Definition: worldprofile.h:52
void clear()
Zeros all data.
Definition: worldprofile.h:77
ProcessID pmax
Definition: worldprofile.h:53
T value
Definition: worldprofile.h:52
T max
Definition: worldprofile.h:52
void init_par_stats(ProcessID me)
Copies local stats into parallel stats in prep for global reduction.
Definition: worldprofile.h:58
ProcessID pmin
Definition: worldprofile.h:53
void par_reduce(const ProfileStat< T > &other)
Reduction of parallel data (max, min, sum)
Definition: worldprofile.h:64
Definition: worldrmi.h:147
Used to store profiler info.
Definition: worldprofile.h:84
static bool inclusivebytcmp(const WorldProfileEntry &a, const WorldProfileEntry &b)
Definition: worldprofile.cc:90
int depth[MAX_NTHREAD]
depth of recursive calls by thread (0 if no active calls)
Definition: worldprofile.h:87
ProfileStat< unsigned long > inmsg_recv
No. of active messages recv ... inclusive.
Definition: worldprofile.h:95
ProfileStat< unsigned long > xnmsg_sent
No. of active messages sent ... exclusive.
Definition: worldprofile.h:92
ProfileStat< unsigned long > xnbyt_recv
No. of bytes recv ... exclusive.
Definition: worldprofile.h:98
ProfileStat< unsigned long > inbyt_recv
No. of bytes recv ... inclusive.
Definition: worldprofile.h:99
ProfileStat< double > xcpu
exclusive cpu time (i.e., excluding calls)
Definition: worldprofile.h:90
ProfileStat< unsigned long > count
count of times called
Definition: worldprofile.h:89
WorldProfileEntry(const char *name="")
Definition: worldprofile.cc:49
ProfileStat< unsigned long > inbyt_sent
No. of bytes sent ... inclusive.
Definition: worldprofile.h:97
void par_reduce(const WorldProfileEntry &other)
Definition: worldprofile.cc:108
ProfileStat< double > icpu
inclusive cpu call (i.e., including calls)
Definition: worldprofile.h:91
ProfileStat< unsigned long > xnmsg_recv
No. of active messages recv ... exclusive.
Definition: worldprofile.h:94
void serialize(const Archive &ar)
Definition: worldprofile.h:122
static const int MAX_NTHREAD
Definition: worldprofile.h:86
ProfileStat< unsigned long > inmsg_sent
No. of active messages sent ... inclusive.
Definition: worldprofile.h:93
static bool exclusivecmp(const WorldProfileEntry &a, const WorldProfileEntry &b)
Definition: worldprofile.cc:78
WorldProfileEntry & operator=(const WorldProfileEntry &other)
Definition: worldprofile.cc:60
void init_par_stats(ProcessID me)
Definition: worldprofile.cc:94
static bool inclusivecmp(const WorldProfileEntry &a, const WorldProfileEntry &b)
Definition: worldprofile.cc:82
static bool exclusivebytcmp(const WorldProfileEntry &a, const WorldProfileEntry &b)
Definition: worldprofile.cc:86
void clear()
Definition: worldprofile.cc:122
ProfileStat< unsigned long > xnbyt_sent
No. of bytes sent ... exclusive.
Definition: worldprofile.h:96
std::string name
name of the entry
Definition: worldprofile.h:85
int me
Definition: test_binsorter.cc:10
static madness::WorldMemInfo stats
Definition: worldmem.cc:64
Implements Mutex, MutexFair, Spinlock, ConditionVariable.
Lowest level API for sending active messages — you should probably be looking at worldam....
Defines types used by the parallel runtime.
int ProcessID
Used to clearly identify process number/rank.
Definition: worldtypes.h:43