MADNESS 0.10.1
worldprofile.h
Go to the documentation of this file.
1/*
2 This file is part of MADNESS.
3
4 Copyright (C) 2007,2010 Oak Ridge National Laboratory
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 For more information please contact:
21
22 Robert J. Harrison
23 Oak Ridge National Laboratory
24 One Bethel Valley Road
25 P.O. Box 2008, MS-6367
26
27 email: harrisonrj@ornl.gov
28 tel: 865-241-3937
29 fax: 865-572-0680
30
31 $Id$
32*/
33#ifndef MADNESS_WORLD_WORLDPROFILE_H__INCLUDED
34#define MADNESS_WORLD_WORLDPROFILE_H__INCLUDED
35
40#include <string>
41#include <vector>
42
43// NEED TO ADD ATTRIBUTION TO SHINY ON SOURCE FORGE
44
45namespace madness {
46
47 class World;
48
49 /// Simple container for parallel profile statistic
50 template <typename T>
51 struct ProfileStat {
52 T value, max, min, sum; // local value, parallel max, min, sum
53 ProcessID pmax, pmin; // processor with max, min values
54
55 ProfileStat() = default;
56
57 /// Copies local stats into parallel stats in prep for global reduction
59 max = min = sum = value;
60 pmax = pmin = me;
61 }
62
63 /// Reduction of parallel data (max, min, sum)
64 void par_reduce(const ProfileStat<T>& other) {
65 if (other.max > max) {
66 max = other.max;
67 pmax = other.pmax;
68 }
69 if (other.min < min) {
70 min = other.min;
71 pmin = other.pmin;
72 }
73 sum += other.sum;
74 }
75
76 /// Zeros all data
77 void clear() {
78 value = max = min = sum = 0;
79 pmax = pmin = 0;
80 }
81 }; // struct ProfileStat
82
83 /// Used to store profiler info
84 struct WorldProfileEntry : public Spinlock {
85 std::string name; ///< name of the entry
86 static const int MAX_NTHREAD=64;
87 int depth[MAX_NTHREAD]; ///< depth of recursive calls by thread (0 if no active calls)
88
89 ProfileStat<unsigned long> count; ///< count of times called
90 ProfileStat<double> xcpu; ///< exclusive cpu time (i.e., excluding calls)
91 ProfileStat<double> icpu; ///< inclusive cpu call (i.e., including calls)
92 ProfileStat<unsigned long> xnmsg_sent; ///< No. of active messages sent ... exclusive
93 ProfileStat<unsigned long> inmsg_sent; ///< No. of active messages sent ... inclusive
94 ProfileStat<unsigned long> xnmsg_recv; ///< No. of active messages recv ... exclusive
95 ProfileStat<unsigned long> inmsg_recv; ///< No. of active messages recv ... inclusive
96 ProfileStat<unsigned long> xnbyt_sent; ///< No. of bytes sent ... exclusive
97 ProfileStat<unsigned long> inbyt_sent; ///< No. of bytes sent ... inclusive
98 ProfileStat<unsigned long> xnbyt_recv; ///< No. of bytes recv ... exclusive
99 ProfileStat<unsigned long> inbyt_recv; ///< No. of bytes recv ... inclusive
100
101 WorldProfileEntry(const char* name = "");
102
104
106
107 static bool exclusivecmp(const WorldProfileEntry&a, const WorldProfileEntry& b);
108
109 static bool inclusivecmp(const WorldProfileEntry&a, const WorldProfileEntry& b);
110
111 static bool exclusivebytcmp(const WorldProfileEntry&a, const WorldProfileEntry& b);
112
113 static bool inclusivebytcmp(const WorldProfileEntry&a, const WorldProfileEntry& b);
114
116
117 void par_reduce(const WorldProfileEntry& other);
118
119 void clear();
120
121 template <class Archive>
122 void serialize(const Archive& ar) {
124 }
125 }; // struct WorldProfileEntry
126
127
128 /// Singleton-like class for holding profiling data and functionality
129
130 /// Use the macros PROFILE_FUNC, PROFILE_BLOCK, PROFILE_MEMBER_FUNC
132 //static ConcurrentHashMap<std::string,WorldProfileEntry> items;
133 static std::vector<WorldProfileEntry> items;
135 static double cpu_start;
136 static double wall_start;
137
138 static std::vector<WorldProfileEntry>& nvitems();
139
140
141 /// Returns id of the entry associated with the name. Returns -1 if not found;
142 static int find(const std::string& name);
143
144
145 public:
146 /// Returns id for the name, registering if necessary.
147 static int register_id(const char* name);
148
149 /// Returns id for the name, registering if necessary.
150 static int register_id(const char* classname, const char* function);
151
152 /// Clears all profiling information
153 static void clear();
154
155 /// Returns a reference to the specified entry. Throws if id is invalid.
156 static WorldProfileEntry& get_entry(int id);
157
158 /// Prints global profiling information. Global fence involved. Implemented in worldstuff.cc
159 static void print(World& world);
160
161 private:
162 /// Private. Accumlates data from process into parallel statistics. Implemented in worldstuff.cc
163 static void recv_stats(World& world, ProcessID p);
164 };
165
166
168 static thread_local WorldProfileObj* call_stack; ///< Current top of this thread's call stack
169 static thread_local int mythreadid; ///< My unique thread id
170 WorldProfileObj* const prev; ///< Pointer to the entry that called me
171 const int id; ///< My entry in the world profiler
172 const double cpu_base; ///< Time that I started executing
173 RMIStats stats_base; ///< Msg stats when I start executing
174 double cpu_start; ///< Time that I was at top of stack
175 RMIStats stats_start; ///< Msg stats when I was at top of stack;
176 public:
177
178 WorldProfileObj(int id);
179
180 /// Pause profiling while we are not executing ... accumulate time in self
181 void pause(double now, const RMIStats& stats);
182
183 /// Resume profiling
184 void resume(double now, const RMIStats& stats);
185
187 };
188}
189
// Profiling macros.  With WORLD_PROFILE_ENABLE defined each macro registers
// an id once (function-local static) and declares a madness::WorldProfileObj
// whose lifetime spans the rest of the enclosing scope.  Without it every
// macro expands to nothing.
#ifdef WORLD_PROFILE_ENABLE
/// Turns the argument into a string literal (the argument is not macro-expanded first).
#  define PROFILE_STRINGIFY(s) #s

/// Profiles the rest of the enclosing scope under the label \c name.
/// \c name must be a valid identifier: it becomes both the registered string
/// and the name of the local profiling object.  The id variable is derived
/// from \c name by token pasting, so distinct labels can share one scope.
#  define PROFILE_BLOCK(name) \
    static const int name##_profile_id = madness::WorldProfile::register_id(PROFILE_STRINGIFY(name)); \
    madness::WorldProfileObj name(name##_profile_id)

/// Profiles the enclosing function under its \c __FUNCTION__ name.
/// Use at most once per scope (shares local names with PROFILE_MEMBER_FUNC).
#  define PROFILE_FUNC \
    static const int madness_profile_id_ = madness::WorldProfile::register_id(__FUNCTION__); \
    madness::WorldProfileObj madness_profile_obj_(madness_profile_id_)

/// Profiles the enclosing member function, registered with \c classname and \c __FUNCTION__.
/// Use at most once per scope (shares local names with PROFILE_FUNC).
#  define PROFILE_MEMBER_FUNC(classname) \
    static const int madness_profile_id_ = madness::WorldProfile::register_id(PROFILE_STRINGIFY(classname), __FUNCTION__); \
    madness::WorldProfileObj madness_profile_obj_(madness_profile_id_)


#else

#  define PROFILE_BLOCK(name)
#  define PROFILE_FUNC
#  define PROFILE_MEMBER_FUNC(classname)

#endif
213
214#endif // MADNESS_WORLD_WORLDPROFILE_H__INCLUDED
Spinlock using pthread spinlock operations.
Definition worldmutex.h:253
Definition worldprofile.h:167
void resume(double now, const RMIStats &stats)
Resume profiling.
Definition worldprofile.cc:431
double cpu_start
Time that I was at top of stack.
Definition worldprofile.h:174
RMIStats stats_base
Msg stats when I start executing.
Definition worldprofile.h:173
~WorldProfileObj()
Definition worldprofile.cc:436
static thread_local WorldProfileObj * call_stack
Current top of this thread's call stack.
Definition worldprofile.h:168
WorldProfileObj *const prev
Pointer to the entry that called me.
Definition worldprofile.h:170
static thread_local int mythreadid
My unique thread id.
Definition worldprofile.h:169
RMIStats stats_start
Msg stats when I was at top of stack.
Definition worldprofile.h:175
const int id
My entry in the world profiler.
Definition worldprofile.h:171
void pause(double now, const RMIStats &stats)
Pause profiling while we are not executing ... accumulate time in self.
Definition worldprofile.cc:419
const double cpu_base
Time that I started executing.
Definition worldprofile.h:172
Singleton-like class for holding profiling data and functionality.
Definition worldprofile.h:131
static void recv_stats(World &world, ProcessID p)
Private. Accumulates data from process into parallel statistics. Implemented in worldstuff....
Definition worldprofile.cc:388
static Spinlock mutex
Definition worldprofile.h:134
static std::vector< WorldProfileEntry > items
Definition worldprofile.h:133
static double wall_start
Definition worldprofile.h:136
static int find(const std::string &name)
Returns id of the entry associated with the name. Returns -1 if not found.
Definition worldprofile.cc:142
static void clear()
Clears all profiling information.
Definition worldprofile.cc:180
static std::vector< WorldProfileEntry > & nvitems()
Definition worldprofile.cc:136
static WorldProfileEntry & get_entry(int id)
Returns a reference to the specified entry. Throws if id is invalid.
Definition worldprofile.cc:191
static double cpu_start
Definition worldprofile.h:135
static int register_id(const char *name)
Returns id for the name, registering if necessary.
Definition worldprofile.cc:155
static void print(World &world)
Prints global profiling information. Global fence involved. Implemented in worldstuff....
Definition worldprofile.cc:295
A parallel world class.
Definition world.h:132
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition derivatives.cc:72
static double function(const coord_3d &r)
Normalized gaussian.
Definition functionio.cc:100
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
Macros and tools pertaining to the configuration of MADNESS.
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
std::string name(const FuncType &type, const int ex=-1)
Definition ccpairfunction.h:28
static const double b
Definition nonlinschro.cc:119
static const double a
Definition nonlinschro.cc:118
Simple container for parallel profile statistic.
Definition worldprofile.h:51
T min
Definition worldprofile.h:52
T sum
Definition worldprofile.h:52
void clear()
Zeros all data.
Definition worldprofile.h:77
ProcessID pmax
Definition worldprofile.h:53
T value
Definition worldprofile.h:52
T max
Definition worldprofile.h:52
void init_par_stats(ProcessID me)
Copies local stats into parallel stats in prep for global reduction.
Definition worldprofile.h:58
ProcessID pmin
Definition worldprofile.h:53
void par_reduce(const ProfileStat< T > &other)
Reduction of parallel data (max, min, sum)
Definition worldprofile.h:64
Definition worldrmi.h:147
Used to store profiler info.
Definition worldprofile.h:84
static bool inclusivebytcmp(const WorldProfileEntry &a, const WorldProfileEntry &b)
Definition worldprofile.cc:90
int depth[MAX_NTHREAD]
depth of recursive calls by thread (0 if no active calls)
Definition worldprofile.h:87
ProfileStat< unsigned long > inmsg_recv
No. of active messages recv ... inclusive.
Definition worldprofile.h:95
ProfileStat< unsigned long > xnmsg_sent
No. of active messages sent ... exclusive.
Definition worldprofile.h:92
ProfileStat< unsigned long > xnbyt_recv
No. of bytes recv ... exclusive.
Definition worldprofile.h:98
ProfileStat< unsigned long > inbyt_recv
No. of bytes recv ... inclusive.
Definition worldprofile.h:99
ProfileStat< double > xcpu
exclusive cpu time (i.e., excluding calls)
Definition worldprofile.h:90
ProfileStat< unsigned long > count
count of times called
Definition worldprofile.h:89
ProfileStat< unsigned long > inbyt_sent
No. of bytes sent ... inclusive.
Definition worldprofile.h:97
void par_reduce(const WorldProfileEntry &other)
Definition worldprofile.cc:108
ProfileStat< double > icpu
inclusive cpu call (i.e., including calls)
Definition worldprofile.h:91
ProfileStat< unsigned long > xnmsg_recv
No. of active messages recv ... exclusive.
Definition worldprofile.h:94
void serialize(const Archive &ar)
Definition worldprofile.h:122
static const int MAX_NTHREAD
Definition worldprofile.h:86
ProfileStat< unsigned long > inmsg_sent
No. of active messages sent ... inclusive.
Definition worldprofile.h:93
static bool exclusivecmp(const WorldProfileEntry &a, const WorldProfileEntry &b)
Definition worldprofile.cc:78
WorldProfileEntry & operator=(const WorldProfileEntry &other)
Definition worldprofile.cc:60
void init_par_stats(ProcessID me)
Definition worldprofile.cc:94
static bool inclusivecmp(const WorldProfileEntry &a, const WorldProfileEntry &b)
Definition worldprofile.cc:82
static bool exclusivebytcmp(const WorldProfileEntry &a, const WorldProfileEntry &b)
Definition worldprofile.cc:86
void clear()
Definition worldprofile.cc:122
ProfileStat< unsigned long > xnbyt_sent
No. of bytes sent ... exclusive.
Definition worldprofile.h:96
std::string name
name of the entry
Definition worldprofile.h:85
int me
Definition test_binsorter.cc:10
static madness::WorldMemInfo stats
Definition worldmem.cc:64
Implements Mutex, MutexFair, Spinlock, ConditionVariable.
Lowest level API for sending active messages — you should probably be looking at worldam....
Defines types used by the parallel runtime.
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43