32#ifndef MADNESS_WORLD_WORLDGOP_H__INCLUDED
33#define MADNESS_WORLD_WORLDGOP_H__INCLUDED
56 class WorldAmInterface;
60 class DeferredCleanup;
106 template <
typename T>
113 template <
typename T>
120 template <
typename T>
127 template <
typename T>
134 template <
typename T>
173 template <
typename keyT,
typename valueT>
211 template <
typename valueT,
typename keyT>
224 template <
typename keyT,
typename valueT>
225 typename std::enable_if<!is_future<valueT>::value >
::type
231 dist_cache::set_cache_value(key, value);
248 template <
typename keyT,
typename valueT>
253 dist_cache::set_cache_value(key, value);
278 template <
typename keyT>
296 template <
typename keyT,
typename opT>
314 template <
typename tagT,
typename keyT,
typename opT>
351 template <
typename keyT,
typename valueT,
typename taskfnT>
362 arg.get_world()->taskq.add(
arg.get_world()->gop,
taskfn, key,
366 template <
typename keyT,
typename valueT,
typename taskfnT>
381 arg.get_world()->taskq.add(
arg.get_world()->gop,
taskfn, key, value,
390 template <
typename keyT,
typename valueT>
421 template <
typename keyT,
typename valueT>
470 template <
typename tagT,
typename keyT,
typename valueT>
520 template <
typename tagT,
typename keyT,
typename valueT>
549 template <
typename valueT,
typename opT>
557 template <
typename opT>
564 for(std::size_t i = 1ul; i <
results.size(); ++i)
581 template <
typename tagT,
typename keyT,
typename valueT,
typename opT>
591 std::vector<Future<result_type> >
results;
630 int result = std::numeric_limits<int>::max();
635 if (
result_u64>std::numeric_limits<int>::max()) {
638 <<
"!!MADNESS WARNING: Invalid value for environment variable MAD_MAX_REDUCEBCAST_MSG_SIZE.\n"
639 <<
"!!MADNESS WARNING: MAD_MAX_REDUCEBCAST_MSG_SIZE = "
641 result = std::numeric_limits<int>::max();
646 <<
"MADNESS max msg size for GOP reduce/broadcast set to "
647 << result <<
" bytes.\n";
733 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
739 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
745 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
751 template <
typename objT,
752 typename = std::void_t<decltype(std::declval<archive::BufferInputArchive&>()&std::declval<objT&>())>,
753 typename = std::void_t<
decltype(std::declval<archive::BufferOutputArchive&>()&std::declval<const objT&>())>>
766 unsigned char* buf =
new unsigned char[
BUFLEN];
782 template <
typename T,
class opT>
792 std::alignment_of_v<T> - 1) /
793 std::alignment_of_v<T>) * std::alignment_of_v<T>;
795 void operator()(
T *ptr) {
800 using sptr_t = std::unique_ptr<T[], free_dtor>;
805 ((std::alignment_of_v<T> +
sizeof(
void *) - 1) /
808#ifdef HAVE_POSIX_MEMALIGN
811 throw std::bad_alloc();
813 return static_cast<T *
>(ptr);
843 buf[i] =
op(buf[i],
buf0[i]);
848 buf[i] =
op(buf[i],
buf1[i]);
869 template <
typename T>
875 template <
typename T>
881 template <
typename T>
887 template <
typename T>
893 template <
typename T>
899 template <
typename T>
904 template <
typename T>
909 template <
typename T>
914 template <
typename T>
919 template <
typename T>
924 template <
typename T>
930 template <
typename T>
936 template <
typename T>
942 template <
typename T>
952 template <
typename T>
956 bufsz = ((
bufsz +
sizeof(
void*) - 1) /
sizeof(
void*)) *
sizeof(
void*);
963 void operator()(std::byte *ptr) {
968 using sptr_t = std::unique_ptr<std::byte[], free_dtor>;
971#ifdef HAVE_POSIX_MEMALIGN
974 throw std::bad_alloc();
976 return static_cast<std::byte *
>(ptr);
978 return static_cast<std::byte *
>(
979 std::aligned_alloc(
sizeof(
void *),
bufsz));
997 std::vector<Tag>
tags;
1066 std::vector<T> left, right;
1074 for (
unsigned int i = 0; i < right.size(); ++i)
1075 left.push_back(right[i]);
1077 for (
unsigned int i=0; i<
v.size(); ++i) left.push_back(
v[i]);
1100 if (batch + 1 ==
nbatch) {
1118 if (parent == -1)
return left;
1119 else return std::vector<T>();
1132 template <
typename valueT,
typename keyT>
1147 template <
typename keyT,
typename valueT>
1179 template <
typename keyT,
typename opT>
1225 template <
typename keyT,
typename opT>
1230 if(group.
size() > 1) {
1261 template <
typename keyT,
typename valueT>
1293 template <
typename keyT,
typename valueT>
1302 if(group.
size() > 1)
1341 template <
typename keyT,
typename valueT,
typename opT>
1397 template <
typename keyT,
typename valueT,
typename opT>
1448 template <
typename keyT,
typename valueT,
typename opT>
1511 template <
typename keyT,
typename valueT,
typename opT>
Implements an archive wrapping a memory buffer.
void binary_tree_info(int root, int &parent, int &child0, int &child1)
Construct info about a binary tree with given root.
Definition safempi.cc:39
int Get_rank() const
Definition safempi.h:714
static int unique_tag_period()
Definition safempi.h:836
int unique_tag()
Returns a unique tag for temporary use (1023<tag<4095)
Definition safempi.h:830
World active message that extends an RMI message.
Definition worldam.h:80
The class used for callbacks (e.g., dependency tracking).
Definition dependency_interface.h:61
A future is a possibly yet unevaluated value.
Definition future.h:373
T & get(bool dowork=true) &
Gets the value, waiting if necessary.
Definition future.h:574
static const Future< T > default_initializer()
See "Gotchas" on Futures about why this exists and how to use it.
Definition future.h:462
bool probe() const
Check whether this future has been assigned.
Definition future.h:631
A collection of processes.
Definition group.h:50
void remote_update() const
Update remote usage count.
Definition group.h:383
void local_update() const
Update local usage count.
Definition group.h:369
ProcessID size() const
Group size accessor.
Definition group.h:429
const DistributedID & id() const
Group id accessor.
Definition group.h:396
bool empty() const
Query empty group.
Definition group.h:391
ProcessID rank() const
Group rank accessor.
Definition group.h:412
static madness::Future< Group > get_group(const DistributedID &did)
Get group from the registry.
Definition group.cc:90
void make_tree(const ProcessID group_root, ProcessID &parent, ProcessID &child1, ProcessID &child2) const
Compute the binary tree parents and children.
Definition group.h:449
World & get_world() const
Parent world accessor.
Definition group.h:404
Key object that includes the process information.
Definition distributed_id.h:80
static const attrT ATTR_UNORDERED
Definition worldrmi.h:180
Key object that uses a tag to differentiate keys.
Definition distributed_id.h:177
static TaskAttributes hipri()
Definition thread.h:456
void send(ProcessID dest, am_handlerT op, const AmArg *arg, const int attr=RMI::ATTR_ORDERED)
Sends a managed non-blocking active message.
Definition worldam.h:278
Delayed send callback object.
Definition worldgop.h:174
DelayedSend(World &world, const ProcessID dest, const keyT &key, const Future< valueT > &value)
Constructor.
Definition worldgop.h:188
virtual ~DelayedSend()
Definition worldgop.h:193
World & world_
The communication world.
Definition worldgop.h:176
const ProcessID dest_
The destination process id.
Definition worldgop.h:177
Future< valueT > value_
The data to be sent.
Definition worldgop.h:179
const keyT key_
The distributed id associated with value_.
Definition worldgop.h:178
DelayedSend< keyT, valueT > & operator=(const DelayedSend< keyT, valueT > &)
DelayedSend(const DelayedSend< keyT, valueT > &)
virtual void notify()
Notify this object that the future has been set.
Definition worldgop.h:199
Provides collectives that interoperate with the AM and task interfaces.
Definition worldgop.h:145
int max_reducebcast_msg_size() const
Returns the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition worldgop.h:695
void lazy_sync(const keyT &key, const opT &op, const Group &group) const
Group lazy sync.
Definition worldgop.h:1226
void send_internal(ProcessID dest, const keyT &key, const Future< valueT > &value) const
Send value to dest.
Definition worldgop.h:249
void max(T *buf, size_t nelem)
Inplace global max while still processing AM & tasks.
Definition worldgop.h:882
static void bcast_handler(const AmArg &arg)
Definition worldgop.h:352
void lazy_sync(const keyT &key, const opT &op) const
Lazy sync.
Definition worldgop.h:1180
World & world_
World object that this is a part of.
Definition worldgop.h:147
int set_max_reducebcast_msg_size(int sz)
Set the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition worldgop.h:685
std::shared_ptr< detail::DeferredCleanup > deferred_
Deferred cleanup object.
Definition worldgop.h:148
void reduce(T *buf, std::size_t nelem, opT op)
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.
Definition worldgop.h:783
void broadcast(T &t)
Broadcast of a scalar from node 0 to all other nodes.
Definition worldgop.h:740
~WorldGopInterface()
Definition worldgop.h:660
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition worldgop.h:754
void lazy_sync_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const keyT &key, const opT &op) const
Start a distributed lazy sync operation.
Definition worldgop.h:315
void sum(T &a)
Global sum of a scalar while still processing AM & tasks.
Definition worldgop.h:931
int max_reducebcast_msg_size_
maximum size of messages (in bytes) sent by reduce and broadcast
Definition worldgop.h:151
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition worldgop.cc:161
void broadcast(void *buf, size_t nbyte, ProcessID root, bool dowork=true, Tag bcast_tag=-1)
Broadcasts bytes from process root while still processing AM & tasks.
Definition worldgop.cc:173
void bit_and(T *buf, size_t nelem)
Definition worldgop.h:905
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition worldgop.h:471
void lazy_sync_parent(const ProcessID parent, const keyT &key, const ProcessID, const ProcessID) const
Lazy sync parent task.
Definition worldgop.h:279
static Future< valueT > recv_internal(const keyT &key)
Receive data from remote node.
Definition worldgop.h:212
void absmin(T *buf, size_t nelem)
Inplace global absmin while still processing AM & tasks.
Definition worldgop.h:888
void bit_or(T *buf, size_t nelem)
Definition worldgop.h:910
std::enable_if<!is_future< valueT >::value >::type send_internal(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition worldgop.h:226
WorldGopInterface(World &world)
Definition worldgop.h:656
bool set_forbid_fence(bool value)
Set forbid_fence flag to new value and return old value.
Definition worldgop.h:674
void bcast(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition worldgop.h:1294
void bcast_task(const keyT &key, const valueT &value, const ProcessID root) const
Broadcast task.
Definition worldgop.h:391
void group_bcast_task(const keyT &key, const valueT &value, const ProcessID group_root, const Group &group) const
Definition worldgop.h:422
void send(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition worldgop.h:1148
void logic_or(T *buf, size_t nelem)
Definition worldgop.h:925
void bcast(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition worldgop.h:1262
int initial_max_reducebcast_msg_size()
Definition worldgop.h:629
void serial_invoke(std::function< void()> action)
Executes an action on single (this) thread after ensuring all other work is done.
Definition worldgop.cc:165
static void group_bcast_handler(const AmArg &arg)
Definition worldgop.h:367
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op)
Distributed all reduce.
Definition worldgop.h:1450
Future< typename detail::result_of< opT >::type > reduce_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const ProcessID root, const keyT &key, const valueT &value, const opT &op)
Distributed reduce.
Definition worldgop.h:583
bool forbid_fence_
forbid calling fence() in case of several active worlds
Definition worldgop.h:150
static detail::result_of< opT >::type reduce_result_task(const std::vector< Future< typename detail::result_of< opT >::type > > &results, const opT &op)
Definition worldgop.h:559
void absmax(T *buf, size_t nelem)
Inplace global absmax while still processing AM & tasks.
Definition worldgop.h:894
void broadcast(T *buf, size_t nelem, ProcessID root)
Broadcasts typed contiguous data from process root while still processing AM & tasks.
Definition worldgop.h:734
void fence_impl(std::function< void()> epilogue=[]{}, bool pause_during_epilogue=false, bool debug=false)
Implementation of fence.
Definition worldgop.cc:50
static detail::result_of< opT >::type reduce_task(const valueT &value, const opT &op)
Definition worldgop.h:551
void product(T *buf, size_t nelem)
Inplace global product while still processing AM & tasks.
Definition worldgop.h:900
void min(T *buf, size_t nelem)
Inplace global min while still processing AM & tasks.
Definition worldgop.h:876
void logic_and(T *buf, size_t nelem)
Definition worldgop.h:920
static Future< valueT > recv(const ProcessID source, const keyT &key)
Receive data from source.
Definition worldgop.h:1133
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID group_root, const Group &group)
Distributed group reduce.
Definition worldgop.h:1399
bool debug_
Debug mode.
Definition worldgop.h:149
void min(T &a)
Global min of a scalar while still processing AM & tasks.
Definition worldgop.h:943
void max(T &a)
Global max of a scalar while still processing AM & tasks.
Definition worldgop.h:937
std::vector< T > concat0(const std::vector< T > &v, size_t bufsz=1024 *1024)
Concatenate an STL vector of serializable stuff onto node 0.
Definition worldgop.h:953
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID root)
Distributed reduce.
Definition worldgop.h:1343
void barrier()
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.
Definition worldgop.h:700
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op, const Group &group)
Distributed, group all reduce.
Definition worldgop.h:1513
void lazy_sync_children(const ProcessID child0, const ProcessID child1, const keyT &key, opT &op, const ProcessID) const
Lazy sync children task.
Definition worldgop.h:297
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition worldgop.h:521
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition worldgop.h:870
void bit_xor(T *buf, size_t nelem)
Definition worldgop.h:915
friend class detail::DeferredCleanup
Definition worldgop.h:153
bool set_debug(bool value)
Set debug flag to new value and return old value.
Definition worldgop.h:667
void broadcast(T &t, ProcessID root)
Broadcast of a scalar from node root to all other nodes.
Definition worldgop.h:746
std::enable_if<!std::is_pointer< T >::value, SafeMPI::Request >::type Isend(const T &datum, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Isend one element.
Definition worldmpi.h:308
SafeMPI::Request Irecv(T *buf, int count, int source, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Async receive data of up to count elements from process source.
Definition worldmpi.h:321
void Send(const T *buf, long lenbuf, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Send array of lenbuf elements to process dest.
Definition worldmpi.h:347
void Recv(T *buf, long lenbuf, int src, int tag) const
Receive data of up to lenbuf elements from process src.
Definition worldmpi.h:374
void add(TaskInterface *t)
Add a new local task, taking ownership of the pointer.
Definition world_task_queue.h:466
A parallel world class.
Definition world.h:132
WorldTaskQueue & taskq
Task queue.
Definition world.h:206
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:320
static void await(SafeMPI::Request &request, bool dowork=true)
Wait for a MPI request to complete.
Definition world.h:534
WorldMpiInterface & mpi
MPI interface.
Definition world.h:204
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:330
unsigned long id() const
Definition world.h:315
WorldGopInterface & gop
Global operations.
Definition world.h:207
WorldAmInterface & am
AM interface.
Definition world.h:205
Wraps an archive around a memory buffer for output.
Definition buffer_archive.h:59
std::size_t size() const
Return the amount of data stored (counted) in the buffer.
Definition buffer_archive.h:123
Deferred cleanup of shared_ptr's.
Definition deferred_cleanup.h:60
Distributed caching utility.
Definition dist_cache.h:54
static void get_cache_value(const keyT &key, madness::Future< valueT > &value)
Get the cache value associated with key.
Definition dist_cache.h:185
static void set_cache_value(const keyT &key, const valueT &value)
Set the cache value associated with key.
Definition dist_cache.h:146
static bool debug
Definition dirac-hatom.cc:16
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
Tensor< typename Tensor< T >::scalar_type > arg(const Tensor< T > &t)
Return a new tensor holding the argument of each element of t (complex types only)
Definition tensor.h:2503
static const double v
Definition hatom_sf_dirac.cc:20
Tensor< double > op(const Tensor< double > &x)
Definition kain.cc:508
#define max(a, b)
Definition lda.h:51
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
Intracomm COMM_WORLD
Definition safempi.cc:67
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
std::pair< uniqueidT, std::size_t > DistributedID
Distributed ID which is used to identify objects.
Definition distributed_id.h:48
double abs(double x)
Definition complexfun.h:48
AmArg * copy_am_arg(const AmArg &arg)
Definition worldam.h:170
AmArg * new_am_arg(const argT &... args)
Convenience template for serializing arguments into a new AmArg.
Definition worldam.h:194
bool quiet()
Check if the MADNESS runtime was initialized for quiet operation.
Definition world.cc:77
void error(const char *msg)
Definition world.cc:139
std::string type(const PairType &n)
Definition PNOParameters.h:18
std::uint64_t cstr_to_memory_size(const char *str)
Unit-aware conversion of a C string to a size_t.
Definition units.cc:14
static XNonlinearSolver< std::vector< Function< T, NDIM > >, T, vector_function_allocator< T, NDIM > > nonlinear_vector_solver(World &world, const long nvec)
Definition nonlinsol.h:371
static const double b
Definition nonlinschro.cc:119
static const double a
Definition nonlinschro.cc:118
int posix_memalign(void **memptr, std::size_t alignment, std::size_t size)
Definition posixmem.h:44
Hash functor.
Definition worldhash.h:233
T operator()(const T &a, const T &b) const
Definition worldgop.h:87
Definition worldgop.h:100
T operator()(const T &a, const T &b) const
Definition worldgop.h:101
Definition worldgop.h:107
T operator()(const T &a, const T &b) const
Definition worldgop.h:108
Definition worldgop.h:114
T operator()(const T &a, const T &b) const
Definition worldgop.h:115
Definition worldgop.h:121
T operator()(const T &a, const T &b) const
Definition worldgop.h:122
Definition worldgop.h:163
Definition worldgop.h:159
Definition worldgop.h:164
Definition worldgop.h:160
Definition worldgop.h:158
Definition worldgop.h:162
Definition worldgop.h:157
Definition worldgop.h:156
Definition worldgop.h:161
Definition worldgop.h:128
T operator()(const T &a, const T &b) const
Definition worldgop.h:129
Definition worldgop.h:135
T operator()(const T &a, const T &b) const
Definition worldgop.h:136
T operator()(const T &a, const T &b) const
Definition worldgop.h:80
T operator()(const T &a, const T &b) const
Definition worldgop.h:94
T operator()(const T &a, const T &b) const
Definition worldgop.h:73
T operator()(const T &a, const T &b) const
Definition worldgop.h:66
fnT::result_type type
Definition function_traits.h:97
T type
Type with Future removed.
Definition type_traits.h:110
#define MPI_INT
Definition stubmpi.h:81
#define MPI_BYTE
Definition stubmpi.h:77
AtomicInt sum
Definition test_atomicint.cc:46
std::pair< int, double > valueT
Definition test_binsorter.cc:6
double source(const coordT &r)
Definition testperiodic.cc:48
const char * status[2]
Definition testperiodic.cc:43
Declares the World class for the parallel runtime environment.
Defines TaskInterface and implements WorldTaskQueue and associated stuff.
Defines types used by the parallel runtime.
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43
int Tag
Used to clearly identify message tag/type.
Definition worldtypes.h:44