32#ifndef MADNESS_WORLD_WORLDGOP_H__INCLUDED
33#define MADNESS_WORLD_WORLDGOP_H__INCLUDED
56 class WorldAmInterface;
60 class DeferredCleanup;
106 template <
typename T>
113 template <
typename T>
120 template <
typename T>
127 template <
typename T>
134 template <
typename T>
173 template <
typename keyT,
typename valueT>
211 template <
typename valueT,
typename keyT>
224 template <
typename keyT,
typename valueT>
225 typename std::enable_if<!is_future<valueT>::value >
::type
231 dist_cache::set_cache_value(key, value);
234 world_.
taskq.
add(dest, dist_cache::template set_cache_value<valueT>, key,
248 template <
typename keyT,
typename valueT>
253 dist_cache::set_cache_value(key, value);
258 world_.
taskq.
add(dest, dist_cache::template set_cache_value<valueT>, key,
265 const_cast<Future<valueT>&
>(value).register_callback(delayed_send_callback);
278 template <
typename keyT>
296 template <
typename keyT,
typename opT>
314 template <
typename tagT,
typename keyT,
typename opT>
316 const ProcessID child1,
const keyT& key,
const opT&
op)
const {
321 recv_internal<ProcessID>(key_type(key, child0)) :
324 recv_internal<ProcessID>(key_type(key, child1)) :
327 recv_internal<ProcessID>(key_type(key, parent)) :
332 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<key_type, opT>;
334 child0_signal, child1_signal, my_key,
op, parent_signal,
339 if(child0_signal.
probe() && child1_signal.
probe())
342 auto lazy_sync_parent_fn = & WorldGopInterface::template lazy_sync_parent<key_type>;
344 parent, my_key, child0_signal, child1_signal,
351 template <
typename keyT,
typename valueT,
typename taskfnT>
359 arg & taskfn & key & value & root;
362 arg.get_world()->taskq.add(
arg.get_world()->gop, taskfn, key,
366 template <
typename keyT,
typename valueT,
typename taskfnT>
375 arg & taskfn & key & value & group_root & group_key;
381 arg.get_world()->taskq.add(
arg.get_world()->gop, taskfn, key, value,
390 template <
typename keyT,
typename valueT>
396 ProcessID parent = -1, child0 = -1, child1 = -1;
406 void (*handler)(
const AmArg&) =
407 & WorldGopInterface::template bcast_handler<keyT, valueT, taskfnT>;
409 & WorldGopInterface::template bcast_task<keyT, valueT>,
421 template <
typename keyT,
typename valueT>
429 ProcessID parent = -1, child0 = -1, child1 = -1;
430 group.
make_tree(group_root, parent, child0, child1);
441 void (*handler)(
const AmArg&) =
442 & WorldGopInterface::template group_bcast_handler<keyT, valueT, taskfnT>;
444 & WorldGopInterface::template group_bcast_task<keyT, valueT>,
445 key, value, group_root, group.
id());
470 template <
typename tagT,
typename keyT,
typename valueT>
477 const key_type tagged_key(key);
491 auto bcast_task_fn = & WorldGopInterface::template bcast_task<key_type, valueT>;
520 template <
typename tagT,
typename keyT,
typename valueT>
526 const key_type tagged_key(key);
528 if(group.
rank() == group_root) {
533 auto group_bcast_task_fn = & WorldGopInterface::template group_bcast_task<key_type, valueT>;
535 tagged_key, value, group_root, group,
549 template <
typename valueT,
typename opT>
557 template <
typename opT>
564 for(std::size_t i = 1ul; i < results.size(); ++i)
565 op(result.
get(), results[i].get());
581 template <
typename tagT,
typename keyT,
typename valueT,
typename opT>
591 std::vector<Future<result_type> > results;
595 results.push_back(
world_.
taskq.
add(WorldGopInterface::template reduce_task<value_type, opT>,
600 results.push_back(recv_internal<result_type>(key_type(key, child0)));
602 results.push_back(recv_internal<result_type>(key_type(key, child1)));
606 world_.
taskq.
add(WorldGopInterface::template reduce_result_task<opT>,
625 void fence_impl(std::function<
void()> epilogue = []{},
626 bool pause_during_epilogue =
false,
630 int result = std::numeric_limits<int>::max();
631 const auto* initial_max_reducebcast_msg_size_cstr = std::getenv(
"MAD_MAX_REDUCEBCAST_MSG_SIZE");
632 if (initial_max_reducebcast_msg_size_cstr) {
635 if (result_u64>std::numeric_limits<int>::max()) {
638 <<
"!!MADNESS WARNING: Invalid value for environment variable MAD_MAX_REDUCEBCAST_MSG_SIZE.\n"
639 <<
"!!MADNESS WARNING: MAD_MAX_REDUCEBCAST_MSG_SIZE = "
640 << result_u64 <<
"\n";
641 result = std::numeric_limits<int>::max();
643 result =
static_cast<int>(result_u64);
646 <<
"MADNESS max msg size for GOP reduce/broadcast set to "
647 << result <<
" bytes.\n";
733 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
735 broadcast((
void *) buf, nelem*
sizeof(
T), root);
739 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
745 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
751 template <
typename objT,
752 typename = std::void_t<decltype(std::declval<archive::BufferInputArchive&>()&std::declval<objT&>())>,
753 typename = std::void_t<
decltype(std::declval<archive::BufferOutputArchive&>()&std::declval<const objT&>())>>
762 BUFLEN = count.
size();
766 unsigned char* buf =
new unsigned char[BUFLEN];
782 template <
typename T,
class opT>
784 static_assert(madness::is_trivially_copyable_v<T>,
"T must be trivially copyable");
788 const std::size_t nelem_per_maxmsg =
791 const auto buf_size = ((
sizeof(
T) * std::min(nelem_per_maxmsg, nelem) +
792 std::alignment_of_v<T> - 1) /
793 std::alignment_of_v<T>) * std::alignment_of_v<T>;
795 void operator()(
T *ptr) {
800 using sptr_t = std::unique_ptr<T[], free_dtor>;
802 auto aligned_buf_alloc = [&]() ->
T* {
804 const std::size_t alignment =
805 ((std::alignment_of_v<T> +
sizeof(
void *) - 1) /
808#ifdef HAVE_POSIX_MEMALIGN
811 throw std::bad_alloc();
813 return static_cast<T *
>(ptr);
815 return static_cast<T *
>(std::aligned_alloc(alignment, buf_size));
821 buf0 = sptr_t(aligned_buf_alloc(),
823 sptr_t buf1(
nullptr);
825 buf1 = sptr_t(aligned_buf_alloc(),
828 auto reduce_impl = [&,
this](
T* buf,
size_t nelem) {
842 for (
long i = 0; i < (long)nelem; ++i)
843 buf[i] =
op(buf[i], buf0[i]);
847 for (
long i = 0; i < (long)nelem; ++i)
848 buf[i] =
op(buf[i], buf1[i]);
861 const int n = std::min(nelem_per_maxmsg, nelem);
869 template <
typename T>
870 inline void sum(
T* buf,
size_t nelem) {
875 template <
typename T>
876 inline void min(
T* buf,
size_t nelem) {
881 template <
typename T>
882 inline void max(
T* buf,
size_t nelem) {
887 template <
typename T>
893 template <
typename T>
899 template <
typename T>
904 template <
typename T>
909 template <
typename T>
914 template <
typename T>
919 template <
typename T>
924 template <
typename T>
930 template <
typename T>
936 template <
typename T>
942 template <
typename T>
952 template <
typename T>
953 std::vector<T>
concat0(
const std::vector<T>&
v,
size_t bufsz=1024*1024) {
956 bufsz = ((bufsz +
sizeof(
void*) - 1) /
sizeof(
void*)) *
sizeof(
void*);
960 int child0_nbatch = 0, child1_nbatch = 0;
963 void operator()(std::byte *ptr) {
968 using sptr_t = std::unique_ptr<std::byte[], free_dtor>;
970 auto aligned_buf_alloc = [&]() -> std::byte* {
971#ifdef HAVE_POSIX_MEMALIGN
974 throw std::bad_alloc();
976 return static_cast<std::byte *
>(ptr);
978 return static_cast<std::byte *
>(
979 std::aligned_alloc(
sizeof(
void *), bufsz));
983 auto buf0 = sptr_t(aligned_buf_alloc(),
985 auto buf1 = sptr_t(aligned_buf_alloc(),
989 const int batch_size =
static_cast<int>(
993 const int max_nbatch = bufsz / batch_size;
995 const int max_ntags = max_nbatch + 1;
997 std::vector<Tag> tags;
998 tags.reserve(max_nbatch);
1001 if (child0 != -1 || child1 != -1) {
1004 auto receive_nbatch = [&,
this]() {
1019 auto receive_batch = [&,
this](
const int batch,
const size_t buf_offset) {
1021 if (child0 != -1 && batch < child0_nbatch) {
1022 int msg_size = batch_size;
1024 if (batch + 1 == child0_nbatch) {
1026 &msg_size, 1,
MPI_INT, child0, tags[0]);
1034 if (child1 != -1 && batch < child1_nbatch) {
1035 int msg_size = batch_size;
1037 if (batch + 1 == child1_nbatch) {
1039 &msg_size, 1,
MPI_INT, child1, tags[0]);
1047 if (child0 != -1 && batch < child0_nbatch) {
1050 if (child1 != -1 && batch < child1_nbatch) {
1055 size_t buf_offset = 0;
1057 while (buf_offset < bufsz) {
1058 receive_batch(batch, buf_offset);
1059 buf_offset += batch_size;
1060 buf_offset = std::min(buf_offset, bufsz);
1066 std::vector<T> left, right;
1074 for (
unsigned int i = 0; i < right.size(); ++i)
1075 left.push_back(right[i]);
1077 for (
unsigned int i=0; i<
v.size(); ++i) left.push_back(
v[i]);
1083 const auto total_nbytes_to_send = ar.
size();
1086 const int nbatch = (total_nbytes_to_send + batch_size - 1) / batch_size;
1090 size_t buf_offset = 0;
1092 while (buf_offset < bufsz) {
1095 auto send_batch = [&,
this](
const int batch,
const size_t buf_offset) {
1096 const int nbytes_to_send =
static_cast<int>(
1097 std::min(
static_cast<size_t>(batch_size),
1098 total_nbytes_to_send - buf_offset));
1100 if (batch + 1 == nbatch) {
1102 &nbytes_to_send, 1,
MPI_INT, parent, tags[0]);
1107 MPI_BYTE, parent, tags[batch + 1]);
1111 send_batch(batch, buf_offset);
1112 buf_offset += batch_size;
1113 buf_offset = std::min(buf_offset, bufsz);
1118 if (parent == -1)
return left;
1119 else return std::vector<T>();
1132 template <
typename valueT,
typename keyT>
1147 template <
typename keyT,
typename valueT>
1179 template <
typename keyT,
typename opT>
1185 ProcessID parent = -1, child0 = -1, child1 = -1;
1188 lazy_sync_internal<LazySyncTag>(parent, child0, child1, key,
op);
1190 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<keyT, opT>;
1225 template <
typename keyT,
typename opT>
1230 if(group.
size() > 1) {
1234 ProcessID parent = -1, child0 = -1, child1 = -1;
1235 group.
make_tree(group_root, parent, child0, child1);
1237 lazy_sync_internal<GroupLazySyncTag>(parent, child0, child1, key,
op);
1239 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<keyT, opT>;
1261 template <
typename keyT,
typename valueT>
1267 bcast_internal<BcastTag>(key, value, root);
1293 template <
typename keyT,
typename valueT>
1302 if(group.
size() > 1)
1303 bcast_internal<GroupBcastTag>(key, value, group_root, group);
1341 template <
typename keyT,
typename valueT,
typename opT>
1347 ProcessID parent = -1, child0 = -1, child1 = -1;
1350 return reduce_internal<ReduceTag>(parent, child0, child1, root, key,
1397 template <
typename keyT,
typename valueT,
typename opT>
1407 ProcessID parent = -1, child0 = -1, child1 = -1;
1408 group.
make_tree(group_root, parent, child0, child1);
1410 return reduce_internal<ReduceTag>(parent, child0, child1, group_root,
1448 template <
typename keyT,
typename valueT,
typename opT>
1454 ProcessID parent = -1, child0 = -1, child1 = -1;
1459 reduce_internal<AllReduceTag>(parent, child0, child1, root,
1466 bcast_internal<AllReduceTag>(key, reduce_result, root);
1468 return reduce_result;
1511 template <
typename keyT,
typename valueT,
typename opT>
1520 ProcessID parent = -1, child0 = -1, child1 = -1;
1521 group.
make_tree(group_root, parent, child0, child1);
1525 reduce_internal<GroupAllReduceTag>(parent, child0, child1,
1526 group_root, key, value,
op);
1529 if(group.
rank() != group_root)
1533 bcast_internal<GroupAllReduceTag>(key, reduce_result, 0, group);
1535 return reduce_result;
Implements an archive wrapping a memory buffer.
void binary_tree_info(int root, int &parent, int &child0, int &child1)
Construct info about a binary tree with given root.
Definition safempi.cc:39
int Get_rank() const
Definition safempi.h:714
static int unique_tag_period()
Definition safempi.h:836
int unique_tag()
Returns a unique tag for temporary use (1023<tag<4095)
Definition safempi.h:830
World active message that extends an RMI message.
Definition worldam.h:80
The class used for callbacks (e.g., dependency tracking).
Definition dependency_interface.h:61
A future is a possibly yet unevaluated value.
Definition future.h:373
T & get(bool dowork=true) &
Gets the value, waiting if necessary.
Definition future.h:574
static const Future< T > default_initializer()
See "Gotchas" on Futures about why this exists and how to use it.
Definition future.h:462
bool probe() const
Check whether this future has been assigned.
Definition future.h:631
A collection of processes.
Definition group.h:50
void remote_update() const
Update remote usage count.
Definition group.h:383
void local_update() const
Update local usage count.
Definition group.h:369
ProcessID size() const
Group size accessor.
Definition group.h:429
const DistributedID & id() const
Group id accessor.
Definition group.h:396
bool empty() const
Query whether the group is empty.
Definition group.h:391
ProcessID rank() const
Group rank accessor.
Definition group.h:412
static madness::Future< Group > get_group(const DistributedID &did)
Get group from the registry.
Definition group.cc:90
void make_tree(const ProcessID group_root, ProcessID &parent, ProcessID &child1, ProcessID &child2) const
Compute the binary tree parents and children.
Definition group.h:449
World & get_world() const
Parent world accessor.
Definition group.h:404
Key object that includes the process information.
Definition distributed_id.h:80
static const attrT ATTR_UNORDERED
Definition worldrmi.h:180
Key object that uses a tag to differentiate keys.
Definition distributed_id.h:177
static TaskAttributes hipri()
Definition thread.h:450
void send(ProcessID dest, am_handlerT op, const AmArg *arg, const int attr=RMI::ATTR_ORDERED)
Sends a managed non-blocking active message.
Definition worldam.h:278
Delayed send callback object.
Definition worldgop.h:174
DelayedSend(World &world, const ProcessID dest, const keyT &key, const Future< valueT > &value)
Constructor.
Definition worldgop.h:188
virtual ~DelayedSend()
Definition worldgop.h:193
World & world_
The communication world.
Definition worldgop.h:176
const ProcessID dest_
The destination process id.
Definition worldgop.h:177
Future< valueT > value_
The data to be sent.
Definition worldgop.h:179
const keyT key_
The distributed id associated with value_.
Definition worldgop.h:178
DelayedSend< keyT, valueT > & operator=(const DelayedSend< keyT, valueT > &)
DelayedSend(const DelayedSend< keyT, valueT > &)
virtual void notify()
Notify this object that the future has been set.
Definition worldgop.h:199
Provides collectives that interoperate with the AM and task interfaces.
Definition worldgop.h:145
int max_reducebcast_msg_size() const
Returns the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition worldgop.h:695
void lazy_sync(const keyT &key, const opT &op, const Group &group) const
Group lazy sync.
Definition worldgop.h:1226
void send_internal(ProcessID dest, const keyT &key, const Future< valueT > &value) const
Send value to dest.
Definition worldgop.h:249
void max(T *buf, size_t nelem)
Inplace global max while still processing AM & tasks.
Definition worldgop.h:882
static void bcast_handler(const AmArg &arg)
Definition worldgop.h:352
void lazy_sync(const keyT &key, const opT &op) const
Lazy sync.
Definition worldgop.h:1180
World & world_
World object that this is a part of.
Definition worldgop.h:147
int set_max_reducebcast_msg_size(int sz)
Set the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition worldgop.h:685
std::shared_ptr< detail::DeferredCleanup > deferred_
Deferred cleanup object.
Definition worldgop.h:148
void reduce(T *buf, std::size_t nelem, opT op)
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.
Definition worldgop.h:783
void broadcast(T &t)
Broadcast of a scalar from node 0 to all other nodes.
Definition worldgop.h:740
~WorldGopInterface()
Definition worldgop.h:660
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition worldgop.h:754
void lazy_sync_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const keyT &key, const opT &op) const
Start a distributed lazy sync operation.
Definition worldgop.h:315
void sum(T &a)
Global sum of a scalar while still processing AM & tasks.
Definition worldgop.h:931
int max_reducebcast_msg_size_
maximum size of messages (in bytes) sent by reduce and broadcast
Definition worldgop.h:151
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition worldgop.cc:161
void broadcast(void *buf, size_t nbyte, ProcessID root, bool dowork=true, Tag bcast_tag=-1)
Broadcasts bytes from process root while still processing AM & tasks.
Definition worldgop.cc:173
void bit_and(T *buf, size_t nelem)
Definition worldgop.h:905
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition worldgop.h:471
void lazy_sync_parent(const ProcessID parent, const keyT &key, const ProcessID, const ProcessID) const
Lazy sync parent task.
Definition worldgop.h:279
static Future< valueT > recv_internal(const keyT &key)
Receive data from remote node.
Definition worldgop.h:212
void absmin(T *buf, size_t nelem)
Inplace global absmin while still processing AM & tasks.
Definition worldgop.h:888
void bit_or(T *buf, size_t nelem)
Definition worldgop.h:910
std::enable_if<!is_future< valueT >::value >::type send_internal(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition worldgop.h:226
WorldGopInterface(World &world)
Definition worldgop.h:656
bool set_forbid_fence(bool value)
Set forbid_fence flag to new value and return old value.
Definition worldgop.h:674
void bcast(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition worldgop.h:1294
void bcast_task(const keyT &key, const valueT &value, const ProcessID root) const
Broadcast task.
Definition worldgop.h:391
void group_bcast_task(const keyT &key, const valueT &value, const ProcessID group_root, const Group &group) const
Definition worldgop.h:422
void send(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition worldgop.h:1148
void logic_or(T *buf, size_t nelem)
Definition worldgop.h:925
void bcast(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition worldgop.h:1262
int initial_max_reducebcast_msg_size()
Definition worldgop.h:629
void serial_invoke(std::function< void()> action)
Executes an action on single (this) thread after ensuring all other work is done.
Definition worldgop.cc:165
static void group_bcast_handler(const AmArg &arg)
Definition worldgop.h:367
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op)
Distributed all reduce.
Definition worldgop.h:1450
Future< typename detail::result_of< opT >::type > reduce_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const ProcessID root, const keyT &key, const valueT &value, const opT &op)
Distributed reduce.
Definition worldgop.h:583
bool forbid_fence_
forbid calling fence() in case of several active worlds
Definition worldgop.h:150
static detail::result_of< opT >::type reduce_result_task(const std::vector< Future< typename detail::result_of< opT >::type > > &results, const opT &op)
Definition worldgop.h:559
void absmax(T *buf, size_t nelem)
Inplace global absmax while still processing AM & tasks.
Definition worldgop.h:894
void broadcast(T *buf, size_t nelem, ProcessID root)
Broadcasts typed contiguous data from process root while still processing AM & tasks.
Definition worldgop.h:734
void fence_impl(std::function< void()> epilogue=[]{}, bool pause_during_epilogue=false, bool debug=false)
Implementation of fence.
Definition worldgop.cc:50
static detail::result_of< opT >::type reduce_task(const valueT &value, const opT &op)
Definition worldgop.h:551
void product(T *buf, size_t nelem)
Inplace global product while still processing AM & tasks.
Definition worldgop.h:900
void min(T *buf, size_t nelem)
Inplace global min while still processing AM & tasks.
Definition worldgop.h:876
void logic_and(T *buf, size_t nelem)
Definition worldgop.h:920
static Future< valueT > recv(const ProcessID source, const keyT &key)
Receive data from source.
Definition worldgop.h:1133
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID group_root, const Group &group)
Distributed group reduce.
Definition worldgop.h:1399
bool debug_
Debug mode.
Definition worldgop.h:149
void min(T &a)
Global min of a scalar while still processing AM & tasks.
Definition worldgop.h:943
void max(T &a)
Global max of a scalar while still processing AM & tasks.
Definition worldgop.h:937
std::vector< T > concat0(const std::vector< T > &v, size_t bufsz=1024 *1024)
Concatenate an STL vector of serializable stuff onto node 0.
Definition worldgop.h:953
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID root)
Distributed reduce.
Definition worldgop.h:1343
void barrier()
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.
Definition worldgop.h:700
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op, const Group &group)
Distributed, group all reduce.
Definition worldgop.h:1513
void lazy_sync_children(const ProcessID child0, const ProcessID child1, const keyT &key, opT &op, const ProcessID) const
Lazy sync children task.
Definition worldgop.h:297
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition worldgop.h:521
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition worldgop.h:870
void bit_xor(T *buf, size_t nelem)
Definition worldgop.h:915
friend class detail::DeferredCleanup
Definition worldgop.h:153
bool set_debug(bool value)
Set debug flag to new value and return old value.
Definition worldgop.h:667
void broadcast(T &t, ProcessID root)
Broadcast of a scalar from node root to all other nodes.
Definition worldgop.h:746
std::enable_if<!std::is_pointer< T >::value, SafeMPI::Request >::type Isend(const T &datum, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Isend one element.
Definition worldmpi.h:308
SafeMPI::Request Irecv(T *buf, int count, int source, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Async receive data of up to count elements from process source.
Definition worldmpi.h:321
void Send(const T *buf, long lenbuf, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Send array of lenbuf elements to process dest.
Definition worldmpi.h:347
void Recv(T *buf, long lenbuf, int src, int tag) const
Receive data of up to lenbuf elements from process src.
Definition worldmpi.h:374
void add(TaskInterface *t)
Add a new local task, taking ownership of the pointer.
Definition world_task_queue.h:466
A parallel world class.
Definition world.h:132
WorldTaskQueue & taskq
Task queue.
Definition world.h:204
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:318
static void await(SafeMPI::Request &request, bool dowork=true)
Wait for a MPI request to complete.
Definition world.h:516
WorldMpiInterface & mpi
MPI interface.
Definition world.h:202
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:328
unsigned long id() const
Definition world.h:313
WorldGopInterface & gop
Global operations.
Definition world.h:205
WorldAmInterface & am
AM interface.
Definition world.h:203
Wraps an archive around a memory buffer for output.
Definition buffer_archive.h:59
std::size_t size() const
Return the amount of data stored (counted) in the buffer.
Definition buffer_archive.h:123
Deferred cleanup of shared_ptr's.
Definition deferred_cleanup.h:60
Distributed caching utility.
Definition dist_cache.h:54
static void get_cache_value(const keyT &key, madness::Future< valueT > &value)
Get the cache value associated with key.
Definition dist_cache.h:185
static void set_cache_value(const keyT &key, const valueT &value)
Set the cache value associated with key.
Definition dist_cache.h:146
static bool debug
Definition dirac-hatom.cc:16
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
Tensor< typename Tensor< T >::scalar_type > arg(const Tensor< T > &t)
Return a new tensor holding the argument of each element of t (complex types only)
Definition tensor.h:2502
static const double v
Definition hatom_sf_dirac.cc:20
Tensor< double > op(const Tensor< double > &x)
Definition kain.cc:508
#define max(a, b)
Definition lda.h:51
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
Intracomm COMM_WORLD
Definition safempi.cc:67
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
std::pair< uniqueidT, std::size_t > DistributedID
Distributed ID which is used to identify objects.
Definition distributed_id.h:48
double abs(double x)
Definition complexfun.h:48
AmArg * copy_am_arg(const AmArg &arg)
Definition worldam.h:170
AmArg * new_am_arg(const argT &... args)
Convenience template for serializing arguments into a new AmArg.
Definition worldam.h:194
bool quiet()
Check if the MADNESS runtime was initialized for quiet operation.
Definition world.cc:77
void error(const char *msg)
Definition world.cc:139
std::string type(const PairType &n)
Definition PNOParameters.h:18
std::uint64_t cstr_to_memory_size(const char *str)
Unit-aware conversion of a C string to a size_t.
Definition units.cc:14
static const double b
Definition nonlinschro.cc:119
static const double a
Definition nonlinschro.cc:118
int posix_memalign(void **memptr, std::size_t alignment, std::size_t size)
Definition posixmem.h:44
Hash functor.
Definition worldhash.h:233
T operator()(const T &a, const T &b) const
Definition worldgop.h:87
Definition worldgop.h:100
T operator()(const T &a, const T &b) const
Definition worldgop.h:101
Definition worldgop.h:107
T operator()(const T &a, const T &b) const
Definition worldgop.h:108
Definition worldgop.h:114
T operator()(const T &a, const T &b) const
Definition worldgop.h:115
Definition worldgop.h:121
T operator()(const T &a, const T &b) const
Definition worldgop.h:122
Definition worldgop.h:163
Definition worldgop.h:159
Definition worldgop.h:164
Definition worldgop.h:160
Definition worldgop.h:158
Definition worldgop.h:162
Definition worldgop.h:157
Definition worldgop.h:156
Definition worldgop.h:161
Definition worldgop.h:128
T operator()(const T &a, const T &b) const
Definition worldgop.h:129
Definition worldgop.h:135
T operator()(const T &a, const T &b) const
Definition worldgop.h:136
T operator()(const T &a, const T &b) const
Definition worldgop.h:80
T operator()(const T &a, const T &b) const
Definition worldgop.h:94
T operator()(const T &a, const T &b) const
Definition worldgop.h:73
T operator()(const T &a, const T &b) const
Definition worldgop.h:66
fnT::result_type type
Definition function_traits.h:97
T type
Type with Future removed.
Definition type_traits.h:110
#define MPI_INT
Definition stubmpi.h:81
#define MPI_BYTE
Definition stubmpi.h:77
AtomicInt sum
Definition test_atomicint.cc:46
std::pair< int, double > valueT
Definition test_binsorter.cc:6
double source(const coordT &r)
Definition testperiodic.cc:48
const char * status[2]
Definition testperiodic.cc:43
Declares the World class for the parallel runtime environment.
Defines TaskInterface and implements WorldTaskQueue and associated stuff.
Defines types used by the parallel runtime.
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43
int Tag
Used to clearly identify message tag/type.
Definition worldtypes.h:44