32 #ifndef MADNESS_WORLD_WORLDGOP_H__INCLUDED
33 #define MADNESS_WORLD_WORLDGOP_H__INCLUDED
42 #include <type_traits>
56 class WorldAmInterface;
60 class DeferredCleanup;
106 template <
typename T>
113 template <
typename T>
120 template <
typename T>
127 template <
typename T>
134 template <
typename T>
173 template <
typename keyT,
typename valueT>
211 template <
typename valueT,
typename keyT>
224 template <
typename keyT,
typename valueT>
225 typename std::enable_if<!is_future<valueT>::value >
::type
231 dist_cache::set_cache_value(key, value);
234 world_.
taskq.
add(dest, dist_cache::template set_cache_value<valueT>, key,
248 template <
typename keyT,
typename valueT>
253 dist_cache::set_cache_value(key, value);
258 world_.
taskq.
add(dest, dist_cache::template set_cache_value<valueT>, key,
265 const_cast<Future<valueT>&
>(value).register_callback(delayed_send_callback);
278 template <
typename keyT>
296 template <
typename keyT,
typename opT>
314 template <
typename tagT,
typename keyT,
typename opT>
316 const ProcessID child1,
const keyT& key,
const opT&
op)
const {
321 recv_internal<ProcessID>(key_type(key, child0)) :
324 recv_internal<ProcessID>(key_type(key, child1)) :
327 recv_internal<ProcessID>(key_type(key, parent)) :
332 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<key_type, opT>;
334 child0_signal, child1_signal, my_key,
op, parent_signal,
339 if(child0_signal.
probe() && child1_signal.
probe())
342 auto lazy_sync_parent_fn = & WorldGopInterface::template lazy_sync_parent<key_type>;
344 parent, my_key, child0_signal, child1_signal,
351 template <
typename keyT,
typename valueT,
typename taskfnT>
359 arg & taskfn & key & value & root;
362 arg.get_world()->taskq.add(
arg.get_world()->gop, taskfn, key,
366 template <
typename keyT,
typename valueT,
typename taskfnT>
375 arg & taskfn & key & value & group_root & group_key;
381 arg.get_world()->taskq.add(
arg.get_world()->gop, taskfn, key, value,
390 template <
typename keyT,
typename valueT>
396 ProcessID parent = -1, child0 = -1, child1 = -1;
406 void (*handler)(
const AmArg&) =
407 & WorldGopInterface::template bcast_handler<keyT, valueT, taskfnT>;
409 & WorldGopInterface::template bcast_task<keyT, valueT>,
421 template <
typename keyT,
typename valueT>
429 ProcessID parent = -1, child0 = -1, child1 = -1;
430 group.
make_tree(group_root, parent, child0, child1);
441 void (*handler)(
const AmArg&) =
442 & WorldGopInterface::template group_bcast_handler<keyT, valueT, taskfnT>;
444 & WorldGopInterface::template group_bcast_task<keyT, valueT>,
445 key, value, group_root, group.
id());
470 template <
typename tagT,
typename keyT,
typename valueT>
477 const key_type tagged_key(key);
491 auto bcast_task_fn = & WorldGopInterface::template bcast_task<key_type, valueT>;
520 template <
typename tagT,
typename keyT,
typename valueT>
526 const key_type tagged_key(key);
528 if(group.
rank() == group_root) {
533 auto group_bcast_task_fn = & WorldGopInterface::template group_bcast_task<key_type, valueT>;
535 tagged_key, value, group_root, group,
549 template <
typename valueT,
typename opT>
557 template <
typename opT>
564 for(std::size_t i = 1ul; i < results.size(); ++i)
565 op(result.
get(), results[i].get());
581 template <
typename tagT,
typename keyT,
typename valueT,
typename opT>
591 std::vector<Future<result_type> > results;
595 results.push_back(
world_.
taskq.
add(WorldGopInterface::template reduce_task<value_type, opT>,
600 results.push_back(recv_internal<result_type>(key_type(key, child0)));
602 results.push_back(recv_internal<result_type>(key_type(key, child1)));
606 world_.
taskq.
add(WorldGopInterface::template reduce_result_task<opT>,
626 bool pause_during_epilogue =
false,
631 const auto* initial_max_reducebcast_msg_size_cstr = std::getenv(
"MAD_MAX_REDUCEBCAST_MSG_SIZE");
632 if (initial_max_reducebcast_msg_size_cstr) {
638 <<
"!!MADNESS WARNING: Invalid value for environment variable MAD_MAX_REDUCEBCAST_MSG_SIZE.\n"
639 <<
"!!MADNESS WARNING: MAD_MAX_REDUCEBCAST_MSG_SIZE = "
640 << result_u64 <<
"\n";
643 result =
static_cast<int>(result_u64);
646 <<
"MADNESS max msg size for GOP reduce/broadcast set to "
647 << result <<
" bytes.\n";
733 template <
typename T,
typename = std::enable_if_t<std::is_trivially_copyable_v<T>>>
735 broadcast((
void *) buf, nelem*
sizeof(
T), root);
739 template <
typename T,
typename = std::enable_if_t<std::is_trivially_copyable_v<T>>>
745 template <
typename T,
typename = std::enable_if_t<std::is_trivially_copyable_v<T>>>
751 template <
typename objT,
752 typename = std::void_t<decltype(std::declval<archive::BufferInputArchive&>()&std::declval<objT&>())>,
753 typename =
std::void_t<decltype(std::declval<archive::BufferOutputArchive&>()&std::declval<const objT&>())>>
762 BUFLEN = count.
size();
766 unsigned char* buf =
new unsigned char[BUFLEN];
782 template <
typename T,
class opT>
784 static_assert(std::is_trivially_copyable_v<T>,
"T must be trivially copyable");
788 const std::size_t nelem_per_maxmsg =
791 const auto buf_size = ((
sizeof(
T) * std::min(nelem_per_maxmsg, nelem) +
792 std::alignment_of_v<T> - 1) /
793 std::alignment_of_v<T>) * std::alignment_of_v<T>;
795 void operator()(
T *ptr) {
800 using sptr_t = std::unique_ptr<T[], free_dtor>;
802 auto aligned_buf_alloc = [&]() ->
T* {
804 const std::size_t alignment =
805 ((std::alignment_of_v<T> +
sizeof(
void *) - 1) /
808 #ifdef HAVE_POSIX_MEMALIGN
811 throw std::bad_alloc();
813 return static_cast<T *
>(ptr);
815 return static_cast<T *
>(std::aligned_alloc(alignment, buf_size));
821 buf0 = sptr_t(aligned_buf_alloc(),
823 sptr_t buf1(
nullptr);
825 buf1 = sptr_t(aligned_buf_alloc(),
828 auto reduce_impl = [&,
this](
T* buf,
size_t nelem) {
842 for (
long i = 0; i < (long)nelem; ++i)
843 buf[i] =
op(buf[i], buf0[i]);
847 for (
long i = 0; i < (long)nelem; ++i)
848 buf[i] =
op(buf[i], buf1[i]);
861 const int n = std::min(nelem_per_maxmsg, nelem);
869 template <
typename T>
870 inline void sum(
T* buf,
size_t nelem) {
875 template <
typename T>
876 inline void min(
T* buf,
size_t nelem) {
881 template <
typename T>
882 inline void max(
T* buf,
size_t nelem) {
887 template <
typename T>
893 template <
typename T>
899 template <
typename T>
904 template <
typename T>
909 template <
typename T>
914 template <
typename T>
919 template <
typename T>
924 template <
typename T>
930 template <
typename T>
936 template <
typename T>
942 template <
typename T>
952 template <
typename T>
953 std::vector<T>
concat0(
const std::vector<T>&
v,
size_t bufsz=1024*1024) {
956 bufsz = ((bufsz +
sizeof(
void*) - 1) /
sizeof(
void*)) *
sizeof(
void*);
960 int child0_nbatch = 0, child1_nbatch = 0;
963 void operator()(std::byte *ptr) {
968 using sptr_t = std::unique_ptr<std::byte[], free_dtor>;
970 auto buf0 = sptr_t(
static_cast<std::byte *
>(
971 std::aligned_alloc(
sizeof(
void *), bufsz)),
973 auto buf1 = sptr_t(
static_cast<std::byte *
>(
974 std::aligned_alloc(
sizeof(
void *), bufsz)),
978 const int batch_size =
static_cast<int>(
982 const int max_nbatch = bufsz / batch_size;
984 const int max_ntags = max_nbatch + 1;
986 std::vector<Tag> tags;
987 tags.reserve(max_nbatch);
990 if (child0 != -1 || child1 != -1) {
993 auto receive_nbatch = [&,
this]() {
1008 auto receive_batch = [&,
this](
const int batch,
const size_t buf_offset) {
1010 if (child0 != -1 && batch < child0_nbatch) {
1011 int msg_size = batch_size;
1013 if (batch + 1 == child0_nbatch) {
1015 &msg_size, 1,
MPI_INT, child0, tags[0]);
1023 if (child1 != -1 && batch < child1_nbatch) {
1024 int msg_size = batch_size;
1026 if (batch + 1 == child1_nbatch) {
1028 &msg_size, 1,
MPI_INT, child1, tags[0]);
1036 if (child0 != -1 && batch < child0_nbatch) {
1039 if (child1 != -1 && batch < child1_nbatch) {
1044 size_t buf_offset = 0;
1046 while (buf_offset < bufsz) {
1047 receive_batch(batch, buf_offset);
1048 buf_offset += batch_size;
1049 buf_offset = std::min(buf_offset, bufsz);
1055 std::vector<T> left, right;
1063 for (
unsigned int i = 0; i < right.size(); ++i)
1064 left.push_back(right[i]);
1066 for (
unsigned int i=0; i<
v.size(); ++i) left.push_back(
v[i]);
1072 const auto total_nbytes_to_send = ar.
size();
1075 const int nbatch = (total_nbytes_to_send + batch_size - 1) / batch_size;
1079 size_t buf_offset = 0;
1081 while (buf_offset < bufsz) {
1084 auto send_batch = [&,
this](
const int batch,
const size_t buf_offset) {
1085 const int nbytes_to_send =
static_cast<int>(
1086 std::min(
static_cast<size_t>(batch_size),
1087 total_nbytes_to_send - buf_offset));
1089 if (batch + 1 == nbatch) {
1091 &nbytes_to_send, 1,
MPI_INT, parent, tags[0]);
1096 MPI_BYTE, parent, tags[batch + 1]);
1100 send_batch(batch, buf_offset);
1101 buf_offset += batch_size;
1102 buf_offset = std::min(buf_offset, bufsz);
1107 if (parent == -1)
return left;
1108 else return std::vector<T>();
1121 template <
typename valueT,
typename keyT>
1136 template <
typename keyT,
typename valueT>
1168 template <
typename keyT,
typename opT>
1174 ProcessID parent = -1, child0 = -1, child1 = -1;
1177 lazy_sync_internal<LazySyncTag>(parent, child0, child1, key,
op);
1179 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<keyT, opT>;
1214 template <
typename keyT,
typename opT>
1219 if(group.
size() > 1) {
1223 ProcessID parent = -1, child0 = -1, child1 = -1;
1224 group.
make_tree(group_root, parent, child0, child1);
1226 lazy_sync_internal<GroupLazySyncTag>(parent, child0, child1, key,
op);
1228 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<keyT, opT>;
1250 template <
typename keyT,
typename valueT>
1256 bcast_internal<BcastTag>(key, value, root);
1282 template <
typename keyT,
typename valueT>
1291 if(group.
size() > 1)
1292 bcast_internal<GroupBcastTag>(key, value, group_root, group);
1330 template <
typename keyT,
typename valueT,
typename opT>
1336 ProcessID parent = -1, child0 = -1, child1 = -1;
1339 return reduce_internal<ReduceTag>(parent, child0, child1, root, key,
1386 template <
typename keyT,
typename valueT,
typename opT>
1396 ProcessID parent = -1, child0 = -1, child1 = -1;
1397 group.
make_tree(group_root, parent, child0, child1);
1399 return reduce_internal<ReduceTag>(parent, child0, child1, group_root,
1437 template <
typename keyT,
typename valueT,
typename opT>
1443 ProcessID parent = -1, child0 = -1, child1 = -1;
1448 reduce_internal<AllReduceTag>(parent, child0, child1, root,
1455 bcast_internal<AllReduceTag>(key, reduce_result, root);
1457 return reduce_result;
1500 template <
typename keyT,
typename valueT,
typename opT>
1509 ProcessID parent = -1, child0 = -1, child1 = -1;
1510 group.
make_tree(group_root, parent, child0, child1);
1514 reduce_internal<GroupAllReduceTag>(parent, child0, child1,
1515 group_root, key, value,
op);
1518 if(group.
rank() != group_root)
1522 bcast_internal<GroupAllReduceTag>(key, reduce_result, 0, group);
1524 return reduce_result;
Implements an archive wrapping a memory buffer.
void binary_tree_info(int root, int &parent, int &child0, int &child1)
Construct info about a binary tree with given root.
Definition: safempi.cc:39
int Get_rank() const
Definition: safempi.h:714
static int unique_tag_period()
Definition: safempi.h:836
int unique_tag()
Returns a unique tag for temporary use (1023<tag<4095)
Definition: safempi.h:830
Definition: safempi.h:289
World active message that extends an RMI message.
Definition: worldam.h:80
The class used for callbacks (e.g., dependency tracking).
Definition: dependency_interface.h:61
T & get(bool dowork=true) &
Gets the value, waiting if necessary.
Definition: future.h:574
static const Future< T > default_initializer()
See "Gotchas" on Futures about why this exists and how to use it.
Definition: future.h:462
bool probe() const
Check whether this future has been assigned.
Definition: future.h:631
A collection of processes.
Definition: group.h:50
void remote_update() const
Update remote usage count.
Definition: group.h:383
void local_update() const
Update local usage count.
Definition: group.h:369
ProcessID size() const
Group size accessor.
Definition: group.h:429
const DistributedID & id() const
Group id accessor.
Definition: group.h:396
World & get_world() const
Parent world accessor.
Definition: group.h:404
bool empty() const
Query whether the group is empty.
Definition: group.h:391
ProcessID rank() const
Group rank accessor.
Definition: group.h:412
static madness::Future< Group > get_group(const DistributedID &did)
Get group from the registry.
Definition: group.cc:90
void make_tree(const ProcessID group_root, ProcessID &parent, ProcessID &child1, ProcessID &child2) const
Compute the binary tree parents and children.
Definition: group.h:449
Key object that includes the process information.
Definition: distributed_id.h:80
static const attrT ATTR_UNORDERED
Definition: worldrmi.h:180
Key object that uses a tag to differentiate keys.
Definition: distributed_id.h:177
static TaskAttributes hipri()
Definition: thread.h:450
void send(ProcessID dest, am_handlerT op, const AmArg *arg, const int attr=RMI::ATTR_ORDERED)
Sends a managed non-blocking active message.
Definition: worldam.h:278
Delayed send callback object.
Definition: worldgop.h:174
DelayedSend(World &world, const ProcessID dest, const keyT &key, const Future< valueT > &value)
Constructor.
Definition: worldgop.h:188
virtual ~DelayedSend()
Definition: worldgop.h:193
World & world_
The communication world.
Definition: worldgop.h:176
const ProcessID dest_
The destination process id.
Definition: worldgop.h:177
Future< valueT > value_
The data to be sent.
Definition: worldgop.h:179
const keyT key_
The distributed id associated with value_.
Definition: worldgop.h:178
DelayedSend< keyT, valueT > & operator=(const DelayedSend< keyT, valueT > &)
DelayedSend(const DelayedSend< keyT, valueT > &)
virtual void notify()
Notify this object that the future has been set.
Definition: worldgop.h:199
Provides collectives that interoperate with the AM and task interfaces.
Definition: worldgop.h:145
int max_reducebcast_msg_size() const
Returns the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition: worldgop.h:695
void lazy_sync(const keyT &key, const opT &op, const Group &group) const
Group lazy sync.
Definition: worldgop.h:1215
void send_internal(ProcessID dest, const keyT &key, const Future< valueT > &value) const
Send value to dest.
Definition: worldgop.h:249
void max(T *buf, size_t nelem)
Inplace global max while still processing AM & tasks.
Definition: worldgop.h:882
static void bcast_handler(const AmArg &arg)
Definition: worldgop.h:352
void lazy_sync(const keyT &key, const opT &op) const
Lazy sync.
Definition: worldgop.h:1169
static Future< valueT > recv(const ProcessID source, const keyT &key)
Receive data from source.
Definition: worldgop.h:1122
World & world_
World object that this is a part of.
Definition: worldgop.h:147
int set_max_reducebcast_msg_size(int sz)
Set the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition: worldgop.h:685
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID root)
Distributed reduce.
Definition: worldgop.h:1332
std::shared_ptr< detail::DeferredCleanup > deferred_
Deferred cleanup object.
Definition: worldgop.h:148
void reduce(T *buf, std::size_t nelem, opT op)
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.
Definition: worldgop.h:783
void broadcast(T &t)
Broadcast of a scalar from node 0 to all other nodes.
Definition: worldgop.h:740
~WorldGopInterface()
Definition: worldgop.h:660
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition: worldgop.h:754
void lazy_sync_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const keyT &key, const opT &op) const
Start a distributed lazy sync operation.
Definition: worldgop.h:315
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op, const Group &group)
Distributed, group all reduce.
Definition: worldgop.h:1502
void sum(T &a)
Global sum of a scalar while still processing AM & tasks.
Definition: worldgop.h:931
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID group_root, const Group &group)
Distributed group reduce.
Definition: worldgop.h:1388
int max_reducebcast_msg_size_
maximum size of messages (in bytes) sent by reduce and broadcast
Definition: worldgop.h:151
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition: worldgop.cc:161
void broadcast(void *buf, size_t nbyte, ProcessID root, bool dowork=true, Tag bcast_tag=-1)
Broadcasts bytes from process root while still processing AM & tasks.
Definition: worldgop.cc:173
static detail::result_of< opT >::type reduce_result_task(const std::vector< Future< typename detail::result_of< opT >::type > > &results, const opT &op)
Definition: worldgop.h:559
void bit_and(T *buf, size_t nelem)
Definition: worldgop.h:905
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition: worldgop.h:471
void lazy_sync_parent(const ProcessID parent, const keyT &key, const ProcessID, const ProcessID) const
Lazy sync parent task.
Definition: worldgop.h:279
void absmin(T *buf, size_t nelem)
Inplace global absmin while still processing AM & tasks.
Definition: worldgop.h:888
void bit_or(T *buf, size_t nelem)
Definition: worldgop.h:910
WorldGopInterface(World &world)
Definition: worldgop.h:656
bool set_forbid_fence(bool value)
Set forbid_fence flag to new value and return old value.
Definition: worldgop.h:674
void bcast(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition: worldgop.h:1283
void bcast_task(const keyT &key, const valueT &value, const ProcessID root) const
Broadcast task.
Definition: worldgop.h:391
void group_bcast_task(const keyT &key, const valueT &value, const ProcessID group_root, const Group &group) const
Definition: worldgop.h:422
void send(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition: worldgop.h:1137
void logic_or(T *buf, size_t nelem)
Definition: worldgop.h:925
void bcast(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition: worldgop.h:1251
int initial_max_reducebcast_msg_size()
Definition: worldgop.h:629
void serial_invoke(std::function< void()> action)
Executes an action on single (this) thread after ensuring all other work is done.
Definition: worldgop.cc:165
static void group_bcast_handler(const AmArg &arg)
Definition: worldgop.h:367
bool forbid_fence_
forbid calling fence() in case of several active worlds
Definition: worldgop.h:150
void absmax(T *buf, size_t nelem)
Inplace global absmax while still processing AM & tasks.
Definition: worldgop.h:894
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op)
Distributed all reduce.
Definition: worldgop.h:1439
void broadcast(T *buf, size_t nelem, ProcessID root)
Broadcasts typed contiguous data from process root while still processing AM & tasks.
Definition: worldgop.h:734
std::vector< T > concat0(const std::vector< T > &v, size_t bufsz=1024 *1024)
Concatenate an STL vector of serializable stuff onto node 0.
Definition: worldgop.h:953
void fence_impl(std::function< void()> epilogue=[]{}, bool pause_during_epilogue=false, bool debug=false)
Implementation of fence.
Definition: worldgop.cc:50
void product(T *buf, size_t nelem)
Inplace global product while still processing AM & tasks.
Definition: worldgop.h:900
void min(T *buf, size_t nelem)
Inplace global min while still processing AM & tasks.
Definition: worldgop.h:876
void logic_and(T *buf, size_t nelem)
Definition: worldgop.h:920
static detail::result_of< opT >::type reduce_task(const valueT &value, const opT &op)
Definition: worldgop.h:551
static Future< valueT > recv_internal(const keyT &key)
Receive data from remote node.
Definition: worldgop.h:212
std::enable_if<!is_future< valueT >::value >::type send_internal(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition: worldgop.h:226
Future< typename detail::result_of< opT >::type > reduce_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const ProcessID root, const keyT &key, const valueT &value, const opT &op)
Distributed reduce.
Definition: worldgop.h:583
bool debug_
Debug mode.
Definition: worldgop.h:149
void min(T &a)
Global min of a scalar while still processing AM & tasks.
Definition: worldgop.h:943
void max(T &a)
Global max of a scalar while still processing AM & tasks.
Definition: worldgop.h:937
void barrier()
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.
Definition: worldgop.h:700
void lazy_sync_children(const ProcessID child0, const ProcessID child1, const keyT &key, opT &op, const ProcessID) const
Lazy sync children task.
Definition: worldgop.h:297
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition: worldgop.h:521
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition: worldgop.h:870
void bit_xor(T *buf, size_t nelem)
Definition: worldgop.h:915
bool set_debug(bool value)
Set debug flag to new value and return old value.
Definition: worldgop.h:667
void broadcast(T &t, ProcessID root)
Broadcast of a scalar from node root to all other nodes.
Definition: worldgop.h:746
std::enable_if<!std::is_pointer< T >::value, SafeMPI::Request >::type Isend(const T &datum, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Isend one element.
Definition: worldmpi.h:308
SafeMPI::Request Irecv(T *buf, int count, int source, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Async receive data of up to count elements from process source.
Definition: worldmpi.h:321
void Send(const T *buf, long lenbuf, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Send array of lenbuf elements to process dest.
Definition: worldmpi.h:347
void Recv(T *buf, long lenbuf, int src, int tag) const
Receive data of up to lenbuf elements from process src.
Definition: worldmpi.h:374
void add(TaskInterface *t)
Add a new local task, taking ownership of the pointer.
Definition: world_task_queue.h:466
A parallel world class.
Definition: world.h:132
WorldTaskQueue & taskq
Task queue.
Definition: world.h:204
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition: world.h:318
static void await(SafeMPI::Request &request, bool dowork=true)
Wait for a MPI request to complete.
Definition: world.h:516
WorldMpiInterface & mpi
MPI interface.
Definition: world.h:202
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition: world.h:328
unsigned long id() const
Definition: world.h:313
WorldGopInterface & gop
Global operations.
Definition: world.h:205
WorldAmInterface & am
AM interface.
Definition: world.h:203
Wraps an archive around a memory buffer for output.
Definition: buffer_archive.h:59
std::size_t size() const
Return the amount of data stored (counted) in the buffer.
Definition: buffer_archive.h:123
Deferred cleanup of shared_ptr's.
Definition: deferred_cleanup.h:60
Distributed caching utility.
Definition: dist_cache.h:54
static void get_cache_value(const keyT &key, madness::Future< valueT > &value)
Get the cache value associated with key.
Definition: dist_cache.h:185
static void set_cache_value(const keyT &key, const valueT &value)
Set the cache value associated with key.
Definition: dist_cache.h:146
static bool debug
Definition: dirac-hatom.cc:16
static double function(const coord_3d &r)
Normalized gaussian.
Definition: functionio.cc:100
auto T(World &world, response_space &f) -> response_space
Definition: global_functions.cc:34
Tensor< typename Tensor< T >::scalar_type > arg(const Tensor< T > &t)
Return a new tensor holding the argument of each element of t (complex types only)
Definition: tensor.h:2502
static const double v
Definition: hatom_sf_dirac.cc:20
Tensor< double > op(const Tensor< double > &x)
Definition: kain.cc:508
#define max(a, b)
Definition: lda.h:51
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition: madness_exception.h:134
Intracomm COMM_WORLD
Definition: safempi.cc:67
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
std::pair< uniqueidT, std::size_t > DistributedID
Distributed ID which is used to identify objects.
Definition: distributed_id.h:48
double abs(double x)
Definition: complexfun.h:48
AmArg * new_am_arg(const argT &... args)
Convenience template for serializing arguments into a new AmArg.
Definition: worldam.h:194
bool quiet()
Check if the MADNESS runtime was initialized for quiet operation.
Definition: world.cc:77
void error(const char *msg)
Definition: world.cc:139
std::string type(const PairType &n)
Definition: PNOParameters.h:18
std::uint64_t cstr_to_memory_size(const char *str)
Unit-aware conversion of a C string to a memory size (std::uint64_t).
Definition: units.cc:14
void swap(Vector< T, N > &l, Vector< T, N > &r)
Swap the contents of two Vectors.
Definition: vector.h:497
AmArg * copy_am_arg(const AmArg &arg)
Definition: worldam.h:170
static const double b
Definition: nonlinschro.cc:119
static const double a
Definition: nonlinschro.cc:118
int posix_memalign(void **memptr, std::size_t alignment, std::size_t size)
Definition: posixmem.h:44
Definition: worldgop.h:86
T operator()(const T &a, const T &b) const
Definition: worldgop.h:87
Definition: worldgop.h:100
T operator()(const T &a, const T &b) const
Definition: worldgop.h:101
Definition: worldgop.h:107
T operator()(const T &a, const T &b) const
Definition: worldgop.h:108
Definition: worldgop.h:114
T operator()(const T &a, const T &b) const
Definition: worldgop.h:115
Definition: worldgop.h:121
T operator()(const T &a, const T &b) const
Definition: worldgop.h:122
Definition: worldgop.h:163
Definition: worldgop.h:159
Definition: worldgop.h:164
Definition: worldgop.h:160
Definition: worldgop.h:158
Definition: worldgop.h:162
Definition: worldgop.h:157
Definition: worldgop.h:156
Definition: worldgop.h:161
Definition: worldgop.h:128
T operator()(const T &a, const T &b) const
Definition: worldgop.h:129
Definition: worldgop.h:135
T operator()(const T &a, const T &b) const
Definition: worldgop.h:136
Definition: worldgop.h:79
T operator()(const T &a, const T &b) const
Definition: worldgop.h:80
Definition: worldgop.h:93
T operator()(const T &a, const T &b) const
Definition: worldgop.h:94
Definition: worldgop.h:72
T operator()(const T &a, const T &b) const
Definition: worldgop.h:73
Definition: worldgop.h:65
T operator()(const T &a, const T &b) const
Definition: worldgop.h:66
fnT::result_type type
Definition: function_traits.h:97
T type
Type with Future removed.
Definition: type_traits.h:110
#define MPI_INT
Definition: stubmpi.h:78
#define MPI_BYTE
Definition: stubmpi.h:74
std::pair< int, double > valueT
Definition: test_binsorter.cc:6
double source(const coordT &r)
Definition: testperiodic.cc:48
const char * status[2]
Definition: testperiodic.cc:43
Declares the World class for the parallel runtime environment.
Defines TaskInterface and implements WorldTaskQueue and associated stuff.
Defines types used by the parallel runtime.
int ProcessID
Used to clearly identify process number/rank.
Definition: worldtypes.h:43
int Tag
Used to clearly identify message tag/type.
Definition: worldtypes.h:44