32#ifndef MADNESS_WORLD_WORLDGOP_H__INCLUDED
33#define MADNESS_WORLD_WORLDGOP_H__INCLUDED
56 class WorldAmInterface;
60 class DeferredCleanup;
100 template <
typename T>
108 template <
typename T>
115 template <
typename T>
122 template <
typename T>
129 template <
typename T>
136 template <
typename T>
176 template <
typename keyT,
typename valueT>
214 template <
typename valueT,
typename keyT>
227 template <
typename keyT,
typename valueT>
228 typename std::enable_if<!is_future<valueT>::value >
::type
234 dist_cache::set_cache_value(key, value);
237 world_.
taskq.
add(dest, dist_cache::template set_cache_value<valueT>, key,
251 template <
typename keyT,
typename valueT>
256 dist_cache::set_cache_value(key, value);
261 world_.
taskq.
add(dest, dist_cache::template set_cache_value<valueT>, key,
268 const_cast<Future<valueT>&
>(value).register_callback(delayed_send_callback);
281 template <
typename keyT>
299 template <
typename keyT,
typename opT>
317 template <
typename tagT,
typename keyT,
typename opT>
324 recv_internal<ProcessID>(key_type(key, child0)) :
327 recv_internal<ProcessID>(key_type(key, child1)) :
330 recv_internal<ProcessID>(key_type(key, parent)) :
335 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<key_type, opT>;
337 child0_signal, child1_signal, my_key,
op, parent_signal,
342 if(child0_signal.
probe() && child1_signal.
probe())
345 auto lazy_sync_parent_fn = & WorldGopInterface::template lazy_sync_parent<key_type>;
347 parent, my_key, child0_signal, child1_signal,
354 template <
typename keyT,
typename valueT,
typename taskfnT>
362 arg & taskfn & key & value & root;
365 arg.get_world()->taskq.add(
arg.get_world()->gop, taskfn, key,
369 template <
typename keyT,
typename valueT,
typename taskfnT>
378 arg & taskfn & key & value & group_root & group_key;
384 arg.get_world()->taskq.add(
arg.get_world()->gop, taskfn, key, value,
393 template <
typename keyT,
typename valueT>
399 ProcessID parent = -1, child0 = -1, child1 = -1;
409 void (*handler)(
const AmArg&) =
410 & WorldGopInterface::template bcast_handler<keyT, valueT, taskfnT>;
412 & WorldGopInterface::template bcast_task<keyT, valueT>,
424 template <
typename keyT,
typename valueT>
432 ProcessID parent = -1, child0 = -1, child1 = -1;
433 group.
make_tree(group_root, parent, child0, child1);
444 void (*handler)(
const AmArg&) =
445 & WorldGopInterface::template group_bcast_handler<keyT, valueT, taskfnT>;
447 & WorldGopInterface::template group_bcast_task<keyT, valueT>,
448 key, value, group_root, group.
id());
473 template <
typename tagT,
typename keyT,
typename valueT>
480 const key_type tagged_key(key);
494 auto bcast_task_fn = & WorldGopInterface::template bcast_task<key_type, valueT>;
523 template <
typename tagT,
typename keyT,
typename valueT>
529 const key_type tagged_key(key);
531 if(group.
rank() == group_root) {
536 auto group_bcast_task_fn = & WorldGopInterface::template group_bcast_task<key_type, valueT>;
538 tagged_key, value, group_root, group,
552 template <
typename valueT,
typename opT>
560 template <
typename opT>
567 for(std::size_t i = 1ul; i < results.size(); ++i)
568 op(result.
get(), results[i].get());
584 template <
typename tagT,
typename keyT,
typename valueT,
typename opT>
594 std::vector<Future<result_type> > results;
598 results.push_back(
world_.
taskq.
add(WorldGopInterface::template reduce_task<value_type, opT>,
603 results.push_back(recv_internal<result_type>(key_type(key, child0)));
605 results.push_back(recv_internal<result_type>(key_type(key, child1)));
609 world_.
taskq.
add(WorldGopInterface::template reduce_result_task<opT>,
628 void fence_impl(std::function<
void()> epilogue = []{},
629 bool pause_during_epilogue =
false,
633 int result = std::numeric_limits<int>::max();
634 const auto* initial_max_reducebcast_msg_size_cstr = std::getenv(
"MAD_MAX_REDUCEBCAST_MSG_SIZE");
635 if (initial_max_reducebcast_msg_size_cstr) {
638 if (result_u64>std::numeric_limits<int>::max()) {
641 <<
"!!MADNESS WARNING: Invalid value for environment variable MAD_MAX_REDUCEBCAST_MSG_SIZE.\n"
642 <<
"!!MADNESS WARNING: MAD_MAX_REDUCEBCAST_MSG_SIZE = "
643 << result_u64 <<
"\n";
644 result = std::numeric_limits<int>::max();
646 result =
static_cast<int>(result_u64);
649 <<
"MADNESS max msg size for GOP reduce/broadcast set to "
650 << result <<
" bytes.\n";
753 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
755 broadcast((
void *) buf, nelem*
sizeof(
T), root);
759 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
765 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
771 template <
typename objT,
772 typename = std::void_t<decltype(std::declval<archive::BufferInputArchive&>()&std::declval<objT&>())>,
773 typename = std::void_t<
decltype(std::declval<archive::BufferOutputArchive&>()&std::declval<const objT&>())>>
782 BUFLEN = count.
size();
786 unsigned char* buf =
new unsigned char[BUFLEN];
802 template <
typename T,
class opT>
804 static_assert(madness::is_trivially_copyable_v<T>,
"T must be trivially copyable");
808 const std::size_t nelem_per_maxmsg =
811 const auto buf_size = ((
sizeof(
T) * std::min(nelem_per_maxmsg, nelem) +
812 std::alignment_of_v<T> - 1) /
813 std::alignment_of_v<T>) * std::alignment_of_v<T>;
815 void operator()(
T *ptr) {
820 using sptr_t = std::unique_ptr<T[], free_dtor>;
822 auto aligned_buf_alloc = [&]() ->
T* {
824 const std::size_t alignment =
825 ((std::alignment_of_v<T> +
sizeof(
void *) - 1) /
828#ifdef HAVE_POSIX_MEMALIGN
831 throw std::bad_alloc();
833 return static_cast<T *
>(ptr);
835 return static_cast<T *
>(std::aligned_alloc(alignment, buf_size));
841 buf0 = sptr_t(aligned_buf_alloc(),
843 sptr_t buf1(
nullptr);
845 buf1 = sptr_t(aligned_buf_alloc(),
848 auto reduce_impl = [&,
this](
T* buf,
size_t nelem) {
862 for (
long i = 0; i < (long)nelem; ++i)
863 buf[i] =
op(buf[i], buf0[i]);
867 for (
long i = 0; i < (long)nelem; ++i)
868 buf[i] =
op(buf[i], buf1[i]);
881 const int n = std::min(nelem_per_maxmsg, nelem);
889 template <
typename T>
890 inline void sum(
T* buf,
size_t nelem) {
895 template <
typename T>
896 inline void min(
T* buf,
size_t nelem) {
901 template <
typename T>
902 inline void max(
T* buf,
size_t nelem) {
907 template <
typename T>
913 template <
typename T>
919 template <
typename T>
924 template <
typename T>
929 template <
typename T>
934 template <
typename T>
939 template <
typename T>
944 template <
typename T>
950 template <
typename T>
956 template <
typename T>
962 template <
typename T>
972 template <
typename T>
973 std::vector<T>
concat0(
const std::vector<T>&
v,
size_t bufsz=1024*1024) {
976 bufsz = ((bufsz +
sizeof(
void*) - 1) /
sizeof(
void*)) *
sizeof(
void*);
980 int child0_nbatch = 0, child1_nbatch = 0;
983 void operator()(std::byte *ptr) {
988 using sptr_t = std::unique_ptr<std::byte[], free_dtor>;
990 auto aligned_buf_alloc = [&]() -> std::byte* {
991#ifdef HAVE_POSIX_MEMALIGN
994 throw std::bad_alloc();
996 return static_cast<std::byte *
>(ptr);
998 return static_cast<std::byte *
>(
999 std::aligned_alloc(
sizeof(
void *), bufsz));
1003 auto buf0 = sptr_t(aligned_buf_alloc(),
1005 auto buf1 = sptr_t(aligned_buf_alloc(),
1009 const int batch_size =
static_cast<int>(
1013 const int max_nbatch = bufsz / batch_size;
1015 const int max_ntags = max_nbatch + 1;
1017 std::vector<Tag> tags;
1018 tags.reserve(max_nbatch);
1021 if (child0 != -1 || child1 != -1) {
1024 auto receive_nbatch = [&,
this]() {
1039 auto receive_batch = [&,
this](
const int batch,
const size_t buf_offset) {
1041 if (child0 != -1 && batch < child0_nbatch) {
1042 int msg_size = batch_size;
1044 if (batch + 1 == child0_nbatch) {
1046 &msg_size, 1,
MPI_INT, child0, tags[0]);
1054 if (child1 != -1 && batch < child1_nbatch) {
1055 int msg_size = batch_size;
1057 if (batch + 1 == child1_nbatch) {
1059 &msg_size, 1,
MPI_INT, child1, tags[0]);
1067 if (child0 != -1 && batch < child0_nbatch) {
1070 if (child1 != -1 && batch < child1_nbatch) {
1075 size_t buf_offset = 0;
1077 while (buf_offset < bufsz) {
1078 receive_batch(batch, buf_offset);
1079 buf_offset += batch_size;
1080 buf_offset = std::min(buf_offset, bufsz);
1086 std::vector<T> left, right;
1094 for (
unsigned int i = 0; i < right.size(); ++i)
1095 left.push_back(right[i]);
1097 for (
unsigned int i=0; i<
v.size(); ++i) left.push_back(
v[i]);
1103 const auto total_nbytes_to_send = ar.
size();
1106 const int nbatch = (total_nbytes_to_send + batch_size - 1) / batch_size;
1110 size_t buf_offset = 0;
1112 while (buf_offset < bufsz) {
1115 auto send_batch = [&,
this](
const int batch,
const size_t buf_offset) {
1116 const int nbytes_to_send =
static_cast<int>(
1117 std::min(
static_cast<size_t>(batch_size),
1118 total_nbytes_to_send - buf_offset));
1120 if (batch + 1 == nbatch) {
1122 &nbytes_to_send, 1,
MPI_INT, parent, tags[0]);
1127 MPI_BYTE, parent, tags[batch + 1]);
1131 send_batch(batch, buf_offset);
1132 buf_offset += batch_size;
1133 buf_offset = std::min(buf_offset, bufsz);
1138 if (parent == -1)
return left;
1139 else return std::vector<T>();
1152 template <
typename valueT,
typename keyT>
1167 template <
typename keyT,
typename valueT>
1199 template <
typename keyT,
typename opT>
1205 ProcessID parent = -1, child0 = -1, child1 = -1;
1208 lazy_sync_internal<LazySyncTag>(parent, child0, child1, key,
op);
1210 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<keyT, opT>;
1245 template <
typename keyT,
typename opT>
1250 if(group.
size() > 1) {
1254 ProcessID parent = -1, child0 = -1, child1 = -1;
1255 group.
make_tree(group_root, parent, child0, child1);
1257 lazy_sync_internal<GroupLazySyncTag>(parent, child0, child1, key,
op);
1259 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<keyT, opT>;
1281 template <
typename keyT,
typename valueT>
1287 bcast_internal<BcastTag>(key, value, root);
1313 template <
typename keyT,
typename valueT>
1322 if(group.
size() > 1)
1323 bcast_internal<GroupBcastTag>(key, value, group_root, group);
1361 template <
typename keyT,
typename valueT,
typename opT>
1367 ProcessID parent = -1, child0 = -1, child1 = -1;
1370 return reduce_internal<ReduceTag>(parent, child0, child1, root, key,
1417 template <
typename keyT,
typename valueT,
typename opT>
1427 ProcessID parent = -1, child0 = -1, child1 = -1;
1428 group.
make_tree(group_root, parent, child0, child1);
1430 return reduce_internal<ReduceTag>(parent, child0, child1, group_root,
1468 template <
typename keyT,
typename valueT,
typename opT>
1474 ProcessID parent = -1, child0 = -1, child1 = -1;
1479 reduce_internal<AllReduceTag>(parent, child0, child1, root,
1486 bcast_internal<AllReduceTag>(key, reduce_result, root);
1488 return reduce_result;
1531 template <
typename keyT,
typename valueT,
typename opT>
1540 ProcessID parent = -1, child0 = -1, child1 = -1;
1541 group.
make_tree(group_root, parent, child0, child1);
1545 reduce_internal<GroupAllReduceTag>(parent, child0, child1,
1546 group_root, key, value,
op);
1549 if(group.
rank() != group_root)
1553 bcast_internal<GroupAllReduceTag>(key, reduce_result, 0, group);
1555 return reduce_result;
Implements an archive wrapping a memory buffer.
void binary_tree_info(int root, int &parent, int &child0, int &child1)
Construct info about a binary tree with given root.
Definition safempi.cc:39
int Get_rank() const
Definition safempi.h:721
static int unique_tag_period()
Definition safempi.h:843
int unique_tag()
Returns a unique tag for temporary use (1023<tag<4095)
Definition safempi.h:837
World active message that extends an RMI message.
Definition worldam.h:80
The class used for callbacks (e.g., dependency tracking).
Definition dependency_interface.h:61
A future is a possibly yet unevaluated value.
Definition future.h:370
T & get(bool dowork=true) &
Gets the value, waiting if necessary.
Definition future.h:571
static const Future< T > default_initializer()
See "Gotchas" on Futures about why this exists and how to use it.
Definition future.h:459
bool probe() const
Check whether this future has been assigned.
Definition future.h:628
A collection of processes.
Definition group.h:50
void remote_update() const
Update remote usage count.
Definition group.h:383
void local_update() const
Update local usage count.
Definition group.h:369
ProcessID size() const
Group size accessor.
Definition group.h:429
const DistributedID & id() const
Group id accessor.
Definition group.h:396
bool empty() const
Query whether the group is empty.
Definition group.h:391
ProcessID rank() const
Group rank accessor.
Definition group.h:412
static madness::Future< Group > get_group(const DistributedID &did)
Get group from the registry.
Definition group.cc:90
void make_tree(const ProcessID group_root, ProcessID &parent, ProcessID &child1, ProcessID &child2) const
Compute the binary tree parents and children.
Definition group.h:449
World & get_world() const
Parent world accessor.
Definition group.h:404
Key object that includes the process information.
Definition distributed_id.h:80
static const attrT ATTR_UNORDERED
Definition worldrmi.h:180
Key object that uses a tag to differentiate keys.
Definition distributed_id.h:177
static TaskAttributes hipri()
Definition thread.h:456
void send(ProcessID dest, am_handlerT op, const AmArg *arg, const int attr=RMI::ATTR_ORDERED)
Sends a managed non-blocking active message.
Definition worldam.h:278
Delayed send callback object.
Definition worldgop.h:177
DelayedSend(World &world, const ProcessID dest, const keyT &key, const Future< valueT > &value)
Constructor.
Definition worldgop.h:191
virtual ~DelayedSend()
Definition worldgop.h:196
World & world_
The communication world.
Definition worldgop.h:179
const ProcessID dest_
The destination process id.
Definition worldgop.h:180
Future< valueT > value_
The data to be sent.
Definition worldgop.h:182
const keyT key_
The distributed id associated with value_.
Definition worldgop.h:181
DelayedSend< keyT, valueT > & operator=(const DelayedSend< keyT, valueT > &)
DelayedSend(const DelayedSend< keyT, valueT > &)
virtual void notify()
Notify this object that the future has been set.
Definition worldgop.h:202
Provides collectives that interoperate with the AM and task interfaces.
Definition worldgop.h:147
int max_reducebcast_msg_size() const
Returns the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition worldgop.h:698
void lazy_sync(const keyT &key, const opT &op, const Group &group) const
Group lazy sync.
Definition worldgop.h:1246
void send_internal(ProcessID dest, const keyT &key, const Future< valueT > &value) const
Send value to dest.
Definition worldgop.h:252
void max(T *buf, size_t nelem)
Inplace global max while still processing AM & tasks.
Definition worldgop.h:902
static void bcast_handler(const AmArg &arg)
Definition worldgop.h:355
void lazy_sync(const keyT &key, const opT &op) const
Lazy sync.
Definition worldgop.h:1200
World & world_
World object that this is a part of.
Definition worldgop.h:149
int set_max_reducebcast_msg_size(int sz)
Set the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition worldgop.h:688
std::shared_ptr< detail::DeferredCleanup > deferred_
Deferred cleanup object.
Definition worldgop.h:150
void reduce(T *buf, std::size_t nelem, opT op)
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.
Definition worldgop.h:803
void broadcast(T &t)
Broadcast of a scalar from node 0 to all other nodes.
Definition worldgop.h:760
~WorldGopInterface()
Definition worldgop.h:663
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition worldgop.h:774
void lazy_sync_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const keyT &key, const opT &op) const
Start a distributed lazy sync operation.
Definition worldgop.h:318
bool in_do_cleanup_
set while this gop's deferred_->do_cleanup() is running inside fence_impl, so that destructors invoke...
Definition worldgop.h:154
void sum(T &a)
Global sum of a scalar while still processing AM & tasks.
Definition worldgop.h:951
int max_reducebcast_msg_size_
maximum size of messages (in bytes) sent by reduce and broadcast
Definition worldgop.h:153
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition worldgop.cc:176
void broadcast(void *buf, size_t nbyte, ProcessID root, bool dowork=true, Tag bcast_tag=-1)
Broadcasts bytes from process root while still processing AM & tasks.
Definition worldgop.cc:188
void bit_and(T *buf, size_t nelem)
Definition worldgop.h:925
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition worldgop.h:474
void lazy_sync_parent(const ProcessID parent, const keyT &key, const ProcessID, const ProcessID) const
Lazy sync parent task.
Definition worldgop.h:282
static Future< valueT > recv_internal(const keyT &key)
Receive data from remote node.
Definition worldgop.h:215
void absmin(T *buf, size_t nelem)
Inplace global absmin while still processing AM & tasks.
Definition worldgop.h:908
void bit_or(T *buf, size_t nelem)
Definition worldgop.h:930
std::enable_if<!is_future< valueT >::value >::type send_internal(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition worldgop.h:229
WorldGopInterface(World &world)
Definition worldgop.h:659
bool set_forbid_fence(bool value)
Set forbid_fence flag to new value and return old value.
Definition worldgop.h:677
void bcast(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition worldgop.h:1314
void bcast_task(const keyT &key, const valueT &value, const ProcessID root) const
Broadcast task.
Definition worldgop.h:394
void group_bcast_task(const keyT &key, const valueT &value, const ProcessID group_root, const Group &group) const
Definition worldgop.h:425
void send(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition worldgop.h:1168
void logic_or(T *buf, size_t nelem)
Definition worldgop.h:945
void bcast(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition worldgop.h:1282
int initial_max_reducebcast_msg_size()
Definition worldgop.h:632
void serial_invoke(std::function< void()> action)
Executes an action on single (this) thread after ensuring all other work is done.
Definition worldgop.cc:180
static void group_bcast_handler(const AmArg &arg)
Definition worldgop.h:370
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op)
Distributed all reduce.
Definition worldgop.h:1470
Future< typename detail::result_of< opT >::type > reduce_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const ProcessID root, const keyT &key, const valueT &value, const opT &op)
Distributed reduce.
Definition worldgop.h:586
bool forbid_fence_
forbid calling fence() in case of several active worlds
Definition worldgop.h:152
static detail::result_of< opT >::type reduce_result_task(const std::vector< Future< typename detail::result_of< opT >::type > > &results, const opT &op)
Definition worldgop.h:562
void absmax(T *buf, size_t nelem)
Inplace global absmax while still processing AM & tasks.
Definition worldgop.h:914
void broadcast(T *buf, size_t nelem, ProcessID root)
Broadcasts typed contiguous data from process root while still processing AM & tasks.
Definition worldgop.h:754
void fence_impl(std::function< void()> epilogue=[]{}, bool pause_during_epilogue=false, bool debug=false)
Implementation of fence.
Definition worldgop.cc:50
static detail::result_of< opT >::type reduce_task(const valueT &value, const opT &op)
Definition worldgop.h:554
void product(T *buf, size_t nelem)
Inplace global product while still processing AM & tasks.
Definition worldgop.h:920
void min(T *buf, size_t nelem)
Inplace global min while still processing AM & tasks.
Definition worldgop.h:896
void logic_and(T *buf, size_t nelem)
Definition worldgop.h:940
bool is_in_do_cleanup() const
Definition worldgop.h:715
static Future< valueT > recv(const ProcessID source, const keyT &key)
Receive data from source.
Definition worldgop.h:1153
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID group_root, const Group &group)
Distributed group reduce.
Definition worldgop.h:1419
bool debug_
Debug mode.
Definition worldgop.h:151
void min(T &a)
Global min of a scalar while still processing AM & tasks.
Definition worldgop.h:963
void max(T &a)
Global max of a scalar while still processing AM & tasks.
Definition worldgop.h:957
std::vector< T > concat0(const std::vector< T > &v, size_t bufsz=1024 *1024)
Concatenate an STL vector of serializable stuff onto node 0.
Definition worldgop.h:973
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID root)
Distributed reduce.
Definition worldgop.h:1363
void barrier()
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.
Definition worldgop.h:720
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op, const Group &group)
Distributed, group all reduce.
Definition worldgop.h:1533
void lazy_sync_children(const ProcessID child0, const ProcessID child1, const keyT &key, opT &op, const ProcessID) const
Lazy sync children task.
Definition worldgop.h:300
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition worldgop.h:524
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition worldgop.h:890
void bit_xor(T *buf, size_t nelem)
Definition worldgop.h:935
friend class detail::DeferredCleanup
Definition worldgop.h:156
bool set_debug(bool value)
Set debug flag to new value and return old value.
Definition worldgop.h:670
void broadcast(T &t, ProcessID root)
Broadcast of a scalar from node root to all other nodes.
Definition worldgop.h:766
std::enable_if<!std::is_pointer< T >::value, SafeMPI::Request >::type Isend(const T &datum, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Isend one element.
Definition worldmpi.h:308
SafeMPI::Request Irecv(T *buf, int count, int source, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Async receive data of up to count elements from process source.
Definition worldmpi.h:321
void Send(const T *buf, long lenbuf, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Send array of lenbuf elements to process dest.
Definition worldmpi.h:347
void Recv(T *buf, long lenbuf, int src, int tag) const
Receive data of up to lenbuf elements from process src.
Definition worldmpi.h:374
void add(TaskInterface *t)
Add a new local task, taking ownership of the pointer.
Definition world_task_queue.h:466
A parallel world class.
Definition world.h:132
WorldTaskQueue & taskq
Task queue.
Definition world.h:206
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:320
static void await(SafeMPI::Request &request, bool dowork=true)
Wait for a MPI request to complete.
Definition world.h:534
WorldMpiInterface & mpi
MPI interface.
Definition world.h:204
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:330
unsigned long id() const
Definition world.h:315
WorldGopInterface & gop
Global operations.
Definition world.h:207
WorldAmInterface & am
AM interface.
Definition world.h:205
Wraps an archive around a memory buffer for output.
Definition buffer_archive.h:59
std::size_t size() const
Return the amount of data stored (counted) in the buffer.
Definition buffer_archive.h:123
Deferred cleanup of shared_ptr's.
Definition deferred_cleanup.h:60
Distributed caching utility.
Definition dist_cache.h:54
static void get_cache_value(const keyT &key, madness::Future< valueT > &value)
Get the cache value associated with key.
Definition dist_cache.h:185
static void set_cache_value(const keyT &key, const valueT &value)
Set the cache value associated with key.
Definition dist_cache.h:146
static bool debug
Definition dirac-hatom.cc:16
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:28
Tensor< typename Tensor< T >::scalar_type > arg(const Tensor< T > &t)
Return a new tensor holding the argument of each element of t (complex types only)
Definition tensor.h:2518
static const double v
Definition hatom_sf_dirac.cc:20
Tensor< double > op(const Tensor< double > &x)
Definition kain.cc:508
#define max(a, b)
Definition lda.h:51
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
Intracomm COMM_WORLD
Definition safempi.cc:67
Definition potentialmanager.cc:41
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
std::pair< uniqueidT, std::size_t > DistributedID
Distributed ID which is used to identify objects.
Definition distributed_id.h:48
double abs(double x)
Definition complexfun.h:48
AmArg * copy_am_arg(const AmArg &arg)
Definition worldam.h:170
AmArg * new_am_arg(const argT &... args)
Convenience template for serializing arguments into a new AmArg.
Definition worldam.h:194
bool quiet()
Check if the MADNESS runtime was initialized for quiet operation.
Definition world.cc:77
void error(const char *msg)
Definition world.cc:142
std::string type(const PairType &n)
Definition PNOParameters.h:18
std::uint64_t cstr_to_memory_size(const char *str)
Unit-aware conversion of a C string to a size_t.
Definition units.cc:14
static long abs(long a)
Definition tensor.h:218
static const double b
Definition nonlinschro.cc:119
static const double a
Definition nonlinschro.cc:118
int posix_memalign(void **memptr, std::size_t alignment, std::size_t size)
Definition posixmem.h:44
Hash functor.
Definition worldhash.h:233
T operator()(const T &a, const T &b) const
Definition worldgop.h:87
Definition worldgop.h:101
T operator()(const T &a, const T &b) const
Definition worldgop.h:102
Definition worldgop.h:109
T operator()(const T &a, const T &b) const
Definition worldgop.h:110
Definition worldgop.h:116
T operator()(const T &a, const T &b) const
Definition worldgop.h:117
Definition worldgop.h:123
T operator()(const T &a, const T &b) const
Definition worldgop.h:124
Definition worldgop.h:166
Definition worldgop.h:162
Definition worldgop.h:167
Definition worldgop.h:163
Definition worldgop.h:161
Definition worldgop.h:165
Definition worldgop.h:160
Definition worldgop.h:159
Definition worldgop.h:164
Definition worldgop.h:130
T operator()(const T &a, const T &b) const
Definition worldgop.h:131
Definition worldgop.h:137
T operator()(const T &a, const T &b) const
Definition worldgop.h:138
T operator()(const T &a, const T &b) const
Definition worldgop.h:80
T operator()(const T &a, const T &b) const
Definition worldgop.h:95
T operator()(const T &a, const T &b) const
Definition worldgop.h:73
T operator()(const T &a, const T &b) const
Definition worldgop.h:66
fnT::result_type type
Definition function_traits.h:97
T type
Type with Future removed.
Definition type_traits.h:111
#define MPI_INT
Definition stubmpi.h:81
#define MPI_BYTE
Definition stubmpi.h:77
AtomicInt sum
Definition test_atomicint.cc:46
std::pair< int, double > valueT
Definition test_binsorter.cc:6
double source(const coordT &r)
Definition testperiodic.cc:48
const char * status[2]
Definition testperiodic.cc:43
Declares the World class for the parallel runtime environment.
Defines TaskInterface and implements WorldTaskQueue and associated stuff.
Defines types used by the parallel runtime.
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43
int Tag
Used to clearly identify message tag/type.
Definition worldtypes.h:44