32#ifndef MADNESS_WORLD_WORLDGOP_H__INCLUDED
33#define MADNESS_WORLD_WORLDGOP_H__INCLUDED
56 class WorldAmInterface;
60 class DeferredCleanup;
100 template <
typename T>
108 template <
typename T>
115 template <
typename T>
122 template <
typename T>
129 template <
typename T>
136 template <
typename T>
175 template <
typename keyT,
typename valueT>
213 template <
typename valueT,
typename keyT>
226 template <
typename keyT,
typename valueT>
227 typename std::enable_if<!is_future<valueT>::value >
::type
233 dist_cache::set_cache_value(key, value);
236 world_.
taskq.
add(dest, dist_cache::template set_cache_value<valueT>, key,
250 template <
typename keyT,
typename valueT>
255 dist_cache::set_cache_value(key, value);
260 world_.
taskq.
add(dest, dist_cache::template set_cache_value<valueT>, key,
267 const_cast<Future<valueT>&
>(value).register_callback(delayed_send_callback);
280 template <
typename keyT>
298 template <
typename keyT,
typename opT>
316 template <
typename tagT,
typename keyT,
typename opT>
318 const ProcessID child1,
const keyT& key,
const opT&
op)
const {
323 recv_internal<ProcessID>(key_type(key, child0)) :
326 recv_internal<ProcessID>(key_type(key, child1)) :
329 recv_internal<ProcessID>(key_type(key, parent)) :
334 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<key_type, opT>;
336 child0_signal, child1_signal, my_key,
op, parent_signal,
341 if(child0_signal.
probe() && child1_signal.
probe())
344 auto lazy_sync_parent_fn = & WorldGopInterface::template lazy_sync_parent<key_type>;
346 parent, my_key, child0_signal, child1_signal,
353 template <
typename keyT,
typename valueT,
typename taskfnT>
361 arg & taskfn & key & value & root;
364 arg.get_world()->taskq.add(
arg.get_world()->gop, taskfn, key,
368 template <
typename keyT,
typename valueT,
typename taskfnT>
377 arg & taskfn & key & value & group_root & group_key;
383 arg.get_world()->taskq.add(
arg.get_world()->gop, taskfn, key, value,
392 template <
typename keyT,
typename valueT>
398 ProcessID parent = -1, child0 = -1, child1 = -1;
408 void (*handler)(
const AmArg&) =
409 & WorldGopInterface::template bcast_handler<keyT, valueT, taskfnT>;
411 & WorldGopInterface::template bcast_task<keyT, valueT>,
423 template <
typename keyT,
typename valueT>
431 ProcessID parent = -1, child0 = -1, child1 = -1;
432 group.
make_tree(group_root, parent, child0, child1);
443 void (*handler)(
const AmArg&) =
444 & WorldGopInterface::template group_bcast_handler<keyT, valueT, taskfnT>;
446 & WorldGopInterface::template group_bcast_task<keyT, valueT>,
447 key, value, group_root, group.
id());
472 template <
typename tagT,
typename keyT,
typename valueT>
479 const key_type tagged_key(key);
493 auto bcast_task_fn = & WorldGopInterface::template bcast_task<key_type, valueT>;
522 template <
typename tagT,
typename keyT,
typename valueT>
528 const key_type tagged_key(key);
530 if(group.
rank() == group_root) {
535 auto group_bcast_task_fn = & WorldGopInterface::template group_bcast_task<key_type, valueT>;
537 tagged_key, value, group_root, group,
551 template <
typename valueT,
typename opT>
559 template <
typename opT>
566 for(std::size_t i = 1ul; i < results.size(); ++i)
567 op(result.
get(), results[i].get());
583 template <
typename tagT,
typename keyT,
typename valueT,
typename opT>
593 std::vector<Future<result_type> > results;
597 results.push_back(
world_.
taskq.
add(WorldGopInterface::template reduce_task<value_type, opT>,
602 results.push_back(recv_internal<result_type>(key_type(key, child0)));
604 results.push_back(recv_internal<result_type>(key_type(key, child1)));
608 world_.
taskq.
add(WorldGopInterface::template reduce_result_task<opT>,
627 void fence_impl(std::function<
void()> epilogue = []{},
628 bool pause_during_epilogue =
false,
632 int result = std::numeric_limits<int>::max();
633 const auto* initial_max_reducebcast_msg_size_cstr = std::getenv(
"MAD_MAX_REDUCEBCAST_MSG_SIZE");
634 if (initial_max_reducebcast_msg_size_cstr) {
637 if (result_u64>std::numeric_limits<int>::max()) {
640 <<
"!!MADNESS WARNING: Invalid value for environment variable MAD_MAX_REDUCEBCAST_MSG_SIZE.\n"
641 <<
"!!MADNESS WARNING: MAD_MAX_REDUCEBCAST_MSG_SIZE = "
642 << result_u64 <<
"\n";
643 result = std::numeric_limits<int>::max();
645 result =
static_cast<int>(result_u64);
648 <<
"MADNESS max msg size for GOP reduce/broadcast set to "
649 << result <<
" bytes.\n";
735 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
737 broadcast((
void *) buf, nelem*
sizeof(
T), root);
741 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
747 template <
typename T,
typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
753 template <
typename objT,
754 typename = std::void_t<decltype(std::declval<archive::BufferInputArchive&>()&std::declval<objT&>())>,
755 typename = std::void_t<
decltype(std::declval<archive::BufferOutputArchive&>()&std::declval<const objT&>())>>
764 BUFLEN = count.
size();
768 unsigned char* buf =
new unsigned char[BUFLEN];
784 template <
typename T,
class opT>
786 static_assert(madness::is_trivially_copyable_v<T>,
"T must be trivially copyable");
790 const std::size_t nelem_per_maxmsg =
793 const auto buf_size = ((
sizeof(
T) * std::min(nelem_per_maxmsg, nelem) +
794 std::alignment_of_v<T> - 1) /
795 std::alignment_of_v<T>) * std::alignment_of_v<T>;
797 void operator()(
T *ptr) {
802 using sptr_t = std::unique_ptr<T[], free_dtor>;
804 auto aligned_buf_alloc = [&]() ->
T* {
806 const std::size_t alignment =
807 ((std::alignment_of_v<T> +
sizeof(
void *) - 1) /
810#ifdef HAVE_POSIX_MEMALIGN
813 throw std::bad_alloc();
815 return static_cast<T *
>(ptr);
817 return static_cast<T *
>(std::aligned_alloc(alignment, buf_size));
823 buf0 = sptr_t(aligned_buf_alloc(),
825 sptr_t buf1(
nullptr);
827 buf1 = sptr_t(aligned_buf_alloc(),
830 auto reduce_impl = [&,
this](
T* buf,
size_t nelem) {
844 for (
long i = 0; i < (long)nelem; ++i)
845 buf[i] =
op(buf[i], buf0[i]);
849 for (
long i = 0; i < (long)nelem; ++i)
850 buf[i] =
op(buf[i], buf1[i]);
863 const int n = std::min(nelem_per_maxmsg, nelem);
871 template <
typename T>
872 inline void sum(
T* buf,
size_t nelem) {
877 template <
typename T>
878 inline void min(
T* buf,
size_t nelem) {
883 template <
typename T>
884 inline void max(
T* buf,
size_t nelem) {
889 template <
typename T>
895 template <
typename T>
901 template <
typename T>
906 template <
typename T>
911 template <
typename T>
916 template <
typename T>
921 template <
typename T>
926 template <
typename T>
932 template <
typename T>
938 template <
typename T>
944 template <
typename T>
954 template <
typename T>
955 std::vector<T>
concat0(
const std::vector<T>&
v,
size_t bufsz=1024*1024) {
958 bufsz = ((bufsz +
sizeof(
void*) - 1) /
sizeof(
void*)) *
sizeof(
void*);
962 int child0_nbatch = 0, child1_nbatch = 0;
965 void operator()(std::byte *ptr) {
970 using sptr_t = std::unique_ptr<std::byte[], free_dtor>;
972 auto aligned_buf_alloc = [&]() -> std::byte* {
973#ifdef HAVE_POSIX_MEMALIGN
976 throw std::bad_alloc();
978 return static_cast<std::byte *
>(ptr);
980 return static_cast<std::byte *
>(
981 std::aligned_alloc(
sizeof(
void *), bufsz));
985 auto buf0 = sptr_t(aligned_buf_alloc(),
987 auto buf1 = sptr_t(aligned_buf_alloc(),
991 const int batch_size =
static_cast<int>(
995 const int max_nbatch = bufsz / batch_size;
997 const int max_ntags = max_nbatch + 1;
999 std::vector<Tag> tags;
1000 tags.reserve(max_nbatch);
1003 if (child0 != -1 || child1 != -1) {
1006 auto receive_nbatch = [&,
this]() {
1021 auto receive_batch = [&,
this](
const int batch,
const size_t buf_offset) {
1023 if (child0 != -1 && batch < child0_nbatch) {
1024 int msg_size = batch_size;
1026 if (batch + 1 == child0_nbatch) {
1028 &msg_size, 1,
MPI_INT, child0, tags[0]);
1036 if (child1 != -1 && batch < child1_nbatch) {
1037 int msg_size = batch_size;
1039 if (batch + 1 == child1_nbatch) {
1041 &msg_size, 1,
MPI_INT, child1, tags[0]);
1049 if (child0 != -1 && batch < child0_nbatch) {
1052 if (child1 != -1 && batch < child1_nbatch) {
1057 size_t buf_offset = 0;
1059 while (buf_offset < bufsz) {
1060 receive_batch(batch, buf_offset);
1061 buf_offset += batch_size;
1062 buf_offset = std::min(buf_offset, bufsz);
1068 std::vector<T> left, right;
1076 for (
unsigned int i = 0; i < right.size(); ++i)
1077 left.push_back(right[i]);
1079 for (
unsigned int i=0; i<
v.size(); ++i) left.push_back(
v[i]);
1085 const auto total_nbytes_to_send = ar.
size();
1088 const int nbatch = (total_nbytes_to_send + batch_size - 1) / batch_size;
1092 size_t buf_offset = 0;
1094 while (buf_offset < bufsz) {
1097 auto send_batch = [&,
this](
const int batch,
const size_t buf_offset) {
1098 const int nbytes_to_send =
static_cast<int>(
1099 std::min(
static_cast<size_t>(batch_size),
1100 total_nbytes_to_send - buf_offset));
1102 if (batch + 1 == nbatch) {
1104 &nbytes_to_send, 1,
MPI_INT, parent, tags[0]);
1109 MPI_BYTE, parent, tags[batch + 1]);
1113 send_batch(batch, buf_offset);
1114 buf_offset += batch_size;
1115 buf_offset = std::min(buf_offset, bufsz);
1120 if (parent == -1)
return left;
1121 else return std::vector<T>();
1134 template <
typename valueT,
typename keyT>
1149 template <
typename keyT,
typename valueT>
1181 template <
typename keyT,
typename opT>
1187 ProcessID parent = -1, child0 = -1, child1 = -1;
1190 lazy_sync_internal<LazySyncTag>(parent, child0, child1, key,
op);
1192 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<keyT, opT>;
1227 template <
typename keyT,
typename opT>
1232 if(group.
size() > 1) {
1236 ProcessID parent = -1, child0 = -1, child1 = -1;
1237 group.
make_tree(group_root, parent, child0, child1);
1239 lazy_sync_internal<GroupLazySyncTag>(parent, child0, child1, key,
op);
1241 auto lazy_sync_children_fn = & WorldGopInterface::template lazy_sync_children<keyT, opT>;
1263 template <
typename keyT,
typename valueT>
1269 bcast_internal<BcastTag>(key, value, root);
1295 template <
typename keyT,
typename valueT>
1304 if(group.
size() > 1)
1305 bcast_internal<GroupBcastTag>(key, value, group_root, group);
1343 template <
typename keyT,
typename valueT,
typename opT>
1349 ProcessID parent = -1, child0 = -1, child1 = -1;
1352 return reduce_internal<ReduceTag>(parent, child0, child1, root, key,
1399 template <
typename keyT,
typename valueT,
typename opT>
1409 ProcessID parent = -1, child0 = -1, child1 = -1;
1410 group.
make_tree(group_root, parent, child0, child1);
1412 return reduce_internal<ReduceTag>(parent, child0, child1, group_root,
1450 template <
typename keyT,
typename valueT,
typename opT>
1456 ProcessID parent = -1, child0 = -1, child1 = -1;
1461 reduce_internal<AllReduceTag>(parent, child0, child1, root,
1468 bcast_internal<AllReduceTag>(key, reduce_result, root);
1470 return reduce_result;
1513 template <
typename keyT,
typename valueT,
typename opT>
1522 ProcessID parent = -1, child0 = -1, child1 = -1;
1523 group.
make_tree(group_root, parent, child0, child1);
1527 reduce_internal<GroupAllReduceTag>(parent, child0, child1,
1528 group_root, key, value,
op);
1531 if(group.
rank() != group_root)
1535 bcast_internal<GroupAllReduceTag>(key, reduce_result, 0, group);
1537 return reduce_result;
Implements an archive wrapping a memory buffer.
void binary_tree_info(int root, int &parent, int &child0, int &child1)
Construct info about a binary tree with given root.
Definition safempi.cc:39
int Get_rank() const
Definition safempi.h:714
static int unique_tag_period()
Definition safempi.h:836
int unique_tag()
Returns a unique tag for temporary use (1023<tag<4095)
Definition safempi.h:830
World active message that extends an RMI message.
Definition worldam.h:80
The class used for callbacks (e.g., dependency tracking).
Definition dependency_interface.h:61
A future is a possibly yet unevaluated value.
Definition future.h:373
T & get(bool dowork=true) &
Gets the value, waiting if necessary.
Definition future.h:574
static const Future< T > default_initializer()
See "Gotchas" on Futures about why this exists and how to use it.
Definition future.h:462
bool probe() const
Check whether this future has been assigned.
Definition future.h:631
A collection of processes.
Definition group.h:50
void remote_update() const
Update remote usage count.
Definition group.h:383
void local_update() const
Update local usage count.
Definition group.h:369
ProcessID size() const
Group size accessor.
Definition group.h:429
const DistributedID & id() const
Group id accessor.
Definition group.h:396
bool empty() const
Query empty group.
Definition group.h:391
ProcessID rank() const
Group rank accessor.
Definition group.h:412
static madness::Future< Group > get_group(const DistributedID &did)
Get group from the registry.
Definition group.cc:90
void make_tree(const ProcessID group_root, ProcessID &parent, ProcessID &child1, ProcessID &child2) const
Compute the binary tree parents and children.
Definition group.h:449
World & get_world() const
Parent world accessor.
Definition group.h:404
Key object that includes the process information.
Definition distributed_id.h:80
static const attrT ATTR_UNORDERED
Definition worldrmi.h:180
Key object that uses a tag to differentiate keys.
Definition distributed_id.h:177
static TaskAttributes hipri()
Definition thread.h:456
void send(ProcessID dest, am_handlerT op, const AmArg *arg, const int attr=RMI::ATTR_ORDERED)
Sends a managed non-blocking active message.
Definition worldam.h:278
Delayed send callback object.
Definition worldgop.h:176
DelayedSend(World &world, const ProcessID dest, const keyT &key, const Future< valueT > &value)
Constructor.
Definition worldgop.h:190
virtual ~DelayedSend()
Definition worldgop.h:195
World & world_
The communication world.
Definition worldgop.h:178
const ProcessID dest_
The destination process id.
Definition worldgop.h:179
Future< valueT > value_
The data to be sent.
Definition worldgop.h:181
const keyT key_
The distributed id associated with value_.
Definition worldgop.h:180
DelayedSend< keyT, valueT > & operator=(const DelayedSend< keyT, valueT > &)
DelayedSend(const DelayedSend< keyT, valueT > &)
virtual void notify()
Notify this object that the future has been set.
Definition worldgop.h:201
Provides collectives that interoperate with the AM and task interfaces.
Definition worldgop.h:147
int max_reducebcast_msg_size() const
Returns the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition worldgop.h:697
void lazy_sync(const keyT &key, const opT &op, const Group &group) const
Group lazy sync.
Definition worldgop.h:1228
void send_internal(ProcessID dest, const keyT &key, const Future< valueT > &value) const
Send value to dest.
Definition worldgop.h:251
void max(T *buf, size_t nelem)
Inplace global max while still processing AM & tasks.
Definition worldgop.h:884
static void bcast_handler(const AmArg &arg)
Definition worldgop.h:354
void lazy_sync(const keyT &key, const opT &op) const
Lazy sync.
Definition worldgop.h:1182
World & world_
World object that this is a part of.
Definition worldgop.h:149
int set_max_reducebcast_msg_size(int sz)
Set the maximum size of messages (in bytes) sent by reduce and broadcast.
Definition worldgop.h:687
std::shared_ptr< detail::DeferredCleanup > deferred_
Deferred cleanup object.
Definition worldgop.h:150
void reduce(T *buf, std::size_t nelem, opT op)
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.
Definition worldgop.h:785
void broadcast(T &t)
Broadcast of a scalar from node 0 to all other nodes.
Definition worldgop.h:742
~WorldGopInterface()
Definition worldgop.h:662
void broadcast_serializable(objT &obj, ProcessID root)
Broadcast a serializable object.
Definition worldgop.h:756
void lazy_sync_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const keyT &key, const opT &op) const
Start a distributed lazy sync operation.
Definition worldgop.h:317
void sum(T &a)
Global sum of a scalar while still processing AM & tasks.
Definition worldgop.h:933
int max_reducebcast_msg_size_
maximum size of messages (in bytes) sent by reduce and broadcast
Definition worldgop.h:153
void fence(bool debug=false)
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Definition worldgop.cc:161
void broadcast(void *buf, size_t nbyte, ProcessID root, bool dowork=true, Tag bcast_tag=-1)
Broadcasts bytes from process root while still processing AM & tasks.
Definition worldgop.cc:173
void bit_and(T *buf, size_t nelem)
Definition worldgop.h:907
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition worldgop.h:473
void lazy_sync_parent(const ProcessID parent, const keyT &key, const ProcessID, const ProcessID) const
Lazy sync parent task.
Definition worldgop.h:281
static Future< valueT > recv_internal(const keyT &key)
Receive data from remote node.
Definition worldgop.h:214
void absmin(T *buf, size_t nelem)
Inplace global absmin while still processing AM & tasks.
Definition worldgop.h:890
void bit_or(T *buf, size_t nelem)
Definition worldgop.h:912
std::enable_if<!is_future< valueT >::value >::type send_internal(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition worldgop.h:228
WorldGopInterface(World &world)
Definition worldgop.h:658
bool set_forbid_fence(bool value)
Set forbid_fence flag to new value and return old value.
Definition worldgop.h:676
void bcast(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition worldgop.h:1296
void bcast_task(const keyT &key, const valueT &value, const ProcessID root) const
Broadcast task.
Definition worldgop.h:393
void group_bcast_task(const keyT &key, const valueT &value, const ProcessID group_root, const Group &group) const
Definition worldgop.h:424
void send(const ProcessID dest, const keyT &key, const valueT &value) const
Send value to dest.
Definition worldgop.h:1150
void logic_or(T *buf, size_t nelem)
Definition worldgop.h:927
void bcast(const keyT &key, Future< valueT > &value, const ProcessID root) const
Broadcast.
Definition worldgop.h:1264
int initial_max_reducebcast_msg_size()
Definition worldgop.h:631
void serial_invoke(std::function< void()> action)
Executes an action on single (this) thread after ensuring all other work is done.
Definition worldgop.cc:165
static void group_bcast_handler(const AmArg &arg)
Definition worldgop.h:369
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op)
Distributed all reduce.
Definition worldgop.h:1452
Future< typename detail::result_of< opT >::type > reduce_internal(const ProcessID parent, const ProcessID child0, const ProcessID child1, const ProcessID root, const keyT &key, const valueT &value, const opT &op)
Distributed reduce.
Definition worldgop.h:585
bool forbid_fence_
forbid calling fence() in case of several active worlds
Definition worldgop.h:152
static detail::result_of< opT >::type reduce_result_task(const std::vector< Future< typename detail::result_of< opT >::type > > &results, const opT &op)
Definition worldgop.h:561
void absmax(T *buf, size_t nelem)
Inplace global absmax while still processing AM & tasks.
Definition worldgop.h:896
void broadcast(T *buf, size_t nelem, ProcessID root)
Broadcasts typed contiguous data from process root while still processing AM & tasks.
Definition worldgop.h:736
void fence_impl(std::function< void()> epilogue=[]{}, bool pause_during_epilogue=false, bool debug=false)
Implementation of fence.
Definition worldgop.cc:50
static detail::result_of< opT >::type reduce_task(const valueT &value, const opT &op)
Definition worldgop.h:553
void product(T *buf, size_t nelem)
Inplace global product while still processing AM & tasks.
Definition worldgop.h:902
void min(T *buf, size_t nelem)
Inplace global min while still processing AM & tasks.
Definition worldgop.h:878
void logic_and(T *buf, size_t nelem)
Definition worldgop.h:922
static Future< valueT > recv(const ProcessID source, const keyT &key)
Receive data from source.
Definition worldgop.h:1135
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID group_root, const Group &group)
Distributed group reduce.
Definition worldgop.h:1401
bool debug_
Debug mode.
Definition worldgop.h:151
void min(T &a)
Global min of a scalar while still processing AM & tasks.
Definition worldgop.h:945
void max(T &a)
Global max of a scalar while still processing AM & tasks.
Definition worldgop.h:939
std::vector< T > concat0(const std::vector< T > &v, size_t bufsz=1024 *1024)
Concatenate an STL vector of serializable stuff onto node 0.
Definition worldgop.h:955
Future< typename detail::result_of< opT >::type > reduce(const keyT &key, const valueT &value, const opT &op, const ProcessID root)
Distributed reduce.
Definition worldgop.h:1345
void barrier()
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.
Definition worldgop.h:702
Future< typename detail::result_of< opT >::type > all_reduce(const keyT &key, const valueT &value, const opT &op, const Group &group)
Distributed, group all reduce.
Definition worldgop.h:1515
void lazy_sync_children(const ProcessID child0, const ProcessID child1, const keyT &key, opT &op, const ProcessID) const
Lazy sync children task.
Definition worldgop.h:299
void bcast_internal(const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
Group broadcast.
Definition worldgop.h:523
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition worldgop.h:872
void bit_xor(T *buf, size_t nelem)
Definition worldgop.h:917
friend class detail::DeferredCleanup
Definition worldgop.h:155
bool set_debug(bool value)
Set debug flag to new value and return old value.
Definition worldgop.h:669
void broadcast(T &t, ProcessID root)
Broadcast of a scalar from node root to all other nodes.
Definition worldgop.h:748
std::enable_if<!std::is_pointer< T >::value, SafeMPI::Request >::type Isend(const T &datum, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Isend one element.
Definition worldmpi.h:308
SafeMPI::Request Irecv(T *buf, int count, int source, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Async receive data of up to count elements from process source.
Definition worldmpi.h:321
void Send(const T *buf, long lenbuf, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Send array of lenbuf elements to process dest.
Definition worldmpi.h:347
void Recv(T *buf, long lenbuf, int src, int tag) const
Receive data of up to lenbuf elements from process src.
Definition worldmpi.h:374
void add(TaskInterface *t)
Add a new local task, taking ownership of the pointer.
Definition world_task_queue.h:466
A parallel world class.
Definition world.h:132
WorldTaskQueue & taskq
Task queue.
Definition world.h:206
ProcessID rank() const
Returns the process rank in this World (same as MPI_Comm_rank()).
Definition world.h:320
static void await(SafeMPI::Request &request, bool dowork=true)
Wait for a MPI request to complete.
Definition world.h:534
WorldMpiInterface & mpi
MPI interface.
Definition world.h:204
ProcessID size() const
Returns the number of processes in this World (same as MPI_Comm_size()).
Definition world.h:330
unsigned long id() const
Definition world.h:315
WorldGopInterface & gop
Global operations.
Definition world.h:207
WorldAmInterface & am
AM interface.
Definition world.h:205
Wraps an archive around a memory buffer for output.
Definition buffer_archive.h:59
std::size_t size() const
Return the amount of data stored (counted) in the buffer.
Definition buffer_archive.h:123
Deferred cleanup of shared_ptr's.
Definition deferred_cleanup.h:60
Distributed caching utility.
Definition dist_cache.h:54
static void get_cache_value(const keyT &key, madness::Future< valueT > &value)
Get the cache value associated with key.
Definition dist_cache.h:185
static void set_cache_value(const keyT &key, const valueT &value)
Set the cache value associated with key.
Definition dist_cache.h:146
static bool debug
Definition dirac-hatom.cc:16
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
Tensor< typename Tensor< T >::scalar_type > arg(const Tensor< T > &t)
Return a new tensor holding the argument of each element of t (complex types only)
Definition tensor.h:2503
static const double v
Definition hatom_sf_dirac.cc:20
Tensor< double > op(const Tensor< double > &x)
Definition kain.cc:508
#define max(a, b)
Definition lda.h:51
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
Intracomm COMM_WORLD
Definition safempi.cc:67
Definition potentialmanager.cc:41
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
std::pair< uniqueidT, std::size_t > DistributedID
Distributed ID which is used to identify objects.
Definition distributed_id.h:48
double abs(double x)
Definition complexfun.h:48
AmArg * copy_am_arg(const AmArg &arg)
Definition worldam.h:170
AmArg * new_am_arg(const argT &... args)
Convenience template for serializing arguments into a new AmArg.
Definition worldam.h:194
bool quiet()
Check if the MADNESS runtime was initialized for quiet operation.
Definition world.cc:77
void error(const char *msg)
Definition world.cc:139
std::string type(const PairType &n)
Definition PNOParameters.h:18
std::uint64_t cstr_to_memory_size(const char *str)
Unit-aware conversion of a C string to a size_t.
Definition units.cc:14
static long abs(long a)
Definition tensor.h:218
static const double b
Definition nonlinschro.cc:119
static const double a
Definition nonlinschro.cc:118
int posix_memalign(void **memptr, std::size_t alignment, std::size_t size)
Definition posixmem.h:44
Hash functor.
Definition worldhash.h:233
T operator()(const T &a, const T &b) const
Definition worldgop.h:87
Definition worldgop.h:101
T operator()(const T &a, const T &b) const
Definition worldgop.h:102
Definition worldgop.h:109
T operator()(const T &a, const T &b) const
Definition worldgop.h:110
Definition worldgop.h:116
T operator()(const T &a, const T &b) const
Definition worldgop.h:117
Definition worldgop.h:123
T operator()(const T &a, const T &b) const
Definition worldgop.h:124
Definition worldgop.h:165
Definition worldgop.h:161
Definition worldgop.h:166
Definition worldgop.h:162
Definition worldgop.h:160
Definition worldgop.h:164
Definition worldgop.h:159
Definition worldgop.h:158
Definition worldgop.h:163
Definition worldgop.h:130
T operator()(const T &a, const T &b) const
Definition worldgop.h:131
Definition worldgop.h:137
T operator()(const T &a, const T &b) const
Definition worldgop.h:138
T operator()(const T &a, const T &b) const
Definition worldgop.h:80
T operator()(const T &a, const T &b) const
Definition worldgop.h:95
T operator()(const T &a, const T &b) const
Definition worldgop.h:73
T operator()(const T &a, const T &b) const
Definition worldgop.h:66
fnT::result_type type
Definition function_traits.h:97
T type
Type with Future removed.
Definition type_traits.h:110
#define MPI_INT
Definition stubmpi.h:81
#define MPI_BYTE
Definition stubmpi.h:77
AtomicInt sum
Definition test_atomicint.cc:46
std::pair< int, double > valueT
Definition test_binsorter.cc:6
double source(const coordT &r)
Definition testperiodic.cc:48
const char * status[2]
Definition testperiodic.cc:43
Declares the World class for the parallel runtime environment.
Defines TaskInterface and implements WorldTaskQueue and associated stuff.
Defines types used by the parallel runtime.
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43
int Tag
Used to clearly identify message tag/type.
Definition worldtypes.h:44