Provides collectives that interoperate with the AM and task interfaces. More...

#include <worldgop.h>

Collaboration diagram for madness::WorldGopInterface:

[legend]

Classes
struct	AllReduceTag

struct	BcastTag

class	DelayedSend
	Delayed send callback object. More...

struct	GroupAllReduceTag

struct	GroupBcastTag

struct	GroupLazySyncTag

struct	GroupReduceTag

struct	LazySyncTag

struct	PointToPointTag

struct	ReduceTag

Public Member Functions
	WorldGopInterface (World &world)

	~WorldGopInterface ()

template<typename T >
void	absmax (T *buf, size_t nelem)
	Inplace global absmax while still processing AM & tasks.

template<typename T >
void	absmin (T *buf, size_t nelem)
	Inplace global absmin while still processing AM & tasks.

template<typename keyT , typename valueT , typename opT >
Future< typename detail::result_of< opT >::type >	all_reduce (const keyT &key, const valueT &value, const opT &op)
	Distributed all reduce.

template<typename keyT , typename valueT , typename opT >
Future< typename detail::result_of< opT >::type >	all_reduce (const keyT &key, const valueT &value, const opT &op, const Group &group)
	Distributed, group all reduce.

void	barrier ()
	Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.

template<typename keyT , typename valueT >
void	bcast (const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
	Group broadcast.

template<typename keyT , typename valueT >
void	bcast (const keyT &key, Future< valueT > &value, const ProcessID root) const
	Broadcast.

template<typename T >
void	bit_and (T *buf, size_t nelem)

template<typename T >
void	bit_or (T *buf, size_t nelem)

template<typename T >
void	bit_xor (T *buf, size_t nelem)

template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
void	broadcast (T &t)
	Broadcast of a scalar from node 0 to all other nodes.

template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
void	broadcast (T &t, ProcessID root)
	Broadcast of a scalar from node root to all other nodes.

template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>
void	broadcast (T *buf, size_t nelem, ProcessID root)
	Broadcasts typed contiguous data from process root while still processing AM & tasks.

void	broadcast (void *buf, size_t nbyte, ProcessID root, bool dowork=true, Tag bcast_tag=-1)
	Broadcasts bytes from process root while still processing AM & tasks.

template<typename objT , typename = std::void_t<decltype(std::declval<archive::BufferInputArchive&>()&std::declval<objT&>())>, typename = std::void_t<decltype(std::declval<archive::BufferOutputArchive&>()&std::declval<const objT&>())>>
void	broadcast_serializable (objT &obj, ProcessID root)
	Broadcast a serializable object.

template<typename T >
std::vector< T >	concat0 (const std::vector< T > &v, size_t bufsz=1024 *1024)
	Concatenate an STL vector of serializable stuff onto node 0.

void	fence (bool debug=false)
	Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.

template<typename keyT , typename opT >
void	lazy_sync (const keyT &key, const opT &op) const
	Lazy sync.

template<typename keyT , typename opT >
void	lazy_sync (const keyT &key, const opT &op, const Group &group) const
	Group lazy sync.

template<typename T >
void	logic_and (T *buf, size_t nelem)

template<typename T >
void	logic_or (T *buf, size_t nelem)

template<typename T >
void	max (T &a)
	Global max of a scalar while still processing AM & tasks.

template<typename T >
void	max (T *buf, size_t nelem)
	Inplace global max while still processing AM & tasks.

int	max_reducebcast_msg_size () const
	Returns the maximum size of messages (in bytes) sent by reduce and broadcast.

template<typename T >
void	min (T &a)
	Global min of a scalar while still processing AM & tasks.

template<typename T >
void	min (T *buf, size_t nelem)
	Inplace global min while still processing AM & tasks.

template<typename T >
void	product (T *buf, size_t nelem)
	Inplace global product while still processing AM & tasks.

template<typename keyT , typename valueT , typename opT >
Future< typename detail::result_of< opT >::type >	reduce (const keyT &key, const valueT &value, const opT &op, const ProcessID group_root, const Group &group)
	Distributed group reduce.

template<typename keyT , typename valueT , typename opT >
Future< typename detail::result_of< opT >::type >	reduce (const keyT &key, const valueT &value, const opT &op, const ProcessID root)
	Distributed reduce.

template<typename T , class opT >
void	reduce (T *buf, std::size_t nelem, opT op)
	Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.

template<typename keyT , typename valueT >
void	send (const ProcessID dest, const keyT &key, const valueT &value) const
	Send value to `dest`.

void	serial_invoke (std::function< void()> action)
	Executes an action on a single (this) thread after ensuring all other work is done.

bool	set_debug (bool value)
	Set debug flag to new value and return old value.

bool	set_forbid_fence (bool value)
	Set forbid_fence flag to new value and return old value.

int	set_max_reducebcast_msg_size (int sz)
	Set the maximum size of messages (in bytes) sent by reduce and broadcast.

template<typename T >
void	sum (T &a)
	Global sum of a scalar while still processing AM & tasks.

template<typename T >
void	sum (T *buf, size_t nelem)
	Inplace global sum while still processing AM & tasks.

Static Public Member Functions
template<typename valueT , typename keyT >
static Future< valueT >	recv (const ProcessID source, const keyT &key)
	Receive data from `source`.

Private Member Functions
template<typename tagT , typename keyT , typename valueT >
void	bcast_internal (const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const
	Group broadcast.

template<typename tagT , typename keyT , typename valueT >
void	bcast_internal (const keyT &key, Future< valueT > &value, const ProcessID root) const
	Broadcast.

template<typename keyT , typename valueT >
void	bcast_task (const keyT &key, const valueT &value, const ProcessID root) const
	Broadcast task.

void	fence_impl (std::function< void()> epilogue=[]{}, bool pause_during_epilogue=false, bool debug=false)
	Implementation of fence.

template<typename keyT , typename valueT >
void	group_bcast_task (const keyT &key, const valueT &value, const ProcessID group_root, const Group &group) const

int	initial_max_reducebcast_msg_size ()

template<typename keyT , typename opT >
void	lazy_sync_children (const ProcessID child0, const ProcessID child1, const keyT &key, opT &op, const ProcessID) const
	Lazy sync children task.

template<typename tagT , typename keyT , typename opT >
void	lazy_sync_internal (const ProcessID parent, const ProcessID child0, const ProcessID child1, const keyT &key, const opT &op) const
	Start a distributed lazy sync operation.

template<typename keyT >
void	lazy_sync_parent (const ProcessID parent, const keyT &key, const ProcessID, const ProcessID) const
	Lazy sync parent task.

template<typename tagT , typename keyT , typename valueT , typename opT >
Future< typename detail::result_of< opT >::type >	reduce_internal (const ProcessID parent, const ProcessID child0, const ProcessID child1, const ProcessID root, const keyT &key, const valueT &value, const opT &op)
	Distributed reduce.

template<typename keyT , typename valueT >
std::enable_if<!is_future< valueT >::value >::type	send_internal (const ProcessID dest, const keyT &key, const valueT &value) const
	Send `value` to `dest`.

template<typename keyT , typename valueT >
void	send_internal (ProcessID dest, const keyT &key, const Future< valueT > &value) const
	Send `value` to `dest`.

Static Private Member Functions
template<typename keyT , typename valueT , typename taskfnT >
static void	bcast_handler (const AmArg &arg)

template<typename keyT , typename valueT , typename taskfnT >
static void	group_bcast_handler (const AmArg &arg)

template<typename valueT , typename keyT >
static Future< valueT >	recv_internal (const keyT &key)
	Receive data from remote node.

template<typename opT >
static detail::result_of< opT >::type	reduce_result_task (const std::vector< Future< typename detail::result_of< opT >::type > > &results, const opT &op)

template<typename valueT , typename opT >
static detail::result_of< opT >::type	reduce_task (const valueT &value, const opT &op)

Private Attributes
bool	debug_
	Debug mode.

std::shared_ptr< detail::DeferredCleanup >	deferred_
	Deferred cleanup object.

bool	forbid_fence_ =false
	forbid calling fence() in case of several active worlds

int	max_reducebcast_msg_size_ = std::numeric_limits<int>::max()
	maximum size of messages (in bytes) sent by reduce and broadcast

World &	world_
	World object that this is a part of.

Friends
class	detail::DeferredCleanup

Detailed Description

Provides collectives that interoperate with the AM and task interfaces.

If native AM interoperates with MPI we probably should map these to MPI.

Constructor & Destructor Documentation

◆ WorldGopInterface()

madness::WorldGopInterface::WorldGopInterface ( World & world )

inline

◆ ~WorldGopInterface()

madness::WorldGopInterface::~WorldGopInterface ( )

inline

References deferred_.

Member Function Documentation

◆ absmax()

template<typename T >

void madness::WorldGopInterface::absmax	(	T *	buf,
		size_t	nelem
	)

inline

Inplace global absmax while still processing AM & tasks.

References madness::nonlinear_vector_solver().

◆ absmin()

template<typename T >

void madness::WorldGopInterface::absmin	(	T *	buf,
		size_t	nelem
	)

inline

Inplace global absmin while still processing AM & tasks.

References madness::nonlinear_vector_solver().

◆ all_reduce() [1/2]

template<typename keyT , typename valueT , typename opT >

Future< typename detail::result_of< opT >::type > madness::WorldGopInterface::all_reduce	(	const keyT &	key,
		const valueT &	value,
		const opT &	op
	)

inline

Distributed all reduce.

The reduce functor must have the following signature:

class ReduceFunc {
public:
    // Typedefs
    typedef ... result_type;
    typedef ... argument_type;
 
    // Constructors
    ReduceFunc(const ReduceFunc&);
 
    // Initialization operation, which returns a default result object
    result_type operator()() const;
 
    // Reduce two result objects
    void operator()(result_type&, const result_type&) const;
 
    // Reduce a result object and an argument object
    void operator()(result_type&, const argument_type&) const;
}; // class ReduceFunc

Template Parameters

keyT	The key type
valueT	The data type to be reduced
opT	The reduction operation type

Parameters

key	The key associated with this reduction
value	The local value to be reduced
op	The reduction operation to be applied to local and remote data

Returns: A future to the reduced value. Because this is an all-reduce, the result is made available on every process rather than only on a root process.

Note: It is the user's responsibility to ensure that key does not conflict with other calls to all_reduce. Keys may be reused after the associated operation has finished.

References SafeMPI::Intracomm::binary_tree_info(), madness::World::mpi, madness::nonlinear_vector_solver(), op(), madness::World::rank(), madness::World::size(), and world_.

◆ all_reduce() [2/2]

template<typename keyT , typename valueT , typename opT >

Future< typename detail::result_of< opT >::type > madness::WorldGopInterface::all_reduce	(	const keyT &	key,
		const valueT &	value,
		const opT &	op,
		const Group &	group
	)

inline

Distributed, group all reduce.

The reduce functor must have the following signature:

class ReduceFunc {
public:
    // Typedefs
    typedef ... result_type;
    typedef ... argument_type;
 
    // Constructors
    ReduceFunc(const ReduceFunc&);
 
    // Initialization operation, which returns a default result object
    result_type operator()() const;
 
    // Reduce two result objects
    void operator()(result_type&, const result_type&) const;
 
    // Reduce a result object and an argument object
    void operator()(result_type&, const argument_type&) const;
}; // class ReduceFunc

Template Parameters

keyT	The key type
valueT	The data type to be reduced
opT	The reduction operation type

Parameters

key	The key associated with this reduction
value	The local value to be reduced
op	The reduction operation to be applied to local and remote data
group	The group that will perform the reduction

Returns: A future to the reduced value. Because this is an all-reduce, the result is made available on every process in `group` rather than only on a root process.

Exceptions

madness::Exception	When `group` is empty
madness::Exception	When `group` is not registered
madness::Exception	When the world id of `group` is not equal to that of the world used to construct this object
madness::Exception	When this process is not in the group

Note: It is the user's responsibility to ensure that key does not conflict with other calls to all_reduce. Keys may be reused after the associated operation has finished.

References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), MADNESS_ASSERT, madness::Group::make_tree(), madness::nonlinear_vector_solver(), op(), madness::Group::rank(), madness::Group::size(), and world_.

◆ barrier()

void madness::WorldGopInterface::barrier ( )

inline

Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.

References madness::error(), madness::World::rank(), madness::World::size(), sum, and world_.

Referenced by madness::plotvtk_data(), and madness::plotvtk_data().

◆ bcast() [1/2]

template<typename keyT , typename valueT >

void madness::WorldGopInterface::bcast	(	const keyT &	key,
		Future< valueT > &	value,
		const ProcessID	group_root,
		const Group &	group
	)		const

inline

Group broadcast.

Broadcast data from the group_root process to all processes in group. The input/output data is held by value.

Parameters

[in]	key	The key associated with this broadcast
[in,out]	value	On the `group_root` process, this is used as the input data that will be broadcast to all other processes in the group. On other processes it is used as the output to the broadcast
	group_root	The process in `group` that owns the data to be broadcast
	group	The process group where value will be broadcast

Exceptions

madness::Exception	When `group` is empty
madness::Exception	When `group` is not registered
madness::Exception	When the world id of `group` is not equal to that of the world used to construct this object
madness::Exception	When this process is not in the group
madness::Exception	When `group_root` is less than 0 or greater than or equal to `group` size
madness::Exception	When `value` has been set, except on the `group_root` process

Note: It is the user's responsibility to ensure that key does not conflict with other calls to bcast. Keys may be reused after the associated operation has finished.

References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), MADNESS_ASSERT, madness::nonlinear_vector_solver(), madness::Future< T >::probe(), madness::Group::rank(), madness::Group::size(), and world_.

◆ bcast() [2/2]

template<typename keyT , typename valueT >

void madness::WorldGopInterface::bcast	(	const keyT &	key,
		Future< valueT > &	value,
		const ProcessID	root
	)		const

inline

Broadcast.

Broadcast data from the root process to all processes. The input/output data is held by value.

Parameters

[in]	key	The key associated with this broadcast
[in,out]	value	On the `root` process, this is used as the input data that will be broadcast to all other processes. On other processes it is used as the output to the broadcast.
	root	The process that owns the data to be broadcast

Exceptions

madness::Exception	When `root` is less than 0 or greater than or equal to the world size.
madness::Exception	When `value` has been set, except on the `root` process.

Note: It is the user's responsibility to ensure that key does not conflict with other calls to bcast. Keys may be reused after the associated operation has finished.

References MADNESS_ASSERT, madness::nonlinear_vector_solver(), madness::Future< T >::probe(), madness::World::rank(), madness::World::size(), and world_.

◆ bcast_handler()

template<typename keyT , typename valueT , typename taskfnT >

static void madness::WorldGopInterface::bcast_handler ( const AmArg & arg )

inline static private

References madness::arg(), madness::TaskAttributes::hipri(), and madness::nonlinear_vector_solver().

◆ bcast_internal() [1/2]

template<typename tagT , typename keyT , typename valueT >

void madness::WorldGopInterface::bcast_internal	(	const keyT &	key,
		Future< valueT > &	value,
		const ProcessID	group_root,
		const Group &	group
	)		const

inline private

Group broadcast.

Broadcast data from the group_root process to all processes in group. The input/output data is held by value.

Template Parameters

tagT	The tag type that is attached to `keyT`
keyT	The base key type
valueT	The value type that will be broadcast

Parameters

[in]	key	The key associated with this broadcast
[in,out]	value	On the `group_root` process, this is used as the input data that will be broadcast to all other processes in the group. On other processes it is used as the output to the broadcast
	group_root	The process in `group` that owns the data to be broadcast
	group	The process group where value will be broadcast

Exceptions

madness::Exception When value has been set, except on the group_root process.

References madness::WorldTaskQueue::add(), madness::Future< T >::get(), madness::detail::DistCache< keyT >::get_cache_value(), group_bcast_task(), madness::TaskAttributes::hipri(), madness::Group::local_update(), MADNESS_ASSERT, madness::nonlinear_vector_solver(), madness::Future< T >::probe(), madness::Group::rank(), madness::World::taskq, and world_.

◆ bcast_internal() [2/2]

template<typename tagT , typename keyT , typename valueT >

void madness::WorldGopInterface::bcast_internal	(	const keyT &	key,
		Future< valueT > &	value,
		const ProcessID	root
	)		const

inline private

Broadcast.

Broadcast data from the root process to all processes in world. The input/output data is held by value.

Template Parameters

tagT	The tag type that is attached to `keyT`
keyT	The base key type
valueT	The value type that will be broadcast

Parameters

[in]	key	The key associated with this broadcast
[in,out]	value	On the `root` process, this is used as the input data that will be broadcast to all other processes in the world. On other processes it is used as the output to the broadcast
	root	The process that owns the data to be broadcast

Exceptions

madness::Exception When value has been set, except on the root process.

References madness::WorldTaskQueue::add(), bcast_task(), madness::Future< T >::get(), madness::detail::DistCache< keyT >::get_cache_value(), madness::TaskAttributes::hipri(), MADNESS_ASSERT, madness::nonlinear_vector_solver(), madness::Future< T >::probe(), madness::World::rank(), madness::World::size(), madness::World::taskq, and world_.

◆ bcast_task()

template<typename keyT , typename valueT >

void madness::WorldGopInterface::bcast_task	(	const keyT &	key,
		const valueT &	value,
		const ProcessID	root
	)		const

inline private

Broadcast task.

This task will set the local cache with the broadcast data and send it to child processes in the binary tree.

References madness::World::am, madness::RMI::ATTR_UNORDERED, SafeMPI::Intracomm::binary_tree_info(), madness::copy_am_arg(), madness::World::mpi, madness::new_am_arg(), madness::nonlinear_vector_solver(), madness::WorldAmInterface::send(), madness::detail::DistCache< keyT >::set_cache_value(), and world_.

Referenced by bcast_internal().

◆ bit_and()

template<typename T >

void madness::WorldGopInterface::bit_and	(	T *	buf,
		size_t	nelem
	)

inline

References madness::nonlinear_vector_solver().

◆ bit_or()

template<typename T >

void madness::WorldGopInterface::bit_or	(	T *	buf,
		size_t	nelem
	)

inline

References madness::nonlinear_vector_solver().

◆ bit_xor()

template<typename T >

void madness::WorldGopInterface::bit_xor	(	T *	buf,
		size_t	nelem
	)

inline

References madness::nonlinear_vector_solver().

◆ broadcast() [1/4]

template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>

void madness::WorldGopInterface::broadcast ( T & t )

inline

Broadcast of a scalar from node 0 to all other nodes.

References broadcast().

◆ broadcast() [2/4]

template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>

void madness::WorldGopInterface::broadcast	(	T &	t,
		ProcessID	root
	)

inline

Broadcast of a scalar from node root to all other nodes.

References broadcast().

◆ broadcast() [3/4]

template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>>

void madness::WorldGopInterface::broadcast	(	T *	buf,
		size_t	nelem,
		ProcessID	root
	)

inline

Broadcasts typed contiguous data from process root while still processing AM & tasks.

Optimizations can be added for long messages

References broadcast(), madness::nonlinear_vector_solver(), and T().

◆ broadcast() [4/4]

void madness::WorldGopInterface::broadcast	(	void *	buf,
		size_t	nbyte,
		ProcessID	root,
		bool	dowork = `true`,
		Tag	bcast_tag = `-1`
	)

Broadcasts bytes from process root while still processing AM & tasks.

Optimizations can be added for long messages

References madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), max_reducebcast_msg_size(), madness::World::mpi, MPI_BYTE, madness::nonlinear_vector_solver(), SafeMPI::Intracomm::unique_tag(), and world_.

Referenced by broadcast(), broadcast(), broadcast(), broadcast_serializable(), madness::Localizer::compute_core_valence_separation_transformation_matrix(), doit(), madness::archive::BaseParallelArchive< Archive >::exists(), fence_impl(), get_fock_transformation(), madness::SCF::get_fock_transformation(), madness::load_coeffs(), madness::load_quadrature(), madness::Localizer::localize_boys(), madness::Localizer::localize_new(), main(), madness::archive::BaseParallelArchive< Archive >::open(), BoysLocalization::operator()(), propagate(), propagate(), reduce(), madness::SCF::restart_aos(), madness::LoadBalanceDeux< NDIM >::sum(), test14(), test2(), madness::SubspaceK< T, NDIM >::update_subspace(), and madness::SCF::update_subspace().

◆ broadcast_serializable()

template<typename objT , typename = std::void_t<decltype(std::declval<archive::BufferInputArchive&>()&std::declval<objT&>())>, typename = std::void_t<decltype(std::declval<archive::BufferOutputArchive&>()&std::declval<const objT&>())>>

void madness::WorldGopInterface::broadcast_serializable	(	objT &	obj,
		ProcessID	root
	)

inline

Broadcast a serializable object.

References broadcast(), MADNESS_ASSERT, madness::nonlinear_vector_solver(), madness::World::rank(), madness::archive::BufferOutputArchive::size(), madness::World::size(), and world_.

Referenced by madness::AC< NDIM >::AC(), Calculation::Calculation(), DF::DF(), madness::Molecule::Molecule(), madness::SCF::SCF(), madness::archive::BaseParallelArchive< Archive >::broadcast(), Calculation::calc_optimal_coeffs(), doit(), madness::QCCalculationParametersBase::file_exists(), madness::ScalarResultImpl< T >::get(), madness::MacroTaskQ::get_scheduled_task_number(), madness::Solver< T, NDIM >::init(), madness::LoadBalanceDeux< NDIM >::load_balance(), main(), madness::QCCalculationParametersBase::read_commandline_options(), madness::QCCalculationParametersBase::read_input(), madness::WorldContainerImpl< keyT, valueT, hashfunT >::replicate(), madness::SCF::restart_aos(), madness::sygvp(), madness::SubspaceK< T, NDIM >::update_subspace(), madness::Subspace< T, NDIM >::update_subspace(), and madness::SCF::update_subspace().

◆ concat0()

template<typename T >

std::vector< T > madness::WorldGopInterface::concat0	(	const std::vector< T > &	v,
		size_t	bufsz = `1024*1024`
	)

inline

Concatenate an STL vector of serializable stuff onto node 0.

Parameters

[in]	v	input vector
[in]	bufsz	the max number of bytes in the result; must be less than std::numeric_limits<int>::max()

Returns: on rank 0 returns the concatenated vector, elsewhere returns an empty vector

References madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), MADNESS_ASSERT, max_reducebcast_msg_size(), madness::World::mpi, MPI_BYTE, MPI_INT, madness::nonlinear_vector_solver(), posix_memalign(), madness::WorldMpiInterface::Recv(), madness::WorldMpiInterface::Send(), madness::archive::BufferOutputArchive::size(), SafeMPI::Intracomm::unique_tag(), SafeMPI::Intracomm::unique_tag_period(), v, and world_.

Referenced by madness::LoadBalanceDeux< NDIM >::load_balance(), madness::print_memory_usage(), madness::MemoryMeasurer::rank_to_host_and_rss_map(), and madness::MemoryMeasurer::reduce_map().

◆ fence()

void madness::WorldGopInterface::fence ( bool debug = false )

Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.

References debug, and fence_impl().

◆ fence_impl()

void madness::WorldGopInterface::fence_impl	(	std::function< void()>	epilogue = `[]{}`,
		bool	pause_during_epilogue = `false`,
		bool	debug = `false`
	)

private

Implementation of fence.

Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.

Parameters

[in]	epilogue	the action to execute (by the calling thread) immediately after the fence
[in]	pause_during_epilogue	whether to suspend work while executing epilogue
[in]	debug	set to true to print progress statistics using madness::print(); the default is false.

Warning: currently only pause_during_epilogue=false is supported

Runs a Dijkstra-like termination algorithm on a binary tree by locally ensuring ntask=0 and that all AM are sent and processed, and then participating in a global sum of nsent and nrecv. Then globally checks that nsent=nrecv and that both are constant over two traversals. We are then sure that all tasks and AM are processed and that there are no AM in flight.

References madness::World::am, madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), broadcast(), debug, deferred_, madness::WorldTaskQueue::fence(), forbid_fence_, madness::WorldAmInterface::free_managed_buffers(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), MADNESS_ASSERT, MADNESS_CHECK, madness::World::mpi, MPI_BYTE, madness::nonlinear_vector_solver(), madness::WorldAmInterface::nrecv, madness::WorldAmInterface::nsent, madness::print(), PROFILE_MEMBER_FUNC, madness::World::rank(), madness::WorldTaskQueue::size(), sum, madness::World::taskq, SafeMPI::Intracomm::unique_tag(), and world_.

Referenced by fence().

◆ group_bcast_handler()

template<typename keyT , typename valueT , typename taskfnT >

static void madness::WorldGopInterface::group_bcast_handler ( const AmArg & arg )

inline static private

References madness::arg(), madness::Group::get_group(), madness::TaskAttributes::hipri(), and madness::nonlinear_vector_solver().

◆ group_bcast_task()

template<typename keyT , typename valueT >

void madness::WorldGopInterface::group_bcast_task	(	const keyT &	key,
		const valueT &	value,
		const ProcessID	group_root,
		const Group &	group
	)		const

inline private

References madness::World::am, madness::RMI::ATTR_UNORDERED, madness::copy_am_arg(), madness::Group::id(), madness::Group::make_tree(), madness::new_am_arg(), madness::nonlinear_vector_solver(), madness::Group::remote_update(), madness::WorldAmInterface::send(), madness::detail::DistCache< keyT >::set_cache_value(), and world_.

Referenced by bcast_internal().

◆ initial_max_reducebcast_msg_size()

int madness::WorldGopInterface::initial_max_reducebcast_msg_size ( )

inline private

References SafeMPI::COMM_WORLD, madness::cstr_to_memory_size(), SafeMPI::Intracomm::Get_rank(), madness::nonlinear_vector_solver(), and madness::quiet().

◆ lazy_sync() [1/2]

template<typename keyT , typename opT >

void madness::WorldGopInterface::lazy_sync	(	const keyT &	key,
		const opT &	op
	)		const

inline

Lazy sync.

Lazy sync functions are asynchronous barriers with a nullary functor that is called after all processes have called it with the same key. You can think of lazy_sync as an asynchronous barrier. The lazy_sync functor must have the following signature:

class SyncFunc {
public:
    // typedefs
    typedef void result_type;
 
    // Constructors
    SyncFunc(const SyncFunc&);
 
    // The function that performs the sync operation
    void operator()();
 
}; // class SyncFunc

Template Parameters

keyT	The key type
opT	The operation type

Parameters

key	The sync key
op	The sync operation to be executed on this process

Note: It is the user's responsibility to ensure that key does not conflict with other calls to lazy_sync. Keys may be reused after the associated operation has finished.

References madness::WorldTaskQueue::add(), SafeMPI::Intracomm::binary_tree_info(), madness::TaskAttributes::hipri(), madness::World::mpi, madness::nonlinear_vector_solver(), op(), madness::World::size(), madness::World::taskq, and world_.

◆ lazy_sync() [2/2]

template<typename keyT , typename opT >

void madness::WorldGopInterface::lazy_sync	(	const keyT &	key,
		const opT &	op,
		const Group &	group
	)		const

inline

Group lazy sync.

Lazy sync functions are asynchronous barriers with a nullary functor that is called after all processes in the group have called it with the same key. You can think of lazy_sync as an asynchronous barrier. The op functor must have the following signature:

class SyncFunc {
public:
    // typedefs
    typedef void result_type;
 
    // Constructors
    SyncFunc(const SyncFunc&);
 
    // The function that performs the sync operation
    void operator()();
 
}; // class SyncFunc

Template Parameters

keyT	The key type
opT	The operation type

Parameters

key	The sync key
op	The sync operation to be executed on this process

Note: It is the user's responsibility to ensure that key does not conflict with other calls to lazy_sync. Keys may be reused after the associated operation has finished.

References madness::WorldTaskQueue::add(), madness::Group::empty(), madness::Group::get_world(), madness::TaskAttributes::hipri(), madness::World::id(), MADNESS_ASSERT, madness::Group::make_tree(), madness::nonlinear_vector_solver(), op(), madness::Group::size(), madness::World::taskq, and world_.

◆ lazy_sync_children()

template<typename keyT , typename opT >

void madness::WorldGopInterface::lazy_sync_children	(	const ProcessID	child0,
		const ProcessID	child1,
		const keyT &	key,
		opT &	op,
		const ProcessID
	)		const

inline private

Lazy sync children task.

Send signal to the child processes in the binary tree for a lazy sync operation. After the signal has been sent to the children, the sync operation, op, will be run.

Template Parameters

keyT	The key type
opT	The sync operation type

Parameters

child0	The first child process of this process in the binary tree
child1	The second child process of this process in the binary tree
key	The key associated with the sync operation
op	The sync operation that will be run

References madness::nonlinear_vector_solver(), op(), and send_internal().

◆ lazy_sync_internal()

template<typename tagT , typename keyT , typename opT >

void madness::WorldGopInterface::lazy_sync_internal	(	const ProcessID	parent,
		const ProcessID	child0,
		const ProcessID	child1,
		const keyT &	key,
		const opT &	op
	)		const

inline private

Start a distributed lazy sync operation.

Parameters

key	The sync key
op	The sync operation to be executed on this process

References madness::WorldTaskQueue::add(), madness::TaskAttributes::hipri(), madness::nonlinear_vector_solver(), op(), madness::World::rank(), send_internal(), madness::World::taskq, and world_.

◆ lazy_sync_parent()

template<typename keyT >

void madness::WorldGopInterface::lazy_sync_parent	(	const ProcessID	parent,
		const keyT &	key,
		const ProcessID	,
		const ProcessID
	)		const

inlineprivate

Lazy sync parent task.

Send signal to the parent process in the binary tree for a lazy sync operation.

Template Parameters

keyT	The key type

Parameters

parent	The parent process of this process in the binary tree
key	The lazy sync key

References send_internal().

◆ logic_and()

template<typename T >

void madness::WorldGopInterface::logic_and	(	T *	buf,
		size_t	nelem
	)

inline

References madness::nonlinear_vector_solver().

◆ logic_or()

template<typename T >

void madness::WorldGopInterface::logic_or	(	T *	buf,
		size_t	nelem
	)

inline

References madness::nonlinear_vector_solver().

◆ max() [1/2]

template<typename T >

void madness::WorldGopInterface::max ( T & a )

inline

Global max of a scalar while still processing AM & tasks.

References a, and max.

◆ max() [2/2]

template<typename T >

void madness::WorldGopInterface::max	(	T *	buf,
		size_t	nelem
	)

inline

Inplace global max while still processing AM & tasks.

References madness::nonlinear_vector_solver().

Referenced by madness::Cloud::print_size(), madness::print_stats(), test15(), and madness::time_transform().

◆ max_reducebcast_msg_size()

int madness::WorldGopInterface::max_reducebcast_msg_size ( ) const

inline

Returns the maximum size of messages (in bytes) sent by reduce and broadcast.

Returns: the maximum size of messages (in bytes) sent by reduce and broadcast

References max_reducebcast_msg_size_.

Referenced by broadcast(), concat0(), and reduce().

◆ min() [1/2]

template<typename T >

void madness::WorldGopInterface::min ( T & a )

inline

Global min of a scalar while still processing AM & tasks.

References a, and min().

◆ min() [2/2]

template<typename T >

void madness::WorldGopInterface::min	(	T *	buf,
		size_t	nelem
	)

inline

Inplace global min while still processing AM & tasks.

References madness::nonlinear_vector_solver().

Referenced by min(), madness::Cloud::print_size(), madness::print_stats(), and madness::time_transform().

◆ product()

template<typename T >

void madness::WorldGopInterface::product	(	T *	buf,
		size_t	nelem
	)

inline

Inplace global product while still processing AM & tasks.

References madness::nonlinear_vector_solver().

◆ recv()

template<typename valueT , typename keyT >

static Future< valueT > madness::WorldGopInterface::recv	(	const ProcessID	source,
		const keyT &	key
	)

inlinestatic

Receive data from source.

Template Parameters

valueT	The data type stored in cache
keyT	The key type

Parameters

source	The process that is sending the data to this process
key	The key associated with the received data

Returns: A future that will be set with the received data

Note: It is the user's responsibility to ensure that key does not conflict with other calls to recv. Keys may be reused after the associated operation has finished.

References madness::nonlinear_vector_solver(), and source().

◆ recv_internal()

template<typename valueT , typename keyT >

static Future< valueT > madness::WorldGopInterface::recv_internal ( const keyT & key )

inlinestaticprivate

Receive data from remote node.

Template Parameters

valueT The data type stored in cache

Parameters

key	The distributed ID

Returns: A future to the data

References madness::nonlinear_vector_solver().

◆ reduce() [1/3]

template<typename keyT , typename valueT , typename opT >

Future< typename detail::result_of< opT >::type > madness::WorldGopInterface::reduce	(	const keyT &	key,
		const valueT &	value,
		const opT &	op,
		const ProcessID	group_root,
		const Group &	group
	)

inline

Distributed group reduce.

The reduce functor must have the following signature:

class ReduceFunc {
public:
    // Typedefs
    typedef ... result_type;
    typedef ... argument_type;
 
    // Constructors
    ReduceFunc(const ReduceFunc&);
 
    // Initialization operation, which returns a default result object
    result_type operator()() const;
 
    // Reduce two result objects
    void operator()(result_type&, const result_type&) const;
 
    // Reduce a result object and an argument object
    void operator()(result_type&, const argument_type&) const;
}; // class ReduceFunc

Template Parameters

keyT	The key type
valueT	The data type to be reduced
opT	The reduction operation type

Parameters

key	The key associated with this reduction
value	The local value to be reduced
op	The reduction operation to be applied to local and remote data
group_root	The group process that will receive the result of the reduction
group	The group that will perform the reduction

Returns: A future to the reduced value on the root process, otherwise an uninitialized future that may be ignored.

Exceptions

madness::Exception	When `group` is empty
madness::Exception	When `group` is not registered
madness::Exception	When the world id of `group` is not equal to that of the world used to construct this object
madness::Exception	When this process is not in the group
madness::Exception	When `group_root` is less than zero or greater than or equal to `group` size.

Note: It is the user's responsibility to ensure that key does not conflict with other calls to reduce. Keys may be reused after the associated operation has finished.

References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), MADNESS_ASSERT, madness::Group::make_tree(), madness::nonlinear_vector_solver(), op(), madness::Group::size(), and world_.

◆ reduce() [2/3]

template<typename keyT , typename valueT , typename opT >

Future< typename detail::result_of< opT >::type > madness::WorldGopInterface::reduce	(	const keyT &	key,
		const valueT &	value,
		const opT &	op,
		const ProcessID	root
	)

inline

Distributed reduce.

The reduce functor must have the following signature:

class ReduceFunc {
public:
    // Typedefs
    typedef ... result_type;
    typedef ... argument_type;
 
    // Constructors
    ReduceFunc(const ReduceFunc&);
 
    // Initialization operation, which returns a default result object
    result_type operator()() const;
 
    // Reduce two result objects
    void operator()(result_type&, const result_type&) const;
 
    // Reduce a result object and an argument object
    void operator()(result_type&, const argument_type&) const;
}; // class ReduceFunc

Template Parameters

keyT	The key type
valueT	The data type to be reduced
opT	The reduction operation type

Parameters

key	The key associated with this reduction
value	The local value to be reduced
op	The reduction operation to be applied to local and remote data
root	The process that will receive the result of the reduction

Returns: A future to the reduced value on the root process, otherwise an uninitialized future that may be ignored.

Note: It is the user's responsibility to ensure that key does not conflict with other calls to reduce. Keys may be reused after the associated operation has finished.

References SafeMPI::Intracomm::binary_tree_info(), MADNESS_ASSERT, madness::World::mpi, madness::nonlinear_vector_solver(), op(), madness::World::size(), and world_.

◆ reduce() [3/3]

template<typename T , class opT >

void madness::WorldGopInterface::reduce	(	T *	buf,
		std::size_t	nelem,
		opT	op
	)

inline

Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.

Optimizations can be added for long messages and to reduce the memory footprint.

References madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), broadcast(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), MADNESS_ASSERT, max_reducebcast_msg_size(), madness::World::mpi, MPI_BYTE, madness::nonlinear_vector_solver(), op(), posix_memalign(), T(), SafeMPI::Intracomm::unique_tag(), and world_.

◆ reduce_internal()

template<typename tagT , typename keyT , typename valueT , typename opT >

Future< typename detail::result_of< opT >::type > madness::WorldGopInterface::reduce_internal	(	const ProcessID	parent,
		const ProcessID	child0,
		const ProcessID	child1,
		const ProcessID	root,
		const keyT &	key,
		const valueT &	value,
		const opT &	op
	)

inlineprivate

Distributed reduce.

Template Parameters

tagT	The tag type to be added to the key type
keyT	The key type
valueT	The data type to be reduced
opT	The reduction operation type

Parameters

key	The key associated with this reduction
value	The local value to be reduced
op	The reduction operation to be applied to local and remote data
root	The process that will receive the result of the reduction

Returns: A future to the reduced value on the root process, otherwise an uninitialized future that may be ignored.

References madness::WorldTaskQueue::add(), madness::Future< T >::default_initializer(), madness::TaskAttributes::hipri(), madness::nonlinear_vector_solver(), op(), madness::World::rank(), send_internal(), madness::World::taskq, and world_.

◆ reduce_result_task()

template<typename opT >

static detail::result_of< opT >::type madness::WorldGopInterface::reduce_result_task	(	const std::vector< Future< typename detail::result_of< opT >::type > > &	results,
		const opT &	op
	)

inlinestaticprivate

References madness::Future< T >::get(), MADNESS_ASSERT, madness::nonlinear_vector_solver(), and op().

◆ reduce_task()

template<typename valueT , typename opT >

static detail::result_of< opT >::type madness::WorldGopInterface::reduce_task	(	const valueT &	value,
		const opT &	op
	)

inlinestaticprivate

References op().

◆ send()

template<typename keyT , typename valueT >

void madness::WorldGopInterface::send	(	const ProcessID	dest,
		const keyT &	key,
		const valueT &	value
	)		const

inline

Send value to dest.

Template Parameters

keyT	The key type
valueT	The value type (this may be a `Future` type)

Parameters

dest	The process where the data will be sent
key	The key that is associated with the data
value	The data to be sent to `dest`

Note: It is the user's responsibility to ensure that key does not conflict with other calls to send. Keys may be reused after the associated operation has finished.

References madness::World::rank(), send_internal(), and world_.

◆ send_internal() [1/2]

template<typename keyT , typename valueT >

std::enable_if<!is_future< valueT >::value >::type madness::WorldGopInterface::send_internal	(	const ProcessID	dest,
		const keyT &	key,
		const valueT &	value
	)		const

inlineprivate

Send value to dest.

Send non-future data to dest.

Template Parameters

keyT	The key type
valueT	The value type

Parameters

dest	The node where the data will be sent
key	The key that is associated with the data
value	The data to be sent to `dest`

References madness::WorldTaskQueue::add(), madness::TaskAttributes::hipri(), madness::nonlinear_vector_solver(), madness::World::rank(), madness::World::taskq, and world_.

Referenced by lazy_sync_children(), lazy_sync_internal(), lazy_sync_parent(), madness::WorldGopInterface::DelayedSend< keyT, valueT >::notify(), reduce_internal(), and send().

◆ send_internal() [2/2]

template<typename keyT , typename valueT >

void madness::WorldGopInterface::send_internal	(	ProcessID	dest,
		const keyT &	key,
		const Future< valueT > &	value
	)		const

inlineprivate

Send value to dest.

Send data that is stored in a future to dest. The data in value is only sent to the remote process once it has been set.

Template Parameters

keyT	The key type
valueT	The value type

Parameters

dest	The node where the data will be sent
key	The key that is associated with the data
value	The data to be sent to `dest`

References madness::WorldTaskQueue::add(), madness::Future< T >::get(), madness::TaskAttributes::hipri(), madness::nonlinear_vector_solver(), madness::Future< T >::probe(), madness::World::rank(), madness::World::taskq, and world_.

◆ serial_invoke()

void madness::WorldGopInterface::serial_invoke ( std::function< void()> action )

Executes an action on a single (this) thread after ensuring all other work is done.

Parameters

[in] action the action to execute (by the calling thread)

References fence().

◆ set_debug()

bool madness::WorldGopInterface::set_debug ( bool value )

inline

Set debug flag to new value and return old value.

References debug_, and status.

◆ set_forbid_fence()

bool madness::WorldGopInterface::set_forbid_fence ( bool value )

inline

Set forbid_fence flag to new value and return old value.

References forbid_fence_, and status.

Referenced by madness::innerXX(), and madness::MacroTaskQ::run_all().

◆ set_max_reducebcast_msg_size()

int madness::WorldGopInterface::set_max_reducebcast_msg_size ( int sz )

inline

Set the maximum size of messages (in bytes) sent by reduce and broadcast.

Parameters

sz	the maximum size of messages (in bytes) sent by reduce and broadcast

Returns: the previous maximum size of messages (in bytes) sent by reduce and broadcast

Precondition: sz>0

References MADNESS_ASSERT, and max_reducebcast_msg_size_.

Referenced by test14().

◆ sum() [1/2]

template<typename T >

void madness::WorldGopInterface::sum ( T & a )

inline

Global sum of a scalar while still processing AM & tasks.

References a, and sum.

◆ sum() [2/2]

template<typename T >

void madness::WorldGopInterface::sum	(	T *	buf,
		size_t	nelem
	)

inline

Inplace global sum while still processing AM & tasks.

References madness::nonlinear_vector_solver().

Referenced by madness::DistributedMatrix< T >::copy_to_replicated(), madness::DistributedMatrix< T >::copy_to_replicated_patch(), doit(), madness::SystolicFixOrbitalOrders::end_iteration_hook(), madness::SystolicPMOrbitalLocalize::end_iteration_hook(), madness::WorldDCPmapInterface< keyT >::global_size(), madness::inner(), madness::inner(), line_plot(), main(), madness::matrix_dot(), madness::matrix_dot_old(), madness::matrix_inner(), madness::matrix_inner_old(), madness::norm2(), madness::norm2s(), madness::norm2s(), madness::norm2s_T(), madness::CCSize::print(), madness::WorldDCPmapInterface< keyT >::print_data_sizes(), madness::Cloud::print_size(), madness::print_stats(), madness::Exchange< T, NDIM >::ExchangeImpl< T, NDIM >::print_timer(), madness::Cloud::print_timings(), madness::MacroTaskQ::run_all(), madness::archive::ArchiveStoreImpl< ParallelOutputArchive< VectorOutputArchive >, WorldContainer< keyT, valueT > >::store(), test1(), madness::SubspaceK< T, NDIM >::update_subspace(), madness::Subspace< T, NDIM >::update_subspace(), and madness::SCF::update_subspace().

Friends And Related Symbol Documentation

◆ detail::DeferredCleanup

friend class detail::DeferredCleanup

friend

Member Data Documentation

◆ debug_

bool madness::WorldGopInterface::debug_

private

Debug mode.

Referenced by set_debug().

◆ deferred_

std::shared_ptr<detail::DeferredCleanup> madness::WorldGopInterface::deferred_

private

Deferred cleanup object.

Referenced by ~WorldGopInterface(), and fence_impl().

◆ forbid_fence_

bool madness::WorldGopInterface::forbid_fence_ =false

private

Forbid calling fence() when several worlds are active.

Referenced by fence_impl(), and set_forbid_fence().

◆ max_reducebcast_msg_size_

int madness::WorldGopInterface::max_reducebcast_msg_size_ = std::numeric_limits<int>::max()

private

Maximum size of messages (in bytes) sent by reduce and broadcast.

Referenced by max_reducebcast_msg_size(), and set_max_reducebcast_msg_size().

◆ world_

World& madness::WorldGopInterface::world_

private

World object that this is a part of.

Referenced by all_reduce(), all_reduce(), barrier(), bcast(), bcast(), bcast_internal(), bcast_internal(), bcast_task(), broadcast(), broadcast_serializable(), concat0(), fence_impl(), group_bcast_task(), lazy_sync(), lazy_sync(), lazy_sync_internal(), reduce(), reduce(), reduce(), reduce_internal(), send(), send_internal(), and send_internal().

The documentation for this class was generated from the following files:

Classes

Public Member Functions

Static Public Member Functions

Private Member Functions

Static Private Member Functions

Private Attributes

Friends

Detailed Description

Constructor & Destructor Documentation

◆ WorldGopInterface()

◆ ~WorldGopInterface()

Member Function Documentation

◆ absmax()

◆ absmin()

◆ all_reduce() [1/2]

◆ all_reduce() [2/2]

◆ barrier()

◆ bcast() [1/2]

◆ bcast() [2/2]

◆ bcast_handler()

◆ bcast_internal() [1/2]

◆ bcast_internal() [2/2]

◆ bcast_task()

◆ bit_and()

◆ bit_or()

◆ bit_xor()

◆ broadcast() [1/4]

◆ broadcast() [2/4]

◆ broadcast() [3/4]

◆ broadcast() [4/4]

◆ broadcast_serializable()

◆ concat0()

◆ fence()

◆ fence_impl()

◆ group_bcast_handler()

◆ group_bcast_task()

◆ initial_max_reducebcast_msg_size()

◆ lazy_sync() [1/2]

◆ lazy_sync() [2/2]

◆ lazy_sync_children()

◆ lazy_sync_internal()

◆ lazy_sync_parent()

◆ logic_and()

◆ logic_or()

◆ max() [1/2]

◆ max() [2/2]

◆ max_reducebcast_msg_size()

◆ min() [1/2]

◆ min() [2/2]

◆ product()

◆ recv()

◆ recv_internal()

◆ reduce() [1/3]

◆ reduce() [2/3]

◆ reduce() [3/3]

◆ reduce_internal()

◆ reduce_result_task()

◆ reduce_task()

◆ send()

◆ send_internal() [1/2]

◆ send_internal() [2/2]

◆ serial_invoke()

◆ set_debug()

◆ set_forbid_fence()

◆ set_max_reducebcast_msg_size()

◆ sum() [1/2]

◆ sum() [2/2]

Friends And Related Symbol Documentation

◆ detail::DeferredCleanup

Member Data Documentation

◆ debug_

◆ deferred_

◆ forbid_fence_

◆ max_reducebcast_msg_size_

◆ world_