MADNESS 0.10.1
|
Provides collectives that interoperate with the AM and task interfaces. More...
#include <worldgop.h>
Classes | |
struct | AllReduceTag |
struct | BcastTag |
class | DelayedSend |
Delayed send callback object. More... | |
struct | GroupAllReduceTag |
struct | GroupBcastTag |
struct | GroupLazySyncTag |
struct | GroupReduceTag |
struct | LazySyncTag |
struct | PointToPointTag |
struct | ReduceTag |
Public Member Functions | |
WorldGopInterface (World &world) | |
~WorldGopInterface () | |
template<typename T > | |
void | absmax (T *buf, size_t nelem) |
Inplace global absmax while still processing AM & tasks. | |
template<typename T > | |
void | absmin (T *buf, size_t nelem) |
Inplace global absmin while still processing AM & tasks. | |
template<typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | all_reduce (const keyT &key, const valueT &value, const opT &op) |
Distributed all reduce. | |
template<typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | all_reduce (const keyT &key, const valueT &value, const opT &op, const Group &group) |
Distributed, group all reduce. | |
void | barrier () |
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks. | |
template<typename keyT , typename valueT > | |
void | bcast (const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const |
Group broadcast. | |
template<typename keyT , typename valueT > | |
void | bcast (const keyT &key, Future< valueT > &value, const ProcessID root) const |
Broadcast. | |
template<typename T > | |
void | bit_and (T *buf, size_t nelem) |
template<typename T > | |
void | bit_or (T *buf, size_t nelem) |
template<typename T > | |
void | bit_xor (T *buf, size_t nelem) |
template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>> | |
void | broadcast (T &t) |
Broadcast of a scalar from node 0 to all other nodes. | |
template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>> | |
void | broadcast (T &t, ProcessID root) |
Broadcast of a scalar from node root to all other nodes. | |
template<typename T , typename = std::enable_if_t<madness::is_trivially_copyable_v<T>>> | |
void | broadcast (T *buf, size_t nelem, ProcessID root) |
Broadcasts typed contiguous data from process root while still processing AM & tasks. | |
void | broadcast (void *buf, size_t nbyte, ProcessID root, bool dowork=true, Tag bcast_tag=-1) |
Broadcasts bytes from process root while still processing AM & tasks. | |
template<typename objT , typename = std::void_t<decltype(std::declval<archive::BufferInputArchive&>()&std::declval<objT&>())>, typename = std::void_t<decltype(std::declval<archive::BufferOutputArchive&>()&std::declval<const objT&>())>> | |
void | broadcast_serializable (objT &obj, ProcessID root) |
Broadcast a serializable object. | |
template<typename T > | |
std::vector< T > | concat0 (const std::vector< T > &v, size_t bufsz=1024 *1024) |
Concatenate an STL vector of serializable stuff onto node 0. | |
void | fence (bool debug=false) |
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks. | |
template<typename keyT , typename opT > | |
void | lazy_sync (const keyT &key, const opT &op) const |
Lazy sync. | |
template<typename keyT , typename opT > | |
void | lazy_sync (const keyT &key, const opT &op, const Group &group) const |
Group lazy sync. | |
template<typename T > | |
void | logic_and (T *buf, size_t nelem) |
template<typename T > | |
void | logic_or (T *buf, size_t nelem) |
template<typename T > | |
void | max (T &a) |
Global max of a scalar while still processing AM & tasks. | |
template<typename T > | |
void | max (T *buf, size_t nelem) |
Inplace global max while still processing AM & tasks. | |
int | max_reducebcast_msg_size () const |
Returns the maximum size of messages (in bytes) sent by reduce and broadcast. | |
template<typename T > | |
void | min (T &a) |
Global min of a scalar while still processing AM & tasks. | |
template<typename T > | |
void | min (T *buf, size_t nelem) |
Inplace global min while still processing AM & tasks. | |
template<typename T > | |
void | product (T *buf, size_t nelem) |
Inplace global product while still processing AM & tasks. | |
template<typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | reduce (const keyT &key, const valueT &value, const opT &op, const ProcessID group_root, const Group &group) |
Distributed group reduce. | |
template<typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | reduce (const keyT &key, const valueT &value, const opT &op, const ProcessID root) |
Distributed reduce. | |
template<typename T , class opT > | |
void | reduce (T *buf, std::size_t nelem, opT op) |
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks. | |
template<typename keyT , typename valueT > | |
void | send (const ProcessID dest, const keyT &key, const valueT &value) const |
Send value to dest . | |
void | serial_invoke (std::function< void()> action) |
Executes an action on single (this) thread after ensuring all other work is done. | |
bool | set_debug (bool value) |
Set debug flag to new value and return old value. | |
bool | set_forbid_fence (bool value) |
Set forbid_fence flag to new value and return old value. | |
int | set_max_reducebcast_msg_size (int sz) |
Set the maximum size of messages (in bytes) sent by reduce and broadcast. | |
template<typename T > | |
void | sum (T &a) |
Global sum of a scalar while still processing AM & tasks. | |
template<typename T > | |
void | sum (T *buf, size_t nelem) |
Inplace global sum while still processing AM & tasks. | |
Static Public Member Functions | |
template<typename valueT , typename keyT > | |
static Future< valueT > | recv (const ProcessID source, const keyT &key) |
Receive data from source . | |
Private Member Functions | |
template<typename tagT , typename keyT , typename valueT > | |
void | bcast_internal (const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const |
Group broadcast. | |
template<typename tagT , typename keyT , typename valueT > | |
void | bcast_internal (const keyT &key, Future< valueT > &value, const ProcessID root) const |
Broadcast. | |
template<typename keyT , typename valueT > | |
void | bcast_task (const keyT &key, const valueT &value, const ProcessID root) const |
Broadcast task. | |
void | fence_impl (std::function< void()> epilogue=[]{}, bool pause_during_epilogue=false, bool debug=false) |
Implementation of fence. | |
template<typename keyT , typename valueT > | |
void | group_bcast_task (const keyT &key, const valueT &value, const ProcessID group_root, const Group &group) const |
int | initial_max_reducebcast_msg_size () |
template<typename keyT , typename opT > | |
void | lazy_sync_children (const ProcessID child0, const ProcessID child1, const keyT &key, opT &op, const ProcessID) const |
Lazy sync children task. | |
template<typename tagT , typename keyT , typename opT > | |
void | lazy_sync_internal (const ProcessID parent, const ProcessID child0, const ProcessID child1, const keyT &key, const opT &op) const |
Start a distributed lazy sync operation. | |
template<typename keyT > | |
void | lazy_sync_parent (const ProcessID parent, const keyT &key, const ProcessID, const ProcessID) const |
Lazy sync parent task. | |
template<typename tagT , typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | reduce_internal (const ProcessID parent, const ProcessID child0, const ProcessID child1, const ProcessID root, const keyT &key, const valueT &value, const opT &op) |
Distributed reduce. | |
template<typename keyT , typename valueT > | |
std::enable_if<!is_future< valueT >::value >::type | send_internal (const ProcessID dest, const keyT &key, const valueT &value) const |
Send value to dest . | |
template<typename keyT , typename valueT > | |
void | send_internal (ProcessID dest, const keyT &key, const Future< valueT > &value) const |
Send value to dest . | |
Static Private Member Functions | |
template<typename keyT , typename valueT , typename taskfnT > | |
static void | bcast_handler (const AmArg &arg) |
template<typename keyT , typename valueT , typename taskfnT > | |
static void | group_bcast_handler (const AmArg &arg) |
template<typename valueT , typename keyT > | |
static Future< valueT > | recv_internal (const keyT &key) |
Receive data from remote node. | |
template<typename opT > | |
static detail::result_of< opT >::type | reduce_result_task (const std::vector< Future< typename detail::result_of< opT >::type > > &results, const opT &op) |
template<typename valueT , typename opT > | |
static detail::result_of< opT >::type | reduce_task (const valueT &value, const opT &op) |
Private Attributes | |
bool | debug_ |
Debug mode. | |
std::shared_ptr< detail::DeferredCleanup > | deferred_ |
Deferred cleanup object. | |
bool | forbid_fence_ =false |
forbid calling fence() in case of several active worlds | |
int | max_reducebcast_msg_size_ = std::numeric_limits<int>::max() |
maximum size of messages (in bytes) sent by reduce and broadcast | |
World & | world_ |
World object that this is a part of. | |
Friends | |
class | detail::DeferredCleanup |
Provides collectives that interoperate with the AM and task interfaces.
If native AM interoperates with MPI we probably should map these to MPI.
|
inline |
|
inline |
References deferred_.
Inplace global absmax while still processing AM & tasks.
Inplace global absmin while still processing AM & tasks.
|
inline |
Distributed all reduce.
The reduce functor must have the following signature:
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
key does not conflict with other calls to all_reduce. Keys may be reused after the associated operation has finished.
References SafeMPI::Intracomm::binary_tree_info(), madness::World::mpi, op(), madness::World::rank(), madness::World::size(), and world_.
|
inline |
Distributed, group all reduce.
The reduce functor must have the following signature:
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
group | The group that will perform the reduction |
madness::Exception | When group is empty |
madness::Exception | When group is not registered |
madness::Exception | When the world id of group is not equal to that of the world used to construct this object |
madness::Exception | When this process is not in the group |
key does not conflict with other calls to reduce. Keys may be reused after the associated operation has finished.
References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), MADNESS_ASSERT, madness::Group::make_tree(), op(), madness::Group::rank(), madness::Group::size(), and world_.
|
inline |
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.
References madness::error(), madness::World::rank(), madness::World::size(), sum, and world_.
Referenced by madness::plotvtk_data(), and madness::plotvtk_data().
|
inline |
Group broadcast.
Broadcast data from the group_root process to all processes in group. The input/output data is held by value.
[in] | key | The key associated with this broadcast |
[in,out] | value | On the group_root process, this is used as the input data that will be broadcast to all other processes in the group. On other processes it is used as the output to the broadcast |
group_root | The process in group that owns the data to be broadcast | |
group | The process group where value will be broadcast |
madness::Exception | When group is empty |
madness::Exception | When group is not registered |
madness::Exception | When the world id of group is not equal to that of the world used to construct this object |
madness::Exception | When this process is not in the group |
madness::Exception | When group_root is less than 0 or greater than or equal to group size |
madness::Exception | When data has been set except on the root process |
key does not conflict with other calls to bcast. Keys may be reused after the associated operation has finished.
References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), MADNESS_ASSERT, madness::Future< T >::probe(), madness::Group::rank(), madness::Group::size(), and world_.
|
inline |
Broadcast.
Broadcast data from the root process to all processes. The input/output data is held by value.
[in] | key | The key associated with this broadcast |
[in,out] | value | On the root process, this is used as the input data that will be broadcast to all other processes. On other processes it is used as the output to the broadcast. |
root | The process that owns the data to be broadcast |
madness::Exception | When root is less than 0 or greater than or equal to the world size. |
madness::Exception | When value has been set, except on the root process. |
key does not conflict with other calls to bcast. Keys may be reused after the associated operation has finished.
References MADNESS_ASSERT, madness::Future< T >::probe(), madness::World::rank(), madness::World::size(), and world_.
|
inlinestaticprivate |
References madness::arg(), and madness::TaskAttributes::hipri().
|
inlineprivate |
Group broadcast.
Broadcast data from the group_root process to all processes in group. The input/output data is held by value.
tagT | The tag type that is attached to keyT |
keyT | The base key type |
valueT | The value type that will be broadcast |
[in] | key | The key associated with this broadcast |
[in,out] | value | On the group_root process, this is used as the input data that will be broadcast to all other processes in the group. On other processes it is used as the output to the broadcast |
group_root | The process in group that owns the data to be broadcast | |
group | The process group where value will be broadcast |
madness::Exception | When value has been set, except on the group_root process. |
References madness::WorldTaskQueue::add(), madness::Future< T >::get(), madness::detail::DistCache< keyT >::get_cache_value(), group_bcast_task(), madness::TaskAttributes::hipri(), madness::Group::local_update(), MADNESS_ASSERT, madness::Future< T >::probe(), madness::Group::rank(), madness::World::taskq, and world_.
|
inlineprivate |
Broadcast.
Broadcast data from the root process to all processes in world. The input/output data is held by value.
tagT | The tag type that is attached to keyT |
keyT | The base key type |
valueT | The value type that will be broadcast |
[in] | key | The key associated with this broadcast |
[in,out] | value | On the root process, this is used as the input data that will be broadcast to all other processes in the world. On other processes it is used as the output to the broadcast |
root | The process that owns the data to be broadcast |
madness::Exception | When value has been set, except on the root process. |
References madness::WorldTaskQueue::add(), bcast_task(), madness::Future< T >::get(), madness::detail::DistCache< keyT >::get_cache_value(), madness::TaskAttributes::hipri(), MADNESS_ASSERT, madness::Future< T >::probe(), madness::World::rank(), madness::World::size(), madness::World::taskq, and world_.
|
inlineprivate |
Broadcast task.
This task will set the local cache with the broadcast data and send it to child processes in the binary tree.
References madness::World::am, madness::RMI::ATTR_UNORDERED, SafeMPI::Intracomm::binary_tree_info(), madness::copy_am_arg(), madness::World::mpi, madness::new_am_arg(), madness::WorldAmInterface::send(), madness::detail::DistCache< keyT >::set_cache_value(), and world_.
Referenced by bcast_internal().
|
inline |
Broadcast of a scalar from node 0 to all other nodes.
References broadcast().
|
inline |
Broadcast of a scalar from node root to all other nodes.
References broadcast().
|
inline |
Broadcasts typed contiguous data from process root while still processing AM & tasks.
Optimizations can be added for long messages
References broadcast(), and T().
void madness::WorldGopInterface::broadcast | ( | void * | buf, |
size_t | nbyte, | ||
ProcessID | root, | ||
bool | dowork = true , |
||
Tag | bcast_tag = -1 |
||
) |
Broadcasts bytes from process root while still processing AM & tasks.
Optimizations can be added for long messages
References madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), max_reducebcast_msg_size(), madness::World::mpi, MPI_BYTE, SafeMPI::Intracomm::unique_tag(), and world_.
Referenced by broadcast(), broadcast(), broadcast(), broadcast_serializable(), madness::Localizer::compute_core_valence_separation_transformation_matrix(), doit(), madness::archive::BaseParallelArchive< Archive >::exists(), fence_impl(), get_fock_transformation(), madness::SCF::get_fock_transformation(), madness::load_coeffs(), madness::load_quadrature(), madness::Localizer::localize_boys(), madness::Localizer::localize_new(), main(), madness::archive::BaseParallelArchive< Archive >::open(), BoysLocalization::operator()(), propagate(), propagate(), reduce(), madness::SCF::restart_aos(), madness::LoadBalanceDeux< NDIM >::sum(), test14(), test2(), madness::SubspaceK< T, NDIM >::update_subspace(), and madness::SCF::update_subspace().
|
inline |
Broadcast a serializable object.
References broadcast(), MADNESS_ASSERT, madness::World::rank(), madness::archive::BufferOutputArchive::size(), madness::World::size(), and world_.
Referenced by madness::AC< NDIM >::AC(), Calculation::Calculation(), DF::DF(), madness::Molecule::Molecule(), madness::SCF::SCF(), madness::archive::BaseParallelArchive< Archive >::broadcast(), Calculation::calc_optimal_coeffs(), doit(), madness::QCCalculationParametersBase::file_exists(), madness::ScalarResult< T >::get(), madness::MacroTaskQ::get_scheduled_task_number(), madness::Solver< T, NDIM >::init(), madness::LoadBalanceDeux< NDIM >::load_balance(), main(), madness::QCCalculationParametersBase::read_commandline_options(), madness::QCCalculationParametersBase::read_input(), madness::WorldContainerImpl< keyT, valueT, hashfunT >::replicate(), madness::SCF::restart_aos(), madness::sygvp(), madness::SubspaceK< T, NDIM >::update_subspace(), madness::Subspace< T, NDIM >::update_subspace(), and madness::SCF::update_subspace().
|
inline |
Concatenate an STL vector of serializable stuff onto node 0.
[in] | v | input vector |
[in] | bufsz | the max number of bytes in the result; must be less than std::numeric_limits<int>::max() |
References madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), MADNESS_ASSERT, max_reducebcast_msg_size(), madness::World::mpi, MPI_BYTE, MPI_INT, posix_memalign(), madness::WorldMpiInterface::Recv(), madness::WorldMpiInterface::Send(), madness::archive::BufferOutputArchive::size(), SafeMPI::Intracomm::unique_tag(), SafeMPI::Intracomm::unique_tag_period(), v, and world_.
Referenced by madness::LoadBalanceDeux< NDIM >::load_balance().
void madness::WorldGopInterface::fence | ( | bool | debug = false | ) |
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
References debug, and fence_impl().
Referenced by DF::DF(), madness::FunctionImpl< T, NDIM >::FunctionImpl(), madness::LoadBalanceDeux< NDIM >::LoadBalanceDeux(), madness::Znemo::timer::timer(), madness::timer::timer(), madness::abssq(), madness::add(), madness::add(), madness::add(), madness::add(), madness::MP2::add_local_coupling(), madness::apply(), madness::apply(), madness::FunctionImpl< T, NDIM >::apply(), madness::apply(), madness::apply(), madness::apply(), madness::apply(), madness::apply(), madness::apply(), madness::apply(), madness::FunctionImpl< T, NDIM >::apply_1d_realspace_push(), apply_BSH(), apply_BSH_new(), madness::MP2::apply_exchange_vector(), madness::TDHF::apply_excitation_operators(), madness::TDHF::apply_G(), madness::GTHPseudopotential< Q >::apply_potential(), apply_potential(), madness::SCF::apply_potential(), madness::FunctionImpl< T, NDIM >::apply_source_driven(), madness::projector_irrep::apply_symmetry_operators(), madness::FunctionImpl< T, NDIM >::binaryXX(), madness::Solver< T, NDIM >::build_fock_matrix(), Calculation::calc_optimal_coeffs(), madness::change_tree_state(), madness::Function< T, NDIM >::clear(), madness::Cloud::clear_cache(), madness::compress(), madness::PNO::compute_fluctuation_matrix(), madness::F12Potentials::compute_fQc_integrals_ij(), madness::Exchange< T, NDIM >::ExchangeImpl< T, NDIM >::compute_K_tile(), madness::NemoBase::compute_kinetic_energy(), madness::CC2::compute_local_coupling(), madness::Nemo::compute_nemo_potentials(), madness::Exchange< T, NDIM >::ExchangeImpl< T, NDIM >::MacroTaskExchangeSimple::compute_offdiagonal_batch_in_symmetric_matrix(), madness::F12Potentials::compute_regularized_fluctuation_matrix(), madness::F12Potentials::compute_regularized_fluctuation_matrix(), madness::SCF::compute_residual(), Calculation::compute_residuals(), madness::Solver< T, NDIM >::compute_rho(), madness::Solver< T, NDIM >::compute_rho_slow(), madness::conj(), madness::convert(), madness::response_space::copy(), madness::copy(), madness::copy(), 
madness::copy(), madness::FunctionImpl< T, NDIM >::copy_coeffs(), madness::PotentialManager::core_projection(), madness::MacroTaskQ::create_worlds(), madness::cross(), madness::SCF::derivatives(), dipole_generator(), madness::div(), madness::FunctionImpl< T, NDIM >::do_dirac_convolution(), madness::Znemo::do_step_restriction(), madness::SCF::do_step_restriction(), doit(), dostuff(), madness::Znemo::timer::end(), molresponse::end_timer(), molresponse::end_timer(), energy(), energy(), madness::BinSorter< T, inserterT >::finish(), madness::FunctionImpl< T, NDIM >::flo_unary_op_node_inplace(), madness::FunctionImpl< T, NDIM >::flo_unary_op_node_inplace(), madness::FunctionImpl< T, NDIM >::gaxpy(), madness::gaxpy(), madness::gaxpy(), gaxpy1(), madness::FunctionImpl< T, NDIM >::gaxpy_ext(), madness::FunctionImpl< T, NDIM >::gaxpy_inplace(), madness::FunctionImpl< T, NDIM >::gaxpy_inplace_reconstructed(), madness::MacroTaskQ::get_scheduled_task_number(), madness::StrongOrthogonalityProjector< T, NDIM >::get_vectors_for_outer_product(), madness::WorldDCPmapInterface< keyT >::global_size(), madness::grad(), madness::grad_ble_one(), madness::grad_ble_two(), madness::grad_bpsline_two(), madness::grad_bspline_one(), madness::grad_bspline_three(), CoreFittingTarget::gradient(), ground_exchange(), ground_exchange_multiworld(), madness::BasisFunctions::guess_with_exop(), madness::hartree_product(), madness::FunctionImpl< T, NDIM >::hartree_product(), madness::Solver< T, NDIM >::initial_guess(), initial_loadbal(), initialize_calc_params(), madness::inner(), madness::inner(), madness::innerXX(), madness::timer::interrupt(), madness::CC2::iterate_singles(), madness::SCF::kinetic_energy_matrix(), madness::Kinetic< T, NDIM >::kinetic_energy_matrix(), madness::Kinetic< T, NDIM >::kinetic_energy_matrix(), line_plot(), madness::FunctionImpl< T, NDIM >::load(), madness::archive::ArchiveLoadImpl< ParallelInputArchive< localarchiveT >, WorldContainer< keyT, valueT > >::load(), 
madness::LoadBalanceDeux< NDIM >::load_balance(), madness::SCF::load_mos(), Calculation::load_mos(), madness::Solver< T, NDIM >::load_orbitals(), madness::load_quadrature(), madness::SCF::loadbal(), loadbal(), loadbal(), loadbal(), main(), madness::main(), main(), main(), DF::make_component_lineplots(), DF::make_component_logplots(), madness::Nemo::make_density(), madness::SCF::make_density(), madness::SCF::make_density(), MiniDFT::make_density(), DF::make_density_lineplots(), madness::SCF::make_fock_matrix(), madness::Zcis::make_guess(), madness::TDHF::make_perturbed_fock_matrix(), madness::CompositeFunctorInterface< T, NDIM, MDIM >::make_redundant(), Calculation::make_reference(), Calculation::make_Upsi(), madness::TDHF::make_virtuals(), madness::FunctionImpl< T, NDIM >::make_Vphi(), madness::FunctionImpl< T, NDIM >::make_Vphi_only(), MiniDFT::makeao(), makeao_slow(), madness::matrix_dot(), madness::matrix_inner(), madness::matrix_inner_old(), madness::matrix_mul_sparse(), madness::FunctionImpl< T, NDIM >::merge_trees(), madness::mul(), madness::mul(), madness::mul(), madness::mul(), madness::mul_sparse(), madness::mul_sparse(), madness::EigSolverOp< T, NDIM >::multi_op_o(), madness::EigSolverOp< T, NDIM >::multi_op_r(), madness::FunctionImpl< T, NDIM >::multi_to_multi_op_values(), madness::Function< T, NDIM >::multiop_values(), madness::FunctionImpl< T, NDIM >::multiop_values(), madness::multiply(), madness::MP2::multiply_with_0th_order_Hamiltonian(), madness::FunctionImpl< T, NDIM >::mulXX(), madness::FunctionImpl< T, NDIM >::mulXXvec(), madness::nonstandard(), madness::norm2(), madness::X_space::norm2s(), madness::norm2s(), madness::norm2s(), madness::norm2s_T(), madness::norm_tree(), madness::norm_tree(), madness::normalize(), madness::pg_operator::operator()(), MyDerivativeOperator< T, NDIM >::operator()(), madness::DerivativeOperator< T, NDIM >::operator()(), madness::partial_mul(), madness::plot_along(), madness::plot_along(), madness::plot_line(), 
madness::plot_line(), madness::plot_line(), madness::plot_line(), madness::plot_line(), madness::plot_line(), Calculation::plot_p(), madness::plotdx(), madness::plotvtk_begin(), madness::plotvtk_data(), madness::plotvtk_data(), madness::plotvtk_end(), madness::MolecularOrbitals< T, NDIM >::post_process_mos(), preloadbal(), madness::XCOperator< T, NDIM >::prep_xc_args(), madness::XCOperator< T, NDIM >::prep_xc_args_response(), madness::WorldProfile::print(), madness::WorldDCPmapInterface< keyT >::print_data_sizes(), madness::Solver< T, NDIM >::print_fock_matrix_eigs(), madness::Solver< T, NDIM >::print_potential_matrix_eigs(), madness::print_stats(), madness::MacroTaskQ::print_taskq(), madness::SCF::project(), madness::Solver< T, NDIM >::project_ao_basis(), madness::SCF::project_ao_basis_only(), madness::projector_irrep::project_on_irreps(), madness::Function< T, NDIM >::project_out(), madness::FunctionImpl< T, NDIM >::project_out(), propagate(), propagate(), GroundStateCalculation::read(), realmain(), madness::reconstruct(), madness::FunctionImpl< T, NDIM >::recursive_apply(), madness::FunctionImpl< T, NDIM >::recursive_apply(), madness::WorldDCPmapInterface< keyT >::redistribute(), madness::FunctionImpl< T, NDIM >::refine(), madness::refine(), madness::WorldContainerImpl< keyT, valueT, hashfunT >::replicate(), madness::Cloud::replicate(), madness::CompositeFunctorInterface< T, NDIM, MDIM >::replicate_low_dim_functions(), madness::Solver< T, NDIM >::reproject(), response_exchange_multiworld(), madness::timer::resume(), madness::rot(), madness::Znemo::rotate_subspace(), madness::SCF::rotate_subspace(), madness::Nemo::rotate_subspace(), madness::SCF::rotate_subspace(), madness::MacroTaskQ::run_all(), madness::scale(), madness::scale(), madness::scale(), madness::scale(), madness::FunctionImpl< T, NDIM >::scale_oop(), serial_invoke(), madness::Function< T, NDIM >::set_impl(), madness::set_thresh(), simple_example(), madness::standard(), madness::MyTimer::start(), 
molresponse::start_timer(), DF::start_timer(), START_TIMER(), START_TIMER(), START_TIMER(), madness::Solver< T, NDIM >::START_TIMER(), madness::START_TIMER(), madness::MP2::START_TIMER(), madness::startup(), madness::FunctionImpl< T, NDIM >::store(), madness::archive::ArchiveStoreImpl< ParallelOutputArchive< localarchiveT >, WorldContainer< keyT, valueT > >::store(), madness::archive::ArchiveStoreImpl< ParallelOutputArchive< VectorOutputArchive >, WorldContainer< keyT, valueT > >::store(), madness::Cloud::store(), madness::Cloud::store_other(), madness::Cloud::store_other(), madness::sub(), madness::sub(), madness::LoadBalanceDeux< NDIM >::sum(), madness::sum(), madness::Znemo::timer::tag(), madness::timer::tag(), test(), test0(), test1(), test10(), test11(), test12(), test13(), test14(), test15(), test2(), test5(), test6(), test6a(), test7(), test_apply_push_1d(), test_basic(), test_bsh(), TEST_CASE(), test_conv(), test_conversion(), test_convolution(), test_diff(), test_florian(), test_gconv(), test_io(), test_local(), test_math(), test_multi(), test_multi_world(), test_opdir(), test_periodic(), test_periodic1(), test_periodic2(), test_periodic_bsh(), test_plot(), test_qm(), test_transform(), test_unaryop(), madness::transform(), madness::transform(), madness::transform(), madness::transform(), madness::Zcis::transform(), transform(), madness::transform_reconstructed(), madness::truncate(), madness::truncate(), madness::SCF::twoint(), madness::FunctionImpl< T, NDIM >::unary_op_coeff_inplace(), madness::FunctionImpl< T, NDIM >::unary_op_node_inplace(), madness::FunctionImpl< T, NDIM >::unary_op_value_inplace(), madness::FunctionImpl< T, NDIM >::unaryXX(), madness::FunctionImpl< T, NDIM >::unaryXXvalues(), madness::SubspaceK< T, NDIM >::update_subspace(), madness::Subspace< T, NDIM >::update_subspace(), madness::SCF::update_subspace(), madness::MolecularEnergy::value(), CoreFittingTarget::value(), madness::Function< T, NDIM >::vmulXX(), madness::FunctionImpl< T, 
NDIM >::vtransform(), work_even(), work_odd(), madness::zero_functions(), madness::X_space::zero_functions(), and madness::zero_functions_compressed().
|
private |
Implementation of fence.
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
[in] | epilogue | the action to execute (by the calling thread) immediately after the fence |
[in] | pause_during_epilogue | whether to suspend work while executing epilogue |
[in] | debug | set to true to print progress statistics using madness::print(); the default is false. |
pause_during_epilogue=false is supported.
Runs a Dijkstra-like termination algorithm on a binary tree by locally ensuring ntask=0 and all AM sent and processed, and then participating in a global sum of nsent and nrecv. Then globally checks that nsent=nrecv and that both are constant over two traversals. We are then sure that all tasks and AM are processed and that there are no AM in flight.
References madness::World::am, madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), broadcast(), debug, deferred_, madness::WorldTaskQueue::fence(), forbid_fence_, madness::WorldAmInterface::free_managed_buffers(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), MADNESS_ASSERT, MADNESS_CHECK, madness::World::mpi, MPI_BYTE, madness::WorldAmInterface::nrecv, madness::WorldAmInterface::nsent, madness::print(), PROFILE_MEMBER_FUNC, madness::World::rank(), madness::WorldTaskQueue::size(), sum, madness::World::taskq, SafeMPI::Intracomm::unique_tag(), and world_.
Referenced by fence().
|
inlinestaticprivate |
References madness::arg(), madness::Group::get_group(), and madness::TaskAttributes::hipri().
|
inlineprivate |
References madness::World::am, madness::RMI::ATTR_UNORDERED, madness::copy_am_arg(), madness::Group::id(), madness::Group::make_tree(), madness::new_am_arg(), madness::Group::remote_update(), madness::WorldAmInterface::send(), madness::detail::DistCache< keyT >::set_cache_value(), and world_.
Referenced by bcast_internal().
|
inlineprivate |
|
inline |
Lazy sync.
Lazy sync functions are asynchronous barriers with a nullary functor that is called after all processes have called it with the same key. You can think of lazy_sync as an asynchronous barrier. The lazy_sync functor must have the following signature:
keyT | The key type |
opT | The operation type |
key | The sync key |
op | The sync operation to be executed on this process |
Note: the caller must ensure that key does not conflict with other calls to lazy_sync. Keys may be reused after the associated operation has finished.
References madness::WorldTaskQueue::add(), SafeMPI::Intracomm::binary_tree_info(), madness::TaskAttributes::hipri(), madness::World::mpi, op(), madness::World::size(), madness::World::taskq, and world_.
|
inline |
Group lazy sync.
Lazy sync functions are asynchronous barriers with a nullary functor that is called after all processes in the group have called it with the same key. You can think of lazy_sync as an asynchronous barrier. The op functor must have the following signature:
keyT | The key type |
opT | The operation type |
key | The sync key |
op | The sync operation to be executed on this process |
Note: the caller must ensure that key does not conflict with other calls to lazy_sync. Keys may be reused after the associated operation has finished.
References madness::WorldTaskQueue::add(), madness::Group::empty(), madness::Group::get_world(), madness::TaskAttributes::hipri(), madness::World::id(), MADNESS_ASSERT, madness::Group::make_tree(), op(), madness::Group::size(), madness::World::taskq, and world_.
|
inlineprivate |
Lazy sync children task.
Send a signal to the child processes in the binary tree for a lazy sync operation. After the signal has been sent to the children, the sync operation, op, will be run.
keyT | The key type |
opT | The sync operation type |
child0 | The first child process of this process in the binary tree |
child1 | The second child process of this process in the binary tree |
key | The key associated with the sync operation |
op | The sync operation that will be run |
References op(), and send_internal().
|
inlineprivate |
Start a distributed lazy sync operation.
key | The sync key |
op | The sync operation to be executed on this process |
References madness::WorldTaskQueue::add(), madness::TaskAttributes::hipri(), op(), madness::Future< T >::probe(), madness::World::rank(), send_internal(), madness::World::taskq, and world_.
|
inlineprivate |
Lazy sync parent task.
Send signal to the parent process in the binary tree for a lazy sync operation.
keyT | The key type |
parent | The parent process of this process in the binary tree |
key | The lazy sync key |
References send_internal().
Inplace global max while still processing AM & tasks.
Referenced by madness::Cloud::print_size(), madness::print_stats(), test15(), and madness::time_transform().
|
inline |
Returns the maximum size of messages (in bytes) sent by reduce and broadcast.
References max_reducebcast_msg_size_.
Referenced by broadcast(), concat0(), and reduce().
Inplace global min while still processing AM & tasks.
Referenced by min(), madness::Cloud::print_size(), madness::print_stats(), and madness::time_transform().
Inplace global product while still processing AM & tasks.
|
inlinestatic |
Receive data from source.
valueT | The data type stored in cache |
keyT | The key type |
source | The process that is sending the data to this process |
key | The key associated with the received data |
Note: the caller must ensure that key does not conflict with other calls to recv. Keys may be reused after the associated operation has finished.
References source().
|
inlinestaticprivate |
Receive data from remote node.
valueT | The data type stored in cache |
key | The distributed ID |
|
inline |
Distributed group reduce.
The reduce functor must have the following signature:
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
group_root | The group process that will receive the result of the reduction |
group | The group that will perform the reduction |
madness::Exception | When group is empty |
madness::Exception | When group is not registered |
madness::Exception | When the world id of group is not equal to that of the world used to construct this object |
madness::Exception | When this process is not in the group |
madness::Exception | When group_root is less than zero or greater than or equal to group size. |
Note: the caller must ensure that key does not conflict with other calls to reduce. Keys may be reused after the associated operation has finished.
References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), MADNESS_ASSERT, madness::Group::make_tree(), op(), madness::Group::size(), and world_.
|
inline |
Distributed reduce.
The reduce functor must have the following signature:
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
root | The process that will receive the result of the reduction |
Note: the caller must ensure that key does not conflict with other calls to reduce. Keys may be reused after the associated operation has finished.
References SafeMPI::Intracomm::binary_tree_info(), MADNESS_ASSERT, madness::World::mpi, op(), madness::World::size(), and world_.
|
inline |
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.
Optimizations can be added for long messages and to reduce the memory footprint
References madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), broadcast(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), MADNESS_ASSERT, max_reducebcast_msg_size(), madness::World::mpi, MPI_BYTE, op(), posix_memalign(), T(), SafeMPI::Intracomm::unique_tag(), and world_.
|
inlineprivate |
Distributed reduce.
tagT | The tag type to be added to the key type |
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
root | The process that will receive the result of the reduction |
References madness::WorldTaskQueue::add(), madness::Future< T >::default_initializer(), madness::TaskAttributes::hipri(), op(), madness::World::rank(), send_internal(), madness::World::taskq, and world_.
|
inlinestaticprivate |
References madness::Future< T >::get(), MADNESS_ASSERT, and op().
|
inlinestaticprivate |
References op().
|
inline |
Send value to dest.
keyT | The key type |
valueT | The value type (this may be a Future type) |
dest | The process where the data will be sent |
key | The key that is associated with the data |
value | The data to be sent to dest |
Note: the caller must ensure that key does not conflict with other calls to send. Keys may be reused after the associated operation has finished.
References madness::World::rank(), send_internal(), and world_.
|
inlineprivate |
Send value to dest.
Send non-future data to dest.
keyT | The key type |
valueT | The value type |
dest | The node where the data will be sent |
key | The key that is associated with the data |
value | The data to be sent to dest |
References madness::WorldTaskQueue::add(), madness::TaskAttributes::hipri(), madness::World::rank(), madness::World::taskq, and world_.
Referenced by lazy_sync_children(), lazy_sync_internal(), lazy_sync_parent(), madness::WorldGopInterface::DelayedSend< keyT, valueT >::notify(), reduce_internal(), and send().
|
inlineprivate |
Send value to dest.
Send data that is stored in a future to dest. The data in value is only sent to the remote process once it has been set.
keyT | The key type |
valueT | The value type |
dest | The node where the data will be sent |
key | The key that is associated with the data |
value | The data to be sent to dest |
References madness::WorldTaskQueue::add(), madness::Future< T >::get(), madness::TaskAttributes::hipri(), madness::Future< T >::probe(), madness::World::rank(), madness::World::taskq, and world_.
void madness::WorldGopInterface::serial_invoke | ( | std::function< void()> | action | ) |
Executes an action on a single (this) thread after ensuring all other work is done.
[in] | action | the action to execute (by the calling thread) |
References fence().
|
inline |
|
inline |
Set forbid_fence flag to new value and return old value.
References forbid_fence_, and status.
Referenced by madness::innerXX(), and madness::MacroTaskQ::run_all().
|
inline |
Set the maximum size of messages (in bytes) sent by reduce and broadcast.
sz | the maximum size of messages (in bytes) sent by reduce and broadcast |
sz>0
References MADNESS_ASSERT, and max_reducebcast_msg_size_.
Referenced by test14().
Inplace global sum while still processing AM & tasks.
Referenced by madness::DistributedMatrix< T >::copy_to_replicated(), madness::DistributedMatrix< T >::copy_to_replicated_patch(), doit(), madness::SystolicFixOrbitalOrders::end_iteration_hook(), madness::SystolicPMOrbitalLocalize::end_iteration_hook(), madness::WorldDCPmapInterface< keyT >::global_size(), madness::inner(), madness::inner(), line_plot(), main(), madness::matrix_dot(), madness::matrix_inner(), madness::matrix_inner_old(), madness::norm2(), madness::norm2s(), madness::norm2s(), madness::norm2s_T(), madness::WorldDCPmapInterface< keyT >::print_data_sizes(), madness::Cloud::print_size(), madness::print_stats(), madness::Exchange< T, NDIM >::ExchangeImpl< T, NDIM >::print_timer(), madness::Cloud::print_timings(), madness::MacroTaskQ::run_all(), madness::archive::ArchiveStoreImpl< ParallelOutputArchive< VectorOutputArchive >, WorldContainer< keyT, valueT > >::store(), test1(), madness::SubspaceK< T, NDIM >::update_subspace(), madness::Subspace< T, NDIM >::update_subspace(), and madness::SCF::update_subspace().
|
friend |
|
private |
Debug mode.
Referenced by set_debug().
|
private |
Deferred cleanup object.
Referenced by ~WorldGopInterface(), and fence_impl().
|
private |
forbid calling fence() in case of several active worlds
Referenced by fence_impl(), and set_forbid_fence().
|
private |
maximum size of messages (in bytes) sent by reduce and broadcast
Referenced by max_reducebcast_msg_size(), and set_max_reducebcast_msg_size().
|
private |
World object that this is a part of.
Referenced by all_reduce(), all_reduce(), barrier(), bcast(), bcast(), bcast_internal(), bcast_internal(), bcast_task(), broadcast(), broadcast_serializable(), concat0(), fence_impl(), group_bcast_task(), lazy_sync(), lazy_sync(), lazy_sync_internal(), reduce(), reduce(), reduce(), reduce_internal(), send(), send_internal(), and send_internal().