1#ifndef MADNESS_SYSTOLIC_H
2#define MADNESS_SYSTOLIC_H
41# include <tbb/parallel_for.h>
58 std::vector<int64_t>
map;
64 tbb::parallel_for(0, nthread, [=](
const int id) {
78 tbb::parallel_for(0, nthread,
102 tbb::parallel_for(0, nthread, [=](
const int id) {
116 A.local_colrange(ilo, ihi);
123 int nthread =
env.nthread();
271 else if (
rank == 0) {
282 std::vector<T> buf(
rowdim);
308 virtual void get_id(std::pair<void*,unsigned short>&
id)
const {
321 ,
nproc(
A.process_coldim()*
A.process_rowdim())
324 ,
nlocal((
A.local_coldim()+1)/2)
354 A.get_colrange(
p,
lo, hi);
408 const int nthread =
env.nthread();
412 done = tbb::parallel_reduce(tbb::blocked_range<int>(0,nthread),
true,
413 [=] (
const tbb::blocked_range<int>& range,
bool init) ->
bool {
414 for(
int id = range.begin();
id < range.end(); ++id)
419 [] (
const bool l,
const bool r) {
return l && r; });
456 return A.get_world();
This header should include pretty much everything needed for the parallel runtime.
Definition test_ar.cc:118
const long * dims() const
Returns the array of tensor dimensions.
Definition basetensor.h:153
long size() const
Returns the number of elements in the tensor.
Definition basetensor.h:138
Manages data associated with a row/column/block distributed array.
Definition distributed_matrix.h:388
static std::enable_if< detail::function_traits< fnT >::value||detail::memfunc_traits< fnT >::value >::type make_id(std::pair< void *, unsigned short > &id, fnT fn)
Definition thread.h:922
Base class for parallel algorithms that employ a systolic loop to generate all row pairs in parallel.
Definition systolic.h:48
const int64_t nproc
No. of processes with rows of the matrix (not size of world)
Definition systolic.h:51
const int64_t rowdim
A(coldim,rowdim)
Definition systolic.h:53
const int tag
MPI tag to be used for messages.
Definition systolic.h:56
const int64_t nlocal
No. of local pairs.
Definition systolic.h:54
void solve_sequential()
Invoked by the user to run the algorithm with one thread, mostly for debugging.
Definition systolic.h:443
World & get_world() const
Returns a reference to the world.
Definition systolic.h:455
DistributedMatrix< T > & A
Definition systolic.h:50
virtual void kernel(int i, int j, T *rowi, T *rowj)=0
Threadsafe routine to apply the operation to rows i and j of the matrix.
virtual void start_iteration_hook(const TaskThreadEnv &env)
Invoked by all threads at the start of each iteration.
Definition systolic.h:392
void iteration(const int nthread)
Definition systolic.h:61
int64_t get_rowdim() const
Returns length of row.
Definition systolic.h:448
std::vector< T * > iptr
Definition systolic.h:57
const int64_t coldim
A(coldim,rowdim)
Definition systolic.h:52
virtual ~SystolicMatrixAlgorithm()
Definition systolic.h:370
virtual bool converged(const TaskThreadEnv &env) const =0
Invoked simultaneously by all threads after each sweep to test for convergence.
virtual void get_id(std::pair< void *, unsigned short > &id) const
Get the task id.
Definition systolic.h:308
const ProcessID rank
Rank of current process.
Definition systolic.h:55
void run(World &world, const TaskThreadEnv &env)
Invoked by the task queue to run the algorithm with multiple threads.
Definition systolic.h:407
ProcessID get_rank() const
Returns rank of this process in the world.
Definition systolic.h:460
void cycle()
Cycles data around the loop ... only one thread should invoke this.
Definition systolic.h:183
SystolicMatrixAlgorithm(DistributedMatrix< T > &A, int tag, int nthread=ThreadPool::size()+1)
A must be a column-distributed matrix with an even column tile >= 2.
Definition systolic.h:319
std::vector< T * > jptr
Indirection for implementing the cyclic buffer. NOTE: open question - should this be volatile?
Definition systolic.h:57
virtual void end_iteration_hook(const TaskThreadEnv &env)
Invoked by all threads at the end of each iteration before convergence test.
Definition systolic.h:400
void unshuffle()
Call this after iterating to restore correct order of rows in original matrix.
Definition systolic.h:164
std::vector< int64_t > map
Used to keep track of actual row indices.
Definition systolic.h:58
int64_t get_coldim() const
Returns length of column.
Definition systolic.h:452
void set_nthread(int nthread)
Set the number of threads.
Definition thread.h:420
All world tasks must be derived from this public interface.
Definition taskfn.h:69
volatile World * world
Definition taskfn.h:72
Used to pass information about the thread environment to a user's task.
Definition thread.h:472
A tensor is a multidimensional array.
Definition tensor.h:317
T * ptr()
Returns a pointer to the internal data.
Definition tensor.h:1825
static std::size_t size()
Returns the number of threads in the pool.
Definition thread.h:1419
SafeMPI::Request Irecv(T *buf, int count, int source, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Async receive data of up to count elements from process source.
Definition worldmpi.h:321
void Send(const T *buf, long lenbuf, int dest, int tag=SafeMPI::DEFAULT_SEND_RECV_TAG) const
Send array of lenbuf elements to process dest.
Definition worldmpi.h:347
void Recv(T *buf, long lenbuf, int src, int tag) const
Receive data of up to lenbuf elements from process src.
Definition worldmpi.h:374
A parallel world class.
Definition world.h:132
static void await(SafeMPI::Request &request, bool dowork=true)
Wait for an MPI request to complete.
Definition world.h:534
WorldMpiInterface & mpi
MPI interface.
Definition world.h:204
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition derivatives.cc:72
static double lo
Definition dirac-hatom.cc:23
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:34
#define MADNESS_ASSERT(condition)
Assert a condition that should be free of side-effects since in release builds this might be a no-op.
Definition madness_exception.h:134
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
static XNonlinearSolver< std::vector< Function< T, NDIM > >, T, vector_function_allocator< T, NDIM > > nonlinear_vector_solver(World &world, const long nvec)
Definition nonlinsol.h:371
Defines and implements most of Tensor.
int ProcessID
Used to clearly identify process number/rank.
Definition worldtypes.h:43