33 #ifndef MADNESS_MISC_INTERPOLATION_1D_H__INCLUDED
34 #define MADNESS_MISC_INTERPOLATION_1D_H__INCLUDED
40 #include "../world/world_task_queue.h"
71 double x0_2 = x[0] * x[0], x1_2 = x[1] * x[1], x2_2 = x[2] * x[2],
73 double x0_3 = x[0] * x[0] * x[0], x1_3 = x[1] * x[1] * x[1],
74 x2_3 = x[2] * x[2] * x[2], x3_3 = x[3] * x[3] * x[3];
76 a[0] = -(-x0_3 * x2_2 * x[3] *
f[1] + x0_3 * x[2] * x3_2 *
f[1] -
77 x0_3 *
f[3] * x[2] * x1_2 + x0_3 * x[3] *
f[2] * x1_2 +
78 x0_3 *
f[3] * x2_2 * x[1] - x0_3 * x3_2 *
f[2] * x[1] +
79 x0_2 * x1_3 *
f[3] * x[2] - x0_2 * x1_3 *
f[2] * x[3] +
80 x0_2 * x3_3 *
f[2] * x[1] + x0_2 *
f[1] * x2_3 * x[3] -
81 x0_2 *
f[1] * x3_3 * x[2] - x0_2 *
f[3] * x2_3 * x[1] +
82 x[0] * x3_2 *
f[2] * x1_3 - x[0] *
f[3] * x2_2 * x1_3 +
83 x[0] * x1_2 *
f[3] * x2_3 - x[0] * x1_2 *
f[2] * x3_3 -
84 x[0] *
f[1] * x3_2 * x2_3 + x[0] *
f[1] * x2_2 * x3_3 -
85 f[0] * x2_3 * x1_2 * x[3] +
f[0] * x2_2 * x1_3 * x[3] +
86 f[0] * x3_2 * x2_3 * x[1] -
f[0] * x3_3 * x2_2 * x[1] +
87 f[0] * x3_3 * x1_2 * x[2] -
f[0] * x3_2 * x1_3 * x[2]) /
88 (-x2_2 * x[0] * x3_3 + x2_2 * x[0] * x1_3 - x0_2 * x[3] * x2_3 +
89 x0_2 * x3_3 * x[2] - x0_2 * x[1] * x3_3 + x0_2 * x[1] * x2_3 +
90 x0_2 * x1_3 * x[3] - x0_2 * x1_3 * x[2] + x3_2 * x[0] * x2_3 -
91 x3_2 * x[0] * x1_3 + x[3] * x2_3 * x1_2 - x3_2 * x2_3 * x[1] +
92 x3_3 * x2_2 * x[1] - x3_3 * x[2] * x1_2 + x[0] * x3_3 * x1_2 -
93 x[0] * x2_3 * x1_2 - x0_3 * x3_2 * x[2] - x0_3 * x[3] * x1_2 +
94 x0_3 * x3_2 * x[1] + x[2] * x3_2 * x1_3 - x2_2 * x[3] * x1_3 +
95 x0_3 * x2_2 * x[3] - x0_3 * x2_2 * x[1] + x0_3 * x[2] * x1_2);
96 a[1] = (-x2_3 * x1_2 *
f[0] + x3_2 * x2_3 *
f[0] + x2_3 * x0_2 *
f[1] +
97 x1_2 *
f[3] * x2_3 - x2_3 * x0_2 *
f[3] -
f[1] * x3_2 * x2_3 -
98 f[3] * x2_2 * x1_3 - x3_3 * x2_2 *
f[0] +
f[1] * x2_2 * x3_3 +
99 x2_2 * x1_3 *
f[0] -
f[1] * x2_2 * x0_3 +
f[3] * x2_2 * x0_3 -
100 x1_3 * x3_2 *
f[0] - x0_2 * x1_3 *
f[2] -
f[3] * x0_3 * x1_2 +
101 f[1] * x3_2 * x0_3 + x1_2 *
f[2] * x0_3 + x3_3 *
f[0] * x1_2 -
102 x3_2 *
f[2] * x0_3 -
f[1] * x3_3 * x0_2 + x0_2 * x3_3 *
f[2] -
103 x1_2 *
f[2] * x3_3 + x3_2 *
f[2] * x1_3 + x0_2 * x1_3 *
f[3]) /
105 (-x2_2 * x0_2 * x[3] - x2_2 * x[1] * x3_2 + x2_2 * x1_2 * x[3] +
106 x2_2 * x[0] * x3_2 - x2_2 * x[0] * x1_2 + x2_2 * x0_2 * x[1] +
107 x[2] * x[0] * x1_3 + x[2] * x0_3 * x[3] - x[2] * x0_3 * x[1] -
108 x[2] * x1_3 * x[3] - x[2] * x0_2 * x3_2 + x[2] * x1_2 * x3_2 -
109 x[2] * x[3] * x[0] * x1_2 + x[2] * x[3] * x0_2 * x[1] +
110 x0_3 * x1_2 - x0_2 * x1_3 + x[3] * x[0] * x1_3 -
111 x[3] * x0_3 * x[1] - x3_2 * x[0] * x1_2 + x3_2 * x0_2 * x[1]);
112 a[2] = -(-x1_3 *
f[3] * x[2] + x1_3 *
f[2] * x[3] + x1_3 * x[0] *
f[3] +
113 x1_3 *
f[0] * x[2] - x1_3 * x[0] *
f[2] - x1_3 *
f[0] * x[3] +
114 f[3] * x2_3 * x[1] -
f[3] * x0_3 * x[1] - x[1] * x2_3 *
f[0] +
115 x[1] *
f[2] * x0_3 + x3_3 *
f[0] * x[1] - x3_3 *
f[2] * x[1] +
116 f[1] * x[3] * x0_3 -
f[1] * x[0] * x3_3 - x3_3 *
f[0] * x[2] +
117 x3_3 * x[0] *
f[2] -
f[2] * x0_3 * x[3] + x2_3 *
f[0] * x[3] +
118 f[1] * x3_3 * x[2] -
f[1] * x2_3 * x[3] + x2_3 * x[0] *
f[1] -
119 x2_3 * x[0] *
f[3] +
f[3] * x0_3 * x[2] - x[2] *
f[1] * x0_3) /
120 (x[3] * x2_2 - x3_2 * x[2] + x[1] * x3_2 - x[1] * x2_2 -
121 x1_2 * x[3] + x1_2 * x[2]) /
122 (-x[2] * x[1] * x[3] + x[1] * x[2] * x[0] - x0_2 * x[1] +
123 x[1] * x[3] * x[0] + x[2] * x[3] * x[0] + x0_3 - x0_2 * x[2] -
125 a[3] = (x[0] *
f[3] * x1_2 - x0_2 * x[3] *
f[2] + x2_2 * x[0] *
f[1] +
126 x0_2 *
f[3] * x[2] - x3_2 *
f[2] * x[1] -
f[0] * x3_2 * x[2] -
127 f[3] * x[2] * x1_2 - x2_2 * x[0] *
f[3] -
f[0] * x2_2 * x[1] +
128 f[3] * x2_2 * x[1] + x0_2 *
f[1] * x[3] + x[2] * x3_2 *
f[1] -
129 x0_2 *
f[1] * x[2] +
f[0] * x[2] * x1_2 + x[3] *
f[2] * x1_2 +
130 f[0] * x3_2 * x[1] + x3_2 * x[0] *
f[2] - x[0] *
f[2] * x1_2 -
131 f[0] * x[3] * x1_2 - x0_2 * x[1] *
f[3] + x0_2 * x[1] *
f[2] +
132 f[0] * x2_2 * x[3] - x2_2 * x[3] *
f[1] - x3_2 * x[0] *
f[1]) /
133 (-x2_2 * x[0] * x3_3 + x2_2 * x[0] * x1_3 - x0_2 * x[3] * x2_3 +
134 x0_2 * x3_3 * x[2] - x0_2 * x[1] * x3_3 + x0_2 * x[1] * x2_3 +
135 x0_2 * x1_3 * x[3] - x0_2 * x1_3 * x[2] + x3_2 * x[0] * x2_3 -
136 x3_2 * x[0] * x1_3 + x[3] * x2_3 * x1_2 - x3_2 * x2_3 * x[1] +
137 x3_3 * x2_2 * x[1] - x3_3 * x[2] * x1_2 + x[0] * x3_3 * x1_2 -
138 x[0] * x2_3 * x1_2 - x0_3 * x3_2 * x[2] - x0_3 * x[3] * x1_2 +
139 x0_3 * x3_2 * x[1] + x[2] * x3_2 * x1_3 - x2_2 * x[3] * x1_3 +
140 x0_3 * x2_2 * x[3] - x0_3 * x2_2 * x[1] + x0_3 * x[2] * x1_2);
146 World* world_ptr =
nullptr) {
147 const bool use_threads = npts_per_task <
npt && world_ptr;
150 const auto iend =
npt - 2;
151 for (
int i = 1; i < iend; i += npts_per_task) {
152 auto task = [istart = i,
this, &x, &
p, npts_per_task, iend]() {
153 const auto ifence = std::min(istart + npts_per_task, iend);
154 for (
int i = istart; i < ifence; ++i) {
155 double mid = (x[i] + x[i + 1]) * 0.5;
156 double y[4] = {x[i - 1] - mid, x[i] - mid, x[i + 1] - mid,
158 this->a[i * 5] = mid;
163 world_ptr->taskq.add(
task);
168 world_ptr->taskq.fence();
171 for (
int j = 0; j < 5; ++j) {
173 a[5 *
npt - 5 + j] =
a[5 *
npt - 10 + j] =
a[5 *
npt - 15 + j];
185 template <
typename functionT>
193 std::vector<T>
p(
npt);
194 std::vector<double> x(
npt);
196 const auto npts_per_task =
std::max(min_npts_per_task, (
npt + nthreads - 1)/ nthreads);
197 const auto use_threads = world_ptr && nthreads && npts_per_task <
npt;
198 for (
int i = 0; i <
npt; i += npts_per_task) {
199 auto task = [istart = i, npts_per_task,
npt, &x, &
f, &
p,
this]() {
200 const auto ifence = std::min(istart + npts_per_task,
npt);
201 for (
int i = istart; i < ifence; ++i) {
202 x[i] = this->lo + i * this->
h;
229 template <
typename functionT>
242 template <
typename functionT>
249 const std::vector<T> &y)
253 if ((
int)y.size() <
npt)
254 throw "Insufficient y-points";
256 std::vector<double> x(
npt);
257 for (
int i = 0; i <
npt; ++i)
265 int i = int((y -
lo) *
rh);
266 if (i < 0 || i >=
npt)
267 throw "Out of range point";
271 return (
a[i + 1] + y1 *
a[i + 2]) + yy * (
a[i + 3] + y1 *
a[i + 4]);
275 double maxabserr = 0.0;
277 for (
int i = 0; i < 7 *
npt; ++i) {
278 double x =
lo + h7 * i;
289 template <
typename T>
An class for 1-D data interpolation based on cubic polynomials.
Definition: interpolation_1d.h:60
double h
Grid spacing.
Definition: interpolation_1d.h:64
int npt
No. of grid points.
Definition: interpolation_1d.h:66
CubicInterpolationTable(World &world, double lo, double hi, int npt, const functionT &f, const int min_npts_per_task=min_npts_per_task_default)
Definition: interpolation_1d.h:243
CubicInterpolationTable(World *world_ptr, double lo, double hi, int npt, const functionT &f, const int min_npts_per_task=std::numeric_limits< double >::max())
Definition: interpolation_1d.h:186
double rh
1/h
Definition: interpolation_1d.h:65
std::vector< T > a
(1+4)*npt vector of x and polynomial coefficients
Definition: interpolation_1d.h:67
CubicInterpolationTable(double lo, double hi, int npt, const std::vector< T > &y)
Definition: interpolation_1d.h:248
double hi
Interpolation is in range [lo,hi].
Definition: interpolation_1d.h:63
CubicInterpolationTable()
Definition: interpolation_1d.h:221
void make_interpolation(const std::vector< double > &x, const std::vector< T > &p, const int npts_per_task=std::numeric_limits< int >::max(), World *world_ptr=nullptr)
Definition: interpolation_1d.h:144
virtual ~CubicInterpolationTable()
Definition: interpolation_1d.h:286
static int min_npts_per_task_default
minimum # of points per task
Definition: interpolation_1d.h:219
static void cubic_fit(const double *x, const T *f, T *a)
Definition: interpolation_1d.h:70
CubicInterpolationTable(double lo, double hi, int npt, const functionT &f)
Definition: interpolation_1d.h:230
double lo
Interpolation is in range [lo,hi].
Definition: interpolation_1d.h:62
T operator()(double y) const
Definition: interpolation_1d.h:263
double err(const functionT &f) const
Definition: interpolation_1d.h:274
static std::size_t size()
Returns the number of threads in the pool.
Definition: thread.h:1413
void add(TaskInterface *t)
Add a new local task, taking ownership of the pointer.
Definition: world_task_queue.h:466
void fence()
Returns after all local tasks have completed.
Definition: world_task_queue.h:1384
A parallel world class.
Definition: world.h:132
WorldTaskQueue & taskq
Task queue.
Definition: world.h:204
char * p(char *buf, const char *name, int k, int initial_level, double thresh, int order)
Definition: derivatives.cc:72
auto T(World &world, response_space &f) -> response_space
Definition: global_functions.cc:34
#define max(a, b)
Definition: lda.h:51
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
NDIM & f
Definition: mra.h:2416
int task(int i)
Definition: test_runtime.cpp:4
double(* exact)(double, double, double)
Definition: testfuns.cc:6
std::vector< double > fit(size_t m, size_t n, const std::vector< double > N, const std::vector< double > &f)
Definition: testfuns.cc:36