MADNESS 0.10.1
solvers.h
/*
  This file is part of MADNESS.

  Copyright (C) 2007,2010 Oak Ridge National Laboratory

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  For more information please contact:

  Robert J. Harrison
  Oak Ridge National Laboratory
  One Bethel Valley Road
  P.O. Box 2008, MS-6367

  email: harrisonrj@ornl.gov
  tel:   865-241-3937
  fax:   865-572-0680

  $Id$
*/
#ifndef MADNESS_LINALG_SOLVERS_H__INCLUDED
#define MADNESS_LINALG_SOLVERS_H__INCLUDED

#include <madness/tensor/tensor.h>
#include <madness/world/print.h>
#include <iostream>
#include <madness/tensor/tensor_lapack.h>

/*!
  \file solvers.h
  \brief Defines interfaces for optimization and non-linear equation solvers
  \ingroup solvers
*/

namespace madness {

    /*!
      \brief Solves a non-linear equation using KAIN (returns coefficients to compute the next vector)

      \ingroup solvers

      The Krylov-accelerated inexact-Newton (KAIN) method employs directional
      derivatives to estimate the Jacobian in the subspace and
      separately computes updates in the subspace and its complement.

      We wish to solve the non-linear equations \f$ f(x)=0 \f$ where
      \f$ f \f$ and \f$ x \f$ are vectors of the same dimension (e.g.,
      consider both being MADNESS functions).

      Define the following matrices and vector (with \f$ i \f$ and \f$
      j \f$ denoting previous iterations in the Krylov subspace and
      \f$ m \f$ the current iteration):
      \f{eqnarray*}{
      Q_{i j} & = & \langle x_i \mid f_j \rangle \\
      A_{i j} & = & \langle x_i - x_m \mid f_j - f_m \rangle = Q_{i j} - Q_{m j} - Q_{i m} + Q_{m m} \\
      b_i & = & -\langle x_i - x_m \mid f_m \rangle = -Q_{i m} + Q_{m m}
      \f}
      The subspace equation is of dimension \f$ m \f$ (assuming iterations
      are indexed from zero) and is given by
      \f[
      A c = b
      \f]
      The interior and exterior updates may be combined into one simple expression
      as follows. First, define an additional element of the solution vector
      \f[
      c_m = 1 - \sum_{i<m} c_i
      \f]
      and then the new vector (the guess for the next iteration) is given by
      \f[
      x_{m+1} = \sum_{i \le m}{c_i ( x_i - f_i)}
      \f]

      To employ the solver, in each iteration
      -# Compute the additional row and column of the matrix \f$ Q \f$,
         i.e., the inner products between the solution vectors (\f$ x_i \f$)
         and the residuals (\f$ f_j \f$).
      -# Call this routine to compute the coefficients \f$ c \f$, and from
         these compute the next solution vector (a minimal driver sketch
         follows the definition below).
      -# Employ step restriction or line search as necessary to ensure a
         stable/robust solution.

      @param[in] Q The matrix of inner products between the subspace vectors and the residuals.
      @param[in] rcond Threshold for discarding small singular values in the subspace equations.
      @return Vector of coefficients for computing the next solution vector
    */
    template <typename T>
    Tensor<T> KAIN(const Tensor<T>& Q, double rcond=1e-12) {
        const int nvec = Q.dim(0);
        const int m = nvec-1;

        if (nvec == 1) {
            Tensor<T> c(1);
            c(0L) = 1.0;
            return c;
        }

        Tensor<T> A(m,m);
        Tensor<T> b(m);
        for (long i=0; i<m; ++i) {
            b(i) = Q(m,m) - Q(i,m);
            for (long j=0; j<m; ++j) {
                A(i,j) = Q(i,j) - Q(m,j) - Q(i,m) + Q(m,m);
            }
        }

        // print("Q");
        // print(Q);
        // print("A");
        // print(A);
        // print("b");
        // print(b);

        Tensor<T> x;
        Tensor<double> s, sumsq;
        long rank;
        gelss(A, b, rcond, x, s, rank, sumsq);
        // print("singular values", s);
        // print("rank", rank);
        // print("solution", x);

        Tensor<T> c(nvec);
        T sumC = 0.0;
        for (long i=0; i<m; ++i) sumC += x(i);
        c(Slice(0,m-1)) = x;
        // print("SUMC", nvec, m, sumC);
        c(m) = 1.0 - sumC;

        // print("returned C", c);

        return c;
    }
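
    // ------------------------------------------------------------------
    // Illustrative driver (an editor's sketch, not part of the MADNESS
    // API): one way to use KAIN for a small dense system f(x) = 0 with
    // real T. The residual functor F, the fixed iteration count, and the
    // omitted step restriction are assumptions made for the example;
    // Tensor, copy, trace, and KAIN are the MADNESS pieces it exercises.
    // Requires <vector>.
    // ------------------------------------------------------------------
    template <typename T, typename residualT>
    Tensor<T> kain_driver_sketch(residualT F, Tensor<T> x, int maxiter) {
        std::vector<Tensor<T>> xs, fs;            // iterate and residual history
        for (int iter=0; iter<maxiter; ++iter) {
            xs.push_back(copy(x));
            fs.push_back(F(x));
            const long n = xs.size();
            Tensor<T> Q(n,n);                     // Q(i,j) = <x_i | f_j>
            for (long i=0; i<n; ++i)
                for (long j=0; j<n; ++j)
                    Q(i,j) = xs[i].trace(fs[j]);
            Tensor<T> c = KAIN(Q);
            x = Tensor<T>(xs[0].dim(0));          // x_{m+1} = sum_i c_i (x_i - f_i)
            for (long i=0; i<n; ++i)
                x += c(i)*(xs[i] - fs[i]);
        }
        return x;
    }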


    /// The interface to be provided by targets for the non-linear equation solver

    /// \ingroup solvers
    struct SolverTargetInterface {
        /// Should return the residual (the vector F(x))
        virtual Tensor<double> residual(const Tensor<double>& x) = 0;

        /// Override this to return \c true if the Jacobian is implemented
        virtual bool provides_jacobian() const {return false;}

        /// Some solvers require the Jacobian or are faster if an analytic form is available

        /// J(i,j) = partial F[i] over partial x[j] where F(x) is the vector-valued residual
        virtual Tensor<double> jacobian(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Implement this if it is advantageous to compute the residual and Jacobian simultaneously
        virtual void residual_and_jacobian(const Tensor<double>& x,
                                           Tensor<double>& residual, Tensor<double>& jacobian) {
            residual = this->residual(x);
            jacobian = this->jacobian(x);
        }

        virtual ~SolverTargetInterface() {}
    };
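
    // Illustrative sketch (an editor's addition): a minimal target for the
    // interface above, for the toy system F_i(x) = x_i^3 - 1, whose Jacobian
    // is diagonal. The class name is hypothetical.
    struct ExampleCubicTarget : public SolverTargetInterface {
        Tensor<double> residual(const Tensor<double>& x) {
            Tensor<double> f(x.dim(0));
            for (long i=0; i<x.dim(0); ++i) f(i) = x(i)*x(i)*x(i) - 1.0;
            return f;
        }
        bool provides_jacobian() const {return true;}
        Tensor<double> jacobian(const Tensor<double>& x) {
            Tensor<double> J(x.dim(0), x.dim(0));  // zero-initialized
            for (long i=0; i<x.dim(0); ++i) J(i,i) = 3.0*x(i)*x(i); // dF_i/dx_i
            return J;
        }
    };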


    /// The interface to be provided by functions to be optimized

    /// \ingroup solvers
    struct OptimizationTargetInterface {
        /// Should return the value of the objective function
        virtual double value(const Tensor<double>& x) = 0;

        /// Override this to return \c true if the derivative is implemented
        virtual bool provides_gradient() const {return false;}

        /// Should return the derivative of the function
        virtual Tensor<double> gradient(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Reimplement if it is more efficient to evaluate both the value and the gradient in one call
        virtual void value_and_gradient(const Tensor<double>& x,
                                        double& value,
                                        Tensor<double>& gradient) {
            value = this->value(x);
            gradient = this->gradient(x);
        }

        /// Numerical test of the derivative ... optionally prints to stdout, returns max abs error
        double test_gradient(Tensor<double>& x, double value_precision, bool doprint=true);

        virtual ~OptimizationTargetInterface() {}
    };
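
    // Illustrative sketch (an editor's addition): a minimal optimization
    // target, f(x) = 0.5*|x|^2, with an analytic gradient. The class name is
    // hypothetical; the usage sketches after the optimizer classes below
    // reuse it.
    struct ExampleQuadraticTarget : public OptimizationTargetInterface {
        double value(const Tensor<double>& x) {
            return 0.5*x.trace(x);                // 0.5 * sum_i x_i^2
        }
        bool provides_gradient() const {return true;}
        Tensor<double> gradient(const Tensor<double>& x) {
            return copy(x);                       // d(0.5*|x|^2)/dx_i = x_i
        }
    };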


    /// The interface to be provided by optimizers

    /// \ingroup solvers
    struct OptimizerInterface {
        virtual bool optimize(Tensor<double>& x) = 0;
        virtual bool converged() const = 0;
        virtual double value() const = 0;
        virtual double gradient_norm() const = 0;
        virtual ~OptimizerInterface() {}
    };


    /// Unconstrained minimization via steepest descent

    /// \ingroup solvers
    class SteepestDescent : public OptimizerInterface {
        std::shared_ptr<OptimizationTargetInterface> target;
        const double tol;
        double f;
        double gnorm;

    public:
        SteepestDescent(const std::shared_ptr<OptimizationTargetInterface>& tar,
                        double tol = 1e-6,
                        double value_precision = 1e-12,
                        double gradient_precision = 1e-12);

        bool optimize(Tensor<double>& x);

        bool converged() const;

        double gradient_norm() const;

        double value() const;

        virtual ~SteepestDescent() { }
    };
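
    // Usage sketch (an editor's addition), assuming the hypothetical
    // ExampleQuadraticTarget defined above:
    //
    //   auto target = std::make_shared<ExampleQuadraticTarget>();
    //   SteepestDescent sd(target, 1e-6);
    //   Tensor<double> x(3L);
    //   x.fillrandom();
    //   sd.optimize(x);   // improves x in place
    //   print("converged?", sd.converged(), "f =", sd.value());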


    /// Optimization via quasi-Newton (BFGS or SR1 update)

    /// \ingroup solvers
    /// This is presently not a low-memory algorithm ... we really need one!
    class QuasiNewton : public OptimizerInterface {
    protected:
        std::string update;              // One of BFGS or SR1
        std::shared_ptr<OptimizationTargetInterface> target;
        const int maxiter;
        const double tol;
        const double value_precision;    // Numerical precision of value
        const double gradient_precision; // Numerical precision of each element of residual
        double f;
        double gnorm;
        Tensor<double> h;
        int n;
        bool printtest;

    public:

        /// Static so that other QN classes also have access to it
        static double line_search(double a1, double f0, double dxgrad,
                                  const Tensor<double>& x, const Tensor<double>& dx,
                                  std::shared_ptr<OptimizationTargetInterface> target,
                                  double value_precision);

        /// Static so that other QN classes also have access to it
        static void hessian_update_sr1(const Tensor<double>& s, const Tensor<double>& y,
                                       Tensor<double>& hessian);

        /// Static so that other QN classes also have access to it
        static void hessian_update_bfgs(const Tensor<double>& dx,
                                        const Tensor<double>& dg, Tensor<double>& hessian);

        Tensor<double> new_search_direction(const Tensor<double>& g) const;

    public:
        QuasiNewton(const std::shared_ptr<OptimizationTargetInterface>& tar,
                    int maxiter = 20,
                    double tol = 1e-6,
                    double value_precision = 1e-12,
                    double gradient_precision = 1e-12);

        /// Choose the update method (currently only "BFGS" or "SR1")
        void set_update(const std::string& method);

        /// Enable or disable the numerical gradient test (see \c printtest)
        void set_test(const bool& test_level);

        /// Runs the optimizer

        /// @return True if converged
        bool optimize(Tensor<double>& x);

        /// After running the optimizer, returns true if converged

        /// @return True if converged
        bool converged() const;

        /// Value of the objective function

        /// @return Value of the objective function
        double value() const;

        /// Resets the Hessian to the default guess
        void reset_hessian() {h = Tensor<double>();}

        /// Sets the Hessian to the given matrix
        void set_hessian(const Tensor<double>& matrix) {h = copy(matrix);}

        /// Value of the gradient norm

        /// @return Norm of the gradient of the objective function
        double gradient_norm() const;

        virtual ~QuasiNewton() {}
    };
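
    // Usage sketch (an editor's addition): quasi-Newton with the BFGS update,
    // again assuming the hypothetical ExampleQuadraticTarget from above:
    //
    //   auto target = std::make_shared<ExampleQuadraticTarget>();
    //   QuasiNewton qn(target, 20, 1e-6);
    //   qn.set_update("BFGS");   // or "SR1"
    //   Tensor<double> x(3L);
    //   x.fillrandom();
    //   if (qn.optimize(x)) print("minimum value", qn.value());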
}

#endif // MADNESS_LINALG_SOLVERS_H__INCLUDED