MADNESS  0.10.1
solvers.h
Go to the documentation of this file.
1 /*
2  This file is part of MADNESS.
3 
4  Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20  For more information please contact:
21 
22  Robert J. Harrison
23  Oak Ridge National Laboratory
24  One Bethel Valley Road
25  P.O. Box 2008, MS-6367
26 
27  email: harrisonrj@ornl.gov
28  tel: 865-241-3937
29  fax: 865-572-0680
30 
31  $Id$
32 */
33 #ifndef MADNESS_LINALG_SOLVERS_H__INCLUDED
34 #define MADNESS_LINALG_SOLVERS_H__INCLUDED
35 
36 #include <madness/tensor/tensor.h>
37 #include <madness/world/print.h>
38 #include <iostream>
40 
41 /*!
42  \file solvers.h
43  \brief Defines interfaces for optimization and non-linear equation solvers
44  \ingroup solvers
45 */
46 
47 namespace madness {
48 
49  /*!
50  \brief Solves non-linear equation using KAIN (returns coefficients to compute next vector)
51 
52  \ingroup solvers
53 
54  The Krylov-accelerated inexact-Newton method employs directional
55  derivatives to estimate the Jacobian in the subspace and
56  separately computes updates in the subspace and its complement.
57 
58  We wish to solve the non-linear equations \f$ f(x)=0 \f$ where
59  \f$ f \f$ and \f$ x \f$ are vectors of the same dimension (e.g.,
60  consider both being MADNESS functions).
61 
62  Define the following matrices and vector (with \f$ i \f$ and \f$
63  j \f$ denoting previous iterations in the Krylov subspace and
64  \f$ m \f$ the current iteration):
65  \f{eqnarray*}{
66  Q_{i j} & = & \langle x_i \mid f_j \rangle \\
67  A_{i j} & = & \langle x_i - x_m \mid f_j - f_m \rangle = Q_{i j} - Q_{m j} - Q_{i m} + Q{m m} \\
68  b_i & =& -\langle x_i - x_m \mid f_m \rangle = -Q_{i m} + Q_{m m}
69  \f}
70  The subspace equation is of dimension \f$ m \f$ (assuming iterations
71  are indexed from zero) and is given by
72  \f[
73  A c = b
74  \f]
75  The interior and exterior updates may be combined into one simple expression
76  as follows. First, define an additional element of the solution vector
77  \f[
78  c_m = 1 - \sum_{i<m} c_i
79  \f]
80  and then the new vector (guess for next iteration) is given by
81  \f[
82  x_{m+1} = \sum_{i \le m}{c_i ( x_i - f_i)}
83  \f]
84 
85  To employ the solver, each iteration
86  -# Compute the additional row and column of the matrix \f$ Q \f$
87  that is the inner product between solution vectors (\f$ x_i \f$) and residuals
88  (\f$ f_j \f$).
89  -# Call this routine to compute the coefficients \f$ c \f$ and from these
90  compute the next solution vector
91  -# Employ step restriction or line search as necessary to ensure stable/robust solution.
92 
93  @param[in] Q The matrix of inner products between subspace vectors and residuals.
94  @param[in] rcond Threshold for discarding small singular values in the subspace equations.
95  @return Vector for computing next solution vector
96  */
97  template <typename T>
98  Tensor<T> KAIN(const Tensor<T>& Q, double rcond=1e-12) {
99  const int nvec = Q.dim(0);
100  const int m = nvec-1;
101 
102  if (nvec == 1) {
103  Tensor<T> c(1);
104  c(0L) = 1.0;
105  return c;
106  }
107 
108  Tensor<T> A(m,m);
109  Tensor<T> b(m);
110  for (long i=0; i<m; ++i) {
111  b(i) = Q(m,m) - Q(i,m);
112  for (long j=0; j<m; ++j) {
113  A(i,j) = Q(i,j) - Q(m,j) - Q(i,m) + Q(m,m);
114  }
115  }
116 
117  // print("Q");
118  // print(Q);
119  // print("A");
120  // print(A);
121  // print("b");
122  // print(b);
123 
124  Tensor<T> x;
125  Tensor<double> s, sumsq;
126  long rank;
127  gelss(A, b, rcond, x, s, rank, sumsq);
128 // print("singular values", s);
129 // print("rank", rank);
130 // print("solution", x);
131 
132  Tensor<T> c(nvec);
133  T sumC = 0.0;
134  for (long i=0; i<m; ++i) sumC += x(i);
135  c(Slice(0,m-1)) = x;
136 // print("SUMC", nvec, m, sumC);
137  c(m) = 1.0 - sumC;
138 
139 // print("returned C", c);
140 
141  return c;
142  }
143 
144 
145  /// The interface to be provided by targets for non-linear equation solver
146 
147  /// \ingroup solvers
149  /// Should return the resdiual (vector F(x))
150  virtual Tensor<double> residual(const Tensor<double>& x) = 0;
151 
152  /// Override this to return \c true if the Jacobian is implemented
153  virtual bool provides_jacobian() const {return false;}
154 
155  /// Some solvers require the jacobian or are faster if an analytic form is available
156 
157  /// J(i,j) = partial F[i] over partial x[j] where F(x) is the vector valued residual
159  throw "not implemented";
160  }
161 
162  /// Implement this if advantageous to compute residual and jacobian simultaneously
163  virtual void residual_and_jacobian(const Tensor<double>& x,
165  residual = this->residual(x);
166  jacobian = this->jacobian(x);
167  }
168 
170  };
171 
172 
173  /// The interface to be provided by functions to be optimized
174 
175  /// \ingroup solvers
177  /// Should return the value of the objective function
178  virtual double value(const Tensor<double>& x) = 0;
179 
180  /// Override this to return true if the derivative is implemented
181  virtual bool provides_gradient() const {return false;}
182 
183  /// Should return the derivative of the function
185  throw "not implemented";
186  }
187 
188  /// Reimplement if more efficient to evaluate both value and gradient in one call
189  virtual void value_and_gradient(const Tensor<double>& x,
190  double& value,
192  value = this->value(x);
193  gradient = this->gradient(x);
194  }
195 
196  /// Numerical test of the derivative ... optionally prints to stdout, returns max abs error
197  double test_gradient(Tensor<double>& x, double value_precision, bool doprint=true);
198 
200  };
201 
202 
203  /// The interface to be provided by optimizers
204 
205  /// \ingroup solvers
207  virtual bool optimize(Tensor<double>& x) = 0;
208  virtual bool converged() const = 0;
209  virtual double value() const = 0;
210  virtual double gradient_norm() const = 0;
212  };
213 
214 
215  /// Unconstrained minimization via steepest descent
216 
217  /// \ingroup solvers
219  std::shared_ptr<OptimizationTargetInterface> target;
220  const double tol;
221  double f;
222  double gnorm;
223 
224  public:
225  SteepestDescent(const std::shared_ptr<OptimizationTargetInterface>& tar,
226  double tol = 1e-6,
227  double value_precision = 1e-12,
228  double gradient_precision = 1e-12);
229 
230  bool optimize(Tensor<double>& x);
231 
232  bool converged() const;
233 
234  double gradient_norm() const;
235 
236  double value() const;
237 
238  virtual ~SteepestDescent() { }
239  };
240 
241 
242  /// Optimization via quasi-Newton (BFGS or SR1 update)
243 
244  /// \ingroup solvers
245  /// This is presently not a low memory algorithm ... we really need one!
247  protected:
248  std::string update; // One of BFGS or SR1
249  std::shared_ptr<OptimizationTargetInterface> target;
250  const int maxiter;
251  const double tol;
252  const double value_precision; // Numerical precision of value
253  const double gradient_precision; // Numerical precision of each element of residual
254  double f;
255  double gnorm;
257  int n;
258  bool printtest;
259 
260  public:
261 
262  /// make this static for other QN classed to have access to it
263  static double line_search(double a1, double f0, double dxgrad,
264  const Tensor<double>& x, const Tensor<double>& dx,
265  std::shared_ptr<OptimizationTargetInterface> target,
266  double value_precision);
267 
268  /// make this static for other QN classed to have access to it
269  static void hessian_update_sr1(const Tensor<double>& s, const Tensor<double>& y,
270  Tensor<double>& hessian);
271 
272  /// make this static for other QN classed to have access to it
273  static void hessian_update_bfgs(const Tensor<double>& dx,
274  const Tensor<double>& dg, Tensor<double>& hessian);
275 
277 
278  public:
279  QuasiNewton(const std::shared_ptr<OptimizationTargetInterface>& tar,
280  int maxiter = 20,
281  double tol = 1e-6,
282  double value_precision = 1e-12,
283  double gradient_precision = 1e-12);
284 
285  /// Choose update method (currently only "BFGS" or "SR1")
286  void set_update(const std::string& method);
287 
288  /// Choose update method (currently only "BFGS" or "SR1")
289  void set_test(const bool& test_level);
290 
291  /// Runs the optimizer
292 
293  /// @return True if converged
294  bool optimize(Tensor<double>& x);
295 
296  /// After running the optimizer returns true if converged
297 
298  /// @return True if converged
299  bool converged() const;
300 
301  /// Value of objective function
302 
303  /// @return Value of objective function
304  double value() const;
305 
306  /// Resets Hessian to default guess
308 
309  /// Sets Hessian to given matrix
311 
312  /// Value of gradient norm
313 
314  /// @return Norm of gradient of objective function
315  double gradient_norm() const;
316 
317  virtual ~QuasiNewton() {}
318  };
319 }
320 
321 #endif // MADNESS_LINALG_SOLVERS_H__INCLUDED
real_convolution_3d A(World &world)
Definition: DKops.h:230
Definition: test_ar.cc:118
Optimization via quasi-Newton (BFGS or SR1 update)
Definition: solvers.h:246
const double gradient_precision
Definition: solvers.h:253
void reset_hessian()
Resets Hessian to default guess.
Definition: solvers.h:307
bool converged() const
After running the optimizer returns true if converged.
Definition: solvers.cc:334
const double value_precision
Definition: solvers.h:252
void set_update(const std::string &method)
Choose update method (currently only "BFGS" or "SR1")
Definition: solvers.cc:269
virtual ~QuasiNewton()
Definition: solvers.h:317
static double line_search(double a1, double f0, double dxgrad, const Tensor< double > &x, const Tensor< double > &dx, std::shared_ptr< OptimizationTargetInterface > target, double value_precision)
make this static for other QN classed to have access to it
Definition: solvers.cc:113
static void hessian_update_bfgs(const Tensor< double > &dx, const Tensor< double > &dg, Tensor< double > &hessian)
make this static for other QN classed to have access to it
Definition: solvers.cc:179
std::shared_ptr< OptimizationTargetInterface > target
Definition: solvers.h:249
bool optimize(Tensor< double > &x)
Runs the optimizer.
Definition: solvers.cc:278
std::string update
Definition: solvers.h:248
bool printtest
Definition: solvers.h:258
const double tol
Definition: solvers.h:251
void set_test(const bool &test_level)
Choose update method (currently only "BFGS" or "SR1")
Definition: solvers.cc:274
void set_hessian(const Tensor< double > &matrix)
Sets Hessian to given matrix.
Definition: solvers.h:310
double gnorm
Definition: solvers.h:255
double f
Definition: solvers.h:254
const int maxiter
Definition: solvers.h:250
int n
Definition: solvers.h:257
QuasiNewton(const std::shared_ptr< OptimizationTargetInterface > &tar, int maxiter=20, double tol=1e-6, double value_precision=1e-12, double gradient_precision=1e-12)
Definition: solvers.cc:250
Tensor< double > new_search_direction(const Tensor< double > &g) const
Definition: solvers.cc:210
static void hessian_update_sr1(const Tensor< double > &s, const Tensor< double > &y, Tensor< double > &hessian)
make this static for other QN classed to have access to it
Definition: solvers.cc:166
Tensor< double > h
Definition: solvers.h:256
double value() const
Value of objective function.
Definition: solvers.cc:336
double gradient_norm() const
Value of gradient norm.
Definition: solvers.cc:338
A slice defines a sub-range or patch of a dimension.
Definition: slice.h:103
Unconstrained minimization via steepest descent.
Definition: solvers.h:218
double value() const
Definition: solvers.cc:111
double f
Definition: solvers.h:221
SteepestDescent(const std::shared_ptr< OptimizationTargetInterface > &tar, double tol=1e-6, double value_precision=1e-12, double gradient_precision=1e-12)
Definition: solvers.cc:67
double gnorm
Definition: solvers.h:222
bool converged() const
Definition: solvers.cc:107
virtual ~SteepestDescent()
Definition: solvers.h:238
const double tol
Definition: solvers.h:220
std::shared_ptr< OptimizationTargetInterface > target
Definition: solvers.h:219
double gradient_norm() const
Definition: solvers.cc:109
bool optimize(Tensor< double > &x)
Definition: solvers.cc:79
A tensor is a multidimension array.
Definition: tensor.h:317
Definition: y.cc:25
const double m
Definition: gfit.cc:199
auto T(World &world, response_space &f) -> response_space
Definition: global_functions.cc:34
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
Tensor< T > KAIN(const Tensor< T > &Q, double rcond=1e-12)
Solves non-linear equation using KAIN (returns coefficients to compute next vector)
Definition: solvers.h:98
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition: mra.h:2002
void gelss(const Tensor< T > &a, const Tensor< T > &b, double rcond, Tensor< T > &x, Tensor< typename Tensor< T >::scalar_type > &s, long &rank, Tensor< typename Tensor< T >::scalar_type > &sumsq)
Solve Ax = b for general A using the LAPACK *gelss routines.
Definition: lapack.cc:884
NDIM const Function< R, NDIM > & g
Definition: mra.h:2416
static const double b
Definition: nonlinschro.cc:119
Defines simple templates for printing to std::cout "a la Python".
double Q(double a)
Definition: relops.cc:20
static const double c
Definition: relops.cc:10
static const double L
Definition: rk.cc:46
The interface to be provided by functions to be optimized.
Definition: solvers.h:176
virtual Tensor< double > gradient(const Tensor< double > &x)
Should return the derivative of the function.
Definition: solvers.h:184
virtual void value_and_gradient(const Tensor< double > &x, double &value, Tensor< double > &gradient)
Reimplement if more efficient to evaluate both value and gradient in one call.
Definition: solvers.h:189
virtual ~OptimizationTargetInterface()
Definition: solvers.h:199
virtual double value(const Tensor< double > &x)=0
Should return the value of the objective function.
double test_gradient(Tensor< double > &x, double value_precision, bool doprint=true)
Numerical test of the derivative ... optionally prints to stdout, returns max abs error.
Definition: solvers.cc:38
virtual bool provides_gradient() const
Override this to return true if the derivative is implemented.
Definition: solvers.h:181
The interface to be provided by optimizers.
Definition: solvers.h:206
virtual bool optimize(Tensor< double > &x)=0
virtual double gradient_norm() const =0
virtual ~OptimizerInterface()
Definition: solvers.h:211
virtual double value() const =0
virtual bool converged() const =0
The interface to be provided by targets for non-linear equation solver.
Definition: solvers.h:148
virtual void residual_and_jacobian(const Tensor< double > &x, Tensor< double > &residual, Tensor< double > &jacobian)
Implement this if advantageous to compute residual and jacobian simultaneously.
Definition: solvers.h:163
virtual Tensor< double > residual(const Tensor< double > &x)=0
Should return the resdiual (vector F(x))
virtual ~SolverTargetInterface()
Definition: solvers.h:169
virtual Tensor< double > jacobian(const Tensor< double > &x)
Some solvers require the jacobian or are faster if an analytic form is available.
Definition: solvers.h:158
virtual bool provides_jacobian() const
Override this to return true if the Jacobian is implemented.
Definition: solvers.h:153
Defines and implements most of Tensor.
Prototypes for a partial interface from Tensor to LAPACK.
void e()
Definition: test_sig.cc:75
const double a1
Definition: vnucso.cc:85