madness/api-doc/solvers_8h_source.html

/*

  This file is part of MADNESS.


  Copyright (C) 2007,2010 Oak Ridge National Laboratory


  This program is free software; you can redistribute it and/or modify

  it under the terms of the GNU General Public License as published by

  the Free Software Foundation; either version 2 of the License, or

  (at your option) any later version.


  This program is distributed in the hope that it will be useful,

  but WITHOUT ANY WARRANTY; without even the implied warranty of

  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

  GNU General Public License for more details.


  You should have received a copy of the GNU General Public License

  along with this program; if not, write to the Free Software

  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


  For more information please contact:


  Robert J. Harrison

  Oak Ridge National Laboratory

  One Bethel Valley Road

  P.O. Box 2008, MS-6367


  email: harrisonrj@ornl.gov

  tel:   865-241-3937

  fax:   865-572-0680


  $Id$

*/

#ifndef MADNESS_LINALG_SOLVERS_H__INCLUDED

#define MADNESS_LINALG_SOLVERS_H__INCLUDED


#include <madness/tensor/tensor.h>

#include <madness/world/print.h>

#include <iostream>

#include <madness/tensor/tensor_lapack.h>


/*!

  \file solvers.h

  \brief Defines interfaces for optimization and non-linear equation solvers

  \ingroup solvers

*/


namespace madness {


    /*!

      \brief Solves non-linear equation using KAIN (returns coefficients to compute next vector)


      \ingroup solvers


      The Krylov-accelerated inexact-Newton method employs directional

      derivatives to estimate the Jacobian in the subspace and

      separately computes updates in the subspace and its complement.


      We wish to solve the non-linear equations \f$ f(x)=0 \f$ where

      \f$ f \f$ and \f$ x \f$ are vectors of the same dimension (e.g.,

      consider both being MADNESS functions).


      Define the following matrices and vector (with \f$ i \f$ and \f$

      j \f$ denoting previous iterations in the Krylov subspace and

      \f$ m \f$ the current iteration):

      \f{eqnarray*}{

      Q_{i j} & = & \langle x_i \mid f_j \rangle \\

      A_{i j} & = & \langle x_i - x_m \mid f_j - f_m \rangle = Q_{i j} - Q_{m j} - Q_{i m} + Q_{m m} \\

      b_i & =& -\langle x_i - x_m \mid f_m \rangle = -Q_{i m} + Q_{m m}

      \f}

      The subspace equation is of dimension \f$ m \f$ (assuming iterations

      are indexed from zero) and is given by

      \f[

      A c = b

      \f]

      The interior and exterior updates may be combined into one simple expression

      as follows. First, define an additional element of the solution vector

      \f[

      c_m = 1 - \sum_{i<m} c_i

      \f]

      and then the new vector (guess for next iteration) is given by

      \f[

      x_{m+1} = \sum_{i \le m}{c_i ( x_i - f_i)}

      \f]


      To employ the solver, each iteration

      -# Compute the additional row and column of the matrix \f$ Q \f$

      that is the inner product between solution vectors (\f$ x_i \f$) and residuals

      (\f$ f_j \f$).

      -# Call this routine to compute the coefficients \f$ c \f$ and from these

      compute the next solution vector

      -# Employ step restriction or line search as necessary to ensure stable/robust solution.


      @param[in] Q The matrix of inner products between subspace vectors and residuals.

      @param[in] rcond Threshold for discarding small singular values in the subspace equations.

      @return Vector for computing next solution vector

    */

    template <typename T>
    Tensor<T> KAIN(const Tensor<T>& Q, double rcond=1e-12) {
        // Q carries one row/column per stored iterate; the newest iterate has index `last`.
        const int nvec = Q.dim(0);
        const int last = nvec - 1;

        // With a single iterate there is no subspace equation to solve:
        // the coefficient vector is simply [1].
        if (nvec == 1) {
            Tensor<T> unit(1);
            unit(0L) = 1.0;
            return unit;
        }

        // Assemble the subspace system A c = b relative to the newest iterate m:
        //   A(i,j) = Q(i,j) - Q(m,j) - Q(i,m) + Q(m,m)
        //   b(i)   = Q(m,m) - Q(i,m)
        const T q_mm = Q(last,last);
        Tensor<T> lhs(last,last);
        Tensor<T> rhs(last);
        for (long row=0; row<last; ++row) {
            rhs(row) = q_mm - Q(row,last);
            for (long col=0; col<last; ++col) {
                lhs(row,col) = Q(row,col) - Q(last,col) - Q(row,last) + q_mm;
            }
        }

        // Solve the subspace equations with gelss; rcond is forwarded as the threshold
        // for discarding small singular values. The singular values, rank and sumsq
        // outputs are required by the call but not used afterwards.
        Tensor<T> subspace_coeffs;
        Tensor<double> singular_values;
        Tensor<double> sumsq;
        long rank;
        gelss(lhs, rhs, rcond, subspace_coeffs, singular_values, rank, sumsq);

        // Copy the subspace solution into the leading elements of the result and
        // append the extra element c_m = 1 - sum_{i<m} c_i.
        Tensor<T> coeffs(nvec);
        coeffs(Slice(0,last-1)) = subspace_coeffs;

        T partial_sum = 0.0;
        for (long i=0; i<last; ++i) {
            partial_sum += subspace_coeffs(i);
        }
        coeffs(last) = 1.0 - partial_sum;

        return coeffs;
    }


    /// Contract to be implemented by targets of the non-linear equation solver

    /// \ingroup solvers
    struct SolverTargetInterface {
        /// Must return the residual (vector F(x)) evaluated at \c x
        virtual Tensor<double> residual(const Tensor<double>& x) = 0;

        /// Reports whether jacobian() is implemented; override to return \c true if it is
        virtual bool provides_jacobian() const {
            return false;
        }

        /// Jacobian of the residual; some solvers need it or run faster with an analytic form

        /// J(i,j) = partial F[i] over partial x[j] where F(x) is the vector valued residual.
        /// The default implementation throws the C-string "not implemented".
        virtual Tensor<double> jacobian(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Evaluates residual and Jacobian in a single call

        /// The default calls residual() and then jacobian(); reimplement when it is
        /// advantageous to compute both simultaneously.
        virtual void residual_and_jacobian(const Tensor<double>& x,
                                           Tensor<double>& residual,
                                           Tensor<double>& jacobian) {
            // The output parameters shadow the member functions, hence the explicit this->.
            residual = this->residual(x);
            jacobian = this->jacobian(x);
        }

        virtual ~SolverTargetInterface() = default;
    };


    /// Contract to be implemented by objective functions that are to be optimized

    /// \ingroup solvers
    struct OptimizationTargetInterface {
        /// Must return the value of the objective function at \c x
        virtual double value(const Tensor<double>& x) = 0;

        /// Reports whether gradient() is implemented; override to return \c true if it is
        virtual bool provides_gradient() const {
            return false;
        }

        /// Derivative of the objective function at \c x

        /// The default implementation throws the C-string "not implemented".
        virtual Tensor<double> gradient(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Evaluates value and gradient in a single call

        /// The default calls value() and then gradient(); reimplement if it is more
        /// efficient to evaluate both together.
        virtual void value_and_gradient(const Tensor<double>& x,
                                        double& value,
                                        Tensor<double>& gradient) {
            // The output parameters shadow the member functions, hence the explicit this->.
            value = this->value(x);
            gradient = this->gradient(x);
        }

        /// Numerical test of the derivative

        /// Optionally prints to stdout and returns the max abs error.
        /// Declared here only; the definition lives outside this header.
        double test_gradient(Tensor<double>& x, double value_precision, bool doprint=true);

        virtual ~OptimizationTargetInterface() = default;
    };


    /// Contract to be implemented by optimizers

    /// \ingroup solvers
    struct OptimizerInterface {
        /// Runs the optimizer on \c x, which is passed by non-const reference

        /// NOTE(review): the return value is presumably \c true on convergence
        /// (QuasiNewton documents it that way) -- confirm for other implementations.
        virtual bool optimize(Tensor<double>& x) = 0;

        /// Convergence status query
        virtual bool converged() const = 0;

        /// Objective function value query
        virtual double value() const = 0;

        /// Gradient norm query
        virtual double gradient_norm() const = 0;

        virtual ~OptimizerInterface() = default;
    };


    /// Steepest-descent optimizer for unconstrained minimization

    /// \ingroup solvers
    class SteepestDescent : public OptimizerInterface {
    private:
        /// Objective function being minimized (shared ownership)
        std::shared_ptr<OptimizationTargetInterface> target;

        /// Tolerance fixed at construction
        /// (NOTE(review): presumably the convergence threshold -- confirm in the implementation)
        const double tol;

        /// NOTE(review): presumably the value reported by value() -- confirm in the implementation
        double f;

        /// NOTE(review): presumably the norm reported by gradient_norm() -- confirm in the implementation
        double gnorm;

    public:
        /// Constructs the optimizer for the given target

        /// No members matching \c value_precision or \c gradient_precision are declared
        /// in this class; how they are used is decided by the out-of-line definition.
        SteepestDescent(const std::shared_ptr<OptimizationTargetInterface>& tar,
                        double tol = 1e-6,
                        double value_precision = 1e-12,
                        double gradient_precision = 1e-12);

        /// Implements OptimizerInterface::optimize
        bool optimize(Tensor<double>& x);

        /// Implements OptimizerInterface::converged
        bool converged() const;

        /// Implements OptimizerInterface::gradient_norm
        double gradient_norm() const;

        /// Implements OptimizerInterface::value
        double value() const;

        virtual ~SteepestDescent() = default;
    };


    /// Quasi-Newton optimizer (BFGS or SR1 update)

    /// \ingroup solvers
    /// This is presently not a low memory algorithm ... we really need one!
    class QuasiNewton : public OptimizerInterface {
    protected:
        /// Update scheme: one of BFGS or SR1
        std::string update;

        /// Objective function being optimized (shared ownership)
        std::shared_ptr<OptimizationTargetInterface> target;

        /// Iteration limit fixed at construction
        const int maxiter;

        /// Tolerance fixed at construction
        /// (NOTE(review): presumably the convergence threshold -- confirm in the implementation)
        const double tol;

        /// Numerical precision of value
        const double value_precision;

        /// Numerical precision of each element of residual
        const double gradient_precision;

        /// NOTE(review): presumably the value reported by value() -- confirm in the implementation
        double f;

        /// NOTE(review): presumably the norm reported by gradient_norm() -- confirm in the implementation
        double gnorm;

        /// Stored Hessian; see reset_hessian() and set_hessian()
        Tensor<double> h;

        /// NOTE(review): meaning not visible in this header -- confirm in the implementation
        int n;

        /// NOTE(review): presumably the flag controlled by set_test() -- confirm in the implementation
        bool printtest;

    public:
        /// Line search; static so that other quasi-Newton classes have access to it
        static double line_search(double a1, double f0, double dxgrad,
                                  const Tensor<double>& x, const Tensor<double>& dx,
                                  std::shared_ptr<OptimizationTargetInterface> target,
                                  double value_precision);

        /// SR1 Hessian update; static so that other quasi-Newton classes have access to it

        /// \c hessian is taken by non-const reference.
        static void hessian_update_sr1(const Tensor<double>& s, const Tensor<double>& y,
                                       Tensor<double>& hessian);

        /// BFGS Hessian update; static so that other quasi-Newton classes have access to it

        /// \c hessian is taken by non-const reference.
        static void hessian_update_bfgs(const Tensor<double>& dx,
                                        const Tensor<double>& dg,
                                        Tensor<double>& hessian);

        /// Computes a search direction from the gradient \c g (defined outside this header)
        Tensor<double> new_search_direction(const Tensor<double>& g) const;

        /// Constructs the optimizer for the given target
        QuasiNewton(const std::shared_ptr<OptimizationTargetInterface>& tar,
                    int maxiter = 20,
                    double tol = 1e-6,
                    double value_precision = 1e-12,
                    double gradient_precision = 1e-12);

        /// Choose update method (currently only "BFGS" or "SR1")
        void set_update(const std::string& method);

        /// Sets the test level
        /// (NOTE(review): presumably toggles \c printtest -- confirm in the implementation)
        void set_test(const bool& test_level);

        /// Runs the optimizer

        /// @return True if converged
        bool optimize(Tensor<double>& x);

        /// After running the optimizer returns true if converged

        /// @return True if converged
        bool converged() const;

        /// Value of objective function

        /// @return Value of objective function
        double value() const;

        /// Resets Hessian to default guess

        /// Replaces the stored Hessian with a default-constructed tensor.
        void reset_hessian() {
            h = Tensor<double>();
        }

        /// Sets Hessian to given matrix

        /// The result of madness::copy(matrix) is stored.
        void set_hessian(const Tensor<double>& matrix) {
            h = madness::copy(matrix);
        }

        /// Value of gradient norm

        /// @return Norm of gradient of objective function
        double gradient_norm() const;

        virtual ~QuasiNewton() = default;
    };


}


#endif // MADNESS_LINALG_SOLVERS_H__INCLUDED

A
Definition test_ar.cc:118

madness::BaseTensor::dim
long dim(int i) const
Returns the size of dimension i.
Definition basetensor.h:147

madness::QuasiNewton
Optimization via quasi-Newton (BFGS or SR1 update)
Definition solvers.h:246

madness::QuasiNewton::gradient_precision
const double gradient_precision
Definition solvers.h:253

madness::QuasiNewton::reset_hessian
void reset_hessian()
Resets Hessian to default guess.
Definition solvers.h:307

madness::QuasiNewton::converged
bool converged() const
After running the optimizer returns true if converged.
Definition solvers.cc:334

madness::QuasiNewton::value_precision
const double value_precision
Definition solvers.h:252

madness::QuasiNewton::set_update
void set_update(const std::string &method)
Choose update method (currently only "BFGS" or "SR1")
Definition solvers.cc:269

madness::QuasiNewton::~QuasiNewton
virtual ~QuasiNewton()
Definition solvers.h:317

madness::QuasiNewton::line_search
static double line_search(double a1, double f0, double dxgrad, const Tensor< double > &x, const Tensor< double > &dx, std::shared_ptr< OptimizationTargetInterface > target, double value_precision)
Made static so that other QN classes have access to it
Definition solvers.cc:113

madness::QuasiNewton::hessian_update_bfgs
static void hessian_update_bfgs(const Tensor< double > &dx, const Tensor< double > &dg, Tensor< double > &hessian)
Made static so that other QN classes have access to it
Definition solvers.cc:179

madness::QuasiNewton::target
std::shared_ptr< OptimizationTargetInterface > target
Definition solvers.h:249

madness::QuasiNewton::optimize
bool optimize(Tensor< double > &x)
Runs the optimizer.
Definition solvers.cc:278

madness::QuasiNewton::update
std::string update
Definition solvers.h:248

madness::QuasiNewton::printtest
bool printtest
Definition solvers.h:258

madness::QuasiNewton::tol
const double tol
Definition solvers.h:251

madness::QuasiNewton::set_test
void set_test(const bool &test_level)
Choose update method (currently only "BFGS" or "SR1")
Definition solvers.cc:274

madness::QuasiNewton::set_hessian
void set_hessian(const Tensor< double > &matrix)
Sets Hessian to given matrix.
Definition solvers.h:310

madness::QuasiNewton::gnorm
double gnorm
Definition solvers.h:255

madness::QuasiNewton::f
double f
Definition solvers.h:254

madness::QuasiNewton::maxiter
const int maxiter
Definition solvers.h:250

madness::QuasiNewton::n
int n
Definition solvers.h:257

madness::QuasiNewton::new_search_direction
Tensor< double > new_search_direction(const Tensor< double > &g) const
Definition solvers.cc:210

madness::QuasiNewton::hessian_update_sr1
static void hessian_update_sr1(const Tensor< double > &s, const Tensor< double > &y, Tensor< double > &hessian)
Made static so that other QN classes have access to it
Definition solvers.cc:166

madness::QuasiNewton::h
Tensor< double > h
Definition solvers.h:256

madness::QuasiNewton::value
double value() const
Value of objective function.
Definition solvers.cc:336

madness::QuasiNewton::gradient_norm
double gradient_norm() const
Value of gradient norm.
Definition solvers.cc:338

madness::Slice
A slice defines a sub-range or patch of a dimension.
Definition slice.h:103

madness::SteepestDescent
Unconstrained minimization via steepest descent.
Definition solvers.h:218

madness::SteepestDescent::value
double value() const
Definition solvers.cc:111

madness::SteepestDescent::f
double f
Definition solvers.h:221

madness::SteepestDescent::gnorm
double gnorm
Definition solvers.h:222

madness::SteepestDescent::converged
bool converged() const
Definition solvers.cc:107

madness::SteepestDescent::~SteepestDescent
virtual ~SteepestDescent()
Definition solvers.h:238

madness::SteepestDescent::tol
const double tol
Definition solvers.h:220

madness::SteepestDescent::target
std::shared_ptr< OptimizationTargetInterface > target
Definition solvers.h:219

madness::SteepestDescent::gradient_norm
double gradient_norm() const
Definition solvers.cc:109

madness::SteepestDescent::optimize
bool optimize(Tensor< double > &x)
Definition solvers.cc:79

madness::Tensor
A tensor is a multidimensional array.
Definition tensor.h:317

matrix
Definition y.cc:25

T
auto T(World &world, response_space &f) -> response_space
Definition global_functions.cc:28

madness
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10

madness::KAIN
Tensor< T > KAIN(const Tensor< T > &Q, double rcond=1e-12)
Solves non-linear equation using KAIN (returns coefficients to compute next vector)
Definition solvers.h:98

madness::gelss
void gelss(const Tensor< T > &a, const Tensor< T > &b, double rcond, Tensor< T > &x, Tensor< typename Tensor< T >::scalar_type > &s, long &rank, Tensor< typename Tensor< T >::scalar_type > &sumsq)
Solve Ax = b for general A using the LAPACK *gelss routines.
Definition lapack.cc:884

madness::g
NDIM const Function< R, NDIM > & g
Definition mra.h:2481

madness::copy
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence.
Definition mra.h:2066

b
static const double b
Definition nonlinschro.cc:119

print.h
Defines simple templates for printing to std::cout "a la Python".

Q
double Q(double a)
Definition relops.cc:20

c
static const double c
Definition relops.cc:10

m
static const double m
Definition relops.cc:9

L
static const double L
Definition rk.cc:46

madness::OptimizationTargetInterface
The interface to be provided by functions to be optimized.
Definition solvers.h:176

madness::OptimizationTargetInterface::value_and_gradient
virtual void value_and_gradient(const Tensor< double > &x, double &value, Tensor< double > &gradient)
Reimplement if more efficient to evaluate both value and gradient in one call.
Definition solvers.h:189

madness::OptimizationTargetInterface::~OptimizationTargetInterface
virtual ~OptimizationTargetInterface()
Definition solvers.h:199

madness::OptimizationTargetInterface::gradient
virtual Tensor< double > gradient(const Tensor< double > &x)
Should return the derivative of the function.
Definition solvers.h:184

madness::OptimizationTargetInterface::value
virtual double value(const Tensor< double > &x)=0
Should return the value of the objective function.

madness::OptimizationTargetInterface::test_gradient
double test_gradient(Tensor< double > &x, double value_precision, bool doprint=true)
Numerical test of the derivative ... optionally prints to stdout, returns max abs error.
Definition solvers.cc:38

madness::OptimizationTargetInterface::provides_gradient
virtual bool provides_gradient() const
Override this to return true if the derivative is implemented.
Definition solvers.h:181

madness::OptimizerInterface
The interface to be provided by optimizers.
Definition solvers.h:206

madness::OptimizerInterface::optimize
virtual bool optimize(Tensor< double > &x)=0

madness::OptimizerInterface::gradient_norm
virtual double gradient_norm() const =0

madness::OptimizerInterface::~OptimizerInterface
virtual ~OptimizerInterface()
Definition solvers.h:211

madness::OptimizerInterface::value
virtual double value() const =0

madness::OptimizerInterface::converged
virtual bool converged() const =0

madness::SolverTargetInterface
The interface to be provided by targets for non-linear equation solver.
Definition solvers.h:148

madness::SolverTargetInterface::residual_and_jacobian
virtual void residual_and_jacobian(const Tensor< double > &x, Tensor< double > &residual, Tensor< double > &jacobian)
Implement this if advantageous to compute residual and jacobian simultaneously.
Definition solvers.h:163

madness::SolverTargetInterface::~SolverTargetInterface
virtual ~SolverTargetInterface()
Definition solvers.h:169

madness::SolverTargetInterface::jacobian
virtual Tensor< double > jacobian(const Tensor< double > &x)
Some solvers require the jacobian or are faster if an analytic form is available.
Definition solvers.h:158

madness::SolverTargetInterface::residual
virtual Tensor< double > residual(const Tensor< double > &x)=0
Should return the residual (vector F(x))

madness::SolverTargetInterface::provides_jacobian
virtual bool provides_jacobian() const
Override this to return true if the Jacobian is implemented.
Definition solvers.h:153

tensor.h
Defines and implements most of Tensor.

tensor_lapack.h
Prototypes for a partial interface from Tensor to LAPACK.

e
void e()
Definition test_sig.cc:75

a1
const double a1
Definition vnucso.cc:85