MADNESS  0.10.1
molopt.h
Go to the documentation of this file.
1 #ifndef MADNESS_MOLOPT_H
2 #define MADNESS_MOLOPT_H
3 
4 #include <string>
5 #include <algorithm>
6 
10 
11 namespace madness {
12 
13  class MolOpt {
14  private:
15 
16  const int maxiter; //< Maximum number of iterations
17  const double maxstep; //< Maximum step in any one coordinate (currently Cartesian in a.u.)
18  const double etol; //< Convergence test for energy change
19  const double gtol; //< Convergence test for maximum gradient element
20  const double xtol; //< Convergence test for Cartesian step in a.u.
21  const double energy_precision; //< Assumed precision in energy
22  const double gradient_precision; //< Assumed precision in the gradient
23  const int print_level; //< print_level=0 is none; 1 is default; 2 is debug
24  const std::string update; //< update = "bfgs" (default) or "sr1"
25 
26  Tensor<double> hessian;
27 
28  /// Returns new search direction given gradient and projected/shifted hessian
29  Tensor<double> new_search_direction(const Tensor<double>& g, const Tensor<double>& h) const {
30  Tensor<double> dx, s;
31  double tol = gradient_precision; // threshold for small hessian eigenvalues
32  double trust = std::min(maxstep*g.dim(0),1.0);
33 
34  Tensor<double> v, e;
35  syev(h, v, e);
36  if (print_level>1) print("hessian eigenvalues", e);
37 
38  // Transform gradient into spectral basis
39  Tensor<double> gv = inner(g,v);
40  if (print_level>1) print("spectral gradient", gv);
41 
42  // Take step applying restrictions
43  int nneg=0, nsmall=0, nrestrict=0;
44  for (int i=0; i<e.dim(0); i++) {
45  if (e[i] > 900.0) {
46  // This must be a translation or rotation ... skip it
47  if (print_level>1) print("skipping redundant mode",i);
48  }
49  else if (e[i] < -tol) { // BGFS hessian should be positive ... SR1 may not be
50  if (print_level > 0) printf(" forcing negative eigenvalue to be positive %d %.1e\n", i, e[i]);
51  nneg++;
52  e[i] = tol; // or -e[i] ??
53  }
54  else if (e[i] < tol) {
55  if (print_level > 0) printf(" forcing small eigenvalue to be positive %d %.1e\n", i, e[i]);
56  nsmall++;
57  e[i] = tol;
58  }
59 
60  gv[i] = -gv[i] / e[i]; // Newton step
61 
62  if (std::abs(gv[i]) > trust) { // Step restriction
63  double gvnew = trust*std::abs(gv(i))/gv[i];
64  if (print_level > 0) printf(" restricting step in spectral direction %d %.1e --> %.1e\n", i, gv[i], gvnew);
65  nrestrict++;
66  gv[i] = gvnew;
67  }
68  }
69  if (print_level>0 && (nneg || nsmall || nrestrict))
70  printf(" nneg=%d nsmall=%d nrestrict=%d\n", nneg, nsmall, nrestrict);
71 
72  // Transform back from spectral basis
73  gv = inner(v,gv);
74 
75  if (print_level>1) print("cartesian dx before restriction", gv);
76 
77  // Now apply step restriction in real space
78  bool printing = false;
79  for (int i=0; i<gv.dim(0); i++) {
80  if (fabs(gv[i]) > maxstep) {
81  gv[i] = maxstep * gv[i]/fabs(gv[i]);
82  if (print_level > 0) {
83  if (!printing) printf(" restricting step in Cartesian direction");
84  printing = true;
85  printf(" %d", i);
86  }
87  }
88  }
89  if (printing) printf("\n");
90 
91  return gv;
92  }
93 
94 
95  /// Makes the projector onto independent coordinates
96 
97  /// For Cartesians \code P*x removes the rotations and translations;
98  /// eventually will add support for redundant internal coordinates
99  Tensor<double> make_projector(const Molecule& molecule) {
100  const int natom = molecule.natom();
101  const Tensor<double> coords = molecule.get_all_coords(); // (natom,3)
102 
103  // Construct normalized vectors in V in the direction of the translations and infinitesimal rotations
104  Tensor<double> V(6,natom,3); // First 3 translations, second 3 rotations
105 
106  for (int k=0; k<3; k++) // Translations already orthonormal
107  V(k,_,k) = 1.0/std::sqrt(double(natom));
108 
109  Tensor<double> centroid(3);
110  for (int k=0; k<3; k++) centroid(k) = coords(_,k).sum()/natom;
111  if (print_level>1) print("centroid", centroid);
112 
113  for (int i=0; i<natom; i++) {
114  double x = coords(i,0) - centroid[0];
115  double y = coords(i,1) - centroid[1];
116  double z = coords(i,2) - centroid[2];
117 
118  V(3,i,0) = 0; // Rotn about x axis
119  V(3,i,1) = z;
120  V(3,i,2) = -y;
121 
122  V(4,i,0) = z; // Rotn about y axis
123  V(4,i,1) = 0;
124  V(4,i,2) = -x;
125 
126  V(5,i,0) = -y;
127  V(5,i,1) = x;
128  V(5,i,2) = 0; // Rotn about z axis
129  }
130 
131  V = V.reshape(6,3*natom);
132 
133  if (print_level>1) print("V before orthonormal");
134  if (print_level>1) print(V);
135 
136  // Normalize rotations, orthonormalize rotns and translations,
137  // noting may end up with a zero vector for linear molecules
138  for (int i=3; i<6; i++) {
139  V(i,_).scale(1.0/V(i,_).normf());
140  for (int j=0; j<i; j++) {
141  double s = V(i,_).trace(V(j,_));
142  V(i,_) -= V(j,_)*s;
143  }
144  double vnorm = V(i,_).normf();
145  if (vnorm > 1e-6) {
146  V(i,_) *= 1.0/vnorm;
147  }
148  else {
149  V(i,_) = 0.0;
150  }
151  }
152 
153  // The projector is 1 - VT*V
154  V = -inner(transpose(V),V);
155  for (int i=0; i<3*natom; i++) V(i,i) += 1.0;
156 
157  return V;
158  }
159 
160  /// a1 is initial step (usually pick 1)
161  /// energy0 is energy at zero step
162  /// dxgrad is gradient projected onto search dir
163  ///
164  template <typename targetT>
165  double line_search(Molecule& molecule, targetT& target, const Tensor<double>& dx,
166  double energy0, double dxgrad, double a1=1.0) {
167 
168  double energy1;
169  double hess, a2;
170  const char* lsmode = "";
171 
172  Tensor<double> x = molecule.get_all_coords().flat();
173 
174  // Ensure we are walking downhill (BFGS should ensure that, but SR1 may not)
175  if (dxgrad*a1 > 0.0) {
176  if(print_level > 0) print(" line search gradient +ve ", a1, dxgrad);
177  a1 = -a1;
178  }
179 
180  // Compute energy at new point
181  energy1 = target.value(x + a1 * dx);
182 
183  // Fit to a parabola using energy0, g0, energy1
184  hess = 2.0*(energy1-energy0-a1*dxgrad)/(a1*a1);
185  a2 = -dxgrad/hess; // Newton step
186 
187  if (std::abs(energy1-energy0) < energy_precision) { // Insufficient precision
188  a2 = a1;
189  lsmode = "fixed";
190  }
191  else if (hess > 0.0) { // Positive curvature
192  if ((energy1 - energy0) <= -energy_precision) { // a1 step went downhill
193  lsmode = "downhill";
194  if (std::abs(a2) > 4.0*std::abs(a1)) { // Walking down hill but don't go too far
195  lsmode = "restrict";
196  a2 = 4.0*a1;
197  }
198  }
199  else { // a1 step went uphill ... we have bracketed the minimum.
200  lsmode = "bracket";
201  }
202  }
203  else { // Negative curvature
204  if ((energy1 - energy0) < energy_precision) { // keep walking down hill
205  lsmode = "negative";
206  a2 = 2e0*a1;
207  }
208  else {
209  lsmode = "punt"; // negative curvature but no apparent progress
210  a2 = a1;
211  }
212  }
213 
214  if (std::abs(a2-a1)<0.2*std::abs(a1)) { // Take full step to avoid reconverging SCF
215  a2 = a1;
216  lsmode = "fixed2";
217  }
218 
219 
220  // Predicted next energy
221  double energy2 = energy0 + dxgrad*a2 + 0.5*hess*a2*a2;
222 
223  if (print_level > 0) {
224  printf("\n line search grad=%.2e hess=%.2e mode=%s newstep=%.3f\n", dxgrad, hess, lsmode, a2);
225  printf(" predicted %.12e\n\n", energy2);
226  }
227 
228  return a2;
229  }
230 
231 
232  public:
233 
234  MolOpt(int maxiter=20,
235  double maxstep=0.1,
236  double etol=1e-4,
237  double gtol=1e-3,
238  double xtol=1e-3,
239  double energy_precision = 1e-5,
240  double gradient_precision = 1e-4,
241  int print_level = 1,
242  std::string update = "BFGS")
243  : maxiter(maxiter)
244  , maxstep(maxstep)
245  , etol(std::max(etol,energy_precision))
246  , gtol(std::max(gtol,gradient_precision))
247  , xtol(xtol)
248  , energy_precision(energy_precision)
249  , gradient_precision(gradient_precision)
250  , print_level(print_level)
251  , update(update)
252 
253  {
254  if (print_level > 0) {
255  std::cout << endl;
256  print_justified("Molecular Geometry Optimization");
257  std::cout << endl;
258  print(" maximum iterations", maxiter);
259  print(" maximum step", maxstep);
260  print(" energy convergence", etol);
261  print(" gradient convergence", gtol);
262  print(" cartesian convergence", xtol);
263  print(" energy precision", energy_precision);
264  print(" gradient precision", gradient_precision);
265  print(" hessian update", update);
266  }
267  }
268 
269  void set_hessian(const Tensor<double>& h) {
270  hessian = h;
271  }
272 
273  const Tensor<double>& get_hessian() const {
274  return hessian;
275  }
276 
277  void initialize_hessian(const Molecule& molecule) {
278  const int N = 3*molecule.natom();
279  hessian = Tensor<double>(N,N);
280  for (int i=0; i<N; i++) hessian(i,i) = 0.5;
281  }
282 
283  template <typename targetT>
284  void optimize(Molecule molecule, targetT& target) { ////!!!!!!! pass by value
285  const int natom = molecule.natom();
286 
287  // Code structured so it will be straightforward to introduce redundant internal coordinates
288 
289  if (hessian.size() == 0) initialize_hessian(molecule);
290 
291  double ep = 0.0; // Previous energy
292  Tensor<double> gp(3*natom); // Previous gradient
293  Tensor<double> dx(3*natom); // Current search direction
294  gp = 0.0;
295  dx = 0.0;
296 
297  for (int iter=0; iter<maxiter; iter++) {
298 
299  if (print_level > 0) print("\n\n Geometry optimization iteration", iter, "\n");
300  if (print_level > 0) molecule.print();
301 
302  double e;
303  Tensor<double> g;
304 
305  target.energy_and_gradient(molecule, e, g);
306 
307  double de = e - ep;
308  double dxmax = dx.absmax();
309  double gmax = g.absmax();
310 
311  bool dxconv = (iter>0) && (dxmax < xtol);
312  bool gconv = gmax < gtol;
313  bool econv = (iter>0) && (std::abs(de) < etol);
314  bool converged = econv && dxconv && gconv;
315 
316  if (!converged && gmax < gradient_precision) {
317  if (print_level > 0) print("\nInsufficient precision in gradient to proceed further -- forcing convergence\n");
318  converged = true;
319  }
320 
321  if (print_level > 0) {
322  const char* tf[] = {"F","T"};
323  print(" ");
324  printf(" energy delta-e max-dx max-g e dx g\n");
325  printf(" ---------------- --------- --------- --------- --- --- ---\n");
326  printf(" %15.6f %9.2e %9.2e %9.2e %s %s %s\n",
327  e, de, dxmax, gmax, tf[econv], tf[dxconv], tf[gconv]);
328  //print(e, de, econv, dxmax, dxconv, dxnorm, gmax, gconv, gnorm, converged);
329  print(" ");
330  }
331 
332  if (converged) {
333  if (print_level > 0) print("\n Geometry optimization converged!\n");
334  if (print_level > 0) molecule.print();
335  break;
336  }
337 
338  // Construct projector
339  Tensor<double> P = make_projector(molecule);
340 
341  // Project the gradient before updating Hessian
342  g = inner(P,g);
343  if (print_level>1) print("gradient after projection", g);
344 
345  if (iter > 0) {
346  if ((g-gp).absmax() < 2.0*gradient_precision) {
347  if (print_level > 0) print(" skipping hessian update due to insufficient precision in gradient");
348  }
349  else if (update == "bfgs") {
350  QuasiNewton::hessian_update_bfgs(dx, g-gp, hessian);
351  }
352  else if (update == "sr1") {
353  QuasiNewton::hessian_update_sr1(dx, g-gp, hessian);
354  }
355  else {
356  throw "unknown update";
357  }
358  }
359 
360  ep = e;
361  gp = g;
362 
363  // Construct the projected and shifted hessian = PHP + shift*(1-P)
364  const double shift = 1000.0; // this value assumed in new_search_dir
365  Tensor<double> PHPS = inner(P,inner(hessian,P)) - shift*P;
366  if (print_level > 1) {print("projector"); print(P);}
367  for (int i=0; i<3*natom; i++) PHPS(i,i) += shift;
368  if (print_level > 1) {print("PHPS"); print(PHPS);}
369 
370  // Construct new search direction by diagonalizing Hessian and taking spectral step
371  dx = new_search_direction(g, PHPS);
372  if (print_level > 1) print("dx", dx);
373 
374  // Line search
375  double alpha = line_search(molecule, target, dx, e, dx.trace(g), 1.0);
376  if (print_level > 1) print("step", alpha);
377  dx.scale(alpha);
378  if (print_level > 1) print("scaled dx", dx);
379 
380  // Take the step
381  Tensor<double> x = molecule.get_all_coords().flat();
382  x += dx;
383  molecule.set_all_coords(x.reshape(natom,3));
384 
385  if (print_level > 1) print("new molecular coords");
386  }
387  }
388  };
389 }
390 #endif // MADNESS_MOLOPT_H
static void hessian_update_bfgs(const Tensor< double > &dx, const Tensor< double > &dg, Tensor< double > &hessian)
make this static for other QN classed to have access to it
Definition: solvers.cc:179
static void hessian_update_sr1(const Tensor< double > &s, const Tensor< double > &y, Tensor< double > &hessian)
make this static for other QN classed to have access to it
Definition: solvers.cc:166
static double shift
Definition: dirac-hatom.cc:19
const int maxiter
Definition: gygi_soltion.cc:68
static const double v
Definition: hatom_sf_dirac.cc:20
#define max(a, b)
Definition: lda.h:51
File holds all helper structures necessary for the CC_Operator and CC2 class.
Definition: DFParameters.h:10
response_space transpose(response_space &f)
Definition: basic_operators.cc:10
static const Slice _(0,-1, 1)
void print(const T &t, const Ts &... ts)
Print items to std::cout (items separated by spaces) and terminate with a new line.
Definition: print.h:225
void print_justified(const char *s, int column, bool underline)
Print a string justified on the left to start at the given column with optional underlining.
Definition: print.cc:75
NDIM const Function< R, NDIM > & g
Definition: mra.h:2416
double inner(response_space &a, response_space &b)
Definition: response_functions.h:442
void syev(const Tensor< T > &A, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Real-symmetric or complex-Hermitian eigenproblem.
Definition: lapack.cc:969
Definition: mraimpl.h:50
static long abs(long a)
Definition: tensor.h:218
static const long k
Definition: rk.cc:44
Defines interfaces for optimization and non-linear equation solvers.
static double V(const coordT &r)
Definition: tdse.cc:288
Defines and implements most of Tensor.
Prototypes for a partial interface from Tensor to LAPACK.
int P
Definition: test_binsorter.cc:9
const double alpha
Definition: test_coulomb.cc:54
void e()
Definition: test_sig.cc:75
#define N
Definition: testconv.cc:37
vector_complex_function_3d update(World &world, const vector_complex_function_3d &psi, vector_complex_function_3d &vpsi, const tensor_real &e, int iter)
Definition: testcosine.cc:210
double h(const coord_1d &r)
Definition: testgconv.cc:68
static Molecule molecule
Definition: testperiodicdft.cc:38
const double a2
Definition: vnucso.cc:86
const double a1
Definition: vnucso.cc:85
FLOAT target(const FLOAT &x)
Definition: y.cc:295