MADNESS 0.10.1
molopt.h
Go to the documentation of this file.
1#ifndef MADNESS_MOLOPT_H
2#define MADNESS_MOLOPT_H
3
8
9#include <algorithm>
10#include <string>
11
12namespace madness {
13
14// Generalized Optimization using targetT to provide energy and gradient of a
15// molecule targetT must have the following methods:
16// void energy_and_gradient(Molecule& molecule, double& energy, Tensor<double>&
17// gradient) double value(const Tensor<double>& x) // value of target at x
18// Let's express that in code so we can get compile time errors if we don't
19// have the right methods
20//
21//
22//
23
24class MolOpt {
25 private:
26 const int maxiter; //< Maximum number of iterations
27 const double maxstep; //< Maximum step in any one coordinate (currently
28 //Cartesian in a.u.)
29 const double etol; //< Convergence test for energy change
30 const double gtol; //< Convergence test for maximum gradient element
31 const double xtol; //< Convergence test for Cartesian step in a.u.
32 const double energy_precision; //< Assumed precision in energy
33 const double gradient_precision; //< Assumed precision in the gradient
34 const int print_level; //< print_level=0 is none; 1 is default; 2 is debug
35 const std::string update; //< update = "bfgs" (default) or "sr1"
36
37 Tensor<double> hessian;
38
39 /// Returns new search direction given gradient and projected/shifted hessian
40 Tensor<double> new_search_direction(const Tensor<double>& g,
41 const Tensor<double>& h) const {
42 Tensor<double> dx, s;
43 double tol = gradient_precision; // threshold for small hessian eigenvalues
44 double trust = std::min(maxstep * g.dim(0), 1.0);
45
46 Tensor<double> v, e;
47 syev(h, v, e);
48 if (print_level > 1) print("hessian eigenvalues", e);
49
50 // Transform gradient into spectral basis
51 Tensor<double> gv = inner(g, v);
52 if (print_level > 1) print("spectral gradient", gv);
53
54 // Take step applying restrictions
55 int nneg = 0, nsmall = 0, nrestrict = 0;
56 for (int i = 0; i < e.dim(0); i++) {
57 if (e[i] > 900.0) {
58 // This must be a translation or rotation ... skip it
59 if (print_level > 1) print("skipping redundant mode", i);
60 } else if (e[i] <
61 -tol) { // BGFS hessian should be positive ... SR1 may not be
62 if (print_level > 0)
63 printf(" forcing negative eigenvalue to be positive %d %.1e\n", i,
64 e[i]);
65 nneg++;
66 e[i] = tol; // or -e[i] ??
67 } else if (e[i] < tol) {
68 if (print_level > 0)
69 printf(" forcing small eigenvalue to be positive %d %.1e\n", i,
70 e[i]);
71 nsmall++;
72 e[i] = tol;
73 }
74
75 gv[i] = -gv[i] / e[i]; // Newton step
76
77 if (std::abs(gv[i]) > trust) { // Step restriction
78 double gvnew = trust * std::abs(gv(i)) / gv[i];
79 if (print_level > 0)
80 printf(" restricting step in spectral direction %d %.1e --> %.1e\n",
81 i, gv[i], gvnew);
82 nrestrict++;
83 gv[i] = gvnew;
84 }
85 }
86 if (print_level > 0 && (nneg || nsmall || nrestrict))
87 printf(" nneg=%d nsmall=%d nrestrict=%d\n", nneg, nsmall, nrestrict);
88
89 // Transform back from spectral basis
90 gv = inner(v, gv);
91
92 if (print_level > 1) print("cartesian dx before restriction", gv);
93
94 // Now apply step restriction in real space
95 bool printing = false;
96 for (int i = 0; i < gv.dim(0); i++) {
97 if (fabs(gv[i]) > maxstep) {
98 gv[i] = maxstep * gv[i] / fabs(gv[i]);
99 if (print_level > 0) {
100 if (!printing) printf(" restricting step in Cartesian direction");
101 printing = true;
102 printf(" %d", i);
103 }
104 }
105 }
106 if (printing) printf("\n");
107
108 return gv;
109 }
110
111 /// Makes the projector onto independent coordinates
112
113 /// For Cartesians \code P*x removes the rotations and translations;
114 /// eventually will add support for redundant internal coordinates
115 Tensor<double> make_projector(const Molecule& molecule) {
116 const int natom = molecule.natom();
117 const Tensor<double> coords = molecule.get_all_coords(); // (natom,3)
118
119 // Construct normalized vectors in V in the direction of the translations
120 // and infinitesimal rotations
121 Tensor<double> V(6, natom, 3); // First 3 translations, second 3 rotations
122
123 for (int k = 0; k < 3; k++) // Translations already orthonormal
124 V(k, _, k) = 1.0 / std::sqrt(static_cast<double>(natom));
125
126 Tensor<double> centroid(3);
127 for (int k = 0; k < 3; k++) centroid(k) = coords(_, k).sum() / natom;
128 if (print_level > 1) print("centroid", centroid);
129
130 for (int i = 0; i < natom; i++) {
131 double x = coords(i, 0) - centroid[0];
132 double y = coords(i, 1) - centroid[1];
133 double z = coords(i, 2) - centroid[2];
134
135 V(3, i, 0) = 0; // Rotn about x axis
136 V(3, i, 1) = z;
137 V(3, i, 2) = -y;
138
139 V(4, i, 0) = z; // Rotn about y axis
140 V(4, i, 1) = 0;
141 V(4, i, 2) = -x;
142
143 V(5, i, 0) = -y;
144 V(5, i, 1) = x;
145 V(5, i, 2) = 0; // Rotn about z axis
146 }
147
148 V = V.reshape(6, 3 * natom);
149
150 if (print_level > 1) print("V before orthonormal");
151 if (print_level > 1) print(V);
152
153 // Normalize rotations, orthonormalize rotns and translations,
154 // noting may end up with a zero vector for linear molecules
155 for (int i = 3; i < 6; i++) {
156 V(i, _).scale(1.0 / V(i, _).normf());
157 for (int j = 0; j < i; j++) {
158 double s = V(i, _).trace(V(j, _));
159 V(i, _) -= V(j, _) * s;
160 }
161 double vnorm = V(i, _).normf();
162 if (vnorm > 1e-6) {
163 V(i, _) *= 1.0 / vnorm;
164 } else {
165 V(i, _) = 0.0;
166 }
167 }
168
169 // The projector is 1 - VT*V
170 V = -inner(transpose(V), V);
171 for (int i = 0; i < 3 * natom; i++) V(i, i) += 1.0;
172
173 return V;
174 }
175
176 /// a1 is initial step (usually pick 1)
177 /// energy0 is energy at zero step
178 /// dxgrad is gradient projected onto search dir
179 ///
180 template <typename targetT>
181 double line_search(Molecule& molecule, targetT& target,
182 const Tensor<double>& dx, double energy0, double dxgrad,
183 double a1 = 1.0) {
184 double energy1;
185 double hess, a2;
186 const char* lsmode = "";
187
188 Tensor<double> x = molecule.get_all_coords().flat();
189
190 // Ensure we are walking downhill (BFGS should ensure that, but SR1 may not)
191 if (dxgrad * a1 > 0.0) {
192 if (print_level > 0) print(" line search gradient +ve ", a1, dxgrad);
193 a1 = -a1;
194 }
195
196 // Compute energy at new point
197 energy1 = target.value(x + a1 * dx);
198
199 // Fit to a parabola using energy0, g0, energy1
200 hess = 2.0 * (energy1 - energy0 - a1 * dxgrad) / (a1 * a1);
201 a2 = -dxgrad / hess; // Newton step
202
203 if (std::abs(energy1 - energy0) <
204 energy_precision) { // Insufficient precision
205 a2 = a1;
206 lsmode = "fixed";
207 } else if (hess > 0.0) { // Positive curvature
208 if ((energy1 - energy0) <= -energy_precision) { // a1 step went downhill
209 lsmode = "downhill";
210 if (std::abs(a2) >
211 4.0 * std::abs(a1)) { // Walking down hill but don't go too far
212 lsmode = "restrict";
213 a2 = 4.0 * a1;
214 }
215 } else { // a1 step went uphill ... we have bracketed the minimum.
216 lsmode = "bracket";
217 }
218 } else { // Negative curvature
219 if ((energy1 - energy0) < energy_precision) { // keep walking down hill
220 lsmode = "negative";
221 a2 = 2e0 * a1;
222 } else {
223 lsmode = "punt"; // negative curvature but no apparent progress
224 a2 = a1;
225 }
226 }
227
228 if (std::abs(a2 - a1) <
229 0.2 * std::abs(a1)) { // Take full step to avoid reconverging SCF
230 a2 = a1;
231 lsmode = "fixed2";
232 }
233
234 // Predicted next energy
235 double energy2 = energy0 + dxgrad * a2 + 0.5 * hess * a2 * a2;
236
237 if (print_level > 0) {
238 printf("\n line search grad=%.2e hess=%.2e mode=%s newstep=%.3f\n",
239 dxgrad, hess, lsmode, a2);
240 printf(" predicted %.12e\n\n", energy2);
241 }
242
243 return a2;
244 }
245
246 public:
247 MolOpt(int maxiter = 20, double maxstep = 0.1, double etol = 1e-4,
248 double gtol = 1e-3, double xtol = 1e-3, double energy_precision = 1e-5,
249 double gradient_precision = 1e-4, int print_level = 1,
250 std::string update = "BFGS")
251 : maxiter(maxiter),
252 maxstep(maxstep),
253 etol(std::max(etol, energy_precision)),
254 gtol(std::max(gtol, gradient_precision)),
255 xtol(xtol),
256 energy_precision(energy_precision),
257 gradient_precision(gradient_precision),
258 print_level(print_level),
260
261 {
262 if (print_level > 0) {
263 std::cout << endl;
264 print_justified("Molecular Geometry Optimization");
265 std::cout << endl;
266 print(" maximum iterations", maxiter);
267 print(" maximum step", maxstep);
268 print(" energy convergence", etol);
269 print(" gradient convergence", gtol);
270 print(" cartesian convergence", xtol);
271 print(" energy precision", energy_precision);
272 print(" gradient precision", gradient_precision);
273 print(" hessian update", update);
274 }
275 }
276
277 void set_hessian(const Tensor<double>& h) { hessian = h; }
278
279 const Tensor<double>& get_hessian() const { return hessian; }
280
281 void initialize_hessian(const Molecule& molecule) {
282 const int N = 3 * molecule.natom();
283 hessian = Tensor<double>(N, N);
284 for (int i = 0; i < N; i++) hessian(i, i) = 0.5;
285 }
286
287 template <typename targetT>
288 Molecule optimize(Molecule molecule,
289 targetT& target) { ////!!!!!!! pass by value
290 const int natom = molecule.natom();
291
292 // Code structured so it will be straightforward to introduce redundant
293 // internal coordinates
294
295 if (hessian.size() == 0) initialize_hessian(molecule);
296
297 double ep = 0.0; // Previous energy
298 Tensor<double> gp(3 * natom); // Previous gradient
299 Tensor<double> dx(3 * natom); // Current search direction
300 gp = 0.0;
301 dx = 0.0;
302
303 for (int iter = 0; iter < maxiter; iter++) {
304 if (print_level > 0)
305 print("\n\n Geometry optimization iteration", iter, "\n");
306 if (print_level > 0) molecule.print();
307
308 double e;
309 Tensor<double> g;
310
311 target.energy_and_gradient(molecule, e, g);
312
313 double de = e - ep;
314 double dxmax = dx.absmax();
315 double gmax = g.absmax();
316
317 bool dxconv = (iter > 0) && (dxmax < xtol);
318 bool gconv = gmax < gtol;
319 bool econv = (iter > 0) && (std::abs(de) < etol);
320 bool converged = econv && dxconv && gconv;
321
322 if (!converged && gmax < gradient_precision) {
323 if (print_level > 0)
324 print(
325 "\nInsufficient precision in gradient to proceed further -- "
326 "forcing convergence\n");
327 converged = true;
328 }
329
330 if (print_level > 0) {
331 const char* tf[] = {"F", "T"};
332 print(" ");
333 printf(
334 " energy delta-e max-dx max-g e dx g\n");
335 printf(
336 " ---------------- --------- --------- --------- --- --- "
337 "---\n");
338 printf(" %15.6f %9.2e %9.2e %9.2e %s %s %s\n", e, de, dxmax,
339 gmax, tf[econv], tf[dxconv], tf[gconv]);
340 // print(e, de, econv, dxmax, dxconv, dxnorm, gmax, gconv, gnorm,
341 // converged);
342 print(" ");
343 }
344
345 if (converged) {
346 if (print_level > 0) print("\n Geometry optimization converged!\n");
347 if (print_level > 0) molecule.print();
348 break;
349 }
350
351 // Construct projector
352 Tensor<double> P = make_projector(molecule);
353
354 // Project the gradient before updating Hessian
355 g = inner(P, g);
356 if (print_level > 1) print("gradient after projection", g);
357
358 if (iter > 0) {
359 if ((g - gp).absmax() < 2.0 * gradient_precision) {
360 if (print_level > 0)
361 print(
362 " skipping hessian update due to insufficient precision in "
363 "gradient");
364 } else if (update == "bfgs") {
365 QuasiNewton::hessian_update_bfgs(dx, g - gp, hessian);
366 } else if (update == "sr1") {
367 QuasiNewton::hessian_update_sr1(dx, g - gp, hessian);
368 } else {
369 throw "unknown update";
370 }
371 }
372
373 ep = e;
374 gp = g;
375
376 // Construct the projected and shifted hessian = PHP + shift*(1-P)
377 const double shift = 1000.0; // this value assumed in new_search_dir
378 Tensor<double> PHPS = inner(P, inner(hessian, P)) - shift * P;
379 if (print_level > 1) {
380 print("projector");
381 print(P);
382 }
383 for (int i = 0; i < 3 * natom; i++) PHPS(i, i) += shift;
384 if (print_level > 1) {
385 print("PHPS");
386 print(PHPS);
387 }
388
389 // Construct new search direction by diagonalizing Hessian and taking
390 // spectral step
391 dx = new_search_direction(g, PHPS);
392 if (print_level > 1) print("dx", dx);
393
394 // Line search
395 double alpha = line_search(molecule, target, dx, e, dx.trace(g), 1.0);
396 if (print_level > 1) print("step", alpha);
397 dx.scale(alpha);
398 if (print_level > 1) print("scaled dx", dx);
399
400 // Take the step
401 Tensor<double> x = molecule.get_all_coords().flat();
402 x += dx;
403 molecule.set_all_coords(x.reshape(natom, 3));
404
405 if (print_level > 1) print("new molecular coords");
406 }
407 // return the optimized molecule
408 return molecule;
409 }
410};
411} // namespace madness
412#endif // MADNESS_MOLOPT_H
void hessian_update_bfgs(const Tensor< double > &dx, const Tensor< double > &dg)
Definition kain.cc:329
void hessian_update_sr1(const Tensor< double > &s, const Tensor< double > &y)
Definition kain.cc:317
static double shift
Definition dirac-hatom.cc:19
std::complex< double > inner(const Fcwf &psi, const Fcwf &phi)
Definition fcwf.cc:275
const int maxiter
Definition gygi_soltion.cc:68
static const double v
Definition hatom_sf_dirac.cc:20
#define max(a, b)
Definition lda.h:51
void print(const tensorT &t)
Definition mcpfit.cc:140
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
void print_justified(const char *s, int column, bool underline)
Print a string justified on the left to start at the given column with optional underlining.
Definition print.cc:75
void syev(const Tensor< T > &A, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Real-symmetric or complex-Hermitian eigenproblem.
Definition lapack.cc:969
Definition mraimpl.h:50
static long abs(long a)
Definition tensor.h:218
static const long k
Definition rk.cc:44
Defines interfaces for optimization and non-linear equation solvers.
static double V(const coordT &r)
Definition tdse.cc:288
Defines and implements most of Tensor.
Prototypes for a partial interface from Tensor to LAPACK.
template Tensor< int > transpose(const Tensor< int > &t)
int P
Definition test_binsorter.cc:9
void e()
Definition test_sig.cc:75
#define N
Definition testconv.cc:37
vector_complex_function_3d update(World &world, const vector_complex_function_3d &psi, vector_complex_function_3d &vpsi, const tensor_real &e, int iter)
Definition testcosine.cc:210
static const double alpha
Definition testcosine.cc:10
double g(const coord_t &r)
Definition testgconv.cc:116
double h(const coord_1d &r)
Definition testgconv.cc:175
static Molecule molecule
Definition testperiodicdft.cc:39
const double a2
Definition vnucso.cc:86
const double a1
Definition vnucso.cc:85
FLOAT target(const FLOAT &x)
Definition y.cc:295