MADNESS 0.10.1
molopt.h
Go to the documentation of this file.
1#ifndef MADNESS_MOLOPT_H
2#define MADNESS_MOLOPT_H
3
8
9#include <algorithm>
10#include <concepts>
11#include <string>
12
13namespace madness {
14
15// Generalized Optimization using targetT to provide energy and gradient of a
16// molecule targetT must have the following methods:
17// void energy_and_gradient(Molecule& molecule, double& energy, Tensor<double>&
18// gradient) double value(const Tensor<double>& x) // value of target at x
19// Let's express that in code so we can get compile time errors if we don't
20// have the right methods
21//
22//
23//
24
25class MolOpt {
26 private:
27 const int maxiter; //< Maximum number of iterations
28 const double maxstep; //< Maximum step in any one coordinate (currently
29 //Cartesian in a.u.)
30 const double etol; //< Convergence test for energy change
31 const double gtol; //< Convergence test for maximum gradient element
32 const double xtol; //< Convergence test for Cartesian step in a.u.
33 const double energy_precision; //< Assumed precision in energy
34 const double gradient_precision; //< Assumed precision in the gradient
35 const int print_level; //< print_level=0 is none; 1 is default; 2 is debug
36 const std::string update; //< update = "bfgs" (default) or "sr1"
37
38 Tensor<double> hessian;
39
40 /// Returns new search direction given gradient and projected/shifted hessian
41 Tensor<double> new_search_direction(const Tensor<double>& g,
42 const Tensor<double>& h) const {
43 Tensor<double> dx, s;
44 double tol = gradient_precision; // threshold for small hessian eigenvalues
45 double trust = std::min(maxstep * g.dim(0), 1.0);
46
47 Tensor<double> v, e;
48 syev(h, v, e);
49 if (print_level > 1) print("hessian eigenvalues", e);
50
51 // Transform gradient into spectral basis
52 Tensor<double> gv = inner(g, v);
53 if (print_level > 1) print("spectral gradient", gv);
54
55 // Take step applying restrictions
56 int nneg = 0, nsmall = 0, nrestrict = 0;
57 for (int i = 0; i < e.dim(0); i++) {
58 if (e[i] > 900.0) {
59 // This must be a translation or rotation ... skip it
60 if (print_level > 1) print("skipping redundant mode", i);
61 } else if (e[i] <
62 -tol) { // BGFS hessian should be positive ... SR1 may not be
63 if (print_level > 0)
64 printf(" forcing negative eigenvalue to be positive %d %.1e\n", i,
65 e[i]);
66 nneg++;
67 e[i] = tol; // or -e[i] ??
68 } else if (e[i] < tol) {
69 if (print_level > 0)
70 printf(" forcing small eigenvalue to be positive %d %.1e\n", i,
71 e[i]);
72 nsmall++;
73 e[i] = tol;
74 }
75
76 gv[i] = -gv[i] / e[i]; // Newton step
77
78 if (std::abs(gv[i]) > trust) { // Step restriction
79 double gvnew = trust * std::abs(gv(i)) / gv[i];
80 if (print_level > 0)
81 printf(" restricting step in spectral direction %d %.1e --> %.1e\n",
82 i, gv[i], gvnew);
83 nrestrict++;
84 gv[i] = gvnew;
85 }
86 }
87 if (print_level > 0 && (nneg || nsmall || nrestrict))
88 printf(" nneg=%d nsmall=%d nrestrict=%d\n", nneg, nsmall, nrestrict);
89
90 // Transform back from spectral basis
91 gv = inner(v, gv);
92
93 if (print_level > 1) print("cartesian dx before restriction", gv);
94
95 // Now apply step restriction in real space
96 bool printing = false;
97 for (int i = 0; i < gv.dim(0); i++) {
98 if (fabs(gv[i]) > maxstep) {
99 gv[i] = maxstep * gv[i] / fabs(gv[i]);
100 if (print_level > 0) {
101 if (!printing) printf(" restricting step in Cartesian direction");
102 printing = true;
103 printf(" %d", i);
104 }
105 }
106 }
107 if (printing) printf("\n");
108
109 return gv;
110 }
111
112 /// Makes the projector onto independent coordinates
113
114 /// For Cartesians \code P*x removes the rotations and translations;
115 /// eventually will add support for redundant internal coordinates
116 Tensor<double> make_projector(const Molecule& molecule) {
117 const int natom = molecule.natom();
118 const Tensor<double> coords = molecule.get_all_coords(); // (natom,3)
119
120 // Construct normalized vectors in V in the direction of the translations
121 // and infinitesimal rotations
122 Tensor<double> V(6, natom, 3); // First 3 translations, second 3 rotations
123
124 for (int k = 0; k < 3; k++) // Translations already orthonormal
125 V(k, _, k) = 1.0 / std::sqrt(static_cast<double>(natom));
126
127 Tensor<double> centroid(3);
128 for (int k = 0; k < 3; k++) centroid(k) = coords(_, k).sum() / natom;
129 if (print_level > 1) print("centroid", centroid);
130
131 for (int i = 0; i < natom; i++) {
132 double x = coords(i, 0) - centroid[0];
133 double y = coords(i, 1) - centroid[1];
134 double z = coords(i, 2) - centroid[2];
135
136 V(3, i, 0) = 0; // Rotn about x axis
137 V(3, i, 1) = z;
138 V(3, i, 2) = -y;
139
140 V(4, i, 0) = z; // Rotn about y axis
141 V(4, i, 1) = 0;
142 V(4, i, 2) = -x;
143
144 V(5, i, 0) = -y;
145 V(5, i, 1) = x;
146 V(5, i, 2) = 0; // Rotn about z axis
147 }
148
149 V = V.reshape(6, 3 * natom);
150
151 if (print_level > 1) print("V before orthonormal");
152 if (print_level > 1) print(V);
153
154 // Normalize rotations, orthonormalize rotns and translations,
155 // noting may end up with a zero vector for linear molecules
156 for (int i = 3; i < 6; i++) {
157 V(i, _).scale(1.0 / V(i, _).normf());
158 for (int j = 0; j < i; j++) {
159 double s = V(i, _).trace(V(j, _));
160 V(i, _) -= V(j, _) * s;
161 }
162 double vnorm = V(i, _).normf();
163 if (vnorm > 1e-6) {
164 V(i, _) *= 1.0 / vnorm;
165 } else {
166 V(i, _) = 0.0;
167 }
168 }
169
170 // The projector is 1 - VT*V
171 V = -inner(transpose(V), V);
172 for (int i = 0; i < 3 * natom; i++) V(i, i) += 1.0;
173
174 return V;
175 }
176
177 /// a1 is initial step (usually pick 1)
178 /// energy0 is energy at zero step
179 /// dxgrad is gradient projected onto search dir
180 ///
181 template <typename targetT>
182 double line_search(Molecule& molecule, targetT& target,
183 const Tensor<double>& dx, double energy0, double dxgrad,
184 double a1 = 1.0) {
185 double energy1;
186 double hess, a2;
187 const char* lsmode = "";
188
189 Tensor<double> x = molecule.get_all_coords().flat();
190
191 // Ensure we are walking downhill (BFGS should ensure that, but SR1 may not)
192 if (dxgrad * a1 > 0.0) {
193 if (print_level > 0) print(" line search gradient +ve ", a1, dxgrad);
194 a1 = -a1;
195 }
196
197 // Compute energy at new point
198 energy1 = target.value(x + a1 * dx);
199
200 // Fit to a parabola using energy0, g0, energy1
201 hess = 2.0 * (energy1 - energy0 - a1 * dxgrad) / (a1 * a1);
202 a2 = -dxgrad / hess; // Newton step
203
204 if (std::abs(energy1 - energy0) <
205 energy_precision) { // Insufficient precision
206 a2 = a1;
207 lsmode = "fixed";
208 } else if (hess > 0.0) { // Positive curvature
209 if ((energy1 - energy0) <= -energy_precision) { // a1 step went downhill
210 lsmode = "downhill";
211 if (std::abs(a2) >
212 4.0 * std::abs(a1)) { // Walking down hill but don't go too far
213 lsmode = "restrict";
214 a2 = 4.0 * a1;
215 }
216 } else { // a1 step went uphill ... we have bracketed the minimum.
217 lsmode = "bracket";
218 }
219 } else { // Negative curvature
220 if ((energy1 - energy0) < energy_precision) { // keep walking down hill
221 lsmode = "negative";
222 a2 = 2e0 * a1;
223 } else {
224 lsmode = "punt"; // negative curvature but no apparent progress
225 a2 = a1;
226 }
227 }
228
229 if (std::abs(a2 - a1) <
230 0.2 * std::abs(a1)) { // Take full step to avoid reconverging SCF
231 a2 = a1;
232 lsmode = "fixed2";
233 }
234
235 // Predicted next energy
236 double energy2 = energy0 + dxgrad * a2 + 0.5 * hess * a2 * a2;
237
238 if (print_level > 0) {
239 printf("\n line search grad=%.2e hess=%.2e mode=%s newstep=%.3f\n",
240 dxgrad, hess, lsmode, a2);
241 printf(" predicted %.12e\n\n", energy2);
242 }
243
244 return a2;
245 }
246
247 public:
248 MolOpt(int maxiter = 20, double maxstep = 0.1, double etol = 1e-4,
249 double gtol = 1e-3, double xtol = 1e-3, double energy_precision = 1e-5,
250 double gradient_precision = 1e-4, int print_level = 1,
251 std::string update = "BFGS")
252 : maxiter(maxiter),
253 maxstep(maxstep),
254 etol(std::max(etol, energy_precision)),
255 gtol(std::max(gtol, gradient_precision)),
256 xtol(xtol),
257 energy_precision(energy_precision),
258 gradient_precision(gradient_precision),
259 print_level(print_level),
261
262 {
263 if (print_level > 0) {
264 std::cout << endl;
265 print_justified("Molecular Geometry Optimization");
266 std::cout << endl;
267 print(" maximum iterations", maxiter);
268 print(" maximum step", maxstep);
269 print(" energy convergence", etol);
270 print(" gradient convergence", gtol);
271 print(" cartesian convergence", xtol);
272 print(" energy precision", energy_precision);
273 print(" gradient precision", gradient_precision);
274 print(" hessian update", update);
275 }
276 }
277
278 void set_hessian(const Tensor<double>& h) { hessian = h; }
279
280 const Tensor<double>& get_hessian() const { return hessian; }
281
282 void initialize_hessian(const Molecule& molecule) {
283 const int N = 3 * molecule.natom();
284 hessian = Tensor<double>(N, N);
285 for (int i = 0; i < N; i++) hessian(i, i) = 0.5;
286 }
287
288 template <typename targetT>
289 Molecule optimize(Molecule molecule,
290 targetT& target) { ////!!!!!!! pass by value
291 const int natom = molecule.natom();
292
293 // Code structured so it will be straightforward to introduce redundant
294 // internal coordinates
295
296 if (hessian.size() == 0) initialize_hessian(molecule);
297
298 double ep = 0.0; // Previous energy
299 Tensor<double> gp(3 * natom); // Previous gradient
300 Tensor<double> dx(3 * natom); // Current search direction
301 gp = 0.0;
302 dx = 0.0;
303
304 for (int iter = 0; iter < maxiter; iter++) {
305 if (print_level > 0)
306 print("\n\n Geometry optimization iteration", iter, "\n");
307 if (print_level > 0) molecule.print();
308
309 double e;
310 Tensor<double> g;
311
312 target.energy_and_gradient(molecule, e, g);
313
314 double de = e - ep;
315 double dxmax = dx.absmax();
316 double gmax = g.absmax();
317
318 bool dxconv = (iter > 0) && (dxmax < xtol);
319 bool gconv = gmax < gtol;
320 bool econv = (iter > 0) && (std::abs(de) < etol);
321 bool converged = econv && dxconv && gconv;
322
323 if (!converged && gmax < gradient_precision) {
324 if (print_level > 0)
325 print(
326 "\nInsufficient precision in gradient to proceed further -- "
327 "forcing convergence\n");
328 converged = true;
329 }
330
331 if (print_level > 0) {
332 const char* tf[] = {"F", "T"};
333 print(" ");
334 printf(
335 " energy delta-e max-dx max-g e dx g\n");
336 printf(
337 " ---------------- --------- --------- --------- --- --- "
338 "---\n");
339 printf(" %15.6f %9.2e %9.2e %9.2e %s %s %s\n", e, de, dxmax,
340 gmax, tf[econv], tf[dxconv], tf[gconv]);
341 // print(e, de, econv, dxmax, dxconv, dxnorm, gmax, gconv, gnorm,
342 // converged);
343 print(" ");
344 }
345
346 if (converged) {
347 if (print_level > 0) print("\n Geometry optimization converged!\n");
348 if (print_level > 0) molecule.print();
349 break;
350 }
351
352 // Construct projector
353 Tensor<double> P = make_projector(molecule);
354
355 // Project the gradient before updating Hessian
356 g = inner(P, g);
357 if (print_level > 1) print("gradient after projection", g);
358
359 if (iter > 0) {
360 if ((g - gp).absmax() < 2.0 * gradient_precision) {
361 if (print_level > 0)
362 print(
363 " skipping hessian update due to insufficient precision in "
364 "gradient");
365 } else if (update == "bfgs") {
366 QuasiNewton::hessian_update_bfgs(dx, g - gp, hessian);
367 } else if (update == "sr1") {
368 QuasiNewton::hessian_update_sr1(dx, g - gp, hessian);
369 } else {
370 throw "unknown update";
371 }
372 }
373
374 ep = e;
375 gp = g;
376
377 // Construct the projected and shifted hessian = PHP + shift*(1-P)
378 const double shift = 1000.0; // this value assumed in new_search_dir
379 Tensor<double> PHPS = inner(P, inner(hessian, P)) - shift * P;
380 if (print_level > 1) {
381 print("projector");
382 print(P);
383 }
384 for (int i = 0; i < 3 * natom; i++) PHPS(i, i) += shift;
385 if (print_level > 1) {
386 print("PHPS");
387 print(PHPS);
388 }
389
390 // Construct new search direction by diagonalizing Hessian and taking
391 // spectral step
392 dx = new_search_direction(g, PHPS);
393 if (print_level > 1) print("dx", dx);
394
395 // Line search
396 double alpha = line_search(molecule, target, dx, e, dx.trace(g), 1.0);
397 if (print_level > 1) print("step", alpha);
398 dx.scale(alpha);
399 if (print_level > 1) print("scaled dx", dx);
400
401 // Take the step
402 Tensor<double> x = molecule.get_all_coords().flat();
403 x += dx;
404 molecule.set_all_coords(x.reshape(natom, 3));
405
406 if (print_level > 1) print("new molecular coords");
407 }
408 // return the optimized molecule
409 return molecule;
410 }
411};
412} // namespace madness
413#endif // MADNESS_MOLOPT_H
void hessian_update_bfgs(const Tensor< double > &dx, const Tensor< double > &dg)
Definition kain.cc:329
void hessian_update_sr1(const Tensor< double > &s, const Tensor< double > &y)
Definition kain.cc:317
static double shift
Definition dirac-hatom.cc:19
std::complex< double > inner(const Fcwf &psi, const Fcwf &phi)
Definition fcwf.cc:275
const int maxiter
Definition gygi_soltion.cc:68
static const double v
Definition hatom_sf_dirac.cc:20
#define max(a, b)
Definition lda.h:51
void print(const tensorT &t)
Definition mcpfit.cc:140
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
void print_justified(const char *s, int column, bool underline)
Print a string justified on the left to start at the given column with optional underlining.
Definition print.cc:75
void syev(const Tensor< T > &A, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Real-symmetric or complex-Hermitian eigenproblem.
Definition lapack.cc:969
Definition mraimpl.h:50
static long abs(long a)
Definition tensor.h:218
static const long k
Definition rk.cc:44
Defines interfaces for optimization and non-linear equation solvers.
static double V(const coordT &r)
Definition tdse.cc:288
Defines and implements most of Tensor.
Prototypes for a partial interface from Tensor to LAPACK.
template Tensor< int > transpose(const Tensor< int > &t)
int P
Definition test_binsorter.cc:9
void e()
Definition test_sig.cc:75
#define N
Definition testconv.cc:37
vector_complex_function_3d update(World &world, const vector_complex_function_3d &psi, vector_complex_function_3d &vpsi, const tensor_real &e, int iter)
Definition testcosine.cc:210
static const double alpha
Definition testcosine.cc:10
double g(const coord_t &r)
Definition testgconv.cc:116
double h(const coord_1d &r)
Definition testgconv.cc:175
static Molecule molecule
Definition testperiodicdft.cc:39
const double a2
Definition vnucso.cc:86
const double a1
Definition vnucso.cc:85
FLOAT target(const FLOAT &x)
Definition y.cc:295