MADNESS 0.10.1
molopt.h
Go to the documentation of this file.
1#ifndef MADNESS_MOLOPT_H
2#define MADNESS_MOLOPT_H
3
4#include <string>
5#include <algorithm>
6
10
11namespace madness {
12
13 class MolOpt {
14 private:
15
16 const int maxiter; //< Maximum number of iterations
17 const double maxstep; //< Maximum step in any one coordinate (currently Cartesian in a.u.)
18 const double etol; //< Convergence test for energy change
19 const double gtol; //< Convergence test for maximum gradient element
20 const double xtol; //< Convergence test for Cartesian step in a.u.
21 const double energy_precision; //< Assumed precision in energy
22 const double gradient_precision; //< Assumed precision in the gradient
23 const int print_level; //< print_level=0 is none; 1 is default; 2 is debug
24 const std::string update; //< update = "bfgs" (default) or "sr1"
25
26 Tensor<double> hessian;
27
28 /// Returns new search direction given gradient and projected/shifted hessian
29 Tensor<double> new_search_direction(const Tensor<double>& g, const Tensor<double>& h) const {
30 Tensor<double> dx, s;
31 double tol = gradient_precision; // threshold for small hessian eigenvalues
32 double trust = std::min(maxstep*g.dim(0),1.0);
33
34 Tensor<double> v, e;
35 syev(h, v, e);
36 if (print_level>1) print("hessian eigenvalues", e);
37
38 // Transform gradient into spectral basis
39 Tensor<double> gv = inner(g,v);
40 if (print_level>1) print("spectral gradient", gv);
41
42 // Take step applying restrictions
43 int nneg=0, nsmall=0, nrestrict=0;
44 for (int i=0; i<e.dim(0); i++) {
45 if (e[i] > 900.0) {
46 // This must be a translation or rotation ... skip it
47 if (print_level>1) print("skipping redundant mode",i);
48 }
49 else if (e[i] < -tol) { // BGFS hessian should be positive ... SR1 may not be
50 if (print_level > 0) printf(" forcing negative eigenvalue to be positive %d %.1e\n", i, e[i]);
51 nneg++;
52 e[i] = tol; // or -e[i] ??
53 }
54 else if (e[i] < tol) {
55 if (print_level > 0) printf(" forcing small eigenvalue to be positive %d %.1e\n", i, e[i]);
56 nsmall++;
57 e[i] = tol;
58 }
59
60 gv[i] = -gv[i] / e[i]; // Newton step
61
62 if (std::abs(gv[i]) > trust) { // Step restriction
63 double gvnew = trust*std::abs(gv(i))/gv[i];
64 if (print_level > 0) printf(" restricting step in spectral direction %d %.1e --> %.1e\n", i, gv[i], gvnew);
65 nrestrict++;
66 gv[i] = gvnew;
67 }
68 }
69 if (print_level>0 && (nneg || nsmall || nrestrict))
70 printf(" nneg=%d nsmall=%d nrestrict=%d\n", nneg, nsmall, nrestrict);
71
72 // Transform back from spectral basis
73 gv = inner(v,gv);
74
75 if (print_level>1) print("cartesian dx before restriction", gv);
76
77 // Now apply step restriction in real space
78 bool printing = false;
79 for (int i=0; i<gv.dim(0); i++) {
80 if (fabs(gv[i]) > maxstep) {
81 gv[i] = maxstep * gv[i]/fabs(gv[i]);
82 if (print_level > 0) {
83 if (!printing) printf(" restricting step in Cartesian direction");
84 printing = true;
85 printf(" %d", i);
86 }
87 }
88 }
89 if (printing) printf("\n");
90
91 return gv;
92 }
93
94
95 /// Makes the projector onto independent coordinates
96
97 /// For Cartesians \code P*x removes the rotations and translations;
98 /// eventually will add support for redundant internal coordinates
99 Tensor<double> make_projector(const Molecule& molecule) {
100 const int natom = molecule.natom();
101 const Tensor<double> coords = molecule.get_all_coords(); // (natom,3)
102
103 // Construct normalized vectors in V in the direction of the translations and infinitesimal rotations
104 Tensor<double> V(6,natom,3); // First 3 translations, second 3 rotations
105
106 for (int k=0; k<3; k++) // Translations already orthonormal
107 V(k,_,k) = 1.0/std::sqrt(double(natom));
108
109 Tensor<double> centroid(3);
110 for (int k=0; k<3; k++) centroid(k) = coords(_,k).sum()/natom;
111 if (print_level>1) print("centroid", centroid);
112
113 for (int i=0; i<natom; i++) {
114 double x = coords(i,0) - centroid[0];
115 double y = coords(i,1) - centroid[1];
116 double z = coords(i,2) - centroid[2];
117
118 V(3,i,0) = 0; // Rotn about x axis
119 V(3,i,1) = z;
120 V(3,i,2) = -y;
121
122 V(4,i,0) = z; // Rotn about y axis
123 V(4,i,1) = 0;
124 V(4,i,2) = -x;
125
126 V(5,i,0) = -y;
127 V(5,i,1) = x;
128 V(5,i,2) = 0; // Rotn about z axis
129 }
130
131 V = V.reshape(6,3*natom);
132
133 if (print_level>1) print("V before orthonormal");
134 if (print_level>1) print(V);
135
136 // Normalize rotations, orthonormalize rotns and translations,
137 // noting may end up with a zero vector for linear molecules
138 for (int i=3; i<6; i++) {
139 V(i,_).scale(1.0/V(i,_).normf());
140 for (int j=0; j<i; j++) {
141 double s = V(i,_).trace(V(j,_));
142 V(i,_) -= V(j,_)*s;
143 }
144 double vnorm = V(i,_).normf();
145 if (vnorm > 1e-6) {
146 V(i,_) *= 1.0/vnorm;
147 }
148 else {
149 V(i,_) = 0.0;
150 }
151 }
152
153 // The projector is 1 - VT*V
154 V = -inner(transpose(V),V);
155 for (int i=0; i<3*natom; i++) V(i,i) += 1.0;
156
157 return V;
158 }
159
160 /// a1 is initial step (usually pick 1)
161 /// energy0 is energy at zero step
162 /// dxgrad is gradient projected onto search dir
163 ///
164 template <typename targetT>
165 double line_search(Molecule& molecule, targetT& target, const Tensor<double>& dx,
166 double energy0, double dxgrad, double a1=1.0) {
167
168 double energy1;
169 double hess, a2;
170 const char* lsmode = "";
171
172 Tensor<double> x = molecule.get_all_coords().flat();
173
174 // Ensure we are walking downhill (BFGS should ensure that, but SR1 may not)
175 if (dxgrad*a1 > 0.0) {
176 if(print_level > 0) print(" line search gradient +ve ", a1, dxgrad);
177 a1 = -a1;
178 }
179
180 // Compute energy at new point
181 energy1 = target.value(x + a1 * dx);
182
183 // Fit to a parabola using energy0, g0, energy1
184 hess = 2.0*(energy1-energy0-a1*dxgrad)/(a1*a1);
185 a2 = -dxgrad/hess; // Newton step
186
187 if (std::abs(energy1-energy0) < energy_precision) { // Insufficient precision
188 a2 = a1;
189 lsmode = "fixed";
190 }
191 else if (hess > 0.0) { // Positive curvature
192 if ((energy1 - energy0) <= -energy_precision) { // a1 step went downhill
193 lsmode = "downhill";
194 if (std::abs(a2) > 4.0*std::abs(a1)) { // Walking down hill but don't go too far
195 lsmode = "restrict";
196 a2 = 4.0*a1;
197 }
198 }
199 else { // a1 step went uphill ... we have bracketed the minimum.
200 lsmode = "bracket";
201 }
202 }
203 else { // Negative curvature
204 if ((energy1 - energy0) < energy_precision) { // keep walking down hill
205 lsmode = "negative";
206 a2 = 2e0*a1;
207 }
208 else {
209 lsmode = "punt"; // negative curvature but no apparent progress
210 a2 = a1;
211 }
212 }
213
214 if (std::abs(a2-a1)<0.2*std::abs(a1)) { // Take full step to avoid reconverging SCF
215 a2 = a1;
216 lsmode = "fixed2";
217 }
218
219
220 // Predicted next energy
221 double energy2 = energy0 + dxgrad*a2 + 0.5*hess*a2*a2;
222
223 if (print_level > 0) {
224 printf("\n line search grad=%.2e hess=%.2e mode=%s newstep=%.3f\n", dxgrad, hess, lsmode, a2);
225 printf(" predicted %.12e\n\n", energy2);
226 }
227
228 return a2;
229 }
230
231
232 public:
233
234 MolOpt(int maxiter=20,
235 double maxstep=0.1,
236 double etol=1e-4,
237 double gtol=1e-3,
238 double xtol=1e-3,
239 double energy_precision = 1e-5,
240 double gradient_precision = 1e-4,
241 int print_level = 1,
242 std::string update = "BFGS")
244 , maxstep(maxstep)
245 , etol(std::max(etol,energy_precision))
246 , gtol(std::max(gtol,gradient_precision))
247 , xtol(xtol)
248 , energy_precision(energy_precision)
249 , gradient_precision(gradient_precision)
250 , print_level(print_level)
251 , update(update)
252
253 {
254 if (print_level > 0) {
255 std::cout << endl;
256 print_justified("Molecular Geometry Optimization");
257 std::cout << endl;
258 print(" maximum iterations", maxiter);
259 print(" maximum step", maxstep);
260 print(" energy convergence", etol);
261 print(" gradient convergence", gtol);
262 print(" cartesian convergence", xtol);
263 print(" energy precision", energy_precision);
264 print(" gradient precision", gradient_precision);
265 print(" hessian update", update);
266 }
267 }
268
269 void set_hessian(const Tensor<double>& h) {
270 hessian = h;
271 }
272
273 const Tensor<double>& get_hessian() const {
274 return hessian;
275 }
276
277 void initialize_hessian(const Molecule& molecule) {
278 const int N = 3*molecule.natom();
279 hessian = Tensor<double>(N,N);
280 for (int i=0; i<N; i++) hessian(i,i) = 0.5;
281 }
282
283 template <typename targetT>
284 void optimize(Molecule molecule, targetT& target) { ////!!!!!!! pass by value
285 const int natom = molecule.natom();
286
287 // Code structured so it will be straightforward to introduce redundant internal coordinates
288
289 if (hessian.size() == 0) initialize_hessian(molecule);
290
291 double ep = 0.0; // Previous energy
292 Tensor<double> gp(3*natom); // Previous gradient
293 Tensor<double> dx(3*natom); // Current search direction
294 gp = 0.0;
295 dx = 0.0;
296
297 for (int iter=0; iter<maxiter; iter++) {
298
299 if (print_level > 0) print("\n\n Geometry optimization iteration", iter, "\n");
300 if (print_level > 0) molecule.print();
301
302 double e;
303 Tensor<double> g;
304
305 target.energy_and_gradient(molecule, e, g);
306
307 double de = e - ep;
308 double dxmax = dx.absmax();
309 double gmax = g.absmax();
310
311 bool dxconv = (iter>0) && (dxmax < xtol);
312 bool gconv = gmax < gtol;
313 bool econv = (iter>0) && (std::abs(de) < etol);
314 bool converged = econv && dxconv && gconv;
315
316 if (!converged && gmax < gradient_precision) {
317 if (print_level > 0) print("\nInsufficient precision in gradient to proceed further -- forcing convergence\n");
318 converged = true;
319 }
320
321 if (print_level > 0) {
322 const char* tf[] = {"F","T"};
323 print(" ");
324 printf(" energy delta-e max-dx max-g e dx g\n");
325 printf(" ---------------- --------- --------- --------- --- --- ---\n");
326 printf(" %15.6f %9.2e %9.2e %9.2e %s %s %s\n",
327 e, de, dxmax, gmax, tf[econv], tf[dxconv], tf[gconv]);
328 //print(e, de, econv, dxmax, dxconv, dxnorm, gmax, gconv, gnorm, converged);
329 print(" ");
330 }
331
332 if (converged) {
333 if (print_level > 0) print("\n Geometry optimization converged!\n");
334 if (print_level > 0) molecule.print();
335 break;
336 }
337
338 // Construct projector
339 Tensor<double> P = make_projector(molecule);
340
341 // Project the gradient before updating Hessian
342 g = inner(P,g);
343 if (print_level>1) print("gradient after projection", g);
344
345 if (iter > 0) {
346 if ((g-gp).absmax() < 2.0*gradient_precision) {
347 if (print_level > 0) print(" skipping hessian update due to insufficient precision in gradient");
348 }
349 else if (update == "bfgs") {
350 QuasiNewton::hessian_update_bfgs(dx, g-gp, hessian);
351 }
352 else if (update == "sr1") {
353 QuasiNewton::hessian_update_sr1(dx, g-gp, hessian);
354 }
355 else {
356 throw "unknown update";
357 }
358 }
359
360 ep = e;
361 gp = g;
362
363 // Construct the projected and shifted hessian = PHP + shift*(1-P)
364 const double shift = 1000.0; // this value assumed in new_search_dir
365 Tensor<double> PHPS = inner(P,inner(hessian,P)) - shift*P;
366 if (print_level > 1) {print("projector"); print(P);}
367 for (int i=0; i<3*natom; i++) PHPS(i,i) += shift;
368 if (print_level > 1) {print("PHPS"); print(PHPS);}
369
370 // Construct new search direction by diagonalizing Hessian and taking spectral step
371 dx = new_search_direction(g, PHPS);
372 if (print_level > 1) print("dx", dx);
373
374 // Line search
375 double alpha = line_search(molecule, target, dx, e, dx.trace(g), 1.0);
376 if (print_level > 1) print("step", alpha);
377 dx.scale(alpha);
378 if (print_level > 1) print("scaled dx", dx);
379
380 // Take the step
381 Tensor<double> x = molecule.get_all_coords().flat();
382 x += dx;
383 molecule.set_all_coords(x.reshape(natom,3));
384
385 if (print_level > 1) print("new molecular coords");
386 }
387 }
388 };
389}
390#endif // MADNESS_MOLOPT_H
void hessian_update_bfgs(const Tensor< double > &dx, const Tensor< double > &dg)
Definition kain.cc:329
void hessian_update_sr1(const Tensor< double > &s, const Tensor< double > &y)
Definition kain.cc:317
static double shift
Definition dirac-hatom.cc:19
std::complex< double > inner(const Fcwf &psi, const Fcwf &phi)
Definition fcwf.cc:275
const int maxiter
Definition gygi_soltion.cc:68
static const double v
Definition hatom_sf_dirac.cc:20
#define max(a, b)
Definition lda.h:51
void print(const tensorT &t)
Definition mcpfit.cc:140
Namespace for all elements and tools of MADNESS.
Definition DFParameters.h:10
void print_justified(const char *s, int column, bool underline)
Print a string justified on the left to start at the given column with optional underlining.
Definition print.cc:75
void syev(const Tensor< T > &A, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Real-symmetric or complex-Hermitian eigenproblem.
Definition lapack.cc:969
Definition mraimpl.h:50
static long abs(long a)
Definition tensor.h:218
static const long k
Definition rk.cc:44
Defines interfaces for optimization and non-linear equation solvers.
static double V(const coordT &r)
Definition tdse.cc:288
Defines and implements most of Tensor.
Prototypes for a partial interface from Tensor to LAPACK.
template Tensor< int > transpose(const Tensor< int > &t)
int P
Definition test_binsorter.cc:9
void e()
Definition test_sig.cc:75
#define N
Definition testconv.cc:37
vector_complex_function_3d update(World &world, const vector_complex_function_3d &psi, vector_complex_function_3d &vpsi, const tensor_real &e, int iter)
Definition testcosine.cc:210
static const double alpha
Definition testcosine.cc:10
double h(const coord_1d &r)
Definition testgconv.cc:68
double g(const coord_1d &r)
Definition testgconv.cc:49
static Molecule molecule
Definition testperiodicdft.cc:38
const double a2
Definition vnucso.cc:86
const double a1
Definition vnucso.cc:85
FLOAT target(const FLOAT &x)
Definition y.cc:295