GetFEM  5.4.3
gmm_solver_bfgs.h
Go to the documentation of this file.
1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2004-2020 Yves Renard
5 
6  This file is a part of GetFEM
7 
8  GetFEM is free software; you can redistribute it and/or modify it
9  under the terms of the GNU Lesser General Public License as published
10  by the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You should have received a copy of the GNU Lesser General Public License
18  along with this program; if not, write to the Free Software Foundation,
19  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
20 
21  As a special exception, you may use this file as it is a part of a free
22  software library without restriction. Specifically, if other files
23  instantiate templates or use macros or inline functions from this file,
24  or you compile this file and link it with other files to produce an
25  executable, this file does not by itself cause the resulting executable
26  to be covered by the GNU Lesser General Public License. This exception
27  does not however invalidate any other reasons why the executable file
28  might be covered by the GNU Lesser General Public License.
29 
30 ===========================================================================*/
31 
32 /**@file gmm_solver_bfgs.h
33  @author Yves Renard <[email protected]>
34  @date October 14 2004.
35  @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
36  */
37 #ifndef GMM_BFGS_H
38 #define GMM_BFGS_H
39 
40 #include "gmm_kernel.h"
41 #include "gmm_iter.h"
42 
43 namespace gmm {
44 
45  // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
46  // Quasi Newton method for optimization problems.
47  // with Wolfe Line search.
48 
49 
50  // delta[k] = x[k+1] - x[k]
51  // gamma[k] = grad f(x[k+1]) - grad f(x[k])
52  // H[0] = I
53  // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
54  // DFP : zeta[k] = H[k] gamma[k]
55  // tau[k] = gamma[k]^T zeta[k]
56  // rho[k] = 1 / gamma[k]^T delta[k]
57  // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
58  // - rho[k]^2 tau[k] delta[k] delta[k]^T
59  // DFP : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T
60  // - (1/tau[k])zeta[k] zeta[k]^T
61 
62  // Object representing the inverse of the Hessian
63  template <typename VECTOR> struct bfgs_invhessian {
64 
65  typedef typename linalg_traits<VECTOR>::value_type T;
66  typedef typename number_traits<T>::magnitude_type R;
67 
68  std::vector<VECTOR> delta, gamma, zeta;
69  std::vector<T> tau, rho;
70  int version;
71 
72  template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
73  copy(X, Y);
74  for (size_type k = 0 ; k < delta.size(); ++k) {
75  T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
76  switch (version) {
77  case 0 : // BFGS
78  add(scaled(zeta[k], rho[k]*xdelta), Y);
79  add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
80  break;
81  case 1 : // DFP
82  add(scaled(delta[k], rho[k]*xdelta), Y);
83  add(scaled(zeta[k], -xzeta/tau[k]), Y);
84  break;
85  }
86  }
87  }
88 
89  void restart(void) {
90  delta.resize(0); gamma.resize(0); zeta.resize(0);
91  tau.resize(0); rho.resize(0);
92  }
93 
94  template<typename VECT1, typename VECT2>
95  void update(const VECT1 &deltak, const VECT2 &gammak) {
96  T vsp = vect_sp(deltak, gammak);
97  if (vsp == T(0)) return;
98  size_type N = vect_size(deltak), k = delta.size();
99  VECTOR Y(N);
100  hmult(gammak, Y);
101  delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
102  tau.resize(k+1); rho.resize(k+1);
103  resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N);
104  gmm::copy(deltak, delta[k]);
105  gmm::copy(gammak, gamma[k]);
106  rho[k] = R(1) / vsp;
107  if (version == 0)
108  add(delta[k], scaled(Y, -1), zeta[k]);
109  else
110  gmm::copy(Y, zeta[k]);
111  tau[k] = vect_sp(gammak, zeta[k]);
112  }
113 
114  bfgs_invhessian(int v = 0) { version = v; }
115  };
116 
117 
118  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
119  void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
120  int restart, iteration& iter, int version = 0,
121  double lambda_init=0.001, double print_norm=1.0) {
122 
123  typedef typename linalg_traits<VECTOR>::value_type T;
124  typedef typename number_traits<T>::magnitude_type R;
125 
126  bfgs_invhessian<VECTOR> invhessian(version);
127  VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
128  grad(x, r);
129  R lambda = lambda_init, valx = f(x), valy;
130  int nb_restart(0);
131 
132  if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
133  while (! iter.finished_vect(r)) {
134 
135  invhessian.hmult(r, d); gmm::scale(d, T(-1));
136 
137  // Wolfe Line search
138  R derivative = gmm::vect_sp(r, d);
139  R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
140  bool unbounded = true, blocked = false, grad_computed = false;
141 
142  for(;;) {
143  add(x, scaled(d, lambda), y);
144  valy = f(y);
145  if (iter.get_noisy() >= 2) {
146  cout.precision(15);
147  cout << "Wolfe line search, lambda = " << lambda
148  << " value = " << valy /print_norm << endl;
149 // << " derivative = " << derivative
150 // << " lambda min = " << lambda_min << " lambda max = "
151 // << lambda_max << endl; getchar();
152  }
153  if (valy <= valx + m1 * lambda * derivative) {
154  grad(y, r2); grad_computed = true;
155  T derivative2 = gmm::vect_sp(r2, d);
156  if (derivative2 >= m2*derivative) break;
157  lambda_min = lambda;
158  }
159  else {
160  lambda_max = lambda;
161  unbounded = false;
162  }
163  if (unbounded) lambda *= R(10);
164  else lambda = (lambda_max + lambda_min) / R(2);
165  if (lambda == lambda_max || lambda == lambda_min) break;
166  // valy <= R(2)*valx replaced by
167  // valy <= valx + gmm::abs(derivative)*lambda_init
168  // for compatibility with negative values (08.24.07).
169  if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
170  (lambda < R(lambda_init*1E-8) ||
171  (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
172  { blocked = true; lambda = lambda_init; break; }
173  }
174 
175  // Rank two update
176  ++iter;
177  if (!grad_computed) grad(y, r2);
178  gmm::add(scaled(r2, -1), r);
179  if ((iter.get_iteration() % restart) == 0 || blocked) {
180  if (iter.get_noisy() >= 1) cout << "Restart\n";
181  invhessian.restart();
182  if (++nb_restart > 10) {
183  if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
184  return;
185  }
186  }
187  else {
188  invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
189  nb_restart = 0;
190  }
191  copy(r2, r); copy(y, x); valx = valy;
192  if (iter.get_noisy() >= 1)
193  cout << "BFGS value " << valx/print_norm << "\t";
194  }
195 
196  }
197 
198 
199  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
200  inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
201  int restart, iteration& iter, int version = 1) {
202  bfgs(f, grad, x, restart, iter, version);
203 
204  }
205 
206 
207 }
208 
209 #endif
210 
void copy(const L1 &l1, L2 &l2)
*‍/
Definition: gmm_blas.h:978
void resize(V &v, size_type n)
*‍/
Definition: gmm_blas.h:210
strongest_value_type< V1, V2 >::value_type vect_sp(const V1 &v1, const V2 &v2)
*‍/
Definition: gmm_blas.h:264
void add(const L1 &l1, L2 &l2)
*‍/
Definition: gmm_blas.h:1277
Iteration object.
Include the base gmm files.
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:49