Commit fc983e3e authored by oliver.sander_at_tu-dresden.de

Merge branch 'feature/pn-solver' into 'master'

Implement generic proximal Newton solver for solving nonsmooth minimization problems

See merge request !63
parents de87490c f112d799
@@ -3,6 +3,8 @@
# Release 2.9
- A new solver `ProximalNewtonSolver` is added which solves non-smooth minimization problems.
- The internal matrix of the `EnergyNorm` can now be accessed by `getMatrix()`.
- `codespell` spell checker is now active for automated spell checking in the Gitlab CI.
...
@@ -6,6 +6,7 @@ install(FILES
linearsolver.hh
loopsolver.cc
loopsolver.hh
proximalnewtonsolver.hh
quadraticipopt.hh
solver.hh
tcgsolver.cc
...
// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=8 sw=4 sts=4:
#ifndef DUNE_SOLVERS_SOLVERS_PROXIMALNEWTONSOLVER_HH
#define DUNE_SOLVERS_SOLVERS_PROXIMALNEWTONSOLVER_HH
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include <dune/common/exceptions.hh>
#include <dune/solvers/common/canignore.hh>
#include <dune/solvers/common/defaultbitvector.hh>
#include <dune/solvers/common/resize.hh>
#include <dune/solvers/solvers/criterion.hh>
#include <dune/solvers/solvers/loopsolver.hh>
namespace Dune::Solvers
{
namespace ProximalNewton
{
/** \brief List of the four stages of the proximal Newton step
*
* These are used to select proper regularization rules and are handed over to the
* regularization update method.
*/
enum Stage
{
minimize, // first stage: minimizing the second order problem
configuration, // second stage: testing the new configuration x + dx
descent, // third stage: testing the descent criteria for the new configuration
accepted // last stage: the new increment was accepted
};
//! A dummy class for g=0 in the ProximalNewtonSolver
template<class VectorType>
struct ZeroFunctional
{
double operator()( const VectorType& dx ) const
{
return 0.0;
}
void updateOffset( const VectorType& x )
{
// do nothing
}
};
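// Illustrative sketch only (not part of this merge request): a nonsmooth functional with the
// same interface as ZeroFunctional, modelling a weighted l1 term g(x) = alpha * sum_i |x_i|.
// It assumes a vector type with scalar entries; the name L1Functional and the member alpha_
// are hypothetical and merely show the required operator() (evaluated at offset + dx) and
// updateOffset() methods.
template<class VectorType>
struct L1Functional
{
double operator()( const VectorType& dx ) const
{
double sum = 0.0;
for ( std::size_t i=0; i<dx.size(); i++ )
{
// evaluate |x_i + dx_i| with the stored offset x
const auto v = offset_[i] + dx[i];
sum += ( v < 0.0 ) ? -v : v;
}
return alpha_*sum;
}
void updateOffset( const VectorType& x )
{
offset_ = x;
}
double alpha_ = 1.0;
VectorType offset_;
};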
// A simple regularization updater which increases the weight tenfold (to at least 1.0) in case of failure and halves it in case of success
struct SimpleRegUpdater
{
void operator()( double& regWeight, Stage stage) const
{
if ( stage == Stage::accepted )
regWeight *= 0.5;
else
// make it at least 1.0
regWeight = std::max( 1.0, 10.0*regWeight );
}
};
}
/** \brief Generic proximal Newton solver to solve a given minimization problem
*
* The proximal Newton solver aims to solve a minimization problem given in the form
* Minimize J(x) := f(x) + g(x)
* where f is a C^2 functional and g is a possibly non-smooth functional.
* During the minimization, a sequence of increments dx as solutions of the second order subproblems
* Minimize 0.5*f''(x)[dx,dx] + f'(x)[dx] + g(x + dx) + r*||dx||^2
* is computed until the update x := x + dx converges in some sense.
* The user has to provide a suitable regularization strategy to control the regularization weight r,
* and a proper norm ||.|| for the subproblem.
*/
template<class SEA, class NSF, class SOS, class VectorType, class ErrorNorm, class RegUpdater, class BitVectorType = DefaultBitVector_t<VectorType>>
class ProximalNewtonSolver : public Solver, public CanIgnore<BitVectorType>
{
public:
using SmoothEnergyAssembler = SEA;
using NonsmoothFunctional = NSF;
using SecondOrderSolver = SOS;
using MatrixType = typename SecondOrderSolver::MatrixType;
void solve() override;
/** \brief Constructor taking all relevant data
*
* \param sea The SmoothEnergyAssembler representing f: it must provide the method
* assembleGradientAndHessian( x, f', f'' ) to set up the second order subproblem, and
* the method computeEnergy( x ) returning the value f(x).
* \param nsf The NonsmoothFunctional representing g: It must provide the method
* updateOffset( x ) to update the offset in g( x + dx ), and an evaluation operator().
* \param sos The SecondOrderSolver which is able to minimize the second order subproblem. It must provide
* a method minimize( f'', f', g, r, x, ignore) which overwrites the parameter x with the minimizer
* and throws a Dune::Exception in case the minimization failed.
* \param solution The solution vector of the global problem. It is overwritten during the computation and
* also serves as the initial value.
* \param errorNorm The norm used to measure the correction, e.g., a Solvers::EnergyNorm; typically the same
* norm is used in the second order problem and in the descent criteria.
* \param regUpdater The regularization strategy. It must provide a call operator ( r, Stage ) that overwrites r
* based on the given Stage of the computation
* \param initialRegularizationWeight The initial regularization weight to begin with
* \param maxIterations The maximal number of proximal Newton steps before the Proximal Newton solver aborts the loop
* \param threshold The threshold to stop the iteration once || dx || < threshold
* \param verbosity If the verbosity is set to Solver::FULL the ProximalNewtonSolver will print a table showing
* the current iterations and some useful information.
*/
ProximalNewtonSolver( const SmoothEnergyAssembler& sea,
NonsmoothFunctional& nsf,
const SecondOrderSolver& sos,
VectorType& solution,
const ErrorNorm& errorNorm,
const RegUpdater& regUpdater,
double& initialRegularizationWeight,
int maxIterations,
double threshold,
Solver::VerbosityMode verbosity)
: smoothEnergyAssembler_(&sea)
, nonsmoothFunctional_(&nsf)
, sos_(&sos)
, solution_(&solution)
, norm_(&errorNorm)
, regUpdater_(regUpdater)
, regWeight_(&initialRegularizationWeight)
, maxIterations_(maxIterations)
, threshold_(threshold)
, verbosity_(verbosity)
{}
/** \brief Add a stop criterion to be executed at the beginning of the loop */
template<class... Args>
void addStopCriterion(Args&&... args)
{
stopCriteria_.emplace_back(std::forward<Args>(args)...);
}
/** \brief Add a descent criterion to be executed in the descent stage of the loop */
template<class... Args>
void addDescentCriterion(Args&&... args)
{
descentCriteria_.emplace_back(std::forward<Args>(args)...);
}
/** \brief Return the currently computed gradient of the smooth energy part */
const auto& gradient() const
{
return *gradientPtr_;
}
/** \brief Return the currently computed hessian of the smooth energy part */
const auto& hessian() const
{
return *hessianPtr_;
}
/** \brief Check the existence of a current hessian matrix */
bool hasHessian() const
{
return hessianPtr_ != nullptr;
}
/** \brief Check the existence of a current gradient vector */
bool hasGradient() const
{
return gradientPtr_ != nullptr;
}
/** \brief Set a hessian from the outside */
void setHessian( const std::shared_ptr<MatrixType>& hessianPtr )
{
hessianPtr_ = hessianPtr;
}
/** \brief Set a gradient from the outside */
void setGradient( const std::shared_ptr<VectorType>& gradientPtr )
{
gradientPtr_ = gradientPtr;
}
/** \brief Return the currently computed correction in x */
const auto& correction() const
{
return *correction_;
}
/** \brief Access the currently used nonsmooth part (it changes due to the offsets) */
const auto& nonsmoothFunctional() const
{
return *nonsmoothFunctional_;
}
/** \brief direct access to the current regularization weight with read/write possibility */
auto& regularizationWeight()
{
return *regWeight_;
}
/** \brief Get current iteration number */
int getIterationCount()
{
return iter_;
}
private:
const SmoothEnergyAssembler* smoothEnergyAssembler_;
NonsmoothFunctional* nonsmoothFunctional_;
const SecondOrderSolver* sos_;
// current iterate of the solution of the minimization problem
VectorType* solution_;
// increments of the proximal Newton step
std::shared_ptr<VectorType> correction_;
const ErrorNorm* norm_;
const RegUpdater regUpdater_;
// current regularization weight
double* regWeight_;
int iter_ = 0;
int maxIterations_;
double threshold_;
Solver::VerbosityMode verbosity_;
// store the different criteria
std::vector<Dune::Solvers::Criterion> stopCriteria_;
std::vector<Dune::Solvers::Criterion> descentCriteria_;
// access to the internal data for external criteria
std::shared_ptr<MatrixType> hessianPtr_ = nullptr;
std::shared_ptr<VectorType> gradientPtr_ = nullptr;
void printLine( int iter, double usedReg, double normCorrection, double newEnergy, double energyDiff, std::string errorMessage = "") const
{
std::cout << std::setw( 7) << iter << " | "
<< std::setw(15) << std::setprecision(9) << usedReg << " | "
<< std::setw(15) << std::setprecision(9) << normCorrection << " | "
<< std::setw(15) << std::setprecision(9) << newEnergy << " | "
<< std::setw(15) << std::setprecision(9) << energyDiff << " "
<< errorMessage << std::endl;
};
};
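// Minimal usage sketch (illustrative only; MyAssembler, MyNonsmoothFunctional, MySecondOrderSolver,
// MyNorm, Vector and initialGuess are placeholders for user-provided implementations of the
// interfaces documented above, cf. also the accompanying test):
//
//   MyAssembler f;                                  // assembleGradientAndHessian(), computeEnergy()
//   MyNonsmoothFunctional g;                        // operator(), updateOffset()
//   MySecondOrderSolver sos;                        // minimize()
//   Vector x = initialGuess;                        // initial iterate, overwritten with the result
//   MyNorm norm;                                    // norm for measuring the correction
//   ProximalNewton::SimpleRegUpdater regUpdater;
//   double regWeight = 1.0;
//   ProximalNewtonSolver solver( f, g, sos, x, norm, regUpdater, regWeight,
//                                100 /*maxIterations*/, 1e-10 /*threshold*/, Solver::FULL );
//   solver.solve();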
template<class SEA, class NSF, class SOS, class V, class EN, class RU, class BV>
void ProximalNewtonSolver<SEA,NSF,SOS,V,EN,RU,BV>::solve()
{
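// Each iteration of the loop below runs through the four stages of ProximalNewton::Stage:
//   I   minimize the regularized second order subproblem for the increment dx
//   II  evaluate the energy at x + dx to check that it is a valid configuration
//   III check the user supplied descent criteria
//   IV  accept the step, update x, and let the regularization updater decrease the weight
// Whenever a stage fails, the regularization weight is increased and the step is repeated.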
using VectorType = V;
const bool printOutput = this->verbosity_ == NumProc::FULL;
auto& regWeight = *regWeight_;
if ( printOutput )
{
std::cout << " iterate | regularization | correction | energy | energy difference "<< std::endl;
std::cout << "---------+------------------+-----------------+-----------------+-------------------"<< std::endl;
}
iter_ = 0;
if ( not correction_ )
correction_ = std::make_shared<VectorType>();
// we need a zero vector for computing concrete energy descents later
VectorType zeroVector;
resizeInitializeZero(*correction_, *solution_);
resizeInitializeZero(zeroVector, *solution_);
using real_type = typename VectorType::field_type;
real_type normCorrection = std::numeric_limits<real_type>::max();
// start the loop
for( iter_ = 0; iter_ < this->maxIterations_; iter_++ )
{
// check whether the weighted correction norm is below the threshold
if ( (1.0 + regWeight)*normCorrection < threshold_ )
{
if ( printOutput )
std::cout << "ProximalNewtonSolver terminated because the weighted correction norm is below the threshold: " << (1.0 + regWeight)*normCorrection << std::endl;
break;
}
// check user added additional stop criteria
bool stop = false;
for ( auto&& c: stopCriteria_ )
{
auto r = c();
if ( std::get<0>(r) )
{
if ( printOutput )
std::cout << "ProximalNewtonSolver terminated because of a user added stop criterion: " << std::get<1>( r ) << std::endl;
stop = true;
break;
}
}
// don't do another loop
if ( stop )
break;
// keep a copy
auto usedReg = *regWeight_;
// store some information in case the step gets discarded
auto oldX = *solution_;
// assemble the quadratic and linear part if not recycled from previous step
if ( not hasGradient() or not hasHessian() )
{
hessianPtr_ = std::make_shared<MatrixType>();
gradientPtr_ = std::make_shared<VectorType>();
smoothEnergyAssembler_->assembleGradientAndHessian( oldX, *gradientPtr_, *hessianPtr_ );
}
// shift the nonsmoothFunctional by the current x
nonsmoothFunctional_->updateOffset( oldX );
*correction_ = 0.0;
// remember the old energy: note that nonsmoothFunctional_ is already shifted by oldX
auto oldEnergy = smoothEnergyAssembler_->computeEnergy( oldX ) + (*nonsmoothFunctional_)( zeroVector );
///////////////////////////////////////////////////////////////////////////////////////////
/// Stage I: Try to compute a Proximal Newton step ////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
// compute one Proximal Newton Step with the second order solver
try
{
sos_->minimize( *hessianPtr_, *gradientPtr_, *nonsmoothFunctional_, regWeight, *correction_, this->ignore() );
}
catch(const MathError& e)
{
if ( printOutput )
printLine( iter_, usedReg, 0, oldEnergy, 0, "The Proximal Newton Step reported an error: " + std::string(e.what()) );
regUpdater_(regWeight, ProximalNewton::Stage::minimize );
continue;
}
///////////////////////////////////////////////////////////////////////////////////////////
/// Stage II: Check that new x is a valid configuration ///////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
// if we got here the correction can be used to compute the next x
auto newX = oldX;
newX += *correction_;
// compute the newEnergy: check for invalid configuration
real_type newEnergy;
try
{
newEnergy = smoothEnergyAssembler_->computeEnergy( newX ) + (*nonsmoothFunctional_)( *correction_ );
}
catch(const MathError& e)
{
if ( printOutput )
printLine( iter_, usedReg, 0, oldEnergy, 0, "Computing the new energy resulted in an error: " + std::string(e.what()) );
regUpdater_(regWeight, ProximalNewton::Stage::configuration );
continue;
}
// Compute objective function descent
auto energyDiff = newEnergy;
energyDiff -= oldEnergy;
///////////////////////////////////////////////////////////////////////////////////////////
/// Stage III: Check that the new x fulfills descent criteria /////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
// check user added additional descent criteria
bool accepted = true;
std::string errorMessage;
for ( auto&& c: descentCriteria_ )
{
auto r = c();
if ( not std::get<0>( r ) )
{
if ( printOutput )
errorMessage = std::get<1>( r );
accepted = false;
break;
}
}
if ( not accepted )
{
if ( printOutput )
printLine( iter_, usedReg, 0, oldEnergy, 0, "The following descent criterion was not accepted: " + errorMessage );
regUpdater_(regWeight, ProximalNewton::Stage::descent );
continue;
}
normCorrection = (*norm_)( *correction_ );
if ( printOutput )
{
printLine( iter_, usedReg, normCorrection, newEnergy, energyDiff );
}
///////////////////////////////////////////////////////////////////////////////////////////
/// Stage IV: Update the regularization weight for the next step /////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
regUpdater_(regWeight, ProximalNewton::Stage::accepted );
// the step was accepted: update the solution
*solution_ = newX;
// reset gradient and hessian since x is updated
gradientPtr_.reset();
hessianPtr_.reset();
}
}
} // namespace Dune::Solvers
#endif
@@ -26,4 +26,6 @@ endif()
if(SuiteSparse_CHOLMOD_FOUND)
dune_add_test(SOURCES cholmodsolvertest.cc)
add_dune_suitesparse_flags(cholmodsolvertest)
dune_add_test(SOURCES proximalnewtonsolvertest.cc)
add_dune_suitesparse_flags(proximalnewtonsolvertest)
endif()
// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=8 sw=4 sts=4:
#include <config.h>
#include <bitset>
#include <cmath>
#include <iostream>
#include <sstream>
#include <dune/common/bitsetvector.hh>
#include <dune/common/parallel/mpihelper.hh>
#include <dune/solvers/norms/energynorm.hh>
#include <dune/solvers/norms/twonorm.hh>
#include <dune/solvers/solvers/cholmodsolver.hh>
#include <dune/solvers/solvers/proximalnewtonsolver.hh>
#include "common.hh"
using namespace Dune;
// grid dimension
const int dim = 3;
// f(x) = 1/4 * ||x||^4
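// Its derivatives, assembled below, are (with I the identity matrix):
//   f'(x)  = ||x||^2 * x
//   f''(x) = ||x||^2 * I + 2 * x x^T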
template<class Matrix, class Vector>
struct F
{
void assembleGradientAndHessian( const Vector& x, Vector& grad, Matrix& hess ) const
{
auto norm2 = x.two_norm2();
grad = x;
grad *= norm2;
for(size_t i=0; i<x.size(); i++)
for(size_t j=0; j<x.size(); j++)
hess[i][j] = (i==j)*norm2 + 2*x[i]*x[j];
}
double computeEnergy( const Vector& x ) const
{
return 0.25* x.two_norm2() * x.two_norm2();
}
};
// g(x) = ( -1, -1, -1, ... , -1)^T * x
template<class Vector>
struct G
{
double operator()( const Vector& x ) const
{
auto realX = x;
realX += offset_;
double sum = 0.0;
for ( size_t i=0; i<realX.size(); i++ )
sum -= realX[i];
return sum;
}
void updateOffset( const Vector& offset )
{
offset_ = offset;
}
Vector offset_;
};
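// Second order solver for the regularized subproblem of this test: since g is linear, the
// subproblem is a plain quadratic, and (with the convention that reg is added to the diagonal
// of the Hessian) its minimizer solves the linear system
//   ( f''(x) + reg*I ) dx = -f'(x) + (1,...,1)^T,
// which is assembled and handed to CHOLMOD below.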
template<class Matrix, class Vector>
struct SecondOrdersolver
{
using MatrixType = Matrix;
template<class BitVector>
void minimize( const Matrix& hess, const Vector& grad, const G<Vector>& g, double reg, Vector& dx, const BitVector& /*ignore*/) const
{
// add reg
auto HH = hess;
for ( size_t i=0; i<dx.size(); i++ )
HH[i][i] += reg;
// add g to grad
auto gg = grad;
gg *= -1.0;
for ( size_t i=0; i<dx.size(); i++ )
gg[i] += 1.0;
Solvers::CholmodSolver cholmod( HH, dx, gg );
cholmod.solve();
}
};
int main (int argc, char *argv[])
{
// initialize MPI, finalize is done automatically on exit
[[maybe_unused]] MPIHelper& mpiHelper = MPIHelper::instance(argc, argv);
const int size = 10;
using Vector = FieldVector<double,size>;
using Matrix = FieldMatrix<double,size>;
using BitVector = std::bitset<size>;
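// Derivation of the analytical solution: stationarity of J(x) = 1/4*||x||^4 - sum_i x_i
// requires ||x||^2 * x_i = 1 for every component i, so all components agree, x_i = c,
// with size*c^3 = 1, i.e. c = size^(-1/3).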
// this is the analytical solution of the problem
Vector exactSol, zeroVector;
exactSol = std::pow( size, -1.0/3.0 );
zeroVector = 0.0;
F<Matrix,Vector> f;
G<Vector> g;
g.updateOffset(zeroVector);
SecondOrdersolver<Matrix,Vector> sos;
// choose some random initial vector
Vector sol;
for ( size_t i=0; i<sol.size(); i++ )
sol[i] = i;
TwoNorm<Vector> norm;
// create the solver
Solvers::ProximalNewton::SimpleRegUpdater regUpdater;
double reg = 42.0;
Solvers::ProximalNewtonSolver proximalNewtonSolver( f, g, sos, sol, norm, regUpdater, reg, 100, 1e-14, Solver::FULL);
// set some empty ignore field
BitVector ignore;
proximalNewtonSolver.setIgnore( ignore );
// go!
proximalNewtonSolver.solve();
// compute the difference to the exact solution
sol -= exactSol;
std::cout << "Difference to exact solution = " << sol.two_norm() << std::endl;
bool passed = sol.two_norm() < 1e-15;
return passed ? 0 : 1;
}