diff --git a/CHANGELOG.md b/CHANGELOG.md index e4dbf48bfb09a0ddc618355522a8aea621801e2e..9789e01f8eca492a80962837fcd0987e39056cc9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ # Release 2.9 +- A new solver `ProximalNewtonSolver` is added which solves non-smooth minimization problems. + - The internal matrix of the`EnergyNorm` can now be accessed by `getMatrix()`. - `codespell` spell checker is now active for automated spell checking in the Gitlab CI. diff --git a/dune/solvers/solvers/CMakeLists.txt b/dune/solvers/solvers/CMakeLists.txt index 8946f69c27e1d3f23338cd864ef976d2c81e7134..514d97f39a5ddde0896e8a90f394b48bad2c280f 100644 --- a/dune/solvers/solvers/CMakeLists.txt +++ b/dune/solvers/solvers/CMakeLists.txt @@ -6,6 +6,7 @@ install(FILES linearsolver.hh loopsolver.cc loopsolver.hh + proximalnewtonsolver.hh quadraticipopt.hh solver.hh tcgsolver.cc diff --git a/dune/solvers/solvers/proximalnewtonsolver.hh b/dune/solvers/solvers/proximalnewtonsolver.hh new file mode 100644 index 0000000000000000000000000000000000000000..609d17de5cbc30215f528c43010e9ae607ebce7b --- /dev/null +++ b/dune/solvers/solvers/proximalnewtonsolver.hh @@ -0,0 +1,439 @@ +// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=8 sw=4 sts=4: +#ifndef DUNE_SOLVERS_SOLVERS_PROXIMALNEWTONSOLVER_HH +#define DUNE_SOLVERS_SOLVERS_PROXIMALNEWTONSOLVER_HH + +#include <dune/common/exceptions.hh> + +#include <dune/solvers/common/canignore.hh> +#include <dune/solvers/common/defaultbitvector.hh> +#include <dune/solvers/common/resize.hh> +#include <dune/solvers/solvers/criterion.hh> +#include <dune/solvers/solvers/loopsolver.hh> + + +namespace Dune::Solvers +{ + namespace ProximalNewton + { + /** \brief List of the four stages of the proximal Newton step + * + * These are used to select proper regularization rules and are handed over to the + * regularization update method. + */ + enum Stage + { + minimize, // first stage: minimizing the second order problem + configuration, // second stage: testing the new configuration x + dx + descent, // third stage: testing the descent criteria for the new configuration + accepted // last stage: the new increment was accepted + }; + + //! A dummy class for g=0 in the ProximalNewtonSolver + template<class VectorType> + struct ZeroFunctional + { + double operator()( const VectorType& dx ) const + { + return 0.0; + } + + void updateOffset( const VectorType& x ) + { + // do nothing + } + }; + + + // A simple regularization updater which doubles in case of failure and halves in case of success + struct SimpleRegUpdater + { + void operator()( double& regWeight, Stage stage) const + { + if ( stage == Stage::accepted ) + regWeight *= 0.5; + else + // make it at least 1.0 + regWeight = std::max( 1.0, 10.0*regWeight ); + } + }; + } + + + + + + /** \brief Generic proximal Newton solver to solve a given minimization problem + * + * The proximal Newton solver aims to solve a minimization problem given in the form + * Minimize J(x) := f(x) + g(x) + * where f is a C^2 functional and g is a possibly non-smooth functional. + * During the minimization, a sequence of increments dx as solutions of the second order subproblems + * Minimize 0.5*f''(x)[dx,dx] + f'(x)[dx] + g(x + dx) + r*||dx||^2 + * is computed until the update x := x + dx converges in some sense. + * The user has to provide a suitable regularization strategy to control the regularization weight r, + * and a proper norm ||.|| for the subproblem. + */ + template<class SEA, class NSF, class SOS, class VectorType, class ErrorNorm, class RegUpdater, class BitVectorType = DefaultBitVector_t<VectorType>> + class ProximalNewtonSolver : public Solver, public CanIgnore<BitVectorType> + { + public: + + using SmoothEnergyAssembler = SEA; + using NonsmoothFunctional = NSF; + using SecondOrderSolver = SOS; + using MatrixType = typename SecondOrderSolver::MatrixType; + + void solve() override; + + /** \brief Constructor taking all relevant data + * + * \param sea The SmoothEnergyAssembler representing f: It must provide the method + * assembleGradientAndHessian( x, f', f'' ) in order to compute the second order subproblem, and + * the evaluation by computeEnergy( x ) to return f(x) + * \param nsf The NonsmoothFunctional representing g: It must provide the method + * updateOffset( x ) to update the offset in g( x + dx ), and an evaluation operator(). + * \param sos The SecondOrderSolver which is able to minimize the second order subproblem. It must provide + * a method minimize( f'', f', g, r, x, ignore) which overwrites the parameter x with the minimizer + * and throws a Dune::Exception in case the minimization failed. + * \param solution This is the solution of the global problem. It is overwritten during the computation and serves + * also as the initial value. + * \param errorNorm This is the Solvers::EnergyNorm used in the second order problem and also in the descent criteria. + * \param regUpdater The regularization strategy. It must provide a call operator ( r, Stage ) that overwrites r + * based on the given Stage of the computation + * \param initialRegularizationWeight The initial regularization weight to begin with + * \param maxIterations The maximal number of proximal Newton steps before the Proximal Newton solver aborts the loop + * \param threshold The threshold to stop the iteration once || dx || < threshold + * \param verbosity If the verbosity is set to Solver::FULL the ProximalNewtonSolver will print a table showing + * the current iterations and some useful information. + */ + ProximalNewtonSolver( const SmoothEnergyAssembler& sea, + NonsmoothFunctional& nsf, + const SecondOrderSolver& sos, + VectorType& solution, + const ErrorNorm& errorNorm, + const RegUpdater& regUpdater, + double& initialRegularizationWeight, + int maxIterations, + double threshold, + Solver::VerbosityMode verbosity) + : smoothEnergyAssembler_(&sea) + , nonsmoothFunctional_(&nsf) + , sos_(&sos) + , solution_(&solution) + , norm_(&errorNorm) + , regUpdater_(regUpdater) + , regWeight_(&initialRegularizationWeight) + , maxIterations_(maxIterations) + , threshold_(threshold) + , verbosity_(verbosity) + {} + + + + /** \brief Add a stop criterion to be executed at the beginning of the loop */ + template<class... Args> + void addStopCriterion(Args&&... args) + { + stopCriteria_.emplace_back(std::forward<Args>(args)...); + } + + /** \brief Add a descent criterion to be executed in the descent stage of the loop */ + template<class... Args> + void addDescentCriterion(Args&&... args) + { + descentCriteria_.emplace_back(std::forward<Args>(args)...); + } + + /** \brief Return the currently computed gradient of smooth energy part */ + const auto& gradient() const + { + return *gradientPtr_; + } + + /** \brief Return the currently computed hessian of smooth energy part */ + const auto& hessian() const + { + return *hessianPtr_; + } + + /** \brief Check the existence of a current hessian matrix */ + bool hasHessian() const + { + return hessianPtr_ != nullptr; + } + + /** \brief Check the existence of a current gradient vector */ + bool hasGradient() const + { + return gradientPtr_ != nullptr; + } + + /** \brief Set a hessian from the outside */ + void setHessian( const std::shared_ptr<MatrixType>& hessianPtr ) + { + hessianPtr_ = hessianPtr; + } + + /** \brief Set a gradient from the outside */ + void setGradient( const std::shared_ptr<VectorType>& gradientPtr ) + { + gradientPtr_ = gradientPtr; + } + + /** \brief Return the current computed correction in x */ + const auto& correction() const + { + return *correction_; + } + + /** \brief Access the currently used nonsmooth part (it changes due to the offsets) */ + const auto& nonsmoothFunctional() const + { + return *nonsmoothFunctional_; + } + + /** \brief direct access to the current regularization weight with read/write possibility */ + auto& regularizationWeight() + { + return *regWeight_; + } + + /** \brief Get current iteration number */ + int getIterationCount() + { + return iter_; + } + + private: + + const SmoothEnergyAssembler* smoothEnergyAssembler_; + NonsmoothFunctional* nonsmoothFunctional_; + const SecondOrderSolver* sos_; + + // current iterate of the solution of the minimization problem + VectorType* solution_; + + // increments of the proximal Newton step + std::shared_ptr<VectorType> correction_; + + const ErrorNorm* norm_; + + const RegUpdater regUpdater_; + + // current regularization weight + double* regWeight_; + + int iter_; + int maxIterations_; + double threshold_; + Solver::VerbosityMode verbosity_; + + // store the different criteria + std::vector<Dune::Solvers::Criterion> stopCriteria_; + std::vector<Dune::Solvers::Criterion> descentCriteria_; + + // access to the internal data for external criteria + std::shared_ptr<MatrixType> hessianPtr_ = nullptr; + std::shared_ptr<VectorType> gradientPtr_ = nullptr; + + void printLine( int iter, double usedReg, double normCorrection, double newEnergy, double energyDiff, std::string errorMessage = "") const + { + std::cout << std::setw( 7) << iter << " | " + << std::setw(15) << std::setprecision(9) << usedReg << " | " + << std::setw(15) << std::setprecision(9) << normCorrection << " | " + << std::setw(15) << std::setprecision(9) << newEnergy << " | " + << std::setw(15) << std::setprecision(9) << energyDiff << " " + << errorMessage << std::endl; + }; + }; + + + + template<class SEA, class NSF, class SOS, class V, class EN, class RU, class BV> + void ProximalNewtonSolver<SEA,NSF,SOS,V,EN,RU,BV>::solve() + { + using VectorType = V; + + const bool printOutput = this->verbosity_ == NumProc::FULL; + + auto& regWeight = *regWeight_; + + if ( printOutput ) + { + std::cout << " iterate | regularization | correction | energy | energy difference "<< std::endl; + std::cout << "---------+------------------+-----------------+-----------------+-------------------"<< std::endl; + } + + iter_ = 0; + + if ( not correction_ ) + correction_ = std::make_shared<VectorType>(); + + // we need a zero vector for computing concrete energy descents later + VectorType zeroVector; + resizeInitializeZero(*correction_, *solution_); + resizeInitializeZero(zeroVector, *solution_); + + using real_type = typename VectorType::field_type; + real_type normCorrection = std::numeric_limits<double>::max(); + + // start the loop + for( iter_ = 0; iter_ < this->maxIterations_; iter_++ ) + { + // check for ||dx|| < threshold + if ( (1.0 + regWeight)*normCorrection < threshold_ ) + { + if ( printOutput ) + std::cout << "ProximalNewtonSolver terminated because of weighted correction is below threshold: " << (1.0 + regWeight)*normCorrection << std::endl; + break; + } + + // check user added additional stop criteria + bool stop = false; + for ( auto&& c: stopCriteria_ ) + { + auto r = c(); + if ( std::get<0>(r) ) + { + if ( printOutput ) + std::cout << "ProximalNewtonSolver terminated because of a user added stop criterion: " << std::get<1>( r ) << std::endl; + + stop = true; + break; + } + } + + // don't do another loop + if ( stop ) + break; + + // keep a copy + auto usedReg = *regWeight_; + + + // store some information in case the step gets discarded + auto oldX = *solution_; + + // assemble the quadratic and linear part if not recycled from previous step + if ( not hasGradient() or not hasHessian() ) + { + hessianPtr_ = std::make_shared<MatrixType>(); + gradientPtr_ = std::make_shared<VectorType>(); + + smoothEnergyAssembler_->assembleGradientAndHessian( oldX, *gradientPtr_, *hessianPtr_ ); + } + + // shift the nonsmoothFunctional by the current x + nonsmoothFunctional_->updateOffset( oldX ); + + *correction_ = 0.0; + + // remember the old energy: note that nonsmoothFunctional_ is already shifted by oldX + auto oldEnergy = smoothEnergyAssembler_->computeEnergy( oldX ) + (*nonsmoothFunctional_)( zeroVector ); + + /////////////////////////////////////////////////////////////////////////////////////////// + /// Stage I: Try to compute a Proximal Newton step //////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////// + + // compute one Proximal Newton Step with the second order solver + try + { + sos_->minimize( *hessianPtr_, *gradientPtr_, *nonsmoothFunctional_, regWeight, *correction_, this->ignore() ); + } + catch(const MathError& e) + { + if ( printOutput ) + printLine( iter_, usedReg, 0, oldEnergy, 0, "The Proximal Newton Step reported an error: " + std::string(e.what()) ); + + regUpdater_(regWeight, ProximalNewton::Stage::minimize ); + continue; + } + + + /////////////////////////////////////////////////////////////////////////////////////////// + /// Stage II: Check that new x is a valid configuration /////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////// + + + // if we got here the correction can be used to compute the next x + auto newX = oldX; + newX += *correction_; + + // compute the newEnergy: check for invalid configuration + real_type newEnergy; + try + { + newEnergy = smoothEnergyAssembler_->computeEnergy( newX ) + (*nonsmoothFunctional_)( *correction_ ); + } + catch(const MathError& e) + { + if ( printOutput ) + printLine( iter_, usedReg, 0, oldEnergy, 0, "Computing the new energy resulted in an error: " + std::string(e.what()) ); + + regUpdater_(regWeight, ProximalNewton::Stage::configuration ); + continue; + } + + // Compute objective function descent + auto energyDiff = newEnergy; + energyDiff -= oldEnergy; + + + /////////////////////////////////////////////////////////////////////////////////////////// + /// Stage III: Check that the new x fulfills descent criteria ///////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////// + + // check user added additional descent criteria + bool accepted = true; + std::string errorMessage; + for ( auto&& c: descentCriteria_ ) + { + auto r = c(); + if ( not std::get<0>( r ) ) + { + if ( printOutput ) + errorMessage = std::get<1>( r ); + + accepted = false; + break; + } + } + + if ( not accepted ) + { + if ( printOutput ) + printLine( iter_, usedReg, 0, oldEnergy, 0, "The following descent criterion was not accepted: " + errorMessage ); + + regUpdater_(regWeight, ProximalNewton::Stage::descent ); + continue; + } + + + normCorrection = (*norm_)( *correction_ ); + + if ( printOutput ) + { + printLine( iter_, usedReg, normCorrection, newEnergy, energyDiff ); + } + + + /////////////////////////////////////////////////////////////////////////////////////////// + /// Stage IV: Update the regularization weight for the next step ///////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////// + + regUpdater_(regWeight, ProximalNewton::Stage::accepted ); + + + // seems like the step was accepted: + *solution_ = newX; + + // reset gradient and hessian since x is updated + gradientPtr_.reset(); + hessianPtr_.reset(); + } + } +} // namespace Dune::Solvers + + +#endif diff --git a/dune/solvers/test/CMakeLists.txt b/dune/solvers/test/CMakeLists.txt index 314ad6ed239fa0103fca5211e0d5fc39fa4f70de..5d7a0f56d0aa4fbceafaa4df484bea6efccbbfcb 100644 --- a/dune/solvers/test/CMakeLists.txt +++ b/dune/solvers/test/CMakeLists.txt @@ -26,4 +26,6 @@ endif() if(SuiteSparse_CHOLMOD_FOUND) dune_add_test(SOURCES cholmodsolvertest.cc) add_dune_suitesparse_flags(cholmodsolvertest) + dune_add_test(SOURCES proximalnewtonsolvertest.cc) + add_dune_suitesparse_flags(proximalnewtonsolvertest) endif() diff --git a/dune/solvers/test/proximalnewtonsolvertest.cc b/dune/solvers/test/proximalnewtonsolvertest.cc new file mode 100644 index 0000000000000000000000000000000000000000..88a5164892a11513ac0c47696f6dbadd480391f4 --- /dev/null +++ b/dune/solvers/test/proximalnewtonsolvertest.cc @@ -0,0 +1,149 @@ +// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- +// vi: set et ts=8 sw=4 sts=4: + +#include <config.h> + +#include <cmath> +#include <iostream> +#include <sstream> + +#include <dune/common/bitsetvector.hh> +#include <dune/common/parallel/mpihelper.hh> + +#include <dune/solvers/norms/energynorm.hh> +#include <dune/solvers/norms/twonorm.hh> +#include <dune/solvers/solvers/cholmodsolver.hh> +#include <dune/solvers/solvers/proximalnewtonsolver.hh> + +#include "common.hh" + +using namespace Dune; + +// grid dimension +const int dim = 3; + +// f(x) = 1/4 * ||x||^4 +template<class Matrix, class Vector> +struct F +{ + void assembleGradientAndHessian( const Vector& x, Vector& grad, Matrix& hess ) const + { + auto norm2 = x.two_norm2(); + + grad = x; + grad *= norm2; + + for(size_t i=0; i<x.size(); i++) + for(size_t j=0; j<x.size(); j++) + hess[i][j] = (i==j)*norm2 + 2*x[i]*x[j]; + } + + double computeEnergy( const Vector& x ) const + { + return 0.25* x.two_norm2() * x.two_norm2(); + } +}; + + +// g(x) = ( -1, -1, -1, ... , -1)^T * x +template<class Vector> +struct G +{ + double operator()( const Vector& x ) const + { + auto realX = x; + realX += offset_; + + double sum = 0.0; + for ( size_t i=0; i<realX.size(); i++ ) + sum -= realX[i]; + + return sum; + } + + void updateOffset( const Vector& offset ) + { + offset_ = offset; + } + + Vector offset_; +}; + +template<class Matrix, class Vector> +struct SecondOrdersolver +{ + using MatrixType = Matrix; + + template<class BitVector> + void minimize( const Matrix& hess, const Vector& grad, const G<Vector>& g, double reg, Vector& dx, const BitVector& /*ignore*/) const + { + // add reg + auto HH = hess; + for ( size_t i=0; i<dx.size(); i++ ) + HH[i][i] += reg; + + // add g to grad + auto gg = grad; + gg *= -1.0; + for ( size_t i=0; i<dx.size(); i++ ) + gg[i] += 1.0; + + + Solvers::CholmodSolver cholmod( HH, dx, gg ); + cholmod.solve(); + } +}; + + + +int main (int argc, char *argv[]) +{ + // initialize MPI, finalize is done automatically on exit + [[maybe_unused]] MPIHelper& mpiHelper = MPIHelper::instance(argc, argv); + + const int size = 10; + + using Vector = FieldVector<double,size>; + using Matrix = FieldMatrix<double,size>; + using BitVector = std::bitset<size>; + + // this is the analytical solution of the problem + Vector exactSol, zeroVector; + exactSol = std::pow( size, -1.0/3.0 ); + + zeroVector = 0.0; + + F<Matrix,Vector> f; + G<Vector> g; + g.updateOffset(zeroVector); + SecondOrdersolver<Matrix,Vector> sos; + + + // choose some random initial vector + Vector sol; + for ( size_t i=0; i<sol.size(); i++ ) + sol[i] = i; + + TwoNorm<Vector> norm; + + // create the solver + Solvers::ProximalNewton::SimpleRegUpdater regUpdater; + double reg = 42.0; + Solvers::ProximalNewtonSolver proximalNewtonSolver( f, g, sos, sol, norm, regUpdater, reg, 100, 1e-14, Solver::FULL); + + // set some empty ignore field + BitVector ignore; + proximalNewtonSolver.setIgnore( ignore ); + + // go! + proximalNewtonSolver.solve(); + + // compute diff the exact solution + sol -= exactSol; + + std::cout << "Difference to exact solution = " << sol.two_norm() << std::endl; + + bool passed = sol.two_norm() < 1e-15; + + return passed ? 0 : 1; +}