Commit fc983e3e authored by oliver.sander_at_tu-dresden.de

Merge branch 'feature/pn-solver' into 'master'

Implement generic proximal Newton solver for solving nonsmooth minimization problems

See merge request !63
parents de87490c f112d799
@@ -3,6 +3,8 @@
# Release 2.9
- A new solver `ProximalNewtonSolver` is added which solves non-smooth minimization problems.
- The internal matrix of the `EnergyNorm` can now be accessed by `getMatrix()`.
- `codespell` spell checker is now active for automated spell checking in the Gitlab CI.
...
@@ -6,6 +6,7 @@ install(FILES
linearsolver.hh
loopsolver.cc
loopsolver.hh
proximalnewtonsolver.hh
quadraticipopt.hh
solver.hh
tcgsolver.cc
...
// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=8 sw=4 sts=4:
#ifndef DUNE_SOLVERS_SOLVERS_PROXIMALNEWTONSOLVER_HH
#define DUNE_SOLVERS_SOLVERS_PROXIMALNEWTONSOLVER_HH
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include <dune/common/exceptions.hh>
#include <dune/solvers/common/canignore.hh>
#include <dune/solvers/common/defaultbitvector.hh>
#include <dune/solvers/common/resize.hh>
#include <dune/solvers/solvers/criterion.hh>
#include <dune/solvers/solvers/loopsolver.hh>
namespace Dune::Solvers
{
namespace ProximalNewton
{
/** \brief List of the four stages of the proximal Newton step
*
* These are used to select proper regularization rules and are handed over to the
* regularization update method.
*/
enum Stage
{
minimize, // first stage: minimizing the second order problem
configuration, // second stage: testing the new configuration x + dx
descent, // third stage: testing the descent criteria for the new configuration
accepted // last stage: the new increment was accepted
};
//! A dummy class for g=0 in the ProximalNewtonSolver
template<class VectorType>
struct ZeroFunctional
{
double operator()( const VectorType& dx ) const
{
return 0.0;
}
void updateOffset( const VectorType& x )
{
// do nothing
}
};
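// Illustrative sketch only (not part of this merge request): a nonsmooth functional with the
// same interface as ZeroFunctional, modelling a weighted l1 term g(x) = alpha * sum_i |x_i|.
// It assumes a vector type with scalar entries; the name L1Functional and the member alpha_
// are hypothetical and merely show the required operator() (evaluated at offset + dx) and
// updateOffset() methods.
template<class VectorType>
struct L1Functional
{
double operator()( const VectorType& dx ) const
{
double sum = 0.0;
for ( std::size_t i=0; i<dx.size(); i++ )
{
// evaluate |x_i + dx_i| with the stored offset x
const auto v = offset_[i] + dx[i];
sum += ( v < 0.0 ) ? -v : v;
}
return alpha_*sum;
}
void updateOffset( const VectorType& x )
{
offset_ = x;
}
double alpha_ = 1.0;
VectorType offset_;
};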
// A simple regularization updater which increases the weight tenfold (to at least 1.0) in case of failure and halves it in case of success
struct SimpleRegUpdater
{
void operator()( double& regWeight, Stage stage) const
{
if ( stage == Stage::accepted )
regWeight *= 0.5;
else
// make it at least 1.0
regWeight = std::max( 1.0, 10.0*regWeight );
}
};
}
/** \brief Generic proximal Newton solver to solve a given minimization problem
*
* The proximal Newton solver aims to solve a minimization problem given in the form
* Minimize J(x) := f(x) + g(x)
* where f is a C^2 functional and g is a possibly non-smooth functional.
* During the minimization, a sequence of increments dx as solutions of the second order subproblems
* Minimize 0.5*f''(x)[dx,dx] + f'(x)[dx] + g(x + dx) + r*||dx||^2
* is computed until the update x := x + dx converges in some sense.
* The user has to provide a suitable regularization strategy to control the regularization weight r,
* and a proper norm ||.|| for the subproblem.
*/
template<class SEA, class NSF, class SOS, class VectorType, class ErrorNorm, class RegUpdater, class BitVectorType = DefaultBitVector_t<VectorType>>
class ProximalNewtonSolver : public Solver, public CanIgnore<BitVectorType>
{
public:
using SmoothEnergyAssembler = SEA;
using NonsmoothFunctional = NSF;
using SecondOrderSolver = SOS;
using MatrixType = typename SecondOrderSolver::MatrixType;
void solve() override;
/** \brief Constructor taking all relevant data
*
* \param sea The SmoothEnergyAssembler representing f: it must provide the method
* assembleGradientAndHessian( x, f', f'' ) to set up the second order subproblem, and
* the method computeEnergy( x ) returning the value f(x).
* \param nsf The NonsmoothFunctional representing g: It must provide the method
* updateOffset( x ) to update the offset in g( x + dx ), and an evaluation operator().
* \param sos The SecondOrderSolver which is able to minimize the second order subproblem. It must provide
* a method minimize( f'', f', g, r, x, ignore) which overwrites the parameter x with the minimizer
* and throws a Dune::Exception in case the minimization failed.
* \param solution The solution vector of the global problem. It is overwritten during the computation and
* also serves as the initial value.
* \param errorNorm The norm used to measure the correction, e.g., a Solvers::EnergyNorm; typically the same
* norm is used in the second order problem and in the descent criteria.
* \param regUpdater The regularization strategy. It must provide a call operator ( r, Stage ) that overwrites r
* based on the given Stage of the computation
* \param initialRegularizationWeight The initial regularization weight to begin with
* \param maxIterations The maximal number of proximal Newton steps before the Proximal Newton solver aborts the loop
* \param threshold The threshold to stop the iteration once || dx || < threshold
* \param verbosity If the verbosity is set to Solver::FULL the ProximalNewtonSolver will print a table showing
* the current iterations and some useful information.
*/
ProximalNewtonSolver( const SmoothEnergyAssembler& sea,
NonsmoothFunctional& nsf,
const SecondOrderSolver& sos,
VectorType& solution,
const ErrorNorm& errorNorm,
const RegUpdater& regUpdater,
double& initialRegularizationWeight,
int maxIterations,
double threshold,
Solver::VerbosityMode verbosity)
: smoothEnergyAssembler_(&sea)
, nonsmoothFunctional_(&nsf)
, sos_(&sos)
, solution_(&solution)
, norm_(&errorNorm)
, regUpdater_(regUpdater)
, regWeight_(&initialRegularizationWeight)
, maxIterations_(maxIterations)
, threshold_(threshold)
, verbosity_(verbosity)
{}
/** \brief Add a stop criterion to be executed at the beginning of the loop */
template<class... Args>
void addStopCriterion(Args&&... args)
{
stopCriteria_.emplace_back(std::forward<Args>(args)...);
}
/** \brief Add a descent criterion to be executed in the descent stage of the loop */
template<class... Args>
void addDescentCriterion(Args&&... args)
{
descentCriteria_.emplace_back(std::forward<Args>(args)...);
}
/** \brief Return the currently computed gradient of the smooth energy part */
const auto& gradient() const
{
return *gradientPtr_;
}
/** \brief Return the currently computed hessian of the smooth energy part */
const auto& hessian() const
{
return *hessianPtr_;
}
/** \brief Check the existence of a current hessian matrix */
bool hasHessian() const
{
return hessianPtr_ != nullptr;
}
/** \brief Check the existence of a current gradient vector */
bool hasGradient() const
{
return gradientPtr_ != nullptr;
}
/** \brief Set a hessian from the outside */
void setHessian( const std::shared_ptr<MatrixType>& hessianPtr )
{
hessianPtr_ = hessianPtr;
}
/** \brief Set a gradient from the outside */
void setGradient( const std::shared_ptr<VectorType>& gradientPtr )
{
gradientPtr_ = gradientPtr;
}
/** \brief Return the currently computed correction in x */
const auto& correction() const
{
return *correction_;
}
/** \brief Access the currently used nonsmooth part (it changes due to the offsets) */
const auto& nonsmoothFunctional() const
{
return *nonsmoothFunctional_;
}
/** \brief direct access to the current regularization weight with read/write possibility */
auto& regularizationWeight()
{
return *regWeight_;
}
/** \brief Get current iteration number */
int getIterationCount()
{
return iter_;
}
private:
const SmoothEnergyAssembler* smoothEnergyAssembler_;
NonsmoothFunctional* nonsmoothFunctional_;
const SecondOrderSolver* sos_;
// current iterate of the solution of the minimization problem
VectorType* solution_;
// increments of the proximal Newton step
std::shared_ptr<VectorType> correction_;
const ErrorNorm* norm_;
const RegUpdater regUpdater_;
// current regularization weight
double* regWeight_;
int iter_ = 0;
int maxIterations_;
double threshold_;
Solver::VerbosityMode verbosity_;
// store the different criteria
std::vector<Dune::Solvers::Criterion> stopCriteria_;
std::vector<Dune::Solvers::Criterion> descentCriteria_;
// access to the internal data for external criteria
std::shared_ptr<MatrixType> hessianPtr_ = nullptr;
std::shared_ptr<VectorType> gradientPtr_ = nullptr;
void printLine( int iter, double usedReg, double normCorrection, double newEnergy, double energyDiff, std::string errorMessage = "") const
{
std::cout << std::setw( 7) << iter << " | "
<< std::setw(15) << std::setprecision(9) << usedReg << " | "
<< std::setw(15) << std::setprecision(9) << normCorrection << " | "
<< std::setw(15) << std::setprecision(9) << newEnergy << " | "
<< std::setw(15) << std::setprecision(9) << energyDiff << " "
<< errorMessage << std::endl;
};
};
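// Minimal usage sketch (illustrative only; MyAssembler, MyNonsmoothFunctional, MySecondOrderSolver,
// MyNorm, Vector and initialGuess are placeholders for user-provided implementations of the
// interfaces documented above, cf. also the accompanying test):
//
//   MyAssembler f;                                  // assembleGradientAndHessian(), computeEnergy()
//   MyNonsmoothFunctional g;                        // operator(), updateOffset()
//   MySecondOrderSolver sos;                        // minimize()
//   Vector x = initialGuess;                        // initial iterate, overwritten with the result
//   MyNorm norm;                                    // norm for measuring the correction
//   ProximalNewton::SimpleRegUpdater regUpdater;
//   double regWeight = 1.0;
//   ProximalNewtonSolver solver( f, g, sos, x, norm, regUpdater, regWeight,
//                                100 /*maxIterations*/, 1e-10 /*threshold*/, Solver::FULL );
//   solver.solve();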
template<class SEA, class NSF, class SOS, class V, class EN, class RU, class BV>
void ProximalNewtonSolver<SEA,NSF,SOS,V,EN,RU,BV>::solve()
{
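// Each iteration of the loop below runs through the four stages of ProximalNewton::Stage:
//   I   minimize the regularized second order subproblem for the increment dx
//   II  evaluate the energy at x + dx to check that it is a valid configuration
//   III check the user supplied descent criteria
//   IV  accept the step, update x, and let the regularization updater decrease the weight
// Whenever a stage fails, the regularization weight is increased and the step is repeated.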
using VectorType = V;
const bool printOutput = this->verbosity_ == NumProc::FULL;
auto& regWeight = *regWeight_;
if ( printOutput )
{
std::cout << " iterate | regularization | correction | energy | energy difference "<< std::endl;
std::cout << "---------+------------------+-----------------+-----------------+-------------------"<< std::endl;
}
iter_ = 0;
if ( not correction_ )
correction_ = std::make_shared<VectorType>();
// we need a zero vector for computing concrete energy descents later
VectorType zeroVector;
resizeInitializeZero(*correction_, *solution_);
resizeInitializeZero(zeroVector, *solution_);
using real_type = typename VectorType::field_type;
real_type normCorrection = std::numeric_limits<real_type>::max();
// start the loop
for( iter_ = 0; iter_ < this->maxIterations_; iter_++ )
{
// check whether the weighted correction norm is below the threshold
if ( (1.0 + regWeight)*normCorrection < threshold_ )
{
if ( printOutput )
std::cout << "ProximalNewtonSolver terminated because the weighted correction norm is below the threshold: " << (1.0 + regWeight)*normCorrection << std::endl;
break;
}
// check user added additional stop criteria
bool stop = false;
for ( auto&& c: stopCriteria_ )
{
auto r = c();
if ( std::get<0>(r) )
{
if ( printOutput )
std::cout << "ProximalNewtonSolver terminated because of a user added stop criterion: " << std::get<1>( r ) << std::endl;
stop = true;
break;
}
}
// don't do another loop
if ( stop )
break;
// keep a copy
auto usedReg = *regWeight_;
// store some information in case the step gets discarded
auto oldX = *solution_;
// assemble the quadratic and linear part if not recycled from previous step
if ( not hasGradient() or not hasHessian() )
{
hessianPtr_ = std::make_shared<MatrixType>();
gradientPtr_ = std::make_shared<VectorType>();
smoothEnergyAssembler_->assembleGradientAndHessian( oldX, *gradientPtr_, *hessianPtr_ );
}
// shift the nonsmoothFunctional by the current x
nonsmoothFunctional_->updateOffset( oldX );
*correction_ = 0.0;
// remember the old energy: note that nonsmoothFunctional_ is already shifted by oldX
auto oldEnergy = smoothEnergyAssembler_->computeEnergy( oldX ) + (*nonsmoothFunctional_)( zeroVector );
///////////////////////////////////////////////////////////////////////////////////////////
/// Stage I: Try to compute a Proximal Newton step ////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
// compute one Proximal Newton Step with the second order solver
try
{
sos_->minimize( *hessianPtr_, *gradientPtr_, *nonsmoothFunctional_, regWeight, *correction_, this->ignore() );
}
catch(const MathError& e)
{
if ( printOutput )
printLine( iter_, usedReg, 0, oldEnergy, 0, "The Proximal Newton Step reported an error: " + std::string(e.what()) );
regUpdater_(regWeight, ProximalNewton::Stage::minimize );
continue;
}
///////////////////////////////////////////////////////////////////////////////////////////
/// Stage II: Check that new x is a valid configuration ///////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
// if we got here the correction can be used to compute the next x
auto newX = oldX;
newX += *correction_;
// compute the newEnergy: check for invalid configuration
real_type newEnergy;
try
{
newEnergy = smoothEnergyAssembler_->computeEnergy( newX ) + (*nonsmoothFunctional_)( *correction_ );
}
catch(const MathError& e)
{
if ( printOutput )
printLine( iter_, usedReg, 0, oldEnergy, 0, "Computing the new energy resulted in an error: " + std::string(e.what()) );
regUpdater_(regWeight, ProximalNewton::Stage::configuration );
continue;
}
// Compute objective function descent
auto energyDiff = newEnergy;
energyDiff -= oldEnergy;
///////////////////////////////////////////////////////////////////////////////////////////
/// Stage III: Check that the new x fulfills descent criteria /////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
// check user added additional descent criteria
bool accepted = true;
std::string errorMessage;
for ( auto&& c: descentCriteria_ )
{
auto r = c();
if ( not std::get<0>( r ) )
{
if ( printOutput )
errorMessage = std::get<1>( r );
accepted = false;
break;
}
}
if ( not accepted )
{
if ( printOutput )
printLine( iter_, usedReg, 0, oldEnergy, 0, "The following descent criterion was not accepted: " + errorMessage );
regUpdater_(regWeight, ProximalNewton::Stage::descent );
continue;
}
normCorrection = (*norm_)( *correction_ );
if ( printOutput )
{
printLine( iter_, usedReg, normCorrection, newEnergy, energyDiff );
}
///////////////////////////////////////////////////////////////////////////////////////////
/// Stage IV: Update the regularization weight for the next step /////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
regUpdater_(regWeight, ProximalNewton::Stage::accepted );
// the step was accepted: update the solution
*solution_ = newX;
// reset gradient and hessian since x is updated
gradientPtr_.reset();
hessianPtr_.reset();
}
}
} // namespace Dune::Solvers
#endif
@@ -26,4 +26,6 @@ endif()
if(SuiteSparse_CHOLMOD_FOUND)
dune_add_test(SOURCES cholmodsolvertest.cc)
add_dune_suitesparse_flags(cholmodsolvertest)
dune_add_test(SOURCES proximalnewtonsolvertest.cc)
add_dune_suitesparse_flags(proximalnewtonsolvertest)
endif()
// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
// vi: set et ts=8 sw=4 sts=4:
#include <config.h>
#include <bitset>
#include <cmath>
#include <iostream>
#include <sstream>
#include <dune/common/bitsetvector.hh>
#include <dune/common/parallel/mpihelper.hh>
#include <dune/solvers/norms/energynorm.hh>
#include <dune/solvers/norms/twonorm.hh>
#include <dune/solvers/solvers/cholmodsolver.hh>
#include <dune/solvers/solvers/proximalnewtonsolver.hh>
#include "common.hh"
using namespace Dune;
// grid dimension
const int dim = 3;
// f(x) = 1/4 * ||x||^4
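// Its derivatives, assembled below, are (with I the identity matrix):
//   f'(x)  = ||x||^2 * x
//   f''(x) = ||x||^2 * I + 2 * x x^T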
template<class Matrix, class Vector>
struct F
{
void assembleGradientAndHessian( const Vector& x, Vector& grad, Matrix& hess ) const
{
auto norm2 = x.two_norm2();
grad = x;
grad *= norm2;
for(size_t i=0; i<x.size(); i++)
for(size_t j=0; j<x.size(); j++)
hess[i][j] = (i==j)*norm2 + 2*x[i]*x[j];
}
double computeEnergy( const Vector& x ) const
{
return 0.25* x.two_norm2() * x.two_norm2();
}
};
// g(x) = ( -1, -1, -1, ... , -1)^T * x
template<class Vector>
struct G
{
double operator()( const Vector& x ) const
{
auto realX = x;
realX += offset_;
double sum = 0.0;
for ( size_t i=0; i<realX.size(); i++ )
sum -= realX[i];
return sum;
}
void updateOffset( const Vector& offset )
{
offset_ = offset;
}
Vector offset_;
};
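// Second order solver for the regularized subproblem of this test: since g is linear, the
// subproblem is a plain quadratic, and (with the convention that reg is added to the diagonal
// of the Hessian) its minimizer solves the linear system
//   ( f''(x) + reg*I ) dx = -f'(x) + (1,...,1)^T,
// which is assembled and handed to CHOLMOD below.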
template<class Matrix, class Vector>
struct SecondOrdersolver
{
using MatrixType = Matrix;
template<class BitVector>
void minimize( const Matrix& hess, const Vector& grad, const G<Vector>& g, double reg, Vector& dx, const BitVector& /*ignore*/) const
{
// add reg
auto HH = hess;
for ( size_t i=0; i<dx.size(); i++ )
HH[i][i] += reg;
// add g to grad
auto gg = grad;
gg *= -1.0;
for ( size_t i=0; i<dx.size(); i++ )
gg[i] += 1.0;
Solvers::CholmodSolver cholmod( HH, dx, gg );
cholmod.solve();
}
};
int main (int argc, char *argv[])
{
// initialize MPI, finalize is done automatically on exit
[[maybe_unused]] MPIHelper& mpiHelper = MPIHelper::instance(argc, argv);
const int size = 10;
using Vector = FieldVector<double,size>;
using Matrix = FieldMatrix<double,size>;
using BitVector = std::bitset<size>;
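// Derivation of the analytical solution: stationarity of J(x) = 1/4*||x||^4 - sum_i x_i
// requires ||x||^2 * x_i = 1 for every component i, so all components agree, x_i = c,
// with size*c^3 = 1, i.e. c = size^(-1/3).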
// this is the analytical solution of the problem
Vector exactSol, zeroVector;
exactSol = std::pow( size, -1.0/3.0 );
zeroVector = 0.0;
F<Matrix,Vector> f;
G<Vector> g;
g.updateOffset(zeroVector);
SecondOrdersolver<Matrix,Vector> sos;
// choose some random initial vector
Vector sol;
for ( size_t i=0; i<sol.size(); i++ )
sol[i] = i;
TwoNorm<Vector> norm;
// create the solver
Solvers::ProximalNewton::SimpleRegUpdater regUpdater;
double reg = 42.0;
Solvers::ProximalNewtonSolver proximalNewtonSolver( f, g, sos, sol, norm, regUpdater, reg, 100, 1e-14, Solver::FULL);
// set some empty ignore field
BitVector ignore;
proximalNewtonSolver.setIgnore( ignore );
// go!
proximalNewtonSolver.solve();
// compute the difference to the exact solution
sol -= exactSol;
std::cout << "Difference to exact solution = " << sol.two_norm() << std::endl;
bool passed = sol.two_norm() < 1e-15;
return passed ? 0 : 1;
}