diff --git a/dune/tnnmg/CMakeLists.txt b/dune/tnnmg/CMakeLists.txt
index 29cc328c3ba4d6875e85730ad87fc146598678f4..896f9294bf47e48b0a1abba40d8cdac3e09e7eb3 100644
--- a/dune/tnnmg/CMakeLists.txt
+++ b/dune/tnnmg/CMakeLists.txt
@@ -1,5 +1,15 @@
 add_subdirectory("functionals")
 add_subdirectory("iterationsteps")
+add_subdirectory("linearsolvers")
+add_subdirectory("localsolvers")
 add_subdirectory("nonlinearities")
 add_subdirectory("problem-classes")
+add_subdirectory("projections")
 add_subdirectory("solvers")
+add_subdirectory("test")
+
+
+install(FILES
+    concepts.hh
+    typetraits.hh
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/tnnmg)
diff --git a/dune/tnnmg/concepts.hh b/dune/tnnmg/concepts.hh
new file mode 100644
index 0000000000000000000000000000000000000000..8d9348beba21f170081382dedb77d389fb49414f
--- /dev/null
+++ b/dune/tnnmg/concepts.hh
@@ -0,0 +1,74 @@
+// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set ts=8 sw=2 et sts=2:
+#ifndef DUNE_TNNMG_CONCEPTS_HH
+#define DUNE_TNNMG_CONCEPTS_HH
+
+
+
+namespace Dune {
+namespace TNNMG {
+namespace Concept {
+
+
+
+struct IsLessThanComparable
+{
+
+  struct RawLessThanComparable
+  {
+    template<class T1, class T2>
+    auto require(const T1& t1, const T2& t2) -> decltype(
+      t1<t2
+    );
+  };
+
+  struct LessThanComparableHelper
+  {
+
+    // if one of both types is not a tuple we can use the raw comparable check
+    template<class T1, class T2,
+      std::enable_if_t<(not IsTupleOrDerived<T1>::value) or (not IsTupleOrDerived<T2>::value), int> = 0>
+    static constexpr auto check(const T1&, const T2&)
+    {
+      return std::integral_constant<bool, models<RawLessThanComparable, T1, T2>()>();
+    }
+
+    // tuples of different length are not comparable
+    template<class... T, class... U,
+      std::enable_if_t<not (std::tuple_size<std::tuple<T...>>::value == std::tuple_size<std::tuple<U...>>::value), int> = 0>
+    static constexpr auto check(const std::tuple<T...>&, const std::tuple<U...>&)
+    {
+      return std::false_type();
+    }
+
+    // empty tuples are always comparable
+    static constexpr auto check(const std::tuple<>&, const std::tuple<>&)
+    {
+      return std::true_type();
+    }
+
+    // check if tuples are comparable
+    template<class T0, class... T, class U0, class... U,
+      std::enable_if_t<(std::tuple_size<std::tuple<T...>>::value == std::tuple_size<std::tuple<U...>>::value), int> = 0>
+    static constexpr auto check(const std::tuple<T0, T...>&, const std::tuple<U0, U...>&)
+    {
+      using HeadComparable = decltype(check(std::declval<T0>(), std::declval<U0>()));
+      using TailComparable = decltype(check(std::declval<std::tuple<T...>>(), std::declval<std::tuple<U...>>()));
+      return std::integral_constant<bool, HeadComparable::value and TailComparable::value>();
+    }
+  };
+
+  template<class T1, class T2>
+  auto require(const T1& t1, const T2& t2) -> decltype(
+    Dune::Concept::requireTrue<decltype(LessThanComparableHelper::check(t1, t2))::value>()
+  );
+};
+
+
+} // namespace Concept
+} // namespace TNNMG
+} // namespace Dune
+
+
+
+#endif // DUNE_TNNMG_CONCEPTS_HH
diff --git a/dune/tnnmg/functionals/CMakeLists.txt b/dune/tnnmg/functionals/CMakeLists.txt
index a490d9e019d0bdee58a51b629c39db74fc677e50..f973f88ca560c74ed5a85248e2ceaee63ac54cbb 100644
--- a/dune/tnnmg/functionals/CMakeLists.txt
+++ b/dune/tnnmg/functionals/CMakeLists.txt
@@ -1,10 +1,15 @@
 add_subdirectory("test")
 
 install(FILES
+    affineoperator.hh
+    bcqfconstrainedlinearization.hh
+    boxconstrainedquadraticfunctional.hh
     convexfunctional.hh
     cubeindicatorfunctional.hh
     nonsmoothconvexfunctional.hh
     normfunctional.hh
+    quadraticfunctional.hh
+    quadraticfunctionalconstrainedlinearization.hh
     stronglyconvexfunctional.hh
     sumfunctional.hh
     zerofunctional.hh
diff --git a/dune/tnnmg/functionals/affineoperator.hh b/dune/tnnmg/functionals/affineoperator.hh
new file mode 100644
index 0000000000000000000000000000000000000000..b901211b621c55209c9cdb6828bf5578da78c900
--- /dev/null
+++ b/dune/tnnmg/functionals/affineoperator.hh
@@ -0,0 +1,93 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_FUNCTIONALS_AFFINEOPERATOR_HH
+#define DUNE_TNNMG_FUNCTIONALS_AFFINEOPERATOR_HH
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+namespace Imp {
+
+  template<class MM, class X, class Y>
+  static auto mmv(const MM& m, const X& x, Y&& y, PriorityTag<1>) -> void_t<decltype(m.mmv(x, y))>
+  {
+    m.mmv(x, y);
+  }
+
+  template<class MM, class X, class Y>
+  static void mmv(const MM& m, const X& x, Y&& y, PriorityTag<0>)
+  {
+    y -= m*x;
+  }
+} // namespace Imp
+
+
+/** \brief The first derivative of a functional
+ *
+ * The functional is thought to be smooth here: truncation and other ways to deal
+ * with nonsmoothness happens elsewhere.
+ *
+ * \tparam M Matrix type
+ * \tparam V Vector type
+ */
+template<class M, class V>
+class AffineOperator
+{
+  /** \brief The second derivative of a functional: A fixed matrix wrapped as a function */
+  class ConstantMatrixFunction
+  {
+  public:
+    ConstantMatrixFunction(const M& value) :
+      value_(&value)
+    {}
+
+    const M& operator()(const V& d)
+    { return *value_; }
+
+  private:
+    const M* value_;
+  };
+
+public:
+
+  using Matrix = M;
+  using Vector = V;
+
+  /** \brief Constructor */
+  AffineOperator(const Matrix& matrix, const Vector& linearTerm) :
+    matrix_(&matrix),
+    linearTerm_(&linearTerm)
+  {}
+
+  /** \brief Evaluate the first derivative of the functional at a given point */
+  Vector operator()(const Vector& v) const
+  {
+    Vector result;
+    Solvers::resizeInitializeZero(result,v);
+    matrix_->umv(v, result);
+    result -= *linearTerm_;
+    return result;
+  }
+
+  /** \brief Get the second derivative of the functional
+   * \todo This could be a lambda but gcc-4.9.2 (shipped with Debian
+   * Jessie) does not accept inline friends with auto return type
+   * due to bug-59766. This is fixed in gcc-4.9.3.
+   */
+  friend ConstantMatrixFunction derivative(const AffineOperator& f)
+  {
+    return ConstantMatrixFunction(*f.matrix_);
+  }
+
+protected:
+  const Matrix* matrix_;
+  const Vector* linearTerm_;
+};
+
+
+}   // TNNMG
+}   // Dune
+
+#endif   // DUNE_TNNMG_FUNCTIONALS_AFFINEOPERATOR_HH
diff --git a/dune/tnnmg/functionals/bcqfconstrainedlinearization.hh b/dune/tnnmg/functionals/bcqfconstrainedlinearization.hh
new file mode 100644
index 0000000000000000000000000000000000000000..c9ae017ab40ce2550addec8583cb52e86b2a0cf8
--- /dev/null
+++ b/dune/tnnmg/functionals/bcqfconstrainedlinearization.hh
@@ -0,0 +1,145 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_FUNCTIONAL_BCQFCONSTRAINEDLINEARIZATION_HH
+#define DUNE_TNNMG_FUNCTIONAL_BCQFCONSTRAINEDLINEARIZATION_HH
+
+
+#include <cstddef>
+
+#include <dune/common/typetraits.hh>
+#include <dune/common/hybridutilities.hh>
+
+#include <dune/matrix-vector/algorithm.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+/**
+ * \brief A constrained linearization for BoxConstrainedQuadraticFunctional
+ */
+template<class F, class BV>
+class BoxConstrainedQuadraticFunctionalConstrainedLinearization
+{
+  using This = BoxConstrainedQuadraticFunctionalConstrainedLinearization<F,BV>;
+
+  template<class NV, class NBV, class T>
+  static void determineTruncation(const NV& x, const NV& lower, const NV& upper, NBV&& truncationFlags, const T& truncationTolerance)
+  {
+    namespace H = Dune::Hybrid;
+    H::ifElse(IsNumber<NV>(), [&](auto id){
+      if ((id(x) <= id(lower)+truncationTolerance) || (id(x) >= id(upper) - truncationTolerance))
+        id(truncationFlags) = true;
+    }, [&](auto id){
+      H::forEach(H::integralRange(H::size(id(x))), [&](auto&& i) {
+        This::determineTruncation(x[i], lower[i], upper[i], truncationFlags[i], truncationTolerance);
+      });
+    });
+  }
+
+  template<class NV, class NBV>
+  static void truncateVector(NV& x, const NBV& truncationFlags)
+  {
+    namespace H = Dune::Hybrid;
+    H::ifElse(IsNumber<NV>(), [&](auto id){
+      if (id(truncationFlags))
+        id(x) = 0;
+    }, [&](auto id){
+      H::forEach(H::integralRange(H::size(id(x))), [&](auto&& i) {
+        This::truncateVector(x[i], truncationFlags[i]);
+      });
+    });
+  }
+
+  template<class NM, class RBV, class CBV>
+  static void truncateMatrix(NM& A, const RBV& rowTruncationFlags, const CBV& colTruncationFlags)
+  {
+    namespace H = Dune::Hybrid;
+    using namespace Dune::MatrixVector;
+    H::ifElse(IsNumber<NM>(), [&](auto id){
+      if(id(rowTruncationFlags) or id(colTruncationFlags))
+        A = 0;
+    }, [&](auto id){
+      H::forEach(H::integralRange(H::size(id(rowTruncationFlags))), [&](auto&& i) {
+        auto&& Ai = A[i];
+        sparseRangeFor(Ai, [&](auto&& Aij, auto&& j) {
+            This::truncateMatrix(Aij, rowTruncationFlags[i], colTruncationFlags[j]);
+        });
+      });
+    });
+  }
+
+public:
+  using Matrix = typename F::Matrix;
+  using Vector = typename F::Vector;
+  using BitVector = BV;
+  using ConstrainedMatrix = Matrix;
+  using ConstrainedVector = Vector;
+  using ConstrainedBitVector = BitVector;
+
+
+  BoxConstrainedQuadraticFunctionalConstrainedLinearization(const F& f, const BitVector& ignore) :
+    f_(f),
+    ignore_(ignore),
+    truncationTolerance_(1e-10)
+  {}
+
+  void bind(const Vector& x)
+  {
+    negativeGradient_ = derivative(f_)(x);
+    negativeGradient_ *= -1;
+    hessian_ = derivative(derivative(f_))(x);
+    truncationFlags_ = ignore_;
+
+    // determine which components to truncate
+    determineTruncation(x, f_.lowerObstacle(), f_.upperObstacle(), truncationFlags_, truncationTolerance_);
+
+    // truncate gradient and hessian
+    truncateVector(negativeGradient_, truncationFlags_);
+    truncateMatrix(hessian_, truncationFlags_, truncationFlags_);
+  }
+
+  void extendCorrection(ConstrainedVector& cv, Vector& v) const
+  {
+    v = cv;
+    truncateVector(v, truncationFlags_);
+  }
+
+  const BitVector& truncated() const
+  {
+    return truncationFlags_;
+  }
+
+  const auto& negativeGradient() const
+  {
+    return negativeGradient_;
+  }
+
+  const auto& hessian() const
+  {
+    return hessian_;
+  }
+
+private:
+  const F& f_;
+  const BitVector& ignore_;
+
+  double truncationTolerance_;
+
+  Vector negativeGradient_;
+  Matrix hessian_;
+  BitVector truncationFlags_;
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+
+#endif // DUNE_TNNMG_FUNCTIONAL_BCQFCONSTRAINEDLINEARIZATION
diff --git a/dune/tnnmg/functionals/boxconstrainedquadraticfunctional.hh b/dune/tnnmg/functionals/boxconstrainedquadraticfunctional.hh
new file mode 100644
index 0000000000000000000000000000000000000000..ea7d85759d34507fb7a07688e629c444f23ed362
--- /dev/null
+++ b/dune/tnnmg/functionals/boxconstrainedquadraticfunctional.hh
@@ -0,0 +1,303 @@
+// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set ts=8 sw=2 et sts=2:
+#ifndef DUNE_TNNMG_FUNCTIONALS_BOXCONSTRAINEDQUADRATICFUNCTIONAL_HH
+#define DUNE_TNNMG_FUNCTIONALS_BOXCONSTRAINEDQUADRATICFUNCTIONAL_HH
+
+#include <cstddef>
+#include <type_traits>
+
+#include <dune/common/concept.hh>
+#include <dune/common/typetraits.hh>
+#include <dune/common/hybridutilities.hh>
+
+#include <dune/matrix-vector/algorithm.hh>
+
+#include <dune/solvers/common/copyorreference.hh>
+
+#include <dune/tnnmg/concepts.hh>
+
+#include <dune/tnnmg/iterationsteps/nonlineargsstep.hh>
+
+#include <dune/tnnmg/functionals/quadraticfunctional.hh>
+
+
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+template<class V, class L, class U,
+  std::enable_if_t<models<Concept::IsLessThanComparable, V, L>(), int> = 0>
+bool checkInsideBox(const V& v, const L& lower, const U& upper)
+{
+  return (lower <= v) && (v <= upper);
+}
+
+template<class V, class L, class U,
+  std::enable_if_t<not models<Concept::IsLessThanComparable, V, L>(), int> = 0>
+bool checkInsideBox(const V& v, const L& lower, const U& upper)
+{
+  namespace H = Dune::Hybrid;
+  bool result = true;
+  H::forEach(H::integralRange(H::size(v)), [&](auto&& i) {
+    result &= checkInsideBox(v[i], lower[i], upper[i]);
+  });
+  return result;
+}
+
+
+
+template<class V, class L, class U, class T,
+  std::enable_if_t<std::is_arithmetic<V>::value, int> = 0>
+void directionalObstacles(const V& x, const V& v, const L& lower, const U& upper, T& defectLower, T& defectUpper)
+{
+  if (v>0)
+  {
+    defectLower = std::max(defectLower, (lower - x)/v);
+    defectUpper = std::min(defectUpper, (upper - x)/v);
+  }
+  else if (v<0)
+  {
+    defectLower = std::max(defectLower, (upper - x)/v);
+    defectUpper = std::min(defectUpper, (lower - x)/v);
+  }
+}
+
+template<class V, class L, class U, class T,
+  std::enable_if_t<not std::is_arithmetic<V>::value, int> = 0>
+void directionalObstacles(const V& x, const V& v, const L& lower, const U& upper, T& defectLower, T& defectUpper)
+{
+  namespace H = Dune::Hybrid;
+  H::forEach(H::integralRange(H::size(v)), [&](auto&& i) {
+    directionalObstacles(x[i], v[i], lower[i], upper[i], defectLower, defectUpper);
+  });
+}
+
+
+
+template<class M, class V, class R>
+class ShiftedBoxConstrainedQuadraticFunctional;
+
+template<class M, class V, class L, class U, class R>
+class BoxConstrainedQuadraticFunctional;
+
+
+
+
+/** \brief Coordinate restriction of a quadratic functional
+ *
+ *  \tparam M Global matrix type
+ *  \tparam V global vector type
+ *  \tparam R Range type
+ */
+template<class M, class V, class R>
+class ShiftedBoxConstrainedQuadraticFunctional : public
+  ShiftedQuadraticFunctional<M,V,R>
+{
+  using Base = ShiftedQuadraticFunctional<M,V,R>;
+public:
+
+  using Matrix = M;
+  using Vector = V;
+  using LowerObstacle = Vector;
+  using UpperObstacle = Vector;
+  using Range = R;
+
+  ShiftedBoxConstrainedQuadraticFunctional(const Matrix& quadraticPart, const Vector& linearPart, const Vector& lowerObstacle, const Vector& upperObstacle, const Vector& origin) :
+    Base(quadraticPart, linearPart, origin),
+    originalLowerObstacle_(&lowerObstacle),
+    originalUpperObstacle_(&upperObstacle)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    auto temp = Base::origin();
+    temp += v;
+    if (checkInsideBox(temp, originalLowerObstacle(), originalUpperObstacle()))
+      return Base::operator()(v);
+    return std::numeric_limits<Range>::max();
+  }
+
+  const Vector& originalLowerObstacle() const
+  {
+    return *originalLowerObstacle_;
+  }
+
+  const Vector& originalUpperObstacle() const
+  {
+    return *originalUpperObstacle_;
+  }
+
+protected:
+  const Vector* originalLowerObstacle_;
+  const Vector* originalUpperObstacle_;
+};
+
+// \ToDo This should be an inline friend of ShiftedBoxConstrainedQuadraticFunctional
+// but gcc-4.9.2 shipped with debian jessie does not accept
+// inline friends with auto return type due to bug-59766.
+// Notice, that this is fixed in gcc-4.9.3.
+template<class Index, class M, class V, class R>
+auto coordinateRestriction(const ShiftedBoxConstrainedQuadraticFunctional<M, V, R>& f, const Index& i)
+{
+  using Range = R;
+  using LocalMatrix = std::decay_t<decltype(f.quadraticPart()[i][i])>;
+  using LocalVector = std::decay_t<decltype(f.originalLinearPart()[i])>;
+  using LocalLowerObstacle = std::decay_t<decltype(f.originalLowerObstacle()[i])>;
+  using LocalUpperObstacle = std::decay_t<decltype(f.originalUpperObstacle()[i])>;
+
+  using namespace Dune::MatrixVector;
+  namespace H = Dune::Hybrid;
+
+  LocalVector ri = f.originalLinearPart()[i];
+  const LocalMatrix* Aii_p = nullptr;
+
+  const auto& Ai = f.quadraticPart()[i];
+  sparseRangeFor(Ai, [&](auto&& Aij, auto&& j) {
+    // TODO Here we must implement a wrapper to guarantee that this will work with proxy matrices!
+    H::ifElse(H::equals(j, i), [&](auto&& id){
+      Aii_p = id(&Aij);
+    });
+    Imp::mmv(Aij, f.origin()[j], ri, PriorityTag<1>());
+  });
+
+  LocalLowerObstacle dli = f.originalLowerObstacle()[i];
+  LocalUpperObstacle dui = f.originalUpperObstacle()[i];
+  dli -= f.origin()[i];
+  dui -= f.origin()[i];
+
+  return BoxConstrainedQuadraticFunctional<LocalMatrix&, LocalVector, LocalLowerObstacle, LocalUpperObstacle, Range>(*Aii_p, std::move(ri), std::move(dli), std::move(dui));
+}
+
+
+
+/** \brief Coordinate restriction of a quadratic functional
+ *
+ *  \tparam M Global matrix type
+ *  \tparam V Global vector type
+ *  \tparam L Global lower obstacle type
+ *  \tparam U Global upper obstacle type
+ *  \tparam R Range type
+ */
+template<class M, class V, class L, class U, class R=double>
+class BoxConstrainedQuadraticDirectionalRestriction :
+  public QuadraticDirectionalRestriction<M,V,R>
+{
+  using Base = QuadraticDirectionalRestriction<M,V,R>;
+public:
+
+  using GlobalMatrix = typename Base::GlobalMatrix;
+  using GlobalVector = typename Base::GlobalVector;
+  using GlobalLowerObstacle = L;
+  using GlobalUpperObstacle = U;
+
+  using Matrix = typename Base::Matrix;
+  using Vector = typename Base::Vector;
+
+  using LowerObstacle = Vector;
+  using UpperObstacle = Vector;
+
+  using Range = typename Base::Range;
+
+  BoxConstrainedQuadraticDirectionalRestriction(const GlobalMatrix& matrix, const GlobalVector& linearTerm, const GlobalLowerObstacle& lower, const GlobalLowerObstacle& upper, const GlobalVector& origin, const GlobalVector& direction) :
+    Base(matrix, linearTerm, origin, direction)
+  {
+    defectLower_ = -std::numeric_limits<Vector>::max();
+    defectUpper_ = std::numeric_limits<Vector>::max();
+    directionalObstacles(origin, direction, lower, upper, defectLower_, defectUpper_);
+  }
+
+  Range operator()(const Vector& v) const
+  {
+    DUNE_THROW(Dune::NotImplemented, "Evaluation of QuadraticCoordinateRestriction not implemented");
+  }
+
+  const LowerObstacle& lowerObstacle() const
+  {
+    return defectLower_;
+  }
+
+  const UpperObstacle& upperObstacle() const
+  {
+    return defectUpper_;
+  }
+
+protected:
+  LowerObstacle defectLower_;
+  UpperObstacle defectUpper_;
+};
+
+
+
+/** \brief Box constrained quadratic functional
+ *
+ *  \tparam M Matrix type
+ *  \tparam V Vector type
+ *  \tparam L Lower obstacle type
+ *  \tparam U Upper obstacle type
+ *  \tparam R Range type
+ */
+template<class M, class V, class L, class U, class R>
+class BoxConstrainedQuadraticFunctional :
+  public QuadraticFunctional<M,V,R>
+{
+  using Base = QuadraticFunctional<M,V,R>;
+
+public:
+
+  using Matrix = typename Base::Matrix;
+  using Vector = typename Base::Vector;
+  using Range = typename Base::Range;
+  using LowerObstacle = std::decay_t<L>;
+  using UpperObstacle = std::decay_t<U>;
+
+  BoxConstrainedQuadraticFunctional(const Matrix& matrix, const Vector& linearPart, const LowerObstacle& lower, const UpperObstacle& upper) :
+    Base(matrix, linearPart),
+    lower_(lower),
+    upper_(upper)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    if (checkInsideBox(v, lower_.get(), upper_.get()))
+      return Base::operator()(v);
+    return std::numeric_limits<Range>::max();
+  }
+
+  const LowerObstacle& lowerObstacle() const
+  {
+    return lower_.get();
+  }
+
+  const UpperObstacle& upperObstacle() const
+  {
+    return upper_.get();
+  }
+
+  friend auto directionalRestriction(const BoxConstrainedQuadraticFunctional& f, const Vector& origin, const Vector& direction)
+    -> BoxConstrainedQuadraticDirectionalRestriction<Matrix, Vector, LowerObstacle, UpperObstacle, Range>
+  {
+    return BoxConstrainedQuadraticDirectionalRestriction<Matrix, Vector, LowerObstacle, UpperObstacle, Range>(f.quadraticPart(), f.linearPart(), f.lowerObstacle(), f.upperObstacle(), origin, direction);
+  }
+
+  friend auto shift(const BoxConstrainedQuadraticFunctional& f, const Vector& origin)
+    -> ShiftedBoxConstrainedQuadraticFunctional<Matrix, Vector, Range>
+  {
+    return ShiftedBoxConstrainedQuadraticFunctional<Matrix, Vector, Range>(f.quadraticPart(), f.linearPart(), f.lowerObstacle(), f.upperObstacle(), origin);
+  }
+
+protected:
+
+  Solvers::ConstCopyOrReference<L> lower_;
+  Solvers::ConstCopyOrReference<U> upper_;
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+#endif // DUNE_TNNMG_FUNCTIONALS_BOXCONSTRAINEDQUADRATICFUNCTIONAL_HH
diff --git a/dune/tnnmg/functionals/normfunctional.hh b/dune/tnnmg/functionals/normfunctional.hh
index 255cde2d0b284ef3e093d794c706be1318148c6f..33afa842805fc13b2c5fd27a2cf7661eae678563 100644
--- a/dune/tnnmg/functionals/normfunctional.hh
+++ b/dune/tnnmg/functionals/normfunctional.hh
@@ -1,231 +1,365 @@
 #ifndef DUNE_TNNMG_FUNCTIONALS_NORMFUNCTIONAL_HH
 #define DUNE_TNNMG_FUNCTIONALS_NORMFUNCTIONAL_HH
 
-#include <dune/common/bitsetvector.hh>
+#include <dune/solvers/common/resize.hh>
 #include <dune/tnnmg/problem-classes/nonlinearity.hh>
 #include <dune/tnnmg/functionals/nonsmoothconvexfunctional.hh>
 
 namespace Dune {
 
-  namespace TNNMG {
-
-    /** \brief The weighted sum of the Euclidean norm of the vector blocks
-     *
-     * This Functional class implements the functional
-     * \f[
-     *    j(v) = \sum_{i=1}^n a_i |v_i|,
-     * \f]
-     *where \f$ v \f$ is a vector in \f$ (\mathbb{R}^N)^n \f$, and \f$ a \in \mathbb{R}^n \f$
-     * is a vector of coefficients.
-     *
-     * \tparam blockSize The size \f$ N \f$ of the vector blocks.
-     */
-    template <int blockSize>
-    class NormFunctional
-    : public Dune::TNNMG::NonsmoothConvexFunctional<FieldVector<double,blockSize>,
-                                                    FieldMatrix<double,blockSize,blockSize> >
+namespace TNNMG {
+
+template <class V, class R=double>
+class ShiftedNormFunctional;
+
+/** \brief Coordinate restriction of the Euclidean norm functional
+ *
+ *  This general implementation doesn't really allow anything but shifting.
+ *  The shifted version be then be restricted further.
+ *
+ *  \tparam V Type of the vector before restriction
+ *  \tparam R Range type
+ *  \tparam I Index type used to access individual vector entries
+ */
+template <class V, class R, class I = std::size_t>
+class NormCoordinateRestriction
+{
+public:
+
+  using GlobalVector = V;
+  using GlobalIndex = I;
+
+  using Vector = std::decay_t<decltype(std::declval<GlobalVector>()[std::declval<GlobalIndex>()])>;
+
+  using Range = R;
+
+  NormCoordinateRestriction(const GlobalVector& coefficients, const GlobalVector& origin, GlobalIndex index)
+  : coefficients_(&coefficients),
+    origin_(origin),
+    index_(index)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    DUNE_THROW(Dune::NotImplemented, "Evaluation of NormCoordinateRestriction not implemented");
+  }
+
+  GlobalIndex index() const
+  {
+    return index_;
+  }
+
+  const auto& coefficients() const
+  {
+    return (*coefficients_)[index_];
+  }
+
+  template<class Origin,
+  std::enable_if_t<! Dune::IsNumber<Origin>::value, int> = 0>
+  friend ShiftedNormFunctional<Vector, Range>
+  shift(const NormCoordinateRestriction& f, const Origin& origin)
+  {
+    return ShiftedNormFunctional<Vector, Range>(f.coefficients(),
+                                                f.origin_[f.index_],   // origin of the functional before this shift
+                                                origin);   // new additional shift, with reference semantics
+  }
+
+protected:
+
+  const V* coefficients_;
+
+  const GlobalVector origin_;
+  const GlobalIndex index_;
+};
+
+/** \brief Coordinate restriction of Euclidean norm functional to a scalar function
+ *
+ *  This is the specialization for the restriction to 1-dimensional subspaces.
+ *  Unlike the general implementation above, this specialization can produce the subdifferential
+ *  of the functional.
+ *
+ *  \tparam V Type of the vector before restriction
+ *  \tparam R Range type
+ *  \tparam I Index type used to access individual vector entries
+ *
+ */
+template <int blocksize, class R, class I>
+class NormCoordinateRestriction<FieldVector<double,blocksize>, R, I>
+{
+public:
+
+  using GlobalVector = FieldVector<double,blocksize>;
+  using GlobalIndex = I;
+
+  using Vector = std::decay_t<decltype(std::declval<GlobalVector>()[std::declval<GlobalIndex>()])>;
+
+  using Range = R;
+
+  NormCoordinateRestriction(const GlobalVector& coefficients, const GlobalVector& origin, GlobalIndex index)
+  : coefficients_(&coefficients),
+    origin_(origin),
+    index_(index)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    auto tmp = origin_;
+    tmp[index_] += v;
+
+    return tmp.two_norm();
+  }
+
+  GlobalIndex index() const
+  {
+    return index_;
+  }
+
+  const auto& coefficients() const
+  {
+    return (*coefficients_)[index_];
+  }
+
+  void domain(Solvers::Interval<double>& d) const
+  {
+    // Our domain is the entire space
+    d[0] = -std::numeric_limits<double>::infinity();
+    d[1] =  std::numeric_limits<double>::infinity();
+  }
+
+  void subDiff(double z, Solvers::Interval<double>& dJ) const
+  {
+    constexpr double tolerance = 1e-15;
+
+    dJ = 0.0;
+
+    // Compute norm at coefficient i
+    auto tmp = *origin_;
+    tmp[index_] += z;
+
+    double norm = tmp.two_norm();
+
+    if (norm < tolerance)
     {
-    public:
-      typedef Dune::FieldVector<double,blockSize>      LocalVectorType;
-      typedef Dune::FieldMatrix<double,blockSize,blockSize>  LocalMatrixType;
-
-      typedef Nonlinearity<LocalVectorType, LocalMatrixType> NonlinearityType;
-
-      typedef typename NonlinearityType::VectorType VectorType;
-      typedef typename NonlinearityType::MatrixType MatrixType;
-      typedef typename NonlinearityType::IndexSet IndexSet;
-
-
-      /** \brief Constructor from a set of coefficients
-       *
-       * \param coefficients The coefficient vector \f$ a \f$
-       * \param tolerance The nonsmoothness a 0 of the norm function is implemented to have the radius 'tolerance'
-       */
-      NormFunctional(const std::vector<double> coefficients,
-                     double tolerance = 1e-15)
-      : coefficients_(coefficients),
-      tolerance_(tolerance)
-      {}
-
-      /** \brief Evaluate the functional at a given vector v
-       */
-      double operator()(const VectorType& v) const
-      {
-        double result = 0.0;
+      // We are at 0 -- the result is a true subdifferential
+      dJ[0] -= (*coefficients_)[0];
+      dJ[1] += (*coefficients_)[0];
+    }
+    else
+    {
+      // Compute the j-th entry of the gradient
+      dJ += (*coefficients_)[0] * tmp[index_] / norm;
+    }
+  }
 
-        for(size_t row=0; row<v.size(); ++row)
-          result += coefficients_[row] * v[row].two_norm();
+protected:
 
-        return result;
-      }
+  const GlobalVector* coefficients_;
 
-      /** \brief Add the gradient of the norm functional to a given vector
-       *
-       * Note: the gradient of \f$ |v| \f$ is \f$ v/|v| \f$.
-       */
-      void addGradient(const VectorType& v, VectorType& gradient) const
-      {
-        for(size_t row=0; row<v.size(); ++row)
-        {
-          double norm = v[row].two_norm();
+  const GlobalVector origin_;
+  const GlobalIndex index_;
+};
 
-          // Do nothing if we are at the nondifferentiable point
-          if (norm < tolerance_)
-            continue;
 
-          for (int j=0; j<blockSize; j++)
-            gradient[row][j] += coefficients_[row] * v[row][j] / norm;
-        }
-      }
 
-      /** \brief Add the Hessian of the functional to a given matrix
-       *
-       * Note: Entry \f$ i,j \f$ of the Hessian of \f$ |v| \f$  is \f[- \frac{v_i v_j}{|v|^3} + \frac{\delta_{ij}}{|v|} \f]
-       */
-      void addHessian(const VectorType& v, MatrixType& hessian) const
-      {
-        for(size_t row=0; row<v.size(); ++row)
-        {
-          double norm = v[row].two_norm();
-
-          // Do nothing if we are at the nondifferentiable point
-          if (norm < tolerance_)
-            continue;
-
-          for (int i=0; i<blockSize; i++)
-            for (int j=0; j<blockSize; j++)
-              hessian[row][row][i][j] += coefficients_[row] * (- v[row][i] * v[row][j] / (norm*norm*norm) + (i==j) / norm);
-        }
-      }
+/** \brief Shifted Euclidean norm functional
+ *
+ *  \tparam V global vector type
+ *  \tparam R Range type
+ */
+template <class V, class R>
+class ShiftedNormFunctional
+{
+public:
 
-      void addHessianIndices(IndexSet& indices) const {}
+  using Vector = V;
+  using Range = R;
 
-      /** \brief Compute the subdifferential of the nonlinearity restricted to the line \f$ u + t v \f$.
-       *
-       * \param[out] subdifferential the resulting subdifferential
-       * \param u base point
-       * \param v direction
-       */
-      virtual void directionalSubDiff(const VectorType& u, const VectorType& v, Dune::Solvers::Interval<double>& subdifferential) const
-      {
-        subdifferential[0] = 0.0;
-        subdifferential[1] = 0.0;
-
-        for(size_t row=0; row<u.size(); ++row)
-        {
-          double norm = u[row].two_norm();
-          if (norm >= tolerance_){
-            subdifferential[0] += coefficients_[row] * (u[row]*v[row]) / norm;
-            subdifferential[1] += coefficients_[row] * (u[row]*v[row]) / norm;
-          } else {
-            subdifferential[0] -= coefficients_[row] * v[row].two_norm();
-            subdifferential[1] += coefficients_[row] * v[row].two_norm();
-          }
-        }
-      }
+  ShiftedNormFunctional(const Vector& coefficients, const Vector& origin, const Vector& offset)
+  : coefficients_(&coefficients),
+    origin_(origin),
+    offset_(&offset)
+  {}
 
-      /** \brief Compute the subdifferential of the nonlinearity restricted to the
-       * line u_pos' +t e_(i,j) at t=0.
-       *
-       * Here e_(i,j) is the (i,j)-th Euclidean unit vector,
-       * and u_pos' is the internal position vector u_pos with the (i,j)-the entry replaced by x.
-       * If the nonlinearity decouples in the Euclidean directions this is simply the (i,j)-th
-       * component of the subdifferential.
-       *
-       * \param i global index
-       * \param j local index
-       * \param x value of the (i,j)-th entry of position to evaluate the nonlinearity at
-       * \param[out] D the subdifferential
-       */
-      virtual void subDiff(int i, double x, Dune::Solvers::Interval<double>& D, int j) const
-      {
-        D[0] = 0.0;
-        D[1] = 0.0;
-
-        // Compute current tangential displacement
-        LocalVectorType displacement = u_[i];
-        displacement[j] = x;
-        double norm = displacement.two_norm();
-        if (norm < tolerance_)
-        {
-          D[0] = - coefficients_[i];
-          D[1] =   coefficients_[i];
-        } else {
-          D[0] = D[1] = coefficients_[i] * displacement[j] / norm;
-        }
-      }
+  Range operator()(const Vector& v) const
+  {
+    DUNE_THROW(Dune::NotImplemented, "Evaluation of ShiftedNormFunctional not implemented");
+  }
 
-      /** \brief Return the regularity of the nonlinearity restricted to the
-       * line u_pos' +t e_(i,j) at t=0.
-       *
-       * Here e_(i,j) is the (i,j)-th Euclidean unit vector,
-       * and u_pos' is the internal position vector u_pos with the (i,j)-the entry replaced by x.
-       * Usually this will be the third derivative or a local Lipschitz constant of the second
-       * derivative. Note that if the subdifferential is set-valued at this position, this
-       * value will normally be infinity.
-       *
-       * \param i global index
-       * \param j local index
-       * \param x value of the (i,j)-th entry of position to evaluate the nonlinearity at
-       * \returns a value measuring the regularity
-       */
-      virtual double regularity(int i, double x, int j) const
-      {
-        // Compute current tangential displacement
-        LocalVectorType displacement = u_[i];
-        displacement[j] = x;
+  /** \brief Tell the functional that the origin has changed */
+  void updateOrigin()
+  {
+    // Does not do anything, because no information is precomputed
+  }
 
-        return (displacement.two_norm() < tolerance_)
-        ? std::numeric_limits<double>::max()
-        : 0;
-      }
+  void updateOrigin(std::size_t i)
+  {
+    // Does not do anything, because no information is precomputed
+  }
 
-      /** \brief Domain of definition of the functional in one coordinate direction
-       *
-       * The norm functional is the entire space
-       */
-      void domain(int i, Dune::Solvers::Interval<double>& dom, int j) const
-      {
-        // Our domain is the entire space
-        dom[0] = -std::numeric_limits<double>::max();
-        dom[1] =  std::numeric_limits<double>::max();
-      }
+  template <class Index>
+  friend NormCoordinateRestriction<Vector, Range, Index>
+  coordinateRestriction(const ShiftedNormFunctional& f, const Index& i)
+  {
+    auto tmp = f.origin_;
+    tmp += *f.offset_;
+    return NormCoordinateRestriction<Vector, Range, Index>(*f.coefficients_, tmp, i);
+  }
+
+protected:
+  const Vector* coefficients_;
+  const Vector origin_;
+  const Vector* offset_;
+};
+
+
+/** \brief Directional restriction of the Euclidean norm functional
+ *
+ *  \tparam V global vector type
+ *  \tparam R Range type
+ */
+template<class V, class R=double>
+class NormDirectionalRestriction
+{
+public:
+
+  using GlobalVector = V;
+
+  using Vector = typename GlobalVector::field_type;
+  using Range = R;
+
+  NormDirectionalRestriction(const GlobalVector& coefficients, const GlobalVector& origin, const GlobalVector& direction)
+  : coefficients_(&coefficients),
+    origin_(&origin),
+    direction_(&direction)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    Range result(0.0);
+
+    for(size_t i=0; i<(*origin_).size(); ++i)
+    {
+      // Extract the plastic strain components into a separate data structure, for easier readability
+      auto p = (*origin_)[i];
+      p.axpy(v, (*direction_)[i]);
+      result += (*coefficients_)[i][0] * p.two_norm();
+    }
+
+    return result;
+  }
+
+  GlobalVector& origin()
+  {
+    return *origin_;
+  }
 
-      /** \brief Set the internal position vector u_pos to v.
-       */
-      virtual void setVector(const VectorType& v)
+  const GlobalVector& origin() const
+  {
+    return *origin_;
+  }
+
+  void domain(Dune::Solvers::Interval<double>& d) const
+  {
+    d[0] = -std::numeric_limits<double>::max();
+    d[1] =  std::numeric_limits<double>::max();
+  }
+
+  void subDiff(double z, Dune::Solvers::Interval<double>& dJ) const
+  {
+    double tolerance = 1e-15;
+    dJ = 0.0;
+
+    for (size_t row=0; row<(*origin_).size(); ++row)
+    {
+      auto p = (*origin_)[row];
+      p.axpy(z, (*direction_)[row]);
+
+      auto norm = p.two_norm();
+
+      if (norm >= tolerance)
       {
-        u_ = v;
-      }
+        // Functional is differentiable here -- compute the standard directional derivative
+        // We compute the directional derivative as the product of the gradient and the direction
+        FieldVector<double,p.size()> gradient(0);
+        for (size_t i=0; i<p.size(); i++)
+          gradient[i] += (*coefficients_)[row][0] * p[i] / norm;
 
-      /** \brief Update the (i,j)-th entry of the internal position vector u_pos to x.
-       *
-       *
-       * \param i global index
-       * \param j local index
-       * \param x new value of the entry (u_pos)_(i,j)
-       */
-      virtual void updateEntry(int i, double x, int j)
+        dJ += gradient * (*direction_)[row];
+      }
+      else
       {
-        u_[i][j] = x;
+        dJ[0] -= (*coefficients_)[row][0] * (*direction_)[row].two_norm();
+        dJ[1] += (*coefficients_)[row][0] * (*direction_)[row].two_norm();
       }
+    }
+  }
 
+protected:
+  const GlobalVector* coefficients_;
+  const GlobalVector* origin_;
+  const GlobalVector* direction_;
+};
+
+
+
+/** \brief The weighted sum of the Euclidean norm of the vector blocks
+ *
+ * This Functional class implements the functional
+ * \f[
+ *    j(v) = \sum_{i=1}^n a_i |v_i|,
+ * \f]
+ *where \f$ v \f$ is a vector in \f$ (\mathbb{R}^N)^n \f$, and \f$ a \in \mathbb{R}^n \f$
+ * is a vector of coefficients.
+ *
+ *  \tparam V Vector type
+ *  \tparam R Range type
+ */
+template<class V, class R=double>
+class NormFunctional
+{
+public:
+
+  using Vector = V;
+  using Range = R;
+
+  NormFunctional(const Vector& coefficients) :
+  coefficients_(coefficients)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    Range result(0.0);
+
+    for (size_t i=0; i<v.size(); ++i)
+      result += coefficients_[i][0] * v[i].two_norm();
+
+    return result;
+  }
 
-    private:
-
-      /** \brief For each block, the norm is weighted with the corresponding coefficient from this array */
-      const std::vector<double> coefficients_;
+  const Vector& coefficients() const
+  {
+    return coefficients_;
+  }
 
-      /** \brief We consider the norm function to be nondifferentiable a circle of this radius around 0 */
-      const double tolerance_;
+  friend auto directionalRestriction(const NormFunctional& f, const Vector& origin, const Vector& direction)
+  {
+    return NormDirectionalRestriction<Vector, Range>(f.coefficients_, origin, direction);
+  }
 
-      /** \brief The current state */
-      VectorType u_;
+  friend ShiftedNormFunctional<Vector, Range>
+    shift(const NormFunctional& f, const Vector& origin)
+  {
+    Vector zero;
+    Solvers::resizeInitialize(zero, origin, 0.0);
+    return ShiftedNormFunctional<Vector, Range>(f.coefficients_, std::move(zero), origin);
+  }
 
-    };
+protected:
+  const Vector coefficients_;
+};
 
-  }
+}  // namespace TNNMG
 
-}
+}  // namespace Dune
 #endif
 
diff --git a/dune/tnnmg/functionals/quadraticfunctional.hh b/dune/tnnmg/functionals/quadraticfunctional.hh
new file mode 100644
index 0000000000000000000000000000000000000000..90a8ca13c784831429f3546f9bd520409fbf8880
--- /dev/null
+++ b/dune/tnnmg/functionals/quadraticfunctional.hh
@@ -0,0 +1,261 @@
+// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set ts=8 sw=2 et sts=2:
+#ifndef DUNE_TNNMG_FUNCTIONALS_QUADRATICFUNCTIONAL_HH
+#define DUNE_TNNMG_FUNCTIONALS_QUADRATICFUNCTIONAL_HH
+
+
+
+#include <dune/common/concept.hh>
+#include <dune/common/exceptions.hh>
+#include <dune/common/typeutilities.hh>
+#include <dune/common/hybridutilities.hh>
+
+#include <dune/matrix-vector/algorithm.hh>
+
+#include <dune/solvers/common/resize.hh>
+#include <dune/solvers/common/copyorreference.hh>
+
+#include <dune/tnnmg/functionals/affineoperator.hh>
+#include <dune/tnnmg/iterationsteps/nonlineargsstep.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+template<class M, class V, class R=double>
+class ShiftedQuadraticFunctional;
+
+template<class M, class V, class R=double>
+class QuadraticFunctional;
+
+
+/** \brief Coordinate restriction of a quadratic functional
+ *
+ *  \tparam M Global matrix type
+ *  \tparam V global vector type
+ *  \tparam R Range type
+ */
+template<class M, class V, class R>
+class ShiftedQuadraticFunctional
+{
+public:
+
+  using Matrix = M;
+  using Vector = V;
+
+  using Range = R;
+
+  ShiftedQuadraticFunctional(const Matrix& quadraticPart, const Vector& linearPart, const Vector& origin) :
+    quadraticPart_(&quadraticPart),
+    originalLinearPart_(&linearPart),
+    origin_(&origin)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    Vector w = origin();
+    w += v;
+    Vector temp;
+    Dune::Solvers::resizeInitializeZero(temp,v);
+    quadraticPart().umv(w, temp);
+    temp *= 0.5;
+    temp -= originalLinearPart();
+    return temp*w;
+  }
+
+  Vector& origin()
+  {
+    return *origin_;
+  }
+
+  const Vector& origin() const
+  {
+    return *origin_;
+  }
+
+  void updateOrigin()
+  {}
+
+  void updateOrigin(std::size_t i)
+  {}
+
+  const Matrix& quadraticPart() const
+  {
+    return *quadraticPart_;
+  }
+
+  const Vector& originalLinearPart() const
+  {
+    return *originalLinearPart_;
+  }
+
+protected:
+  const Matrix* quadraticPart_;
+  const Vector* originalLinearPart_;
+  const Vector* origin_;
+};
+
+// Clang 3.8 refuses to compile this function as a part of the
+// ShiftedQuadraticFunctional class declaration/definition because
+// in order to determine the return type, we use calls to methods of
+// the then incomplete class.
+template<class M, class V, class R, class Index>
+auto coordinateRestriction(const ShiftedQuadraticFunctional<M,V,R>& f, const Index& i)
+{
+  using LocalMatrix = std::decay_t<decltype(f.quadraticPart()[i][i])>;
+  using LocalVector = std::decay_t<decltype(f.originalLinearPart()[i])>;
+
+  using namespace Dune::MatrixVector;
+  namespace H = Dune::Hybrid;
+
+  LocalVector ri = f.originalLinearPart()[i];
+  const LocalMatrix* Aii_p = nullptr;
+
+  const auto& Ai = f.quadraticPart()[i];
+  sparseRangeFor(Ai, [&](auto&& Aij, auto&& j) {
+    // TODO Here we must implement a wrapper to guarantee that this will work with proxy matrices!
+    H::ifElse(H::equals(j, i), [&](auto&& id){
+      Aii_p = id(&Aij);
+#ifdef NEW_TNNMG_COMPUTE_ITERATES_DIRECTLY
+    }, [&](auto&& id) {;
+      Imp::mmv(Aij, f.origin()[j], ri, PriorityTag<1>());
+    });
+#else
+    });
+    Imp::mmv(Aij, f.origin()[j], ri, PriorityTag<1>());
+#endif
+  });
+
+  return QuadraticFunctional<LocalMatrix&, LocalVector, R>(*Aii_p, std::move(ri));
+}
+
+/** \brief Coordinate restriction of a quadratic functional
+ *
+ *  \tparam M Global matrix type
+ *  \tparam V global vector type
+ *  \tparam R Range type
+ */
+template<class M, class V, class R=double>
+class QuadraticDirectionalRestriction
+{
+public:
+
+  using GlobalMatrix = M;
+  using GlobalVector = V;
+
+  using Matrix = typename GlobalVector::field_type;
+  using Vector = typename GlobalVector::field_type;
+
+
+  using Range = R;
+
+  QuadraticDirectionalRestriction(const GlobalMatrix& matrix, const GlobalVector& linearTerm, const GlobalVector& origin, const GlobalVector& direction)
+  {
+    GlobalVector temp = linearTerm;
+    matrix.mv(direction, temp);
+
+    quadraticPart_ = temp*direction;
+    linearPart_ = linearTerm*direction - temp*origin;
+  }
+
+  Range operator()(const Vector& v) const
+  {
+    DUNE_THROW(Dune::NotImplemented, "Evaluation of QuadraticDirectionalRestriction not implemented");
+  }
+
+  const Matrix& quadraticPart() const
+  {
+    return quadraticPart_;
+  }
+
+  const Vector& linearPart() const
+  {
+    return linearPart_;
+  }
+
+protected:
+  Matrix quadraticPart_;
+  Vector linearPart_;
+};
+
+
+
+/** \brief A quadratic functional
+ *
+ *  \tparam M Matrix type
+ *  \tparam V Vector type
+ *  \tparam R Range type
+ */
+template<class M, class V, class R>
+class QuadraticFunctional
+{
+public:
+
+  using Matrix = std::decay_t<M>;
+  using Vector = std::decay_t<V>;
+  using Range = R;
+
+  template<class MM, class VV>
+  QuadraticFunctional(MM&& matrix, VV&& linearTerm) :
+    matrix_(std::forward<MM>(matrix)),
+    linearPart_(std::forward<VV>(linearTerm))
+  {
+    Dune::Solvers::resizeInitializeZero(temp_,linearPart());
+  }
+
+  Range operator()(const Vector& v) const
+  {
+    Dune::Solvers::resizeInitializeZero(temp_,v);
+    quadraticPart().umv(v, temp_);
+    temp_ *= 0.5;
+    temp_ -= linearPart();
+    return temp_*v;
+  }
+
+  const Matrix& quadraticPart() const
+  {
+    return matrix_.get();
+  }
+
+  const Vector& linearPart() const
+  {
+    return linearPart_.get();
+  }
+
+  /**
+   * \brief Access derivative
+   */
+  friend AffineOperator<Matrix, Vector> derivative(const QuadraticFunctional& f)
+  {
+    return AffineOperator<Matrix, Vector>(f.quadraticPart(), f.linearPart());
+  }
+
+  friend auto directionalRestriction(const QuadraticFunctional& f, const Vector& origin, const Vector& direction)
+    -> QuadraticDirectionalRestriction<Matrix, Vector, Range>
+  {
+    return QuadraticDirectionalRestriction<Matrix, Vector, Range>(f.quadraticPart(), f.linearPart(), origin, direction);
+  }
+
+  friend auto shift(const QuadraticFunctional& f, const Vector& origin)
+    -> ShiftedQuadraticFunctional<Matrix, Vector, Range>
+  {
+    return ShiftedQuadraticFunctional<Matrix, Vector, Range>(f.quadraticPart(), f.linearPart(), origin);
+  }
+
+protected:
+
+  Solvers::ConstCopyOrReference<M> matrix_;
+  Solvers::ConstCopyOrReference<V> linearPart_;
+  mutable Vector temp_;
+};
+
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+#endif // DUNE_TNNMG_FUNCTIONALS_QUADRATICFUNCTIONAL_HH
diff --git a/dune/tnnmg/functionals/quadraticfunctionalconstrainedlinearization.hh b/dune/tnnmg/functionals/quadraticfunctionalconstrainedlinearization.hh
new file mode 100644
index 0000000000000000000000000000000000000000..bd031c7af6a8f5d9b79a46afdf9bcd973fe595ef
--- /dev/null
+++ b/dune/tnnmg/functionals/quadraticfunctionalconstrainedlinearization.hh
@@ -0,0 +1,108 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_FUNCTIONAL_QUADRATICFUNCTIONALCONSTRAINEDLINEARIZATION_HH
+#define DUNE_TNNMG_FUNCTIONAL_QUADRATICFUNCTIONALCONSTRAINEDLINEARIZATION_HH
+
+
+#include <cstddef>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+/**
+ * \brief A constrained linearization for QuadraticFunctional
+ *
+ * ATTENTION: This is currently only implemented for scalar valued vectors.
+ */
+template<class F, class BV>
+class QuadraticFunctionalConstrainedLinearization
+{
+public:
+    using Matrix = typename F::Matrix;
+    using Vector = typename F::Vector;
+    using BitVector = BV;
+    using ConstrainedMatrix = Matrix;
+    using ConstrainedVector = Vector;
+    using ConstrainedBitVector = BitVector;
+
+
+    QuadraticFunctionalConstrainedLinearization(const F& f, const BitVector& ignore) :
+        f_(f),
+        ignore_(ignore),
+        truncationTolerance_(1e-10)
+    {}
+
+    void bind(const Vector& x)
+    {
+        negativeGradient_ = derivative(f_)(x);
+        negativeGradient_ *= -1;
+        hessian_ = derivative(derivative(f_))(x);
+        truncationFlags_ = ignore_;
+
+        // truncate
+        for(std::size_t i=0; i<x.size(); ++i)
+        {
+            auto it = hessian_[i].begin();
+            auto end = hessian_[i].end();
+            for(; it!=end; ++it)
+                if (truncationFlags_[i].any() || truncationFlags_[it.index()].any())
+                    *it = 0;
+            if (truncationFlags_[i].any())
+            {
+                negativeGradient_[i] = 0;
+                hessian_[i][i] = 1;
+            }
+        }
+    }
+
+    void extendCorrection(ConstrainedVector& cv, Vector& v) const
+    {
+        // ATTENTION this is restricted to scalar problems currently
+        for(std::size_t i=0; i<v.size(); ++i)
+        {
+            if (truncationFlags_[i].any())
+                v[i] = 0;
+            else
+                v[i] = cv[i];
+        }
+    }
+
+    const BitVector& truncated() const
+    {
+        return truncationFlags_;
+    }
+
+    const auto& negativeGradient() const
+    {
+        return negativeGradient_;
+    }
+
+    const auto& hessian() const
+    {
+        return hessian_;
+    }
+
+private:
+    const F& f_;
+    const BitVector& ignore_;
+
+    double truncationTolerance_;
+
+    Vector negativeGradient_;
+    Matrix hessian_;
+    BitVector truncationFlags_;
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+
+#endif // DUNE_TNNMG_FUNCTIONAL_QUADRATICFUNCTIONALCONSTRAINEDLINEARIZATION
diff --git a/dune/tnnmg/functionals/sumfunctional.hh b/dune/tnnmg/functionals/sumfunctional.hh
index 545d3f058d4302a0e8778b5c983df2ce99978551..b026ea3cb191ab3537b678aaed8efbfc7ab751cd 100644
--- a/dune/tnnmg/functionals/sumfunctional.hh
+++ b/dune/tnnmg/functionals/sumfunctional.hh
@@ -1,11 +1,155 @@
 #ifndef DUNE_TNNMG_FUNCTIONALS_SUMFUNCTIONAL_HH
 #define DUNE_TNNMG_FUNCTIONALS_SUMFUNCTIONAL_HH
 
+#ifdef USE_OLD_TNNMG
 #include <memory>
 
 #include <dune/common/shared_ptr.hh>
 #include "dune/tnnmg/functionals/nonsmoothconvexfunctional.hh"
+#else
+#include <dune/common/hybridutilities.hh>
+#include <dune/common/std/apply.hh>
+#endif
+
+#ifndef USE_OLD_TNNMG
+namespace Dune {
+namespace TNNMG {
+
+// Forward declaration
+template<class... F>
+class ShiftedSumFunctional;
+
+
+/** \brief Sum of several functionals
+ *
+ *  \tparam F List of functionals
+ */
+template<class... F>
+class SumFunctional
+{
+  using FunctionTuple = std::tuple<F...>;
+  using F0 = std::tuple_element_t<0,FunctionTuple>;
+
+public:
+
+  using Functions = FunctionTuple;
+  using Vector = typename F0::Vector;
+  using Range = typename F0::Range;
+
+  SumFunctional(const F&... f) :
+    functions_(f...)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    Range y{0};
+    Dune::Hybrid::forEach(functions_, [&](auto&& f) {
+      y += f(v);
+    });
+    return y;
+  }
+
+  const FunctionTuple& functions() const
+  {
+    return functions_;
+  }
+
+  friend ShiftedSumFunctional<F...>
+    shift(const SumFunctional& f, const Vector& origin)
+  {
+    return Dune::Std::apply([&](const auto&... args) {
+      return ShiftedSumFunctional<F...>(args..., origin);
+    }, f.functions());
+  }
+
+private:
+  FunctionTuple functions_;
+};
+
+template<class... F, class Vector>
+auto directionalRestriction(const SumFunctional<F...>& f, const Vector& origin, const Vector& direction)
+{
+  return Dune::Std::apply([&](const auto&... args) {
+    return SumFunctional<decltype(directionalRestriction(args, origin, direction))...>(directionalRestriction(args, origin, direction)...);
+  }, f.functions());
+}
+
+
+
+/** \brief Sum of several functionals
+ *
+ *  \tparam F List of functionals
+ */
+template<class... F>
+class ShiftedSumFunctional
+{
+  using FunctionTuple = std::tuple<F...>;
+  using F0 = std::tuple_element_t<0,FunctionTuple>;
 
+public:
+
+  using Vector = typename F0::Vector;
+  using Range = typename F0::Range;
+
+private:
+
+  using ShiftedFunctionTuple = std::tuple<decltype(shift(std::declval<F>(), std::declval<Vector>()))...>;
+
+public:
+
+  using Functions = ShiftedFunctionTuple;
+
+  ShiftedSumFunctional(const F&... f, const Vector& origin) :
+    shiftedFunctions_(shift(f, origin)...)
+  {}
+
+  Range operator()(const Vector& v) const
+  {
+    Range y{0};
+    Dune::Hybrid::forEach(shiftedFunctions_, [&](auto&& f) {
+      y += f(v);
+    });
+    return y;
+  }
+
+  void updateOrigin()
+  {
+    Dune::Hybrid::forEach(shiftedFunctions_, [&](auto&& f) {
+      f.updateOrigin();
+    });
+  }
+
+  template<class Index>
+  void updateOrigin(Index i)
+  {
+    Dune::Hybrid::forEach(shiftedFunctions_, [&](auto&& f) {
+      f.updateOrigin(i);
+    });
+  }
+
+  const ShiftedFunctionTuple& functions() const
+  {
+    return shiftedFunctions_;
+  }
+
+private:
+  ShiftedFunctionTuple shiftedFunctions_;
+};
+
+
+
+template<class... F, class Index>
+auto coordinateRestriction(const ShiftedSumFunctional<F...>& f, const Index& i)
+{
+  return Dune::Std::apply([&](const auto&... args) {
+    return SumFunctional<decltype(coordinateRestriction(args, i))...>(coordinateRestriction(args, i)...);
+  }, f.functions());
+}
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+#else
 namespace Dune {
 
   namespace TNNMG {
@@ -125,5 +269,6 @@ namespace Dune {
   }   // namespace TNNMG
 
 }   // namespace Dune
+#endif
 
 #endif
diff --git a/dune/tnnmg/functionals/test/functionaltest.hh b/dune/tnnmg/functionals/test/functionaltest.hh
index 71560449e6005d10172431af2875b1cb11469d52..148643379e6e35262c1811ffaf8f5c154f0e3e41 100644
--- a/dune/tnnmg/functionals/test/functionaltest.hh
+++ b/dune/tnnmg/functionals/test/functionaltest.hh
@@ -32,7 +32,11 @@ namespace TNNMG {
  */
 template <class Functional>
 void testConvexity(const Functional& functional,
+#ifdef USE_OLD_TNNMG
                    const std::vector<typename Functional::VectorType>& testPoints)
+#else
+                   const std::vector<typename Functional::Vector>& testPoints)
+#endif
 {
   for (size_t i=0; i<testPoints.size(); i++)
     for (size_t j=0; j<testPoints.size(); j++)
@@ -49,10 +53,9 @@ void testConvexity(const Functional& functional,
       for (double t : {0.0, 0.2, 0.4, 0.6, 0.8, 1.0})
       {
         // convex combination between the two test points
-        typename Functional::VectorType p(p0.size());
-        for (size_t k=0; k<p0.size(); k++)
-          for (size_t l=0; l<p0[k].size(); l++)
-            p[k][l] = (1-t)*p0[k][l] + t*p1[k][l];
+        auto p = p0;
+        p *= (1-t);
+        p.axpy(t,p1);
 
         // Test for convexity
         if (functional(p) > ((1-t)*v0 + t*v1) + 1e-10)
@@ -333,9 +336,98 @@ void testSubDiff(Functional& functional,
   }
 }
 
+/** \brief Test whether the values of a directional restriction are correct, by comparing
+ *  with the values of the unrestricted functional.
+ * \param functional The functional to be restricted
+ * \param testPoints Set of test points, will additionally be abused as a set of test directions, too
+ * \param testParameters Test the one-dimensional restriction at these parameter values
+ */
+template <typename Functional, typename TestPoints>
+void testDirectionalRestrictionValues(const Functional& functional,
+                                      const TestPoints& testPoints,
+                                      const std::vector<double> testParameters)
+{
+  // Loop over all test points
+  for (auto&& origin : testPoints)
+  {
+    // Abuse test points also as test directions
+    for (auto&& testDirection : testPoints)
+    {
+      auto restriction = directionalRestriction(functional, origin, testDirection);
+
+      for (auto v : testParameters)
+      {
+        // Test whether restriction really is a restriction
+        auto probe = origin;
+        probe.axpy(v, testDirection);
+
+        auto restrictionValue = restriction(v);
+        auto realValue        = functional(probe);
+
+        if (fabs(restrictionValue - realValue) > 1e-6)
+        {
+          std::cerr << "Value of the directional restriction does not match the actual "
+                    << "functional value." << std::endl
+                    << "Restriction value at parameter " << v << " : " << restrictionValue << std::endl
+                    << "Actual value: " << realValue << std::endl;
+          abort();
+        }
+      }
+    }
+  }
+}
+
+/** \brief Test whether the subdifferentials of a directional restriction are correct, by comparing
+ *  with a finite-difference approximation
+ * \param functional The functional to be restricted
+ * \param testPoints Set of test points, will additionally be abused as a set of test directions, too
+ * \param testParameters Test the one-dimensional restriction at these parameter values
+ */
+template <typename Functional, typename TestPoints>
+void testDirectionalRestrictionSubdifferential(const Functional& functional,
+                                               const TestPoints& testPoints,
+                                               const std::vector<double> testParameters)
+{
+  // Loop over all test points
+  for (auto&& origin : testPoints)
+  {
+    // Abuse test points also as test directions
+    for (auto&& testDirection : testPoints)
+    {
+      auto restriction = directionalRestriction(functional, origin, testDirection);
+
+      for (auto v : testParameters)
+      {
+        // Test the subdifferential of the restriction
+        Solvers::Interval<double> subDifferential;
+        restriction.subDiff(v, subDifferential);
+
+        // Step size. Best value: square root of the machine precision
+        const double eps = std::sqrt(std::numeric_limits<double>::epsilon());
+
+        auto forwardFDGradient  = (restriction(v+eps) - restriction(v)) / eps;
+        auto backwardFDGradient = (restriction(v) - restriction(v-eps)) / eps;
+
+        if (std::abs(backwardFDGradient - subDifferential[0]) > 1e4*eps
+            or std::abs(forwardFDGradient - subDifferential[1]) > 1e4*eps)
+        {
+          std::cout << "Bug in subdifferential for directional restriction " << Dune::className(restriction) << ":" << std::endl;
+          std::cerr << "Subdifferential doesn't match FD approximation" << std::endl;
+          std::cerr << "SubDifferential: " << subDifferential
+                    << ",   FD: [" << backwardFDGradient << ", " << forwardFDGradient << "]" << std::endl;
+          std::cerr << "Origin:" << std::endl << origin << std::endl;
+          std::cerr << "Test direction:" << std::endl << testDirection << std::endl;
+          std::cerr << "Parameter value: " << v << std::endl;
+          DUNE_THROW(MathError,"");
+        }
+      }
+    }
+  }
+}
+
 
 }   // namespace TNNMG
 
 }   // namespace Dune
 
-#endif   // DUNE_TNNMG_FUNCTIONALS_FUNCTIONALTEST_HH
\ No newline at end of file
+#endif   // DUNE_TNNMG_FUNCTIONALS_FUNCTIONALTEST_HH
diff --git a/dune/tnnmg/functionals/test/normfunctionaltest.cc b/dune/tnnmg/functionals/test/normfunctionaltest.cc
index 67205a053874760c2d7e520a0c1c1f143a1c5833..250ad012459a9479db78eafc7c5aa522bba90aef 100644
--- a/dune/tnnmg/functionals/test/normfunctionaltest.cc
+++ b/dune/tnnmg/functionals/test/normfunctionaltest.cc
@@ -13,20 +13,26 @@ using namespace Dune;
 int main(int argc, char** argv)
 {
   // Create a norm functional on (R^3)^2 for testing
-  std::vector<double> coefficients = {2.0, 3.0};
-  typedef TNNMG::NormFunctional<3> Functional;
-  Functional functional(coefficients);
+  const int blocksize = 3;
+  using Vector = BlockVector<FieldVector<double,blocksize> >;
+  using Weights = Vector;
+
+  // TODO: Improve: we use the same vector type for weights as for coefficient vectors,
+  // even though we have only one coefficient per vector block.
+  Weights weights = {{2.0,2.0,2.0}, {3.0,3.0,3.0}};
+
+  TNNMG::NormFunctional<Vector> functional(weights);
 
   // A set of local test points, i.e., values for a single vector block
-  std::vector<Functional::LocalVectorType> localTestPoints = {{0,0,0},
-                                                              {1,0,0},
-                                                              {0,-2,0},
-                                                              {3.14,3.14,-4}};
+  std::vector<FieldVector<double,blocksize> > localTestPoints = {{0,0,0},
+                                                                 {1,0,0},
+                                                                 {0,-2,0},
+                                                                 {3.14,3.14,-4}};
 
   // Create real test points (i.e., block vectors) from the given values for a single block
-  std::vector<Functional::VectorType> testPoints(localTestPoints.size()*localTestPoints.size());
+  std::vector<Vector> testPoints(localTestPoints.size()*localTestPoints.size());
   for (size_t i=0; i<testPoints.size(); i++)
-    testPoints[i].resize(coefficients.size());
+    testPoints[i].resize(weights.size());
 
   for (size_t i=0; i<localTestPoints.size(); i++)
     for (size_t j=0; j<localTestPoints.size(); j++)
@@ -36,8 +42,46 @@ int main(int argc, char** argv)
     }
 
   // Test whether the functional is convex
-  testConvexity(functional, testPoints);
+  TNNMG::testConvexity(functional, testPoints);
+
+  ///////////////////////////////////////////////////////////////////
+  //  Test the directional restriction
+  ///////////////////////////////////////////////////////////////////
+
+  TNNMG::testDirectionalRestrictionValues(functional, testPoints, {-3, -2, -1, 0, 1, 2, 3});
+
+  TNNMG::testDirectionalRestrictionSubdifferential(functional, testPoints, {-3, -2, -1, 0, 1, 2, 3});
+
+  ///////////////////////////////////////////////////////////////////
+  //  Test the coordinate restriction
+  ///////////////////////////////////////////////////////////////////
+
+  // TODO: The following code does some shifting, restricting, and evaluating,
+  // without any real strategy.  I more systematic way to test this would be
+  // highly appreciated!
+  Vector probe;
+
+  probe = testPoints[0];  // Just to get the sizes right
+  probe = 0;
+
+  std::cout << "unshifted: " << functional(probe) << std::endl;
 
+  Vector offset = probe;
+  offset = 1;
+  offset = 2;
+
+  auto shifted1 = shift(functional, offset);
+
+  // Restrict to one FieldVector
+  auto shifted2ElementPlasticStrain = coordinateRestriction(shifted1, 0);
+
+  auto shifted3ElementPlasticStrain = shift(shifted2ElementPlasticStrain, offset[0]);
+
+  auto shifted3ElementPlasticStrainDirection = coordinateRestriction(shifted3ElementPlasticStrain, 0);
+
+  std::cout << "shifted3ElementPlasticStrainDirection: " << shifted3ElementPlasticStrainDirection(3*offset[0][0]) << std::endl;
+
+#if 0
   // Test whether the functional positive 1-homogeneous
   // We abuse the test points as test directions.
   testHomogeneity(functional, testPoints);
@@ -48,11 +92,8 @@ int main(int argc, char** argv)
   // Test the first derivative at the given test points
   testHessian(functional, testPoints);
 
-  // Test the directional subdifferential at the given test points
-  testDirectionalSubdifferential(functional, testPoints);
-
   // Test the partial subdifferential at the given test points
   testSubDiff(functional, testPoints);
-
+#endif
   return 0;
 }
diff --git a/dune/tnnmg/iterationsteps/CMakeLists.txt b/dune/tnnmg/iterationsteps/CMakeLists.txt
index 1f085d72a8496951185d940a83b011efc4261a9d..a643cac00897e4086aab77fcc720e73324a6517c 100644
--- a/dune/tnnmg/iterationsteps/CMakeLists.txt
+++ b/dune/tnnmg/iterationsteps/CMakeLists.txt
@@ -1,6 +1,9 @@
 install(FILES
+    acceleratednonlineargsstep.hh
     genericnonlineargs.hh
     genericnonlinearjacobi.hh
+    nonlineargsstep.hh
     preconfiguredtnnmgstep.hh
+    tnnmgacceleration.hh
     tnnmgstep.hh
     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/tnnmg/iterationsteps)
diff --git a/dune/tnnmg/iterationsteps/acceleratednonlineargsstep.hh b/dune/tnnmg/iterationsteps/acceleratednonlineargsstep.hh
new file mode 100644
index 0000000000000000000000000000000000000000..15e43ed9ce3e56e4adb36b5c0ba6121061864dec
--- /dev/null
+++ b/dune/tnnmg/iterationsteps/acceleratednonlineargsstep.hh
@@ -0,0 +1,119 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_ITERATIONSTEPS_ACCELERATEDNONLINEARGSSTEP_HH
+#define DUNE_TNNMG_ITERATIONSTEPS_ACCELERATEDNONLINEARGSSTEP_HH
+
+
+#include <dune/solvers/iterationsteps/iterationstep.hh>
+
+#include <dune/tnnmg/iterationsteps/nonlineargsstep.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+/**
+ * \brief A nonlinear Gauss-Seidel step
+ *
+ * \tparam V Vector type storing the iterate
+ * \tparam F Functional to minimize
+ * \tparam LS Local solver type
+ * \tparam BV Bit-vector type for marking ignored components
+ */
+template<class V, class F, class LS, class A, class BV = Dune::BitSetVector<V::block_type::dimension>>
+class AcceleratedNonlinearGSStep :
+  public IterationStep<V, BV>
+{
+  using Base = IterationStep<V, BV>;
+
+public:
+
+  using Vector = typename Base::Vector;
+  using BitVector = typename Base::BitVector;
+  using Functional = F;
+
+  using LocalSolver = LS;
+  using NonlinearSmoother = NonlinearGSStep<Functional, LocalSolver, BitVector>;
+
+  using AccelerationStep = A;
+
+  //! default constructor
+  template<class LS_T, class A_T>
+  AcceleratedNonlinearGSStep(const Functional& f, Vector& x, LS_T&& localSolver, A_T&& accelerationStep) :
+    Base(x),
+    f_(&f),
+    nonlinearSmoother_(f, x, std::forward<LS_T>(localSolver)),
+    preSmoothingSteps_(1),
+    postSmoothingSteps_(0),
+    accelerationStep_(accelerationStep)
+  {}
+
+  //! destructor
+  ~AcceleratedNonlinearGSStep()
+  {}
+
+  using Base::getIterate;
+
+  void preprocess() override
+  {
+    nonlinearSmoother_.setIgnore(this->ignore());
+    nonlinearSmoother_.preprocess();
+  }
+
+  void setPreSmoothingSteps(std::size_t i)
+  {
+    preSmoothingSteps_ = i;
+  }
+
+  void setPostSmoothingSteps(std::size_t i)
+  {
+    postSmoothingSteps_ = i;
+  }
+
+  /**
+   * \brief Do one Gauss-Seidel step
+   */
+  void iterate() override
+  {
+
+    const auto& f = *f_;
+    const auto& ignore = (*this->ignoreNodes_);
+    auto& x = *getIterate();
+
+    for (std::size_t i=0; i<preSmoothingSteps_; ++i)
+        nonlinearSmoother_.iterate();
+
+    accelerationStep_(f, x, ignore);
+
+    for (std::size_t i=0; i<postSmoothingSteps_; ++i)
+        nonlinearSmoother_.iterate();
+  }
+
+  const AccelerationStep& accelerationStep() const
+  {
+    return accelerationStep_;
+  }
+
+private:
+
+  const Functional* f_;
+
+  NonlinearSmoother nonlinearSmoother_;
+  std::size_t preSmoothingSteps_;
+  std::size_t postSmoothingSteps_;
+
+  AccelerationStep accelerationStep_;
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+
+#endif // DUNE_TNNMG_ITERATIONSTEPS_ACCELERATEDNONLINEARGSSTEP_HH
diff --git a/dune/tnnmg/iterationsteps/genericnonlineargs.hh b/dune/tnnmg/iterationsteps/genericnonlineargs.hh
index 7600e56a1be0e9d02d0478bdfd4fa1bf89bf2beb..0e1c3b2c6b78b1b0ec431484296952157d86cb1c 100644
--- a/dune/tnnmg/iterationsteps/genericnonlineargs.hh
+++ b/dune/tnnmg/iterationsteps/genericnonlineargs.hh
@@ -52,14 +52,6 @@ class GenericNonlinearGS : public IterationStep<typename TNNMGProblemType::Vecto
             }
         };
 
-        /** \brief Returns the current iterate
-         *
-         */
-        VectorType getSol()
-        {
-            return *x_;
-        };
-
     private:
 
         TNNMGProblemType* problem;
diff --git a/dune/tnnmg/iterationsteps/genericnonlinearjacobi.hh b/dune/tnnmg/iterationsteps/genericnonlinearjacobi.hh
index cf429b39dad23ca9c62a67049d4da7b89ee0beb3..84b39bdda79fd0482e577bd1db7da267dbf99a55 100644
--- a/dune/tnnmg/iterationsteps/genericnonlinearjacobi.hh
+++ b/dune/tnnmg/iterationsteps/genericnonlinearjacobi.hh
@@ -61,14 +61,6 @@ class GenericNonlinearJacobi : public IterationStep<typename TNNMGProblemType::V
             }
         };
 
-        /** \brief Returns the current iterate
-         *
-         */
-        VectorType getSol()
-        {
-            return *x_;
-        };
-
         using IterationStep<VectorType>::ignoreNodes_;
 
     private:
diff --git a/dune/tnnmg/iterationsteps/nonlineargsstep.hh b/dune/tnnmg/iterationsteps/nonlineargsstep.hh
new file mode 100644
index 0000000000000000000000000000000000000000..f64bddd7bae1d7f318b290df9b2b9d9df687fcfb
--- /dev/null
+++ b/dune/tnnmg/iterationsteps/nonlineargsstep.hh
@@ -0,0 +1,202 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_ITERATIONSTEPS_NONLINEARGSSTEP_HH
+#define DUNE_TNNMG_ITERATIONSTEPS_NONLINEARGSSTEP_HH
+
+#include <dune/common/indices.hh>
+#include <dune/common/typetraits.hh>
+#include <dune/common/hybridutilities.hh>
+
+#include <dune/solvers/iterationsteps/iterationstep.hh>
+#include <dune/solvers/common/defaultbitvector.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+
+class CoordinateDirection
+{
+public:
+  CoordinateDirection(std::size_t index) :
+    index_(index)
+  {}
+
+  std::size_t index() const
+  {
+    return index_;
+  }
+
+private:
+  std::size_t index_;
+};
+
+
+
+/**
+ * \brief A nonlinear Gauss-Seidel loop
+ *
+ * \param x Vector storing the iterate
+ * \param f Functional to minimize
+ * \param ignore Bit-vector marking ignored components
+ * \param localSolver Solver used to solve the local defect problems
+ *
+ * This is not a full iteration step but just its algorithmic
+ * core. Having this seperately allows to easily implement
+ * a nested Gauss-Seidel by calling gaussSeidelLoop()
+ * recursively inside of a localSolver.
+ */
+template<class V, class F, class BV, class LS,
+  std::enable_if_t<not IsTuple<LS>::value, int> = 0>
+void gaussSeidelLoop(V& x, const F& f, const BV& ignore, const LS& localSolver)
+{
+  namespace H = Dune::Hybrid;
+
+  auto&& shiftedF = shift(f, x);
+
+  H::forEach(H::integralRange(H::size(x)), [&](auto&& i) {
+    auto&& fLocal = coordinateRestriction(shiftedF, i);
+#ifdef NEW_TNNMG_COMPUTE_ITERATES_DIRECTLY
+    localSolver(x[i], fLocal, ignore[i]);
+#else
+    auto localCorrection = x[i];
+    localCorrection = 0;
+    localSolver(localCorrection, fLocal, ignore[i]);
+    x[i] += localCorrection;
+#endif
+    shiftedF.updateOrigin(i);
+  });
+}
+
+
+
+/**
+ * \brief A nonlinear Gauss-Seidel loop
+ *
+ * \param x Vector storing the iterate
+ * \param f Functional to minimize
+ * \param ignore Bit-vector marking ignored components
+ * \param localSolver Solver used to solve the local defect problems
+ *
+ * This is not a full iteration step but just its algorithmic
+ * core. Having this separately allows to easily implement
+ * a nested Gauss-Seidel by calling gaussSeidelLoop()
+ * recursively inside of a localSolver.
+ */
+template<class V, class F, class BV, class LS,
+  std::enable_if_t<IsTuple<LS>::value, int> = 0>
+void gaussSeidelLoop(V& x, const F& f, const BV& ignore, const LS& localSolvers)
+{
+  namespace H = Dune::Hybrid;
+
+  // Clang 3.8 refuses to recognise H::size as constant here, so that
+  // we need to use the decltype(...)::value kludge
+  static_assert(decltype(H::size(x))::value == decltype(H::size(localSolvers))::value,
+                "Size of local solver tuple provided to gaussSeidelLoop does not "
+                "match size of problem.");
+
+  auto&& shiftedF = shift(f, x);
+
+  H::forEach(H::integralRange(H::size(x)), [&](auto&& i) {
+    auto&& fLocal = coordinateRestriction(shiftedF, i);
+#ifdef NEW_TNNMG_COMPUTE_ITERATES_DIRECTLY
+    localSolver(x[i], fLocal, ignore[i]);
+#else
+    auto localCorrection = x[i];
+    localCorrection = 0;
+    H::elementAt(localSolvers, i)(localCorrection, fLocal, ignore[i]);
+    x[i] += localCorrection;
+#endif
+    shiftedF.updateOrigin(i);
+  });
+}
+
+
+template<class LS>
+class GaussSeidelLocalSolver
+{
+public:
+  GaussSeidelLocalSolver(const LS& localSolver) :
+    localSolver_(localSolver)
+  {}
+
+  template<class Vector, class Functional, class BitVector>
+  constexpr void operator()(Vector& x, const Functional& f, const BitVector& ignore) const
+  {
+    gaussSeidelLoop(x, f, ignore, localSolver_);
+  }
+
+private:
+  LS localSolver_;
+};
+
+
+template<class LS>
+auto gaussSeidelLocalSolver(LS&& localSolver) -> decltype(GaussSeidelLocalSolver<std::decay_t<LS>>(std::forward<LS>(localSolver)))
+{
+  return GaussSeidelLocalSolver<std::decay_t<LS>>(std::forward<LS>(localSolver));
+}
+
+
+/**
+ * \brief A nonlinear Gauss-Seidel step
+ *
+ * \tparam F Functional to minimize
+ * \tparam LS Local solver type
+ * \tparam BV Bit-vector type for marking ignored components
+ */
+template<class F, class LS, class BV = typename Solvers::DefaultBitVector_t<typename F::Vector> >
+class NonlinearGSStep :
+  public IterationStep<typename F::Vector, BV>
+{
+  using Base = IterationStep<typename F::Vector, BV>;
+
+public:
+
+  using Vector = typename F::Vector;
+  using BitVector = typename Base::BitVector;
+  using Functional = F;
+  using LocalSolver = LS;
+
+  //! default constructor
+  template<class LS_T>
+  NonlinearGSStep(const Functional& f, Vector& x, LS_T&& localSolver) :
+    Base(x),
+    f_(&f),
+    localSolver_(std::forward<LS_T>(localSolver))
+  {}
+
+  //! destructor
+  ~NonlinearGSStep()
+  {}
+
+  using Base::getIterate;
+
+  /**
+   * \brief Do one Gauss-Seidel step
+   */
+  void iterate() override
+  {
+    auto& x = *getIterate();
+    const auto& ignore = (*this->ignoreNodes_);
+    gaussSeidelLoop(x, *f_, ignore, localSolver_);
+  }
+
+private:
+
+  const Functional* f_;
+  LocalSolver localSolver_;
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_ITERATIONSTEPS_NONLINEARGSSTEP_HH
+
diff --git a/dune/tnnmg/iterationsteps/tnnmgacceleration.hh b/dune/tnnmg/iterationsteps/tnnmgacceleration.hh
new file mode 100644
index 0000000000000000000000000000000000000000..c4adc7ded1560f560f2d9691b29102a9b24e3b4e
--- /dev/null
+++ b/dune/tnnmg/iterationsteps/tnnmgacceleration.hh
@@ -0,0 +1,142 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_ITERATIONSTEPS_TNNMGACCELERATION_HH
+#define DUNE_TNNMG_ITERATIONSTEPS_TNNMGACCELERATION_HH
+
+#include <memory>
+#include <cmath>
+
+#include <dune/solvers/common/resize.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+/**
+ * \brief TNNMG acceleration step for AcceleratedNonlinearGSStep
+ *
+ * This implements TNNMG-like accelerations for nonlinear Gauss-Seidel
+ * methods. Notice that this is parametrized wrt the functional and
+ * all the substeps needed in a TNNMG coarse correction
+ *
+ * Notice, that this will internally create a Linearization object on
+ * demand and reuse it on all applications of the acceleration step.
+ *
+ * Because this class is itself used as callback in the
+ * AcceleratedNonlinearGSStep it should have value semantics.
+ * Hence it is designed to be cheap to copy.
+ *
+ * \tparam F Type of functional to minimize
+ * \tparam BV Type of bit vectors used to mark ignored components
+ * \tparam CL Type of the constrained linearization used for coarse corrections
+ * \tparam LS Type of linear solver callback used for the coarse correction
+ * \tparam DP Type of defect projection callback
+ * \tparam LSS Type of solver callback for scalar line search problems
+ */
+template<class F, class BV, class CL, class LS, class DP, class LSS>
+class TNNMGAcceleration
+{
+    using Vector = typename F::Vector;
+    using BitVector = BV;
+    using Linearization = CL;
+    using LinearSolver = LS;
+    using DefectProjection = DP;
+    using LineSearchSolver = LSS;
+    using ConstrainedVector = typename Linearization::ConstrainedVector;
+
+public:
+
+    /**
+     * \brief Create TNNMGAcceleration
+     *
+     * Notice that all callbacks passed to the constructor will be stored
+     * by value and should thus be cheap to copy.
+     *
+     * \param linearSolver This is a callback used to solve the constrained linearized system
+     * \param projection This is a callback used to compute a projection into a defect-admissible set
+     * \param lineSolver This is a callback used to minimize a directional restriction of the functional for computing a damping parameter
+     */
+    TNNMGAcceleration(const LinearSolver& linearSolver, const DefectProjection& projection, const LineSearchSolver& lineSolver) :
+        linearSolver_(linearSolver),
+        projection_(projection),
+        lineSolver_(lineSolver)
+    {}
+
+    /**
+     * \brief Apply acceleration step
+     *
+     * Apply the acceleration step to compute a correction
+     * for the given functional at given position and subject
+     * to ignore-constraints.
+     *
+     * \param f Functional to minimize
+     * \param x Current iterate that should be corrected
+     * \param ignore Bit vector marking components to ignore
+     */
+    void operator()(const F& f, Vector& x, const BitVector& ignore)
+    {
+        if (not linearization_)
+            linearization_ = std::make_shared<Linearization>(f, ignore);
+
+        linearization_->bind(x);
+
+        auto&& A = linearization_->hessian();
+        auto&& r = linearization_->negativeGradient();
+
+        Dune::Solvers::resizeInitializeZero(correction_, x);
+        Dune::Solvers::resizeInitializeZero(constrainedCorrection_, r);
+
+        linearSolver_(constrainedCorrection_, A, r);
+
+        linearization_->extendCorrection(constrainedCorrection_, correction_);
+
+        projection_(f, x, correction_);
+
+        // damp
+        auto fv = directionalRestriction(f, x, correction_);
+        dampingFactor_ = 0;
+        lineSolver_(dampingFactor_, fv, false);
+        if (std::isnan(dampingFactor_))
+            dampingFactor_ = 0;
+        correction_ *= dampingFactor_;
+
+        x += correction_;
+    }
+
+    /**
+     * \brief Export the last computed damping factor
+     */
+    double lastDampingFactor() const
+    {
+        return dampingFactor_;
+    }
+
+    /**
+     * \brief Export the last used linearization
+     */
+    const Linearization& linearization() const
+    {
+        return *linearization_;
+    }
+
+private:
+    LinearSolver linearSolver_;
+    DefectProjection projection_;
+    LineSearchSolver lineSolver_;
+    ConstrainedVector constrainedCorrection_;
+    Vector correction_;
+    std::shared_ptr<Linearization> linearization_;
+    double dampingFactor_;
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_ITERATIONSTEPS_TNNMGACCELERATION_HH
diff --git a/dune/tnnmg/iterationsteps/tnnmgstep.hh b/dune/tnnmg/iterationsteps/tnnmgstep.hh
index 41cd19f625fbd0e1f1ff7cd9e384a5af30cd898f..5c377c131aef3625eb2ccba0016d2e4e65ebe39a 100644
--- a/dune/tnnmg/iterationsteps/tnnmgstep.hh
+++ b/dune/tnnmg/iterationsteps/tnnmgstep.hh
@@ -8,200 +8,262 @@
 
 #include <dune/common/timer.hh>
 
+#include <dune/solvers/common/resize.hh>
 #include "dune/solvers/iterationsteps/iterationstep.hh"
 #include "dune/solvers/iterationsteps/lineariterationstep.hh"
-
-template<class TNNMGProblemTypeTEMPLATE, class NonlinearSmootherType>
-class TruncatedNonsmoothNewtonMultigrid : public IterationStep<typename TNNMGProblemTypeTEMPLATE::VectorType>
+#include <dune/solvers/solvers/iterativesolver.hh>
+#include <dune/solvers/solvers/linearsolver.hh>
+
+namespace Dune {
+namespace TNNMG {
+
+/**
+ * \brief One iteration of the TNNMG method
+ *
+ * \tparam F Functional to minimize
+ * \tparam BV Bit-vector type for marking ignored components
+ */
+template<class F, class BV, class Linearization,
+                                  class DefectProjection,
+                                  class LineSearchSolver>
+class TNNMGStep :
+  public IterationStep<typename F::Vector, BV>
 {
-
-    public:
-        typedef TNNMGProblemTypeTEMPLATE TNNMGProblemType;
-
-        typedef typename TNNMGProblemType::MatrixType MatrixType;
-        typedef typename TNNMGProblemType::VectorType VectorType;
-
-        typedef typename TNNMGProblemType::Linearization Linearization;
-        typedef typename Linearization::MatrixType CoarseMatrixType;
-        typedef typename Linearization::VectorType CoarseVectorType;
-        typedef typename Linearization::BitVectorType CoarseBitVectorType;
-
-
-        typedef LinearIterationStep<CoarseMatrixType, CoarseVectorType, CoarseBitVectorType> CoarseLinearIterationStep;
-
-        struct Statistics
-        {
-            int iterationCount;
-            double smoothingTime;
-            double linearizationTime;
-            double coarseCorrectionTime;
-            double postProcessingTime;
-        };
-
-        TruncatedNonsmoothNewtonMultigrid() :
-            problem_(0),
-            nonlinearSmoother_(0),
-            linearIterationStep_(0),
-            preSmoothingSteps_(1),
-            postSmoothingSteps_(0),
-            linearIterationSteps_(1)
-        {};
-
-        TruncatedNonsmoothNewtonMultigrid(CoarseLinearIterationStep& linearIterationStep, NonlinearSmootherType& nonlinearSmoother) :
-            problem_(0),
-            nonlinearSmoother_(&nonlinearSmoother),
-            linearIterationStep_(&linearIterationStep),
-            preSmoothingSteps_(1),
-            postSmoothingSteps_(0),
-            linearIterationSteps_(1)
-        {};
-
-
-        void setNonlinearSmoother(NonlinearSmootherType& nonlinearSmoother)
-        {
-            nonlinearSmoother_ = &nonlinearSmoother;
-        };
-
-        void setLinearIterationStep(CoarseLinearIterationStep& linearIterationStep)
-        {
-            linearIterationStep_ = &linearIterationStep;
-        };
-
-        void setProblem(VectorType& x, TNNMGProblemType& problem)
-        {
-            this->x_ = &x;
-            this->problem_ = &problem;
-        };
-
-        void setSmoothingSteps(int pre, int linear, int post)
-        {
-            preSmoothingSteps_ = pre;
-            linearIterationSteps_ = linear;
-            postSmoothingSteps_ = post;
-        };
-
-
-        virtual void preprocess()
-        {
-            outStream_.str("");
-            outStream_ << problem_->getOutput(true);
-            outStream_ << "  step size    ";
-            outStream_ << "  energy       ";
-
-            statistics_.iterationCount = 0;
-            statistics_.smoothingTime = 0.0;
-            statistics_.linearizationTime = 0.0;
-            statistics_.coarseCorrectionTime = 0.0;
-            statistics_.postProcessingTime = 0.0;
-        }
-
-        void iterate()
-        {
-            // clear previous output data
-            outStream_.str("");
-
-            // do some time measurement
-            Dune::Timer timer;
-            ++statistics_.iterationCount;
-
-            VectorType& x = *x_;
-            TNNMGProblemType& problem = *problem_;
-
-            // apply nonlinear smoother (pre smoothing)
-            timer.reset();
-            nonlinearSmoother_->setProblem(x, problem);
-            nonlinearSmoother_->ignoreNodes_ = ignoreNodes_;
-            for(int i=0; i<preSmoothingSteps_; ++i)
-                nonlinearSmoother_->iterate();
-            x = nonlinearSmoother_->getSol();
-            statistics_.smoothingTime += timer.elapsed();
-
-            // assemble and truncate linear system for coarse correction
-            timer.reset();
-            Linearization linearization;
-            problem.assembleTruncate(x, linearization, *ignoreNodes_);
-            statistics_.linearizationTime += timer.elapsed();
-
-            // solve linear system for coarse correction
-            CoarseVectorType v(linearization.b.size());
-            v = 0.0;
-
-            // apply linear solver to coarse problem
-            timer.reset();
-            {
-                linearIterationStep_->setProblem(linearization.A, v, linearization.b);
-                linearIterationStep_->ignoreNodes_ = &(linearization.ignore);
-                linearIterationStep_->preprocess();
-                for(int i=0; i<linearIterationSteps_; ++i)
-                    linearIterationStep_->iterate();
-                v = linearIterationStep_->getSol();
-            }
-            statistics_.coarseCorrectionTime += timer.elapsed();
-
-            // post processing
-            timer.reset();
-
-            // project correction
-            VectorType projected_v;
-            problem.projectCoarseCorrection(x, v, projected_v, linearization);
-
-            // compute damping parameter
-            double alpha = problem.computeDampingParameter(x, projected_v);
-
-            // apply damped coarse correction
-            x.axpy(alpha, projected_v);
-
-            statistics_.postProcessingTime += timer.elapsed();
-
-            // apply nonlinear smoother (post smoothing)
-            timer.reset();
-            nonlinearSmoother_->setProblem(x, problem);
-            for(int i=0; i<postSmoothingSteps_; ++i)
-                nonlinearSmoother_->iterate();
-            x = nonlinearSmoother_->getSol();
-            statistics_.smoothingTime += timer.elapsed();
-
-            outStream_ << problem.getOutput();
-            outStream_.setf(std::ios::scientific);
-            outStream_ << std::setw(15) << alpha;
-            outStream_ << std::setw(15) << problem.computeEnergy(x);
-        }
-
-        VectorType getSol()
-        {
-            return *x_;
-        }
-
-        std::string getOutput() const
-        {
-            std::string s = outStream_.str();
-            outStream_.str("");
-            return s;
-        }
-
-        const Statistics& getStatistics() const
-        {
-            return statistics_;
-        }
-
-        using IterationStep<VectorType>::ignoreNodes_;
-
-    private:
-
-        Statistics statistics_;
-
-        using IterationStep<VectorType>::x_;
-        TNNMGProblemType* problem_;
-
-        NonlinearSmootherType* nonlinearSmoother_;
-
-        CoarseLinearIterationStep* linearIterationStep_;
-
-        int preSmoothingSteps_;
-        int postSmoothingSteps_;
-        int linearIterationSteps_;
-
-        mutable std::ostringstream outStream_;
+  using Base = IterationStep<typename F::Vector, BV>;
+
+public:
+
+  using Vector = typename F::Vector;
+  using ConstrainedVector = typename Linearization::ConstrainedVector;
+  using ConstrainedMatrix = typename Linearization::ConstrainedMatrix;
+  using BitVector = typename Base::BitVector;
+  using ConstrainedBitVector = typename Linearization::ConstrainedBitVector;
+  using Functional = F;
+  using IterativeSolver = Solvers::IterativeSolver< ConstrainedVector, Solvers::DefaultBitVector_t<ConstrainedVector> >;
+  using LinearSolver = Solvers::LinearSolver< ConstrainedMatrix,  ConstrainedVector >;
+
+  /** \brief Constructor with an iterative solver object for the linear correction
+   * \param iterativeSolver This is a callback used to solve the constrained linearized system
+   * \param projection This is a callback used to compute a projection into a defect-admissible set
+   * \param lineSolver This is a callback used to minimize a directional restriction of the functional
+   *        for computing a damping parameter
+   */
+  TNNMGStep(const Functional& f,
+            Vector& x,
+            std::shared_ptr<IterationStep<Vector,BitVector> > nonlinearSmoother,
+            std::shared_ptr<IterativeSolver> iterativeSolver,
+            const DefectProjection& projection,
+            const LineSearchSolver& lineSolver)
+  : Base(x),
+    f_(&f),
+    nonlinearSmoother_(nonlinearSmoother),
+    preSmoothingSteps_(1),
+    iterativeSolver_(iterativeSolver),
+    projection_(projection),
+    lineSolver_(lineSolver)
+  {}
+
+  /** \brief Constructor with a linear solver object for the linear correction
+   * \param linearSolver This is a callback used to solve the constrained linearized system
+   * \param projection This is a callback used to compute a projection into a defect-admissible set
+   * \param lineSolver This is a callback used to minimize a directional restriction of the functional
+   *        for computing a damping parameter
+   */
+  TNNMGStep(const Functional& f,
+            Vector& x,
+            std::shared_ptr<IterationStep<Vector,BitVector> > nonlinearSmoother,
+            std::shared_ptr<LinearSolver> linearSolver,
+            const DefectProjection& projection,
+            const LineSearchSolver& lineSolver)
+  : Base(x),
+    f_(&f),
+    nonlinearSmoother_(nonlinearSmoother),
+    preSmoothingSteps_(1),
+    linearSolver_(linearSolver),
+    projection_(projection),
+    lineSolver_(lineSolver)
+  {}
+
+  /** \brief Constructor with a LinearIterationStep object for the linear correction
+   * \param linearIterationStep This is a callback used to solve the constrained linearized system
+   * \param projection This is a callback used to compute a projection into a defect-admissible set
+   * \param lineSolver This is a callback used to minimize a directional restriction of the functional
+   *        for computing a damping parameter
+   */
+  TNNMGStep(const Functional& f,
+            Vector& x,
+            std::shared_ptr<Solvers::IterationStep<Vector,BitVector> > nonlinearSmoother,
+            std::shared_ptr<Solvers::LinearIterationStep<ConstrainedMatrix,ConstrainedVector> > linearIterationStep,
+            unsigned int noOfLinearIterationSteps,
+            const DefectProjection& projection,
+            const LineSearchSolver& lineSolver)
+  : Base(x),
+    f_(&f),
+    nonlinearSmoother_(nonlinearSmoother),
+    preSmoothingSteps_(1),
+    linearIterationStep_(linearIterationStep),
+    noOfLinearIterationSteps_(noOfLinearIterationSteps),
+    projection_(projection),
+    lineSolver_(lineSolver)
+  {}
+
+  //! destructor
+  ~TNNMGStep()
+  {}
+
+  using Base::getIterate;
+
+  void preprocess() override
+  {
+    nonlinearSmoother_->setIgnore(this->ignore());
+    nonlinearSmoother_->preprocess();
+  }
+
+  void setPreSmoothingSteps(std::size_t i)
+  {
+    preSmoothingSteps_ = i;
+  }
+
+  /**
+   * \brief Do one TNNMG step
+   */
+  void iterate() override
+  {
+
+    const auto& f = *f_;
+    const auto& ignore = (*this->ignoreNodes_);
+    auto& x = *getIterate();
+
+    // Nonlinear presmoothing
+    for (std::size_t i=0; i<preSmoothingSteps_; ++i)
+        nonlinearSmoother_->iterate();
+
+    // Compute constraint/truncated linearization
+    if (not linearization_)
+      linearization_ = std::make_shared<Linearization>(f, ignore);
+
+    linearization_->bind(x);
+
+    auto&& A = linearization_->hessian();
+    auto&& r = linearization_->negativeGradient();
+
+    // Compute inexact solution of the linearized problem
+    Solvers::resizeInitializeZero(correction_, x);
+    Solvers::resizeInitializeZero(constrainedCorrection_, r);
+
+    // TNNMGStep assumes that the linearization and the solver for the
+    // linearized problem will not use the ignoreNodes field
+    auto emptyIgnore = ConstrainedBitVector();
+
+    Solvers::resizeInitialize(emptyIgnore, constrainedCorrection_, false);
+
+    // Do the linear correction with a LinearIterationStep object
+    if (linearIterationStep_)
+    {
+      linearIterationStep_->setIgnore(emptyIgnore);
+      linearIterationStep_->setProblem(A, constrainedCorrection_, r);
+      linearIterationStep_->preprocess();
+
+      for (unsigned int i=0; i<noOfLinearIterationSteps_; i++)
+        linearIterationStep_->iterate();
+    }
+    else if (iterativeSolver_)  // Do the linear correction with an iterative Solver object
+    {
+      // Hand the linear problem to the iterative solver.
+      // The IterationStep member needs to be a LinearIterationStep,
+      // so we can give it the matrix.
+      using LinearIterationStepType = Solvers::LinearIterationStep<std::decay_t<decltype(A)>,
+                                                                  typename Linearization::ConstrainedVector,
+                                                                  decltype(emptyIgnore) >;
+
+      LinearIterationStepType* linearIterationStep;
+
+      auto iterativeSolver = std::dynamic_pointer_cast<Solvers::IterativeSolver<typename Linearization::ConstrainedVector> >(iterativeSolver_);
+      if (iterativeSolver)
+      {
+        iterativeSolver->getIterationStep().setIgnore(emptyIgnore);
+        linearIterationStep = dynamic_cast<LinearIterationStepType*>(&(iterativeSolver->getIterationStep()));
+      } else
+        DUNE_THROW(Exception, "Linear solver has to be an IterativeSolver!");
+
+      if (linearIterationStep)
+        linearIterationStep->setProblem(A, constrainedCorrection_, r);
+      else
+        DUNE_THROW(Exception, "Linear solver does not accept matrices!");
+
+
+      iterativeSolver_->preprocess();
+      iterativeSolver_->solve();
+    }
+    else // Do the linear correction with a linear Solver object
+    {
+      linearSolver_->setProblem(A,constrainedCorrection_, r);
+      linearSolver_->preprocess();
+      linearSolver_->solve();
+    }
+
+    linearization_->extendCorrection(constrainedCorrection_, correction_);
+
+    // Project onto admissible set
+    projection_(f, x, correction_);
+
+    // Line search
+    auto fv = directionalRestriction(f, x, correction_);
+    dampingFactor_ = 0;
+    lineSolver_(dampingFactor_, fv, false);
+    if (std::isnan(dampingFactor_))
+      dampingFactor_ = 0;
+    correction_ *= dampingFactor_;
+
+    x += correction_;
+  }
+
+  /**
+   * \brief Export the last computed damping factor
+   */
+  double lastDampingFactor() const
+  {
+    return dampingFactor_;
+  }
+
+  /**
+   * \brief Export the last used linearization
+   */
+  const Linearization& linearization() const
+  {
+    return *linearization_;
+  }
+
+private:
+
+  const Functional* f_;
+
+  std::shared_ptr<IterationStep<Vector,BitVector> > nonlinearSmoother_;
+  std::size_t preSmoothingSteps_;
+
+  std::shared_ptr<Linearization> linearization_;
+
+  // The following members cannot all be used at once:
+  // either we have a Dune::Solvers::IterativeSolver that implements the linear correction...
+  std::shared_ptr<IterativeSolver> iterativeSolver_;
+
+  // or we have a Dune::Solvers::LinearSolver that implements the linear correction...
+  std::shared_ptr<LinearSolver> linearSolver_;
+
+  // ... or we have a mere LinearIterationStep, together with a number of times
+  // it is supposed to be called.  You cannot have more than one at once.
+  std::shared_ptr<LinearIterationStep<typename Linearization::ConstrainedMatrix,typename Linearization::ConstrainedVector> > linearIterationStep_;
+  unsigned int noOfLinearIterationSteps_;
+
+  typename Linearization::ConstrainedVector constrainedCorrection_;
+  Vector correction_;
+  DefectProjection projection_;
+  LineSearchSolver lineSolver_;
+  double dampingFactor_;
 };
 
+
+} // end namespace TNNMG
+} // end namespace Dune
+
 #endif
diff --git a/dune/tnnmg/linearsolvers/CMakeLists.txt b/dune/tnnmg/linearsolvers/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5fbac5d259e092edc1b0ee2b79146eed1eba41d9
--- /dev/null
+++ b/dune/tnnmg/linearsolvers/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+install(FILES
+    fastamgsolver.hh
+    ilu0cgsolver.hh
+    ldlsolver.hh
+    mgsolver.hh
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/tnnmg/linearsolvers)
diff --git a/dune/tnnmg/linearsolvers/fastamgsolver.hh b/dune/tnnmg/linearsolvers/fastamgsolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..aead8aa6bc327f3932bece2c01d016188fdae959
--- /dev/null
+++ b/dune/tnnmg/linearsolvers/fastamgsolver.hh
@@ -0,0 +1,67 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_LINEARSOLVERS_FASTAMGSOLVER_HH
+#define DUNE_TNNMG_LINEARSOLVERS_FASTAMGSOLVER_HH
+
+#include <memory>
+
+#include <dune/istl/solvers.hh>
+#include <dune/istl/paamg/fastamg.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+template<class Matrix, class Vector>
+class FastAMGSolver
+{
+  using Operator = Dune::MatrixAdapter<Matrix,Vector,Vector>;
+  using Criterion = Dune::Amg::SymmetricCriterion<Matrix, Dune::Amg::RowSum>;
+  using AMG = Dune::Amg::FastAMG<Operator, Vector>;
+  using Parameters = Dune::Amg::Parameters;
+
+public:
+
+  template<class M>
+  void operator()(Vector& x, M&& m, const Vector& r)
+  {
+//    if (not amg_)
+    {
+      matrix_ = std::forward<M>(m);
+      Parameters parameters;
+      parameters.setDebugLevel(0);
+      parameters.setNoPreSmoothSteps(1);
+      parameters.setNoPostSmoothSteps(1);
+      Criterion criterion;
+      criterion.setDebugLevel(0);
+      operator_ = std::make_shared<Operator>(matrix_);
+      amg_ = std::make_shared<AMG>(*operator_, criterion, parameters);
+    }
+//    else
+//    {
+//      matrix_ = std::forward<M>(m);
+//      amg_->recalculateHierarchy();
+//    }
+    x = 0;
+    auto mutableR = r;
+    amg_->pre(x, mutableR);
+    amg_->apply(x, mutableR);
+  }
+
+private:
+    Matrix matrix_;
+    std::shared_ptr<AMG> amg_;
+    std::shared_ptr<Operator> operator_;
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_LINEARSOLVERS_FASTAMGSOLVER_HH
diff --git a/dune/tnnmg/linearsolvers/ilu0cgsolver.hh b/dune/tnnmg/linearsolvers/ilu0cgsolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..3a1922f96e743a3f538c1a59ef457dab597f4c21
--- /dev/null
+++ b/dune/tnnmg/linearsolvers/ilu0cgsolver.hh
@@ -0,0 +1,52 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_LINEARSOLVERS_ILU0CGSOLVER_HH
+#define DUNE_TNNMG_LINEARSOLVERS_ILU0CGSOLVER_HH
+
+
+
+#include <dune/istl/preconditioners.hh>
+#include <dune/istl/solvers.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+class ILU0CGSolver
+{
+public:
+
+  ILU0CGSolver(double residualReduction, std::size_t maxSteps) :
+    residualReduction_(residualReduction),
+    maxSteps_(maxSteps)
+  {}
+
+  template<class Matrix, class Vector>
+  void operator()(Vector& x, const Matrix& A, const Vector& r) const
+  {
+    Dune::InverseOperatorResult statistics;
+    Dune::MatrixAdapter<Matrix,Vector,Vector> op(A);
+    Dune::SeqILU0<Matrix,Vector,Vector> ilu0(A,1.0);
+    Dune::CGSolver<Vector> cg(op, ilu0, residualReduction_, maxSteps_, 0);
+    x = 0;
+    auto mutableR = r;
+    cg.apply(x, mutableR, statistics);
+  }
+
+private:
+  double residualReduction_;
+  std::size_t maxSteps_;
+};
+
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_LINEARSOLVERS_ILU0CGSOLVER_HH
diff --git a/dune/tnnmg/linearsolvers/ldlsolver.hh b/dune/tnnmg/linearsolvers/ldlsolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..8616a66a5a24ff8168f2534ab54754a7cf2a227a
--- /dev/null
+++ b/dune/tnnmg/linearsolvers/ldlsolver.hh
@@ -0,0 +1,41 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_LINEARSOLVERS_LDLSOLVER_HH
+#define DUNE_TNNMG_LINEARSOLVERS_LDLSOLVER_HH
+
+#include <memory>
+
+#include <dune/istl/solvers.hh>
+#include <dune/istl/ldl.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+struct LDLSolver
+{
+  template<class Matrix, class Vector>
+  void operator()(Vector& x, const Matrix& A, const Vector& r) const
+  {
+#if HAVE_SUITESPARSE_LDL
+    Dune::InverseOperatorResult statistics;
+    Dune::LDL<Matrix> ldlSolver(A);
+    auto mutableR = r;
+    ldlSolver.apply(x, mutableR, statistics);
+#else
+    DUNE_THROW(Dune::NotImplemented, "LDLTSolver is not available without SuiteSparses' LDL.");
+#endif
+  }
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_LINEARSOLVERS_LDLSOLVER_HH
diff --git a/dune/tnnmg/linearsolvers/mgsolver.hh b/dune/tnnmg/linearsolvers/mgsolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..601ab308a38a2c24a82d567eca5f479b895992d1
--- /dev/null
+++ b/dune/tnnmg/linearsolvers/mgsolver.hh
@@ -0,0 +1,94 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_LINEARSOLVERS_MGSOLVER_HH
+#define DUNE_TNNMG_LINEARSOLVERS_MGSOLVER_HH
+
+#include <memory>
+
+#include <dune/solvers/common/defaultbitvector.hh>
+#include <dune/solvers/common/resize.hh>
+#include <dune/solvers/iterationsteps/multigridstep.hh>
+#include <dune/solvers/solvers/solver.hh>
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+/**
+ * \brief A linear solver callback using one multigrid step
+ *
+ * \tparam Matrix Matrix type of linear system
+ * \tparam Vector Vector type of linear system
+ */
+template<class Matrix, class Vector>
+class MGSolver
+{
+  using BitVector = Dune::Solvers::DefaultBitVector_t<Vector>;
+  using MGStep = MultigridStep<Matrix, Vector, BitVector>;
+
+public:
+
+  /**
+   * \brief Construct V-cycle MGSolver
+   *
+   * \param transferOperators A vector of transfer operators to be used in the multigrid method
+   * \param preSmoother Pre-smoother for the multigrid method
+   * \param postSmoother Post-smoother for the multigrid method
+   * \param pre Number of pre-smoothing steps
+   * \param post Number of post-smoothing steps
+   */
+  template<class TransferOperators, class PreSmoother, class PostSmoother>
+  MGSolver(const TransferOperators& transferOperators, PreSmoother& preSmoother, PostSmoother& postSmoother, unsigned int pre, unsigned int post)
+  {
+    mgStep_.setMGType(1, pre, post);
+    mgStep_.setSmoother(&preSmoother, &postSmoother);
+    mgStep_.setTransferOperators(transferOperators);
+  }
+
+  /**
+   * \brief Construct V-cycle MGSolver
+   *
+   * \param transferOperators A vector of transfer operators to be used in the multigrid method
+   * \param preSmoother Pre-smoother for the multigrid method
+   * \param postSmoother Post-smoother for the multigrid method
+   * \param pre Number of pre-smoothing steps
+   * \param post Number of post-smoothing steps
+   * \param coarseSolver
+   */
+  template<class TransferOperators, class PreSmoother, class PostSmoother>
+  MGSolver(const TransferOperators& transferOperators, PreSmoother& preSmoother, PostSmoother& postSmoother, unsigned int pre, unsigned int post,
+           std::shared_ptr<Solver> coarseSolver)
+  {
+    mgStep_.setMGType(1, pre, post);
+    mgStep_.setSmoother(&preSmoother, &postSmoother);
+    mgStep_.setTransferOperators(transferOperators);
+    mgStep_.basesolver_ = coarseSolver.get();
+  }
+
+  template<class M>
+  void operator()(Vector& x, M&& m, const Vector& r)
+  {
+    Dune::Solvers::resizeInitialize(ignore_, x, false);
+    mgStep_.setIgnore(ignore_);
+    mgStep_.setProblem(m, x, r);
+    mgStep_.preprocess();
+    mgStep_.iterate();
+  }
+
+private:
+  MGStep mgStep_;
+  BitVector ignore_;
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_LINEARSOLVERS_MGSOLVER_HH
+
+
diff --git a/dune/tnnmg/localsolvers/CMakeLists.txt b/dune/tnnmg/localsolvers/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..41820d86bb49ef3cdf4ff78d393370ef350b56ff
--- /dev/null
+++ b/dune/tnnmg/localsolvers/CMakeLists.txt
@@ -0,0 +1,5 @@
+
+install(FILES
+    scalarobstaclesolver.hh
+    scalarquadraticsolver.hh
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/tnnmg/localsolvers)
diff --git a/dune/tnnmg/localsolvers/scalarobstaclesolver.hh b/dune/tnnmg/localsolvers/scalarobstaclesolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..406c37da58907ebb4d74a27a4cdb448109f66fb9
--- /dev/null
+++ b/dune/tnnmg/localsolvers/scalarobstaclesolver.hh
@@ -0,0 +1,42 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_LOCALSOLVERS_SCALAROBSTACLESOLVER_HH
+#define DUNE_TNNMG_LOCALSOLVERS_SCALAROBSTACLESOLVER_HH
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+/**
+ * \brief A local solver for scalar quadratic obstacle problems
+ *
+ * \todo Add concept check for the function interface
+ */
+class ScalarObstacleSolver
+{
+public:
+  template<class Vector, class Functional, class BitVector>
+  constexpr void operator()(Vector& x, const Functional& f, const BitVector& ignore) const
+  {
+    if (not ignore)
+    {
+      x = f.linearPart()/f.quadraticPart();
+      if (x < f.lowerObstacle())
+        x = f.lowerObstacle();
+      if (x > f.upperObstacle())
+        x = f.upperObstacle();
+    }
+  }
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_LOCALSOLVERS_SCALAROBSTACLESOLVER_HH
diff --git a/dune/tnnmg/localsolvers/scalarquadraticsolver.hh b/dune/tnnmg/localsolvers/scalarquadraticsolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..52ea48cc007e4576d7df2f5d852908d282b853cc
--- /dev/null
+++ b/dune/tnnmg/localsolvers/scalarquadraticsolver.hh
@@ -0,0 +1,36 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_LOCALSOLVERS_SCALARQUADRATICSOLVER_HH
+#define DUNE_TNNMG_LOCALSOLVERS_SCALARQUADRATICSOLVER_HH
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+
+/**
+ * \brief A local solver for scalar quadratic problems
+ *
+ * \todo Add concept check for the function interface
+ */
+class ScalarQuadraticSolver
+{
+public:
+  template<class Vector, class Functional, class BitVector>
+  constexpr void operator()(Vector& x, const Functional& f, const BitVector& ignore) const
+  {
+    if (not ignore)
+      x = f.linearPart()/f.quadraticPart();
+  }
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_LOCALSOLVERS_SCALARQUADRATICSOLVER_HH
diff --git a/dune/tnnmg/localsolvers/simplexsolver.hh b/dune/tnnmg/localsolvers/simplexsolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..2c9856472e49cd5645ca0fd6408fadb9d7cd6241
--- /dev/null
+++ b/dune/tnnmg/localsolvers/simplexsolver.hh
@@ -0,0 +1,158 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_LOCALSOLVERS_SIMPLEXSOLVER_HH
+#define DUNE_TNNMG_LOCALSOLVERS_SIMPLEXSOLVER_HH
+
+#include <algorithm>
+#include <cmath>
+#include <functional>
+#include <numeric>
+
+#include <dune/matrix-vector/axpy.hh>
+
+#include "../functionals/boxconstrainedquadraticfunctional.hh"
+
+namespace Dune {
+namespace TNNMG {
+
+/**
+ * \brief A local solver for quadratic minimization problems with lower obstacle
+ * where the quadratic part is given by a scaled identity. Additionally a sum
+ * constraint is imposed.
+ *
+ * Note: In order to use it as a solver to compute corrections for a
+ * simplex-constrained problem, make sure the iterate already fulfills the sum
+ * constraint and you compute corrections with sum constraint 0.
+ *
+ * The available implementation solves the problem in n*log(n) time, exploiting
+ * the decoupling of the energy w.r.t. the coordinate directions.
+ *
+ * \todo Add generic case where the quadratic part is not given by a scaled
+ * identity.
+ *
+ */
+template <class Field = double>
+struct SimplexSolver {
+  SimplexSolver(Field sum = 0.0)
+      : r_(sum) {}
+
+  /**
+  * \brief Project b to a (scaled) Gibbs simplex
+  * \param x projected vector
+  * \param b input vector
+  * \param s scaling of the simplex
+  *
+  * Write the problem as follows:
+  *   ( I_N + \partial\phi_N  1_N^T ) (    x   ) = ( b )
+  *   (         1_N             0   ) ( lambda ) = ( s )
+  * where I_N is the identity matrix with N rows and columns,
+  * phi_N is an N-vector of phi_0, the latter denoting the obstacle at 0 and
+  * 1_N is the N-vector of ones.
+  *
+  * We determine lambda by a Schur approach:
+  *   For each lambda, (I_N + \partial\phi_N) \ (b - lambda) yields a unique
+  *   solution which we can use to replace x in the second equation. Furthermore
+  *   the contributions do not couple, so we obtain
+  *   s = \sum_i (1 + \partial\phi_0) \ (b_i - lambda).
+  *
+  *   We sort the values of b by size and successively check if the resulting
+  *   intervals do permit a lambda that induces an activity pattern which does
+  *   indeed cut off all subsequent (lower) values of b.
+  *   Note: This also works in the corner case where b contains duplicate
+  *   values, because lambda cannot be larger than the successive element (with
+  *   the same value) in these cases -- the interval has width 0.
+  *
+  * With given lambda, we can compute x easily.
+  *
+  */
+  template <class X, class R>
+  static auto projectToSimplex(X& x, X b, R s = 1.0) {
+    if (s == 0.0) {
+      x = 0.0;
+      return;
+    }
+    assert(s > 0.0 && "Sum constraint must be non-negative.");
+
+    size_t N = b.size();
+    assert(N > 0 && "Vector has zero size.");
+
+    // determine lambda by successively checking the intervals of b
+    R lambda;
+    x = b; // alienate x for a sorted version of b
+    std::sort(x.begin(), x.end(), std::greater<R>());
+    s *= -1.0;
+    for (size_t i = 0; i < N; ++i) {
+      s += x[i]; //
+      lambda = s / (i + 1);
+      if (x[i + 1] < lambda)
+        break;
+    }
+
+    // compute x
+    for (size_t j = 0; j < N; ++j)
+      x[j] = std::max(b[j] - lambda, R(0));
+  }
+
+  /**
+   * \brief Solve a quadratic minimization problem with lower obstacle where
+   * the quadratic part is given by a scaled identity. Additionally, a sum
+   * constraint is imposed on the solution.
+   * \param x the minimizer
+   * \param f the quadratic functional with lower obstacle
+   * \param ignore ignore nodes
+   *
+   *  This determines x for the following system (1).
+   *  The equivalence transformations (2),(3) show what is implemented.
+   *
+   *  (1a) x = argmin 0.5*<Av,v> - <b,v> where
+   *  (1b) \sum v_i = r
+   *  (1c) v_i \geq l_i
+   *
+   *  Divide energy by a>0.
+   *  Set c := 1/a * b.
+   *  (2a) x = argmin 0.5*<v,v> - <c,v> where
+   *  (2b) \sum v_i = r
+   *  (2c) v_i \geq l_i
+   *
+   *  Transform w := v - l.
+   *  Set d := c - l, s = r - \sum l_i, const = 0.5*<l,l> - <c,l>.
+   *  (3a) x = l + argmin 0.5*<w,w> - <d,w> + const
+   *  (3b) \sum w_i = s
+   *  (3c) v_i \geq 0
+   *
+   */
+  template <class X, class K, int n, class V, class L, class U, class R,
+            class Ignore>
+  auto operator()(X&& x, BoxConstrainedQuadraticFunctional<
+                             ScaledIdentityMatrix<K, n>&, V, L, U, R>& f,
+                  Ignore& ignore) const {
+    if (ignore.all())
+      return;
+    if (ignore.any())
+      DUNE_THROW(NotImplemented, "All or no ignore nodes should be set.");
+    assert(ignore.none());
+
+    const auto& a = f.quadraticPart().scalar();
+    if (a <= 0.0)
+      DUNE_THROW(MathError, "ScaledIdentity scaling must be positive.");
+
+    for (auto&& ui : f.upperObstacle())
+      assert(std::isinf(ui) && "Upper obstacle must be infinity.");
+
+    auto&& l = f.lowerObstacle();
+    auto s = std::accumulate(l.begin(), l.end(), r_, std::minus<R>());
+    auto d = l;
+    d *= -1.0;
+    Dune::MatrixVector::addProduct(d, 1.0 / a, f.linearPart());
+    projectToSimplex(x, d, s);
+    x += l;
+  }
+
+private:
+  const Field r_;
+};
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+#endif // DUNE_TNNMG_LOCALSOLVERS_SIMPLEXSOLVER_HH
diff --git a/dune/tnnmg/problem-classes/bisection.hh b/dune/tnnmg/problem-classes/bisection.hh
index f315f289cc54797c3123f6564f981244753f3079..7337bfcf611dde7e48d9b62970d91db936b20a82 100644
--- a/dune/tnnmg/problem-classes/bisection.hh
+++ b/dune/tnnmg/problem-classes/bisection.hh
@@ -17,15 +17,21 @@ class Bisection
          *        and try to find the minimum that way.  This can lead to speedup.
          * \param safety acceptance factor for inexact minimization
          */
-        Bisection(double acceptError = 0.0, double acceptFactor = 1.0, double requiredResidual = 1e-12, bool fastQuadratic = true, double safety = 1e-14) :
+        Bisection(double acceptError = 0.0, double acceptFactor = 1.0, double requiredResidual = 1e-12,
+#ifdef USE_OLD_TNNMG
+                  bool fastQuadratic = true,
+#endif
+                  double safety = 1e-14) :
+#ifdef USE_OLD_TNNMG
             fastQuadratic_(fastQuadratic),
+#endif
             acceptFactor_(acceptFactor),
             acceptError_(acceptError),
             requiredResidual_(requiredResidual),
             safety_(safety)
         {};
 
-
+#ifndef USE_OLD_TNNMG
         /** \brief minimize convex functional
         * 
         * Computes an approximate minimizer of the convex functional
@@ -49,13 +55,193 @@ class Bisection
         {
             size_t n=0;
 
+            Dune::Solvers::Interval<double> I;
+            count = 0;
+
+            // try if projected initial guess is sufficient
+            Dune::Solvers::Interval<double> dom = J.domain();
+            x = dom.projectIn(x);
+            Dune::Solvers::Interval<double> DJ = J.subDifferential(x);
+            ++count;
+            if (DJ.containsZero(safety_))
+            {
+                if (verbosity > 0)
+                    std::cout << "Bisection: initial iterate " << x << " accepted, DJ = " << DJ << std::endl;
+                return x;
+            }
+
+            // compute initial interval
+            // if quadratic part is strictly positive we can compute one bound from the other
+            if (DJ[0] > 0.0)
+            {
+                I[1] = x;
+                I[0] = dom.projectFromBelow(I[1] - DJ[0]);
+
+                DJ = J.subDifferential(I[0]);
+                ++count;
+                while (DJ[0] > safety_)
+                {
+                    I[0] = I[1] - 2.0*(I[1]-I[0]);
+                    if (I[0] < dom[0])
+                    {
+                        I[0] = dom[0];
+                        break;
+                    }
+                    DJ = J.subDifferential(I[0]);
+                    ++count;
+                }
+            }
+            else
+            {
+                I[0] = x;
+                I[1] = dom.projectFromAbove(I[0] - DJ[1]);
+
+                DJ = J.subDifferential(I[1]);
+                ++count;
+                while (DJ[1] < -safety_)
+                {
+                    I[1] = I[0] - 2.0*(I[0]-I[1]);
+                    if (I[1] > dom[1])
+                    {
+                        I[1] = dom[1];
+                        break;
+                    }
+                    DJ = J.subDifferential(I[1]);
+                    ++count;
+                }
+            }
+
+            if (verbosity>0)
+            {
+                std::cout << " #  |      I[0]    |       x      |      I[1]    |   dJ(I[0])   |     dJ(x)    |   dJ(I[1])   |" << std::endl;
+                std::cout << "----|--------------|--------------|--------------|--------------|--------------|--------------|" << std::endl;
+            }
+
+            // compute midpoint
+            x = (I[0] + I[1]) / 2.0;
+
+            // We remember the value of x from the last loop in order
+            // to check if the iteration becomes stationary. This
+            // is necessary to guarantee termination because it
+            // might happen, that I[0] and I[1] are successive
+            // floating point numbers, while the values at both
+            // interval boundaries do not match the normal
+            // termination criterion.
+            //
+            // Checking ((I[0] < x) and (x < I[1])) does not work
+            // since this is true in the above case if x is taken
+            // from the register of a floating point unit that
+            // uses a higher accuracy internally
+            //
+            // The above does e.g. happen with the x87 floating
+            // point unit that uses 80 bit internally. In this
+            // specific case the problem could be avoided by
+            // changing the x87 rounding mode with
+            //
+            //   int mode = 0x27F;
+            //   asm ("fldcw %0" : : "m" (*&mode));
+            //
+            // In the general case one can avoid the problem at least
+            // for the equality check using a binary comparison
+            // that circumvents the floating point unit.
+            double xLast = I[0];
+
+            // iterate while mid point does not coincide with boundary numerically
+            while (not(binaryEqual(x, xLast)))
+            {
+                ++n;
+
+                // evaluate subdifferential
+                DJ = J.subDifferential(x);
+                ++count;
+
+                if (verbosity>0)
+                {
+                    Dune::Solvers::Interval<double> DJ0 = J.subDifferential(I[0]);
+                    Dune::Solvers::Interval<double> DJ1 = J.subDifferential(I[1]);
+
+                    const std::streamsize p = std::cout.precision();
+
+                    std::cout << std::setw(4) << n << "|"
+                              << std::setprecision(8)
+                              << std::setw(14) << I[0] << "|"
+                              << std::setw(14) << x << "|"
+                              << std::setw(14) << I[1] << "|"
+                              << std::setw(14) << DJ0 << "|"
+                              << std::setw(14) << DJ << "|"
+                              << std::setw(14) << DJ1 << "|" << std::endl;
+
+                    std::cout << std::setprecision(p);
+                }
+
+                // stop if at least 'factor' of full minimization step is realized
+                // if DJ[0]<= 0 we have
+                //
+                //    x_old <= I[0] <= x <= x* <= I[1]
+                //
+                // and hence the criterion
+                //
+                //    alpha|x*-x_old| <= alpha|I[1]-x_old| <= |x-x_old| <= |x*-x_old|
+                //
+                // similar for DJ[1] >= 0
+                if ((I[0]>=x_old) and (DJ[0]<=0.0) and (std::abs((x-x_old)/(I[1]-x_old)) >= acceptFactor_) and (-requiredResidual_ <= DJ[1]))
+                    break;
+                if ((I[1]<=x_old) and (DJ[1]>=0.0) and (std::abs((x-x_old)/(I[0]-x_old)) >= acceptFactor_) and (DJ[0] <= requiredResidual_))
+                    break;
+
+                // stop if error is less than acceptError_
+                if (std::abs(I[1]-I[0]) <= acceptError_)
+                    break;
+
+                // stop if 0 in DJ numerically
+                if (DJ.containsZero(safety_))
+                    break;
+
+                // adjust bounds and compute new mid point
+                if (DJ[0] > 0.0)
+                    I[1] = x;
+                else
+                    I[0] = x;
+
+                xLast = x;
+
+                x = (I[0] + I[1]) / 2.0;
+            }
+
+            return x;
+        }
+
+#else  // The following is for the old tnnmg implementation
+
+        /** \brief minimize convex functional
+        *
+        * Computes an approximate minimizer of the convex functional
+        * @f[ f(x) = \frac 1 2 ax^2 - rx + \phi(x) @f]
+        * using bisection. If \f$ x^* \f$ is the exact minimizer
+        * and \f$ x^{old} \f$ is the old value the returned
+        * approximation @f$ x @f$ satisfies
+        * \f[
+        * (x - x^{old}) \in [ factor , 1] (x^*- x^{old}).
+        * \f]
+        * This guarantees enough descent for every fixed \f$ factor \f$ and hence convergence of the Gauss-Seidel method.
+        *
+        * \param J The convex functional
+        * \param x initial value
+        * \param x_old old value, needed for inexact minimization
+        * \param [out] count return the number of times the subdifferential of J was evaluated
+        * \return approximate minimizer
+        */
+        template <class Functional>
+        double minimize(const Functional& J, double x, double x_old, int& count, int verbosity=0) const
+        {
+            size_t n=0;
+
             Dune::Solvers::Interval<double> I;
             Dune::Solvers::Interval<double> DJ;
             Dune::Solvers::Interval<double> dom;
             count = 0;
 
             J.domain(dom);
-
             double A = J.quadraticPart();
             double b = J.linearPart();
 
@@ -74,7 +260,11 @@ class Bisection
             J.subDiff(x, DJ);
             ++count;
             if (DJ.containsZero(safety_))
+            {
+                if (verbosity > 0)
+                    std::cout << "Bisection: initial iterate " << x << " accepted, DJ = " << DJ << std::endl;
                 return x;
+            }
 
             // compute initial interval
             // if quadratic part is strictly positive we can compute one bound from the other
@@ -232,6 +422,7 @@ class Bisection
         setFastQuadratic(bool fastQuadratic) {
               fastQuadratic_ = fastQuadratic;
         }
+#endif
 
     private:
 
@@ -296,8 +487,9 @@ class Bisection
             return true;
         }
 
-
+#ifdef USE_OLD_TNNMG
         bool fastQuadratic_;
+#endif
         double acceptFactor_;
         double acceptError_;
         double requiredResidual_;
diff --git a/dune/tnnmg/projections/CMakeLists.txt b/dune/tnnmg/projections/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b99dea451971a0ae211eb1c02c80e2a7fdc450d
--- /dev/null
+++ b/dune/tnnmg/projections/CMakeLists.txt
@@ -0,0 +1,5 @@
+
+install(FILES
+    obstacledefectprojection.hh
+    trivialdefectprojection.hh
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/tnnmg/projections)
diff --git a/dune/tnnmg/projections/obstacledefectprojection.hh b/dune/tnnmg/projections/obstacledefectprojection.hh
new file mode 100644
index 0000000000000000000000000000000000000000..5579f3e2d107dfcae8c6f24049cee5da3b3de384
--- /dev/null
+++ b/dune/tnnmg/projections/obstacledefectprojection.hh
@@ -0,0 +1,61 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_PROJECTIONS_OBSTACLEDEFECTPROJECTION_HH
+#define DUNE_TNNMG_PROJECTIONS_OBSTACLEDEFECTPROJECTION_HH
+
+#include <dune/common/hybridutilities.hh>
+
+#include <dune/istl/preconditioners.hh>
+#include <dune/istl/solvers.hh>
+
+#include <dune/common/typetraits.hh>
+
+namespace Dune {
+namespace TNNMG {
+
+  namespace {
+    template <class Obstacle, class Vector,
+              bool IsNumber = Dune::IsNumber<Vector>::value>
+    struct ObstacleDefectProjectionHelper {
+      static constexpr void apply(const Obstacle& lo, const Obstacle& uo,
+                                  const Vector& x, Vector& v) {
+        using namespace Dune::Hybrid;
+        forEach(integralRange(Hybrid::size(x)), [&](auto&& i) {
+          auto const& lo_i = lo[i];
+          auto const& uo_i = uo[i];
+          auto const& x_i = x[i];
+          auto& v_i = v[i];
+          ObstacleDefectProjectionHelper<
+              std::decay_t<decltype(lo_i)>,
+              std::decay_t<decltype(x_i)>>::apply(lo_i, uo_i, x_i, v_i);
+        });
+      }
+    };
+    template <class Obstacle, class Vector>
+    struct ObstacleDefectProjectionHelper<Obstacle, Vector, true> {
+      static constexpr void apply(const Obstacle& lo, const Obstacle& uo,
+                                  const Vector& x, Vector& v) {
+        if (v < lo - x)
+          v = lo - x;
+        if (v > uo - x)
+          v = uo - x;
+      }
+    };
+  }
+
+  struct ObstacleDefectProjection {
+    template <class Functional, class Vector>
+    constexpr void operator()(const Functional& f, const Vector& x, Vector& v) {
+      auto const& lo = f.lowerObstacle();
+      auto const& uo = f.upperObstacle();
+      ObstacleDefectProjectionHelper<std::decay_t<decltype(lo)>, Vector>::apply(
+          lo, uo, x, v);
+    }
+  };
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_PROJECTIONS_OBSTACLEDEFECTPROJECTION_HH
diff --git a/dune/tnnmg/projections/trivialdefectprojection.hh b/dune/tnnmg/projections/trivialdefectprojection.hh
new file mode 100644
index 0000000000000000000000000000000000000000..622ab9d491cca595d824221687e449d1cbd2c8cb
--- /dev/null
+++ b/dune/tnnmg/projections/trivialdefectprojection.hh
@@ -0,0 +1,28 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#ifndef DUNE_TNNMG_PROJECTIONS_TRIVIALDEFECTPROJECTION_HH
+#define DUNE_TNNMG_PROJECTIONS_TRIVIALDEFECTPROJECTION_HH
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+
+/** \brief The projection onto the entire space, i.e., the projection that does nothing
+ */
+struct TrivialDefectProjection
+{
+  template<class Functional, class Vector>
+  constexpr void operator()(const Functional& f, const Vector& x, Vector& v)
+  {}
+};
+
+
+
+} // end namespace TNNMG
+} // end namespace Dune
+
+
+
+#endif // DUNE_TNNMG_PROJECTIONS_TRIVIALDEFECTPROJECTION_HH
diff --git a/dune/tnnmg/test/CMakeLists.txt b/dune/tnnmg/test/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..14cc1d69f6723048fc3b32126b22f7285e50f00d
--- /dev/null
+++ b/dune/tnnmg/test/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Put your test in here if it needs access to external grids
+
+dune_add_test(SOURCES conceptcheck)
+dune_add_test(SOURCES multitypegstest)
+dune_add_test(SOURCES nonlineargstest)
+dune_add_test(SOURCES nonlineargsperformancetest NAME nonlineargsperformancetest_corrections)
+dune_add_test(SOURCES nonlineargsperformancetest NAME nonlineargsperformancetest_iterates COMPILE_DEFINITIONS "NEW_TNNMG_COMPUTE_ITERATES_DIRECTLY=1")
+dune_add_test(SOURCES simplexsolvertest)
+dune_add_test(SOURCES tnnmgsteptest)
diff --git a/dune/tnnmg/test/conceptcheck.cc b/dune/tnnmg/test/conceptcheck.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0101ad503083406ec944cc3d470d3e90c30470c6
--- /dev/null
+++ b/dune/tnnmg/test/conceptcheck.cc
@@ -0,0 +1,56 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#include <config.h>
+
+#include <dune/common/concept.hh>
+#include <dune/common/fvector.hh>
+#include <dune/common/typetraits.hh>
+
+#include <dune/istl/bvector.hh>
+#include <dune/istl/multitypeblockvector.hh>
+
+#include <dune/tnnmg/concepts.hh>
+
+struct NonComparable {};
+
+
+
+int main() {
+  bool passed = true;
+  using namespace Dune::TNNMG::Concept;
+
+  using CT0 = int;
+  using CT1 = std::tuple<double, double>;
+  using CT2 = std::tuple<double, std::string>;
+  using CT3 = std::tuple<CT1, CT2>;
+  using CT4 = Dune::MultiTypeBlockVector<CT1, CT3>;
+
+  using NCT0 = NonComparable;
+  using NCT1 = std::complex<int>;
+  using NCT2 = std::tuple<double, NonComparable>;
+  using NCT3 = std::tuple<CT3, NCT1>;
+  using NCT4 = std::tuple<NCT2, CT3>;
+  using NCT5 = Dune::MultiTypeBlockVector<CT1, NCT3>;
+
+
+  // Check comparable types
+  {
+    passed = passed and Dune::models<IsLessThanComparable, CT0, CT0>();
+    passed = passed and Dune::models<IsLessThanComparable, CT1, CT1>();
+    passed = passed and Dune::models<IsLessThanComparable, CT2, CT2>();
+    passed = passed and Dune::models<IsLessThanComparable, CT3, CT3>();
+    passed = passed and Dune::models<IsLessThanComparable, CT4, CT4>();
+  }
+
+  // Check non-comparable types
+  {
+    passed = passed and not Dune::models<IsLessThanComparable, NCT0, NCT0>();
+    passed = passed and not Dune::models<IsLessThanComparable, NCT1, NCT1>();
+    passed = passed and not Dune::models<IsLessThanComparable, NCT2, NCT2>();
+    passed = passed and not Dune::models<IsLessThanComparable, NCT3, NCT3>();
+    passed = passed and not Dune::models<IsLessThanComparable, NCT4, NCT4>();
+    passed = passed and not Dune::models<IsLessThanComparable, NCT5, NCT5>();
+  }
+
+  return passed ? 0 : 1;
+}
diff --git a/dune/tnnmg/test/multitypegstest.cc b/dune/tnnmg/test/multitypegstest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..89f4374315370ca1bdae3b498262f7b1160c84d2
--- /dev/null
+++ b/dune/tnnmg/test/multitypegstest.cc
@@ -0,0 +1,264 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+
+#include <config.h>
+
+#include <iostream>
+#include <sstream>
+
+// dune-common includes
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/stringutility.hh>
+
+// dune-istl includes
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/multitypeblockvector.hh>
+#include <dune/istl/multitypeblockmatrix.hh>
+
+
+// dune-grid includes
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/io/file/vtk/vtkwriter.hh>
+
+// dune-solver includes
+#include <dune/solvers/solvers/loopsolver.hh>
+#include <dune/solvers/norms/energynorm.hh>
+#include <dune/solvers/common/defaultbitvector.hh>
+
+// dune-tnnmg includes
+#include <dune/tnnmg/functionals/quadraticfunctional.hh>
+#include <dune/tnnmg/functionals/boxconstrainedquadraticfunctional.hh>
+#include <dune/tnnmg/iterationsteps/nonlineargsstep.hh>
+
+#include <dune/tnnmg/localsolvers/scalarobstaclesolver.hh>
+
+#include <dune/tnnmg/linearsolvers/ldlsolver.hh>
+#include <dune/tnnmg/linearsolvers/ilu0cgsolver.hh>
+#include <dune/tnnmg/linearsolvers/fastamgsolver.hh>
+
+#include <dune/solvers/test/common.hh>
+
+using namespace Dune;
+
+template <class DomainType, class RangeType, class F>
+class FunctionFromLambda :
+  public Dune::VirtualFunction<DomainType, RangeType>
+{
+public:
+  FunctionFromLambda(F f):
+    f_(f)
+  {}
+
+  void evaluate(const DomainType& x, RangeType& y) const
+  {
+    y = f_(x);
+  }
+
+private:
+  F f_;
+};
+
+template<class Domain, class Range, class F>
+auto makeFunction(F&& f)
+{
+  return FunctionFromLambda<Domain, Range, std::decay_t<F>>(std::forward<F>(f));
+}
+
+
+template<class MatrixType, class VectorType, class BitVector>
+void solveProblem(const MatrixType& mat, VectorType& x, const VectorType& rhs, const BitVector& ignore, int maxIterations=100, double tolerance=1.0e-8)
+{
+    using Vector = VectorType;
+    using Matrix = MatrixType;
+
+    auto lower = x;
+    auto upper = x;
+
+    lower = 0;
+    upper = 1;
+
+    using Functional = Dune::TNNMG::BoxConstrainedQuadraticFunctional<Matrix, Vector, Vector, Vector, double>;
+    auto J = Functional(mat, rhs, lower, upper);
+
+    auto localSubSolver = gaussSeidelLocalSolver(gaussSeidelLocalSolver(Dune::TNNMG::ScalarObstacleSolver()));
+    auto localSolver = std::make_tuple(localSubSolver, localSubSolver);
+
+    using Step = typename Dune::TNNMG::NonlinearGSStep<Functional, decltype(localSolver)>;
+    using Solver = ::LoopSolver<Vector>;
+
+    auto step = Step(J, x, localSolver);
+    auto norm = EnergyNorm<Matrix, Vector>(mat);
+    auto solver = Solver(&step, 100, 0, &norm, Solver::FULL);
+
+    step.setIgnore(ignore);
+
+    solver.addCriterion(
+            [&](){
+                return Dune::formatString("   % 12.5e", J(x));
+            },
+            "   energy      ");
+
+    double initialEnergy = J(x);
+    solver.addCriterion(
+            [&](){
+                static double oldEnergy=initialEnergy;
+                double currentEnergy = J(x);
+                double decrease = currentEnergy - oldEnergy;
+                oldEnergy = currentEnergy;
+                return Dune::formatString("   % 12.5e", decrease);
+            },
+            "   decrease    ");
+
+    solver.addCriterion(
+            [&](){
+                using namespace Dune::Indices;
+                std::size_t n00 = 0;
+                for(std::size_t i=0; i<x[_0].size(); ++i)
+                  n00 += ((x[_0][i][0] == lower[_0][i][0]) || (x[_0][i][0] == upper[_0][i][0]));
+                std::size_t n01 = 0;
+                for(std::size_t i=0; i<x[_0].size(); ++i)
+                  n01 += ((x[_0][i][1] == lower[_0][i][1]) || (x[_0][i][1] == upper[_0][i][1]));
+                std::size_t n10 = 0;
+                for(std::size_t i=0; i<x[_0].size(); ++i)
+                  n10 += ((x[_1][i][0] == lower[_1][i][0]) || (x[_1][i][0] == upper[_1][i][0]));
+                return Dune::formatString("   % 8d   % 8d   % 8d", n00, n01, n10);
+            },
+            "   #tr(0*0)   #tr(0*1)   #tr(1*0)");
+
+    std::vector<double> correctionNorms;
+    solver.addCriterion(Dune::Solvers::correctionNormCriterion(step, norm, 1e-10, correctionNorms));
+
+    solver.preprocess();
+    solver.solve();
+    std::cout << correctionNorms.size() << std::endl;
+}
+
+
+
+
+
+template <class GridType>
+bool checkWithGrid(const GridType& grid, const std::string fileName="")
+{
+  bool passed = true;
+
+  static const int dim = GridType::dimension;
+
+  const int blocksize0 = 2;
+  const int blocksize1 = 1;
+
+  using Vector = MultiTypeBlockVector<BlockVector<FieldVector<double,blocksize0> >,
+                                      BlockVector<FieldVector<double,blocksize1> > >;
+  using BlockedMatrixRow0 = MultiTypeBlockVector<BCRSMatrix<FieldMatrix<double,blocksize0,blocksize0> >,
+                                                 BCRSMatrix<FieldMatrix<double,blocksize0,blocksize1> > >;
+  using BlockedMatrixRow1 = MultiTypeBlockVector<BCRSMatrix<FieldMatrix<double,blocksize1,blocksize0> >,
+                                                 BCRSMatrix<FieldMatrix<double,blocksize1,blocksize1> > >;
+  using BlockedMatrixType = MultiTypeBlockMatrix<BlockedMatrixRow0,BlockedMatrixRow1>;
+
+  BlockedMatrixType A;
+  Vector blockedX;
+
+  using namespace Indices;
+
+  typedef typename Dune::Solvers::DefaultBitVector_t<Vector> BitVector;
+
+  typedef typename GridType::LeafGridView GridView;
+  typedef typename Dune::FieldVector<double, dim> DomainType;
+  typedef typename Dune::FieldVector<double, 1> RangeType;
+
+  const auto gridView = grid.leafGridView();
+
+  constructPQ1Pattern(gridView, A[_0][_0]);
+  constructPQ1Pattern(gridView, A[_1][_1]);
+  A = 0.0;
+  assemblePQ1Stiffness(gridView, A[_0][_0]);
+  assemblePQ1Stiffness(gridView, A[_1][_1]);
+
+  BlockedMatrixType M;
+  constructPQ1Pattern(gridView, M[_0][_0]);
+  constructPQ1Pattern(gridView, M[_1][_1]);
+  M = 0.0;
+  assemblePQ1Mass(gridView, M[_0][_0]);
+  assemblePQ1Mass(gridView, M[_1][_1]);
+  A[_0][_0] += M[_0][_0];
+  A[_1][_1] += M[_1][_1];
+
+  Vector rhs;
+  rhs[_0].resize(A[_0][_0].N());
+  rhs[_1].resize(A[_1][_1].N());
+  rhs = 0;
+//    ConstantFunction<DomainType, RangeType> f(5);
+  auto f = makeFunction<DomainType, RangeType>([](const DomainType& x) { return (x.two_norm() < .7) ? 200 : 0;});
+  assemblePQ1RHS(gridView, rhs[_0], f);
+  assemblePQ1RHS(gridView, rhs[_1], f);
+
+  Vector u;
+  Solvers::resizeInitializeZero(u,rhs);
+
+  BitVector ignore;
+  ignore[_0].resize(A[_0][_0].N());
+  ignore[_1].resize(A[_1][_1].N());
+  ignore[_0].unsetAll();
+  ignore[_1].unsetAll();
+  markBoundaryDOFs(gridView, ignore[_0]);
+  markBoundaryDOFs(gridView, ignore[_1]);
+
+  solveProblem(A, u, rhs, ignore);
+
+//  if (fileName!="")
+//  {
+//    typename Dune::VTKWriter<GridView> vtkWriter(gridView);
+//    vtkWriter.addVertexData(u[_0], "solution0");
+//    vtkWriter.addVertexData(u[_1], "solution1");
+//    vtkWriter.write(fileName);
+//  }
+
+  return passed;
+}
+
+
+template <int dim>
+bool checkWithYaspGrid(int refine, const std::string fileName="")
+{
+  bool passed = true;
+
+  typedef Dune::YaspGrid<dim> GridType;
+
+  typename Dune::FieldVector<typename GridType::ctype,dim> L(1.0);
+  typename std::array<int,dim> s;
+  std::fill(s.begin(), s.end(), 1);
+
+  GridType grid(L, s);
+
+  for (int i = 0; i < refine; ++i)
+    grid.globalRefine(1);
+
+  std::cout << "Testing with YaspGrid<" << dim << ">" << std::endl;
+  std::cout << "Number of levels: " << (grid.maxLevel()+1) << std::endl;
+  std::cout << "Number of leaf nodes: " << grid.leafGridView().size(dim) << std::endl;
+
+  passed = passed and checkWithGrid(grid, fileName);
+
+  return passed;
+}
+
+
+
+
+int main(int argc, char** argv) try
+{
+  Dune::MPIHelper::instance(argc, argv);
+  bool passed(true);
+
+  int refine2d = 6;
+
+  passed = passed and checkWithYaspGrid<2>(refine2d, "obstacletnnmgtest_yasp_2d_solution");
+
+  return passed ? 0 : 1;
+}
+catch (Dune::Exception e) {
+
+    std::cout << e << std::endl;
+
+}
+
diff --git a/dune/tnnmg/test/nonlineargsperformancetest.cc b/dune/tnnmg/test/nonlineargsperformancetest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..cd7928143262318bee9d4dbfb4860918cc78981a
--- /dev/null
+++ b/dune/tnnmg/test/nonlineargsperformancetest.cc
@@ -0,0 +1,286 @@
+// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=8 sw=4 sts=4:
+#include <config.h>
+
+#include <iostream>
+#include <sstream>
+
+// dune-common includes
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/fmatrix.hh>
+#include <dune/common/function.hh>
+#include <dune/common/fvector.hh>
+#include <dune/common/timer.hh>
+
+// dune-istl includes
+#include <dune/istl/bcrsmatrix.hh>
+
+// dune-grid includes
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/io/file/vtk/vtkwriter.hh>
+
+// dune-solver includes
+#include <dune/solvers/solvers/loopsolver.hh>
+#include <dune/solvers/norms/energynorm.hh>
+#include <dune/solvers/iterationsteps/truncatedblockgsstep.hh>
+
+// dune-tnnmg includes
+#include <dune/tnnmg/functionals/quadraticfunctional.hh>
+#include <dune/tnnmg/iterationsteps/nonlineargsstep.hh>
+
+#include <dune/solvers/test/common.hh>
+
+template <class DomainType, class RangeType, class F>
+class FunctionFromLambda :
+    public Dune::VirtualFunction<DomainType, RangeType>
+{
+    public:
+        FunctionFromLambda(F f):
+            f_(f)
+        {}
+
+        void evaluate(const DomainType& x, RangeType& y) const
+        {
+            y = f_(x);
+        }
+
+    private:
+        F f_;
+};
+
+template<class Domain, class Range, class F>
+auto makeFunction(F&& f)
+{
+    return FunctionFromLambda<Domain, Range, std::decay_t<F>>(std::forward<F>(f));
+}
+
+
+
+template<class MatrixType, class VectorType, class BitVector>
+void performanceTest(const MatrixType& mat, VectorType& x, const VectorType& rhs, const BitVector& ignore, int maxIterations=100, double tolerance=1.0e-8)
+{
+    using Vector = VectorType;
+    using Matrix = MatrixType;
+
+    double tNonlinearGS;
+    double tTruncatedGS;
+    {
+        x = 0;
+
+        using Functional = Dune::TNNMG::QuadraticFunctional<Matrix, Vector, double>;
+        auto J = Functional(mat, rhs);
+
+        auto scalarLocalSolver = [](auto& xi, const auto& restriction, const auto& ignoreI) {
+            if (not ignoreI)
+                xi = restriction.linearPart()/restriction.quadraticPart();
+        };
+
+        auto localSolver = [scalarLocalSolver](auto& xi, const auto& restriction, const auto& ignoreI) {
+            gaussSeidelLoop(xi, restriction, ignoreI, scalarLocalSolver);
+        };
+
+        using Step = typename Dune::TNNMG::NonlinearGSStep<Functional, decltype(localSolver)>;
+        using Solver = LoopSolver<Vector>;
+        using Norm =  EnergyNorm<Matrix, Vector>;
+
+        auto step = Step(J, x, localSolver);
+        auto norm = Norm(mat);
+        auto solver = Solver(&step, 100, 0, &norm, Solver::FULL);
+
+        step.ignoreNodes_ = &ignore;
+
+        solver.preprocess();
+
+        for (int i=0; i<10; ++i)
+        {
+            x = 0;
+            solver.solve();
+        }
+    }
+
+    {
+        x = 0;
+
+        using Functional = Dune::TNNMG::QuadraticFunctional<Matrix, Vector, double>;
+        auto J = Functional(mat, rhs);
+
+        auto scalarLocalSolver = [](auto& xi, const auto& restriction, const auto& ignoreI) {
+            if (not ignoreI)
+                xi = restriction.linearPart()/restriction.quadraticPart();
+        };
+
+        auto localSolver = [scalarLocalSolver](auto& xi, const auto& restriction, const auto& ignoreI) {
+            gaussSeidelLoop(xi, restriction, ignoreI, scalarLocalSolver);
+        };
+
+        using Step = typename Dune::TNNMG::NonlinearGSStep<Functional, decltype(localSolver)>;
+        using Solver = LoopSolver<Vector>;
+        using Norm =  EnergyNorm<Matrix, Vector>;
+
+        auto step = Step(J, x, localSolver);
+        auto norm = Norm(mat);
+        auto solver = Solver(&step, 100, 0, &norm, Solver::FULL);
+
+        step.ignoreNodes_ = &ignore;
+
+        solver.preprocess();
+
+        Dune::Timer timer;
+        for (int i=0; i<10; ++i)
+        {
+            x = 0;
+            solver.solve();
+        }
+        tNonlinearGS = timer.elapsed();
+    }
+
+    {
+        x = 0;
+
+        using Step = TruncatedBlockGSStep<Matrix, Vector, BitVector>;
+        using Solver = LoopSolver<Vector>;
+        using Norm =  EnergyNorm<Matrix, Vector>;
+
+        auto step = Step(mat, x, rhs);
+        auto norm = Norm(mat);
+        auto solver = Solver(&step, 100, 0, &norm, Solver::FULL);
+
+        step.ignoreNodes_ = &ignore;
+
+        solver.preprocess();
+
+        Dune::Timer timer;
+        for (int i=0; i<10; ++i)
+        {
+            x = 0;
+            solver.solve();
+        }
+        tTruncatedGS = timer.elapsed();
+    }
+
+    std::cout << "Solution with NonlinearGSStep took      :" << tNonlinearGS << "s" << std::endl;
+    std::cout << "Solution with TruncatedBlockGSStep took :" << tTruncatedGS << "s" << std::endl;
+    std::cout << "TruncatedBlockGSStep to NonlinearGSStep :" << ((tNonlinearGS / tTruncatedGS - 1.0) * 100.0) << "%" << std::endl;
+}
+
+
+
+
+
+
+template <class GridType>
+bool checkWithGrid(const GridType& grid, const std::string fileName="")
+{
+    bool passed = true;
+
+    static const int dim = GridType::dimension;
+
+    typedef typename Dune::FieldMatrix<double, 1, 1> MatrixBlock;
+    typedef typename Dune::FieldVector<double, 1> VectorBlock;
+    typedef typename Dune::BCRSMatrix<MatrixBlock> Matrix;
+    typedef typename Dune::BlockVector<VectorBlock> Vector;
+    typedef typename Dune::BitSetVector<1> BitVector;
+
+    typedef typename GridType::LeafGridView GridView;
+    typedef typename Dune::FieldVector<double, dim> DomainType;
+    typedef typename Dune::FieldVector<double, 1> RangeType;
+
+
+    const GridView gridView = grid.leafGridView();
+
+    Matrix A;
+    constructPQ1Pattern(gridView, A);
+    A = 0.0;
+    assemblePQ1Stiffness(gridView, A);
+
+    Matrix M;
+    constructPQ1Pattern(gridView, M);
+    M = 0.0;
+    assemblePQ1Mass(gridView, M);
+    A += M;
+
+    Vector rhs(A.N());
+    rhs = 0;
+//    ConstantFunction<DomainType, RangeType> f(5);
+    auto f = makeFunction<DomainType, RangeType>([](const DomainType& x) { return (x.two_norm() < .7) ? 200 : 0;});
+    assemblePQ1RHS(gridView, rhs, f);
+
+    Vector u(A.N());
+    u = 0;
+
+    BitVector ignore(A.N());
+    ignore.unsetAll();
+    markBoundaryDOFs(gridView, ignore);
+
+    performanceTest(A, u, rhs, ignore);
+
+    if (fileName!="")
+    {
+        typename Dune::VTKWriter<GridView> vtkWriter(gridView);
+        vtkWriter.addVertexData(u, "solution");
+        vtkWriter.write(fileName);
+    }
+
+    return passed;
+}
+
+
+template <int dim>
+bool checkWithYaspGrid(int refine, const std::string fileName="")
+{
+    bool passed = true;
+
+    typedef Dune::YaspGrid<dim> GridType;
+
+    typename Dune::FieldVector<typename GridType::ctype,dim> L(1.0);
+    typename std::array<int,dim> s;
+    std::fill(s.begin(), s.end(), 1);
+
+    GridType grid(L, s);
+
+    for (int i = 0; i < refine; ++i)
+        grid.globalRefine(1);
+
+    std::cout << "Testing with YaspGrid<" << dim << ">" << std::endl;
+    std::cout << "Number of levels: " << (grid.maxLevel()+1) << std::endl;
+    std::cout << "Number of leaf nodes: " << grid.leafGridView().size(dim) << std::endl;
+
+    passed = passed and checkWithGrid(grid, fileName);
+
+
+    return passed;
+}
+
+
+
+
+int main(int argc, char** argv) try
+{
+    Dune::MPIHelper::instance(argc, argv);
+    bool passed(true);
+
+
+//    int refine1d = 16;
+    int refine1d = 1;
+    int refine2d = 5;
+    int refine3d = 1;
+
+    if (argc>1)
+        std::istringstream(argv[1]) >> refine1d;
+    if (argc>2)
+        std::istringstream(argv[2]) >> refine2d;
+    if (argc>3)
+        std::istringstream(argv[3]) >> refine3d;
+
+    passed = passed and checkWithYaspGrid<1>(refine1d, "nonlineargsperformancetest_yasp_1d_solution");
+    passed = passed and checkWithYaspGrid<2>(refine2d, "nonlineargsperformancetest_yasp_2d_solution");
+//    passed = passed and checkWithYaspGrid<3>(refine3d, "nonlineargsperformancetest_yasp_3d_solution");
+
+    return passed ? 0 : 1;
+}
+catch (Dune::Exception e) {
+
+    std::cout << e << std::endl;
+
+}
+
diff --git a/dune/tnnmg/test/nonlineargstest.cc b/dune/tnnmg/test/nonlineargstest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a346baf6d68fd5fc1dd77bedd0841ff08e7a2f11
--- /dev/null
+++ b/dune/tnnmg/test/nonlineargstest.cc
@@ -0,0 +1,362 @@
+// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=8 sw=4 sts=4:
+
+#include <config.h>
+
+#include <iostream>
+#include <sstream>
+
+// dune-common includes
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/stringutility.hh>
+
+// dune-istl includes
+#include <dune/istl/bcrsmatrix.hh>
+
+
+
+// dune-grid includes
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/io/file/vtk/vtkwriter.hh>
+
+// dune-solver includes
+#include <dune/solvers/solvers/loopsolver.hh>
+#include <dune/solvers/norms/energynorm.hh>
+#include <dune/solvers/common/defaultbitvector.hh>
+#include <dune/solvers/transferoperators/compressedmultigridtransfer.hh>
+#include <dune/solvers/iterationsteps/truncatedblockgsstep.hh>
+
+// dune-tnnmg includes
+#include <dune/tnnmg/functionals/quadraticfunctional.hh>
+#include <dune/tnnmg/functionals/boxconstrainedquadraticfunctional.hh>
+#include <dune/tnnmg/functionals/bcqfconstrainedlinearization.hh>
+#include <dune/tnnmg/iterationsteps/nonlineargsstep.hh>
+#include <dune/tnnmg/iterationsteps/acceleratednonlineargsstep.hh>
+#include <dune/tnnmg/iterationsteps/tnnmgacceleration.hh>
+
+#include <dune/tnnmg/localsolvers/scalarobstaclesolver.hh>
+
+#include <dune/tnnmg/linearsolvers/ldlsolver.hh>
+#include <dune/tnnmg/linearsolvers/ilu0cgsolver.hh>
+#include <dune/tnnmg/linearsolvers/fastamgsolver.hh>
+#include <dune/tnnmg/linearsolvers/mgsolver.hh>
+
+#include <dune/tnnmg/projections/obstacledefectprojection.hh>
+
+#include <dune/solvers/test/common.hh>
+
+template <class DomainType, class RangeType, class F>
+class FunctionFromLambda :
+    public Dune::VirtualFunction<DomainType, RangeType>
+{
+    public:
+        FunctionFromLambda(F f):
+            f_(f)
+        {}
+
+        void evaluate(const DomainType& x, RangeType& y) const
+        {
+            y = f_(x);
+        }
+
+    private:
+        F f_;
+};
+
+template<class Domain, class Range, class F>
+auto makeFunction(F&& f)
+{
+    return FunctionFromLambda<Domain, Range, std::decay_t<F>>(std::forward<F>(f));
+}
+
+
+// template<class F>
+// std::tuple<double, double, int> bisection(const F& f, double start, double guess, double tol)
+// {
+//     double a = start;
+//     double x = guess;
+//     double fx = f(x);
+//     int evaluations = 1;
+//     while (fx<0)
+//     {
+//         x = x + (guess-start);
+//         fx = f(x);
+//         ++evaluations;
+//     }
+//     double b = x;
+//     while (std::fabs(b-a)>tol)
+//     {
+//         x = (a+b)*.5;
+//         fx = f(x);
+//         ++evaluations;
+//         if (fx<0)
+//             a = x;
+//         else
+//             b = x;
+//     }
+//     return std::make_tuple(x, fx, evaluations);
+// };
+
+
+
+template<class MatrixType, class VectorType, class BitVector, class TransferOperators>
+void solveProblem(const MatrixType& mat, VectorType& x, const VectorType& rhs, const BitVector& ignore, const TransferOperators& transfer, int maxIterations=100, double tolerance=1.0e-8)
+{
+    using Vector = VectorType;
+    using Matrix = MatrixType;
+
+    auto lower = Vector(rhs.size());
+    auto upper = Vector(rhs.size());
+
+    lower = 0;
+    upper = 1;
+
+    using Functional = Dune::TNNMG::BoxConstrainedQuadraticFunctional<Matrix&, Vector&, Vector&, Vector&, double>;
+    auto J = Functional(mat, rhs, lower, upper);
+
+//    auto localSolver = [](auto& xi, const auto& restriction, const auto& ignoreI) {
+//        gaussSeidelLoop(xi, restriction, ignoreI, Dune::TNNMG::ScalarObstacleSolver());
+//    };
+
+//    Dune::TNNMG::GaussSeidelSolver<Dune::TNNMG::ScalarObstacleSolver> gss(Dune::TNNMG::ScalarObstacleSolver());
+
+    auto localSolver = gaussSeidelLocalSolver(std::make_tuple(Dune::TNNMG::ScalarObstacleSolver(), Dune::TNNMG::ScalarObstacleSolver()));
+
+//    auto localSolver = gaussSeidelLocalSolver(Dune::TNNMG::ScalarObstacleSolver());
+
+
+    auto smoother = TruncatedBlockGSStep<Matrix, Vector, BitVector>();
+    auto linearSolver = Dune::TNNMG::MGSolver<Matrix, Vector>(transfer, smoother, smoother, 3, 3);
+
+//    auto linearSolver = Dune::TNNMG::FastAMGSolver<Matrix, Vector>();
+//    auto linearSolver = Dune::TNNMG::ILU0CGSolver(1e-5, 5);
+//    auto linearSolver = Dune::TNNMG::LDLSolver();
+
+    using Linearization = Dune::TNNMG::BoxConstrainedQuadraticFunctionalConstrainedLinearization<Functional, BitVector>;
+    using LinearSolver = decltype(linearSolver);
+    using DefectProjection = Dune::TNNMG::ObstacleDefectProjection;
+    using LineSearchSolver = Dune::TNNMG::ScalarObstacleSolver;
+    using Acceleration = Dune::TNNMG::TNNMGAcceleration<Functional, BitVector, Linearization, LinearSolver, DefectProjection, LineSearchSolver>;
+
+    auto acceleration = Acceleration(linearSolver, DefectProjection(), LineSearchSolver());
+
+    using Step = typename Dune::TNNMG::AcceleratedNonlinearGSStep<Vector, Functional, decltype(localSolver), decltype(acceleration)>;
+    using Solver = LoopSolver<Vector>;
+    using Norm =  EnergyNorm<Matrix, Vector>;
+
+    auto step = Step(J, x, localSolver, acceleration);
+    auto norm = Norm(mat);
+    auto solver = Solver(&step, 1e9, 0, &norm, Solver::FULL);
+
+    step.setIgnore(ignore);
+    step.setPreSmoothingSteps(3);
+    step.setPostSmoothingSteps(3);
+
+    solver.addCriterion(
+            [&](){
+                return Dune::formatString("   % 12.5e", J(x));
+            },
+            "   energy      ");
+
+    double initialEnergy = J(x);
+    solver.addCriterion(
+            [&](){
+                static double oldEnergy=initialEnergy;
+                double currentEnergy = J(x);
+                double decrease = currentEnergy - oldEnergy;
+                oldEnergy = currentEnergy;
+                return Dune::formatString("   % 12.5e", decrease);
+            },
+            "   decrease    ");
+
+    solver.addCriterion(
+            [&](){
+                return Dune::formatString("   % 12.5e", step.accelerationStep().lastDampingFactor());
+            },
+            "   damping     ");
+
+
+    solver.addCriterion(
+            [&](){
+                return Dune::formatString("   % 12d", step.accelerationStep().linearization().truncated().count());
+            },
+            "   truncated   ");
+
+
+//    Vector temp(x.size());
+//    auto gradientCriterion = Dune::Solvers::Criterion(
+//            [&](){
+//                temp = 0;
+//                J.addGradient(x, temp);
+//                auto err = temp.two_norm();
+//                return std::make_tuple(err<tolerance, Dune::formatString("   % 12.5e", err));
+//            },
+//            "   |gradient|  ");
+
+//    solver.addCriterion(
+//            [&](){return Dune::formatString("   % 12.5e", J(x));},
+//            "   energy      ");
+
+//    double initialEnergy = J(x);
+//    solver.addCriterion(
+//            [&](){
+//                static double oldEnergy=initialEnergy;
+//                double currentEnergy = J(x);
+//                double decrease = currentEnergy - oldEnergy;
+//                oldEnergy = currentEnergy;
+//                return Dune::formatString("   % 12.5e", decrease);
+//            },
+//            "   decrease    ");
+
+//    solver.addCriterion(
+//            [&](){return Dune::formatString("   % 12d", stepRule.evaluations_);},
+//            "   evaluations ");
+
+//    solver.addCriterion(
+//            [&](){return Dune::formatString("   % 12.5e", stepRule.x_);},
+//            "   step size   ");
+
+//    solver.addCriterion(Dune::Solvers::maxIterCriterion(solver, maxIterations) | gradientCriterion);
+
+    std::vector<double> correctionNorms;
+    solver.addCriterion(Dune::Solvers::correctionNormCriterion(step, norm, 1e-10, correctionNorms));
+
+//    solver.addCriterion(Dune::Solvers::correctionNormCriterion(step, norm, 1e-4));
+
+    solver.preprocess();
+    solver.solve();
+    std::cout << correctionNorms.size() << std::endl;
+}
+
+
+
+
+
+template <class GridType>
+bool checkWithGrid(const GridType& grid, const std::string fileName="")
+{
+    bool passed = true;
+
+    static const int dim = GridType::dimension;
+
+    typedef typename Dune::FieldMatrix<double, 2, 2> MatrixBlock;
+    typedef typename Dune::FieldVector<double, 2> VectorBlock;
+    typedef typename Dune::BCRSMatrix<MatrixBlock> Matrix;
+    typedef typename Dune::BlockVector<VectorBlock> Vector;
+    typedef typename Dune::Solvers::DefaultBitVector_t<Vector> BitVector;
+
+    typedef typename GridType::LeafGridView GridView;
+    typedef typename Dune::FieldVector<double, dim> DomainType;
+    typedef typename Dune::FieldVector<double, 2> RangeType;
+
+
+    const GridView gridView = grid.leafGridView();
+
+    Matrix A;
+    constructPQ1Pattern(gridView, A);
+    A = 0.0;
+    assemblePQ1Stiffness(gridView, A);
+
+    Matrix M;
+    constructPQ1Pattern(gridView, M);
+    M = 0.0;
+    assemblePQ1Mass(gridView, M);
+    A += M;
+
+    Vector rhs(A.N());
+    rhs = 0;
+//    ConstantFunction<DomainType, RangeType> f(5);
+    auto f = makeFunction<DomainType, RangeType>([](const DomainType& x) { return (x.two_norm() < .7) ? 200 : 0;});
+    assemblePQ1RHS(gridView, rhs, f);
+
+    Vector u(A.N());
+    u = 0;
+
+    BitVector ignore(A.N());
+    ignore.unsetAll();
+    markBoundaryDOFs(gridView, ignore);
+
+
+    using TransferOperator = CompressedMultigridTransfer<Vector>;
+    using TransferOperators = std::vector<std::shared_ptr<TransferOperator>>;
+
+    TransferOperators transfer(grid.maxLevel());
+    for (size_t i = 0; i < transfer.size(); ++i)
+    {
+        // create transfer operator from level i to i+1
+        transfer[i] = std::make_shared<TransferOperator>();
+        transfer[i]->setup(grid, i, i+1);
+    }
+
+    solveProblem(A, u, rhs, ignore, transfer);
+
+//    if (fileName!="")
+//    {
+//        typename Dune::VTKWriter<GridView> vtkWriter(gridView);
+//        vtkWriter.addVertexData(u, "solution");
+//        vtkWriter.write(fileName);
+//    }
+
+    return passed;
+}
+
+
+template <int dim>
+bool checkWithYaspGrid(int refine, const std::string fileName="")
+{
+    bool passed = true;
+
+    typedef Dune::YaspGrid<dim> GridType;
+
+    typename Dune::FieldVector<typename GridType::ctype,dim> L(1.0);
+    typename std::array<int,dim> s;
+    std::fill(s.begin(), s.end(), 1);
+
+    GridType grid(L, s);
+
+    for (int i = 0; i < refine; ++i)
+        grid.globalRefine(1);
+
+    std::cout << "Testing with YaspGrid<" << dim << ">" << std::endl;
+    std::cout << "Number of levels: " << (grid.maxLevel()+1) << std::endl;
+    std::cout << "Number of leaf nodes: " << grid.leafGridView().size(dim) << std::endl;
+
+    passed = passed and checkWithGrid(grid, fileName);
+
+
+    return passed;
+}
+
+
+
+
+int main(int argc, char** argv) try
+{
+    Dune::MPIHelper::instance(argc, argv);
+    bool passed(true);
+
+
+//    int refine1d = 16;
+    int refine1d = 1;
+    int refine2d = 6;
+    int refine3d = 1;
+
+    if (argc>1)
+        std::istringstream(argv[1]) >> refine1d;
+    if (argc>2)
+        std::istringstream(argv[2]) >> refine2d;
+    if (argc>3)
+        std::istringstream(argv[3]) >> refine3d;
+
+    passed = passed and checkWithYaspGrid<1>(refine1d, "obstacletnnmgtest_yasp_1d_solution");
+    passed = passed and checkWithYaspGrid<2>(refine2d, "obstacletnnmgtest_yasp_2d_solution");
+//    passed = passed and checkWithYaspGrid<3>(refine3d, "obstacletnnmgtest_yasp_3d_solution");
+
+    return passed ? 0 : 1;
+}
+catch (Dune::Exception e) {
+
+    std::cout << e << std::endl;
+
+}
+
diff --git a/dune/tnnmg/test/simplexsolvertest.cc b/dune/tnnmg/test/simplexsolvertest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8e0397ecf019baf6976d6197019a2aa42fdb8c72
--- /dev/null
+++ b/dune/tnnmg/test/simplexsolvertest.cc
@@ -0,0 +1,186 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+
+#include <config.h>
+
+#include <iostream>
+#include <limits>
+
+#include <dune/common/fvector.hh>
+#include <dune/common/parallel/mpihelper.hh>
+
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/bvector.hh>
+#include <dune/istl/matrixindexset.hh>
+#include <dune/istl/scaledidmatrix.hh>
+
+#include <dune/solvers/common/defaultbitvector.hh>
+
+#include <dune/tnnmg/functionals/boxconstrainedquadraticfunctional.hh>
+#include <dune/tnnmg/iterationsteps/nonlineargsstep.hh>
+#include <dune/tnnmg/localsolvers/simplexsolver.hh>
+
+constexpr size_t N = 7;
+using Field = double;
+using Solver = Dune::TNNMG::SimplexSolver<Field>;
+
+using LocalVector = Dune::FieldVector<Field, N>;
+using LocalMatrix = Dune::ScaledIdentityMatrix<Field, N>;
+using LocalFunctional = Dune::TNNMG::BoxConstrainedQuadraticFunctional<
+    LocalMatrix&, LocalVector&, LocalVector&, LocalVector&, Field>;
+using LocalIgnore = std::bitset<N>;
+
+using Vector = Dune::BlockVector<LocalVector>;
+using Matrix = Dune::BCRSMatrix<LocalMatrix>;
+using Functional = Dune::TNNMG::BoxConstrainedQuadraticFunctional<
+    Matrix, Vector, Vector, Vector, Field>;
+using Ignore = Dune::Solvers::DefaultBitVector_t<Vector>;
+
+constexpr Field tol = 1e-14;
+bool equalsWithTol(Field a, Field b) { return fabs(b - a) < tol; }
+bool hasSumWithTol(LocalVector& v, Field sum) {
+  return fabs(std::accumulate(v.begin(), v.end(), -sum)) < tol;
+}
+
+int main(int argc, char** argv) try {
+  Dune::MPIHelper::instance(argc, argv);
+  bool passed(true);
+
+  { // test locally
+    LocalVector upper(std::numeric_limits<Field>::infinity());
+    LocalVector lower(0.0);
+    LocalMatrix matrix(1.0);
+    LocalVector rhs(1.0);
+    LocalFunctional f(matrix, rhs, lower, upper);
+    LocalVector x(0.0);
+    LocalIgnore ignore(false);
+    { // all values one, constraint N
+      Solver solver(N);
+      solver(x, f, ignore);
+      for (auto&& xi : x)
+        passed = passed and equalsWithTol(xi, 1.0);
+    }
+    { // all values one, constraint 1 -> project
+      Solver solver(1.0);
+      solver(x, f, ignore);
+      for (auto&& xi : x)
+        passed = passed and equalsWithTol(xi, 1.0 / N);
+    }
+    { // one entry zero, others 1
+      Solver solver(N - 1);
+      rhs[0] = 0.0;
+      solver(x, f, ignore);
+      passed = passed and equalsWithTol(x[0], 0.0);
+      for (size_t i = 1; i < N; ++i)
+        passed = passed and equalsWithTol(x[i], 1.0);
+    }
+    { // everything ignored
+      Solver solver;
+      for (size_t i = 0; i < N; ++i)
+        ignore[i] = true;
+      x = std::numeric_limits<Field>::quiet_NaN();
+      solver(x, f, ignore);
+      for (auto&& xi : x)
+        passed = passed and std::isnan(xi);
+      for (size_t i = 0; i < N; ++i)
+        ignore[i] = false;
+    }
+    { // partially ignored (throws)
+      Solver solver;
+      ignore[0] = true;
+      try {
+        solver(x, f, ignore);
+        passed = passed and false;
+      } catch (Dune::NotImplemented) {
+        passed = passed and true;
+      }
+      ignore[0] = false;
+    }
+    { // different values, fitting constraint
+      for (size_t i = 0; i < N; ++i)
+        rhs[i] = i + 1;
+      Field sum = N * (N + 1) / 2;
+      Solver solver(sum);
+      solver(x, f, ignore);
+      for (size_t i = 0; i < N; ++i)
+        passed = passed and equalsWithTol(x[i], i + 1);
+    }
+    { // different values, oversized constraint -> project
+      for (size_t i = 0; i < N; ++i)
+        rhs[i] = i + 1;
+      Field sum = N * (N + 1) / 2;
+      Solver solver(10 * sum);
+      solver(x, f, ignore);
+      Field sumShift = (sum * 10 - sum) / N;
+      for (size_t i = 0; i < N; ++i)
+        passed = passed and equalsWithTol(x[i], i + 1 + sumShift);
+    }
+    { // different values, undersized constraint -> project
+      for (size_t i = 0; i < N; ++i)
+        rhs[i] = i + 1;
+      Field sum = N * (N + 1) / 2;
+      Solver solver(sum - N);
+      solver(x, f, ignore);
+      for (size_t i = 0; i < N; ++i)
+        passed = passed and equalsWithTol(x[i], i);
+    }
+    { // different and coinciding values, undersized constraint -> project
+      rhs[0] = 1;
+      for (size_t i = 1; i < N; ++i)
+        rhs[i] = i;
+      Field sum = (N - 1) * (N - 2) / 2;
+      Solver solver(sum);
+      solver(x, f, ignore);
+      passed = passed and equalsWithTol(x[0], 0);
+      for (size_t i = 1; i < N; ++i)
+        passed = passed and equalsWithTol(x[i], i - 1);
+    }
+    { // non-zero obstacle
+      lower = 1.0;
+      for (size_t i = 0; i < N; ++i)
+        rhs[i] = i;
+      Field sum = N * (N - 1) / 2 + 1;
+      Solver solver(sum);
+      solver(x, f, ignore);
+      passed = passed and equalsWithTol(x[0], 1);
+      for (size_t i = 1; i < N; ++i)
+        passed = passed and equalsWithTol(x[i], i);
+    }
+  }
+  { // test globally
+    size_t m = 10;
+
+    Vector upper(m);
+    Vector lower(m);
+    Vector rhs(m);
+    Vector x(m);
+    Ignore ignore(m);
+
+    Dune::MatrixIndexSet indices(m, m);
+    for (size_t i = 0; i < m; ++i)
+      indices.add(i, i);
+    Matrix matrix;
+    indices.exportIdx(matrix);
+
+    upper = std::numeric_limits<Field>::infinity();
+    lower = 0.0;
+    matrix = 1.0;
+    for (auto&& rhs_i : rhs)
+      for (size_t j = 0; j < N; ++j)
+        rhs_i[j] = j + 1;
+    Functional f(matrix, rhs, lower, upper);
+    x = 0.0;
+    for (auto&& xi : x)
+      xi[0] = 1.0; // make sure the local sum constraint is fulfilled initially
+    ignore.unsetAll();
+
+    Dune::TNNMG::gaussSeidelLocalSolver(Solver(1.0));
+
+    for (auto&& xi : x)
+      passed = passed and hasSumWithTol(xi, 1.0);
+  }
+
+  return passed ? 0 : 1;
+} catch (Dune::Exception e) {
+  std::cout << e << std::endl;
+}
diff --git a/dune/tnnmg/test/tnnmgsteptest.cc b/dune/tnnmg/test/tnnmgsteptest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..dffa4222d6459a935ecb7e4b6dec41d8692e96c0
--- /dev/null
+++ b/dune/tnnmg/test/tnnmgsteptest.cc
@@ -0,0 +1,271 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set et ts=4 sw=2 sts=2:
+#include <config.h>
+
+#include <iostream>
+#include <sstream>
+
+// dune-common includes
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/stringutility.hh>
+#include <dune/common/test/testsuite.hh>
+
+// dune-istl includes
+#include <dune/istl/bcrsmatrix.hh>
+
+// dune-grid includes
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/io/file/vtk/vtkwriter.hh>
+
+// dune-solver includes
+#include <dune/solvers/solvers/loopsolver.hh>
+#include <dune/solvers/norms/energynorm.hh>
+#include <dune/solvers/common/defaultbitvector.hh>
+#include <dune/solvers/iterationsteps/multigridstep.hh>
+#include <dune/solvers/iterationsteps/truncatedblockgsstep.hh>
+#include <dune/solvers/transferoperators/compressedmultigridtransfer.hh>
+
+// dune-tnnmg includes
+#include <dune/tnnmg/functionals/quadraticfunctional.hh>
+#include <dune/tnnmg/functionals/boxconstrainedquadraticfunctional.hh>
+#include <dune/tnnmg/functionals/bcqfconstrainedlinearization.hh>
+#include <dune/tnnmg/iterationsteps/nonlineargsstep.hh>
+#include <dune/tnnmg/iterationsteps/tnnmgstep.hh>
+
+#include <dune/tnnmg/localsolvers/scalarobstaclesolver.hh>
+
+#include <dune/tnnmg/projections/obstacledefectprojection.hh>
+
+#include <dune/solvers/test/common.hh>
+
+template <class DomainType, class RangeType, class F>
+class FunctionFromLambda :
+    public Dune::VirtualFunction<DomainType, RangeType>
+{
+    public:
+        FunctionFromLambda(F f):
+            f_(f)
+    {}
+
+        void evaluate(const DomainType& x, RangeType& y) const
+        {
+            y = f_(x);
+        }
+
+    private:
+        F f_;
+};
+
+template<class Domain, class Range, class F>
+auto makeFunction(F&& f)
+{
+    return FunctionFromLambda<Domain, Range, std::decay_t<F>>(std::forward<F>(f));
+}
+
+
+// TODO include those last two parameters
+template<class MatrixType, class VectorType, class BitVector, class TransferOperators>
+void solveProblem(const MatrixType& mat, VectorType& x, const VectorType& rhs, const BitVector& ignore, const TransferOperators& transfer, int maxIterations=100, double tolerance=1.0e-8)
+{
+    using Vector = VectorType;
+    using Matrix = MatrixType;
+
+    auto lower = Vector(rhs.size());
+    auto upper = Vector(rhs.size());
+
+    lower = 0;
+    upper = 1;
+
+    using Functional = Dune::TNNMG::BoxConstrainedQuadraticFunctional<Matrix&, Vector&, Vector&, Vector&, double>;
+    auto J = Functional(mat, rhs, lower, upper);
+
+    auto localSolver = gaussSeidelLocalSolver(Dune::TNNMG::ScalarObstacleSolver());
+
+    using NonlinearSmoother = Dune::TNNMG::NonlinearGSStep<Functional, decltype(localSolver), BitVector>;
+    auto nonlinearSmoother = std::make_shared<NonlinearSmoother>(J, x, localSolver);
+
+    auto smoother = TruncatedBlockGSStep<Matrix, Vector>{};
+
+    auto linearMultigridStep = std::make_shared<Dune::Solvers::MultigridStep<Matrix, Vector> >();
+    linearMultigridStep->setMGType(1, 3, 3);
+    linearMultigridStep->setSmoother(&smoother);
+    linearMultigridStep->setTransferOperators(transfer);
+
+    using Linearization = Dune::TNNMG::BoxConstrainedQuadraticFunctionalConstrainedLinearization<Functional, BitVector>;
+    using DefectProjection = Dune::TNNMG::ObstacleDefectProjection;
+    using LineSearchSolver = Dune::TNNMG::ScalarObstacleSolver;
+    using Step = Dune::TNNMG::TNNMGStep<Functional, BitVector, Linearization, DefectProjection, LineSearchSolver>;
+
+    using Solver = LoopSolver<Vector>;
+    using Norm =  EnergyNorm<Matrix, Vector>;
+
+
+    using Step = Dune::TNNMG::TNNMGStep<Functional, BitVector, Linearization, DefectProjection, LineSearchSolver>;
+    int mu=1; // #multigrid steps in Newton step
+    auto step = Step(J, x, nonlinearSmoother, linearMultigridStep, mu, DefectProjection(), LineSearchSolver());
+
+    auto norm = Norm(mat);
+    auto solver = Solver(step, 1e9, 0, norm, Solver::FULL);
+
+    step.setIgnore(ignore);
+    step.setPreSmoothingSteps(3);
+
+    solver.addCriterion(
+            [&](){
+            return Dune::formatString("   % 12.5e", J(x));
+            },
+            "   energy      ");
+
+    double initialEnergy = J(x);
+    solver.addCriterion(
+            [&](){
+            static double oldEnergy=initialEnergy;
+            double currentEnergy = J(x);
+            double decrease = currentEnergy - oldEnergy;
+            oldEnergy = currentEnergy;
+            return Dune::formatString("   % 12.5e", decrease);
+            },
+            "   decrease    ");
+
+    solver.addCriterion(
+            [&](){
+            return Dune::formatString("   % 12.5e", step.lastDampingFactor());
+            },
+            "   damping     ");
+
+
+    solver.addCriterion(
+            [&](){
+            return Dune::formatString("   % 12d", step.linearization().truncated().count());
+            },
+            "   truncated   ");
+
+
+    std::vector<double> correctionNorms;
+    solver.addCriterion(Dune::Solvers::correctionNormCriterion(step, norm, 1e-10, correctionNorms));
+
+    solver.preprocess();
+    solver.solve();
+    std::cout << correctionNorms.size() << std::endl;
+}
+
+
+
+
+
+template <class GridType>
+Dune::TestSuite checkWithGrid(const GridType& grid)
+{
+    Dune::TestSuite suite;
+
+    static const int dim = GridType::dimension;
+
+    using MatrixBlock = typename Dune::FieldMatrix<double, 1, 1>;
+    using VectorBlock = typename Dune::FieldVector<double, 1>;
+    using Matrix = typename Dune::BCRSMatrix<MatrixBlock>;
+    using Vector = typename Dune::BlockVector<VectorBlock>;
+    using BitVector = typename Dune::Solvers::DefaultBitVector_t<Vector>;
+
+    using GridView = typename GridType::LeafGridView;
+    using DomainType = typename Dune::FieldVector<double, dim>;
+    using RangeType = typename Dune::FieldVector<double, 1>;
+
+
+    const GridView gridView = grid.leafGridView();
+
+    Matrix A;
+    constructPQ1Pattern(gridView, A);
+    A = 0.0;
+    assemblePQ1Stiffness(gridView, A);
+
+    Matrix M;
+    constructPQ1Pattern(gridView, M);
+    M = 0.0;
+    assemblePQ1Mass(gridView, M);
+    A += M;
+
+    Vector rhs(A.N());
+    rhs = 0;
+    auto f = makeFunction<DomainType, RangeType>([](const DomainType& x) { return (x.two_norm() < .7) ? 200 : 0;});
+    assemblePQ1RHS(gridView, rhs, f);
+
+    Vector u(A.N());
+    u = 0;
+
+    BitVector ignore(A.N());
+    ignore.unsetAll();
+    markBoundaryDOFs(gridView, ignore);
+
+
+    using TransferOperator = CompressedMultigridTransfer<Vector>;
+    using TransferOperators = std::vector<std::shared_ptr<TransferOperator>>;
+
+    TransferOperators transfer(grid.maxLevel());
+    for (size_t i = 0; i < transfer.size(); ++i)
+    {
+        // create transfer operator from level i to i+1
+        transfer[i] = std::make_shared<TransferOperator>();
+        transfer[i]->setup(grid, i, i+1);
+    }
+
+    solveProblem(A, u, rhs, ignore, transfer);
+
+    // TODO: let suite check something
+    return suite;
+}
+
+
+template <int dim>
+Dune::TestSuite checkWithYaspGrid(int refine)
+{
+    using GridType = Dune::YaspGrid<dim>;
+
+    typename Dune::FieldVector<typename GridType::ctype,dim> L(1.0);
+    typename std::array<int,dim> s;
+    std::fill(s.begin(), s.end(), 1);
+
+    GridType grid(L, s);
+
+    for (int i = 0; i < refine; ++i)
+        grid.globalRefine(1);
+
+    std::cout << "Testing with YaspGrid<" << dim << ">" << std::endl;
+    std::cout << "Number of levels: " << (grid.maxLevel()+1) << std::endl;
+    std::cout << "Number of leaf nodes: " << grid.leafGridView().size(dim) << std::endl;
+
+    auto suite = checkWithGrid(grid);
+
+    return suite;
+}
+
+
+
+
+int main(int argc, char** argv) try
+{
+    Dune::MPIHelper::instance(argc, argv);
+    Dune::TestSuite suite;
+
+
+    int refine1d = 1;
+    int refine2d = 6;
+    int refine3d = 1;
+
+    if (argc>1)
+        std::istringstream(argv[1]) >> refine1d;
+    if (argc>2)
+        std::istringstream(argv[2]) >> refine2d;
+    if (argc>3)
+        std::istringstream(argv[3]) >> refine3d;
+
+    suite.subTest(checkWithYaspGrid<1>(refine1d));
+    suite.subTest(checkWithYaspGrid<2>(refine2d));
+
+    return suite.exit();
+}
+catch (Dune::Exception e) {
+
+    std::cout << e << std::endl;
+
+}
+
diff --git a/dune/tnnmg/typetraits.hh b/dune/tnnmg/typetraits.hh
new file mode 100644
index 0000000000000000000000000000000000000000..f65c7fc1ea399c1cfee1dadcfcdaf85963705cdd
--- /dev/null
+++ b/dune/tnnmg/typetraits.hh
@@ -0,0 +1,19 @@
+// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+// vi: set ts=8 sw=2 et sts=2:
+#ifndef DUNE_TNNMG_TYPETRAITS_HH
+#define DUNE_TNNMG_TYPETRAITS_HH
+
+
+
+namespace Dune {
+namespace TNNMG {
+
+// Empty for now. But we may need more type traits in dune-tnnmg later on.
+// To avoid removing and readding the header it's kept empty.
+
+} // namespace TNNMG
+} // namespace Dune
+
+
+
+#endif // DUNE_TNNMG_TYPETRAITS_HH