diff --git a/dune/solvers/common/staticmatrixtools.hh b/dune/solvers/common/staticmatrixtools.hh
index 3eb1aff2e24cf917ed5ad841bb58ee97da4888f0..74c6ca37f786739c3b7e5cf6bd69c5f7f92df6af 100644
--- a/dune/solvers/common/staticmatrixtools.hh
+++ b/dune/solvers/common/staticmatrixtools.hh
@@ -114,6 +114,9 @@ class StaticMatrix
         }
 
 
+
+
+
         // add transformed matrix X = A^T*Y*B ******************************************************
         template<class MatrixA, class MatrixB, class TransformationMatrix>
         static void addTransformedMatrix(MatrixA& A, const TransformationMatrix& T1, const MatrixB& B, const TransformationMatrix& T2)
@@ -126,15 +129,47 @@ class StaticMatrix
                 {
                     for (size_t k=0; k<B.N(); ++k)
                     {
-                        typename MatrixB::row_type::ConstIterator lIt=B[k].begin();
-                        typename MatrixB::row_type::ConstIterator lEnd=B[k].end();
-                        for(; lIt!=lEnd; ++lIt)
-                            A[i][jIt.index()] += T1[k][i] * B[k][lIt.index()] * T2[lIt.index()][jIt.index()];
+                        if (T1[k][i]!=0)
+                        {
+                            typename MatrixB::row_type::ConstIterator lIt=B[k].begin();
+                            typename MatrixB::row_type::ConstIterator lEnd=B[k].end();
+                            for(; lIt!=lEnd; ++lIt)
+                                A[i][jIt.index()] += T1[k][i] * B[k][lIt.index()] * T2[lIt.index()][jIt.index()];
+                        }
                     }
                 }
             }
         }
 
+        template<class K1, class K2, class K3, int n, int m>
+        static void addTransformedMatrix(
+            typename Dune::FieldMatrix<K1, m, m>& A,
+            const typename Dune::FieldMatrix<K2, n, m>& T1,
+            const typename Dune::FieldMatrix<K3, n, n>& B,
+            const typename Dune::FieldMatrix<K2, n, m>& T2)
+        {
+            typename Dune::FieldMatrix<K1, m, n> T2transposedB;
+            T2transposedB = 0;
+            for (size_t i=0; i<A.N(); ++i)
+            {
+                for (size_t k=0; k<B.N(); ++k)
+                {
+                    if (T1[k][i]!=0)
+                        for (size_t l=0; l<B.M(); ++l)
+                            T2transposedB[i][l] += T1[k][i] * B[k][l];
+                }
+            }
+            for (size_t k=0; k<T2.N(); ++k)
+            {
+                for (size_t l=0; l<T2.M(); ++l)
+                {
+                    if (T2[k][l]!=0)
+                        for (size_t i=0; i<A.N(); ++i)
+                            A[i][l] += T2transposedB[i][k] * T2[k][l];
+                }
+            }
+        }
+
         template<class MatrixA, class MatrixB>
         static void addTransformedMatrix(MatrixA& X, const double& A, const MatrixB& Y, const double& B)
         {
diff --git a/dune/solvers/iterationsteps/Makefile.am b/dune/solvers/iterationsteps/Makefile.am
index ddc45f75da2d5c93c09f391a0f228cab01ed1819..aac9853c9a5c3026577394e6dccad78064518865 100644
--- a/dune/solvers/iterationsteps/Makefile.am
+++ b/dune/solvers/iterationsteps/Makefile.am
@@ -12,6 +12,7 @@ iterationsteps_HEADERS = amgstep.hh \
                          mmgstep.hh mmgstep.cc \
                          multigridstep.hh \
                          multigridstep.cc \
+                         obstacletnnmgstep.hh \
                          projectedblockgsstep.hh \
                          projectedblockgsstep.cc \
                          projectedlinegsstep.cc projectedlinegsstep.hh \
diff --git a/dune/solvers/iterationsteps/blockgsstep.hh b/dune/solvers/iterationsteps/blockgsstep.hh
index 2aa6b31b5992f4dc922d3b6f955ca19ad1d6366c..2f73df80196d37633b594d5d112d1fa540c2d09f 100644
--- a/dune/solvers/iterationsteps/blockgsstep.hh
+++ b/dune/solvers/iterationsteps/blockgsstep.hh
@@ -5,6 +5,13 @@
 
 #include "lineariterationstep.hh"
 
+/** \brief A linear Gauß-Seidel iteration step for convex problems which have a block-structure.
+ * 
+ *  \tparam OperatorType The linear operator type
+ *  \tparam DiscFuncType The block vector type of the right hand side and the iterates
+ *  \tparam BitVectorType The type of the bit-vector specifying degrees of freedom that shall be ignored.
+ *
+ */
 template<class OperatorType, 
          class DiscFuncType,
          class BitVectorType = Dune::BitSetVector<DiscFuncType::block_type::dimension> >
@@ -21,15 +28,21 @@ template<class OperatorType,
         BlockGSStep() {}
 
         //! Constructor with a linear problem
-        BlockGSStep(const OperatorType& mat, DiscFuncType& x, DiscFuncType& rhs)
+        BlockGSStep(const OperatorType& mat, DiscFuncType& x, const DiscFuncType& rhs)
             : LinearIterationStep<OperatorType,DiscFuncType>(mat, x, rhs)
         {}
 
         //! Perform one iteration
         virtual void iterate();
-
+    
+        //! Return the solution
         virtual DiscFuncType getSol();
 
+        /** \brief Compute the residual of the current iterate of a (block) degree of freedom
+         *
+         *  \param index Global index of the (block) degree of freedom 
+         *  \param r Write residual in this vector
+         */
         virtual void residual(int index, VectorBlock& r) const;
 
     };
diff --git a/dune/solvers/iterationsteps/lineariterationstep.hh b/dune/solvers/iterationsteps/lineariterationstep.hh
index d25a0059d9ad82716409f91219d3542dc6deea3e..8157405b135d1175870aa01c8df9fdb773c6787d 100644
--- a/dune/solvers/iterationsteps/lineariterationstep.hh
+++ b/dune/solvers/iterationsteps/lineariterationstep.hh
@@ -7,7 +7,7 @@
 
 #include "iterationstep.hh"
 
-//! Base class for iteration steps being called by a iterative linear solver
+//! Base class for iteration steps being called by an iterative linear solver
 template<class MatrixType, class VectorType, class BitVectorType = Dune::BitSetVector<VectorType::block_type::dimension> >
 class LinearIterationStep : public IterationStep<VectorType, BitVectorType>
 {
@@ -20,14 +20,14 @@ public:
     virtual ~LinearIterationStep() {}
     
     //! Constructor being given linear operator, solution and right hand side
-    LinearIterationStep(const MatrixType& mat, VectorType& x, VectorType& rhs) {
+    LinearIterationStep(const MatrixType& mat, VectorType& x, const VectorType& rhs) {
         mat_     = &mat;
         this->x_ = &x;
         rhs_     = &rhs;
     }
     
     //! Set linear operator, solution and right hand side
-    virtual void setProblem(const MatrixType& mat, VectorType& x, VectorType& rhs) {
+    virtual void setProblem(const MatrixType& mat, VectorType& x, const VectorType& rhs) {
         this->x_ = &x;
         rhs_     = &rhs;
         mat_     = &mat;
@@ -57,7 +57,7 @@ public:
     }
     
     //! The container for the right hand side
-    VectorType* rhs_;
+    const VectorType* rhs_;
     
     //! The linear operator
     const MatrixType* mat_;
diff --git a/dune/solvers/iterationsteps/mmgstep.cc b/dune/solvers/iterationsteps/mmgstep.cc
index a57e7008ed40f4870c14338091383a9486f9f86a..eb9f1d40a59ac3d8332da391724896701a6f89f6 100644
--- a/dune/solvers/iterationsteps/mmgstep.cc
+++ b/dune/solvers/iterationsteps/mmgstep.cc
@@ -42,7 +42,7 @@ preprocess()
 
         QuadraticIPOptSolver<OperatorType,DiscFuncType>* ipoptBaseSolver = dynamic_cast<QuadraticIPOptSolver<OperatorType,DiscFuncType>*> (this->basesolver_);
      
-        ipoptBaseSolver->obstacles_   = (this->numLevels_ == 1) ? &(*obstacles_)[0]   : NULL;
+        ipoptBaseSolver->obstacles_ = &(*obstacles_)[0];
 #endif
     } else {
         DUNE_THROW(SolverError, "You can't use " << typeid(*this->basesolver_).name()
@@ -76,7 +76,7 @@ void MonotoneMGStep<OperatorType, DiscFuncType>::nestedIteration()
         // If we start from an infeasible configuration, the restricted
         // obstacles may be inconsistent.  We do an ad hoc correction here.
         // The true obstacles on the maxlevel are not touched.
-        for (int i=0; i<(*obstacles_)[this->level_].size(); i++) {
+        for (size_t i=0; i<(*obstacles_)[this->level_].size(); i++) {
             BoxConstraint<field_type,dim>& cO = (*obstacles_)[this->level_][i];
             for (int j=0; j<dim; j++)
                 if (cO.lower(j) > cO.upper(j))
@@ -116,7 +116,7 @@ void MonotoneMGStep<OperatorType, DiscFuncType>::iterate()
 
         // Determine critical nodes  (only for debugging)
         const double eps = 1e-12;
-        for (int i=0; i<obstacles[level].size(); i++) {
+        for (size_t i=0; i<obstacles[level].size(); i++) {
             
             for (int j=0; j<dim; j++) {
                 
@@ -145,7 +145,7 @@ void MonotoneMGStep<OperatorType, DiscFuncType>::iterate()
         std::vector<BoxConstraint<field_type,dim> > obstacleBackup = obstacles[level];
         
         // Compute defect obstacles
-        for (int i=0; i<(*obstacles_)[level].size(); i++)
+        for (size_t i=0; i<(*obstacles_)[level].size(); i++)
             obstacles[level][i] -= x[level][i];
         
         // ///////////////////////
@@ -154,7 +154,7 @@ void MonotoneMGStep<OperatorType, DiscFuncType>::iterate()
         
         // Determine critical nodes
         const double eps = 1e-12;
-        for (int i=0; i<obstacles[level].size(); i++) {
+        for (size_t i=0; i<obstacles[level].size(); i++) {
             
             for (int j=0; j<dim; j++) {
                 
diff --git a/dune/solvers/iterationsteps/multigridstep.cc b/dune/solvers/iterationsteps/multigridstep.cc
index ee1bb5c85fd334edeb8fa214c77a46d4fd20e6d9..5dc64f203bdab5df68c121ed2c21d977f42cb574 100644
--- a/dune/solvers/iterationsteps/multigridstep.cc
+++ b/dune/solvers/iterationsteps/multigridstep.cc
@@ -41,7 +41,6 @@ void MultigridStep<MatrixType, VectorType, BitVectorType>::preprocess()
         std::cout << "Warning: Preprocess has already been called without calling iterate afterwards!" << std::endl;
     preprocessCalled = true;
 
-    int i;
     this->level_ = this->numLevels_-1;
     
     // //////////////////////////////////////////////////////////
@@ -51,14 +50,15 @@ void MultigridStep<MatrixType, VectorType, BitVectorType>::preprocess()
         DUNE_THROW(SolverError, "You requested " << this->numLevels_ << " levels "
                    << "but you supplied " << this->mgTransfer_.size() << " transfer operators!");
 
-    for (i=0; i<this->mgTransfer_.size(); i++)
+    for (size_t i=0; i<this->mgTransfer_.size(); i++)
         if (this->mgTransfer_[i] == NULL)
             DUNE_THROW(SolverError, "You have not supplied a multigrid restriction operator "
                        "for level " << i);
 
-    // The matrices for the linear correction problems
-    for (i=this->numLevels_-2; i>=0; i--) {
-        
+    // /////////////////////////////////////////////////////
+    //   Assemble the complete hierarchy of matrices
+    // /////////////////////////////////////////////////////
+    for (int i=this->numLevels_-2; i>=0; i--) {
 
         // Delete coarse grid matrix if it exists already
         if (this->mat_[i]) {
@@ -67,21 +67,18 @@ void MultigridStep<MatrixType, VectorType, BitVectorType>::preprocess()
         }
 
         this->mat_[i] = new MatrixType;
-        
+
         // Compute which entries are present in the (sparse) coarse grid stiffness
         // matrices. 
         this->mgTransfer_[i]->galerkinRestrictSetOccupation(*this->mat_[i+1], *(const_cast<MatrixType*>(this->mat_[i])));
 
+        // setup matrix
+        this->mgTransfer_[i]->galerkinRestrict(*this->mat_[i+1], *(const_cast<MatrixType*>(this->mat_[i])));
+
         // Set solution vector sizes for the lower levels
         GenericVector::resize(this->x_[i],*this->mat_[i]);
     }
 
-    // /////////////////////////////////////////////////////
-    //   Assemble the complete hierarchy of matrices
-    // /////////////////////////////////////////////////////
-    for (i=this->mgTransfer_.size()-1; i>=0; i--) 
-        this->mgTransfer_[i]->galerkinRestrict(*this->mat_[i+1], *(const_cast<MatrixType*>(this->mat_[i])));
-
     // /////////////////////////////////////////////////////
     //   Setup dirichlet bitfields
     // /////////////////////////////////////////////////////
@@ -91,7 +88,7 @@ void MultigridStep<MatrixType, VectorType, BitVectorType>::preprocess()
     if (this->ignoreNodes_!=0)
     {
         ignoreNodesHierarchy_[this->numLevels_-1] = const_cast<BitVectorType*>(this->ignoreNodes_);
-        for (i=this->numLevels_-2; i>=0; --i)
+        for (int i=this->numLevels_-2; i>=0; --i)
         {
             ignoreNodesHierarchy_[i] = new BitVectorType();
             this->mgTransfer_[i]->restrictToFathers(*ignoreNodesHierarchy_[i+1], *ignoreNodesHierarchy_[i]);
diff --git a/dune/solvers/iterationsteps/multigridstep.hh b/dune/solvers/iterationsteps/multigridstep.hh
index 01870d35a847bb7b0e33e3be27b19010bca61924..49484036bff62509a8cd674e095a587faf4f025e 100644
--- a/dune/solvers/iterationsteps/multigridstep.hh
+++ b/dune/solvers/iterationsteps/multigridstep.hh
@@ -133,7 +133,7 @@
 
         virtual void setProblem(const MatrixType& mat, 
                                 VectorType& x, 
-                                VectorType& rhs)
+                                const VectorType& rhs)
         {
             level_ = numLevels_-1;
 
diff --git a/dune/solvers/iterationsteps/obstacletnnmgstep.hh b/dune/solvers/iterationsteps/obstacletnnmgstep.hh
new file mode 100644
index 0000000000000000000000000000000000000000..2b36ca516013d94bece7cc570706e2fab3f67154
--- /dev/null
+++ b/dune/solvers/iterationsteps/obstacletnnmgstep.hh
@@ -0,0 +1,461 @@
+#ifndef OBSTACLE_TNNMG_STEP_HH
+#define OBSTACLE_TNNMG_STEP_HH
+
+// std includes
+#include <vector>
+#include <algorithm>
+
+// dune-common includes
+#include <dune/common/bitsetvector.hh>
+
+// dune-solver includes
+#include <dune/solvers/iterationsteps/iterationstep.hh>
+#include <dune/solvers/iterationsteps/truncatedblockgsstep.hh>
+#include <dune/solvers/iterationsteps/projectedblockgsstep.hh>
+#include <dune/solvers/iterationsteps/multigridstep.hh>
+
+#include <dune/solvers/solvers/loopsolver.hh>
+#include <dune/solvers/norms/energynorm.hh>
+#include <dune/solvers/common/boxconstraint.hh>
+#include <dune/solvers/transferoperators/mandelobsrestrictor.hh>
+
+
+
+
+
+/**
+ * \brief TNNMG step for quadratic obstacle problems
+ *
+ * This is a dune-solvers iteration step implementing
+ * the TNNMG (truncated nonsmooth Newton multigrid) method
+ * for convex quadratic obstacle problems for use in the
+ * LoopSolver class.
+ *
+ * The method basically consists of a projected Gauss-Seidel
+ * method as nonlinear pre- and post-smoother and a linear
+ * multigrid method for a truncated linear system. For
+ * details please refer to
+ *
+ *   'Multigrid Methods for Obstacle Problems', C. Gräser and R. Kornhuber
+ *   Journal of Computational Mathematics 27 (1), 2009, pp. 1-44 
+ *
+ * A much more general and flexible implementation that can also be used for
+ * energy functionals with nonquadratic (anisotropic) smooth part
+ * and other nonsmooth part, e.g. incorporating local simplex
+ * constraints, can be found in the dune-tnnmg module.
+ *
+ * \tparam MatrixType Type for the matrix describing the quadratic part
+ * \tparam VectorType Type for solution and rhs vectors
+ */
+template<class MatrixType, class VectorType>
+class ObstacleTNNMGStep
+    : public IterationStep<VectorType>
+{
+        typedef IterationStep<VectorType> Base;
+        typedef typename VectorType::field_type field_type;
+
+        enum {blockSize = VectorType::block_type::dimension};
+    public:
+
+        typedef VectorType Vector;
+        typedef MatrixType Matrix;
+        typedef BoxConstraint<field_type, blockSize> BlockBoxConstraint;
+        typedef typename std::vector<BlockBoxConstraint> BoxConstraintVector;
+        typedef typename Dune::BitSetVector<VectorType::block_type::dimension> BitVector;
+
+        typedef MultigridTransfer<Vector, BitVector, Matrix> Transfer;
+        typedef typename std::vector<Transfer*> TransferPointerVector;
+
+    protected:
+
+        typedef ProjectedBlockGSStep<Matrix, Vector> NonlinearSmoother;
+        typedef TruncatedBlockGSStep<Matrix, Vector> LinearSmoother;
+        typedef MultigridStep<Matrix, Vector, BitVector> LinearMultigridStep;
+        typedef EnergyNorm<Matrix, Vector> LinearBaseSolverEnergyNorm;
+        typedef LoopSolver<Vector> LinearBaseSolver;
+
+    public:
+
+        /**
+         * \brief Construct iteration step
+         *
+         * \param mat Matrix describing the quadratic part of the energy
+         * \param x Solution vector containing the initial iterate
+         * \param rhs Vector containing the linear part of the energy
+         * \param ignore BitVector marking components to ignore (e.g. Dirichlet of hanging nodes)
+         * \param obstacles std::vector containing local obstacles for each global index
+         * \param transfer A vector of pointers to transfer operators describing the subspace hierarchy
+         * \param truncationTol Components will be truncated for the coarse grid correction if they are
+         *                      this close to the obstacle (the default will work well in general).
+         *
+         */
+        ObstacleTNNMGStep(
+                const Matrix& mat,
+                Vector& x,
+                const Vector& rhs,
+                const BitVector& ignore,
+                const BoxConstraintVector& obstacles,
+                const TransferPointerVector& transfer,
+                double truncationTol=1e-10) :
+            Base(x),
+            mat_(mat),
+            rhs_(rhs),
+            obstacles_(obstacles),
+            transfer_(transfer),
+            baseSolverNorm_(baseSolverStep_),
+            baseSolver_(&baseSolverStep_, 100, 1e-8, &baseSolverNorm_, LinearBaseSolver::QUIET),
+            nonlinearSmoother_(mat_, *x_, rhs_),
+            preSmoothingSteps_(3),
+            postSmoothingSteps_(3),
+            linearIterationSteps_(1),
+            linearPreSmoothingSteps_(3),
+            linearPostSmoothingSteps_(3),
+            truncationTol_(truncationTol)
+        {
+            ignoreNodes_ = &ignore;
+        }
+
+
+        /**
+         * \brief Set type of multigrid cycle
+         *
+         * If you don't call this method manually, 3 nonlinear pre- and post-smoothing
+         * steps and a linear V(3,3)-cycle will be used.
+         *
+         * \param preSmoothingSteps Number of nonlinear pre-smoothing steps.
+         * \param linearIterationSteps Number of linear multigrid steps for truncated problem
+         * \param postSmoothingSteps Number of nonlinear post-smoothing steps.
+         * \param linearPreSmoothingSteps Number of linear pre-smoothing steps during the linear multigrid V-cycle
+         * \param linearPostSmoothingSteps Number of linear post-smoothing steps during the linear multigrid V-cycle
+         */
+        void setSmoothingSteps(int preSmoothingSteps, int linearIterationSteps, int postSmoothingSteps, int linearPreSmoothingSteps, int linearPostSmoothingSteps)
+        {
+            preSmoothingSteps_ = preSmoothingSteps;
+            postSmoothingSteps_ = postSmoothingSteps;
+            linearIterationSteps_ = linearIterationSteps;
+            linearPreSmoothingSteps_ = linearPreSmoothingSteps;
+            linearPostSmoothingSteps_ = linearPostSmoothingSteps;
+        }
+
+
+        /**
+         * \brief Setup the iteration step
+         *
+         * This method must be called before the iterate method is called.
+         */
+        virtual void preprocess()
+        {
+            truncatedResidual_.resize(x_->size());
+            residual_.resize(x_->size());
+            coarseCorrection_.resize(x_->size());
+            temp_.resize(x_->size());
+
+            linearMGStep_.setNumberOfLevels(transfer_.size()+1);
+            linearMGStep_.setTransferOperators(transfer_);
+            linearMGStep_.setSmoother(&linearSmoother_);
+            linearMGStep_.basesolver_ = &baseSolver_;
+            linearMGStep_.setMGType(1, linearPreSmoothingSteps_, linearPostSmoothingSteps_);
+
+            hasObstacle_.resize(x_->size());
+            hasObstacle_.setAll();
+
+            nonlinearSmoother_.obstacles_ = &obstacles_;
+            nonlinearSmoother_.hasObstacle_ = &hasObstacle_;
+            nonlinearSmoother_.ignoreNodes_ = ignoreNodes_;
+            outStream_.str("");
+            outStream_ << "      step size";
+            for(int j=0; j<blockSize; ++j)
+                outStream_ << "  trunc" << std::setw(2) << j;
+        }
+
+
+        /**
+         * \brief Apply one iteration step
+         *
+         * You can query the current iterate using the getSol() method
+         * afterwards. The preprocess() method must be called before this
+         * one.
+         */
+        virtual void iterate()
+        {
+            // clear previous output data
+            outStream_.str("");
+
+            // use reference to simplify the following code
+            Vector& x = *x_;
+
+            // nonlinear pre-smoothing
+            for(int i=0; i<preSmoothingSteps_; ++i)
+                nonlinearSmoother_.iterate();
+            x = nonlinearSmoother_.getSol();
+
+            // compute residual
+            residual_ = rhs_;
+            mat_.mmv(x, residual_);
+
+            // initialize matrix, residual, and correction vector
+            // for linear coarse grid correction
+            Matrix truncatedMat = mat_;
+            truncatedResidual_ = residual_;
+            coarseCorrection_ = 0;
+
+            // mark indices for truncation and count truncated indices per component
+            typename Dune::array<int, blockSize> truncationCount;
+            truncationCount.fill(0);
+            BitVector truncate(x.size(), false);
+            for(int i=0; i<x.size(); ++i)
+            {
+                for (int j = 0; j < blockSize; ++j)
+                {
+                    if ((x[i][j]<=obstacles_[i].lower(j)+truncationTol_) or (x[i][j]>=obstacles_[i].upper(j)-truncationTol_))
+                    {
+                        truncate[i][j] = true;
+                        ++truncationCount[j];
+                    }
+                }
+            }
+
+            // truncate matrix and residual
+            typename Matrix::row_type::Iterator it;
+            typename Matrix::row_type::Iterator end;
+            for(int i=0; i<x.size(); ++i)
+            {
+                it = truncatedMat[i].begin();
+                end = truncatedMat[i].end();
+                for(; it!=end; ++it)
+                {
+                    int j = it.index();
+                    typename Matrix::block_type& Aij = *it;
+                    for (int ii = 0; ii < blockSize; ++ii)
+                        for (int jj = 0; jj < blockSize; ++jj)
+                            if (truncate[i][ii] or truncate[j][jj])
+                                Aij[ii][jj] = 0;
+                }
+                for (int ii = 0; ii < blockSize; ++ii)
+                    if (truncate[i][ii])
+                        truncatedResidual_[i][ii] = 0;
+            }
+
+            // apply linear multigrid to approximatively solve the truncated linear system
+            linearMGStep_.setProblem(truncatedMat, coarseCorrection_, truncatedResidual_);
+            linearMGStep_.ignoreNodes_ = ignoreNodes_;
+            linearMGStep_.preprocess();
+            for(int i=0; i<linearIterationSteps_; ++i)
+                linearMGStep_.iterate();
+            coarseCorrection_ = linearMGStep_.getSol();
+
+            // post process multigrid correction
+            // There will be spurious entries in non-truncated
+            // components since we did not truncate the transfer
+            // operators. These can safely be removed.
+            for(int i=0; i<x.size(); ++i)
+                for (int ii = 0; ii < blockSize; ++ii)
+                    if (truncate[i][ii])
+                        coarseCorrection_[i][ii] = 0;
+
+            // project correction into the defect domain
+            // and compute directional constraints for line search
+            BoxConstraint<field_type, 1> directionalConstraints;
+            for(int i=0; i<x.size(); ++i)
+            {
+                // compute local defect constraints
+                BlockBoxConstraint defectConstraint = obstacles_[i];
+                defectConstraint -= x[i];
+
+                // project correction into local defect constraints
+                for (int j = 0; j < blockSize; ++j)
+                {
+                    if (coarseCorrection_[i][j] < defectConstraint.lower(j))
+                        coarseCorrection_[i][j] = defectConstraint.lower(j);
+                    if (coarseCorrection_[i][j] > defectConstraint.upper(j))
+                        coarseCorrection_[i][j] = defectConstraint.upper(j);
+                }
+
+                for (int j = 0; j < blockSize; ++j)
+                {
+                    // compute relative defect constraints
+                    defectConstraint.lower(j) /= coarseCorrection_[i][j];
+                    defectConstraint.upper(j) /= coarseCorrection_[i][j];
+
+                    // orient relative defect constraints
+                    if (defectConstraint.lower(j) > defectConstraint.upper(j))
+                        std::swap(defectConstraint.lower(j), defectConstraint.upper(j));
+
+                    // update directional constraints
+                    directionalConstraints.lower(0) = std::max(directionalConstraints.lower(0), defectConstraint.lower(j));
+                    directionalConstraints.upper(0) = std::min(directionalConstraints.upper(0), defectConstraint.upper(j));
+                }
+            }
+
+            // compute damping parameter
+            mat_.mv(coarseCorrection_, temp_);
+            field_type damping = 0;
+            field_type coarseCorrectionNorm2 = coarseCorrection_*temp_;
+            if (coarseCorrectionNorm2 > 0)
+                damping = (residual_*coarseCorrection_) / coarseCorrectionNorm2;
+            else
+                damping = 0;
+            damping = std::max(damping, directionalConstraints.lower(0));
+            damping = std::min(damping, directionalConstraints.upper(0));
+
+            // apply correction
+            x.axpy(damping, coarseCorrection_);
+
+            // nonlinear post-smoothing
+            for(int i=0; i<postSmoothingSteps_; ++i)
+                nonlinearSmoother_.iterate();
+            x = nonlinearSmoother_.getSol();
+
+            // fill stream with output data
+            outStream_.setf(std::ios::scientific);
+            outStream_ << std::setw(15) << damping;
+            for(int j=0; j<blockSize; ++j)
+                outStream_ << std::setw(9) << truncationCount[j];
+        }
+
+
+        /**
+         * \brief Get the current iterate
+         */
+        VectorType getSol()
+        {
+            return *x_;
+        }
+
+
+        /**
+         * \brief Get output accumulated during last iterate() call.
+         *
+         * This method is used to report TNNMG-specific data to the
+         * LoopSolver class during the iterative solution.
+         */
+        std::string getOutput() const
+        {
+            std::string s = outStream_.str();
+            outStream_.str("");
+            return s;
+        }
+
+
+        /**
+         * \brief Compute initial iterate by nested iteration
+         *
+         * This method performs a purely algebraic variant of nested iteration.
+         * The coarse matrix and rhs are obtained using the transfer operators.
+         * Coarse obstacles are constructed using a local max/min over the
+         * support of coarse basis functions as suggested by J. Mandel.
+         * The ignore-marks are restricted such that a coarse dof
+         * is ignored if it is associated to an ignored fine dof
+         * in the sense that the corresponding transfer operators entry is 1.
+         *
+         * On each level a fixed number of v-cycles is performed.
+         *
+         * This method can be called before the preprocess() method.
+         * It does not change the ObstacleTNNMGStep state despite
+         * updating the solution vector.
+         *
+         * \param coarseIterationSteps Number of v-cycle performed on the corse levels.
+         */
+        void nestedIteration(int coarseIterationSteps=2)
+        {
+            int maxLevel = transfer_.size();
+
+            std::vector<Matrix> coarseMatrix(maxLevel);
+            std::vector<Vector> coarseSolution(maxLevel);
+            std::vector<Vector> coarseRhs(maxLevel);
+            std::vector<BitVector> coarseIgnore(maxLevel);
+            std::vector<BoxConstraintVector> coarseObstacle(maxLevel);
+            MandelObstacleRestrictor<Vector> obstacleRestrictor;
+            BitVector critical;
+
+            critical.resize(x_->size());
+            critical.unsetAll();
+
+            transfer_[maxLevel-1]->galerkinRestrictSetOccupation(mat_, coarseMatrix[maxLevel-1]);
+            transfer_[maxLevel-1]->galerkinRestrict(mat_, coarseMatrix[maxLevel-1]);
+            transfer_[maxLevel-1]->restrict(rhs_, coarseRhs[maxLevel-1]);
+            transfer_[maxLevel-1]->restrictToFathers(*ignoreNodes_, coarseIgnore[maxLevel-1]);
+            obstacleRestrictor.restrict(obstacles_, coarseObstacle[maxLevel-1], hasObstacle_, hasObstacle_, *(transfer_[maxLevel-1]), critical);
+            for (int i = maxLevel-2; i>=0; --i)
+            {
+                transfer_[i]->galerkinRestrictSetOccupation(coarseMatrix[i+1], coarseMatrix[i]);
+                transfer_[i]->galerkinRestrict(coarseMatrix[i+1], coarseMatrix[i]);
+                transfer_[i]->restrict(coarseRhs[i+1], coarseRhs[i]);
+                transfer_[i]->restrictToFathers(coarseIgnore[i+1], coarseIgnore[i]);
+                obstacleRestrictor.restrict(coarseObstacle[i+1], coarseObstacle[i], hasObstacle_, hasObstacle_, *(transfer_[i]), critical);
+                coarseSolution[i].resize(coarseMatrix[i].N());
+            }
+
+            coarseSolution[0] = 0;
+            for (int i = 0; i<maxLevel; ++i)
+            {
+
+                TransferPointerVector coarseTransfer(i);
+                for (int j = 0; j < i; ++j)
+                    coarseTransfer[j] = transfer_[j];
+
+                ObstacleTNNMGStep coarseTNNMGStep(
+                        coarseMatrix[i],
+                        coarseSolution[i],
+                        coarseRhs[i],
+                        coarseIgnore[i],
+                        coarseObstacle[i],
+                        coarseTransfer,
+                        truncationTol_);
+                coarseTNNMGStep.preprocess();
+                for (int j = 0; j < coarseIterationSteps; ++j)
+                    coarseTNNMGStep.iterate();
+                coarseSolution[i] = coarseTNNMGStep.getSol();
+
+                if (i<maxLevel-1)
+                    transfer_[i]->prolong(coarseSolution[i], coarseSolution[i+1]);
+            }
+            transfer_[maxLevel-1]->prolong(coarseSolution[maxLevel-1], *x_);
+        }
+
+    protected:
+
+        // problem description given by the user
+        const Matrix& mat_;
+        using Base::x_;
+        using Base::ignoreNodes_;
+        const Vector& rhs_;
+        const BoxConstraintVector& obstacles_;
+
+        // transfer operators given by the user
+        const TransferPointerVector& transfer_;
+
+        // vectors needed during iteration
+        Vector residual_;
+        Vector temp_;
+        Vector truncatedResidual_;
+        Vector coarseCorrection_;
+
+        // solver components
+        NonlinearSmoother nonlinearSmoother_;
+        typename Dune::BitSetVector<1> hasObstacle_;
+        double truncationTol_;
+
+        LinearMultigridStep linearMGStep_;
+        LinearSmoother linearSmoother_;
+        LinearSmoother baseSolverStep_;
+        LinearBaseSolverEnergyNorm baseSolverNorm_;
+        LinearBaseSolver baseSolver_;
+
+        int preSmoothingSteps_;
+        int postSmoothingSteps_;
+        int linearIterationSteps_;
+        int linearPreSmoothingSteps_;
+        int linearPostSmoothingSteps_;
+
+        mutable std::ostringstream outStream_;
+};
+
+
+
+
+
+
+
+
+#endif
diff --git a/dune/solvers/iterationsteps/projectedblockgsstep.cc b/dune/solvers/iterationsteps/projectedblockgsstep.cc
index 8de381085d44af1754686b96d40a652e1ed23381..51b3694a242ef32d0abc7076b89f450cbb520255 100644
--- a/dune/solvers/iterationsteps/projectedblockgsstep.cc
+++ b/dune/solvers/iterationsteps/projectedblockgsstep.cc
@@ -55,7 +55,7 @@ void ProjectedBlockGSStep<OperatorType, DiscFuncType>::iterate()
             // Initial correction
             v = 0;
 
-            for (int j=0; j<20; j++) {
+            for (int j=0; j< ((BlockSize==1) ? 1 : 20); j++) {
 
                 for (int k=0; k<BlockSize; k++) {
 
diff --git a/dune/solvers/iterationsteps/projectedblockgsstep.hh b/dune/solvers/iterationsteps/projectedblockgsstep.hh
index 58f7419eb54916c45a16a2335be22f2da5733d25..e5b25414f308ae680279af907fd0f454cd74efd3 100644
--- a/dune/solvers/iterationsteps/projectedblockgsstep.hh
+++ b/dune/solvers/iterationsteps/projectedblockgsstep.hh
@@ -22,7 +22,7 @@ public:
     ProjectedBlockGSStep() {}
     
     //! Constructor with a linear problem
-    ProjectedBlockGSStep(const OperatorType& mat, DiscFuncType& x, DiscFuncType& rhs)
+    ProjectedBlockGSStep(const OperatorType& mat, DiscFuncType& x, const DiscFuncType& rhs)
         : BlockGSStep<OperatorType,DiscFuncType>(mat, x, rhs)
     {}
 
diff --git a/dune/solvers/iterationsteps/truncatedblockgsstep.hh b/dune/solvers/iterationsteps/truncatedblockgsstep.hh
index 6691bac7dd2ffb83f76718f5952dcc35b0834f6b..4d1c0c522732cf6f72c0d8ffbbb00b858a4de394 100644
--- a/dune/solvers/iterationsteps/truncatedblockgsstep.hh
+++ b/dune/solvers/iterationsteps/truncatedblockgsstep.hh
@@ -153,8 +153,8 @@ public:
         const MatrixType& mat = *this->mat_;
         const SparseMatrixType& sparse_mat = mat.sparseMatrix();
         const LowRankMatrixType& lowrank_mat = mat.lowRankMatrix();
-        VectorType& rhs = *this->rhs_,
-                   & x   = *this->x_;
+        const VectorType& rhs = *this->rhs_;
+        VectorType& x   = *this->x_;
         const BitVectorType& ignore = *this->ignoreNodes_;
 
         // compute m*x once and only add/subtract single summands in the iterations later
diff --git a/dune/solvers/iterationsteps/trustregiongsstep.cc b/dune/solvers/iterationsteps/trustregiongsstep.cc
index 2cb95093178cd11fa26415ffd06b8427f7515c4e..ef83bc18a5a2cbcff601ddc785b31aa431469e61 100644
--- a/dune/solvers/iterationsteps/trustregiongsstep.cc
+++ b/dune/solvers/iterationsteps/trustregiongsstep.cc
@@ -72,11 +72,15 @@ void TrustRegionGSStep<OperatorType, DiscFuncType>::iterate()
                 //printf("%d %d konvex\n", i, j);
                 //double oldEnergy = computeEnergy(mat, *this->x_, *this->rhs_);
                 // 1d problem is convex
+#ifndef NDEBUG
                 double oldX = x;
+#endif
                 x = r / diag;
+#ifndef NDEBUG
                 double energyX = 0.5*diag*x*x - r*x;
                 double energyOldX = 0.5*diag*oldX*oldX - r*oldX;
                 assert(energyX <= energyOldX + 1e-6);
+#endif
                 if ( x < obstacles[i].lower(j)) {
                     // assert that projection increases functional value
 //                     double trueMin = 0.5*diag*x*x - r*x;
diff --git a/dune/solvers/norms/diagnorm.hh b/dune/solvers/norms/diagnorm.hh
index cc6ea40b70e6ed79d5101b33dd64b8e374a353f3..4a047a889fa26dd9ec2043d10043d02b3b4c7e97 100644
--- a/dune/solvers/norms/diagnorm.hh
+++ b/dune/solvers/norms/diagnorm.hh
@@ -1,3 +1,5 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
 #ifndef DIAGNORM_HH
 #define DIAGNORM_HH
 
@@ -15,6 +17,8 @@ template <class Diagonal=Dune::BlockVector<Dune::FieldVector <double,1> >, class
 class DiagNorm:
     public Norm<VectorType>
 {
+        typedef typename VectorType::size_type SizeType;
+
     public:
         DiagNorm(const double alpha, const Diagonal &d) :
             d(d),
@@ -27,7 +31,7 @@ class DiagNorm:
             double r = 0.0;
 
             typename VectorType::value_type Dv(0.0);
-            for(size_t row = 0; row < v.size(); ++row)
+            for(SizeType row = 0; row < v.size(); ++row)
             {
                 d[row].mv(v[row],Dv);
                 r += Dv*v[row];
@@ -42,7 +46,7 @@ class DiagNorm:
             double r = 0.0;
 
             typename VectorType::value_type Dv(0.0);
-            for(size_t row = 0; row < v1.size(); ++row)
+            for(SizeType row = 0; row < v1.size(); ++row)
             {
                 d[row].mv(v1[row]-v2[row],Dv);
                 r += Dv*(v1[row]-v2[row]);
@@ -63,6 +67,8 @@ class DiagNorm<Dune::BlockVector<Dune::FieldVector <double,1> >, Dune::BlockVect
     public Norm<Dune::BlockVector<Dune::FieldVector <double,1> > >
 {
         typedef Dune::BlockVector<Dune::FieldVector <double,1> > VectorType;
+        typedef VectorType::size_type SizeType;
+
     public:
         DiagNorm(const double alpha, const VectorType &d) :
             d(d),
@@ -74,7 +80,7 @@ class DiagNorm<Dune::BlockVector<Dune::FieldVector <double,1> >, Dune::BlockVect
         {
             double r = 0.0;
 
-            for(size_t row = 0; row < v.size(); ++row)
+            for(SizeType row = 0; row < v.size(); ++row)
                 r += d[row] * v[row] * v[row];
 
             return sqrt(fabs(alpha * r));
@@ -85,7 +91,7 @@ class DiagNorm<Dune::BlockVector<Dune::FieldVector <double,1> >, Dune::BlockVect
         {
             double r = 0.0;
 
-            for (size_t row = 0; row < v1.size(); ++row)
+            for (SizeType row = 0; row < v1.size(); ++row)
                 r += (double)d[row] * (v1[row]-v2[row]) * (v1[row] - v2[row]);
 
             return sqrt(fabs(alpha * r));
diff --git a/dune/solvers/norms/fullnorm.hh b/dune/solvers/norms/fullnorm.hh
index b503325c4dcf75bb6340c695d94b7190c4aea542..0191cb0241ebbec9fd09beed7fbbbe6338e8619a 100644
--- a/dune/solvers/norms/fullnorm.hh
+++ b/dune/solvers/norms/fullnorm.hh
@@ -1,3 +1,5 @@
+// -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
 #ifndef FULLNORM_HH
 #define FULLNORM_HH
 
@@ -6,7 +8,7 @@
 
 #include "norm.hh"
 
-/** \brief Vector norm induced by filled in low rank linear operator M
+/** \brief Vector norm induced by dense low rank linear operator M
  *
  *  \f$M = m^t*m\f$
  *  \f$\Vert u \Vert_M = (u, Mu)^{1/2} = (mu,mu)^{1/2}\f$
@@ -30,8 +32,6 @@ class FullNorm: public Norm<VectorType>
             r = 0.0;
 
             lowRankFactor_.umv(v,r);
-//            for (size_t row = 0; row < v.size(); ++row)
-//                r += m[row] * v[row];
 
             return sqrt(fabs(alpha*(r*r)));
         }
@@ -42,10 +42,6 @@ class FullNorm: public Norm<VectorType>
             VectorType r(lowRankFactor_.N());
             r = 0.0;
 
-//            VectorType temp(v1);
-//            v1 -= v2;
-//            lowRankFactor_.umv(temp,r);
-
             for (typename LowRankFactor::size_type k=0; k<lowRankFactor_.N(); ++k)
                 for (typename LowRankFactor::size_type i=0; i<lowRankFactor_.M(); ++i)
                     lowRankFactor_[k][i].umv(v1[i]-v2[i],r[k]);
@@ -65,6 +61,8 @@ class FullNorm<Dune::BlockVector<Dune::FieldVector<double,1> >, Dune::BlockVecto
     public Norm<Dune::BlockVector<Dune::FieldVector<double,1> > >
 {
         typedef Dune::BlockVector<Dune::FieldVector<double,1> > VectorType;
+        typedef VectorType::size_type SizeType;
+
     public:
         FullNorm(const double alpha, const VectorType &m) :
             m(m),
@@ -76,7 +74,7 @@ class FullNorm<Dune::BlockVector<Dune::FieldVector<double,1> >, Dune::BlockVecto
         {
             double r = 0.0;
 
-            for (size_t row = 0; row < v.size(); ++row)
+            for (SizeType row = 0; row < v.size(); ++row)
                 r += m[row] * v[row];
 
             return sqrt(fabs(alpha*r*r));
@@ -87,7 +85,7 @@ class FullNorm<Dune::BlockVector<Dune::FieldVector<double,1> >, Dune::BlockVecto
         {
             double r = 0.0;
 
-            for (size_t row = 0; row < v1.size(); ++row)
+            for (SizeType row = 0; row < v1.size(); ++row)
                 r += (double)m[row] * (v1[row] - v2[row]);
 
             return sqrt(fabs(alpha*r*r));
diff --git a/dune/solvers/solvers/solver.hh b/dune/solvers/solvers/solver.hh
index 28aff314709896d73de6a49ab1b58dd6ecc35faa..caa6dd8036dc281ae53feda9afc290a6025d5f8c 100644
--- a/dune/solvers/solvers/solver.hh
+++ b/dune/solvers/solvers/solver.hh
@@ -15,17 +15,14 @@
         virtual ~Solver() {}
 
         /** \brief Do the necessary preprocessing */
-        virtual void preprocess();
+        virtual void preprocess()
+        {
+            // Default: Do nothing
+        }
 
         /** \brief Derived classes overload this with the actual
          * solution algorithm */
         virtual void solve() = 0;
 
     };
-
-void Solver::preprocess()
-{
-    // Default: Do nothing
-}
-
 #endif
diff --git a/dune/solvers/test/Makefile.am b/dune/solvers/test/Makefile.am
index df9e99f234d816170d8267defe9c16902cc56a8a..3b8b4eaf102fdb6a56e073b87875f545e1477788 100644
--- a/dune/solvers/test/Makefile.am
+++ b/dune/solvers/test/Makefile.am
@@ -2,15 +2,16 @@
 # list of tests to run
 TESTS = lowrankoperatortest \
         nulloperatortest \
-        sumoperatortest
+        sumoperatortest \
+        obstacletnnmgtest
 
 # programs just to build when "make check" is used
 check_PROGRAMS = $(TESTS)
 
-noinst_HEADERS =
+noinst_HEADERS = common.hh
 
 # collect some common flags
-COMMON_CPPFLAGS = $(AM_CPPFLAGS) $(DUNEMPICPPFLAGS)
+COMMON_CPPFLAGS = $(AM_CPPFLAGS) $(DUNEMPICPPFLAGS) -I$(top_srcdir)
 COMMON_LDADD =  $(DUNE_LDFLAGS) $(DUNE_LIBS) $(DUNEMPILIBS) $(LDADD)
 COMMON_LDFLAGS = $(AM_LDFLAGS) $(DUNEMPILDFLAGS) $(DUNE_LDFLAGS)
 
@@ -35,4 +36,9 @@ sumoperatortest_CPPFLAGS = $(COMMON_CPPFLAGS) $(GRID_CPPFLAGS)
 sumoperatortest_LDADD = $(COMMON_LDADD) $(GRID_LDADD)
 sumoperatortest_LDFLAGS = $(COMMON_LDFLAGS) $(GRID_LDFLAGS)
 
+obstacletnnmgtest_SOURCES = obstacletnnmgtest.cc
+obstacletnnmgtest_CPPFLAGS = $(COMMON_CPPFLAGS) $(GRID_CPPFLAGS)
+obstacletnnmgtest_LDADD = $(COMMON_LDADD) $(GRID_LDADD)
+obstacletnnmgtest_LDFLAGS = $(COMMON_LDFLAGS) $(GRID_LDFLAGS)
+
 include $(top_srcdir)/am/global-rules
diff --git a/dune/solvers/test/common.hh b/dune/solvers/test/common.hh
new file mode 100644
index 0000000000000000000000000000000000000000..7b68d5fbc1e9f341d9ac746e0572a01e1a4dddc5
--- /dev/null
+++ b/dune/solvers/test/common.hh
@@ -0,0 +1,345 @@
+#ifndef DUNE_SOLVERS_TESTS_COMMON_HH
+#define DUNE_SOLVERS_TESTS_COMMON_HH
+
+
+#include <dune/common/fvector.hh>
+#include <dune/common/fmatrix.hh>
+
+#include <dune/istl/matrixindexset.hh>
+
+#include <dune/geometry/quadraturerules.hh>
+
+#include <dune/localfunctions/lagrange/pqkfactory.hh>
+
+
+template<class GridView, class Matrix>
+void constructPQ1Pattern(const GridView& gridView, Matrix& matrix)
+{
+    static const int dim = GridView::Grid::dimension;
+
+    typedef typename GridView::IndexSet IndexSet;
+    typedef typename GridView::template Codim<0>::Iterator ElementIterator;
+    typedef typename GridView::template Codim<0>::Entity Element;
+    typedef typename Dune::PQkLocalFiniteElementCache<double, double, dim, 1> FiniteElementCache;
+    typedef typename FiniteElementCache::FiniteElementType FiniteElement;
+
+    const IndexSet& indexSet = gridView.indexSet();
+    FiniteElementCache cache;
+
+    int size = indexSet.size(dim);
+
+    Dune::MatrixIndexSet indices(size, size);
+
+    ElementIterator it = gridView.template begin<0>();
+    ElementIterator end = gridView.template end<0>();
+    for (; it != end; ++it)
+    {
+        Element& e = *it;
+        const FiniteElement& fe = cache.get(e.type());
+
+        int localSize = fe.localBasis().size();
+        for (int i = 0; i < localSize; ++i)
+        {
+            int iGlobal = indexSet.subIndex(e, fe.localCoefficients().localKey(i).subEntity(), dim);
+            for (int j = 0; j < localSize; ++j)
+            {
+                int jGlobal = indexSet.subIndex(e, fe.localCoefficients().localKey(j).subEntity(), dim);
+                indices.add(iGlobal, jGlobal);
+                indices.add(jGlobal, iGlobal);
+            }
+        }
+    }
+    indices.exportIdx(matrix);
+}
+
+
+template<class GridView, class Matrix>
+void assemblePQ1Stiffness(const GridView& gridView, Matrix& matrix)
+{
+    static const int dim = GridView::Grid::dimension;
+    static const int dimworld = GridView::Grid::dimensionworld;
+
+    typedef typename GridView::IndexSet IndexSet;
+    typedef typename GridView::template Codim<0>::Iterator ElementIterator;
+    typedef typename GridView::template Codim<0>::Entity Element;
+    typedef typename Dune::PQkLocalFiniteElementCache<double, double, dim, 1> FiniteElementCache;
+    typedef typename FiniteElementCache::FiniteElementType FiniteElement;
+
+    typedef typename Dune::QuadratureRule<double,dim> QuadratureRule;
+    typedef typename Dune::template FieldVector<double,dim> LocalCoordinate;
+    typedef typename Dune::template FieldVector<double,dimworld> GlobalCoordinate;
+    typedef typename Dune::template FieldMatrix<double,dimworld,dim> InverseJacobianTransposed;
+    typedef typename FiniteElement::Traits::LocalBasisType::Traits::JacobianType JacobianType;
+
+    const IndexSet& indexSet = gridView.indexSet();
+    FiniteElementCache cache;
+
+    ElementIterator it = gridView.template begin<0>();
+    ElementIterator end = gridView.template end<0>();
+    for (; it != end; ++it)
+    {
+        Element& e = *it;
+        const FiniteElement& fe = cache.get(e.type());
+
+        int localSize = fe.localBasis().size();
+
+        // get quadrature rule of appropiate order (P1/Q1)
+        int order = 0;
+        if (e.type().isSimplex())
+            order = 2*(1-1);
+        else
+            order = 2*(dim-1);
+        const QuadratureRule& quad = Dune::template QuadratureRules<double, dim>::rule(e.type(), order);
+
+        // store gradients of shape functions and base functions
+        std::vector<JacobianType> referenceGradients(localSize);
+        std::vector<GlobalCoordinate> gradients(localSize);
+
+        for (size_t pt=0; pt < quad.size(); ++pt)
+        {
+            // get quadrature point
+            const LocalCoordinate& quadPos = quad[pt].position();
+
+            // get transposed inverse of Jacobian of transformation
+            const InverseJacobianTransposed& invJacobian = e.geometry().jacobianInverseTransposed(quadPos);
+
+            // get integration factor
+            const double integrationElement = e.geometry().integrationElement(quadPos);
+
+            // evaluate gradients of shape functions
+            fe.localBasis().evaluateJacobian(quadPos, referenceGradients);
+
+            // transform gradients
+            for (size_t i=0; i<gradients.size(); ++i)
+                invJacobian.mv(referenceGradients[i][0], gradients[i]);
+
+            // compute matrix entries
+            double z = quad[pt].weight() * integrationElement;
+            for (int i = 0; i < localSize; ++i)
+            {
+                int iGlobal = indexSet.subIndex(e, fe.localCoefficients().localKey(i).subEntity(), dim);
+                for (int j = i+1; j < localSize; ++j)
+                {
+                    int jGlobal = indexSet.subIndex(e, fe.localCoefficients().localKey(j).subEntity(), dim);
+
+                    double zij = (gradients[i] * gradients[j]) * z;
+                    matrix[iGlobal][jGlobal] += zij;
+                    matrix[jGlobal][iGlobal] += zij;
+                }
+                double zii = (gradients[i] * gradients[i]) * z;
+                matrix[iGlobal][iGlobal] += zii;
+            }
+        }
+    }
+}
+
+
+template<class GridView, class Matrix>
+void assemblePQ1Mass(const GridView& gridView, Matrix& matrix)
+{
+    static const int dim = GridView::Grid::dimension;
+
+    typedef typename GridView::IndexSet IndexSet;
+    typedef typename GridView::template Codim<0>::Iterator ElementIterator;
+    typedef typename GridView::template Codim<0>::Entity Element;
+    typedef typename Dune::PQkLocalFiniteElementCache<double, double, dim, 1> FiniteElementCache;
+    typedef typename FiniteElementCache::FiniteElementType FiniteElement;
+
+    typedef typename Dune::QuadratureRule<double,dim> QuadratureRule;
+    typedef typename Dune::template FieldVector<double,dim> LocalCoordinate;
+    typedef typename FiniteElement::Traits::LocalBasisType::Traits::RangeType RangeType;
+
+    const IndexSet& indexSet = gridView.indexSet();
+    FiniteElementCache cache;
+
+    ElementIterator it = gridView.template begin<0>();
+    ElementIterator end = gridView.template end<0>();
+    for (; it != end; ++it)
+    {
+        Element& e = *it;
+        const FiniteElement& fe = cache.get(e.type());
+
+        int localSize = fe.localBasis().size();
+
+        // get quadrature rule of appropiate order (P1/Q1)
+        int order = 0;
+        if (e.type().isSimplex())
+            order = 2*1;
+        else
+            order = 2*dim;
+        const QuadratureRule& quad = Dune::template QuadratureRules<double, dim>::rule(e.type(), order);
+
+        // store values of shape functions
+        std::vector<RangeType> values(localSize);
+
+        for (size_t pt=0; pt < quad.size(); ++pt)
+        {
+            // get quadrature point
+            const LocalCoordinate& quadPos = quad[pt].position();
+
+            // get integration factor
+            const double integrationElement = e.geometry().integrationElement(quadPos);
+
+            // evaluate basis functions
+            fe.localBasis().evaluateFunction(quadPos, values);
+
+            // compute matrix entries
+            double z = quad[pt].weight() * integrationElement;
+            for (int i = 0; i < localSize; ++i)
+            {
+                int iGlobal = indexSet.subIndex(e, fe.localCoefficients().localKey(i).subEntity(), dim);
+                double zi = values[i]*z;
+                for (int j = i+1; j < localSize; ++j)
+                {
+                    int jGlobal = indexSet.subIndex(e, fe.localCoefficients().localKey(j).subEntity(), dim);
+
+                    double zij = values[j] * zi;
+                    matrix[iGlobal][jGlobal] += zij;
+                    matrix[jGlobal][iGlobal] += zij;
+                }
+                double zii = values[i] * zi;
+                matrix[iGlobal][iGlobal] += zii;
+            }
+        }
+    }
+}
+
+
+template<class GridView, class Vector, class Function>
+void assemblePQ1RHS(const GridView& gridView, Vector& r, const Function& f)
+{
+    static const int dim = GridView::Grid::dimension;
+
+    typedef typename GridView::IndexSet IndexSet;
+    typedef typename GridView::template Codim<0>::Iterator ElementIterator;
+    typedef typename GridView::template Codim<0>::Entity Element;
+    typedef typename Dune::PQkLocalFiniteElementCache<double, double, dim, 1> FiniteElementCache;
+    typedef typename FiniteElementCache::FiniteElementType FiniteElement;
+
+    typedef typename Dune::QuadratureRule<double,dim> QuadratureRule;
+    typedef typename Dune::template FieldVector<double,dim> LocalCoordinate;
+    typedef typename FiniteElement::Traits::LocalBasisType::Traits::RangeType RangeType;
+    typedef typename Function::RangeType FunctionRangeType;
+
+    const IndexSet& indexSet = gridView.indexSet();
+    FiniteElementCache cache;
+
+    int size = indexSet.size(dim);
+
+    ElementIterator it = gridView.template begin<0>();
+    ElementIterator end = gridView.template end<0>();
+    for (; it != end; ++it)
+    {
+        Element& e = *it;
+        const FiniteElement& fe = cache.get(e.type());
+
+        int localSize = fe.localBasis().size();
+
+        // get quadrature rule of appropiate order (P1/Q1)
+        int order = 0;
+        if (e.type().isSimplex())
+            order = 2*1;
+        else
+            order = 2*dim;
+        const QuadratureRule& quad = Dune::template QuadratureRules<double, dim>::rule(e.type(), order);
+
+        // store values of shape functions
+        std::vector<RangeType> values(localSize);
+
+        for (size_t pt=0; pt < quad.size(); ++pt)
+        {
+            // get quadrature point
+            const LocalCoordinate& quadPos = quad[pt].position();
+
+            // get integration factor
+            const double integrationElement = e.geometry().integrationElement(quadPos);
+
+            // evaluate basis functions
+            fe.localBasis().evaluateFunction(quadPos, values);
+
+            // evaluate function
+            FunctionRangeType fAtPos;
+            f.evaluate(e.geometry().global(quadPos), fAtPos);
+
+            // add vector entries
+            double z = quad[pt].weight() * integrationElement;
+            for (int i = 0; i < localSize; ++i)
+            {
+                int iGlobal = indexSet.subIndex(e, fe.localCoefficients().localKey(i).subEntity(), dim);
+
+                r[iGlobal].axpy(values[i]*z, fAtPos);
+            }
+        }
+    }
+}
+
+
+template<class Intersection>
+bool intersectionContainsVertex(const Intersection& i, int vertexInInsideElement)
+{
+    static const int dim = Intersection::dimension;
+
+    int faceIdx = i.indexInInside();
+
+    const Dune::GenericReferenceElement<double,dim>& refElement
+        = Dune::GenericReferenceElements<double, dim>::general(i.inside()->type());
+
+    for (int j = 0; j<refElement.size(faceIdx, 1, dim); ++j)
+    {
+        int intersectionVertexInElement = refElement.subEntity(faceIdx, 1, j, dim);
+
+        if (intersectionVertexInElement == vertexInInsideElement)
+            return true;
+    }
+    return false;
+}
+
+
+template<class GridView, class BitVector>
+void markBoundaryDOFs(const GridView& gridView, BitVector& isBoundary)
+{
+    static const int dim = GridView::Grid::dimension;
+
+    typedef typename GridView::IndexSet IndexSet;
+    typedef typename GridView::template Codim<0>::Iterator ElementIterator;
+    typedef typename GridView::template Codim<0>::Entity Element;
+    typedef typename GridView::IntersectionIterator IntersectionIterator;
+    typedef typename GridView::Intersection Intersection;
+    typedef typename Dune::PQkLocalFiniteElementCache<double, double, dim, 1> FiniteElementCache;
+    typedef typename FiniteElementCache::FiniteElementType FiniteElement;
+
+    const IndexSet& indexSet = gridView.indexSet();
+    FiniteElementCache cache;
+
+    ElementIterator it = gridView.template begin<0>();
+    ElementIterator end = gridView.template end<0>();
+    for (; it != end; ++it)
+    {
+        Element& e = *it;
+        const FiniteElement& fe = cache.get(e.type());
+        int localSize = fe.localBasis().size();
+
+        IntersectionIterator nIt = gridView.ibegin(e);
+        IntersectionIterator nEnd = gridView.iend(e);
+
+        for (; nIt!=nEnd; ++nIt)
+        {
+            const Intersection& i = *nIt;
+
+            if (i.boundary())
+            {
+                for (int j = 0; j < localSize; ++j)
+                {
+                    unsigned int vertexInInsideElement = fe.localCoefficients().localKey(j).subEntity();
+                    int iGlobal = indexSet.subIndex(e, vertexInInsideElement, dim);
+                    if (intersectionContainsVertex(i, vertexInInsideElement))
+                        isBoundary[iGlobal] = true;
+                }
+            }
+        }
+    }
+}
+
+
+
+#endif
diff --git a/dune/solvers/test/obstacletnnmgtest.cc b/dune/solvers/test/obstacletnnmgtest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..98560c971a0261530a27a42cf0fa0fdd62f3b66a
--- /dev/null
+++ b/dune/solvers/test/obstacletnnmgtest.cc
@@ -0,0 +1,219 @@
+#include <config.h>
+
+#include <iostream>
+#include <sstream>
+
+// dune-common includes
+#include <dune/common/bitsetvector.hh>
+
+// dune-istl includes
+#include <dune/istl/bcrsmatrix.hh>
+
+// dune-grid includes
+#include <dune/grid/yaspgrid.hh>
+#include <dune/grid/io/file/vtk/vtkwriter.hh>
+
+// dune-solver includes
+#include <dune/solvers/iterationsteps/obstacletnnmgstep.hh>
+#include <dune/solvers/transferoperators/compressedmultigridtransfer.hh>
+#include <dune/solvers/norms/energynorm.hh>
+#include <dune/solvers/solvers/loopsolver.hh>
+
+
+#include "common.hh"
+
+template <class DomainType, class RangeType>
+class ConstantFunction :
+    public Dune::VirtualFunction<DomainType, RangeType>
+{
+    public:
+        ConstantFunction(double c):
+            c_(c)
+        {}
+
+        void evaluate(const DomainType& x, RangeType& y) const
+        {
+            y = c_;
+        }
+
+    private:
+        double c_;
+};
+
+
+
+template<class GridType, class MatrixType, class VectorType>
+void solveObstacleProblemByTNNMG(const GridType& grid, const MatrixType& mat, VectorType& x, const VectorType& rhs, int maxIterations=100, double tolerance=1.0e-10)
+{
+    // bit vector marking dofs to ignore
+    typedef typename ObstacleTNNMGStep<MatrixType, VectorType>::BitVector BitVector;
+    BitVector ignore(rhs.size());
+    ignore.unsetAll();
+
+    solveObstacleProblemByTNNMG(grid, mat, x, rhs, ignore, maxIterations, tolerance);
+}
+
+
+template<class GridType, class MatrixType, class VectorType, class BitVector>
+void solveObstacleProblemByTNNMG(const GridType& grid, const MatrixType& mat, VectorType& x, const VectorType& rhs, const BitVector& ignore, int maxIterations=100, double tolerance=1.0e-10)
+{
+    typedef VectorType Vector;
+    typedef MatrixType Matrix;
+    typedef EnergyNorm<Matrix, Vector> Norm;
+    typedef LoopSolver<Vector> Solver;
+    typedef ObstacleTNNMGStep<Matrix, Vector> TNNMGStep;
+    typedef typename TNNMGStep::Transfer Transfer;
+    typedef typename TNNMGStep::BoxConstraintVector BoxConstraintVector;
+    typedef CompressedMultigridTransfer<Vector, BitVector, Matrix> TransferImplementation;
+
+    const int blockSize = VectorType::block_type::dimension;
+
+    // we need a vector of pointers to the transfer operator base class
+    std::vector<Transfer*> transfer(grid.maxLevel());
+    for (int i = 0; i < transfer.size(); ++i)
+    {
+        // create transfer operator from level i to i+1
+        TransferImplementation* t = new TransferImplementation;
+        t->setup(grid, i, i+1);
+        transfer[i] = t;
+    }
+
+    // create double obstacle constraints
+    BoxConstraintVector boxConstraints(rhs.size());
+    for (int i = 0; i < boxConstraints.size(); ++i)
+    {
+        for (int j = 0; j < blockSize; ++j)
+        {
+            boxConstraints[i].lower(j) = -1;
+            boxConstraints[i].upper(j) = +1;
+        }
+    }
+
+    // we want to control errors with respect to the energy norm induced by the matrix
+    Norm norm(mat);
+
+    // create iteration step
+    TNNMGStep tnnmgStep(mat, x, rhs, ignore, boxConstraints, transfer);
+    tnnmgStep.preprocess();
+    tnnmgStep.nestedIteration();
+
+    // cretae loop solver
+    Solver solver(&tnnmgStep, maxIterations, tolerance, &norm, Solver::FULL);
+
+    // solve problem
+    solver.solve();
+}
+
+
+
+
+
+template <class GridType>
+bool checkWithGrid(const GridType& grid, const std::string fileName="")
+{
+    bool passed = true;
+
+    static const int dim = GridType::dimension;
+
+    typedef typename Dune::FieldMatrix<double, 1, 1> MatrixBlock;
+    typedef typename Dune::FieldVector<double, 1> VectorBlock;
+    typedef typename Dune::BCRSMatrix<MatrixBlock> Matrix;
+    typedef typename Dune::BlockVector<VectorBlock> Vector;
+    typedef typename Dune::BitSetVector<1> BitVector;
+
+    typedef typename GridType::LeafGridView GridView;
+    typedef typename Dune::FieldVector<double, dim> DomainType;
+    typedef typename Dune::FieldVector<double, 1> RangeType;
+
+
+    const GridView gridView = grid.leafView();
+
+    Matrix A;
+    constructPQ1Pattern(gridView, A);
+    A=0.0;
+    assemblePQ1Stiffness(gridView, A);
+
+    Vector rhs(A.N());
+    rhs = 0;
+    ConstantFunction<DomainType, RangeType> f(50);
+    assemblePQ1RHS(gridView, rhs, f);
+
+    Vector u(A.N());
+    u = 0;
+
+    BitVector ignore(A.N());
+    ignore.unsetAll();
+    markBoundaryDOFs(gridView, ignore);
+
+
+    solveObstacleProblemByTNNMG(grid, A, u, rhs, ignore);
+
+    if (fileName!="")
+    {
+        typename Dune::VTKWriter<GridView> vtkWriter(gridView);
+        vtkWriter.addVertexData(u, "solution");
+        vtkWriter.write(fileName);
+    }
+
+
+    return passed;
+}
+
+
+template <int dim>
+bool checkWithYaspGrid(int refine, const std::string fileName="")
+{
+    bool passed = true;
+
+    typedef Dune::YaspGrid<dim> GridType;
+
+    typename Dune::FieldVector<typename GridType::ctype,dim> L(1.0);
+    typename Dune::FieldVector<int,dim> s(1);
+    typename Dune::FieldVector<bool,dim> periodic(false);
+
+    GridType grid(L, s, periodic, 0);
+
+    for (int i = 0; i < refine; ++i)
+        grid.globalRefine(1);
+
+    std::cout << "Testing with YaspGrid<" << dim << ">" << std::endl;
+    std::cout << "Number of levels: " << (grid.maxLevel()+1) << std::endl;
+    std::cout << "Number of leaf nodes: " << grid.leafView().size(dim) << std::endl;
+
+    passed = passed and checkWithGrid(grid, fileName);
+
+
+    return passed;
+}
+
+
+
+
+int main(int argc, char** argv) try
+{
+    bool passed(true);
+
+
+    int refine1d = 16;
+    int refine2d = 8;
+    int refine3d = 5;
+
+    if (argc>1)
+        std::istringstream(argv[1]) >> refine1d;
+    if (argc>2)
+        std::istringstream(argv[2]) >> refine2d;
+    if (argc>3)
+        std::istringstream(argv[3]) >> refine3d;
+
+    passed = passed and checkWithYaspGrid<1>(refine1d, "obstacletnnmgtest_yasp_1d_solution");
+    passed = passed and checkWithYaspGrid<2>(refine2d, "obstacletnnmgtest_yasp_2d_solution");
+    passed = passed and checkWithYaspGrid<3>(refine3d, "obstacletnnmgtest_yasp_3d_solution");
+
+    return passed ? 0 : 1;
+}
+catch (Dune::Exception e) {
+
+    std::cout << e << std::endl;
+
+}
+
diff --git a/dune/solvers/transferoperators/genericmultigridtransfer.hh b/dune/solvers/transferoperators/genericmultigridtransfer.hh
index 6e7d501f18e88a940a074c23c64c06808de0a807..2388f0c169deedd29679c75ae1a32533956f314d 100644
--- a/dune/solvers/transferoperators/genericmultigridtransfer.hh
+++ b/dune/solvers/transferoperators/genericmultigridtransfer.hh
@@ -53,7 +53,7 @@ public:
     }
 
     template<class MatrixType>
-    static bool diagonalIsOne(const MatrixType& A, int j)
+    static bool diagonalIsOne(const MatrixType& A, unsigned int j)
     {
         if (j>=int(A.N()))
             return A[0][0]>1-1e-5;
@@ -273,7 +273,7 @@ public:
         typedef typename TransferOperatorType::row_type RowType;
         typedef typename RowType::ConstIterator ColumnIterator;
 
-        for (size_t rowIdx=0; rowIdx<matrix.N(); rowIdx++)
+        for(size_t rowIdx=0; rowIdx<matrix.N(); rowIdx++)
         {
             const RowType& row = matrix[rowIdx];
 
@@ -514,7 +514,7 @@ public:
                 for(int i=0; i<virtualBlockSize; ++i)
                 {
                     const typename BitVectorType::const_reference fineBits = f[rowIdx*virtualBlockSize+i];
-                    for (size_t j=0; j<fineBits.size(); ++j)
+                    for(size_t j=0; j<fineBits.size(); ++j)
                         if (fineBits.test(j))
                             t[cIt.index()*virtualBlockSize+i][j] = true;
                 }
@@ -602,7 +602,7 @@ public:
                 for(int i=0; i<virtualBlockSize; ++i)
                 {
                     const typename BitVectorType::const_reference fineBits = f[rowIdx*virtualBlockSize+i];
-                    for (size_t j=0; j<fineBits.size(); ++j)
+                    for(size_t j=0; j<fineBits.size(); ++j)
                         if (fineBits.test(j))
                             if (diagonalIsOne(*cIt, j))
                                 t[cIt.index()*virtualBlockSize+i][j] = true;
@@ -639,6 +639,8 @@ public:
 
             for (; m!=mEnd; ++m)
             {
+                if (m->infinity_norm()==0)
+                    continue;
                 int w = m.index();
 
                 // Loop over all coarse grid vectors iv that have v in their support
@@ -702,6 +704,8 @@ public:
 
             for (; m!=mEnd; ++m)
             {
+                if (m->infinity_norm()==0)
+                    continue;
                 int w = m.index();
                 int w_block = w/virtualBlockSize;
                 int w_inblock = w%virtualBlockSize;
@@ -766,6 +770,8 @@ public:
 
             for (; m!=mEnd; ++m)
             {
+                if (m->infinity_norm()==0)
+                    continue;
                 int w = m.index();
 
                 // Loop over all coarse grid vectors iv that have v in their support
@@ -825,6 +831,8 @@ public:
 
             for (; m!=mEnd; ++m)
             {
+                if (m->infinity_norm()==0)
+                    continue;
                 int w = m.index();
                 int w_block = w/virtualBlockSize;
                 int w_inblock = w%virtualBlockSize;
diff --git a/dune/solvers/transferoperators/mandelobsrestrictor.cc b/dune/solvers/transferoperators/mandelobsrestrictor.cc
index 2bab8b36a219e8c42406210d941164984d5721ca..b30c278d6114ad8f072594401e93327cd9b27676 100644
--- a/dune/solvers/transferoperators/mandelobsrestrictor.cc
+++ b/dune/solvers/transferoperators/mandelobsrestrictor.cc
@@ -28,7 +28,7 @@ restrict(const std::vector<BoxConstraint<typename DiscFuncType::field_type,block
     typedef typename RowType::ConstIterator ColumnIterator;
     
     // Loop over all coarse grid dofs
-    for (int i=0; i<transferMat.N(); i++) {
+    for (size_t i=0; i<transferMat.N(); i++) {
         
         const RowType& row = transferMat[i];
         
diff --git a/dune/solvers/transferoperators/truncatedcompressedmgtransfer.cc b/dune/solvers/transferoperators/truncatedcompressedmgtransfer.cc
index 38d1514c7c5e682775a823517deab3475827c7a9..55d0755e3b54a53f82297aba71972f8863bfa5be 100644
--- a/dune/solvers/transferoperators/truncatedcompressedmgtransfer.cc
+++ b/dune/solvers/transferoperators/truncatedcompressedmgtransfer.cc
@@ -19,9 +19,8 @@ void TruncatedCompressedMGTransfer<VectorType, BitVectorType, MatrixType>::prolo
     typedef typename RowType::ConstIterator ColumnIterator;
 
     Iterator tIt      = t.begin();
-    ConstIterator fIt = f.begin(); 
 
-    for(int rowIdx=0; rowIdx<this->matrix_->N(); rowIdx++) {
+    for(size_t rowIdx=0; rowIdx<this->matrix_->N(); rowIdx++) {
 
         const RowType& row = (*this->matrix_)[rowIdx];
 
@@ -65,7 +64,7 @@ void TruncatedCompressedMGTransfer<VectorType, BitVectorType, MatrixType>::restr
     typedef typename TransferOperatorType::row_type RowType;
     typedef typename RowType::ConstIterator ColumnIterator;
 
-    for (int rowIdx=0; rowIdx<this->matrix_->N(); rowIdx++) {
+    for (size_t rowIdx=0; rowIdx<this->matrix_->N(); rowIdx++) {
 
         const RowType& row = (*this->matrix_)[rowIdx];
 
@@ -108,7 +107,7 @@ galerkinRestrict(const MatrixType& fineMat, MatrixType& coarseMat,
   if (this->recompute_ == NULL)
       coarseMat = 0;
   else {
-      for (int i=0; i<coarseMat.N(); i++) {
+      for (size_t i=0; i<coarseMat.N(); i++) {
 
           RowType& row = coarseMat[i];
 
@@ -125,7 +124,7 @@ galerkinRestrict(const MatrixType& fineMat, MatrixType& coarseMat,
   }
 
   // Loop over all rows of the stiffness matrix
-  for (int v=0; v<fineMat.N(); v++) {
+  for (size_t v=0; v<fineMat.N(); v++) {
 
       const RowType& row = fineMat[v];