diff --git a/dune/solvers/iterationsteps/Makefile.am b/dune/solvers/iterationsteps/Makefile.am
index ddc45f75da2d5c93c09f391a0f228cab01ed1819..aac9853c9a5c3026577394e6dccad78064518865 100644
--- a/dune/solvers/iterationsteps/Makefile.am
+++ b/dune/solvers/iterationsteps/Makefile.am
@@ -12,6 +12,7 @@ iterationsteps_HEADERS = amgstep.hh \
                          mmgstep.hh mmgstep.cc \
                          multigridstep.hh \
                          multigridstep.cc \
+                         obstacletnnmgstep.hh \
                          projectedblockgsstep.hh \
                          projectedblockgsstep.cc \
                          projectedlinegsstep.cc projectedlinegsstep.hh \
diff --git a/dune/solvers/iterationsteps/obstacletnnmgstep.hh b/dune/solvers/iterationsteps/obstacletnnmgstep.hh
new file mode 100644
index 0000000000000000000000000000000000000000..799902839509475695015fce2bce8dfa2820dace
--- /dev/null
+++ b/dune/solvers/iterationsteps/obstacletnnmgstep.hh
@@ -0,0 +1,383 @@
+#ifndef OBSTACLE_TNNMG_STEP_HH
+#define OBSTACLE_TNNMG_STEP_HH
+
+// std includes
+#include <vector>
+#include <algorithm>
+
+// dune-common includes
+#include <dune/common/bitsetvector.hh>
+
+// dune-solver includes
+#include <dune/solvers/iterationsteps/iterationstep.hh>
+#include <dune/solvers/iterationsteps/truncatedblockgsstep.hh>
+#include <dune/solvers/iterationsteps/projectedblockgsstep.hh>
+#include <dune/solvers/iterationsteps/multigridstep.hh>
+
+#include <dune/solvers/solvers/loopsolver.hh>
+#include <dune/solvers/norms/energynorm.hh>
+#include <dune/solvers/common/boxconstraint.hh>
+
+
+
+
+
+/**
+ * \brief TNNMG step for quadratic obstacle problems
+ *
+ * This is a dune-solvers iteration step implementing
+ * the TNNMG (truncated nonsmooth Newton multigrid) method
+ * for convex quadratic obstacle problems for use in the
+ * LoopSolver class.
+ *
+ * The method basically consists of a projected Gauss-Seidel
+ * method as nonlinear pre- and post-smoother and a linear
+ * multigrid method for a truncated linear system. For
+ * details please refer to
+ *
+ *   'Multigrid Methods for Obstacle Problems', C. Gräser and R. Kornhuber
+ *   Journal of Computational Mathematics 27 (1), 2009, pp. 1-44 
+ *
+ * A much more general and flexible implementation that can also be used for
+ * energy functionals with nonquadratic (anisotropic) smooth part
+ * and other nonsmooth part, e.g. incorporating local simplex
+ * constraints, can be found in the dune-tnnmg module.
+ *
+ * \tparam MatrixType Type for the matrix describing the quadratic part
+ * \tparam VectorType Type for solution and rhs vectors
+ */
+template<class MatrixType, class VectorType>
+class ObstacleTNNMGStep
+    : public IterationStep<VectorType>
+{
+        typedef IterationStep<VectorType> Base;
+        typedef typename VectorType::field_type field_type;
+
+        enum {blockSize = VectorType::block_type::dimension};
+    public:
+
+        typedef VectorType Vector;
+        typedef MatrixType Matrix;
+        typedef BoxConstraint<field_type, blockSize> BlockBoxConstraint;
+        typedef typename std::vector<BlockBoxConstraint> BoxConstraintVector;
+        typedef typename Dune::BitSetVector<VectorType::block_type::dimension> BitVector;
+
+        typedef MultigridTransfer<Vector, BitVector, Matrix> Transfer;
+        typedef typename std::vector<Transfer*> TransferPointerVector;
+
+    protected:
+
+        typedef ProjectedBlockGSStep<Matrix, Vector> NonlinearSmoother;
+        typedef TruncatedBlockGSStep<Matrix, Vector> LinearSmoother;
+        typedef MultigridStep<Matrix, Vector, BitVector> LinearMultigridStep;
+        typedef EnergyNorm<Matrix, Vector> LinearBaseSolverEnergyNorm;
+        typedef LoopSolver<Vector> LinearBaseSolver;
+
+    public:
+
+        /**
+         * \brief Construct iteration step
+         *
+         * \param mat Matrix describing the quadratic part of the energy
+         * \param x Solution vector containing the initial iterate
+         * \param rhs Vector containing the linear part of the energy
+         * \param ignore BitVector marking components to ignore (e.g. Dirichlet of hanging nodes)
+         * \param obstacles std::vector containing local obstacles for each global index
+         * \param transfer A vector of pointers to transfer operators describing the subspace hierarchy
+         * \param truncationTol Components will be truncated for the coarse grid correction if they are
+         *                      this close to the obstacle (the default will work well in general).
+         *
+         */
+        ObstacleTNNMGStep(
+                const Matrix& mat,
+                Vector& x,
+                const Vector& rhs,
+                const BitVector& ignore,
+                const BoxConstraintVector& obstacles,
+                const TransferPointerVector& transfer,
+                double truncationTol=1e-10) :
+            Base(x),
+            mat_(mat),
+            rhs_(rhs),
+            obstacles_(obstacles),
+            transfer_(transfer),
+            baseSolverNorm_(baseSolverStep_),
+            baseSolver_(&baseSolverStep_, 100, 1e-8, &baseSolverNorm_, LinearBaseSolver::QUIET),
+            nonlinearSmoother_(mat_, *x_, rhs_),
+            preSmoothingSteps_(3),
+            postSmoothingSteps_(3),
+            linearIterationSteps_(1),
+            linearPreSmoothingSteps_(3),
+            linearPostSmoothingSteps_(3),
+            truncationTol_(truncationTol)
+        {
+            ignoreNodes_ = &ignore;
+        }
+
+
+        /**
+         * \brief Set type of multigrid cycle
+         *
+         * If you don't call this method manually, 3 nonlinear pre- and post-smoothing
+         * steps and a linear V(3,3)-cycle will be used.
+         *
+         * \param preSmoothingSteps Number of nonlinear pre-smoothing steps.
+         * \param linearIterationSteps Number of linear multigrid steps for truncated problem
+         * \param postSmoothingSteps Number of nonlinear post-smoothing steps.
+         * \param linearPreSmoothingSteps Number of linear pre-smoothing steps during the linear multigrid V-cycle
+         * \param linearPostSmoothingSteps Number of linear post-smoothing steps during the linear multigrid V-cycle
+         */
+        void setSmoothingSteps(int preSmoothingSteps, int linearIterationSteps, int postSmoothingSteps, int linearPreSmoothingSteps, int linearPostSmoothingSteps)
+        {
+            preSmoothingSteps_ = preSmoothingSteps;
+            postSmoothingSteps_ = postSmoothingSteps;
+            linearIterationSteps_ = linearIterationSteps;
+            linearPreSmoothingSteps_ = linearPreSmoothingSteps;
+            linearPostSmoothingSteps_ = linearPostSmoothingSteps;
+        }
+
+
+        /**
+         * \brief Setup the iteration step
+         *
+         * This method must be called before the iterate method is called.
+         */
+        virtual void preprocess()
+        {
+            truncatedResidual_.resize(x_->size());
+            residual_.resize(x_->size());
+            coarseCorrection_.resize(x_->size());
+            temp_.resize(x_->size());
+
+            linearMGStep_.setNumberOfLevels(transfer_.size()+1);
+            linearMGStep_.setTransferOperators(transfer_);
+            linearMGStep_.setSmoother(&linearSmoother_);
+            linearMGStep_.basesolver_ = &baseSolver_;
+            linearMGStep_.setMGType(1, linearPreSmoothingSteps_, linearPostSmoothingSteps_);
+
+            hasObstacle_.resize(x_->size());
+            hasObstacle_.setAll();
+
+            nonlinearSmoother_.obstacles_ = &obstacles_;
+            nonlinearSmoother_.hasObstacle_ = &hasObstacle_;
+            nonlinearSmoother_.ignoreNodes_ = ignoreNodes_;
+            outStream_.str("");
+            outStream_ << "      step size";
+            for(int j=0; j<blockSize; ++j)
+                outStream_ << "  trunc" << std::setw(2) << j;
+        }
+
+
+        /**
+         * \brief Apply one iteration step
+         *
+         * You can query the current iterate using the getSol() method
+         * afterwards. The preprocess() method must be called before this
+         * one.
+         */
+        virtual void iterate()
+        {
+            // clear previous output data
+            outStream_.str("");
+
+            // use reference to simplify the following code
+            Vector& x = *x_;
+
+            // nonlinear pre-smoothing
+            for(int i=0; i<preSmoothingSteps_; ++i)
+                nonlinearSmoother_.iterate();
+            x = nonlinearSmoother_.getSol();
+
+            // compute residual
+            residual_ = rhs_;
+            mat_.mmv(x, residual_);
+
+            // initialize matrix, residual, and correction vector
+            // for linear coarse grid correction
+            Matrix truncatedMat = mat_;
+            truncatedResidual_ = residual_;
+            coarseCorrection_ = 0;
+
+            // mark indices for truncation and count truncated indices per component
+            typename Dune::array<int, blockSize> truncationCount;
+            truncationCount.fill(0);
+            BitVector truncate(x.size(), false);
+            for(int i=0; i<x.size(); ++i)
+            {
+                for (int j = 0; j < blockSize; ++j)
+                {
+                    if ((x[i][j]<=obstacles_[i].lower(j)+truncationTol_) or (x[i][j]>=obstacles_[i].upper(j)-truncationTol_))
+                    {
+                        truncate[i][j] = true;
+                        ++truncationCount[j];
+                    }
+                }
+            }
+
+            // truncate matrix and residual
+            typename Matrix::row_type::Iterator it;
+            typename Matrix::row_type::Iterator end;
+            for(int i=0; i<x.size(); ++i)
+            {
+                it = truncatedMat[i].begin();
+                end = truncatedMat[i].end();
+                for(; it!=end; ++it)
+                {
+                    int j = it.index();
+                    typename Matrix::block_type& Aij = *it;
+                    for (int ii = 0; ii < blockSize; ++ii)
+                        for (int jj = 0; jj < blockSize; ++jj)
+                            if (truncate[i][ii] or truncate[j][jj])
+                                Aij[ii][jj] = 0;
+                }
+                for (int ii = 0; ii < blockSize; ++ii)
+                    if (truncate[i][ii])
+                        truncatedResidual_[i][ii] = 0;
+            }
+
+            // apply linear multigrid to approximatively solve the truncated linear system
+            linearMGStep_.setProblem(truncatedMat, coarseCorrection_, truncatedResidual_);
+            linearMGStep_.ignoreNodes_ = ignoreNodes_;
+            linearMGStep_.preprocess();
+            for(int i=0; i<linearIterationSteps_; ++i)
+                linearMGStep_.iterate();
+            coarseCorrection_ = linearMGStep_.getSol();
+
+            // post process multigrid correction
+            // There will be spurious entries in non-truncated
+            // components since we did not truncate the transfer
+            // operators. These can safely be removed.
+            for(int i=0; i<x.size(); ++i)
+                for (int ii = 0; ii < blockSize; ++ii)
+                    if (truncate[i][ii])
+                        coarseCorrection_[i][ii] = 0;
+
+            // project correction into the defect domain
+            // and compute directional constraints for line search
+            BoxConstraint<field_type, 1> directionalConstraints;
+            for(int i=0; i<x.size(); ++i)
+            {
+                // compute local defect constraints
+                BlockBoxConstraint defectConstraint = obstacles_[i];
+                defectConstraint -= x[i];
+
+                // project correction into local defect constraints
+                for (int j = 0; j < blockSize; ++j)
+                {
+                    if (coarseCorrection_[i][j] < defectConstraint.lower(j))
+                        coarseCorrection_[i][j] = defectConstraint.lower(j);
+                    if (coarseCorrection_[i][j] > defectConstraint.upper(j))
+                        coarseCorrection_[i][j] = defectConstraint.upper(j);
+                }
+
+                for (int j = 0; j < blockSize; ++j)
+                {
+                    // compute relative defect constraints
+                    defectConstraint.lower(j) /= coarseCorrection_[i][j];
+                    defectConstraint.upper(j) /= coarseCorrection_[i][j];
+
+                    // orient relative defect constraints
+                    if (defectConstraint.lower(j) > defectConstraint.upper(j))
+                        std::swap(defectConstraint.lower(j), defectConstraint.upper(j));
+
+                    // update directional constraints
+                    directionalConstraints.lower(0) = std::max(directionalConstraints.lower(0), defectConstraint.lower(j));
+                    directionalConstraints.upper(0) = std::min(directionalConstraints.upper(0), defectConstraint.upper(j));
+                }
+            }
+
+            // compute damping parameter
+            mat_.mv(coarseCorrection_, temp_);
+            field_type damping = 0;
+            field_type coarseCorrectionNorm2 = coarseCorrection_*temp_;
+            if (coarseCorrectionNorm2 > 0)
+                damping = (residual_*coarseCorrection_) / coarseCorrectionNorm2;
+            else
+                damping = 0;
+            damping = std::max(damping, directionalConstraints.lower(0));
+            damping = std::min(damping, directionalConstraints.upper(0));
+
+            // apply correction
+            x.axpy(damping, coarseCorrection_);
+
+            // nonlinear post-smoothing
+            for(int i=0; i<postSmoothingSteps_; ++i)
+                nonlinearSmoother_.iterate();
+            x = nonlinearSmoother_.getSol();
+
+            // fill stream with output data
+            outStream_.setf(std::ios::scientific);
+            outStream_ << std::setw(15) << damping;
+            for(int j=0; j<blockSize; ++j)
+                outStream_ << std::setw(9) << truncationCount[j];
+        }
+
+
+        /**
+         * \brief Get the current iterate
+         */
+        VectorType getSol()
+        {
+            return *x_;
+        }
+
+
+        /**
+         * \brief Get output accumulated during last iterate() call.
+         *
+         * This method is used to report TNNMG-specific data to the
+         * LoopSolver class during the iterative solution.
+         */
+        std::string getOutput() const
+        {
+            std::string s = outStream_.str();
+            outStream_.str("");
+            return s;
+        }
+
+    protected:
+
+        // problem description given by the user
+        const Matrix& mat_;
+        using Base::x_;
+        using Base::ignoreNodes_;
+        const Vector& rhs_;
+        const BoxConstraintVector& obstacles_;
+
+        // transfer operators given by the user
+        const TransferPointerVector& transfer_;
+
+        // vectors needed during iteration
+        Vector residual_;
+        Vector temp_;
+        Vector truncatedResidual_;
+        Vector coarseCorrection_;
+
+        // solver components
+        NonlinearSmoother nonlinearSmoother_;
+        typename Dune::BitSetVector<1> hasObstacle_;
+        double truncationTol_;
+
+        LinearMultigridStep linearMGStep_;
+        LinearSmoother linearSmoother_;
+        LinearSmoother baseSolverStep_;
+        LinearBaseSolverEnergyNorm baseSolverNorm_;
+        LinearBaseSolver baseSolver_;
+
+        int preSmoothingSteps_;
+        int postSmoothingSteps_;
+        int linearIterationSteps_;
+        int linearPreSmoothingSteps_;
+        int linearPostSmoothingSteps_;
+
+        mutable std::ostringstream outStream_;
+};
+
+
+
+
+
+
+
+
+#endif