diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8e2152c39350204f86f0d42bd087c860fbd2194c..be2ee137d335cbbd8f63b5fe208a1fb9788e7850 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -13,7 +13,3 @@ dune:git  clang  C++17:
 dune:git  gcc-8  C++17:
   image: registry.dune-project.org/docker/ci/dune:git-debian-10-gcc-8-17
   script: duneci-standard-test
-
-dune:git  gcc-6  C++14:
-  image: registry.dune-project.org/docker/ci/dune:git-debian-9-gcc-6-14
-  script: duneci-standard-test
diff --git a/dune/elasticity/assemblers/feassembler.hh b/dune/elasticity/assemblers/feassembler.hh
index 0486478a3df58ad5b879f4c148237093b6f2148e..9e0c82f59a912538dbc951844985d5d0b1c96ed6 100644
--- a/dune/elasticity/assemblers/feassembler.hh
+++ b/dune/elasticity/assemblers/feassembler.hh
@@ -28,6 +28,12 @@ class FEAssembler {
 public:
     const Basis basis_;
 
+    /** \brief Partition type on which to assemble
+     *
+     * We want a fully nonoverlapping partition, and therefore need to skip ghost elements.
+     */
+    static constexpr auto partitionType = Dune::Partitions::interiorBorder;
+
 protected:
 
     LocalFEStiffness<GridView,
@@ -70,7 +76,7 @@ getNeighborsPerVertex(Dune::MatrixIndexSet& nb) const
 
     nb.resize(n, n);
 
-    for (const auto& element : elements(basis_.getGridView(), Dune::Partitions::interior))
+    for (const auto& element : elements(basis_.getGridView(), partitionType))
     {
         const typename Basis::LocalFiniteElement& lfe = basis_.getLocalFiniteElement(element);
 
@@ -111,7 +117,7 @@ assembleGradientAndHessian(const VectorType& sol,
     gradient.resize(sol.size());
     gradient = 0;
 
-    for (const auto& element : elements(basis_.getGridView(), Dune::Partitions::interior))
+    for (const auto& element : elements(basis_.getGridView(), partitionType))
     {
         const int numOfBaseFct = basis_.getLocalFiniteElement(element).localBasis().size();
 
@@ -159,7 +165,7 @@ computeEnergy(const VectorType& sol) const
         DUNE_THROW(Dune::Exception, "Solution vector doesn't match the basis!");
 
     // Loop over all elements
-    for (const auto& element : elements(basis_.getGridView(), Dune::Partitions::interior))
+    for (const auto& element : elements(basis_.getGridView(), partitionType))
     {
         // Number of degrees of freedom on this element
         size_t nDofs = basis_.getLocalFiniteElement(element).localBasis().size();
diff --git a/dune/elasticity/assemblers/localadolcstiffness.hh b/dune/elasticity/assemblers/localadolcstiffness.hh
index 582381f1772bc9701088413d2c554830066a318c..a22bb83dcd930a288b5ef390f2879cbe4751c985 100644
--- a/dune/elasticity/assemblers/localadolcstiffness.hh
+++ b/dune/elasticity/assemblers/localadolcstiffness.hh
@@ -68,11 +68,12 @@ energy(const Entity& element,
        const LocalFiniteElement& localFiniteElement,
        const VectorType& localSolution) const
 {
+    int rank = Dune::MPIHelper::getCollectiveCommunication().rank();
     double pureEnergy;
 
     std::vector<Dune::FieldVector<adouble,blocksize> > localASolution(localSolution.size());
 
-    trace_on(1);
+    trace_on(rank);
 
     adouble energy = 0;
 
@@ -84,7 +85,7 @@ energy(const Entity& element,
 
     energy >>= pureEnergy;
 
-    trace_off();
+    trace_off(rank);
 
     return pureEnergy;
 }
@@ -103,6 +104,7 @@ assembleGradientAndHessian(const Entity& element,
                 const VectorType& localSolution,
                 VectorType& localGradient)
 {
+    int rank = Dune::MPIHelper::getCollectiveCommunication().rank();
     // Tape energy computation.  We may not have to do this every time, but it's comparatively cheap.
     energy(element, localFiniteElement, localSolution);
 
@@ -121,7 +123,7 @@ assembleGradientAndHessian(const Entity& element,
 
   // Compute gradient
     std::vector<double> g(nDoubles);
-    gradient(1,nDoubles,xp.data(),g.data());                  // gradient evaluation
+    gradient(rank,nDoubles,xp.data(),g.data());                  // gradient evaluation
 
     // Copy into Dune type
     localGradient.resize(localSolution.size());
@@ -151,7 +153,7 @@ assembleGradientAndHessian(const Entity& element,
           v[i*blocksize + k] = (k==ii);
 
         int rc= 3;
-        MINDEC(rc, hess_vec(1, nDoubles, xp.data(), v.data(), w.data()));
+        MINDEC(rc, hess_vec(rank, nDoubles, xp.data(), v.data(), w.data()));
         if (rc < 0)
           DUNE_THROW(Dune::Exception, "ADOL-C has returned with error code " << rc << "!");
 
@@ -181,7 +183,7 @@ assembleGradientAndHessian(const Entity& element,
         tangent[(j/blocksize)*embeddedBlocksize+i][j] = orthonormalFrame[j/blocksize][j%blocksize][i];
     }
 
-    hess_mat(1,nDoubles,nDirections,xp.data(),tangent,rawHessian);
+    hess_mat(rank,nDoubles,nDirections,xp.data(),tangent,rawHessian);
 
     // Copy Hessian into Dune data type
     for(size_t i=0; i<nDoubles; i++)
diff --git a/dune/elasticity/common/trustregionsolver.cc b/dune/elasticity/common/trustregionsolver.cc
index 5d16bf1c87a5692b6cd649fea7ef27c251f13850..0d1da517579abca3b98f6ebe52e357054f8f4c74 100644
--- a/dune/elasticity/common/trustregionsolver.cc
+++ b/dune/elasticity/common/trustregionsolver.cc
@@ -40,16 +40,8 @@ setup(const typename BasisType::GridView::Grid& grid,
          int nu2,
          int baseIterations,
          double baseTolerance)
-{
-#if HAVE_DUNE_PARMG
-    Dune::Timer setupTimer;
-    mgSetup_ = std::make_unique<MGSetup>(grid);
-    mgSetup_->overlap(true);
-    mgSetup_->setupSmoother(1.0);
 
-    if (grid.comm().rank()==0)
-        std::cout << "Parallel multigrid setup took " << setupTimer.stop() << " seconds." << std::endl;
-#endif
+{
     grid_                     = &grid;
     assembler_                = assembler;
     x_                        = x;
@@ -59,10 +51,30 @@ setup(const typename BasisType::GridView::Grid& grid,
     innerIterations_          = multigridIterations;
     innerTolerance_           = mgTolerance;
     ignoreNodes_              = std::make_shared<Dune::BitSetVector<blocksize>>(dirichletNodes);
+    baseIterations_           = baseIterations;
+    baseTolerance_            = baseTolerance;
 
     int numLevels = grid_->maxLevel()+1;
 
-#if !HAVE_DUNE_PARMG
+#if HAVE_DUNE_PARMG
+    Dune::Timer setupTimer;
+    mgSetup_ = std::make_unique<MGSetup>(grid);
+    mgSetup_->overlap(false);
+    mgSetup_->ignore(std::const_pointer_cast<Dune::BitSetVector<blocksize> >(ignoreNodes_));
+
+    BasisType feBasis(grid.levelGridView(grid.maxLevel()));
+    DuneFunctionsBasis<BasisType> basis(feBasis);
+
+    innerMultigridStep_ = std::make_unique<Dune::ParMG::Multigrid<VectorType> >();
+    innerMultigridStep_->mu_ = mu;
+    innerMultigridStep_->preSmootherSteps_ = nu1;
+    innerMultigridStep_->postSmootherSteps_ = nu2;
+
+    if (grid.comm().rank()==0)
+        std::cout << "Parallel multigrid setup took " << setupTimer.stop() << " seconds." << std::endl;
+#else
+    DuneFunctionsBasis<BasisType> basis(grid.leafGridView());
+
     // ////////////////////////////////
     //   Create a multigrid solver
     // ////////////////////////////////
@@ -87,11 +99,6 @@ setup(const typename BasisType::GridView::Grid& grid,
                                                                             baseNorm,
                                                                             Solver::QUIET);
 #endif
-#endif
-
-#if HAVE_DUNE_PARMG
-    mgSetup_->ignore(std::const_pointer_cast<Dune::BitSetVector<blocksize> >(ignoreNodes_));
-#else
     // Make pre and postsmoothers
     auto presmoother  = std::make_shared< TrustRegionGSStep<MatrixType, CorrectionType> >();
     auto postsmoother = std::make_shared< TrustRegionGSStep<MatrixType, CorrectionType> >();
@@ -108,13 +115,6 @@ setup(const typename BasisType::GridView::Grid& grid,
     // //////////////////////////////////////////////////////////////////////////////////////
     //   Assemble a Laplace matrix to create a norm that's equivalent to the H1-norm
     // //////////////////////////////////////////////////////////////////////////////////////
-
-#if HAVE_DUNE_PARMG
-    BasisType feBasis(grid.levelGridView(grid.maxLevel()));
-    DuneFunctionsBasis<BasisType> basis(feBasis);
-#else
-    DuneFunctionsBasis<BasisType> basis(grid.leafGridView());
-#endif
     OperatorAssembler<DuneFunctionsBasis<BasisType>,DuneFunctionsBasis<BasisType>,Dune::Partitions::Interior> operatorAssembler(basis, basis);
 
     LaplaceAssembler<GridType,
@@ -129,19 +129,6 @@ setup(const typename BasisType::GridView::Grid& grid,
 
     h1SemiNorm_ = std::make_shared< H1SemiNorm<CorrectionType> >(*A);
 
-#if HAVE_DUNE_PARMG
-    innerMultigridStep_ = std::make_unique<Dune::ParMG::Multigrid<VectorType> >();
-    innerMultigridStep_->mu_ = mu;
-    innerMultigridStep_->preSmootherSteps_ = nu1;
-    innerMultigridStep_->postSmootherSteps_ = nu2;
-#else
-    innerSolver_ = std::make_shared<LoopSolver<CorrectionType> >(mmgStep,
-                                                                   innerIterations_,
-                                                                   innerTolerance_,
-                                                                   h1SemiNorm_,
-                                                                 Solver::REDUCED);
-#endif
-
     // //////////////////////////////////////////////////////////////////////////////////////
     //   Assemble a mass matrix to create a norm that's equivalent to the L2-norm
     //   This will be used to monitor the gradient
@@ -167,6 +154,11 @@ setup(const typename BasisType::GridView::Grid& grid,
     indices.exportIdx(*hessianMatrix_);
 
 #if !HAVE_DUNE_PARMG
+    innerSolver_ = std::make_shared<LoopSolver<CorrectionType> >(mmgStep,
+                                                                   innerIterations_,
+                                                                   innerTolerance_,
+                                                                   h1SemiNorm_,
+                                                                 Solver::REDUCED);
     // ////////////////////////////////////
     //   Create the transfer operators
     // ////////////////////////////////////
@@ -248,9 +240,6 @@ void TrustRegionSolver<BasisType,VectorType>::solve()
     BasisType basis(grid_->levelGridView(grid_->maxLevel()));
     BasisType coarseBasis(grid_->levelGridView(0));
     std::vector<BoxConstraint<typename VectorType::field_type, blocksize>> coarseTrustRegionObstacles(coarseBasis.size());
-    int numLevels = grid_->maxLevel()+1;
-
-    auto& levelOp = mgSetup_->levelOps_;
 #endif
     MaxNormTrustRegion<blocksize> trustRegion(basis.size(), initialTrustRegionRadius_);
 
@@ -287,6 +276,10 @@ void TrustRegionSolver<BasisType,VectorType>::solve()
                                                    *hessianMatrix_,
                                                    i==0    // assemble occupation pattern only for the first call
                                                    );
+#if HAVE_DUNE_PARMG
+            std::function<void(VectorType&)> accumulate = Dune::ParMG::makeAccumulate<VectorType>(*(mgSetup_->comms_.back()));
+            accumulate(rhs);
+#endif
 
             rhs *= -1;        // The right hand side is the _negative_ gradient
 
@@ -306,15 +299,13 @@ void TrustRegionSolver<BasisType,VectorType>::solve()
             if (this->verbosity_ == Solver::FULL)
               std::cout << "Assembly took " << gradientTimer.elapsed() << " sec." << std::endl;
 
-            // Transfer matrix data
-            stiffnessMatrix = *hessianMatrix_;
-
 #if HAVE_DUNE_PARMG
-            Dune::ParMG::collectDiagonal(stiffnessMatrix, *mgSetup_->comms_.back());
+            if (!mgSetup_->overlap())
+                Dune::ParMG::collectDiagonal(*hessianMatrix_, *mgSetup_->comms_.back());
             mgSetup_->matrix(hessianMatrix_);
-
-            levelOp.back().maybeRestrictToMaster(rhs);
 #endif
+            // Transfer matrix data
+            stiffnessMatrix = *hessianMatrix_;
 
             recomputeGradientHessian = false;
 
@@ -323,17 +314,22 @@ void TrustRegionSolver<BasisType,VectorType>::solve()
         CorrectionType corr(rhs.size());
         corr = 0;
 
+        //Take the obstacles on the finest grid and give them to the multigrid solver, it will create a hierarchy for all coarser grids
+        trustRegionObstacles = trustRegion.obstacles();
+        
 #if ! HAVE_DUNE_PARMG
         mgStep->setProblem(stiffnessMatrix, corr, rhs);
 
-        trustRegionObstacles = trustRegion.obstacles();
         mgStep->setObstacles(trustRegionObstacles);
 #else
-        mgSetup_->setupLevelOps();
+        mgSetup_->setupObstacles(std::make_shared< std::vector<BoxConstraint<typename VectorType::field_type, blocksize>> >(trustRegionObstacles));
+        mgSetup_->setupSmoother(damping_);
+
+        auto& levelOp = mgSetup_->levelOps_;
 
         bool enableCoarseCorrection = true;
         if (enableCoarseCorrection)
-          mgSetup_->setupCoarseIPOPTSolver();
+          mgSetup_->setupCoarseIPOPTSolver(baseTolerance_, baseIterations_);
         else
           mgSetup_->setupCoarseNullSolver();
 
@@ -343,29 +339,15 @@ void TrustRegionSolver<BasisType,VectorType>::solve()
         using ProjGS = Dune::ParMG::ParallelProjectedGS<MatrixType, VectorType>;
         using namespace Dune::ParMG;
 
-        auto& levelOp = mgSetup_->levelOps_;
 
-        trustRegionObstacles = trustRegion.obstacles();
-        for (int i = 0; i < coarseTrustRegionObstacles.size(); i++)
-          coarseTrustRegionObstacles[i] = trustRegionObstacles[i];
-        mgSetup_->setCoarseObstacles(coarseTrustRegionObstacles);
         ProjGS projGs(&stiffnessMatrix, &trustRegionObstacles, mgSetup_->ignores_.back().get());
         projGs.accumulate([&](VectorType& x) {
           levelOp.back().maybeCopyFromMaster(x);
         });
         projGs.dampening(1.0);
 
-        std::function<void(VectorType&)> collect = Dune::ParMG::makeCollect<VectorType>(*(mgSetup_->comms_.back()));
         std::function<void(VectorType&)> restrictToMaster = [op=levelOp.back()](VectorType& x) { op.maybeRestrictToMaster(x); };
-
-        auto energyFunctional = Dune::ParMG::makeParallelEnergyFunctional(
-          stiffnessMatrix,
-          rhs,
-          grid_->comm(),
-          //collect
-          restrictToMaster
-          );
-          // matrix, b, dofmap, master);
+        restrictToMaster(rhs);
 
         //Distributed energy norm
         auto energyNorm = Dune::ParMG::parallelEnergyNorm<VectorType>(stiffnessMatrix, restrictToMaster, grid_->comm());
@@ -377,30 +359,16 @@ void TrustRegionSolver<BasisType,VectorType>::solve()
 
         levelOp.back().maybeCopyFromMaster(corr);
 
-        double J = 0.0;
         VectorType b;
         unsigned step = 0;
-        int activeObstacles;
         MPI_Barrier(grid_->comm());
-        auto realIterationStep = [&](VectorType& correction) {
-          auto& project = projGs.project_;
-          projGs.apply(correction, rhs);
-          // prepare truncation
-          auto currentIgnore = std::make_shared<Dune::BitSetVector<VectorType::block_type::dimension> >(*ignoreNodes_);
-          auto extraIgnore = std::make_shared<Dune::BitSetVector<VectorType::block_type::dimension> >(ignoreNodes_->size(), false);
-          project.ignoreActive(correction, *currentIgnore);
-          project.ignoreActive(correction, *extraIgnore);
-          mgSetup_->ignore(currentIgnore);
-          assert(currentIgnore->count() - ignoreNodes_->count() == extraIgnore->count());
-          activeObstacles = extraIgnore->count();
-
+        auto realIterationStep = [&](VectorType& innerIterate) {
           // Create vector b to keep rhs untouched!
           b = rhs;
-          innerMultigridStep_->apply(correction, b);
-
-          project(correction);
 
-          J = energyFunctional(correction);
+          // The variable innerIterate is the correction that will be added
+          // to the current iterate in the trust region step.
+          innerMultigridStep_->apply(innerIterate, b);
 
           ++step;
         };
@@ -410,22 +378,9 @@ void TrustRegionSolver<BasisType,VectorType>::solve()
         auto innerSolver_ = std::make_shared< LoopSolver<CorrectionType> >(iterationStep,
                                                                            innerIterations_,
                                                                            innerTolerance_,
-                                                                           // h1SemiNorm_,
+                                                                           // h1SemiNorm_, //TODO: test this!
                                                                            solverNorm,
                                                                            NumProc::QUIET);
-        innerSolver_->addCriterion(
-          [&]() {
-            return Dune::formatString("  % 12.10e", J);
-          },
-          "  energy      "
-          );
-
-        innerSolver_->addCriterion(
-          [&]() {
-            return Dune::formatString("  % 9d", activeObstacles);
-          },
-          "  activeObs"
-          );
 #endif
 
         innerSolver_->preprocess();
diff --git a/dune/elasticity/common/trustregionsolver.hh b/dune/elasticity/common/trustregionsolver.hh
index 1b8716552121fc1e3fae105561acb6bfba8c7c5b..d819a2a9102814e18d87ef4d4b9e730a577fc72d 100644
--- a/dune/elasticity/common/trustregionsolver.hh
+++ b/dune/elasticity/common/trustregionsolver.hh
@@ -119,6 +119,12 @@ protected:
     /** \brief Error tolerance of the multigrid QP solver */
     double innerTolerance_;
 
+    /** \brief Maximum number of base solver iterations */
+    int baseIterations_;
+
+    /** \brief Error tolerance of the base solver inside the multigrid QP solver */
+    double baseTolerance_;
+
     /** \brief Hessian matrix */
     std::shared_ptr<MatrixType> hessianMatrix_;
 
diff --git a/src/finite-strain-elasticity.cc b/src/finite-strain-elasticity.cc
index 1956edfec6bf7b6c06a1f650bab2814483293056..e5dccd81003cbfa9df6eacfbf8f7782899ab1c65 100644
--- a/src/finite-strain-elasticity.cc
+++ b/src/finite-strain-elasticity.cc
@@ -185,8 +185,7 @@ int main (int argc, char *argv[]) try
   BoundaryPatch<GridView> dirichletBoundary(gridView, dirichletVertices);
   BoundaryPatch<GridView> neumannBoundary(gridView, neumannVertices);
 
-  if (mpiHelper.rank()==0)
-    std::cout << "Neumann boundary has " << neumannBoundary.numFaces() << " faces\n";
+  std::cout << "On process " << mpiHelper.rank() << ": Neumann boundary has " << neumannBoundary.numFaces() << " faces\n";
 
 
   BitSetVector<1> dirichletNodes(feBasis.size(), false);
@@ -257,6 +256,7 @@ int main (int argc, char *argv[]) try
       neumannFunction = std::make_shared<NeumannFunction>(parameterSet.get<FieldVector<double,dim> >("neumannValues"),
                                                      homotopyParameter);
 
+    if (mpiHelper.rank()==0)
       std::cout << "Neumann values: " << parameterSet.get<FieldVector<double,dim> >("neumannValues") << std::endl;
     }
 
@@ -267,6 +267,7 @@ int main (int argc, char *argv[]) try
     }
 
     // Assembler using ADOL-C
+    if (mpiHelper.rank()==0)
     std::cout << "Selected energy is: " << parameterSet.get<std::string>("energy") << std::endl;
     std::shared_ptr<Elasticity::LocalEnergy<GridView,
                                      FEBasis::LocalView::Tree::FiniteElement,