diff --git a/dune/solvers/iterationsteps/CMakeLists.txt b/dune/solvers/iterationsteps/CMakeLists.txt
index 5fde9a8fc16ff62b4d9efe5f674b834eed3e8750..1ec54b402acde3d9513a84641b6160a6f1bcda64 100644
--- a/dune/solvers/iterationsteps/CMakeLists.txt
+++ b/dune/solvers/iterationsteps/CMakeLists.txt
@@ -5,6 +5,7 @@ install(FILES
     blockgssteps.hh
     cgstep.cc
     cgstep.hh
+    choleskystep.hh
     istlseqilu0step.hh
     istlseqssorstep.hh
     iterationstep.hh
diff --git a/dune/solvers/iterationsteps/choleskystep.hh b/dune/solvers/iterationsteps/choleskystep.hh
new file mode 100644
index 0000000000000000000000000000000000000000..2a2157b354a589060e4aa69687f7044d7ede9eef
--- /dev/null
+++ b/dune/solvers/iterationsteps/choleskystep.hh
@@ -0,0 +1,320 @@
+#ifndef DUNE_SOLVERS_ITERATIONSTEPS_CHOLESKYSTEP_HH
+#define DUNE_SOLVERS_ITERATIONSTEPS_CHOLESKYSTEP_HH
+
+#include <dune/matrix-vector/transpose.hh>
+#include <dune/matrix-vector/triangularsolve.hh>
+
+namespace {
+#if 0
+// slow!
+template <class Matrix>
+void Aupper(Matrix const &A, Matrix &G) {
+  Dune::Timer timer;
+
+  // Could be in-place but we unfortunately still need A.
+  Matrix R;
+  Dune::MatrixVector::transpose(A, R);
+  //printf("Aupper: copied and transposed.\n");
+
+  std::vector<typename Matrix::ColIterator> RDiagonals(R.N());
+  for (auto it = R.begin(); it != R.end(); ++it) {
+    auto const row = it.index();
+    for (auto cIt = it->begin(); cIt != it->end(); ++cIt) {
+      auto const col = cIt.index();
+      if (row == col) {
+          RDiagonals[row] = cIt;
+        break;
+      }
+    }
+  }
+  //printf("Aupper: diagonal elements found.\n");
+  Dune::Timer t2;
+  for (auto it = R.begin(); it != R.end(); ++it) {
+      auto j = it.index();
+      auto &Rjj = *RDiagonals[j];
+      Rjj = std::sqrt(Rjj);
+      for (auto kit = R.begin(); kit.index() < j; ++kit) {
+          auto k = kit.index();
+          if (!R.exists(k,j))
+              continue;
+          auto &Rkj = (*kit)[j];
+          auto jiIt = it->begin();
+          auto kiIt = kit->begin();
+          while (kiIt != kit->end() and jiIt != it->end()) {
+              auto ki = kiIt.index();
+              auto ji = jiIt.index();
+              if (ki < ji or ki <= j) {
+                  ++kiIt;
+                  continue;
+              }
+              if (ji < ki or ji <= j) {
+                  ++jiIt;
+                  continue;
+              }
+              *jiIt -= *kiIt * Rkj;
+              ++kiIt;
+              ++jiIt;
+          }
+      }
+      auto jiIt = RDiagonals[j];
+      jiIt++;
+      for (; jiIt != it->end(); ++jiIt) {
+          size_t const i = jiIt.index();
+          auto &Rii = *RDiagonals[i];
+          auto &Rji = *jiIt;
+          Rji /= Rjj;
+          Rii -= Rji * Rji;
+      }
+  }
+  //printf("Main loop took: %g\n", t2.elapsed());
+
+  Dune::MatrixIndexSet indices(R.N(), R.M());
+  // Pattern: upper triangular
+  for (auto it = R.begin(); it != R.end(); ++it) {
+    auto const row = it.index();
+    for (auto cIt = it->begin(); cIt != it->end(); ++cIt) {
+      auto const col = cIt.index();
+      if (row > col)
+          continue;
+      indices.add(row, col);
+    }
+  }
+  indices.exportIdx(G);
+  //printf("Aupper: Pattern constructed.\n");
+
+  // Final filling: triu(R)
+  typename Matrix::Iterator gIt = G.begin();
+  for (auto it = R.begin(); it != R.end(); ++it, ++gIt) {
+    auto const row = it.index();
+    typename Matrix::ColIterator gcIt = gIt->begin();
+    for (auto cIt = RDiagonals[row]; cIt != it->end(); ++cIt, ++gcIt) {
+      *gcIt = *cIt;
+    }
+  }
+  //printf("Aupper. Done overall after %g.\n", timer.elapsed());
+}
+#endif
+
+#if 0
+// slow!
+template <class Matrix>
+void Alower(Matrix const &A, Matrix &G) {
+  Dune::Timer timer;
+
+  // Could be in-place but we unfortunately still need A
+  Matrix R = A;
+  //printf("Alower: copied.\n");
+
+  size_t n = R.N();
+  for (size_t j = 0; j < n; ++j) {
+      R[j][j] = std::sqrt(R[j][j]);
+      for (size_t k = 0; k < j; ++k) {
+          if (!R.exists(j,k))
+              continue;
+          for (size_t i = j+1; i < n; ++i) {
+              if (!R.exists(i,k))
+                  continue;
+              if (R.exists(i,j)) // FIXME
+                  R[i][j] -= R[i][k] * R[j][k];
+          }
+      }
+      for (size_t i = j+1; i < n; ++i)
+          if (R.exists(i,j)) {
+              R[i][j] /= R[j][j];
+              R[i][i] -= R[i][j] * R[i][j];
+          }
+  }
+
+  Dune::MatrixIndexSet indices(R.N(), R.M());
+  // Pattern: lower triangular
+  for (auto it = R.begin(); it != R.end(); ++it) {
+    auto const row = it.index();
+    for (auto cIt = it->begin(); cIt != it->end(); ++cIt) {
+      auto const col = cIt.index();
+      if (row < col)
+          break;
+      indices.add(row, col);
+    }
+  }
+  indices.exportIdx(G);
+  //printf("Alower: Pattern constructed.\n");
+
+  // Final filling: tril(R)
+  typename Matrix::Iterator gIt = G.begin();
+  for (auto it = R.begin(); it != R.end(); ++it, ++gIt) {
+    auto const row = it.index();
+    typename Matrix::ColIterator gcIt = gIt->begin();
+    for (auto cIt = it->begin(); cIt != it->end(); ++cIt, ++gcIt) {
+      *gcIt = *cIt;
+      if (row == cIt.index())
+          break;
+    }
+  }
+  //printf("Alower. Done overall after %g.\n", timer.elapsed());
+}
+#endif
+
+#if 0
+// slow!
+template <class Matrix>
+void Cupper(Matrix const &A, Matrix &G) {
+  Dune::Timer timer;
+  Dune::MatrixIndexSet indices(A.N(), A.M());
+
+  // Pattern: upper triangular
+  std::vector<typename Matrix::ConstColIterator> originalDiagonals(A.N());
+  for (auto it = A.begin(); it != A.end(); ++it) {
+    auto const row = it.index();
+    for (auto cIt = it->begin(); cIt != it->end(); ++cIt) {
+      auto const col = cIt.index();
+      if (row > col)
+        continue;
+      if (row == col)
+        originalDiagonals[row] = cIt;
+      indices.add(row, col);
+    }
+  }
+  indices.exportIdx(G);
+  //printf("Cupper: Pattern constructed.\n");
+
+  // Initial filling: triu(A)
+  typename Matrix::Iterator gIt = G.begin();
+  for (auto it = A.begin(); it != A.end(); ++it, ++gIt) {
+    auto const row = it.index();
+    typename Matrix::ColIterator gcIt = gIt->begin();
+    for (auto cIt = originalDiagonals[row]; cIt != it->end(); ++cIt, ++gcIt)
+      *gcIt = *cIt;
+  }
+  //printf("Cupper: Initial filling done.\n");
+
+  for (size_t sweep = 0; sweep < 1; ++sweep) {
+    for (auto it = A.begin(); it != A.end(); ++it) {
+      auto const i = it.index();
+      for (auto cIt = originalDiagonals[i]; cIt != it->end(); ++cIt) {
+        auto const j = cIt.index();
+        auto s = *cIt;
+        for (size_t k = 0; k < i; ++k)
+          // Here, we suffer
+          if (G.exists(k, i) && G.exists(k, j))
+            s -= G[k][i] * G[k][j];
+        assert(G[i].begin().index() == i);
+        auto &Gii = *G[i].begin();
+        if (i != j)
+          G[i][j] = s / Gii;
+        else
+          Gii = std::sqrt(s);
+      }
+    }
+  }
+  //printf("Cupper. Done overall after %g.\n", timer.elapsed());
+}
+#endif
+
+template <class Matrix>
+void Clower(Matrix const &A, Matrix &Gt) {
+  Dune::Timer timer;
+  Matrix G;
+  Dune::MatrixIndexSet indices(A.N(), A.M());
+
+  // Pattern: upper triangular
+  std::vector<typename Matrix::ConstColIterator> originalDiagonals(A.N());
+  for (auto it = A.begin(); it != A.end(); ++it) {
+    auto const row = it.index();
+    for (auto cIt = it->begin(); cIt != it->end(); ++cIt) {
+      auto const col = cIt.index();
+      if (row > col)
+        continue;
+      if (row == col)
+        originalDiagonals[row] = cIt;
+      indices.add(row, col);
+    }
+  }
+  indices.exportIdx(G);
+  // printf("Clower: Pattern constructed.\n");
+
+  // Initial filling: triu(A)
+  typename Matrix::Iterator gIt = G.begin();
+  for (auto it = A.begin(); it != A.end(); ++it, ++gIt) {
+    auto const row = it.index();
+    typename Matrix::ColIterator gcIt = gIt->begin();
+    for (auto cIt = originalDiagonals[row]; cIt != it->end(); ++cIt, ++gcIt)
+      *gcIt = *cIt;
+  }
+  // printf("Clower: Initial filling done.\n");
+
+  Dune::MatrixVector::transpose(G, Gt);
+  for (size_t sweep = 0; sweep < 1; ++sweep) {
+    for (auto it = A.begin(); it != A.end(); ++it) {
+      auto const i = it.index();
+      for (auto cIt = originalDiagonals[i]; cIt != it->end(); ++cIt) {
+        auto const j = cIt.index();
+        auto s = *cIt;
+        auto Gti = Gt[i].begin();
+        auto Gtj = Gt[j].begin();
+        typename Matrix::block_type *Gtii = nullptr;
+        typename Matrix::block_type *Gtji = nullptr; // exists in A!
+        while (Gti != Gt[i].end() and Gtj != Gt[j].end()) {
+          auto gti = Gti.index();
+          auto gtj = Gtj.index();
+          if (gti == gtj) {
+            if (gti == i) {
+              Gtii = &*Gti;
+              Gtji = &*Gtj;
+              break;
+            }
+            s -= *Gti * *Gtj;
+            Gti++;
+            Gtj++;
+            continue;
+          }
+          if (gti < gtj)
+            ++Gti;
+          else
+            ++Gtj;
+        }
+        if (i != j)
+          *Gtji = s / *Gtii;
+        else
+          *Gtii = std::sqrt(s);
+      }
+    }
+  }
+  // printf("Clower. Done overall after %g.\n", timer.
+}
+}
+
+template <class Matrix, class Vector,
+          class BitVector = Dune::Solvers::DefaultBitVector_t<Vector>>
+class CholeskyStep : public LinearIterationStep<Matrix, Vector, BitVector> {
+public:
+  virtual void preprocess() override {
+    if (needReconstruction_) {
+      Clower(*this->mat_, G);
+      needReconstruction_ = false;
+    }
+  }
+
+  /** \brief Perform one iteration */
+  virtual void iterate() override {
+    /*
+      Turn GG'x = b into
+      1. Gy  = b
+      2. G'x = y
+    */
+    Vector y(G.N());
+    BitVector const *ignore = this->hasIgnore() ? &this->ignore() : nullptr;
+    Dune::MatrixVector::lowerTriangularSolve(G, *this->rhs_, y, ignore);
+    Dune::MatrixVector::upperTriangularSolve(G, y, *this->x_, ignore, true);
+  }
+
+  virtual void setMatrix(Matrix const &mat) override {
+    this->mat_ = Dune::stackobject_to_shared_ptr(mat);
+    needReconstruction_ = true;
+  }
+
+private:
+  Matrix G;
+  bool needReconstruction_;
+};
+
+#endif
diff --git a/dune/solvers/test/cgsteptest.cc b/dune/solvers/test/cgsteptest.cc
index 0ccc7372ae052cdcd527c6b28945399eb965ff97..e4af6264408af6339c069de305a94f7526914a66 100644
--- a/dune/solvers/test/cgsteptest.cc
+++ b/dune/solvers/test/cgsteptest.cc
@@ -11,6 +11,7 @@
 #include <dune/solvers/solvers/loopsolver.hh>
 #include <dune/solvers/iterationsteps/blockgssteps.hh>
 #include <dune/solvers/iterationsteps/cgstep.hh>
+#include <dune/solvers/iterationsteps/choleskystep.hh>
 #include <dune/solvers/iterationsteps/istlseqilu0step.hh>
 #include <dune/solvers/iterationsteps/istlseqssorstep.hh>
 
@@ -122,6 +123,69 @@ struct CGTestSuite
             solver.preprocess();
             solver.solve();
 
+            passed &= analyser.analyse(u_copy, solver.getResult(), timer.elapsed(), testCase);
+        }
+        {
+            std::string const testCase = "Cholesky";
+
+            typename Problem::Vector u_copy = p.u;
+            typename Problem::Vector rhs_copy = p.rhs;
+
+#if 0
+            // Debug: Do we have CC' ~= A?
+            {
+                typename Problem::Matrix C;
+                Clower(p.A, C);
+                typename Problem::Matrix Ct;
+                Arithmetic::transpose(C, Ct);
+
+                typename Problem::Vector Ctx(p.rhs.size());
+                typename Problem::Vector CCtx(p.rhs.size());
+                typename Problem::Vector Ax(p.rhs.size());
+                Ct.mv(p.u_ex, Ctx);
+                C.mv(Ctx, CCtx);
+                p.A.mv(p.u_ex, Ax);
+                printf("Two norms: Ax -> %g, Ax - CCtx -> %g\n",
+                       p.twoNorm(Ax), p.twoNorm.diff(Ax, CCtx));
+            }
+#endif
+
+#if 0
+            // Debug: Does the CholeskyStep on its own reduce the residual?
+            {
+                typename Problem::Vector u = p.u;
+                printf("Two norm diff before: %g\n",
+                       p.twoNorm.diff(u, p.u_ex));
+                printf("Energy norm diff before: %g\n",
+                       p.energyNorm.diff(u, p.u_ex));
+                CholeskyStep<typename Problem::Matrix, typename Problem::Vector>
+                    cholesky;
+                cholesky.setProblem(p.A, u, rhs_copy);
+                cholesky.preprocess();
+                cholesky.iterate();
+                printf("Two norm diff after: %g\n",
+                       p.twoNorm.diff(u, p.u_ex));
+                printf("Energy norm diff after: %g\n",
+                       p.energyNorm.diff(u, p.u_ex));
+            }
+#endif
+
+            CholeskyStep<typename Problem::Matrix, typename Problem::Vector>
+                cholesky;
+            Dune::Solvers::CGStep<typename Problem::Matrix,
+                                  typename Problem::Vector> cgStep(p.A, u_copy,
+                                                                   rhs_copy,
+                                                                   cholesky);
+            if (withIgnore)
+                cgStep.setIgnore(p.ignore);
+            ::LoopSolver<typename Problem::Vector> solver(
+                &cgStep, maxIterations, stepTol, &p.twoNorm, verbosity,
+                relativeErrors, &p.u_ex);
+            solver.check();
+            timer.reset();
+            solver.preprocess();
+            solver.solve();
+
             passed &= analyser.analyse(u_copy, solver.getResult(),
                                        timer.elapsed(), testCase);
         }