From dcfe5a2f41b2cd2b218e515bae3040e3fd1cffef Mon Sep 17 00:00:00 2001
From: Oliver Sander <sander@igpm.rwth-aachen.de>
Date: Wed, 16 Jul 2014 10:56:36 +0200
Subject: [PATCH] Compute block-residuals, instead of scalar ones

This transverses the matrix less, and uses the cache better.  On an example
with 6x6 blocks I can measure a speedup in the range of 30% for a full
multi-grid iteration (including transfer operators, too).
---
 .../iterationsteps/trustregiongsstep.cc       | 67 +++++++------------
 .../iterationsteps/trustregiongsstep.hh       |  2 -
 2 files changed, 23 insertions(+), 46 deletions(-)

diff --git a/dune/solvers/iterationsteps/trustregiongsstep.cc b/dune/solvers/iterationsteps/trustregiongsstep.cc
index 50387fa3..1505d00a 100644
--- a/dune/solvers/iterationsteps/trustregiongsstep.cc
+++ b/dune/solvers/iterationsteps/trustregiongsstep.cc
@@ -7,70 +7,49 @@ VectorType TrustRegionGSStep<MatrixType, VectorType>::getSol()
 
 template<class MatrixType, class VectorType>
 inline
-typename VectorType::field_type TrustRegionGSStep<MatrixType, VectorType>::
-residual(unsigned int index, int blockIndex) const
+void TrustRegionGSStep<MatrixType, VectorType>::iterate()
 {
     const MatrixType& mat = *this->mat_;
+    VectorType& x = *this->x_;
+    const std::vector<BoxConstraint<field_type,blocksize> >& obstacles = *this->obstacles_;
 
-    typedef typename MatrixType::row_type RowType;
-    typedef typename RowType::ConstIterator ColumnIterator;
-
-    /* The following loop computes
-     * \f[ sum_i = \sum_{j \ne i} A_{ij}x_j \f]
-     */
-    field_type r = (*this->rhs_)[index][blockIndex];
-    ColumnIterator cIt    = mat[index].template begin();
-    ColumnIterator cEndIt = mat[index].template end();
-
-    for (; cIt!=cEndIt; ++cIt) {
-        // r_i -= A_ij x_j
-        // r -= (*cIt)[blockIndex] * (*this->x_)[cIt.index()];
-        for (int i=0; i<blocksize; i++) {
-
-            // Omit diagonal element
-            if (cIt.index()==index && i==blockIndex)
-                continue;
+    for (size_t i=0; i<x.size(); i++) {
 
-            r -= (*cIt)[blockIndex][i] * (*this->x_)[cIt.index()][i];
+        // Dirichlet nodes in this block
+        std::bitset<blocksize> ignoreNodes(0);
+        if (this->ignoreNodes_)
+            ignoreNodes = (*this->ignoreNodes_)[i];
 
-        }
+        if (ignoreNodes.count() == blocksize)
+            continue;
 
-    }
+        VectorBlock blockResidual;
+        BlockGSStep<MatrixType,VectorType>::residual(i, blockResidual);
 
-    return r;
-}
-
-template<class MatrixType, class VectorType>
-inline
-void TrustRegionGSStep<MatrixType, VectorType>::iterate()
-{
-    const MatrixType& mat = *this->mat_;
-    const std::vector<BoxConstraint<field_type,blocksize> >& obstacles = *this->obstacles_;
-
-    for (size_t i=0; i<this->x_->size(); i++) {
+        mat[i][i].umv(x[i], blockResidual);
 
         // scalar gauss-seidel
         for (int j=0; j<blocksize; j++) {
 
-            if (this->ignoreNodes_ and (*this->ignoreNodes_)[i][j])
+            if (ignoreNodes[j])
                 continue;
 
             // Compute residual
-            field_type r = residual(i, j);
+            field_type r = blockResidual[j] - mat[i][i][j] * x[i];
+            r += mat[i][i][j][j] * x[i][j];
+
             const field_type& diag = mat[i][i][j][j];
 
             // Find local minimum
-            field_type& x = (*this->x_)[i][j];
-
             if (diag > 0) {
                 // 1d problem is convex
-                x = r / diag;
+                x[i][j] = r / diag;
 
-                if ( x < obstacles[i].lower(j)) {
-                    x = obstacles[i].lower(j);
+                if ( x[i][j] < obstacles[i].lower(j)) {
+                    x[i][j] = obstacles[i].lower(j);
                 }
-                else if ( x > obstacles[i].upper(j)) {
-                    x = obstacles[i].upper(j);
+                else if ( x[i][j] > obstacles[i].upper(j)) {
+                    x[i][j] = obstacles[i].upper(j);
                 }
 
             } else  {
@@ -87,7 +66,7 @@ void TrustRegionGSStep<MatrixType, VectorType>::iterate()
                 field_type lValue = 0.5*diag*lBound*lBound - r*lBound;
                 field_type uValue = 0.5*diag*uBound*uBound - r*uBound;
 
-                x = (lValue < uValue) ? lBound : uBound;
+                x[i][j] = (lValue < uValue) ? lBound : uBound;
 
             }
 
diff --git a/dune/solvers/iterationsteps/trustregiongsstep.hh b/dune/solvers/iterationsteps/trustregiongsstep.hh
index eb134d13..ef95795b 100644
--- a/dune/solvers/iterationsteps/trustregiongsstep.hh
+++ b/dune/solvers/iterationsteps/trustregiongsstep.hh
@@ -37,8 +37,6 @@
 
         virtual VectorType getSol();
 
-        field_type residual(unsigned int index, int blockIndex) const;
-
     };
 
 #include "trustregiongsstep.cc"
-- 
GitLab