diff --git a/dune/elasticity/assemblers/feassembler.hh b/dune/elasticity/assemblers/feassembler.hh
index 19a71f1ec61d5ef14d4e2c81abd06dfe166370c0..8b922c6fc0f7eb43a6264899396d1b5734a18898 100644
--- a/dune/elasticity/assemblers/feassembler.hh
+++ b/dune/elasticity/assemblers/feassembler.hh
@@ -54,6 +54,11 @@ public:
           localStiffness_(localStiffness)
     {}
 
+    /** \brief Assemble the functional gradient */
+    void assembleGradient(const VectorType& configuration,
+                          VectorType& gradient) const;
+
+
     /** \brief Assemble the tangent stiffness matrix and the functional gradient together
      *
      * This may be more efficient than computing them separately
@@ -71,6 +76,47 @@ public:
 }; // end class
 
 
+
+template <class Basis, class VectorType>
+void FEAssembler<Basis,VectorType>::
+assembleGradient(const VectorType& configuration,
+                 VectorType& gradient) const
+{
+  // create backends for multi index access
+  auto configurationBackend      = Dune::Functions::istlVectorBackend(configuration);
+  auto gradientBackend = Dune::Functions::istlVectorBackend(gradient);
+
+  gradientBackend.resize(basis_);
+
+  gradient = 0;
+
+  auto localView = basis_.localView();
+
+  for (const auto& element : elements(basis_.gridView(), partitionType))
+  {
+    localView.bind(element);
+    const auto size = localView.size();
+
+    // Extract local configuration
+    std::vector<field_type> localConfiguration(size);
+    std::vector<field_type> localGradient(size);
+
+    for (size_t i=0; i<size; i++)
+      localConfiguration[i] = configurationBackend[ localView.index(i) ];
+
+    // setup local matrix and gradient
+    localStiffness_->assembleGradient(localView, localConfiguration, localGradient);
+
+    for (size_t i=0; i<size; i++)
+    {
+      auto row = localView.index(i);
+      gradientBackend[row] += localGradient[i];
+    }
+  }
+}
+
+
+
 template <class Basis, class VectorType>
 template <class MatrixType>
 void FEAssembler<Basis,VectorType>::
diff --git a/dune/elasticity/assemblers/localadolcstiffness.hh b/dune/elasticity/assemblers/localadolcstiffness.hh
index 3dd6512808a7fe658a2d594d4a27a8ef9c26d054..ec6d51e5d0f620c847b66036adbef003d5338b22 100644
--- a/dune/elasticity/assemblers/localadolcstiffness.hh
+++ b/dune/elasticity/assemblers/localadolcstiffness.hh
@@ -41,6 +41,14 @@ public:
     virtual double energy (const LocalView& localView,
                            const std::vector<double>& localConfiguration) const;
 
+    /** \brief Assemble the local gradient at the current position
+    *
+    *     This uses the automatic differentiation toolbox ADOL_C.
+    */
+    virtual void assembleGradient(const LocalView& localView,
+                                  const std::vector<double>& localConfiguration,
+                                  std::vector<double>& localGradient);
+
     /** \brief Assemble the local stiffness matrix at the current position
      *
      *     This uses the automatic differentiation toolbox ADOL_C.
@@ -88,6 +96,31 @@ energy(const LocalView& localView,
 }
 
 
+
+template<class LocalView>
+void LocalADOLCStiffness<LocalView>::
+assembleGradient(const LocalView& localView,
+                 const std::vector<double>& localConfiguration,
+                 std::vector<double>& localGradient
+                )
+{
+  int rank = Dune::MPIHelper::getCommunication().rank();
+  // Tape energy computation.  We may not have to do this every time, but it's comparatively cheap.
+  energy(localView, localConfiguration);
+
+  /////////////////////////////////////////////////////////////////
+  // Compute the energy gradient
+  /////////////////////////////////////////////////////////////////
+
+  // Compute the actual gradient
+  size_t nDoubles = localConfiguration.size();
+
+  // Compute gradient
+  localGradient.resize(nDoubles);
+  gradient(rank,nDoubles,localConfiguration.data(),localGradient.data());
+}
+
+
 template<class LocalView>
 void LocalADOLCStiffness<LocalView>::
 assembleGradientAndHessian(const LocalView& localView,
@@ -97,24 +130,19 @@ assembleGradientAndHessian(const LocalView& localView,
                           )
 {
     int rank = Dune::MPIHelper::getCommunication().rank();
-    // Tape energy computation.  We may not have to do this every time, but it's comparatively cheap.
-    energy(localView, localConfiguration);
 
     /////////////////////////////////////////////////////////////////
     // Compute the energy gradient
     /////////////////////////////////////////////////////////////////
 
-    // Compute the actual gradient
-    size_t nDoubles = localConfiguration.size();
-
-    // Compute gradient
-    localGradient.resize(nDoubles);
-    gradient(rank,nDoubles,localConfiguration.data(),localGradient.data());
+    // this tapes the energy computation
+    assembleGradient(localView, localConfiguration, localGradient);
 
     /////////////////////////////////////////////////////////////////
     // Compute Hessian
     /////////////////////////////////////////////////////////////////
 
+    size_t nDoubles = localConfiguration.size();
     localHessian.setSize(nDoubles,nDoubles);
 
     // allocate raw doubles: ADOL-C requires "**double" arguments
diff --git a/dune/elasticity/assemblers/localfestiffness.hh b/dune/elasticity/assemblers/localfestiffness.hh
index bcb5ef82de94ae2d4b1f5dc2633335bd1067c1bb..d622727457a3f5d1be577a3c60c4a7eba67d08f0 100644
--- a/dune/elasticity/assemblers/localfestiffness.hh
+++ b/dune/elasticity/assemblers/localfestiffness.hh
@@ -16,6 +16,13 @@ class LocalFEStiffness
 
 public:
 
+    /** \brief Assemble the local gradient at the current position
+    */
+    virtual void assembleGradient(const LocalView& localView,
+                                  const std::vector<RT>& localConfiguration,
+                                  std::vector<RT>& localGradient
+                                 ) = 0;
+
     /** \brief Assemble the local gradient and tangent matrix at the current position
     */
     virtual void assembleGradientAndHessian(const LocalView& localView,
diff --git a/dune/elasticity/assemblers/localhyperdualstiffness.hh b/dune/elasticity/assemblers/localhyperdualstiffness.hh
index 058935904e1618b3a544e6e959077790bf26b474..398e18c23ac8d679f93eb6286e1c990b4b819de0 100644
--- a/dune/elasticity/assemblers/localhyperdualstiffness.hh
+++ b/dune/elasticity/assemblers/localhyperdualstiffness.hh
@@ -27,6 +27,15 @@ public:
     virtual double energy (const LocalView& localView,
                            const std::vector<double>& localConfiguration) const;
 
+
+    /** \brief Assemble the local gradient at the current position
+     *
+     *     This uses the automatic differentiation using hyperdual numbers
+     */
+    virtual void assembleGradient(const LocalView& localView,
+                                  const std::vector<double>& localConfiguration,
+                                  std::vector<double>& localGradient);
+
     /** \brief Assemble the local stiffness matrix at the current position
      *
      *     This uses the automatic differentiation using hyperdual numbers
@@ -62,6 +71,37 @@ energy(const LocalView& localView,
 }
 
 
+
+template<class LocalView>
+void LocalHyperDualStiffness<LocalView>::
+assembleGradient(const LocalView& localView,
+                 const std::vector<double>& localConfiguration,
+                 std::vector<double>& localGradient
+)
+{
+    size_t nDoubles = localConfiguration.size();
+
+    localGradient.resize(nDoubles);
+
+    // Create hyperdual vector localHyperDualConfiguration = double vector localConfiguration
+    std::vector<hyperdual> localHyperDualConfiguration(nDoubles);
+    for (size_t i=0; i<nDoubles; i++)
+        localHyperDualConfiguration[i] = hyperdual(localConfiguration[i]);
+
+    // Compute gradient and hessian (symmetry is used) using hyperdual function evaluation
+    // localHyperDualConfiguration puts Ones in the eps1 and eps2 component to evaluate the different partial derivatives
+    for (size_t i=0; i<nDoubles; i++)
+    {
+        localHyperDualConfiguration[i].seteps1(1.0);
+
+        hyperdual temp = localEnergy_->energy(localView, localHyperDualConfiguration);
+        localGradient[i] = temp.eps1();
+
+        localHyperDualConfiguration[i].seteps1(0.0);
+    }
+}
+
+
 template<class LocalView>
 void LocalHyperDualStiffness<LocalView>::
 assembleGradientAndHessian(const LocalView& localView,
diff --git a/test/localhyperdualstiffnesstest.cc b/test/localhyperdualstiffnesstest.cc
index f0c8887990fc94257a33d3cd89b05ecef9e62132..f60dcef72834ad21b0dc0684e62f33f3aec71621 100644
--- a/test/localhyperdualstiffnesstest.cc
+++ b/test/localhyperdualstiffnesstest.cc
@@ -85,10 +85,10 @@ TestSuite hyperdualEqualsADOL_C()
 
   Timer timer;
   hyperdualAssembler.assembleGradientAndHessian(x,hyperdualGradient,hyperdualHessian);
-  std::cout <<  dim <<"D: hyperdual assembler took " << timer.elapsed() << " sec." << std::endl;
+  std::cout <<  dim <<"D: hyperdual hessian and gradient assembler took " << timer.elapsed() << " sec." << std::endl;
   timer.reset();
   adolcAssembler.assembleGradientAndHessian(x,adolcGradient,adolcHessian);
-  std::cout << dim << "D: ADOL-C assembler took " << timer.elapsed() << " sec." << std::endl;
+  std::cout << dim << "D: ADOL-C hessian and gradient assembler took " << timer.elapsed() << " sec." << std::endl;
 
 
   hyperdualHessian -= adolcHessian;
@@ -97,6 +97,18 @@ TestSuite hyperdualEqualsADOL_C()
   // check the relative error
   t.check(hyperdualHessian.frobenius_norm()/adolcHessian.frobenius_norm() < 1e-12 && hyperdualGradient.two_norm()/adolcGradient.two_norm() < 1e-12);
 
+
+  timer.reset();
+  hyperdualAssembler.assembleGradient(x,hyperdualGradient);
+  std::cout <<  dim <<"D: hyperdual gradient assembler took " << timer.elapsed() << " sec." << std::endl;
+  timer.reset();
+  adolcAssembler.assembleGradient(x,adolcGradient);
+  std::cout << dim << "D: ADOL-C gradient assembler took " << timer.elapsed() << " sec." << std::endl;
+  hyperdualGradient -= adolcGradient;
+
+  // check the relative error
+  t.check(hyperdualGradient.two_norm()/adolcGradient.two_norm() < 1e-12);
+
   return t;
 
 }