diff --git a/.gitignore b/.gitignore
index eec8f5abb22f13e206cc514f642362527bb18840..2aa5144f890f46fcc27ec766b7cf1e0ef5a906ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,11 @@
 .idea/
 .ipynb_checkpoints/
 datasets/
+*.aux
+*.bbl
+*.blg
+*.log
+*.out
+*.synctex.gz
 
 
diff --git a/lls_demo.ipynb b/lls_demo.ipynb
index c11345f0e487e71d36a449f3dc63b126b90e9e00..b44fa05ddb33eff9cdee26e08b30d053d5897da3 100644
--- a/lls_demo.ipynb
+++ b/lls_demo.ipynb
@@ -54,7 +54,7 @@
     "\n",
     "$det(A - I\\epsilon) = 0 \\iff Eig(A, \\epsilon) \\neq \\emptyset$\n",
     "\n",
-    "As $A$ is a symmetric matrix, we know it is positive semidefinite, therefore it\n",
+    "As $A$ in our case is $X^TX$, it is a symmetric positive semidefinite matrix, so it\n",
     "has only non-negative eigenvalues. Therefore, we could safely assume\n",
     "\n",
     "$det(A + I\\epsilon) \\neq 0$\n",
diff --git a/script/bib.bib b/script/bib.bib
new file mode 100644
index 0000000000000000000000000000000000000000..041ea407b89e8746ad42f6c9516c8db26b3f38bf
--- /dev/null
+++ b/script/bib.bib
@@ -0,0 +1,40 @@
+@article{moorepen,
+	author = {Gower, John},
+	year = {2012},
+	month = {04},
+	pages = {806-807},
+	title = {Generalized Inverse Matrices},
+	volume = {14},
+	journal = {Technometrics},
+	doi = {10.1080/00401706.1972.10488972}
+}
+
+@article{mnist,
+	added-at = {2010-06-28T21:16:30.000+0200},
+	author = {LeCun, Yann and Cortes, Corinna},
+	biburl = {https://www.bibsonomy.org/bibtex/2935bad99fa1f65e03c25b315aa3c1032/mhwombat},
+	groups = {public},
+	howpublished = {http://yann.lecun.com/exdb/mnist/},
+	interhash = {21b9d0558bd66279df9452562df6e6f3},
+	intrahash = {935bad99fa1f65e03c25b315aa3c1032},
+	keywords = {MSc _checked character_recognition mnist network neural},
+	lastchecked = {2016-01-14 14:24:11},
+	timestamp = {2016-07-12T19:25:30.000+0200},
+	title = {{MNIST} handwritten digit database},
+	url = {http://yann.lecun.com/exdb/mnist/},
+	username = {mhwombat},
+	year = 2010
+}
+
+@misc{lai,
+	author = {Frank Hoffman},
+	year = {2016},
+	title = {Lineare Algebra für Informatiker}
+}
+
+@misc{laii,
+	author = {Victoria Hoskins},
+	year = {2018},
+	title = {Lineare Algebra II},
+	url = {https://userpage.fu-berlin.de/hoskins/LAII\_Lehrplan.pdf}
+}
\ No newline at end of file
diff --git a/script/lls.pdf b/script/lls.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..e1fbdace3d49dbf5d4e38e4e2f1882e8578ccd76
Binary files /dev/null and b/script/lls.pdf differ
diff --git a/script/lls.tex b/script/lls.tex
new file mode 100644
index 0000000000000000000000000000000000000000..fcf6a1ffb2cb2d3a5d4a1f130c7f96b9b911c19c
--- /dev/null
+++ b/script/lls.tex
@@ -0,0 +1,370 @@
+\input{./res/header.tex}														
+
+\newcommand{\abstractcontent}{
+	Linear regression is used to find a linear relationship between the independent variables of an experiment and its outcomes. In this tutorial
+	we first find a model which describes this linear relationship between variables and outcomes with respect to some parameters. We then
+	define a reasonable criterion for how accurate our model is on given experiment data and, based on this criterion, derive a solution for
+	finding the optimal model parameters.
+	
+	We then generalize this idea and discuss how linear regression can be used for polynomial approximation, comparing the results with those
+	obtained using polynomial interpolation. Finally, we use linear regression to solve a classification problem, namely recognizing
+	handwritten digits, observe the flaws of this solution and motivate alternative approaches.
+}
+
+\begin{document}
+
+	\input{./res/titlepage.tex}										
+	
+	\section{Motivation}
+		Given a set of points $(x_1, y_1), \dots, (x_n, y_n)$ we are looking for a linear function which, given $x_i$, best approximates $y_i$.
+		We are assuming each $x_i$ is the data with which an experiment was conducted and $y_i$ is the measured result of this experiment, with some small
+		measurement error.
+		
+		In other words, we can define our experiment as a function $f(x)$ such that:
+		
+		\begin{align}
+			f(x_i) = \hat{y_i} \\
+			\hat{f(x_i)} = f(x_i) + \mathcal{N}(\mu,\,\sigma^{2}) = \hat{y_i} + \mathcal{N}(\mu,\,\sigma^{2}) = y_i
+		\end{align}
+		
+		Here $\mathcal{N}(\mu,\,\sigma^{2})$ denotes a small measurement error, modeled as a sample drawn from a normal distribution with mean $\mu$ and variance $\sigma^{2}$.
+		Equation (1) models the outcome of an experiment with variables $x_i$. Equation (2) models the outcomes we measure when conducting 
+		an experiment with variables $x_i$. Our task is to find a good approximation of $f(x)$ using observations of $\hat{f(x)}$.
+		
+		In a 2-dimensional case where $x_i$ is just a point on the x-axis, we are looking for $f(x)$ such that:
+		
+		\begin{align*}
+			f(x) = ax + b
+		\end{align*}
+		
+		Figure \ref{fig:2dreg} shows an example of such a scenario and the optimal $f(x)$ line with slope $a$ and bias $b$.
+	
+		\begin{figure}[h!]
+			\includegraphics[width=\textwidth]{./plots/2d_regression.png}
+			\caption{Linear regression in 2D}
+			\label{fig:2dreg}
+		\end{figure}
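+
+		The following short Python sketch simulates such an experiment (the slope, intercept and noise level chosen below are purely illustrative assumptions): a true linear function is evaluated at the points $x_i$ and Gaussian measurement noise is added to obtain the observations $y_i$.
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+rng = np.random.default_rng(0)
+
+# hypothetical "true" experiment: f(x) = a*x + b
+a_true, b_true = 2.0, -1.0
+sigma = 0.5                      # standard deviation of the measurement noise
+
+x = rng.uniform(-5, 5, size=50)  # experiment variables x_i
+y_clean = a_true * x + b_true    # noise-free outcomes f(x_i)
+y = y_clean + rng.normal(0.0, sigma, size=x.shape)  # measured outcomes y_i
+\end{lstlisting}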
+	
+		\clearpage
+	
+	\section{Model accuracy criterion (Mean squared error)}
+		Using the 2-dimensional model defined above, we are looking for $f(x)$, such that
+		
+		\begin{align*}
+			& \forall i \in \{1, \dots, n\}: f(x_i) = a x_i + b = y_i \\
+			\iff & \forall i \in \{1, \dots, n\}: f(x_i) - y_i = (a x_i + b) - y_i = 0 
+		\end{align*}
+		
+		However, as one can see from the example above, such a function usually does not exist, as there is no line which goes through all points. We therefore look for the
+		best possible line and define the error for each data point as $f(x_i) - y_i$, which we try to minimize. Since we want
+		$f(x_i) = y_i$, what we actually want to minimize is the magnitude of this deviation, regardless of its sign. Squaring the error achieves this while keeping the objective differentiable,
+		which leads to the mean squared error.
+		
+		\begin{align}
+			MSE((x_1, y_1), \dots, (x_n, y_n)) = \frac{1}{n}\sum_{i = 1}^{n} (f(x_i) - y_i)^2 = \frac{1}{n}\sum_{i = 1}^{n} (a x_i + b - y_i)^2
+		\end{align}
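+
+		For illustration, the mean squared error of a candidate line with parameters $a$ and $b$ can be computed directly from the data; the following is a minimal Python sketch (the variable names are assumptions).
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+def mse(a, b, x, y):
+    """Mean squared error of the line f(x) = a*x + b on the points (x_i, y_i)."""
+    residuals = a * x + b - y          # f(x_i) - y_i for every data point
+    return np.mean(residuals ** 2)     # average of the squared errors
+\end{lstlisting}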
+		
+	
+	\section{m-dimensional scenario}
+		
+		If $x_i$ is an m-dimensional variable, our linear model becomes
+		
+		\begin{align*}
+			f(x_i) = \omega_1 x_{i,1} + \omega_2 x_{i, 2} + \dots + \omega_m x_{i, m} + b
+		\end{align*}
+		
+		We are now going to restate our problem in matrix notation, as the derivation of the optimal parameters $\omega_1, \dots, \omega_m$ will be much easier in this notation.
+		First, we observe that we can represent $f(x_i)$ as a single vector product, as
+		
+		\begin{align*}
+			f(x_i) &= \omega_1 x_{i,1} + \omega_2 x_{i, 2} + \dots + \omega_m x_{i, m} + b \\
+				   &= [x_{i, 1}, \dots, x_{i,m}, 1] \begin{bmatrix}
+				   		\omega_{1} \\
+				   		\vdots \\
+				   		\omega_{m} \\
+				   		b
+				   \end{bmatrix}
+		\end{align*}
+		
+		We define our experiment variables $x_1, \dots, x_n$ as a matrix $\hat{X}$ such that
+		
+		\begin{align*}
+			\hat{X} &= \begin{bmatrix}
+			x_{1} \\
+			x_{2} \\
+			\vdots \\
+			x_{n}
+			\end{bmatrix}  \\
+				&= \begin{bmatrix}
+				x_{1,1} & x_{1,2} & \dots & x_{1,m} \\
+				x_{2,1} & x_{2,2} & \dots & x_{2,m} \\
+				\vdots & & & \vdots \\
+				x_{n,1} & x_{n,2} & \dots & x_{n,m} \\
+				\end{bmatrix} \in Mat_{n \times m}(\mathbb{R})
+		\end{align*}
+		
+		To use the observation above and represent the model with just a matrix-vector multiplication, we then define
+		
+		\begin{align*}
+			X &= \begin{bmatrix}
+				& & 1 \\
+				& \hat{X} & \vdots \\
+				& & 1
+			\end{bmatrix}
+		\end{align*}
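+
+		In code, appending the constant column of ones to $\hat{X}$ is a single operation. The following sketch (array names and shapes are illustrative assumptions) builds $X$ from an $n \times m$ data matrix.
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+X_hat = np.random.rand(100, 3)           # n = 100 experiments, m = 3 variables
+ones = np.ones((X_hat.shape[0], 1))      # constant column for the bias b
+X = np.hstack([X_hat, ones])             # X has shape (n, m + 1)
+\end{lstlisting}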
+		
+		We define our model parameters $\omega_1, \dots, \omega_m$ together with the bias $b$ as a vector
+		
+		\begin{align*}
+			\omega &= \begin{bmatrix}
+				\omega_{1} \\
+				\omega_{2} \\
+				\vdots \\
+				\omega_{m} \\
+				b
+			\end{bmatrix}
+		\end{align*}		
+		
+		and our experiment observations as a vector
+		
+		\begin{align*}
+			y &= \begin{bmatrix}
+				y_{1} \\
+				y_{2} \\
+				\vdots \\
+				y_{n}
+			\end{bmatrix}
+		\end{align*}
+		
+		We then observe
+		
+		\begin{align}
+			MSE(X, y) &= \frac{1}{n}(X \omega - y)^{T} (X \omega - y) \\
+				&= \frac{1}{n} ((X \omega)^T - y^T)(X \omega - y) \\ 
+				&= \frac{1}{n} ( (\omega^T X^T - y^T)(X \omega - y)  )\\
+				&= \frac{1}{n} ( \omega^T X^T X \omega - \omega^T X^T y - y^T X \omega + y^Ty )\\
+				&= \frac{1}{n} ( \omega^T X^T X \omega - 2 \omega^T X^T y + y^Ty )
+		\end{align}
+		
+		Note that $(7) = (8)$, since $y^T X \omega = (\omega^T X^T y)^T$ and both are scalars, and every scalar equals its own transpose, i.e. $\forall x \in \mathbb{R}: x^T = x$.
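+
+		The matrix form can be checked numerically against the elementwise definition of the mean squared error; in the sketch below (random data, all names are assumptions) both expressions agree up to floating point error.
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+rng = np.random.default_rng(1)
+n, m = 100, 4
+X = np.hstack([rng.normal(size=(n, m)), np.ones((n, 1))])  # data plus bias column
+y = rng.normal(size=n)
+w = rng.normal(size=m + 1)                                  # some parameter vector
+
+residuals = X @ w - y
+mse_elementwise = np.mean(residuals ** 2)
+mse_matrix = (residuals @ residuals) / n                    # (1/n)(Xw - y)^T (Xw - y)
+assert np.isclose(mse_elementwise, mse_matrix)
+\end{lstlisting}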
+		
+	
+	\section{Deriving least squares solution to linear regression}
+	
+		\subsection{Minimizing the mean squared error}
+		
+			The mean squared error is a quadratic function of $\omega$ which is bounded below by zero (it never becomes negative), so any extremum it has is a minimum.
+			Therefore, the optimal parameters $\omega$ are those for which $MSE$ is minimal. We can find them by finding the $\omega$ for which $\frac{\partial MSE(X, y)}{\partial \omega} = 0$.
+			
+			\begin{align*}
+				& \frac{\partial MSE(X, y)}{\partial \omega} &= & 0 \\
+					\iff & \frac{\partial\, n MSE(X, y)}{\partial \omega} &= & 0 \\
+					\iff & \frac{\partial (\omega^T X^T X \omega - 2 \omega^T X^T y + y^Ty)}{\partial \omega} &= & 0 \\
+					\iff & \frac{\partial (\omega^T X^T X \omega - 2 \omega^T X^T y)}{\partial \omega} &= & 0 \\
+					\iff & \frac{\partial \omega^T X^T X \omega}{\partial \omega} - \frac{\partial\, 2 \omega^T X^T y}{\partial \omega} &= & 0 \\
+					\iff & \frac{\partial \omega^T X^T X \omega}{\partial \omega} &= & \frac{\partial\, 2 \omega^T X^T y}{\partial \omega} \\
+			\end{align*}
+		
+		\subsection{Finding matrix derivatives}
+		
+			Let's start with $\frac{\partial 2 \omega^T X^T y}{\partial \omega}$.
+			
+			We simplify the situation by observing that $X^T y$ is just a vector. Therefore, for vectors $v, x \in \mathbb{R}^k$ we find
+			
+			\begin{align*}
+				& \frac{\partial x^T v}{\partial x} \\
+				&= \frac{\partial x_1 v_1 + x_2 v_2 + \dots + x_k v_k}{\partial x} \\
+				&= \begin{bmatrix}
+					\frac{\partial x_1 v_1 + x_2 v_2 + \dots + x_k v_k}{\partial x_1} \\
+					\vdots \\
+					\frac{\partial x_1 v_1 + x_2 v_2 + \dots + x_k v_k}{\partial x_k} \\
+				\end{bmatrix} \\
+				& = \begin{bmatrix}
+					v_1 \\
+					\vdots \\
+					v_k
+				\end{bmatrix} \\
+				& = v
+			\end{align*}
+			
+			$\implies \frac{\partial 2 \omega^T X^T y}{\partial \omega} = 2 X^T y $
+		
+			\rule{\linewidth}{0.8pt}
+			\vspace{1cm}
+			
+			
+			Finding the partial derivative $\frac{\partial \omega^T X^T X \omega}{\partial \omega}$ is a little more complicated though. We observe that $X^TX$ is a symmetric matrix,
+			as clearly $(X^TX)^T = X^TX$. So we can simplify the situation by looking for the derivative $\frac{\partial x^T A x}{\partial x}$ for $x \in \mathbb{R}^k$ and a symmetric $A \in Mat_{k \times k}(\mathbb{R})$.
+			
+			A symmetric square matrix is uniquely determined by its quadratic form, as learned in Linear Algebra II \cite{laii}, so we could omit the calculation of
+			$x^T A x$. For the sake of completeness, we show the calculation anyway.
+			
+			We observe for $A = 
+			\begin{bmatrix}
+				a_{1,1} & \dots & a_{1, k} \\
+				\vdots & \ddots & \vdots \\
+				a_{k, 1} & \dots & a_{k, k}
+			\end{bmatrix} = 
+			\begin{bmatrix}
+				a_{1,1} & \dots & a_{1, k} \\
+				\vdots & \ddots & \vdots \\
+				a_{1, k} & \dots & a_{k, k}
+			\end{bmatrix}
+			$
+			
+			\begin{align*}
+				\frac{\partial x^T A x}{\partial x} &= \frac{\partial \sum_{i = 1}^{k} \sum_{j = 1}^{k} x_i x_j a_{i, j} } {\partial x} \\
+					&=  \frac{\partial \sum_{i = 1}^{k} x_i^2 a_{i, i} + \sum_{j = 1}^{k}\sum_{l > j} 2 x_j x_l a_{j, l} }{\partial x}  \tag{because $a_{j, l} = a_{l, j}$ as $A$ is symmetric} \\
+					&= \begin{bmatrix}
+						\frac{\partial \sum_{i = 1}^{k} x_i^2 a_{i, i} + \sum_{j = 1}^{k}\sum_{l > j} 2 x_j x_l a_{j, l} }{\partial x_1} \\
+						\vdots \\
+						\frac{\partial \sum_{i = 1}^{k} x_i^2 a_{i, i} + \sum_{j = 1}^{k}\sum_{l > j} 2 x_j x_l a_{j, l} }{\partial x_k}
+					\end{bmatrix} \\
+					&= \begin{bmatrix}
+						2 a_{1,1} x_1 + \dots + 2 a_{1, k} x_k \\
+						\vdots \\
+						2 a_{k,1} x_1 + \dots + 2 a_{k, k} x_k
+					\end{bmatrix} \\
+					&= 2Ax
+			\end{align*}
+			
+			$\implies \frac{\partial \omega^T X^T X \omega}{\partial \omega} = 2 X^T X \omega$
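+
+			Both matrix derivatives can be sanity-checked with finite differences. The sketch below (purely illustrative) compares the analytic gradient $2Ax$ of $x^TAx$ for a symmetric $A$ with a numerical approximation.
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+rng = np.random.default_rng(2)
+k = 5
+B = rng.normal(size=(k, k))
+A = B.T @ B                      # symmetric (and positive semidefinite) matrix
+x = rng.normal(size=k)
+
+analytic = 2 * A @ x             # claimed gradient of x^T A x
+
+eps = 1e-6
+numeric = np.zeros(k)
+for i in range(k):               # central finite differences, coordinate by coordinate
+    e = np.zeros(k); e[i] = eps
+    numeric[i] = ((x + e) @ A @ (x + e) - (x - e) @ A @ (x - e)) / (2 * eps)
+
+assert np.allclose(analytic, numeric, atol=1e-4)
+\end{lstlisting}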
+			
+		\clearpage
+			
+		\subsection{Combining results}
+			Combining the results of the previous two subsections, we arrive at the conclusion
+			
+			\begin{align*}
+				& \frac{\partial MSE(X, y)}{\partial \omega} &= & 0 \\
+					\iff & \frac{\partial \omega^T X^T X \omega}{\partial \omega} &= & \frac{\partial\, 2 \omega^T X^T y}{\partial \omega} \\
+					\iff & 2 X^T X \omega &=& 2 X^T y \\
+					\iff & X^T X \omega &=& X^T y \\
+					\iff & \omega &=& (X^T X)^{-1} X^T y
+			\end{align*}
+			
+			So the optimal least squares solution to linear regression is $\omega = (X^T X)^{-1} X^T y$.
+			However, we are not done yet, as the matrix $X^TX$ may be singular and therefore not invertible (one could characterize exactly when it is singular,
+			but we omit this step here as it is beyond the scope of this tutorial).
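+
+			In practice one would not form the inverse of $X^TX$ explicitly but rather solve the normal equations $X^TX\omega = X^Ty$ directly. A minimal Python sketch (assuming $X$ already contains the bias column) could look as follows.
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+def fit_linear_regression(X, y):
+    """Least squares parameters for the model X @ w ~= y (X already has the bias column)."""
+    # Solve the normal equations X^T X w = X^T y instead of inverting X^T X explicitly.
+    return np.linalg.solve(X.T @ X, X.T @ y)
+
+# Equivalent, and numerically more robust for ill-conditioned X:
+# w, *_ = np.linalg.lstsq(X, y, rcond=None)
+\end{lstlisting}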
+	
+		\subsection{Inverting possibly singular matrix}
+		When $X^TX$ is singular, one often resorts to the Moore-Penrose pseudoinverse \cite{moorepen}. Here we use a simpler workaround and perturb the matrix slightly so that it becomes invertible.
+		As we know from ``Lineare Algebra für Informatiker'' \cite{lai}, the characteristic polynomial of $A \in Mat_{n \times n}(\mathbb{R})$ is
+		$\chi_A(\lambda) = det(A - I \lambda)$. Further we know
+		
+		\begin{align*}
+			& \chi_A(\lambda) = 0 \\
+			\iff & det(A - I \lambda) = 0 \\
+			\iff & Ker(A - I \lambda) \neq \{0\} \\
+			\iff & Eig(A, \lambda) \neq \emptyset \\
+			\iff & \lambda \text{ is an eigenvalue of } A
+		\end{align*}
+		
+		But further, as learned in Linear Algebra II \cite{laii}, a symmetric matrix $A$ satisfies $\forall x \in \mathbb{R}^k: x^TAx \geq 0$ if and only if every eigenvalue $\lambda_i$ of $A$ satisfies $\lambda_i \geq 0$.
+		Or informally, a symmetric positive semidefinite matrix $A$ has only non-negative eigenvalues. \\
+		
+		$\implies \forall \epsilon > 0: det(X^TX + I\epsilon) = det(X^TX - I(-\epsilon)) \neq 0$, since $-\epsilon < 0$ cannot be an eigenvalue of the positive semidefinite matrix $X^TX$ \\
+		
+		Therefore, we could use $(X^TX + I\epsilon)^{-1}$ instead of $(X^TX)^{-1}$ for some very small $\epsilon$.
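+
+		A sketch of this regularized solution follows (the default value of $\epsilon$ is an arbitrary illustrative choice); adding $\epsilon I$ makes the linear system solvable even when $X^TX$ is singular. Readers may recognize this as the ridge regression estimator with a very small regularization weight.
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+def fit_linear_regression_regularized(X, y, eps=1e-8):
+    """Least squares parameters using (X^T X + eps*I)^{-1} X^T y."""
+    k = X.shape[1]
+    return np.linalg.solve(X.T @ X + eps * np.eye(k), X.T @ y)
+\end{lstlisting}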
+		
+	\clearpage
+	
+	\section{Polynomial approximation}
+	
+		Linear regression is only linear in the parameters $\omega$. However, we can apply a non-linear transformation to the data set before applying linear regression.
+		
+		In a 2-dimensional scenario we could transform the matrix $X$ from above
+		
+		\begin{align*}
+			X = \begin{bmatrix*}
+				x_{1,1} & 1 \\
+				\vdots & \vdots \\
+				x_{n,1} & 1
+			\end{bmatrix*}
+		\end{align*}
+		
+		into k-dimensional data
+	
+		\begin{align*}
+			X_k = & \begin{bmatrix*}
+				x_{1,1} & x_{1,2} & \dots & x_{1,k} & 1 \\
+				\vdots & & & \vdots \\
+				x_{n,1} & x_{n,2} & \dots & x_{n,k} & 1 \\
+			\end{bmatrix*} \\
+			=& \begin{bmatrix*}
+				x_{1,1} & x_{1,1}^2 & \dots & x_{1,1}^k & 1 \\
+				\vdots & & & \vdots \\
+				x_{n,1} & x_{n,1}^2 & \dots & x_{n,1}^k & 1 \\
+			\end{bmatrix*}
+		\end{align*}
+		
+		This way, we can approximate a polynomial of degree $k$. It often makes sense to increase the degree $k$ gradually, only until one is happy with the mean squared error.
+		Otherwise, one might end up with a model which is very precise when tested on the data it is supposed to fit, but very inaccurate when tested on unseen data. This
+		phenomenon is known as ``overfitting'' the data. Figure \ref{fig:polapint} compares the results of polynomial approximation and polynomial interpolation.
+		
+		\begin{figure}[h!]
+			\includegraphics[width=\textwidth]{./plots/2d_pol_ap_int.png}
+			\caption{Polynomial approximation VS interpolation}
+			\label{fig:polapint}
+		\end{figure}
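+
+		A possible implementation of this feature transformation is sketched below (function and variable names are assumptions; the fitting routine is the sketch from the previous section).
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+def polynomial_features(x, k):
+    """Map 1-D inputs x to the matrix [x, x^2, ..., x^k, 1]."""
+    powers = np.stack([x ** d for d in range(1, k + 1)], axis=1)
+    return np.hstack([powers, np.ones((x.shape[0], 1))])
+
+# X_k = polynomial_features(x, k=3)
+# w = fit_linear_regression(X_k, y)            # least squares fit from earlier
+# y_pred = polynomial_features(x_new, 3) @ w   # predictions at new points
+\end{lstlisting}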
+		
+		
+	\section{Classification}
+	
+		One could also use linear regression to solve a classification problem. For this example, we are going to distinguish between handwritten digits
+		using the famous MNIST \cite{mnist} data set. \\
+		
+		The data set consists of $28 \times 28$ pixel images. Our algorithm needs to predict the digit $y_i \in \{0, \dots, 9\}$ given an image $\hat{x_i} = \begin{bmatrix*}
+			x_{i, 1, 1} & \dots & x_{i, 1, 28} \\
+			\vdots & \ddots & \vdots \\
+			x_{i, 28, 1} & \dots & x_{i, 28, 28}
+		\end{bmatrix*}$
+		
+		First, we transform each image into a $28 \cdot 28 = 784$-dimensional row vector
+		
+		\begin{align*}
+				\hat{x_i} = [x_{i, 1, 1}, \dots, x_{i, 1, 28},x_{i,2,1}, \dots, x_{i,2,28}, \dots, x_{i, 28, 28}]
+		\end{align*}
+		
+		Then, similarly to previous sections, we define
+		
+		\begin{align*}
+			X = \begin{bmatrix}
+				\hat{x_1} & 1 \\
+				\vdots & \vdots \\
+				\hat{x_n} & 1 \\
+			\end{bmatrix}
+		\end{align*}
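+
+		In code, the flattening and the bias column could be obtained as follows (the array \texttt{images} of shape $(n, 28, 28)$ is an assumed placeholder for the loaded MNIST data).
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+def build_design_matrix(images):
+    """images: assumed array of shape (n, 28, 28) with the MNIST pixel values."""
+    n = images.shape[0]
+    flat = images.reshape(n, 28 * 28)          # each image becomes a 784-dimensional row
+    return np.hstack([flat, np.ones((n, 1))])  # append the constant bias column
+\end{lstlisting}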
+		
+			\subsection{Distinguishing between two digits}
+			
+			In order to distinguish between the digits $i$ and $j$, we take only the corresponding subsets $X_i$ and $X_j$ of the data, where $X_k$ contains exactly those images $x$ with label $f(x) = k$.
+			Then, we train a binary classifier which distinguishes between $i$ and $j$. \\
+			
+			We do this by training a linear regression to predict, for $x_k \in X_i \cup X_j$,
+			
+			\begin{align*}
+				f(x_k) = \begin{cases}
+					1 				& x_k \in X_i \\
+					-1              & \text{otherwise}
+				\end{cases}
+			\end{align*}
+			
+			Or in other words, we have a data set of image vectors $x_k = [x_{k, 1}, \dots, x_{k, 28 \times 28}]$ with labels $y_k \in \{i, j\}$, which we map to the regression targets $\pm 1$ defined above.
+			Then we train a linear regression as discussed in the previous sections using the data $(x_1, y_1), \dots, (x_{|X_i \cup X_j|}, y_{|X_i \cup X_j|})$.
+			
+			Then, in order to predict $\hat{y_l}$, i.e. whether a given image vector $x_l$ shows the digit $i$ or the digit $j$, we use
+			
+			\begin{align*}
+				 \hat{y_l} = \begin{cases}
+					i 				& x_l^T \omega > 0 \\
+					j              & \text{otherwise}
+				\end{cases}
+			\end{align*}
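+
+			Putting the pieces together, one possible sketch of the binary digit classifier looks like this (it reuses the regularized least squares idea from above; \texttt{digit\_labels} is an assumed label array).
+
+\begin{lstlisting}[style=py]
+import numpy as np
+
+def train_binary_digit_classifier(X, digit_labels, i, j, eps=1e-8):
+    """Fit a +1/-1 linear regression separating digit i from digit j."""
+    mask = (digit_labels == i) | (digit_labels == j)
+    X_ij = X[mask]                                        # keep only images of digits i and j
+    targets = np.where(digit_labels[mask] == i, 1.0, -1.0)
+    k = X_ij.shape[1]
+    return np.linalg.solve(X_ij.T @ X_ij + eps * np.eye(k), X_ij.T @ targets)
+
+def predict_digit(x_row, w, i, j):
+    """Predict digit i if the regression output is positive, digit j otherwise."""
+    return i if x_row @ w > 0 else j
+\end{lstlisting}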
+		
+	\clearpage
+
+	\bibliographystyle{unsrt}
+	\bibliography{bib}
+
+
+\end{document}
diff --git a/script/plots/2d_pol_ap_int.png b/script/plots/2d_pol_ap_int.png
new file mode 100644
index 0000000000000000000000000000000000000000..bf8c1b03a9f3af3c1b6750aa1b80f79075f0adc7
Binary files /dev/null and b/script/plots/2d_pol_ap_int.png differ
diff --git a/script/plots/2d_regression.png b/script/plots/2d_regression.png
new file mode 100644
index 0000000000000000000000000000000000000000..b981b60d625980f04ab70d4730bff8cb0313732c
Binary files /dev/null and b/script/plots/2d_regression.png differ
diff --git a/script/res/fu_logo.eps b/script/res/fu_logo.eps
new file mode 100644
index 0000000000000000000000000000000000000000..669749068f2e6fbd87614b1572d137fbab9bc699
Binary files /dev/null and b/script/res/fu_logo.eps differ
diff --git a/script/res/header.tex b/script/res/header.tex
new file mode 100644
index 0000000000000000000000000000000000000000..ae7718a113c8775299c0518799ac95ae35785859
--- /dev/null
+++ b/script/res/header.tex
@@ -0,0 +1,217 @@
+\documentclass[
+	10pt,								
+	parskip=half-,						
+	paper=a4,							
+	english,					
+	]{article}							
+
+
+\usepackage{titling}
+\usepackage[fleqn]{amsmath}
+\usepackage[fleqn]{mathtools}
+\usepackage{mathtools}			
+\usepackage{amssymb}			
+\usepackage{amsthm}				
+\usepackage{mathrsfs}			
+\usepackage{latexsym}
+\usepackage{marvosym}			
+
+\usepackage[absolute,overlay]{textpos}
+\setlength{\TPHorizModule}{1mm}
+\setlength{\TPVertModule}{1mm}	
+
+\usepackage{fontspec} 			
+\usepackage{microtype}			
+\usepackage{lmodern}			
+
+\usepackage{verbatim}
+\usepackage{listings}			
+
+\usepackage{graphicx}
+\usepackage{tabularx}			
+\usepackage{fullpage}
+\usepackage{multirow}			
+\usepackage{rotate}
+\usepackage[cmyk,table]{xcolor} 
+\usepackage[					
+	colorlinks,					
+	linktocpage,				
+	linkcolor=blue				
+	]{hyperref}					
+\usepackage{url}				
+
+\usepackage{enumerate}			
+\usepackage{xspace}				
+\usepackage{cancel}				
+\usepackage{float}  
+
+\usepackage{graphicx}
+\usepackage{fancyvrb}            
+
+
+\usepackage{fp}
+\usepackage{tikz}
+\usetikzlibrary{tikzmark}			
+\usetikzlibrary{positioning}	
+\usetikzlibrary{automata}		
+\usetikzlibrary{arrows}
+\usetikzlibrary{shapes}
+\usetikzlibrary{decorations.pathmorphing}
+\usetikzlibrary{decorations.pathreplacing}
+\usetikzlibrary{decorations.shapes}
+\usetikzlibrary{decorations.text}
+
+
+\lstloadlanguages{Python, Haskell, [LaTeX]TeX, Java}
+\lstset{
+   basicstyle=\footnotesize\ttfamily,	
+   backgroundcolor = \color{bgcolour},	
+   breakatwhitespace=false,	
+   breaklines=true,			
+   captionpos=t,				
+   commentstyle=\color{codeblue}\ttfamily,
+   frame=single,				
+   keepspaces=true,										
+   keywordstyle=\bfseries\ttfamily\color{codepurple},
+   numbers=left,				   							
+   numberstyle=\tiny\color{codegreen},	
+   numbersep=5pt,			
+   stepnumber=1,				
+   showspaces=false,								
+   showstringspaces=false,	
+   showtabs=false,			
+   flexiblecolumns=false,
+   tabsize=1,				
+   stringstyle=\color{orange}\ttfamily,	
+   numberblanklines=false,				
+   xleftmargin=1.2em,					
+   xrightmargin=0.4em,					
+   aboveskip=2ex,
+}
+
+\lstdefinestyle{py}{
+   language=Python,
+}
+\lstdefinestyle{hs}{
+   language=Haskell,
+}
+\lstdefinestyle{tex}{
+	language=[LaTeX]TeX,
+	escapeinside={\%*}{*)},     
+	texcsstyle=*\bfseries\color{blue},
+	morekeywords={*,$,\{,\},\[,\],lstinputlisting,includegraphics,
+	rowcolor,columncolor,listoffigures,lstlistoflistings,
+	subsection,subsubsection,textcolor,tableofcontents,colorbox,
+	fcolorbox,definecolor,cellcolor,url,linktocpage,subtitle,
+	subject,maketitle,usetikzlibrary,node,path,addbibresource,
+	printbibliography},
+     numbers=none,
+     numbersep=0pt,
+     xleftmargin=0.4em,
+}
+
+\lstdefinestyle{java}{
+	language=Java,
+	extendedchars=true,		   						
+}
+
+\lstdefinelanguage[x64]{Assembler}     
+   [x86masm]{Assembler}    
+   {morekeywords={CDQE,CQO,CMPSQ,CMPXCHG16B,JRCXZ,LODSQ,MOVSXD, %
+                  POPFQ,PUSHFQ,SCASQ,STOSQ,IRETQ,RDTSCP,SWAPGS, %
+                  rax,rdx,rcx,rbx,rsi,rdi,rsp,rbp, %
+                  r8,r8d,r8w,r8b,r9,r9d,r9w,r9b}
+}					
+
+\lstdefinestyle{c}{
+	language=c,
+	extendedchars=true,		
+}
+
+\lstdefinestyle{Bash}
+{language=bash,
+keywordstyle=\color{blue},
+basicstyle=\ttfamily,
+morekeywords={peter@kbpet},
+alsoletter={:~$},
+morekeywords=[2]{peter@kbpet:},
+keywordstyle=[2]{\color{red}},
+literate={\$}{{\textcolor{red}{\$}}}1
+         {:}{{\textcolor{red}{:}}}1
+         {~}{{\textcolor{red}{\textasciitilde}}}1,
+}
+
+
+\newcommand\FU{Freie Universität Berlin\xspace}
+\newcommand\gdw{g.\,d.\,w.\xspace}
+\newcommand\oBdA{o.\,B.\,d.\,A.\xspace}
+\newcommand{\Eu}{\texteuro}
+\newcommand\N{\mathbb{N}\xspace}
+\newcommand\Q{\mathbb{Q}\xspace}
+\newcommand\R{\mathbb{R}\xspace}
+\newcommand\Z{\mathbb{Z}\xspace}
+\newcommand\ohneNull{\ensuremath{\backslash\lbrace 0\rbrace}}
+\let\dhALT\dh	
+\renewcommand\dh{d.\,h.\xspace}	% renew overrides the command \dh
+\newcommand\Bolt{\;\text{\LARGE\raisebox{-0.3em}{\Lightning}\normalsize}\xspace}
+\newcommand\zz{\ensuremath{\raisebox{+0.25ex}{Z}
+			\kern-0.4em\raisebox{-0.25ex}{Z}%
+			\;\xspace}}
+\newcommand{\from}{\ensuremath{\colon}}
+\newcommand{\floor}[1]{\lfloor{#1}\rfloor}
+\newcommand{\ceil}[1]{\lceil{#1}\rceil}
+ \renewcommand{\L}{\ensuremath{\mathcal{L}}\xspace}
+ \renewcommand{\P}{\ensuremath{\mathcal{P}}\xspace}
+ \newcommand{\NL}{\ensuremath{\mathcal{N}\kern-0.2em\mathcal{L}}\xspace}
+ \newcommand{\NP}{\ensuremath{\mathcal{NP}}\xspace}
+
+
+\DeclareMathOperator{\Landau}{\mathcal{O}}
+\DeclareMathOperator{\True}{True}
+\DeclareMathOperator{\False}{False}
+
+
+\newtheorem{theorem}{Theorem}
+\newtheorem{corollary}[theorem]{Consequence}
+\newtheorem{lemma}[theorem]{Lemma}
+\newtheorem{observation}[theorem]{Observation}
+\newtheorem{definition}[theorem]{Definition}
+\newtheorem{Literatur}[theorem]{Literature}
+
+\makeatletter
+\newenvironment{Proof}[1][\proofname]{\par
+  \pushQED{\qed}%
+  \normalfont \topsep6\p@\@plus6\p@\relax
+  \trivlist
+  \item[\hskip\labelsep
+
+        \bfseries
+    #1\@addpunct{.}]\ignorespaces
+}{%
+  \popQED\endtrivlist\@endpefalse
+}
+\makeatother
+
+
+\let\definecolor=\xdefinecolor
+\definecolor{FUgreen}{RGB}{153,204,0}
+\definecolor{FUblue}{RGB}{0,51,102}
+
+\definecolor{middlegray}{rgb}{0.5,0.5,0.5}
+\definecolor{lightgray}{rgb}{0.8,0.8,0.8}
+\definecolor{orange}{rgb}{0.8,0.3,0.3}
+\definecolor{azur}{rgb}{0,0.7,1}
+\definecolor{yac}{rgb}{0.6,0.6,0.1}
+\definecolor{Pink}{rgb}{1,0,0.6}
+
+\definecolor{bgcolour}{rgb}{0.97,0.97,0.97}
+\definecolor{codegreen}{rgb}{0,0.6,0}
+\definecolor{codegray}{rgb}{0.35,0.35,0.35}
+\definecolor{codepurple}{rgb}{0.58,0,0.82}
+\definecolor{codeblue}{rgb}{0.4,0.5,1}
+
+
+
+\textheight = 230mm		
+\footskip = 10ex			
+\parindent 0pt			
diff --git a/script/res/titlepage.tex b/script/res/titlepage.tex
new file mode 100644
index 0000000000000000000000000000000000000000..9475cf43768c92c170ea9a154ddf7a06efff666e
--- /dev/null
+++ b/script/res/titlepage.tex
@@ -0,0 +1,23 @@
+\begin{titlepage}
+	\title{Linear Least Squares Regression}
+	\author{Boyan Hristov}
+	\date{\normalsize \today}
+
+	\maketitle
+	
+	\rule{\linewidth}{0.8pt}
+	\vspace{1cm}
+	
+	\begin{abstract}
+		\abstractcontent
+	\end{abstract}
+
+
+	\begin{textblock}{100}(95,7)
+		\makebox[\dimexpr\textwidth+1cm][r]{
+			\includegraphics[width=0.4\textwidth]{./res/fu_logo.eps}
+		}
+	\end{textblock}
+		
+	
+\end{titlepage}
\ No newline at end of file