diff --git a/.gitignore b/.gitignore index eec8f5abb22f13e206cc514f642362527bb18840..2aa5144f890f46fcc27ec766b7cf1e0ef5a906ec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,11 @@ .idea/ .ipynb_checkpoints/ datasets/ +*.aux +*.bbl +*.blg +*.log +*.out +*.synctex.gz diff --git a/lls_demo.ipynb b/lls_demo.ipynb index c11345f0e487e71d36a449f3dc63b126b90e9e00..b44fa05ddb33eff9cdee26e08b30d053d5897da3 100644 --- a/lls_demo.ipynb +++ b/lls_demo.ipynb @@ -54,7 +54,7 @@ "\n", "$det(A - I\\epsilon) = 0 \\iff Eig(A, \\epsilon) \\neq \\emptyset$\n", "\n", - "As $A$ is a symmetric matrix, we know it is positive semidefinite, therefore it\n", + "As $A$ in our case is $X^TX$, it is a symmetric positive semidefinite matrix, therefore it\n", "has only non-negative eigenvalues. Therefore, we could safely assume\n", "\n", "$det(A + I\\epsilon) \\neq 0$\n", diff --git a/script/bib.bib b/script/bib.bib new file mode 100644 index 0000000000000000000000000000000000000000..041ea407b89e8746ad42f6c9516c8db26b3f38bf --- /dev/null +++ b/script/bib.bib @@ -0,0 +1,40 @@ + @article{moorepen, + author = {Gower, John}, + year = {2012}, + month = {04}, + pages = {806-807}, + title = {Generalized Inverse Matrices}, + volume = {14}, + journal = {Technometrics}, + doi = {10.1080/00401706.1972.10488972} + } + +@article{mnist, + added-at = {2010-06-28T21:16:30.000+0200}, + author = {LeCun, Yann and Cortes, Corinna}, + biburl = {https://www.bibsonomy.org/bibtex/2935bad99fa1f65e03c25b315aa3c1032/mhwombat}, + groups = {public}, + howpublished = {http://yann.lecun.com/exdb/mnist/}, + interhash = {21b9d0558bd66279df9452562df6e6f3}, + intrahash = {935bad99fa1f65e03c25b315aa3c1032}, + keywords = {MSc _checked character_recognition mnist network neural}, + lastchecked = {2016-01-14 14:24:11}, + timestamp = {2016-07-12T19:25:30.000+0200}, + title = {{MNIST} handwritten digit database}, + url = {http://yann.lecun.com/exdb/mnist/}, + username = {mhwombat}, + year = 2010 +} + +@misc{lai, + author = {Frank Hoffman}, + year = {2016}, + title = {Lineare Algebra für Informatiker} +} + +@misc{laii, + author = {Victoria Hoskins}, + year = {2018}, + title = {Lineare Algebra II}, + url = {https://userpage.fu-berlin.de/hoskins/LAII\_Lehrplan.pdf} +} \ No newline at end of file diff --git a/script/lls.pdf b/script/lls.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e1fbdace3d49dbf5d4e38e4e2f1882e8578ccd76 Binary files /dev/null and b/script/lls.pdf differ diff --git a/script/lls.tex b/script/lls.tex new file mode 100644 index 0000000000000000000000000000000000000000..fcf6a1ffb2cb2d3a5d4a1f130c7f96b9b911c19c --- /dev/null +++ b/script/lls.tex @@ -0,0 +1,370 @@ +\input{./res/header.tex} + +\newcommand{\abstractcontent}{ + Linear regression is used to find a linear relationship between independent variables and outcomes of an experiment dependent on those variables. In this tutorial + we are going to find a model which describes this linear relationship between variables and outcomes with respect to some parameters. Then, we are going to + define a reasonable criterion for how accurate our model is given specific experiment data. Based on this criterion, we are going to derive a solution for + finding the optimal model parameters. + + We are then going to generalize this idea and talk about how linear regression can be used for polynomial approximation and compare the results with the ones + obtained using polynomial interpolation. 
Then we are going to use linear regression for solving a classification problem, in particular for recognizing
+    handwritten digits. We are then going to observe the flaws of this solution and give some motivation for alternative solutions.
+}
+
+\begin{document}
+
+    \input{./res/titlepage.tex}
+
+    \section{Motivation}
+    Given a set of points $(x_1, y_1), \dots, (x_n, y_n)$ we are looking for a linear function which, given $x_i$, best approximates $y_i$.
+    We are assuming each $x_i$ is the data with which an experiment was conducted and $y_i$ is the measured result of this experiment, with some small
+    measurement error.
+
+    In other words, we can define our experiment as a function $f(x)$ such that:
+
+    \begin{align}
+        f(x_i) = \hat{y_i} \\
+        \hat{f(x_i)} = f(x_i) + \mathcal{N}(\mu,\,\sigma^{2}) = \hat{y_i} + \mathcal{N}(\mu,\,\sigma^{2}) = y_i
+    \end{align}
+
+    Here $\mathcal{N}(\mu,\,\sigma^{2})$ is a small measurement error, modeled as a sample from a normal distribution with mean $\mu$ and variance $\sigma^2$.
+    Equation (1) models the true outcome of an experiment with variables $x_i$. Equation (2) models the outcome we measure when conducting
+    an experiment with variables $x_i$. Our task is to find a good approximation of $f(x)$ using observations of $\hat{f(x)}$.
+
+    In a 2-dimensional case where $x_i$ is just a point on the x-axis, we are looking for $f(x)$ such that:
+
+    \begin{align*}
+        f(x) = ax + b
+    \end{align*}
+
+    Figure \ref{fig:2dreg} shows an example of such a scenario and the optimal $f(x)$ line with slope $a$ and bias $b$.
+
+    \begin{figure}[h!]
+        \includegraphics[width=\textwidth]{./plots/2d_regression.png}
+        \caption{Linear regression in 2D}
+        \label{fig:2dreg}
+    \end{figure}
+
+    \clearpage
+
+    \section{Model accuracy criterion (Mean squared error)}
+    Using the 2-dimensional model defined above, we are looking for $f(x)$ such that
+
+    \begin{align*}
+        & \forall i \in \{1, \dots, n\}: f(x_i) = a x_i + b = y_i \\
+        \iff & \forall i \in \{1, \dots, n\}: f(x_i) - y_i = (a x_i + b) - y_i = 0
+    \end{align*}
+
+    However, as one can see from the example above, such a function usually doesn't exist, as there is no line which goes through all points. We need to find the
+    best possible line, so we define the error for each data point as $f(x_i) - y_i$ and try to minimize it. As we want
+    $f(x_i) = y_i$, it is really the magnitude of the deviation between prediction and observation that we want to minimize, which squaring the error achieves.
+    Therefore, we use the mean squared error.
+
+    \begin{align}
+        MSE((x_1, y_1), \dots, (x_n, y_n)) = \frac{1}{n}\sum_{i = 1}^{n} (f(x_i) - y_i)^2 = \frac{1}{n}\sum_{i = 1}^{n} (a x_i + b - y_i)^2
+    \end{align}
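+
+    As a small illustration of this criterion, the following Python sketch (the data and names here are purely
+    illustrative and not part of the derivation; we only assume NumPy is available) evaluates the mean squared
+    error of a candidate line and fits a least squares line with an off-the-shelf routine.
+
+    \begin{lstlisting}[style=py]
+import numpy as np
+
+# Illustrative data: y depends roughly linearly on x, plus measurement noise.
+rng = np.random.default_rng(0)
+x = np.linspace(0, 10, 50)
+y = 2.0 * x + 1.0 + rng.normal(0.0, 1.0, size=x.shape)
+
+def mse(a, b, x, y):
+    """Mean squared error of the line f(x) = a*x + b on the data (x, y)."""
+    return np.mean((a * x + b - y) ** 2)
+
+# np.polyfit with degree 1 returns the least squares slope and bias.
+a, b = np.polyfit(x, y, deg=1)
+print(a, b, mse(a, b, x, y))
+    \end{lstlisting}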
+
+    \section{m-dimensional scenario}
+
+    If $x_i$ is an m-dimensional variable, our linear model becomes
+
+    \begin{align*}
+        f(x_i) = \omega_1 x_{i,1} + \omega_2 x_{i, 2} + \dots + \omega_m x_{i, m} + b
+    \end{align*}
+
+    We are now going to restate our problem in matrix notation, as the derivation of the optimal parameters $\omega_1, \dots, \omega_m$ will be much easier in this notation.
+
+    First, we observe we can represent $f(x_i)$ as just a vector multiplication, as
+
+    \begin{align*}
+        f(x_i) &= \omega_1 x_{i,1} + \omega_2 x_{i, 2} + \dots + \omega_m x_{i, m} + b \\
+        &= [x_{i, 1}, \dots, x_{i,m}, 1] \begin{bmatrix}
+            \omega_{1} \\
+            \vdots \\
+            \omega_{m} \\
+            b
+        \end{bmatrix}
+    \end{align*}
+
+    We define our experiment variables $x_1, \dots, x_n$ as a matrix $\hat{X}$ such that
+
+    \begin{align*}
+        \hat{X} &= \begin{bmatrix}
+            x_{1} \\
+            x_{2} \\
+            \vdots \\
+            x_{n}
+        \end{bmatrix} \\
+        &= \begin{bmatrix}
+            x_{1,1} & x_{1,2} & \dots & x_{1,m} \\
+            x_{2,1} & x_{2,2} & \dots & x_{2,m} \\
+            \vdots & & & \vdots \\
+            x_{n,1} & x_{n,2} & \dots & x_{n,m} \\
+        \end{bmatrix} \in Mat_{n \times m}(\mathbb{R})
+    \end{align*}
+
+    To use the observation above and represent the model with just a matrix-vector multiplication, we then define
+
+    \begin{align*}
+        X &= \begin{bmatrix}
+            & & 1 \\
+            & \hat{X} & \vdots \\
+            & & 1
+        \end{bmatrix}
+    \end{align*}
+
+    We collect our model parameters $\omega_1, \dots, \omega_m$ together with the bias $b$ in a vector
+
+    \begin{align*}
+        \omega &= \begin{bmatrix}
+            \omega_{1} \\
+            \omega_{2} \\
+            \vdots \\
+            \omega_{m} \\
+            b
+        \end{bmatrix}
+    \end{align*}
+
+    and our experiment observations as a vector
+
+    \begin{align*}
+        y &= \begin{bmatrix}
+            y_{1} \\
+            y_{2} \\
+            \vdots \\
+            y_{n}
+        \end{bmatrix}
+    \end{align*}
+
+    We then observe
+
+    \begin{align}
+        MSE(X, y) &= \frac{1}{n}(X \omega - y)^{T} (X \omega - y) \\
+        &= \frac{1}{n} ((X \omega)^T - y^T)(X \omega - y) \\
+        &= \frac{1}{n} ( (\omega^T X^T - y^T)(X \omega - y) )\\
+        &= \frac{1}{n} ( \omega^T X^T X \omega - \omega^T X^T y - y^T X \omega + y^Ty )\\
+        &= \frac{1}{n} ( \omega^T X^T X \omega - 2 \omega^T X^T y + y^Ty )
+    \end{align}
+
+    Note that $(7) = (8)$ holds as $\omega^T X^T y = (y^T X \omega)^T$, both are scalars, and every scalar equals its own transpose, i.e. $x^T = x$ for all $x \in \mathbb{R}$.
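+
+    The matrix form can also serve as a small sanity check in code. The following Python sketch (illustrative
+    names and data, NumPy assumed) builds the matrix $X$ with the appended column of ones and evaluates
+    $MSE(X, y)$ exactly as in the last equation.
+
+    \begin{lstlisting}[style=py]
+import numpy as np
+
+def design_matrix(X_hat):
+    """Append a column of ones (the bias column) to the n x m data matrix."""
+    n = X_hat.shape[0]
+    return np.hstack([X_hat, np.ones((n, 1))])
+
+def mse(X, omega, y):
+    """(1/n) * (X omega - y)^T (X omega - y), the matrix form derived above."""
+    r = X @ omega - y
+    return (r @ r) / len(y)
+
+# Illustrative 3-dimensional experiment data.
+rng = np.random.default_rng(0)
+X_hat = rng.normal(size=(100, 3))
+y = X_hat @ np.array([1.0, -2.0, 0.5]) + 3.0
+
+X = design_matrix(X_hat)
+omega = np.zeros(X.shape[1])  # some candidate weights and bias
+print(mse(X, omega, y))
+    \end{lstlisting}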
+
+    \section{Deriving least squares solution to linear regression}
+
+    \subsection{Minimizing the mean squared error}
+
+    We know the mean squared error is a convex quadratic function of $\omega$ which is bounded below by zero. Therefore, every $\omega$ at which its gradient vanishes is a global minimum.
+    Hence, we can find the optimal parameters by finding the $\omega$ for which $\frac{\partial MSE(X, y)}{\partial \omega} = 0$.
+
+    \begin{align*}
+        & \frac{\partial MSE(X, y)}{\partial \omega}  &= & 0 \\
+        \iff & \frac{\partial n MSE(X, y)}{\partial \omega}  &= & 0 \\
+        \iff & \frac{\partial (\omega^T X^T X \omega - 2 \omega^T X^T y + y^Ty)}{\partial \omega}  &= & 0 \\
+        \iff & \frac{\partial (\omega^T X^T X \omega - 2 \omega^T X^T y)}{\partial \omega}  &= & 0 \tag{$y^Ty$ does not depend on $\omega$} \\
+        \iff & \frac{\partial \omega^T X^T X \omega}{\partial \omega} - \frac{\partial (2 \omega^T X^T y)}{\partial \omega}  &= & 0 \\
+        \iff & \frac{\partial \omega^T X^T X \omega}{\partial \omega}  &= & \frac{\partial (2 \omega^T X^T y)}{\partial \omega}
+    \end{align*}
+
+    \subsection{Finding matrix derivatives}
+
+    Let's start with $\frac{\partial 2 \omega^T X^T y}{\partial \omega}$.
+
+    We simplify the situation by observing that $X^T y$ is just a vector.
+    Therefore, for vectors $v, x \in \mathbb{R}^k$ we find
+
+    \begin{align*}
+        & \frac{\partial x^T v}{\partial x} \\
+        &= \frac{\partial (x_1 v_1 + x_2 v_2 + \dots + x_k v_k)}{\partial x} \\
+        &= \begin{bmatrix}
+            \frac{\partial (x_1 v_1 + x_2 v_2 + \dots + x_k v_k)}{\partial x_1} \\
+            \vdots \\
+            \frac{\partial (x_1 v_1 + x_2 v_2 + \dots + x_k v_k)}{\partial x_k} \\
+        \end{bmatrix} \\
+        & = \begin{bmatrix}
+            v_1 \\
+            \vdots \\
+            v_k
+        \end{bmatrix} \\
+        & = v
+    \end{align*}
+
+    $\implies \frac{\partial 2 \omega^T X^T y}{\partial \omega} = 2 X^T y $
+
+    \rule{\linewidth}{0.8pt}
+    \vspace{1cm}
+
+
+    Finding the partial derivative $\frac{\partial \omega^T X^T X \omega}{\partial \omega}$ is a little more complicated though. We observe $X^TX$ is a symmetric matrix,
+    as clearly $(X^TX)^T = X^TX$. So we can simplify the situation by looking for the derivative $\frac{\partial x^T A x}{\partial x}$ for a symmetric $A \in Mat_{k \times k}(\mathbb{R})$ and $x \in \mathbb{R}^k$.
+
+    A symmetric square matrix is uniquely determined by its quadratic form, as learned in Linear Algebra II \cite{laii}, so the expansion of
+    $x^T A x$ is standard. For the sake of completeness, we show the calculation anyway.
+
+    We observe for $A =
+        \begin{bmatrix}
+            a_{1,1} & \dots & a_{1, k} \\
+            \vdots & \ddots & \vdots \\
+            a_{k, 1} & \dots & a_{k, k}
+        \end{bmatrix} =
+        \begin{bmatrix}
+            a_{1,1} & \dots & a_{1, k} \\
+            \vdots & \ddots & \vdots \\
+            a_{1, k} & \dots & a_{k, k}
+        \end{bmatrix}
+    $
+
+    \begin{align*}
+        \frac{\partial x^T A x}{\partial x} &= \frac{\partial \sum_{i = 1}^{k} \sum_{j = 1}^{k} x_i x_j a_{i, j} } {\partial x} \\
+        &= \frac{\partial \left( \sum_{i = 1}^{k} x_i^2 a_{i, i} + \sum_{j = 1}^{k}\sum_{l > j} 2 x_j x_l a_{j, l} \right) }{\partial x} \tag{because $a_{j, l} = a_{l, j}$ as $A$ is symmetric} \\
+        &= \begin{bmatrix}
+            \frac{\partial \left( \sum_{i = 1}^{k} x_i^2 a_{i, i} + \sum_{j = 1}^{k}\sum_{l > j} 2 x_j x_l a_{j, l} \right) }{\partial x_1} \\
+            \vdots \\
+            \frac{\partial \left( \sum_{i = 1}^{k} x_i^2 a_{i, i} + \sum_{j = 1}^{k}\sum_{l > j} 2 x_j x_l a_{j, l} \right) }{\partial x_k}
+        \end{bmatrix} \\
+        &= \begin{bmatrix}
+            2 a_{1,1} x_1 + \dots + 2 a_{1, k} x_k \\
+            \vdots \\
+            2 a_{k,1} x_1 + \dots + 2 a_{k, k} x_k
+        \end{bmatrix} \\
+        &= 2Ax
+    \end{align*}
+
+    $\implies \frac{\partial \omega^T X^T X \omega}{\partial \omega} = 2 X^T X \omega$
+
+    \clearpage
+
+    \subsection{Combining results}
+    Combining the results of the previous two sections, we arrive at the conclusion
+
+    \begin{align*}
+        & \frac{\partial MSE(X, y)}{\partial \omega}  &= & 0 \\
+        \iff & \frac{\partial \omega^T X^T X \omega}{\partial \omega}  &= & \frac{\partial (2 \omega^T X^T y)}{\partial \omega} \\
+        \iff & 2 X^T X \omega  &=& 2 X^T y \\
+        \iff & X^T X \omega  &=& X^T y \\
+        \iff & \omega  &=& (X^T X)^{-1} X^T y
+    \end{align*}
+
+    So the optimal least squares solution to linear regression is $\omega = (X^T X)^{-1} X^T y$.
+    However, we are not done yet, as the matrix $X^TX$ is possibly singular and therefore not invertible (one could show exactly in which cases it is singular,
+    but we omit this step here as it is out of the scope of the current tutorial).
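+
+    In code, this result translates directly into solving the normal equations $X^TX\omega = X^Ty$. The following
+    Python sketch (illustrative names and data, NumPy assumed) uses a linear solver instead of forming $(X^TX)^{-1}$
+    explicitly, which is numerically preferable but equivalent whenever $X^TX$ is invertible.
+
+    \begin{lstlisting}[style=py]
+import numpy as np
+
+def fit_linear_regression(X, y):
+    """Solve the normal equations X^T X omega = X^T y for omega.
+
+    Raises numpy.linalg.LinAlgError if X^T X is singular; that case is
+    discussed in the next subsection.
+    """
+    return np.linalg.solve(X.T @ X, X.T @ y)
+
+# Illustrative usage: a noisy line with slope 2 and bias 1.
+rng = np.random.default_rng(0)
+x = rng.uniform(0, 10, size=100)
+X = np.column_stack([x, np.ones_like(x)])
+y = 2.0 * x + 1.0 + rng.normal(0.0, 0.5, size=x.shape)
+
+omega = fit_linear_regression(X, y)
+print(omega)  # approximately [2.0, 1.0]
+    \end{lstlisting}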
+
+    \subsection{Inverting a possibly singular matrix}
+    In order to invert a singular matrix, often the Moore-Penrose pseudoinverse is used \cite{moorepen}.
+    As we know from ``Lineare Algebra für Informatiker'' \cite{lai}, the characteristic polynomial of $A \in Mat_{n \times n}(\mathbb{R})$ is
+    $\chi_A(\lambda) = det(A - I \lambda)$. Further we know
+
+    \begin{align*}
+        & \chi_A(\lambda) = 0 \\
+        \iff & det(A - I \lambda) = 0 \\
+        \iff & Ker(A - I \lambda) \neq \{0\} \\
+        \iff & Eig(A, \lambda) \neq \{0\} \\
+        \iff & \lambda \text{ is an eigenvalue of } A
+    \end{align*}
+
+    Further, as learned in Linear Algebra II \cite{laii}, a symmetric matrix $A$ satisfies $\forall x \in \mathbb{R}^k: x^T A x \geq 0$ if and only if all of its eigenvalues $\lambda_i$ satisfy $\lambda_i \geq 0$.
+    Or informally, a positive semidefinite symmetric matrix has only non-negative eigenvalues. The matrix $X^TX$ is positive semidefinite, since $x^T X^T X x = \|Xx\|^2 \geq 0$ for all $x$, so no $-\epsilon$ with $\epsilon > 0$ can be an eigenvalue of $X^TX$. \\
+
+    $\implies \forall \epsilon > 0: det(X^TX + I\epsilon) = det(X^TX - I(-\epsilon)) \neq 0$ \\
+
+    Therefore, we could use $(X^TX + I\epsilon)^{-1}$ instead of $(X^TX)^{-1}$ for some very small $\epsilon$.
+
+    \clearpage
+
+    \section{Polynomial approximation}
+
+    Linear regression is only linear in the parameter space $\omega$. However, we could apply a non-linear transformation to the data set before applying linear regression.
+
+    In a 2-dimensional scenario we could transform the matrix $X$ from above
+
+    \begin{align*}
+        X = \begin{bmatrix*}
+            x_{1,1} & 1 \\
+            \vdots & \vdots \\
+            x_{n,1} & 1
+        \end{bmatrix*}
+    \end{align*}
+
+    into $k$-dimensional data
+
+    \begin{align*}
+        X_k = & \begin{bmatrix*}
+            x_{1,1} & x_{1,2} & \dots & x_{1,k} & 1 \\
+            \vdots & & & & \vdots \\
+            x_{n,1} & x_{n,2} & \dots & x_{n,k} & 1 \\
+        \end{bmatrix*} \\
+        =& \begin{bmatrix*}
+            x_{1,1} & x_{1,1}^2 & \dots & x_{1,1}^k & 1 \\
+            \vdots & & & & \vdots \\
+            x_{n,1} & x_{n,1}^2 & \dots & x_{n,1}^k & 1 \\
+        \end{bmatrix*}
+    \end{align*}
+
+    This way, we can approximate a polynomial of degree $k$. Often it makes sense to increment the degree $k$ only until one is happy with the mean squared error.
+    Otherwise, one might end up with a model which is very precise on the data it was fitted to, but very inaccurate on unseen data. This
+    phenomenon is known as ``overfitting''. Figure \ref{fig:polapint} compares the results of polynomial approximation and polynomial interpolation.
+
+    \begin{figure}[h!]
+        \includegraphics[width=\textwidth]{./plots/2d_pol_ap_int.png}
+        \caption{Polynomial approximation vs.\ interpolation}
+        \label{fig:polapint}
+    \end{figure}
+
+
+    \section{Classification}
+
+    One could also use linear regression to solve a classification problem. As an example, we are going to distinguish between handwritten digits
+    using the famous MNIST \cite{mnist} data set. \\
+
+    The data set consists of $28 \times 28$ pixel images. Our algorithm needs to predict the digit $y_i \in \{0, \dots, 9\}$ given an image $\hat{x_i} = \begin{bmatrix*}
+        x_{i, 1, 1} & \dots & x_{i, 1, 28} \\
+        \vdots & \ddots & \vdots \\
+        x_{i, 28, 1} & \dots & x_{i, 28, 28}
+    \end{bmatrix*}$
+
+    First, we transform each image into a $28 \cdot 28 = 784$-dimensional row vector
+
+    \begin{align*}
+        \hat{x_i} = [x_{i, 1, 1}, \dots, x_{i, 1, 28}, x_{i,2,1}, \dots, x_{i,2,28}, \dots, x_{i, 28, 28}]
+    \end{align*}
+
+    Then, similarly to previous sections, we define
+
+    \begin{align*}
+        X = \begin{bmatrix}
+            \hat{x_1} & 1 \\
+            \vdots & \vdots \\
+            \hat{x_n} & 1 \\
+        \end{bmatrix}
+    \end{align*}
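+
+    In Python, this preprocessing might look as follows (a sketch with illustrative names; it assumes the images
+    are already loaded as an $n \times 28 \times 28$ NumPy array together with their labels). The mapping of the
+    labels to $\pm 1$ is explained in the next subsection.
+
+    \begin{lstlisting}[style=py]
+import numpy as np
+
+def build_design_matrix(images):
+    """Flatten n x 28 x 28 images into rows and append the bias column of ones."""
+    n = images.shape[0]
+    flat = images.reshape(n, 28 * 28).astype(np.float64)
+    return np.hstack([flat, np.ones((n, 1))])
+
+def select_two_digits(images, labels, i, j):
+    """Keep only the images labeled i or j and map the labels to +1 / -1."""
+    mask = (labels == i) | (labels == j)
+    X = build_design_matrix(images[mask])
+    y = np.where(labels[mask] == i, 1.0, -1.0)
+    return X, y
+    \end{lstlisting}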
+
+    \subsection{Distinguishing between two digits}
+
+    In order to distinguish between the digits $i$ and $j$, we only take the subsets $X_i, X_j$ of our data, where $X_d$ contains exactly the images labeled with the digit $d$.
+    On this reduced data set we train a binary classifier which distinguishes between $i$ and $j$.
+
+    We do this by training a linear regression to predict, for $x_k \in X_i \cup X_j$,
+
+    \begin{align*}
+        f(x_k) = \begin{cases}
+            1 & x_k \in X_i \\
+            -1 & x_k \in X_j
+        \end{cases}
+    \end{align*}
+
+    In other words, we have a data set of image vectors $x_k = [x_{k, 1}, \dots, x_{k, 28 \cdot 28}]$ and targets $y_k \in \{1, -1\}$, where $y_k = 1$ exactly when $x_k$ shows the digit $i$.
+    We then train a linear regression as discussed in the previous sections using the data $(x_1, y_1), \dots, (x_{|X_i \cup X_j|}, y_{|X_i \cup X_j|})$.
+
+    Then, in order to predict $\hat{y_l}$, i.e. whether a given image vector $x_l$ shows the digit $i$ or the digit $j$, we use
+
+    \begin{align*}
+        \hat{y_l} = \begin{cases}
+            i & x_l^T \omega > 0 \\
+            j & \text{otherwise}
+        \end{cases}
+    \end{align*}
+
+    \clearpage
+
+    \bibliographystyle{unsrt}
+    \bibliography{bib}
+
+
+\end{document}
diff --git a/script/plots/2d_pol_ap_int.png b/script/plots/2d_pol_ap_int.png
new file mode 100644
index 0000000000000000000000000000000000000000..bf8c1b03a9f3af3c1b6750aa1b80f79075f0adc7
Binary files /dev/null and b/script/plots/2d_pol_ap_int.png differ
diff --git a/script/plots/2d_regression.png b/script/plots/2d_regression.png
new file mode 100644
index 0000000000000000000000000000000000000000..b981b60d625980f04ab70d4730bff8cb0313732c
Binary files /dev/null and b/script/plots/2d_regression.png differ
diff --git a/script/res/fu_logo.eps b/script/res/fu_logo.eps
new file mode 100644
index 0000000000000000000000000000000000000000..669749068f2e6fbd87614b1572d137fbab9bc699
Binary files /dev/null and b/script/res/fu_logo.eps differ
diff --git a/script/res/header.tex b/script/res/header.tex
new file mode 100644
index 0000000000000000000000000000000000000000..ae7718a113c8775299c0518799ac95ae35785859
--- /dev/null
+++ b/script/res/header.tex
@@ -0,0 +1,217 @@
+\documentclass[
+    10pt,
+    parskip=half-,
+    paper=a4,
+    english,
+    ]{article}
+
+
+\usepackage{titling}
+\usepackage[fleqn]{amsmath}
+\usepackage[fleqn]{mathtools}
+\usepackage{mathtools}
+\usepackage{amssymb}
+\usepackage{amsthm}
+\usepackage{mathrsfs}
+\usepackage{latexsym}
+\usepackage{marvosym}
+
+\usepackage[absolute,overlay]{textpos}
+\setlength{\TPHorizModule}{1mm}
+\setlength{\TPVertModule}{1mm}
+
+\usepackage{fontspec}
+\usepackage{microtype}
+\usepackage{lmodern}
+
+\usepackage{verbatim}
+\usepackage{listings}
+
+\usepackage{graphicx}
+\usepackage{tabularx}
+\usepackage{fullpage}
+\usepackage{multirow}
+\usepackage{rotate}
+\usepackage[cmyk,table]{xcolor}
+\usepackage[
+    colorlinks,
+    linktocpage,
+    linkcolor=blue
+    ]{hyperref}
+\usepackage{url}
+
+\usepackage{enumerate}
+\usepackage{xspace}
+\usepackage{cancel}
+\usepackage{float}
+
+\usepackage{graphicx}
+\usepackage{fancyvrb}
+
+
+\usepackage{fp}
+\usepackage{tikz}
+\usetikzlibrary{tikzmark}
+\usetikzlibrary{positioning}
+\usetikzlibrary{automata}
+\usetikzlibrary{arrows}
+\usetikzlibrary{shapes}
+\usetikzlibrary{decorations.pathmorphing}
+\usetikzlibrary{decorations.pathreplacing}
+\usetikzlibrary{decorations.shapes}
+\usetikzlibrary{decorations.text}
+
+
+\lstloadlanguages{Python, Haskell, [LaTeX]TeX, Java}
+\lstset{
+    basicstyle=\footnotesize\ttfamily,
+    backgroundcolor = \color{bgcolour},
+    breakatwhitespace=false,
+    breaklines=true,
+    captionpos=t,
+    commentstyle=\color{codeblue}\ttfamily,
+    frame=single,
+    keepspaces=true,
+    keywordstyle=\bfseries\ttfamily\color{codepurple},
+    numbers=left,
+    numberstyle=\tiny\color{codegreen},
+    numbersep=5pt,
+    stepnumber=1,
+    showspaces=false,
+    showstringspaces=false,
+    showtabs=false,
+    flexiblecolumns=false,
+    tabsize=1,
+    stringstyle=\color{orange}\ttfamily,
+
numberblanklines=false, + xleftmargin=1.2em, + xrightmargin=0.4em, + aboveskip=2ex, +} + +\lstdefinestyle{py}{ + language=Python, +} +\lstdefinestyle{hs}{ + language=Haskell, +} +\lstdefinestyle{tex}{ + language=[LaTeX]TeX, + escapeinside={\%*}{*)}, + texcsstyle=*\bfseries\color{blue}, + morekeywords={*,$,\{,\},\[,\],lstinputlisting,includegraphics, + rowcolor,columncolor,listoffigures,lstlistoflistings, + subsection,subsubsection,textcolor,tableofcontents,colorbox, + fcolorbox,definecolor,cellcolor,url,linktocpage,subtitle, + subject,maketitle,usetikzlibrary,node,path,addbibresource, + printbibliography}, + numbers=none, + numbersep=0pt, + xleftmargin=0.4em, +} + +\lstdefinestyle{java}{ + language=Java, + extendedchars=true, +} + +\lstdefinelanguage[x64]{Assembler} + [x86masm]{Assembler} + {morekeywords={CDQE,CQO,CMPSQ,CMPXCHG16B,JRCXZ,LODSQ,MOVSXD, % + POPFQ,PUSHFQ,SCASQ,STOSQ,IRETQ,RDTSCP,SWAPGS, % + rax,rdx,rcx,rbx,rsi,rdi,rsp,rbp, % + r8,r8d,r8w,r8b,r9,r9d,r9w,r9b} +} + +\lstdefinestyle{c}{ + language=c, + extendedchars=true, +} + +\lstdefinestyle{Bash} +{language=bash, +keywordstyle=\color{blue}, +basicstyle=\ttfamily, +morekeywords={peter@kbpet}, +alsoletter={:~$}, +morekeywords=[2]{peter@kbpet:}, +keywordstyle=[2]{\color{red}}, +literate={\$}{{\textcolor{red}{\$}}}1 + {:}{{\textcolor{red}{:}}}1 + {~}{{\textcolor{red}{\textasciitilde}}}1, +} + + +\newcommand\FU{Freie Universität Berlin\xspace} +\newcommand\gdw{g.\,d.\,w.\xspace} +\newcommand\oBdA{o.\,B.\,d.\,A.\xspace} +\newcommand{\Eu}{\texteuro} +\newcommand\N{\mathbb{N}\xspace} +\newcommand\Q{\mathbb{Q}\xspace} +\newcommand\R{\mathbb{R}\xspace} +\newcommand\Z{\mathbb{Z}\xspace} +\newcommand\ohneNull{\ensuremath{\backslash\lbrace 0\rbrace}} +\let\dhALT\dh +\renewcommand\dh{d.\,h.\xspace} %renew überschreibt command \dh +\newcommand\Bolt{\;\text{\LARGE\raisebox{-0.3em}{\Lightning}\normalsize}\xspace} +\newcommand\zz{\ensuremath{\raisebox{+0.25ex}{Z} + \kern-0.4em\raisebox{-0.25ex}{Z}% + \;\xspace}} +\newcommand{\from}{\ensuremath{\colon}} +\newcommand{\floor}[1]{\lfloor{#1}\rfloor} +\newcommand{\ceil}[1]{\lceil{#1}\rceil} + \renewcommand{\L}{\ensuremath{\mathcal{L}}\xspace} + \renewcommand{\P}{\ensuremath{\mathcal{P}}\xspace} + \newcommand{\NL}{\ensuremath{\mathcal{N}\kern-0.2em\mathcal{L}}\xspace} + \newcommand{\NP}{\ensuremath{\mathcal{NP}}\xspace} + + +\DeclareMathOperator{\Landau}{\mathcal{O}} +\DeclareMathOperator{\True}{True} +\DeclareMathOperator{\False}{False} + + +\newtheorem{theorem}{Theorem} +\newtheorem{corollary}[theorem]{Consequence} +\newtheorem{lemma}[theorem]{Lemma} +\newtheorem{observation}[theorem]{Observation} +\newtheorem{definition}[theorem]{Definition} +\newtheorem{Literatur}[theorem]{Literature} + +\makeatletter +\newenvironment{Proof}[1][\proofname]{\par + \pushQED{\qed}% + \normalfont \topsep6\p@\@plus6\p@\relax + \trivlist + \item[\hskip\labelsep + + \bfseries + #1\@addpunct{.}]\ignorespaces +}{% + \popQED\endtrivlist\@endpefalse +} +\makeatother + + +\let\definecolor=\xdefinecolor +\definecolor{FUgreen}{RGB}{153,204,0} +\definecolor{FUblue}{RGB}{0,51,102} + +\definecolor{middlegray}{rgb}{0.5,0.5,0.5} +\definecolor{lightgray}{rgb}{0.8,0.8,0.8} +\definecolor{orange}{rgb}{0.8,0.3,0.3} +\definecolor{azur}{rgb}{0,0.7,1} +\definecolor{yac}{rgb}{0.6,0.6,0.1} +\definecolor{Pink}{rgb}{1,0,0.6} + +\definecolor{bgcolour}{rgb}{0.97,0.97,0.97} +\definecolor{codegreen}{rgb}{0,0.6,0} +\definecolor{codegray}{rgb}{0.35,0.35,0.35} +\definecolor{codepurple}{rgb}{0.58,0,0.82} +\definecolor{codeblue}{rgb}{0.4,0.5,1} + + + 
+\textheight = 230mm +\footskip = 10ex +\parindent 0pt diff --git a/script/res/titlepage.tex b/script/res/titlepage.tex new file mode 100644 index 0000000000000000000000000000000000000000..9475cf43768c92c170ea9a154ddf7a06efff666e --- /dev/null +++ b/script/res/titlepage.tex @@ -0,0 +1,23 @@ +\begin{titlepage} + \title{Linear Least Squares Regression} + \author{Boyan Hristov} + \date{\normalsize \today} + + \maketitle + + \rule{\linewidth}{0.8pt} + \vspace{1cm} + + \begin{abstract} + \abstractcontent + \end{abstract} + + + \begin{textblock}{100}(95,7) + \makebox[\dimexpr\textwidth+1cm][r]{ + \includegraphics[width=0.4\textwidth]{./res/fu_logo.eps} + } + \end{textblock} + + +\end{titlepage} \ No newline at end of file