diff --git a/text/thesis/02MaterialsAndMethods.tex b/text/thesis/02MaterialsAndMethods.tex index cc549d7..2e5b9e4 100644 --- a/text/thesis/02MaterialsAndMethods.tex +++ b/text/thesis/02MaterialsAndMethods.tex @@ -17,7 +17,7 @@ The frequencies typically used for movement prediction in EEG are about 8-24 Hz (\cite{Blokland15},\cite{Ahmadian13},\cite{Wang09}). EEG is often used for non-invasive BCIs because it is cheap and easier to use than, e.g., fMRI. The electrodes have to be spread over the scalp. To allow for comparability there are standardized placement schemes, which also bring a naming convention with them. \subsubsection{10-20 system} - In this standard adjacent electrodes are placed either 10\% or 20\% of the total front-back or left-right distance apart. This standardization also makes it possible to name each electrode or rather here place. This is done with capital letters for lobes (Frontal, \qq{Central}, Parietal, Occipital and Temporal) and numbers for the specific place on the lobe. Even numbers are on the right side of the head, odd on the left; larger numbers are closer to the ears, lower numbers closer to the other hemisphere. The exact number now refers to the exact distance from centre: $$\left\lceil\frac{x}{2}\right\rceil\cdot \frac{d}{10}$$ where $x$ is the number and $d$ the diameter of the scalp. Electrodes in the centre are named with a lower case $z$ e.g. $Cz$.\\ + In this standard, adjacent electrodes are placed either 10\% or 20\% of the total front-back or left-right distance apart. This standardization also makes it possible to name each electrode position. This is done with capital letters for the lobes (Frontal, \qq{Central}, Parietal, Occipital and Temporal) and numbers for the specific position over the lobe. Even numbers are on the right side of the head, odd numbers on the left; higher numbers are closer to the ears, lower numbers closer to the midline.
The number refers to the distance from the center: $$\left\lceil\frac{x}{2}\right\rceil\cdot \frac{d}{10},$$ where $x$ is the number and $d$ the diameter of the scalp. Electrodes on the midline are named with a lower case $z$, e.g. $Cz$.\\ Electrodes between two lobes (10\% instead of 20\% distance) are named after both adjacent lobes (anterior first), e.g. $FCz$ (between the frontal and central lobes). Also see figure~\ref{fig:10-20}. \begin{figure}[!p] @@ -28,15 +28,29 @@ \end{figure} \subsection{Power estimation} \subsubsection{EEG} - To use data from EEG one way is to analyse the occurring frequencies and their respective power.\\ + One way to use EEG data is to analyze the occurring frequencies and their respective power.\\ There are different methods to obtain these from the continuous signal. The intuitive approach would be the Fourier transform; however, the Fourier transform need not exist for a continuous signal. So we used power spectral density (PSD) estimation. \subsubsection{Power spectral density estimation} - The PSD is the power per frequency. Power here refers to the square of the amplitude. %TODO: formulation, additional explanation?, fft - If the Fourier transform is existing, PSD can be calculated from it e.g. as periodogram. If not it has to be estimated. One way to do so is parametrised with an Autoregressive model. Here one assumes that the there is a correlation between $p$ consecutive samples and the one following of the spectral density. This leads to an equation with only $p$ parameters which can be estimated in different ways. We used Burg's method (\texttt{pburg} from \matlab{} library). - \subsubsection{Burg's method} + The PSD is the power per frequency. Power here refers to the square of the amplitude. %TODO: formulation,fft + If the Fourier transform exists, the PSD can be calculated from it, e.g. as a periodogram. If not, it has to be estimated.
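As an illustrative aside (a hypothetical Python sketch, not the \matlab{} code used in this thesis), the periodogram route can be made concrete: the PSD is obtained from the squared FFT magnitude of the signal.

```python
import numpy as np

def periodogram(x, fs):
    """One-sided periodogram: PSD as squared FFT magnitude, scaled by N*fs."""
    x = np.asarray(x, dtype=float)
    n = len(x)
    spectrum = np.fft.rfft(x)
    psd = (np.abs(spectrum) ** 2) / (n * fs)
    # double every bin except DC (and Nyquist for even n) for one-sided scaling
    psd[1:-1 if n % 2 == 0 else None] *= 2
    freqs = np.fft.rfftfreq(n, d=1.0 / fs)
    return freqs, psd

# Illustrative check: a 10 Hz sine sampled at 256 Hz peaks at 10 Hz
fs = 256
t = np.arange(fs) / fs
f, p = periodogram(np.sin(2 * np.pi * 10.0 * t), fs)
print(f[np.argmax(p)])  # → 10.0
```

This is essentially what \texttt{pwelch} averages over windowed segments; the sketch shows only the single, unaveraged estimate.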
One way to do so is to parametrize the signal with an autoregressive (AR) model. Here one assumes that there is a correlation between $p$ consecutive samples and the following one. This leads to an equation with only $p$ parameters, which can be estimated in different ways. We used Burg's method (\texttt{pburg} from the \matlab{} library).\\ + In Figure~\ref{fig:psd} we see the difference between autoregressive (\texttt{pburg}) and periodogram-based (\texttt{pwelch}) PSD estimation. + \begin{figure} + \includegraphics[width=\textwidth]{psd.png} + \caption{PSD estimated with an autoregressive model and with the FFT, respectively\protect\footnotemark} + + \label{fig:psd} + \end{figure} + \footnotetext{The signal was unfiltered EEG data from channel \textit{Cz}, second run of the second session with subject AO} + \subsubsection{Burg's method - Autoregressive Model} \label{mat:burg} Burg's method (\cite{Burg75}) is a special case of parametric PSD estimation. It interprets the Yule-Walker equations as a least squares problem and iteratively estimates solutions.\\ - According to \cite{Huang14} Burg's method fits well in cases with the need of high resolution. %TODO: Autoregressive model vs. fft + According to \cite{Huang14}, Burg's method is well suited when high spectral resolution is needed.\\ + The Burg and Levinson-Durbin algorithms are examples of PSD estimation using an autoregressive model instead of the fast Fourier transform (FFT). The approach is described well by Spyers-Ashby et al. (\cite{Spyers98}). The idea is to reduce the number of parameters that describe the generation of the signal. The number of parameters used is called the \textit{model order} (250 in our example, lower in most cases). These parameters are estimated from the original data.
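As a minimal sketch of how such AR parameters can be estimated, here is a hypothetical NumPy implementation of the Burg recursion (the thesis itself used \matlab{}'s \texttt{pburg}; the AR(1) test signal below is purely illustrative):

```python
import numpy as np

def burg(x, order):
    """Burg's method: estimate AR coefficients a_1..a_p for the model
    x[n] ≈ -sum_k a_k * x[n-k], by minimizing the combined
    forward and backward prediction error."""
    x = np.asarray(x, dtype=float)
    a = np.array([1.0])          # a_0 = 1 by convention
    f = x.copy()                 # forward prediction errors
    b = x.copy()                 # backward prediction errors
    for _ in range(order):
        fp, bp = f[1:], b[:-1]
        # reflection coefficient minimizing sum(fp^2) + sum(bp^2)
        k = -2.0 * np.dot(fp, bp) / (np.dot(fp, fp) + np.dot(bp, bp))
        f, b = fp + k * bp, bp + k * fp
        # Levinson-type update of the coefficient vector
        a = np.append(a, 0.0)
        a = a + k * a[::-1]
    return a[1:]                 # drop the leading 1

# Illustrative check on a synthetic AR(1) signal x[n] = 0.9 x[n-1] + e[n]
rng = np.random.default_rng(0)
e = rng.standard_normal(2000)
x = np.zeros(2000)
for n in range(1, 2000):
    x[n] = 0.9 * x[n - 1] + e[n]
a = burg(x, 1)
print(a)  # a[0] should be close to -0.9
```

From the estimated coefficients the PSD follows as the frequency response of the all-pole model, which is what \texttt{pburg} returns.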
For PSD estimation the modeled values are used, which allows an easier transformation since the data is generated by a known process.\\ + Often rational transfer function modeling is used, with the general form $$x_n=-\sum\limits_{k=1}^p a_kx_{n-k}+ \sum\limits_{k=0}^q b_ku_{n-k},$$ where $x_n$ is the output and $u_n$ the input. The system parameters $a_k,b_k$ have to be estimated from the original data. As the input is unknown in our application, only the output can be estimated, which simplifies the formula to $$\hat{x}_n=-\sum\limits_{k=1}^p a_k\hat{x}_{n-k}.$$ + The parameters are estimated by minimizing the forward prediction error $E$: $$E=\frac{1}{N}\sum\limits_{i=1}^N \left(x_i-\hat{x}_i\right)^2.$$ + The minimum has zero slope and can be found by setting the derivatives to zero: $$\frac{\partial E}{\partial a_k}=0,\text{ for } 1\le k\le p.$$ + This yields a set of equations called the \emph{Yule-Walker equations} (cf. \cite{Yule27},\cite{Walker31}).\\ + The parameters $a_k$ are then estimated from the Yule-Walker equations using both forward and backward prediction. \subsection{Low Frequencies} In the 2000s, new techniques began to be used to record ultrafast and infraslow brainwaves (above 50Hz and below 1Hz, respectively). These were found to carry relevant information (cf. \cite{Vanhatalo04}).\\ Low frequencies were also found to be significant for predicting movements, e.g. by Liu et al. (\cite{Liu11}) and Antelis et al. (\cite{Antelis13}). Antelis et al. found correlations between hand movement and the low-frequency signal of $(0.29,0.15,0.37)$ in the respective dimensions.\\ @@ -54,7 +68,7 @@ \subsection{Synergies} Movements of the arm (and other parts of the body) are under-determined, meaning that for a given trajectory different muscle contractions are possible. One idea of how our nervous system could solve this problem are synergies.
Proposed by Bernstein in 1967 (\cite{Bernstein67}), they describe the goal of the movement (e.g. the trajectory) instead of controlling single muscles. This would mean, however, that predicting the activity of single muscles from EEG is harder than predicting a synergy, which in turn determines the contraction of the muscles.\\ Evidence for the use of synergies in the nervous system was found e.g. by Bizzi et al. (\cite{Bizzi08}) and Byadarhaly et al. (\cite{Byadarhaly12}). They also showed that synergies meet the requirements necessary to build predictable trajectories.\\ - Synergies are usually gotten from EMG signal through a principal component analysis (PCA, cf. \ref{mat:pca}), non-negative matrix factorisation (NMF, cf. \ref{mat:nmf}) or autoencoders (a form of neuronal network, cf. \ref{mat:autoenc}). + Synergies are usually extracted from the EMG signal through principal component analysis (PCA, cf. \ref{mat:pca}), non-negative matrix factorization (NMF, cf. \ref{mat:nmf}) or autoencoders (a form of neural network, cf. \ref{mat:autoenc}). \subsection{PCA} \label{mat:pca} Principal Component Analysis (PCA) is probably the most common technique for dimensionality reduction. The idea is to keep the dimensions with the highest variance so as to preserve as much information as possible in the lower-dimensional space.\\ @@ -64,9 +78,10 @@ \caption{Eigenvectors of Gaussian scatter} \label{fig:pca} \end{figure} + %TODO: Explanation, formula, ... \subsection{NMF} \label{mat:nmf} - In some applications Non-negative Matrix Factorization (NMF) is preferred over PCA (cf. \cite{Lee99}). This is because it does not learn eigenvectors but decomposes the input into parts which are all possibly used in the input. When seen as matrix factorisation PCA yields matrices of arbitrary sign where one represents the eigenvectors the other the specific mixture of them. Because an entry may be negative cancellation is possible.
This leads to unintuitive representation in the first matrix.\\ + In some applications Non-negative Matrix Factorization (NMF) is preferred over PCA (cf. \cite{Lee99}). This is because it does not learn eigenvectors but decomposes the input into parts, all of which are potentially present in the input. Seen as a matrix factorization, PCA yields matrices of arbitrary sign, where one represents the eigenvectors and the other their specific mixture. Because entries may be negative, cancellation is possible. This leads to an unintuitive representation in the first matrix.\\ NMF, in contrast, only allows non-negative entries. This leads to \qq{what is in, is in}, meaning no cancellation, which in turn yields more intuitive matrices. The first matrix contains possible parts of the data, the second how strongly they are represented in the current input.\\ The formula for NMF is $$Input\approx \mathbf{WH}$$ @@ -98,17 +113,17 @@ The data used for this work were mainly recorded by Farid Shiman, Nerea Irastorza-Landa, and Andrea Sarasola-Sanz for their work (\cite{Shiman15},\cite{Sarasola15}). We were allowed to use them for further analysis.\\ There were 9 right-handed subjects%TODO All the tasks were performed with the right hand.\\ - To perform was a centre-out reaching task to one of four targets (see \ref{fig:experimentalDesign}) while 32 channel EEG, at least% + The participants performed a center-out reaching task to one of four targets (see \ref{fig:experimentalDesign}) while 32 channel EEG, at least% \footnote{\texttt{'AbdPolLo', 'Biceps', 'Triceps', 'FrontDelt', 'MidDelt'} and \texttt{'BackDelt'} were recorded for every subject, others only for some. Only the 6 channels tracked in every session were used} % 6 channel surface EMG and 7 DOF kinematics were tracked.
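Returning to the NMF factorization $Input\approx \mathbf{WH}$ described above: a minimal sketch using Lee \& Seung's multiplicative update rules (hypothetical Python for illustration, not the code used in this work; the toy matrix below is an assumption):

```python
import numpy as np

def nmf(V, r, iterations=500, eps=1e-9):
    """Approximate a non-negative matrix V as W @ H with W, H >= 0,
    using Lee & Seung's multiplicative updates for squared error."""
    rng = np.random.default_rng(0)
    m, n = V.shape
    W = rng.random((m, r)) + eps   # non-negative random init
    H = rng.random((r, n)) + eps
    for _ in range(iterations):
        H *= (W.T @ V) / (W.T @ W @ H + eps)   # update H
        W *= (V @ H.T) / (W @ H @ H.T + eps)   # update W
    return W, H

# Illustrative check: a matrix built from 2 non-negative parts is
# recovered with small reconstruction error
parts = np.array([[1.0, 0.0, 2.0], [0.0, 3.0, 1.0]])
weights = np.random.default_rng(1).random((5, 2))
V = weights @ parts
W, H = nmf(V, 2)
print(np.linalg.norm(V - W @ H))  # reconstruction error
```

Because the updates are multiplicative and start from non-negative matrices, entries can never become negative, which is exactly the \qq{what is in, is in} property described above.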
\begin{figure}[b] \centering \includegraphics{experimentalDesign.jpg} - \caption{Centre-out reaching task with four colour-coded targets} + \caption{Center-out reaching task with four color-coded targets} \label{fig:experimentalDesign} \end{figure} Of the kinematic information tracked we only used the position ($x,y$) and angle ($\theta$, rotation around the $z$-axis) of the hand.\\ Only complete sessions were used in our analysis to ensure better comparability.\\ One session consists of 5 runs with 40 trials each. The trials were separated by resting phases of varying length (2-3s, randomly assigned). Each trial began with an auditory cue specifying the target for this trial, chosen randomly but equally distributed. This leads to 50 reaches to the same target per session. After the cue the participants should \qq{perform the movement and return to the starting position at a comfortable pace but within 4 seconds}\footnote{\cite{Shiman15}}.\\ - For each subject there were 4 to 6 sessions, each recorded on a different day. All in all there were 255 runs in 51 sessions. Each session was analysed independently as one continuous task. + For each subject there were 4 to 6 sessions, each recorded on a different day. In total there were 255 runs in 51 sessions. Each session was analyzed independently as one continuous task. diff --git a/text/thesis/mylit.bib b/text/thesis/mylit.bib index d873256..dcb17f6 100755 --- a/text/thesis/mylit.bib +++ b/text/thesis/mylit.bib @@ -183,6 +183,30 @@ volume = "51", pages = "155-173" } +@article{Spyers98, + author = "J.M. Spyers-Ashby and P.G. Bain and S.J. Roberts", + title = "A comparison of fast Fourier transform (FFT) and autoregressive (AR) spectral estimation techniques for the analysis of tremor data", + year = "1998", + journal = "Journal of Neuroscience Methods", + volume = "83", + pages = "35-43" +} +@article{Yule27, + author = "Yule, G.
Udny", + year = "1927", + title = "On a Method of Investigating Periodicities in Disturbed Series, with Special Reference to Wolfer's Sunspot Numbers", + journal = "Philosophical Transactions of the Royal Society of London", + volume = "226", + pages = "267–298" +} +@article{Walker31, + author = "Walker, Gilbert", + year = "1931", + title = "On Periodicity in Series of Related Terms", + journal = "Proceedings of the Royal Society of London", + volume = "131", + pages = "518–532" +} @article{Ting07, diff --git a/text/thesis/thesis.tex b/text/thesis/thesis.tex index 467cdfe..837151b 100644 --- a/text/thesis/thesis.tex +++ b/text/thesis/thesis.tex @@ -16,6 +16,7 @@ \usepackage{hyperref} \usepackage{helvet} \usepackage{pdfpages} +\usepackage{amsmath} \usepackage[official]{eurosym} \usepackage[chapter]{algorithm} \usepackage{algpseudocode} @@ -219,6 +220,14 @@ % \input{05Future} % \cleardoublepage +%%Appendix +\appendix +\input{Aenvironment} +\cleardoublepage + +\input{Bfunctions} +\cleardoublepage + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Bibliographie