diff --git a/is/UB4/ISUB4.pdf b/is/UB4/ISUB4.pdf
index 49e24e8..a35f01d 100644
--- a/is/UB4/ISUB4.pdf
+++ b/is/UB4/ISUB4.pdf
Binary files differ
diff --git a/is/UB4/ISUB4.tex b/is/UB4/ISUB4.tex
index f17f27f..8796bc6 100644
--- a/is/UB4/ISUB4.tex
+++ b/is/UB4/ISUB4.tex
@@ -80,29 +80,35 @@
 \newcommand{\blasso}{\beta^{\mathrm{LASSO}}}
 \newcommand{\bzero}{\beta^0}
 \newcommand{\bLS}{\hat{\beta}^{\mathrm{LS}}}
+\DeclareMathOperator*{\argmin}{arg\,min}
+\newcommand{\textcorr}[1]{\textcolor{red}{#1}}
+\newenvironment{corr}{\color{red}}{\color{black}\newline}
+\newcommand{\ok}{\begin{corr}
+	$\checkmark$
+\end{corr}}
 \begin{document}
 %\header{BlattNr}{Tutor}{Abgabedatum}{Vorlesungsname}{Namen}{Semester}{Anzahl Aufgaben}
-	\header{1}{}{2015-04-22}{Intelligent Systems I}{\textit{Maximus Mutschler}\\ \textit{Jan-Peter Hohloch}
-	}{SS 15}{4}
+	\header{4}{}{2015-05-21}{Intelligent Systems I}{\textit{Maximus Mutschler}\\ \textit{Jan-Peter Hohloch}
+	}{SS 15}{2}
 \vspace{1cm}
 \Aufgabe{LASSO \& $l_0$}{30}
 \begin{enumerate}
 	\item
 	\begin{enumerate}
-		\item \includegraphics[width=.4\textwidth]{ghard.png}
+		\item \includegraphics[width=.4\textwidth]{ghard.png}\textcorr{\checkmark}
 		\item \begin{align*}
 			\hat{\beta}^{LS}&=\left(X^TX\right)^{-1}X^T\mathbf{y}\\
 			&=\left(nI_{n\times n} \right)^{-1}X^T\mathbf{y}\\
 			&=\frac{1}{n}\cdot I_{n\times n}X^T\mathbf{y}\\
-			&=\frac{1}{n}\cdot X^T\mathbf{y}
+			&=\frac{1}{n}\cdot X^T\mathbf{y}\textcorr{\checkmark}
 		\end{align*}
 		\item \begin{math}
 			\bzero(\lambda) := \mathrm{arg \, min}_{\beta} \frac{1}{n} \norm{Y - X \beta}_2^2 + \lambda \norm{\beta}_0 ,\\
 			=\mathrm{arg \, min}_{\beta} \underbrace{\frac{1}{n} Y^{T}Y}_{constant}- \frac{2}{n}Y^{T}X\beta+\norm{\beta}^2+\lambda \norm{\beta}_0 \\
 			=\mathrm{arg \, min}_{\beta} -\frac{2}{n}Y^{T}X\beta+\norm{\beta}^2+\lambda \norm{\beta}_0 \\
-			=\mathrm{min}_{\beta} \sum^p_{i=1}-\frac{2}{n}Y^{T}_iX_i\beta_i+\beta_i^2+\lambda \cdot \begin{cases}
+			=\textcorr{\textbf{arg }}\mathrm{min}_{\beta} \sum^p_{i=1}-\frac{2}{n}Y^{T}_iX_i\beta_i+\beta_i^2+\lambda \cdot \begin{cases}
 			0, \beta_i =0\\
 			1, \beta_i \neq 0\\
 			\end{cases} %0 oder eins
@@ -124,6 +130,22 @@
 			1, |X^T_iY_i|><\sqrt{\lambda}\\
 			\end{cases}
 		\end{math}
+		\begin{corr}
+			\begin{align*}
+				\beta^0(\lambda)&=\argmin_\beta \mathcal{L}(\beta)\\
+				&= \argmin_\beta \frac{1}{n}\left(Y-X\beta\right)^T\left(Y-X\beta\right)+\sum\limits_{i=1}^p\lambda \mathds{1}_{\{\beta_i\not=0\}}\\
+				&= \argmin_\beta \frac{1}{n}\left(-2Y^TX\beta +n\beta^T\beta\right)+\sum\limits_{i=1}^p\lambda \mathds{1}_{\{\beta_i\not=0\}}\\
+				&=\argmin_\beta \sum\limits_{i=1}^p \underbrace{-\frac{2}{n}\left(Y^TX\right)_i\beta_i+|\beta_i|^2+\lambda\mathds{1}_{\{\beta_i\not=0\}}}_{=:\mathcal{L}_i\left(\beta_i\right)}\\
+				\beta_i^0&=\argmin_{\beta_i} \mathcal{L}_i(\beta_i)\\
+				-\frac{2}{n}\left(Y^TX\right)_i\beta_i+|\beta_i|^2&= \beta_i\left(\beta_i-\frac{2}{n}\left(Y^TX\right)_i\right)\\
+				\text{solution: }& \text{minimum at } \tilde{\beta}_i=\frac{1}{n}\left(Y^TX\right)_i=:z_i\\
+				\Rightarrow \mathcal{L}_i\left(\tilde{\beta}_i\right) &= \lambda \mathds{1}_{\{\tilde{\beta}_i\not=0\}} - \left(\frac{1}{n}\left(Y^TX\right)_i\right)^2\\
+				\text{(assume } \tilde{\beta}_i\not=0\text{)} &= \lambda -\left(\frac{1}{n}\left(Y^TX\right)_i\right)^2\\
+				\min_{\beta_i}\mathcal{L}_i\left(\beta_i\right)&=\min\left(\underbrace{\min_{\beta_i\not=0}\mathcal{L}_i\left(\beta_i\right)}_{\lambda-z_i^2},\underbrace{\mathcal{L}_i(0)}_{=0}\right)\\
+				\lambda-z_i^2\leq 0 &\Leftrightarrow \sqrt{\lambda}\leq |z_i|\\
+				\Rightarrow \beta_i^0(\lambda) &= z_i\mathds{1}_{\{|z_i|\geq\sqrt{\lambda}\}}=g_{hard,\lambda}(z_i)
+			\end{align*}
+		\end{corr}
 	\end{enumerate}
 	\item
 	\begin{enumerate}
 		\item \includegraphics[width=0.4\textwidth]{gsoft.png}
@@ -146,6 +168,26 @@
 			% quelle http://stats.stackexchange.com/questions/17781/derivation-of-closed-form-lasso-solution
 		\end{math}\\
 		We needed more than 12 hours to solve these equations. Please be aware of the fact that we are computer scientist with only a basic math education. We are not mathematics students!
+		\begin{corr}
+			\begin{align*}
+				\blasso(\lambda)&=\argmin_\beta \mathcal{L}(\beta)=\argmin_\beta \frac{1}{n}\left|\left|Y-X\beta\right|\right|^2+\lambda \left|\left|\beta\right|\right|_1\\
+				&= \argmin_\beta \frac{1}{n}\left(-2Y^TX\beta +n\beta^T\beta\right)+\lambda \sum\limits_{i=1}^p|\beta_i|\\
+				&=\argmin_\beta \sum\limits_{i=1}^p \underbrace{-2z_i\beta_i+|\beta_i|^2+\lambda|\beta_i|}_{=:\mathcal{L}_i(\beta_i)}
+			\end{align*}
+			\begin{align*}
+				\beta_i\geq 0: & \mathcal{L}_i(\beta_i)&=\beta_i\left(\beta_i-2z_i+\lambda\right)\\
+				&\tilde{\beta}_i&=\begin{cases}
+					z_i-\frac{\lambda}{2} & \text{if } z_i\geq \frac{\lambda}{2}\\
+					0 & \text{else}
+				\end{cases}\\
+				\beta_i\leq 0: & \mathcal{L}_i(\beta_i)&=\beta_i\left(\beta_i-2z_i-\lambda\right)\\
+				&\tilde{\beta}_i&=\begin{cases}
+					z_i+\frac{\lambda}{2} & \text{if } z_i\leq -\frac{\lambda}{2}\\
+					0 & \text{else}
+				\end{cases}\\
+				\Rightarrow & \beta_i^{\mathrm{LASSO}}&=g_{soft,\frac{\lambda}{2}}\left(z_i\right)
+			\end{align*}
+		\end{corr}
 	\end{enumerate}
 \end{enumerate}
 \Aufgabe{LASSO}{70}
@@ -169,6 +211,7 @@
 			\begin{itemize}
 				\item many non-zero weights
 				\item possible large weights
+				\item \textcorr{the solution is not unique}
 			\end{itemize}
 		\item Does this happen with the function \_linear\_model.LinearReagression()\_ ?
 			\begin{itemize}
@@ -194,5 +237,7 @@
 print('Regression Coefficient of Gene 5954 : ', lm.coef_[5954])
 \end{lstlisting}
 
+	\textcorr{has to be 5953}\\
+	\textcorr{\textbf{other gene missing}}
 \end{document}
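
Reviewer note, not part of the patched sheet: a minimal Python sketch that numerically sanity-checks the two closed forms the corrections derive above, hard thresholding at $\sqrt{\lambda}$ for the $l_0$ penalty and soft thresholding at $\lambda/2$ for the LASSO, on a synthetic orthonormal design with $X^TX = nI$. The function names (`hard_threshold`, `soft_threshold`), the synthetic data, and the parameter choices are assumptions made for illustration; the only library calls are NumPy and `sklearn.linear_model.Lasso`, whose objective is $(1/(2n))\|y-Xw\|_2^2+\alpha\|w\|_1$, so `alpha = lambda/2` reproduces the sheet's penalty up to a constant factor.

\begin{lstlisting}[language=Python]
# Hypothetical sanity check for the derivations above (not the graded solution).
import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(0)
n, p, lam = 200, 5, 0.3

# Build a design with X^T X = n * I: orthonormal columns scaled by sqrt(n).
Q, _ = np.linalg.qr(rng.normal(size=(n, p)))
X = np.sqrt(n) * Q
beta_true = np.array([1.0, -0.5, 0.0, 0.05, 2.0])   # assumed toy coefficients
y = X @ beta_true + 0.1 * rng.normal(size=n)

z = X.T @ y / n   # z_i = (1/n)(X^T y)_i, the least-squares estimate from part (b)

def hard_threshold(z, lam):
    # beta_i^0(lambda) = z_i * 1{|z_i| >= sqrt(lambda)}  (g_hard from part 1)
    return np.where(np.abs(z) >= np.sqrt(lam), z, 0.0)

def soft_threshold(z, thr):
    # g_soft,thr(z_i) = sign(z_i) * max(|z_i| - thr, 0)  (g_soft from part 2)
    return np.sign(z) * np.maximum(np.abs(z) - thr, 0.0)

# l_0 check: brute-force each coordinate loss L_i(b) = -2 z_i b + b^2 + lam*1{b != 0}
grid = np.linspace(-3.0, 3.0, 20001)
for i in range(p):
    L_i = lambda b: -2.0 * z[i] * b + b ** 2 + lam * (b != 0)
    brute_min = min(L_i(grid).min(), L_i(0.0))
    assert abs(L_i(hard_threshold(z, lam)[i]) - brute_min) < 1e-6

# LASSO check: sklearn minimizes (1/(2n))||y - Xw||^2 + alpha*||w||_1,
# i.e. half of the sheet's objective when alpha = lam/2 -> same minimizer.
lm = Lasso(alpha=lam / 2, fit_intercept=False).fit(X, y)
assert np.allclose(lm.coef_, soft_threshold(z, lam / 2), atol=1e-5)
print("hard and soft thresholding match the brute-force / sklearn solutions")
\end{lstlisting}

Running the script prints the confirmation line; any mismatch between the closed forms and the brute-force or sklearn minimizers would raise an `AssertionError`.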