diff --git a/ea/UB3/Blatt3/EA3/src/individuals/GAIndividual.java b/ea/UB3/Blatt3/EA3/src/individuals/GAIndividual.java index e14ea6a..a5f4fc1 100644 --- a/ea/UB3/Blatt3/EA3/src/individuals/GAIndividual.java +++ b/ea/UB3/Blatt3/EA3/src/individuals/GAIndividual.java @@ -132,12 +132,12 @@ indB.getGenotype().clear(i); } } - System.out.println("______"); - System.out.println(ind1.getStringRepresentation()); - System.out.println(ind2.getStringRepresentation()); - System.out.println(rand); - System.out.println(indA.getStringRepresentation()); - System.out.println(indB.getStringRepresentation()); + // System.out.println("______"); + // System.out.println(ind1.getStringRepresentation()); + // System.out.println(ind2.getStringRepresentation()); + // System.out.println(rand); + // System.out.println(indA.getStringRepresentation()); + // System.out.println(indB.getStringRepresentation()); GAIndividual[] crossed = { indA, indB }; return crossed; } diff --git a/ea/UB3/Blatt3/EA3/src/strategies/GeneticAlgorithm.java b/ea/UB3/Blatt3/EA3/src/strategies/GeneticAlgorithm.java index a901091..7a619a0 100644 --- a/ea/UB3/Blatt3/EA3/src/strategies/GeneticAlgorithm.java +++ b/ea/UB3/Blatt3/EA3/src/strategies/GeneticAlgorithm.java @@ -121,17 +121,17 @@ public static void main(String[] args) { // TODO: parameters for the GeneticAlgorithm, adjust these values as // necessary - int genotypeLength = 50; + int genotypeLength = 25; int mu = 10; int lambda = 50; - int optimizationSteps = 1000; - int multiRuns = 10; + int optimizationSteps = 100; + int multiRuns = 100; GeneticAlgorithm program = new GeneticAlgorithm(genotypeLength, mu, lambda, optimizationSteps, multiRuns); int TmpMeanCalls = 0, TmpMeanFitness = 0; // perform repeated optimization for (int i = 0; i < program.m_MultiRuns; i++) { - System.out.println(i); + // System.out.println(i); program.initialize(); program.optimize(); TmpMeanCalls += program.m_OptimizationStepsNeeded; diff --git a/ea/UB3/Blatt3/src/strategies/GeneticAlgorithm.java b/ea/UB3/Blatt3/src/strategies/GeneticAlgorithm.java index d0dd3dd..299e391 100644 --- a/ea/UB3/Blatt3/src/strategies/GeneticAlgorithm.java +++ b/ea/UB3/Blatt3/src/strategies/GeneticAlgorithm.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.List; import tools.RandomNumberGenerator; @@ -84,7 +85,7 @@ // select the mu best individuals for the new population // od - ArrayList population = new ArrayList(); + List population = new ArrayList(); for (int i = 0; i < m_Mu; i++) { population.add(new GAIndividual(m_GenotypeLength)); @@ -119,7 +120,7 @@ // m_population = m_population.subList(0, m_Mu); m_Best = children.get(0); - population = children; + population = children.subList(0, m_Mu); if (m_Best.evaluateAsMaxiBits() != 0) { m_OptimizationStepsNeeded++; } diff --git a/is/UB4/Ex4.ipynb b/is/UB4/Ex4.ipynb index d273c2a..6d60849 100644 --- a/is/UB4/Ex4.ipynb +++ b/is/UB4/Ex4.ipynb @@ -186,7 +186,8 @@ "\\begin{align*}%not sure whether this is meant...\n", " \\hat{\\beta}^{LS}&=\\left(X^TX\\right)^{-1}X^T\\mathbf{y}\\\\\n", " &=\\left(nI_{n\\times n} \\right)^{-1}X^T\\mathbf{y}\\\\\n", - " &=\\frac{1}{n}\\cdot I_{n\\times n}X^T\\mathbf{y}\n", + " &=\\frac{1}{n}\\cdot I_{n\\times n}X^T\\mathbf{y}\\\\\n", + " &=\\frac{1}{n}\\cdot X^T\\mathbf{y}\n", "\\end{align*}" ] }, @@ -194,6 +195,44 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "##### 1.1.3\n", + "\\begin{align}\n", + " \\bzero(\\lambda) &:= \\mathrm{arg \\, min}_{\\beta} \\frac{1}{n} \\norm{Y - X \\beta}_2^2 + \\lambda \\norm{\\beta}_0 ,\\\\\n", + " &=\\mathrm{arg \\, min}_{\\beta} \\underbrace{\\frac{1}{n} Y^{T}Y}_{constant}- \\frac{2}{n}Y^{T}X\\beta+\\norm{\\beta}^2+\\lambda \\norm{\\beta}_0 \\\\\n", + " &=\\mathrm{arg \\, min}_{\\beta} -\\frac{2}{n}Y^{T}X\\beta+\\norm{\\beta}^2+\\lambda \\norm{\\beta}_0 \\\\\n", + " &=\\mathrm{min}_{\\beta} \\sum^p_{i=1}-\\frac{2}{n}Y^{T}_iX_i\\beta_i+\\beta_i^2+\\lambda \\cdot \\begin{cases}\n", + " 0, & \\beta_i =0\\\\\n", + " 1, & \\beta_i \\neq 0\\\\\n", + "\\end{cases} %0 oder eins\n", + " \\\\\\frac{d \\bzero(\\lambda) }{d \\beta}&= -\\frac{2}{n} X_i^TY_i +2 \\beta_i \\overset{!}{=} 0\\\\\n", + " \\beta_i&= \\frac{1}{n} X^T_iY_i\\\\\n", + " \\text{$|\\beta_i|$ in $\\bzero(\\lambda)$,$\\beta_i \\neq 0$, determine sum element of $\\bzero(\\lambda) > 0$}\\\\\n", + " 0&< -2|\\beta_i|^2+|\\beta_i|^2 +\\lambda\\\\\n", + " \\lambda&<|\\beta_i|^2\\\\\n", + " \\lambda &< \\left|\\frac{1}{n} X^T_iY_i\\right|^2 \\\\\n", + " \\sqrt{\\lambda} &< \\left|\\frac{1}{n} X^T_iY_i\\right| \\\\\n", + " \\bzero(\\lambda) = \\frac{1}{n} X^T_iY_i \\cdot \n", + " \\begin{cases}\n", + " 0, & |X^T_iY_i|\\leq\\sqrt{\\lambda}\\\\\n", + " 1, & |X^T_iY_i|>\\sqrt{\\lambda}\\\\\n", + "\\end{cases} \n", + " \\end{align} \n", + " Das ist etwas reverse engineered. ich bezweifle die Korrektheit aber vielleicht hilft es um zur richtigen Lösung zu kommen" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "## LASSO estimator in the orthonormal design\n", "\n", "Let $g_{\\mathrm{soft}, \\lambda}$ be the function:\n", @@ -317,6 +356,50 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "##### 1.2.2\n", + "\\begin{align}\\blasso(\\lambda) &:= \\mathrm{arg \\, min}_{\\beta} \\frac{1}{n} \\norm{Y - X \\beta}_2^2 + \\lambda \\norm{\\beta}_1 ,\\\\\n", + " &=\\mathrm{arg \\, min}_{\\beta} \\underbrace{\\frac{1}{n} Y^{T}Y}_{constant}- \\frac{2}{n}Y^{T}X\\beta+\\norm{\\beta}^2+\\lambda \\norm{\\beta}_1 \\\\\n", + " &=\\mathrm{arg \\, min}_{\\beta} -\\frac{2}{n}Y^{T}X\\beta+\\norm{\\beta}^2+\\lambda \\norm{\\beta}_1 \\\\\n", + " &=\\mathrm{min}_{\\beta} \\sum^p_{i=1}-\\frac{2}{n}Y^{T}_iX_i\\beta_i+\\beta_i^2+\\lambda |\\beta|\\\\\n", + " \\text{Minimize for a fixed i}\\\\\\text{The problem can be simplified to the sum of its parts}\\\\\n", + " L_i&=-\\frac{2}{n}Y^{T}_iX_i\\beta_i+\\beta_i^2+\\lambda |\\beta|\\\\\n", + " \\text{ $Y^{T}_iX_i >0$ subjects to $\\beta_i \\geq 0$}\\\\\\text{and vice versa $Y^{T}_iX_i \\leq0$ subjects to $\\beta_i \\leq 0$ }\\\\\\\\\n", + " \\text{Case 1: $Y^{T}_iX_i >0$ and $\\beta_i \\geq 0$}\\\\\n", + " \\frac{dL_i}{d\\beta} &=-\\frac{2}{n}Y^{T}_iX_i+2\\beta_i+\\lambda \\overset{!}{=} 0\\\\\n", + " \\beta_i &= \\frac{1}{n}Y^{T}_iX_i-\\frac{\\lambda}{2}\\\\\n", + " \\text{this is only feasible if the right-hand side is nonnegative}\\\\\n", + " \\blasso_i(\\lambda)& =\\left(\\frac{1}{n}Y^{T}_iX_i-\\frac{\\lambda}{2}\\right)^+\n", + " \\\\&= sgn(\\frac{1}{n}Y^{T}_iX_i)% den schritt verstehe ich nicht\n", + " \\cdot\\left(\\left|\\frac{1}{n}Y^{T}_iX_i\\right|-\\frac{\\lambda}{2}\\right)^+\\\\\n", + " \\text{Case 2: $Y^{T}_iX_i \\leq0$ and $\\beta_i \\leq 0$}\\\\\n", + " \\text{leads obviously to the same equation, so it's proven}\n", + " % quelle http://stats.stackexchange.com/questions/17781/derivation-of-closed-form-lasso-solution\n", + " \\end{align}\n", + " We needed more than 12 hours to solve these equations. Please be aware of the fact that we are computer scientist with only a basic math education. We are not mathematics students! " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "# Application of LASSO on a gene data set (70 points)\n", "\n", "(This exercise is independent of the first one.)\n",