diff --git a/fastRidge/displaypath.m b/fastRidge/displaypath.m deleted file mode 100644 index 3586ff0..0000000 --- a/fastRidge/displaypath.m +++ /dev/null @@ -1,42 +0,0 @@ -function displaypath(b, DOF, score) - -%% Create a new figure -figure; -clf; -hold on; -grid on; -box on; - -title('Ridge Regression Path'); - -% Display the path -plot(DOF, b); -xlim([0, max(DOF)]); -xlabel('Degrees of Freedom'); -ylabel('Regression Coefficients'); - -% Show the minimum criterion score model -[~,I] = min(score); -yl = ylim; -plot([DOF(I), DOF(I)], [yl(1), yl(2)], '--'); - -%% Create a new figure -figure; -clf; -hold on; -grid on; -box on; - -title('Ridge Regression Criterion Score'); - -% Display the path -plot(DOF, score); -xlim([0, max(DOF)]); -xlabel('Degrees of Freedom'); -ylabel('Criterion Score'); - -% Show the minimum criterion score model -[~,I] = min(score); -yl = ylim; -plot([DOF(I), DOF(I)], [yl(1), yl(2)], '--'); - diff --git a/fastRidge/fastridge.m b/fastRidge/fastridge.m deleted file mode 100644 index cf78141..0000000 --- a/fastRidge/fastridge.m +++ /dev/null @@ -1,225 +0,0 @@ -%% Implementation of the fast ridge regression, specifically for the case when p > n -% -% function [beta, b0, tau2, DOF, lambda, score] = fastridge(X, y, 'param1', value1, ...) -% -% Parameters: -% X = regressor matrix [n x p] -% y = response vector [n x 1] -% -% Optional arguments: -% -% If no path, lambda or dof is specified, fastridge will search for the -% optimal ridge model using the specified criterion (default is 'mmlu') -% -% 'path' = generate a path of beta's, with the number of lambda's given by the argument [ integer scalar ] -% 'criterion' = specify which model selection criterion to use [ 'mmlu', 'aicc', 'kicc' ] -% 'lambda' = fit a ridge model with a specified lambda [ positive scalar ] -% 'dof' = fit a ridge model with specified degrees-of-freedom [ postive scalar ] -% -% Returns: -% beta = regression parameters [p x nlambda] -% b0 = regression param. for constant [1 x 1] -% tau2 = noise variance sigma^2 [1 x nlambda] -% DOF = degrees-of-freedom of fitted models [1 x nlambda] -% score = model selection criterion score of fitted models [1 x nlambda] -% -% -% Example: -% % Generate a regularisation path with 100 lambda's -% [beta, b0, tau2, DOF, lambda, score] = fastridge(X, y, 'path', 1e2); -% -% % Fit with a specified DOF -% [beta, b0, tau2, DOF, lambda, score] = fastridge(X, y, 'dof', 88); -% -% See fastridge_example.m for a more extensive example with timings -% -% References: -% -% Efficient quadratic regularization for expression arrays -% Trevor Hastie and Robert Tibshirani -% Biostatistics, Vol. 5, No. 3, pp. 329--340, 2004 -% -% MML Invariant Linear Regression -% Daniel F. Schmidt and Enes Makalic -% Lecture Notes in Computer Science, Vol. 5866, pp. 312--321, 2009 -% -% Regression and Time Series Model Selection in Small Samples -% Clifford M. Hurvich and Chih-Ling Tsai -% Biometrika, Vol. 76, No. 2, pp. 297--307, 1989 -% -% (c) Copyright Enes Makalic and Daniel F. Schmidt, 2015 -% -function [beta, b0, tau2, DOF, lambda, score] = fastridge(X, y, varargin) - -%% Standardise the input data -[n, p] = size(X); -[X, muX, normX, y, muY] = standardise(X, y); - -% Determine the maximum DOF -maxDOF = min(n-3/2, p); - -%% Parse options -inParser = inputParser; - -%% Default parameter values -defaultLambda = NaN; -defaultDOF = NaN; -defaultCriterion = 'mmlu'; -defaultPath = NaN; - -expectedCriterion = {'mmlu', 'aicc'}; - -%% Define parameters -addParameter(inParser,'lambda',defaultLambda,@(x) isnumeric(x) && (x >= 0) && isscalar(x)); -addParameter(inParser,'dof',defaultDOF,@(x) isnumeric(x) && isscalar(x) && x >= 0 && x <= maxDOF); -addParameter(inParser,'criterion',defaultCriterion,@(x) any(validatestring(x,expectedCriterion))); -addParameter(inParser,'path',defaultPath,@(x) isnumeric(x) && isscalar(x) && x > 0); - -%% Parse options -parse(inParser, varargin{:}); - -% General options for all samplers -DOF = inParser.Results.dof; -lambda = inParser.Results.lambda; -path = inParser.Results.path; -criterion = inParser.Results.criterion; - -%% Compute the SVD and sufficient statistics -[u,s,v] = svd(X, 'econ'); - -ds = diag(s); -r = bsxfun(@times, u, ds'); % r = u*s; -rtr = ds.^2; % r'*r -ry = r'*y; -yty = y'*y; - -%% Error checking -if (~isnan(DOF) && ~isnan(lambda)) - error('Cannot specify both DOF and lambda'); -end -if ((~isnan(DOF) || ~isnan(lambda)) && ~isnan(path)) - error('Cannot specify a lambda/DOF and request a path'); -end - -%% Initialise -score = NaN; - -%% If a DOF was specified, convert it to a lambda -if (~isnan(DOF)) - lambda = DOF2lambda(DOF, rtr); -end - -%% If no DOF/lambda, or path option was specified, search for the best model using the specified criterion -if (isnan(DOF) && isnan(lambda) && isnan(path)) - minLambda = DOF2lambda(maxDOF, rtr, 0); - maxLambda = DOF2lambda(1e-04, rtr, 0); - - % Search for the smallest model by criterion score - [lambda, score] = fmincon( @(l) fitfastridge_wrapper(exp(l), X, y, yty, v, rtr, ry, criterion), minLambda*1.05, [], [], [], [], log(minLambda), log(maxLambda), [], optimoptions('fmincon','Display','off')); - lambda = exp(lambda); -end - -%% If a lambda was specified, fit the specified model -if (~isnan(lambda)) - [beta, tau2, DOF] = fitfastridge(X, y, yty, v, rtr, ry, lambda); -end - -%% If a path was requested -if (~isnan(path)) - % Use a linear grid over the DOF's - DOF = linspace(0, maxDOF, path); - score = zeros(1, path); - beta = zeros(p, path); - tau2 = zeros(1, path); - - % Try each model in the lambda grid - for i = 1:path, - L = DOF2lambda(DOF(i), rtr, 0); - lambda(i) = L; - - % Fit the model - [beta(:,i), tau2(i), DOF(i), mmlu, aicc] = fitfastridge(X, y, yty, v, rtr, ry, lambda(i)); - switch (criterion) - case 'mmlu' - score(i) = mmlu; - case 'aicc' - score(i) = aicc; - end - end -end - -%% Re-scale coefficients -beta = bsxfun(@rdivide, beta, normX'); -b0 = muY-muX*beta; - -end - - -%% Convert degrees-of-freedom to a lambda regularisation parameter -function [lambda, f] = DOF2lambda(dof, rtr, lambda) - -if (~exist('lambda','var')) - lambda = 0; -end -f = inf; - -% Newton-Raphson to find lambda -while (abs(f-dof)>1e-2) - lambda = lambda + (sum( rtr ./ (rtr + lambda) ) - dof) / sum(rtr./(rtr + lambda).^2); - f = sum(rtr ./ (rtr + lambda)); -end - -end - - -%% Wrapper function for fast ridge fitting -function f = fitfastridge_wrapper(lambda, X, y, yty, v, rtr, ry, criterion) - -[~, ~, ~, mmlu, aicc] = fitfastridge(X, y, yty, v, rtr, ry, lambda); -switch lower(criterion) - case 'mmlu' - f = mmlu; - case 'aicc' - f = aicc; -end - -end - - -%% Fit a ridge model in a fast manner -function [b, tau2, dof, mmlu, aicc] = fitfastridge(X, y, yty, v, rtr, ry, lambda) - -n = length(y); -b = bsxfun(@times, v, 1./(rtr + lambda)') * ry; -dof = sum(rtr ./ (rtr + lambda)); - -e = (y - X*b); -tau2 = e'*e/n; - -%% Model selection criteria -mmlu = (n-dof)/2*log(2*pi) + (n-dof)/2*log(tau2) + (n-dof)/2 + dof/2*log(pi*yty) - gammaln(dof/2+1) + 1/2*log(dof+1); -aicc = n/2*log(tau2) + n/2 + dof*n/(n-dof-1); - -end - - -%% Standardise the covariates to have zero mean and x_i'x_i = 1 -% function [X,meanX,stdX,y,meany]=standardise(X,y) -function [X,meanX,stdX,y,meany]=standardise(X,y) - -%% params -n=size(X,1); -meanX=mean(X); -stdX=std(X,0,1); - -%% Standardise Xs -X=bsxfun(@minus,X,meanX); -X=bsxfun(@rdivide,X,stdX); - -%% Standardise ys (if neccessary) -if(nargin == 2) - meany=mean(y); - y=y-ones([n,1])*meany; -end; - -end \ No newline at end of file diff --git a/fastRidge/fastridge_example.m b/fastRidge/fastridge_example.m deleted file mode 100644 index 59d1928..0000000 --- a/fastRidge/fastridge_example.m +++ /dev/null @@ -1,128 +0,0 @@ -%% Example -%% Load diabetes data -clear; - - data='EEG'; - goal='Autoenc'; - k=10; - ridgeParams=10; - shiftEMG=0.05; - shiftEEG=0.2; - eegOffset=1; - - load '/home/jph/Uni/masterarbeit/messor/matlabData/MB2200msWindowEMG1sWindowEEG50msShiftEMG200msShiftEEGFreq0to200Pause0pBurg50.mat'; - - clear classification; - eeg=false; - if strcmp(data,'EEG') - eeg=true; - trainingData=trainingDataEEG(eegOffset+1:end,:); - elseif strcmp(data,'EMG') - trainingData=trainingDataEMG; - elseif strcmp(data,'LF') - eeg=true; - trainingData=trainingDataEEGlf(eegOffset+1:end,:); - else - error('only EEG, EMG and LF are valid inputs for data'); - end - - factor=size(trainingDataEMG,1)/(size(trainingData,1)+eegOffset); - - clear trainingDataEEG; - clear trainingDataEEGlf; - clear trainingDataEMG; - - if strcmp(goal,'Autoenc') - predicted=shiftingMean(synergiesAutoenc,factor); - elseif strcmp(goal,'PCA') - predicted=shiftingMean(synergiesPCA,factor); - elseif strcmp(goal,'NNMF') - predicted=shiftingMean(synergiesNNMF,factor); - elseif strcmp(goal,'kin') - predicted=shiftingMean(kinematics,factor); - else - error('only kin, Autoenc, PCA and NNMF are valid inputs for goal'); - end - - clear kinematics; - clear synergies*; - - if eeg - predicted=predicted(1:end-eegOffset,:); - end - -X = trainingData; -y = predicted; - -n = size(X,1); - -%% Augment the design matrix with large number of additional variables -Xn = [X, randn(n, 4e3)]; -p = size(Xn,2); - -fprintf('*****************************************************************************\n'); -fprintf('* fastridge example using diabetes data (augmented with extra noise variables):\n'); -fprintf(' - n = %d\n', n); -fprintf(' - p = %d\n', p); -fprintf('\n'); - -%% Produce a regularisation path -fprintf('* Producing a regularisation path of 100 betas using fastridge '); -tic; -[beta, b0, tau2, DOF, lambda, score] = fastridge(Xn, y)%, 'path', 1e2); -fprintf('(%.2fs)\n', toc); - -[~,I] = min(score); -fprintf(' - DOF of best model = %.2f\n', DOF(I)); -fprintf(' - lambda of best model = %.2f\n', lambda(I)); - -displaypath(beta, DOF, score); - -fprintf('\n'); - -%% Fit model with specified DOF -fprintf('* Fitting model with %.2f DOF ', DOF(I)); -tic; -[b, ~, ~, ~, ~, ~] = fastridge(Xn, y, 'DOF', DOF(I)); -fprintf('(%.2fs)\n', toc); -fprintf(' - L2 norm between this model and best model from path = %.2f\n', norm(b-beta(:,I))); - -fprintf('\n'); - -%% Fit model with specified lambda -fprintf('* Fitting model with lambda=%.2f ', lambda(I)); -tic; -[b, ~, ~, ~, ~, ~] = fastridge(Xn, y, 'lambda', lambda(I)); -fprintf('(%.2fs)\n', toc); -fprintf(' - L2 norm between this model and best model from path = %.2f\n', norm(b-beta(:,I))); - -fprintf('\n'); - -%% Search for best model using MML criterion -fprintf('* Fitting model using MML (searching for lambda) '); -tic; -[b_srch, ~, ~, DOF_srch, lambda_srch] = fastridge(Xn, y); -fprintf('(%.2fs)\n', toc); -fprintf(' - DOF of this model = %.2f\n', DOF_srch); -fprintf(' - lambda of this model = %.2f\n', lambda_srch); -fprintf(' - L2 norm between this model and best model from path = %.2f\n', norm(b_srch-beta(:,I))); - -fprintf('\n'); - -%% Search for best model using AICc criterion -fprintf('* Fitting model using AICc (searching for lambda) '); -tic; -[b_srch, ~, ~, DOF_srch, lambda_srch] = fastridge(Xn, y, 'criterion', 'aicc'); -fprintf('(%.2fs)\n', toc); -fprintf(' - DOF of this model = %.2f\n', DOF_srch); -fprintf(' - lambda of this model = %.2f\n', lambda_srch); -fprintf(' - L2 norm between this model and best model from MML path = %.2f\n', norm(b_srch-beta(:,I))); - -fprintf('\n'); - -%% Finally fit a model using the standard ridge -fprintf('* Fitting model with lambda=%.2f using MATLAB Ridge ', lambda(I)); -tic; -b_matlab = ridge(y,Xn,lambda(I),0); -fprintf('(%.2fs)\n', toc); -fprintf(' - L2 norm between this model and best model from path = %.2f\n', norm(b_matlab(2:end)-beta(:,I))); \ No newline at end of file diff --git a/fastRidge/license.txt b/fastRidge/license.txt deleted file mode 100644 index b4c3de1..0000000 --- a/fastRidge/license.txt +++ /dev/null @@ -1,24 +0,0 @@ -Copyright (c) 2015, Statovic -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the distribution - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE.