diff --git a/fastRidge/displaypath.m b/fastRidge/displaypath.m new file mode 100644 index 0000000..3586ff0 --- /dev/null +++ b/fastRidge/displaypath.m @@ -0,0 +1,42 @@ +function displaypath(b, DOF, score) + +%% Create a new figure +figure; +clf; +hold on; +grid on; +box on; + +title('Ridge Regression Path'); + +% Display the path +plot(DOF, b); +xlim([0, max(DOF)]); +xlabel('Degrees of Freedom'); +ylabel('Regression Coefficients'); + +% Show the minimum criterion score model +[~,I] = min(score); +yl = ylim; +plot([DOF(I), DOF(I)], [yl(1), yl(2)], '--'); + +%% Create a new figure +figure; +clf; +hold on; +grid on; +box on; + +title('Ridge Regression Criterion Score'); + +% Display the path +plot(DOF, score); +xlim([0, max(DOF)]); +xlabel('Degrees of Freedom'); +ylabel('Criterion Score'); + +% Show the minimum criterion score model +[~,I] = min(score); +yl = ylim; +plot([DOF(I), DOF(I)], [yl(1), yl(2)], '--'); + diff --git a/fastRidge/fastridge.m b/fastRidge/fastridge.m new file mode 100644 index 0000000..cf78141 --- /dev/null +++ b/fastRidge/fastridge.m @@ -0,0 +1,225 @@ +%% Implementation of the fast ridge regression, specifically for the case when p > n +% +% function [beta, b0, tau2, DOF, lambda, score] = fastridge(X, y, 'param1', value1, ...) +% +% Parameters: +% X = regressor matrix [n x p] +% y = response vector [n x 1] +% +% Optional arguments: +% +% If no path, lambda or dof is specified, fastridge will search for the +% optimal ridge model using the specified criterion (default is 'mmlu') +% +% 'path' = generate a path of beta's, with the number of lambda's given by the argument [ integer scalar ] +% 'criterion' = specify which model selection criterion to use [ 'mmlu', 'aicc', 'kicc' ] +% 'lambda' = fit a ridge model with a specified lambda [ positive scalar ] +% 'dof' = fit a ridge model with specified degrees-of-freedom [ postive scalar ] +% +% Returns: +% beta = regression parameters [p x nlambda] +% b0 = regression param. for constant [1 x 1] +% tau2 = noise variance sigma^2 [1 x nlambda] +% DOF = degrees-of-freedom of fitted models [1 x nlambda] +% score = model selection criterion score of fitted models [1 x nlambda] +% +% +% Example: +% % Generate a regularisation path with 100 lambda's +% [beta, b0, tau2, DOF, lambda, score] = fastridge(X, y, 'path', 1e2); +% +% % Fit with a specified DOF +% [beta, b0, tau2, DOF, lambda, score] = fastridge(X, y, 'dof', 88); +% +% See fastridge_example.m for a more extensive example with timings +% +% References: +% +% Efficient quadratic regularization for expression arrays +% Trevor Hastie and Robert Tibshirani +% Biostatistics, Vol. 5, No. 3, pp. 329--340, 2004 +% +% MML Invariant Linear Regression +% Daniel F. Schmidt and Enes Makalic +% Lecture Notes in Computer Science, Vol. 5866, pp. 312--321, 2009 +% +% Regression and Time Series Model Selection in Small Samples +% Clifford M. Hurvich and Chih-Ling Tsai +% Biometrika, Vol. 76, No. 2, pp. 297--307, 1989 +% +% (c) Copyright Enes Makalic and Daniel F. Schmidt, 2015 +% +function [beta, b0, tau2, DOF, lambda, score] = fastridge(X, y, varargin) + +%% Standardise the input data +[n, p] = size(X); +[X, muX, normX, y, muY] = standardise(X, y); + +% Determine the maximum DOF +maxDOF = min(n-3/2, p); + +%% Parse options +inParser = inputParser; + +%% Default parameter values +defaultLambda = NaN; +defaultDOF = NaN; +defaultCriterion = 'mmlu'; +defaultPath = NaN; + +expectedCriterion = {'mmlu', 'aicc'}; + +%% Define parameters +addParameter(inParser,'lambda',defaultLambda,@(x) isnumeric(x) && (x >= 0) && isscalar(x)); +addParameter(inParser,'dof',defaultDOF,@(x) isnumeric(x) && isscalar(x) && x >= 0 && x <= maxDOF); +addParameter(inParser,'criterion',defaultCriterion,@(x) any(validatestring(x,expectedCriterion))); +addParameter(inParser,'path',defaultPath,@(x) isnumeric(x) && isscalar(x) && x > 0); + +%% Parse options +parse(inParser, varargin{:}); + +% General options for all samplers +DOF = inParser.Results.dof; +lambda = inParser.Results.lambda; +path = inParser.Results.path; +criterion = inParser.Results.criterion; + +%% Compute the SVD and sufficient statistics +[u,s,v] = svd(X, 'econ'); + +ds = diag(s); +r = bsxfun(@times, u, ds'); % r = u*s; +rtr = ds.^2; % r'*r +ry = r'*y; +yty = y'*y; + +%% Error checking +if (~isnan(DOF) && ~isnan(lambda)) + error('Cannot specify both DOF and lambda'); +end +if ((~isnan(DOF) || ~isnan(lambda)) && ~isnan(path)) + error('Cannot specify a lambda/DOF and request a path'); +end + +%% Initialise +score = NaN; + +%% If a DOF was specified, convert it to a lambda +if (~isnan(DOF)) + lambda = DOF2lambda(DOF, rtr); +end + +%% If no DOF/lambda, or path option was specified, search for the best model using the specified criterion +if (isnan(DOF) && isnan(lambda) && isnan(path)) + minLambda = DOF2lambda(maxDOF, rtr, 0); + maxLambda = DOF2lambda(1e-04, rtr, 0); + + % Search for the smallest model by criterion score + [lambda, score] = fmincon( @(l) fitfastridge_wrapper(exp(l), X, y, yty, v, rtr, ry, criterion), minLambda*1.05, [], [], [], [], log(minLambda), log(maxLambda), [], optimoptions('fmincon','Display','off')); + lambda = exp(lambda); +end + +%% If a lambda was specified, fit the specified model +if (~isnan(lambda)) + [beta, tau2, DOF] = fitfastridge(X, y, yty, v, rtr, ry, lambda); +end + +%% If a path was requested +if (~isnan(path)) + % Use a linear grid over the DOF's + DOF = linspace(0, maxDOF, path); + score = zeros(1, path); + beta = zeros(p, path); + tau2 = zeros(1, path); + + % Try each model in the lambda grid + for i = 1:path, + L = DOF2lambda(DOF(i), rtr, 0); + lambda(i) = L; + + % Fit the model + [beta(:,i), tau2(i), DOF(i), mmlu, aicc] = fitfastridge(X, y, yty, v, rtr, ry, lambda(i)); + switch (criterion) + case 'mmlu' + score(i) = mmlu; + case 'aicc' + score(i) = aicc; + end + end +end + +%% Re-scale coefficients +beta = bsxfun(@rdivide, beta, normX'); +b0 = muY-muX*beta; + +end + + +%% Convert degrees-of-freedom to a lambda regularisation parameter +function [lambda, f] = DOF2lambda(dof, rtr, lambda) + +if (~exist('lambda','var')) + lambda = 0; +end +f = inf; + +% Newton-Raphson to find lambda +while (abs(f-dof)>1e-2) + lambda = lambda + (sum( rtr ./ (rtr + lambda) ) - dof) / sum(rtr./(rtr + lambda).^2); + f = sum(rtr ./ (rtr + lambda)); +end + +end + + +%% Wrapper function for fast ridge fitting +function f = fitfastridge_wrapper(lambda, X, y, yty, v, rtr, ry, criterion) + +[~, ~, ~, mmlu, aicc] = fitfastridge(X, y, yty, v, rtr, ry, lambda); +switch lower(criterion) + case 'mmlu' + f = mmlu; + case 'aicc' + f = aicc; +end + +end + + +%% Fit a ridge model in a fast manner +function [b, tau2, dof, mmlu, aicc] = fitfastridge(X, y, yty, v, rtr, ry, lambda) + +n = length(y); +b = bsxfun(@times, v, 1./(rtr + lambda)') * ry; +dof = sum(rtr ./ (rtr + lambda)); + +e = (y - X*b); +tau2 = e'*e/n; + +%% Model selection criteria +mmlu = (n-dof)/2*log(2*pi) + (n-dof)/2*log(tau2) + (n-dof)/2 + dof/2*log(pi*yty) - gammaln(dof/2+1) + 1/2*log(dof+1); +aicc = n/2*log(tau2) + n/2 + dof*n/(n-dof-1); + +end + + +%% Standardise the covariates to have zero mean and x_i'x_i = 1 +% function [X,meanX,stdX,y,meany]=standardise(X,y) +function [X,meanX,stdX,y,meany]=standardise(X,y) + +%% params +n=size(X,1); +meanX=mean(X); +stdX=std(X,0,1); + +%% Standardise Xs +X=bsxfun(@minus,X,meanX); +X=bsxfun(@rdivide,X,stdX); + +%% Standardise ys (if neccessary) +if(nargin == 2) + meany=mean(y); + y=y-ones([n,1])*meany; +end; + +end \ No newline at end of file diff --git a/fastRidge/fastridge_example.m b/fastRidge/fastridge_example.m new file mode 100644 index 0000000..59d1928 --- /dev/null +++ b/fastRidge/fastridge_example.m @@ -0,0 +1,128 @@ +%% Example +%% Load diabetes data +clear; + + data='EEG'; + goal='Autoenc'; + k=10; + ridgeParams=10; + shiftEMG=0.05; + shiftEEG=0.2; + eegOffset=1; + + load '/home/jph/Uni/masterarbeit/messor/matlabData/MB2200msWindowEMG1sWindowEEG50msShiftEMG200msShiftEEGFreq0to200Pause0pBurg50.mat'; + + clear classification; + eeg=false; + if strcmp(data,'EEG') + eeg=true; + trainingData=trainingDataEEG(eegOffset+1:end,:); + elseif strcmp(data,'EMG') + trainingData=trainingDataEMG; + elseif strcmp(data,'LF') + eeg=true; + trainingData=trainingDataEEGlf(eegOffset+1:end,:); + else + error('only EEG, EMG and LF are valid inputs for data'); + end + + factor=size(trainingDataEMG,1)/(size(trainingData,1)+eegOffset); + + clear trainingDataEEG; + clear trainingDataEEGlf; + clear trainingDataEMG; + + if strcmp(goal,'Autoenc') + predicted=shiftingMean(synergiesAutoenc,factor); + elseif strcmp(goal,'PCA') + predicted=shiftingMean(synergiesPCA,factor); + elseif strcmp(goal,'NNMF') + predicted=shiftingMean(synergiesNNMF,factor); + elseif strcmp(goal,'kin') + predicted=shiftingMean(kinematics,factor); + else + error('only kin, Autoenc, PCA and NNMF are valid inputs for goal'); + end + + clear kinematics; + clear synergies*; + + if eeg + predicted=predicted(1:end-eegOffset,:); + end + +X = trainingData; +y = predicted; + +n = size(X,1); + +%% Augment the design matrix with large number of additional variables +Xn = [X, randn(n, 4e3)]; +p = size(Xn,2); + +fprintf('*****************************************************************************\n'); +fprintf('* fastridge example using diabetes data (augmented with extra noise variables):\n'); +fprintf(' - n = %d\n', n); +fprintf(' - p = %d\n', p); +fprintf('\n'); + +%% Produce a regularisation path +fprintf('* Producing a regularisation path of 100 betas using fastridge '); +tic; +[beta, b0, tau2, DOF, lambda, score] = fastridge(Xn, y)%, 'path', 1e2); +fprintf('(%.2fs)\n', toc); + +[~,I] = min(score); +fprintf(' - DOF of best model = %.2f\n', DOF(I)); +fprintf(' - lambda of best model = %.2f\n', lambda(I)); + +displaypath(beta, DOF, score); + +fprintf('\n'); + +%% Fit model with specified DOF +fprintf('* Fitting model with %.2f DOF ', DOF(I)); +tic; +[b, ~, ~, ~, ~, ~] = fastridge(Xn, y, 'DOF', DOF(I)); +fprintf('(%.2fs)\n', toc); +fprintf(' - L2 norm between this model and best model from path = %.2f\n', norm(b-beta(:,I))); + +fprintf('\n'); + +%% Fit model with specified lambda +fprintf('* Fitting model with lambda=%.2f ', lambda(I)); +tic; +[b, ~, ~, ~, ~, ~] = fastridge(Xn, y, 'lambda', lambda(I)); +fprintf('(%.2fs)\n', toc); +fprintf(' - L2 norm between this model and best model from path = %.2f\n', norm(b-beta(:,I))); + +fprintf('\n'); + +%% Search for best model using MML criterion +fprintf('* Fitting model using MML (searching for lambda) '); +tic; +[b_srch, ~, ~, DOF_srch, lambda_srch] = fastridge(Xn, y); +fprintf('(%.2fs)\n', toc); +fprintf(' - DOF of this model = %.2f\n', DOF_srch); +fprintf(' - lambda of this model = %.2f\n', lambda_srch); +fprintf(' - L2 norm between this model and best model from path = %.2f\n', norm(b_srch-beta(:,I))); + +fprintf('\n'); + +%% Search for best model using AICc criterion +fprintf('* Fitting model using AICc (searching for lambda) '); +tic; +[b_srch, ~, ~, DOF_srch, lambda_srch] = fastridge(Xn, y, 'criterion', 'aicc'); +fprintf('(%.2fs)\n', toc); +fprintf(' - DOF of this model = %.2f\n', DOF_srch); +fprintf(' - lambda of this model = %.2f\n', lambda_srch); +fprintf(' - L2 norm between this model and best model from MML path = %.2f\n', norm(b_srch-beta(:,I))); + +fprintf('\n'); + +%% Finally fit a model using the standard ridge +fprintf('* Fitting model with lambda=%.2f using MATLAB Ridge ', lambda(I)); +tic; +b_matlab = ridge(y,Xn,lambda(I),0); +fprintf('(%.2fs)\n', toc); +fprintf(' - L2 norm between this model and best model from path = %.2f\n', norm(b_matlab(2:end)-beta(:,I))); \ No newline at end of file diff --git a/fastRidge/license.txt b/fastRidge/license.txt new file mode 100644 index 0000000..b4c3de1 --- /dev/null +++ b/fastRidge/license.txt @@ -0,0 +1,24 @@ +Copyright (c) 2015, Statovic +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE.