function [sel, b, rss, r2, cp, rss2, f, adj, pvalue, aic, bic] = bestsub(x1, y, c) %REGRESS Performs Best Subsets Regression %INPUTS: x1 = a set of n candidate independent variables % y = a vector of dependent variable measures % c = the number of variables to be selected %OUTPUTS: sel = a set of c selected variables % b = a vector of d + 1 regression parameters % rss = the residual sum of squares % r2 = r-squared % tic; eps=.0000001; [m,d]=size(x1); % ############################################################ % COMPUTE TOTAL SUM OF SQUARES FOR Y AND STEPWISE SOLUTION % ############################################################ vlist = 1:d; mu = mean(y); ydiff = (y-mu).*(y-mu); sstot = sum(ydiff); emin = 999999999999999999999999999; for kk = 1:d x = [ones(size(y)) x1(:,kk)]; v = x\y; yhat = x*v; ess(kk) = sum((y-yhat).^2); gsel(kk) = 0; if ess(kk) < emin emin = ess(kk); ksel = kk; end end x = [ones(size(y)) x1(:,ksel)]; gsel(ksel) = 1; g = 1; gpick(g) = ksel; while g < c; g = g + 1; emin = 99999999999999999999999999.; for kk = 1:d if gsel(kk) == 0 xx = [x x1(:,kk)]; v = xx\y; yhat = xx*v; erss = sum((y-yhat).^2); if erss < emin emin = erss; ksel = kk; end end end gsel(ksel) = 1; x = [x x1(:,ksel)]; gpick(g) = ksel; end trig = 1; while trig == 1 trig = 0; for kremov = 1:c kr = gpick(kremov); for kinclud = 1:d if gsel(kinclud) == 0 xx = x; xx(:,kremov+1) = x1(:,kinclud); v = xx\y; yhat = xx*v; erss = sum((y-yhat).^2); if erss < emin emin = erss; gpick(kremov) = kinclud; gsel(kinclud) =1; gsel(kr) = 0; kr = kinclud; x = xx; trig = 1; end end end end end r2best = (sstot-emin)./sstot; r2best = r2best-.0000001; sbest = setdiff(vlist,gpick); % ################################################################ % REORDER THE VARIABLES (ASCENDING) BASED ON BEST ESS VALUES % ################################################################ for i = 1:c k = gpick(i); order(i) = k; end for i = c+1:d emin = 999999999999999999999; for j = 1:d if gsel(j) == 0 if ess(j) < emin emin = ess(j); k = j; end end end gsel(k) = 1; order(i) = k; end a = x1(:,order); a = [a y]; r = corr(a); for j = 1:d [r, z] = sweep2(r, j); end % ################################################################### % BEGIN THE BRANCH AND BOUND ALGORITHM % ################################################################### % ###################### % STEP 0: INITIALIZE % ###################### ip=0; trig = 0; trig1 = 0; while trig == 0 if trig1 == 0; ip = ip + 1; % ADVANCE if (ip == 1) s(ip) = 1; [r, z] = sweep2(r, s(ip)); else s(ip) = s(ip-1) + 1; [r, z] = sweep2(r, s(ip)); end trig1 = 1; trig2 = 0; end if trig2 == 0; if d-s(ip) < d-c-ip % FEASIBILITY trig3 = 1; trig4 = 1; else trig3 = 0; trig4 = 0; end end if trig3 == 0 % SUBOPTIMALITY if z < r2best trig4 = 1; end end if trig4 == 0 % UPDATE INCUMBENT? if ip ~= d-c % If still too many varibles, trig1 = 0; % then return to the top and continue % discard another. else r2best = z; % But...if at the correct size, sbest = s; % then update the incumbent, end end if s(ip) < d % BRANCH RIGHT IF POSSIBLE [r, z] = sweep2(r, s(ip)); s(ip) = s(ip) + 1; [r, z] = sweep2(r, s(ip)); trig2 = 0; continue end [r, z] = sweep2(r, s(ip)); s(ip) = 0; % RETRACT IF CAN'T BRANCH RIGHT ip = ip - 1; if(ip == 0) trig = 1; else trig2 = 1; trig3 = 1; trig4 = 1; end end sel2 = setdiff(vlist, sbest); sel = order(sel2); x = [ones(size(y)) x1(:,sel)]; b = x\y; yhat = x*b; rss = sum((y-yhat).^2); r2 = (sstot-rss)./sstot; xx = [ones(size(y)) x1]; v = xx\y; yhat = xx*v; rss2 = sum((y-yhat).^2); rss2 = rss2 ./ (m-d-1); cp = rss./rss2 - m + 2*(c+1); cc = c+1; mse = rss./(m-cc); msr = (sstot-rss)./c; f = msr./mse; v1 = c; v2 = m - cc; X = v2./(v2+v1.*f); pvalue = betainc(X,v2./2,v1./2); %pvalue = 1-fcdf(f,c,m-cc); %[pvalue] = fcomp2(f,c,m-cc); LL = -(m./2).*log(2.*pi.*(rss./m))-m./2; aic = -2.*(LL-cc); bic = LL-(1./2).*cc.*log(m); adj = 1 - (1-r2).*((m-1)./(m-cc)); %toc