// $URL: svn://churro.cnbc.cmu.edu/igorcode/Recording%20Artist/Other/GLMFit.ipf $ // $Author: rick $ // $Rev: 606 $ // $Date: 2012-04-24 22:40:38 -0400 (Tue, 24 Apr 2012) $ #pragma rtGlobals=1 // Use modern global access method. // GLMFit: Generalized linear model. // Example: GLMFit(observation,{covariate1,covariate2,covariate3},"log") Function GLMFit(yy,wxx,distr[,distrParams,link,betas,prior,priorParams,func,splines,nonlinear,basis,bin,method,stepMethod,maxStepSize,maxIters,tolerance,brief,quiet,noConstant,train,condition,noGroup,depth]) wave yy // Y values = Dependent variable = To be predicted = Measurements wave /wave wxx // X values = numCovariates = Regressors = Inputs; note that this is a wave of wave references, so that many waves can be passed. string distr // Distribution, e.g. gaussian, poisson, vonmises. wave distrParams string link // Link function, e.g. identity, log, tan. By default, uses the canonical link function for the distribution. wave /d betas // Initial guesses for the coefficients. If the likelihood function is totally convex, this is unnecessary. Otherwise, you may need to specify these to avoid getting trapped in a local maximum. funcref L2Norm prior wave /wave priorParams // If an part of the model is non-linear, i.e. the covariate will be passed through a non-linearity, these next two parameters must be set. string func // If func is "smith", will use functions with names like "log_poisson_smith" instead of "log_poisson". This way arbitrary nonlinearities can be specified in the model. string nonlinear // apply non-linear function func to covariate i with initial guesses a,b,c,... for function parameters using the syntax "i,func,a,b,c,...". Mutliple numCovariates can have non-linearities, separated by semi-colons. wave splines // The first entry contains the order of the polynomial to be used, i.e. {3} for a cubic. // The second entry contains the maximum order of derivative to make continuous at the knots, i.e. {2} for all derivatives up to and including second derivatives. // The third entry contains the index of the covariate which contains the linear variable, e.g. 't'. // The fourth entry contains the modulus for repetition of the same splines, in units of the linear covariate. Repeating splines will use the same coefficients as the original splines. // The fifth entry is a boolean indicating whether the splines exist on a circular domain, i.e. does the end of the last spline meet the beginning of the first spline. // The remaining entries contain the locations of the knots, in units of the linear covariate. // To apply splines to multiple numCovariates, use one column for each, with each column containing the entries above. string basis // replace covariate i (indexed from 1, with 0 as the constant term) in wxx with the basis expansion returned (as columns) by func using the syntax "i:func". Mutliple numCovariates can have non-linearities, separated by semi-colons. wave bin // divide covariate i (indexed from 1, with 0 as the constant term) in wxx into n bins in the range a,b and optionally enforce a mean of 0 using the syntax: {i,n,a,b,mean0}. Multiple waves can be in additional columns. variable method // method = 0 : Use IRLS function (by far the fastest way). // method = 1 : Use Optimize to find the maximum of the log likelihood function. // method = 2 : Use FindRoots to find the roots of the score function (gradient of the log likelihood function). Faster than method 1, but perhaps more error prone (programmer error?) variable stepMethod // Method for stepping from one candidate solution to the next, for method = 1. Has no effect for other methods. // stepMethod=0: Line Search // stepMethod=1: Dogleg // stepMethod=2: More-Hebdon // stepMethod=3: Simulated Annealing variable maxStepSize // Maximum step size in searching for solutions. variable maxIters // Maximum iterations in searching for solutions. variable tolerance // Tolerance (deviance above which fitting will terminate). variable brief // 0 to calculate everything including submodel deviances. // (1) to calculate only the main model solution, not the errors, t-values, deviances, or p-values. // (2) to calculate only the main model solution and the the errors, t-values, and deviances. // (3) same as (2) but echo the betas, their errors, and t-values. // (4) don't even fit, just return the likelihood with the provided betas, and generated the data matrix. // (>=5) don't train on the training set, just return the likelhood of the test data with the provided betas. // (-1) to calculate everything and also echo submodel betas. variable quiet // Do not echo results. variable noConstant // Do not add a constant term (column of ones) to the list of numCovariates. wave train // Training/Testing mask. Set values to 1 to use for training (fitting), and to -1 to use for testing (cross-validation), or 0 for both. Likelihood returned will be for testing set. variable condition // Condition resultant X matrices by equalizing the L1 norm of each column. Values > 0 restore the original data matrix and fix the betas to match the original matrix. Values < 1 keep the conditioned values. variable noGroup // Do not group coefficients according to their origin. Test every submodel that does not contain one of them. variable depth // Recursion depth. variable i,j,k variable vars=1 // Number of variables needed to specify the mean in this distribution. variable numCovariates=0 variable numCoefficients=0 variable observations=dimsize(yy,0) brief = paramisdefault(brief) ? (depth>0 ? 1 : 0) : brief stepMethod=paramisdefault(stepMethod) ? 2 : stepMethod // stepMethod=2 (More-Hebdon) appears to be about 35% faster than the other step methods. maxStepSize=paramisdefault(maxStepSize) ? 10 : maxStepSize maxIters=paramisdefault(maxIters) ? 1000 : maxIters tolerance=paramisdefault(tolerance) ? 0.0001 : tolerance if(paramisdefault(train)) make /free/n=(dimsize(yy,0)) train=0 endif if(paramisdefault(link)) // Use canonical link functions. strswitch(distr) case "gaussian": link="identity" break case "poisson": link="log" break case "binomial": link="logit" break case "multinomial": link="logit" vars=paramisdefault(distrParams) ? wavemax(yy) : distrParams[0]-1 break case "exponential": link="inverse" break case "vonmises": link="tan" break default: printf "Unknown distribution: %s. Exiting.\r",distr return -1 endswitch endif if(paramisdefault(prior)) funcref L2norm prior=$"" make /free/wave/n=0 priorParams endif if(paramisdefault(priorParams) && !paramisdefault(prior)) string priorName = stringbykey("NAME",FuncRefInfo(prior)) strswitch(priorName) case "L1norm": case "L2norm": make /free/d/n=1 param1=0 // Mean parameter. make /free/d/n=1 param2=1 // Variance parameter. make /free/wave/n=2 priorParams = {param1,param2} break default: printf "Could not generate default prior parameters for prior '%s'.\r", priorName return nan break endswitch endif noConstant=!(noConstant==0) // Force to be 0 or 1. make /o/d/n=0 xx // Create a matrix with that is observations x numCovariates. make /free/n=0 coefficientIndex // An index to group related coefficients together (used later). make /free/t/n=0 covariate_names if(!noConstant) redimension /n=(observations,1) xx xx=1 coefficientIndex[0]={0} covariate_names[0]={"Constant"} numCovariates+=1 numCoefficients+=1 endif for(i=0;i=numCovariates) continue endif variable bins=bin[1][i] variable low=bin[2][i] variable high=bin[3][i] variable mean0=bin[4][i] ? 1 : 0 variable width=(high-low)/bins wave w=wxx[wn-1] // One covariate that should be binned. -1 is because covariate 1 is the 0th entry in wxx (the constant term is covariate 0). variable column=wn+numCoefficients-numCovariates // wn if it is the only covariate being binned; otherwise account for others numCovariates binned before it. insertpoints /m=1 column,bins-1,xx insertpoints column,bins-1,coefficientIndex for(j=0;j=(low+j*width) && w<(low+(j+1)*width)) // 0 or 1. xx[][column+j]=wBin[p]-mean0/bins // Subtract 1/bins so that the mean across bins is 0 instead of 1/bins. coefficientIndex[column+j]={wn} waveclear wBin endfor //covariateIndex[column+j]=gnoise(1) // Testing this out to see if ensuring that the columns are linearly independent helps. numCoefficients+=bins-1 endfor endif if(!paramisdefault(basis)) for(wn=0;wn=numCovariates) printf "%d is larger than the highest-numbered covariate.\r",wn continue endif string funcName=stringfromlist(1,nonlinear_,",") funcref fitFuncProto fitFunc=$funcName if(!strlen(stringbykey("NAME",funcrefinfo(fitFunc)))) // If the function doest not match the prototype. printf "%s does not match the fitting function prototype; treating as linear instead.\r",funcName continue endif funcRefs[wn]=funcName make /o/n=0 $("guess_"+num2str(wn)) /wave=guesses for(j=2;j=6) // At least 4 metaparameters plus 2 knots. duplicate /free splines,splines_ // Make a copy to operate on. for(k=0;ksplineModulus) printf "Spline modulus must be at least as large as the position of the largest knot.\r" return -1 endif if(splineCircular) //variable numNewCoefficients=(splineOrder-splineDiff)*numKnots - 1 variable numNewCoefficients=splineOrder+1+(splineOrder-splineDiff)*(numKnots-1) - (splineDiff+1) - 1 else numNewCoefficients=splineOrder+1+(splineOrder-splineDiff)*(numKnots-2) - 1 // Add splineOrder+1 coefficients for each spline, but take away those coefficients which are determined by coeffcients in previous splines. endif insertpoints /m=1 splineRootCol,numNewCoefficients,xx // Be careful because the index in the xx wave corresponding to the term to be fitted with splines may have changed as a result of the use of other optional arguments. splines_[2][]+=splines_[2][q]>splineRootCol ? numNewCoefficients : 0 // Change the root column for subsequent numCovariates to reflect changes in the size of the xx wave. insertpoints splineRootCol,numNewCoefficients,coefficientIndex coefficientIndex[splineRootCol,splineRootCol+numNewCoefficients-1]=splineRootCol numCoefficients+=numNewCoefficients make /free/n=(numpnts(tt)) knotIndices=binarysearch(knots,tt[p]) // Which spline does each observation belong to? variable offset=0 if(splineCircular) knotIndices = knotIndices<0 ? numKnots-1 : knotIndices[p] //tt += tt[p]=i ? (tt[p]-knots[i])^power : 0 // Set each observation to the jth polynomial term for the ith knot if it is in the ith spline, and otherwise set it to zero. endfor offset+=(splineOrder-splineDiff) endif endif endfor endfor xx=numtype(xx[p][q])==2 ? 0 : xx[p][q] endif variable yColumns=max(1,dimsize(yy,1)) // Number of columns of the data to be predicted. Usually 1. if(paramisdefault(betas)) make /o/d/n=(numCoefficients,vars,yColumns) betas=gnoise(0.01) // Initial guess at solution. else redimension /d/n=(numCoefficients,vars,yColumns) betas endif if(!paramisdefault(prior)) for(i=0;i=0 extract /free/indx train,testIndices,train<=0 make /free/n=(numpnts(trainIndices),dimsize(yy,1)) yyTrain=yy[trainIndices[p]][q] make /free/n=(numpnts(trainIndices),dimsize(xx,1)) xxTrain=xx[trainIndices[p]][q] make /free/n=(numpnts(testIndices),dimsize(yy,1)) yyTest=yy[testIndices[p]][q] make /free/n=(numpnts(testIndices),dimsize(xx,1)) xxTest=xx[testIndices[p]][q] else duplicate /free yy,yyTrain,yyTest duplicate /free xx,xxTrain,xxTest endif strswitch(distr) case "gaussian": // Must put this in normal form so the variance is independent of the mean. wavestats /q yyTrain yyTrain/=v_sdev wavestats /q yyTest yyTest/=v_sdev variable y_stdev=v_sdev break endswitch if(brief==4) return likelihood(xxTrain,betas) endif if(condition) xxTrain/=scaling[q] if(!paramisdefault(priorParams)) for(i=0;i0) xxTrain*=scaling[q] betas/=scaling[p] if(!paramisdefault(priorParams)) for(i=0;i1) matrixop /o mu=exp(eta)/colrepeat(sumrows(exp(eta))+1,vars) else matrixop /o mu=exp(eta)/(sumrows(exp(eta))+1) endif break case "inverse": matrixop /o mu=powR(eta,-1) break case "tan": matrixop /o mu=2*atan(eta) break default: printf "No such link function %s. Exiting.\r",link return -1 endswitch strswitch(distr) case "gaussian": // Must put this in normal form so the variance is independent of the mean. mu *= y_stdev //betas *= y_stdev break endswitch variable modelLikelihood=likelihood(xxTest,betas) // maximized log likelihood of the model (ignores prior). if(depth==0) variable r2=statscorrelation(yyTest,mu)^2 duplicate /free mu,mu_ duplicate /free xx,xx_full // Backup covariate matrix from full_model, since xx may be overwritten during recursion. endif if(abs(brief)==1 || brief==5 || depth>0) strswitch(distr) case "gaussian": // Must put this in normal form so the variance is independent of the mean. betas *= y_stdev break endswitch if(brief==-1) printf "{ " for(i=0;i1) variable covariate,coefficient=0 for(covariate=0;covariate 1. Otherwise, it will be 1. if(brief==3) // Report stats but do not test sub-models. else // Test sub-models. Prog("Sub",covariate,numCovariates) duplicate /free/wave wxx,wxx_sub if(covariate>0 || noConstant) if(!noGroup) deletepoints covariate-!noConstant,1,wxx_sub else variable which_wxx=0 variable covariates_so_far=0 do covariates_so_far+=dimsize(wxx[which_wxx],1) if(covariates_so_far>=covariate) break else which_wxx+=1 endif while(1) variable wxx_index = covariate-(covariates_so_far-dimsize(wxx[which_wxx],1))-!noConstant wave w = wxx[which_wxx] duplicate /free w,w_sub deletepoints /m=1 wxx_index,1,w_sub wxx_sub[which_wxx]=w_sub endif endif duplicate /free betas,betas_sub deletepoints /m=0 covariate,familySize,betas_sub betas_sub+=gnoise(0.00001) // Tweak the betas just a little bit to avoid some singular hessian matrix issues when testing the sub-models. if(!paramisdefault(splines)) duplicate /free splines,splines_sub if(i1) deletepoints /m=(k) coefficient,familySize*(j==1 ? vars : 1),priorParam_sub else break endif k+=1 while(1) priorParams_sub[j]=priorParam_sub endfor variable subModelLikelihood=GLMFit(yy,wxx_sub,distr,link=link,splines=splines_sub,method=method,stepMethod=stepMethod,maxStepSize=maxStepSize,maxIters=maxIters,betas=betas_sub,prior=prior,priorParams=priorParams_sub,brief=1,quiet=1,noConstant=(covariate==0 || noConstant ? 1 : 0),train=train,depth=depth+1) betaDeviances[coefficient]=2*(modelLikelihood-subModelLikelihood) // deviance between this model and the full model. endif dof=(familySize>1) ? familySize : 1 for(j=0;j1,"","["+num2str(k)+"]"),betas[coefficient][k],betaErrors[coefficient][k],betaTs[coefficient][k],betaPartials[coefficient][k],betaDeviances[coefficient][k],betaPvals[coefficient][k] // Summary of this covariate. endfor else // Binned or Basised. for(j=0;j1,"","["+num2str(k)+"]"),betas[coefficient][k],betaErrors[coefficient][k],betaTs[coefficient][k],betaPartials[coefficient][k],betaDeviances[coefficient][k],betaPvals[coefficient][k] // Summary of this covariate. else format = "(%s%s) %.[[digits]]f +/- %.[[digits]]f; t=%.[[digits]]f\r" format = replacestring("[[digits]]",format,num2str(digits)) printf format,num2str(covariate)+num2char(97+j),selectstring(vars>1,"","["+num2str(k)+"]"),betas[coefficient+j][k],betaErrors[coefficient+j][k],betaTs[coefficient+j][k] // Summary of this covariate. Don't show deviance and p-value for subsequent bins since this is redundant. endif endfor endfor endif endif //if(!paramisdefault(splines)) // duplicate /o splines_,splines // Restore knots from full model. //endif coefficient+=familySize endfor endif endif if(!quiet) printf "Log-Likelihood = %f\r",modelLikelihood // The log-likelihood. printf "R2 = %.3f\r",r2 printf "AIC = %.2f\r",AIC(modelLikelihood,xxTest,betas) // Here 'modelLikelihood' is already the log-likelihood, so we don't need the ln. printf "BIC = %.2f\r",BIC(modelLikelihood,xxTest,betas) endif endif if(depth==0) duplicate /o mu_,mu // Restore full model mu. duplicate /o xx_full,xx // Restore full model xx, which may have been overwritten during recursion or broken up during testing/training. if(norm(train) != 0) // There was a training and testing phase. duplicate /o xxTrain $"xxTrain" duplicate /o xxTest $"xxTest" endif endif return modelLikelihood End // ---------------- Gaussian distribution; Identity link function. ----------- // The following functions will either maximize the log likelihood, or find roots of the gradient of the log likelihood, according to the following set of equations: L({yi}|mu) = prod{ exp(-(yi-mu)^2/sigma^2) } // The likelihood function for a gaussian distribution. ln(L) =sum{ -(yi-mu)^2/sigma^2 } // Log likelihood. ln(L) =sum{ -(yi-b0-b1*x1i-b2*x2i-...)^2/sigma^2 } // Plug in the glm equation mu = b0 + b1*x1 + b2*x2 + ..., where mu=E(y). We can drop the sigma^2 when maximizing. grad(ln(L))=sum{ (yi-b0-b1*x1i-b2*x2i-...) } // The gradient of the log likelihood, with constants taken outside of the summation and dropped since we will be setting equal to zero. sum{ x1i*(yi-b0-b1*x1i-b2*x2i-...) } sum{ x2i*(yi-b0-b1*x1i-b2*x2i-...) } ... hessian(ln(L))=sum( -1 -x1i -x2i .... ) ( -x1i -x1i^2 -x1i*x2i .... ) ( -x2i -x1i*x2i -x2i^2 .... ) ( .... ) // L Function gaussian_identity(xx,betas) wave xx // The numCovariates. The first column [0] is ones, the rest [1,] are x values. wave betas // The coefficients that will be adjusted to find the maximum. wave yy=yy_ // A global corresponding to observations. I could have stuck this as a column in the xx wave, but then the indexing becomes non-intuitive. matrixop /free eta=xx x betas make /free/n=(numpnts(eta)) likelihoods = -(yy[p]-eta[p])^2 // Just minimizing the sum of squares in this simple case. variable logL = sum(likelihoods) return logL End // L' Function /wave d_gaussian_identity(xx,betas) wave xx // The numCovariates. The first column [0] is ones, the rest [1,] are x values. wave betas // The coefficients that will be adjusted to find the roots of the gradient function. wave yy=yy_ // A global corresponding to observations. I could have stuck this as a column in the xx wave, but then the indexing becomes non-intuitive. variable observations=dimsize(xx,0) variable numCovariates=dimsize(xx,1) variable yColumns=max(1,dimsize(betas,1)) variable i,j matrixop /free eta=xx x betas matrixop /free grad=2*(xx^t x (yy - eta)) return grad End // -L'' Function /wave d2_gaussian_identity(xx,betas) wave xx,betas variable xColumns=dimsize(xx,1) variable yColumns=max(1,dimsize(betas,1)) matrixop /free fisher_=2*(xx^t x xx)///numrows(xx) // Positive because the negative sign on the Hessian cancels with the negative 1 that we multiply to get a Fisher information matrix. make /free/d/n=(xColumns,xColumns,yColumns) fisher=fisher_[p][q] return fisher End // ---------------- Gaussian distribution; Log link function. Not canonical. ----------- // The following functions will either maximize the log likelihood, or find roots of the gradient of the log likelihood, according to the following set of equations: L({yi}|mu) = prod{ exp(-(yi-mu)^2/sigma^2) } // The likelihood function for a gaussian distribution. ln(L) =sum{ -(yi-mu)^2/sigma^2 } // Log likelihood. ln(L) =sum{ -(yi-exp(b0-b1*x1i-b2*x2i-...))^2/sigma^2 } // Plug in the glm equation ln(mu) = b0 + b1*x1 + b2*x2 + ..., where mu=E(y). We can drop the sigma^2 when maximizing. grad(ln(L))=sum{ exp(b0-b1*x1i-b2*x2i-...) - (yi - exp(b0-b1*x1i-b2*x2i-...)) } // The gradient of the log likelihood, with constants taken outside of the summation and dropped since we will be setting equal to zero. sum{ exp(b0-b1*x1i-b2*x2i-...) - x1i*(yi - exp(b0-b1*x1i-b2*x2i-...)) } sum{ exp(b0-b1*x1i-b2*x2i-...) - x2i*(yi - exp(b0-b1*x1i-b2*x2i-...)) } ... hessian(ln(L))=exp(b0-b1*x1i-b2*x2i-...) + (yi - 2*exp(b0-b1*x1i-b2*x2i-...)) * sum( -1 -x1i -x2i .... ) ( -x1i -x1i^2 -x1i*x2i .... ) ( -x2i -x1i*x2i -x2i^2 .... ) ( .... ) // L Function gaussian_log(xx,betas) wave xx // The numCovariates. The first column [0] is ones, the rest [1,] are x values. wave betas // The coefficients that will be adjusted to find the maximum. wave yy=yy_ // A global corresponding to observations. I could have stuck this as a column in the xx wave, but then the indexing becomes non-intuitive. matrixop /free eta=xx x betas make /free/n=(numpnts(eta)) likelihoods = -(yy[p]-exp(eta[p]))^2 return sum(likelihoods) End // L' Function /wave d_gaussian_log(xx,betas) wave xx // The numCovariates. The first column [0] is ones, the rest [1,] are x values. wave betas // The coefficients that will be adjusted to find the roots of the gradient function. wave yy=yy_ // A global corresponding to observations. I could have stuck this as a column in the xx wave, but then the indexing becomes non-intuitive. matrixop /free eta=xx x betas matrixop /free grad= - (xx^t x (yy - exp(eta))) + rowrepeat(sumcols(exp(eta))^t,4) return grad End // -L'' // TO DO: Check for correctness. Function /wave d2_gaussian_log(xx,betas) wave xx,betas wave yy=yy_ // A global corresponding to observations. I could have stuck this as a column in the xx wave, but then the indexing becomes non-intuitive. variable observations=dimsize(xx,0) variable xColumns=dimsize(xx,1) variable yColumns=max(1,dimsize(betas,1)) matrixop /free eta=xx x betas make /free/d/n=(observations,yColumns) multiplier=sqrt(abs(yy - 2*exp(eta))) make /free/d/n=(observations,xColumns,yColumns) matrix=exp(eta) + xx[p][q] * multiplier[p][r] matrixop /free fisher=matrix^t x matrix return fisher End // ---------------- Poisson distribution; Logarithmic link function. ----------- // The following functions will either maximize the log likelihood, or find roots of the gradient of the log likelihood, according to the following set of equations: L({y}|mu) = prod{ exp(-mu)*mu^yi/yi! } // The likelihood function for a poisson distribution. ln(L) =sum{ -mu + yi*ln(mu) - ln(yi!) } // Log likelihood. ln(L) =sum{ -exp(b0+b1*x1i+b2*x2i+...) + yi*(b0+b1*x1i+b2*x2i+...) - ln(yi!) } // Plug in the glm equation ln(mu) = b0 + b1*x1 + b2*x2 + ..., where mu=E(y). grad(ln(L))=sum{ -exp(b0+b1*x1i+b2*x2i+...) + yi} // The gradient of the log likelihood. sum{ -x1i*exp(b0+b1*x1i+b2*x2i+...) + yi*x1i} sum{ -x2i*exp(b0+b1*x1i+b2*x2i+...) + yi*x2i} ... hessian(ln(L))=sum{ -exp(b0+b1*x1i+b2*x2i+...) * ( 1 x1i x2i .... ) ( x1i x1i^2 x1i*x2i .... ) ( x2i x1i*x2i x2i^2 .... ) ( .... ) } //Function poisson_log(xx,betas[,knots]) // wave xx // The numCovariates. The first column [0] is ones, the rest [1,] are x values. // wave betas // The coefficients that will be adjusted to find the maximum. // wave knots // The first entry contains the order of the polynomial to be used, i.e. {3} for a cubic. The second entry contains the indices of the numCovariates which contains the linear variable, e.g. 't'. // // Subsequent powers of that variable are assumed to be contained in immediately subsequent columns, up to the polynomial order specified. So if covariate 3 might contains 't', // // the second entry would be {3}. The third entry contains the index of the coefficient (column of the beta wave) which contains the constant coefficient for the first spline. Subsequent coefficients for // // that spline and coefficients for subsequent splines are assumed to be contained in immediately subsequent columns. The remaining entries contain the locations of the knots, in units of the linear covariate. // // wave yy=yy_ // A global corresponding to observations. I could have stuck this as a column in the xx wave, but then the indexing becomes non-intuitive. // // matrixop /free xxBeta=xx x betas // xxBeta = -exp(xxBeta[p]) + yy[p]*xxBeta[p] - ln(factorial(yy[p])) // return sum(xxBeta) //End Function poisson_log(xx,betas) wave xx // The numCovariates. The first column [0] is ones, the rest [1,] are x values. wave betas // The coefficients that will be adjusted to find the maximum. wave yy=yy_ // A global corresponding to observations. I could have stuck this as a column in the xx wave, but then the indexing becomes non-intuitive. matrixop /free eta=xx x betas make /free/d/n=(dimsize(eta,0),dimsize(eta,1)) likelihoods = -exp(eta[p]) + yy[p]*eta[p] - ln(factorial(yy[p])) likelihoods = numtype(likelihoods[p][q])==1 ? (-exp(eta[p]) + yy[p]*eta[p] - Stirling(yy[p])) : likelihoods[p][q] // Use Stirling's approximation if there are any Inf's or -Inf's. variable result = sum(likelihoods) return result End Function /wave d_poisson_log(xx,betas) wave xx // The numCovariates. The first column [0] is ones, the rest [1,] are x values. wave betas // The coefficients that will be adjusted to find the roots of the gradient function. wave yy=yy_ // A global corresponding to observations. I could have stuck this as a column in the xx wave, but then the indexing becomes non-intuitive. matrixop /free eta=xx x betas matrixop /free grad=xx^t x (yy - exp(eta)) return grad End // L'' Function /wave d2_poisson_log(xx,betas) wave xx,betas variable observations=dimsize(xx,0) variable xColumns=dimsize(xx,1) variable yColumns=max(1,dimsize(betas,1)) matrixop /free multiplier=sqrt(exp(xx x betas)) make /free/d/n=(observations,xColumns,yColumns) matrix=xx[p][q] * multiplier[p][r] make /free/d/n=(xColumns,xColumns,yColumns) fisher variable i for(i=0;i1) matrixop /free expEta=colrepeat(sumrows(exp(eta))+1,vars) else matrixop /free expEta=sumrows(exp(eta))+1 endif make /free/n=(observations,vars,yColumns) state=(yy[p][r]==(q+1)) //duplicate /o state,state_ //duplicate /o eta,eta_ //duplicate /o expEta,expEta_ //expEta=exp(eta-ln(expEta)) //duplicate /o expEta,expEta_ matrixop /free grad=xx^t x (-exp(eta-ln(expEta))+state) // The use of exp(ln(a)-ln(b)) in here instead of just a/b is to avoid NaNs from dividing large numbers. return grad End // L'' // TODO: Set up to work correctly with multiple dependent variables (yColumns>1). Function /wave d2_multinomial_logit(xx,betas) wave xx,betas wave yy=yy_ variable observations=dimsize(xx,0) variable numCovariates=dimsize(xx,1) variable vars=max(1,dimsize(betas,1)) variable yColumns=1//max(1,dimsize(yy,1)) matrixop /free eta=xx x betas if(vars>1) matrixop /free expEta=colrepeat(sumrows(exp(eta))+1,vars) else matrixop /free expEta=sumrows(exp(eta))+1 endif make /free/d/n=(observations,vars,yColumns) zz=exp(eta[p][q][r]-ln(expEta[p][0][r]))//exp(eta[p][q][r])/expEta[p][0][r] #if 0 // Slow. make /free/d/n=(numCovariates*vars,numCovariates*vars,observations) matrix=xx[r][mod(p,numCovariates)] * xx[r][mod(q,numCovariates)] * (((floor(p/numCovariates)==floor(q/numCovariates)) ? zz[r][floor(p/numCovariates)] : 0) - zz[r][floor(p/numCovariates)]*zz[r][floor(q/numCovariates)]) matrixop /free fisher=sumbeams(matrix) #else // Faster. make /free/d/n=(numCovariates*vars,numCovariates*vars) fisher variable i,j for(i=0;imaxStepSize) diffBetas*=(maxStepSize/extremum) endif elseif(numtype(maxStepSize)==0) diffBetas=limit(diffBetas[p][q][r],-maxStepSize,maxStepSize) endif if(numtype(sum(diffBetas))) variable fishDet=matrixdet(fish) if(fishDet==0) printf "Encountered a singular Hessian matrix during an update to the coefficients.\r" duplicate /o fish,fish_ matrixop /free xxSums=sumcols(xx)^t // Compute the column sums of the data matrix. extract /free/indx xxSums,xxSumsZero,xxSums==0 // Find the indices corresponding to columns whose sums are zero. variable i printf "Setting data columns " for(i=0;i=sum(lineSearchLikelihood)) || 0*(provisionalLikelihood>newLikelihood)) break else //Prog("Backtracks",backTracks,Inf) diffBetas*=beta_ backTracks+=1 if(backTracks==1000) // If we have to backtrack this many times, the Fisher matrix is probably close to singular and is no longer a useful guide. break endif //printf "\tBacktracks: %d\r",backTracks endif while(1) if(backTracks<1000) // If we had to backtrack more than 1000 times, the update probably isn't a very good one. betas=oldBetas+diffBetas //continue endif newLikelihood=likelihood(xx,betas) if(usePrior) newLikelihood+=prior(betas,priorParams) endif deviance=2*(newLikelihood-oldLikelihood) if(deviance<0) // We jumped too far. maxStepSize/=10 else maxStepSize+=(startMaxStepSize-maxStepSize)/10 endif if(0 && !flag && deviancetolerance) || flag) // Break on maximum iterations reached or small deviance. if(!quiet) printf "After %d iterations, reached log-likelihood of %.3f.\r",iteration,newLikelihood endif //killwaves /z yy_ return betas end // Weighted least squares. function /wave WLS(yy,xx,weights) wave yy,xx,weights variable numObservations=dimsize(yy,0) variable numnumCovariates=dimsize(xx,1) make /free/n=(numObservations,numnumCovariates) wxx=weights[p]*xx[p][q] matrixop /free result=inv(xx^t x wxx) x xx^t x (weights * yy) //matrixop /free result=inv(xx^t x diagonal(weights) x xx) x xx^t x diagonal(weights) x yy return result end // -------------- Spline functions --------------------- //function /wave SplineCoefs(order,tKnots,xx,xxCol,betas,betasCol) // variable order // Order of the polynomial used to fit each spline. // wave tKnots // Wave of knot times. // wave xx // Wave of independent variables. Each column contains one variable, and each row is an observation. // variable xxCol // Column number in wave 'xx' which contains the linear variable. // wave betas // Complete list of coeficients. This function returns a subset of this list applicable to each observation, depending on the spline it belongs to. // variable betasCol // Column number in wave 'betas' which contains the constant coefficient for the first spline. // // make /free/wave/n=(dimsize(xx,0)) betaSelector // An wave of references to beta waves. The ith reference will indicate the appropriate betas to use for the ith observation. // variable i,knots=numpnts(tKnots) // make /free/wave/n=(knots-1) betaSelections // duplicate /free/r=[betasCol,betasCol+2*knots-1] betas,knotBetas // switch(order) // case 3: // Cubic polynomial. // variable b0=knotBetas[0] // variable b1=knotBetas[1] // variable b2=knotBetas[2] // variable b3=knotBetas[3] // i=0 // do // make /free/n=(order+1) newBetas={b0,b1,b2,b3} // betaSelections[i]=knotBetas // i+=1 // if(i>=knots-1) // break // else // variable dt=tKnots[i]-tKnots[i-1] // b0=b0+b1*(dt)+b2*(dt)^2+b3*(dt)^3 // b1=b1+2*b2*(dt)+3*b3*(dt)^2 // b2=knotBetas[order+1+2*(i-1)] // b3=knotBetas[order+2+2*(i-1)] // endif // while(1) // break // endswitch // matrixop /free parameter=col(betas,betasCol) // make /free/n=(dimsize(xx,0)) knotIndex=binarysearch(tKnots,parameter[p]) // betaSelector = betaSelections[knotIndex[p]] // There is no checking here to see if we the knotIndex is < 0 (i.e. before the first or after the last knot), because Igor doesn't support conditional assignment with // // wave reference waves. So checking will have to be done in the calling function to be sure that likelihoods for values outside the knot boundaries evaluate to // // 0 or 1 or whatever. // return betaSelector //end function SplineCoefs2(order,diff,tt,Tx,k,n,power,i) variable order,diff variable tt // Current value of the splined variable, e.g. trial time. variable Tx // Max value, i.e. modulus of the periodic domain. wave k // Knot locations. variable n // Spline index for this coefficient. variable power // To which power in a spline n to apply this result. variable i // The value tt belongs to spline i. variable last = numpnts(k)-1 make /free/n=(numpnts(k)) t_ = tt - k[p] switch(order) case 2: switch(diff) case 0: if(i == last) t_ += tt=numKnots-1) // If we have gone through all the splines, break out. break else // Compute the coefficients for the next spline, based on the betas. make /free/d/n=(order+1) dtp=p==0 ? 1 : (knots[i]-knots[i-1])^p // Powers of the time between this knot and the previous one. if(diff>=0) b[0]=matrixdot(b,dtp) // Set the constant term for this spline to be equal to the value of the polynomial for the last spline evaluated at this knot. endif if(diff>=1) make /free/d/n=(order) slope=b[p+1]*dtp[p]*(p+1) // Set up the RHS of the equation for setting slopes to be equal at the knot. This is the first derivative of the polynomial. b[1]=sum(slope) // Set the linear term for this spline to be equal to the value of that first derivative. endif if(diff>=2) make /free/d/n=(order-1) curvature=b[p+2]*dtp[p]*(p+1)*(p+2) // Set up the RHS of the equation for setting curvatures to be equal at the knot. This is the second derivative of the polynomial. b[2]=sum(curvature) endif if(circular && i==numKnots-2) // Last spline, must make continuous on its right edge with the first spline's left edge (i.e. they share a knot). So redo some of the b's. b[diff+1,order]=betas[col+order+1+p-(diff+1)-1+(i-1)*(order-diff)] // Set the remaining terms to be equal to the corresponding betas, which are independent of the previous splines. // Provisionally set the remaining terms to be equal to the corresponding betas, which are independent of the previous splines. make /free/d/n=(order+1) dtp=p==0 ? 1 : (knots[i+1]-knots[i])^p // Powers of the time between this knot and the final one. switch(order) case 0: b[0]=betas[col] break case 1: switch(diff) case 0: b[1]=(betas[col]-b[0])/dtp[1] break endswitch break case 2: switch(diff) case 0: b[1]=(betas[col]-b[0]-b[2]*dtp[2])/dtp[1] break case 1: b[2]=(betas[col+1]*dtp[1]-betas[col]+b[0])/dtp[2] break endswitch break case 3: switch(diff) case 0: b[1]=(betas[col]-b[0]-b[2]*dtp[2]-b[3]*dtp[3])/dtp[1] break case 1: b[2]=(3*betas[col]-betas[col+1]*dtp[1]-3*b[0]-2*b[1]*dtp[1])/dtp[2] b[3]=(2*b[0]+b[1]*dtp[1]-2*betas[col]+betas[col+1]*dtp[1])/dtp[3] break endswitch break endswitch else b[diff+1,order]=betas[col+order+1+p-(diff+1)+(i-1)*(order-diff)] // Set the remaining terms to be equal to the corresponding betas, which are independent of the previous splines. endif endif while(1) // else // switch(order) // case 2: // Quadratic polynomial. // b[0]=betas[col] // b[1]=betas[col+1] // b[2]=betas[col+2] // i=0 // do // newBetas[3*i]=b[0] // newBetas[3*i+1]=b[1] // newBetas[3*i+2]=b[2] // i+=1 // if(i>=numKnots-1) // break // else // variable dt=knots[i]-knots[i-1] // b[0]=b[0]+b[1]*(dt)+b[2]*(dt)^2 // b[1]=b[1]+2*b[2]*(dt) // b[2]=betas[col+order+1+2*(i-1)] // endif // while(1) // break // case 3: // Cubic polynomial. // b[0]=betas[col] // b[1]=betas[col+1] // b[2]=betas[col+2] // b[3]=betas[col+3] // i=0 // do // newBetas[4*i]=b[0] // newBetas[4*i+1]=b[1] // newBetas[4*i+2]=b[2] // newBetas[4*i+3]=b[3] // i+=1 // if(i>=numKnots-1) // break // else // dt=knots[i]-knots[i-1] // b[0]=b[0]+b[1]*(dt)+b[2]*(dt)^2+b[3]*(dt)^3 // b[1]=b[1]+2*b[2]*(dt)+3*b[3]*(dt)^2 // b[2]=betas[col+order+1+2*(i-1)] // b[3]=betas[col+order+2+2*(i-1)] // endif // while(1) // break // endswitch // endif return newBetas end // -------------- Basis functions --------------------- Function /wave basisFuncProto(w) wave w End Function /wave circularBasis(w) wave w make /free/n=(dimsize(w,0),2) w_basis w_basis[][0]=cos(w[p]) w_basis[][1]=sin(w[p]) return w_basis End // --------------- Fit functions ------------------------- Function fitFuncProto(w,x) wave w variable x End // -------------- Auxiliary functions ----------------- static Function /wave Col(w,i) wave w variable i matrixop /free col_=col(w,i) return col_ End static function product(w) wave w matrixop /free result=exp(sum(ln(w)) return result[0] end function DecimalHash(str,digits) string str variable digits string hex=hash(str,1)[0,digits-1] variable dec sscanf hex,"%x",dec return dec end // Stirling's approximation of ln(n!) for large n. threadsafe function Stirling(n) variable n return 0.5*ln(2*pi*n) + n*ln(n) - n end //// The non mean-subtracted, non normalized correlation matrix of the columns of xx. //static function /wave correlationMatrix(xx) // wave xx // A 2D wave with m rows and n columns. // // matrixop /free result=numrows(xx) * synccorrelation(xx) * (varcols(xx)^t x varcols(xx)) // A 2D wave that is n x n. // return result //end // Return the mean and standard deviation of the fit evaluated at one point (for 1D betas waves). function /c fitMeanSEM(betas,fisher,link,data) wave betas,fisher wave data // A single observation of the covariates. string link variable i,iters=10000,numCovariates=dimsize(betas,0) matrixop /free cov = inv(fisher) wave noises = MakeCorrelatedNoises(iters,cov) make /free/n=(iters) mus for(i=0;i