#pragma TextEncoding="UTF-8" #pragma rtGlobals=3 // Use modern global access method and strict wave access #pragma DefaultTab={3,20,4} // Set default tab width in Igor Pro 9 and later // Original Copyright: /********************************************************************** * * File: KolmogorovSmirnovDist.c * Environment: ISO C99 or ANSI C89 * Author: Richard Simard * Organization: DIRO,Université de Montréal * Version: 10 december 2010 * Copyright 1 march 2010 by Université de Montréal, * Richard Simard and Pierre L'Ecuyer ===================================================================== This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation,version 3 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not,see. Igor translation: Date: 15JAN26 Igor translation by A.G. with minor changes to use Igor's internal functions instead of using factorial arrays. Additional changes required the use of the trunc() function to simulate integer division. Also added a short root-finder method for computing the critical values. Retaining licensing under GPLv3 per the original copyright notice above. Original code is described in: Simard, R., & L’Ecuyer, P. (2011). "Computing the Two-Sided Kolmogorov-Smirnov Distribution" Journal of Statistical Software, 39(11), 1–18. https://doi.org/10.18637/jss.v039.i11 =====================================================================*/ constant num_Ln2=0.69314718055994530941 // ln(2) constant NEXACT=140 constant NKOLMO=100000 constant NFACT=20 constant MFACT=30 constant EPSILON=1.0E-12 /* For x close to 0 or 1,we use the exact formulae of Ruben-Gambino in all cases. For n<=NEXACT,we use exact algorithms: the Durbin matrix and the Pomeranz algorithms. For n>NEXACT,we use asymptotic methods except for x close to 0 where we still use the method of Durbin for n<=NKOLMO. For n>NKOLMO,we use asymptotic methods only and so the precision is less for x close to 0. We could increase the limit NKOLMO to 10^6 to get better precision for x close to 0,but at the price of a slower speed. */ // The Durbin matrix algorithm for the Kolmogorov-Smirnov distribution /*======================================================================== // Returns the natural logarithm of factorial n! static function getLogFactorial(int n) if(n<=MFACT) return ln(factorial(n)) else double x=(n+1) double y=1.0/(x*x) double z=((-(5.95238095238E-4*y)+7.936500793651E-4)*y -2.7777777777778E-3)*y+8.3333333333333E-2 z=((x-0.5)*ln(x)-x)+9.1893853320467E-1+z/x return z endif end //======================================================================== static Function KSPlusbarAsymp(int n,double x) // Compute the probability of the KS+ distribution using an asymptotic formula double tt=(6.0*n*x+1) double z=tt*tt/(18.0*n) double v=1.0-(2.0*z*z-4.0*z-1.0)/(18.0*n) if(v<=0.0) return 0.0 endif v=v*exp(-z) if(v >=1.0) return 1.0 endif return v End //======================================================================== // Compute the probability of the KS+ distribution in the upper tail using // Smirnov's stable formula static Function KSPlusbarUpper(int n,double x) double q double theSum=0.0 double term double tt double LogCom double LOGJMAX int j int jdiv int jmax=trunc(n*(1.0-x)) if(n>200000) return KSPlusbarAsymp(n,x) endif // Avoid ln(0) for j=jmax and q ~ 1.0 if((1.0-x-jmax/n)<=0.0) jmax-- endif if(n>3000) jdiv=2 else jdiv=3 endif j=jmax/jdiv+1 LogCom=getLogFactorial(n)-getLogFactorial(j)-getLogFactorial(n-j) LOGJMAX=LogCom if(j<=jmax) do q=j/n+x term=LogCom+(j-1)*ln(q)+(n-j)*log1p(-q) tt=exp(term) theSum +=tt LogCom +=ln((n-j)/(j+1)) if(tt<=theSum*EPSILON) break endif j++ while(j<=jmax) endif j=jmax/jdiv LogCom=LOGJMAX+ln((j+1)/(n-j)) if(j<=0) do q=j/n+x term=LogCom+(j-1)*ln(q)+(n-j)*log1p(-q) tt=exp(term) theSum +=tt LogCom +=ln(j/(n-j+1)) if(tt<=theSum*EPSILON) break endif j-- while(j>0) endif theSum*=x //add the term j=0 theSum +=exp(n*log1p(-x)) return theSum End /*======================================================================== Approximating the Lower Tail-Areas of the Kolmogorov-Smirnov One-Sample Statistic, Wolfgang Pelz and I. J. Good, Journal of the Royal Statistical Society,Series B. Vol. 38,No. 2(1976),pp. 152-156 ========================================================================*/ static function Pelz(int n,double x) Variable JMAX=20 Variable EPS=1.0e-10 Variable C=2.506628274631001 // sqrt(2*Pi) Variable C2=1.2533141373155001 // sqrt(Pi/2) Variable PI2=pi*pi Variable PI4=PI2*PI2 Variable RACN=sqrt(n) Variable z=RACN*x Variable z2=z*z Variable z4=z2*z2 Variable z6=z4*z2 Variable w=PI2/(2.0*z*z) double ti,term,tom double theSum int j term=1 j=0 theSum=0 do ti=j+0.5 term=exp(-ti*ti*w) theSum +=term j++ while(j<=JMAX && term>EPS*theSum) theSum*=C/z term=1 tom=0 j=0 do ti=j+0.5 term=(PI2*ti*ti-z2)*exp(-ti*ti*w) tom +=term j++ while(j<=JMAX && abs(term)>EPS*abs(tom)) theSum +=tom*C2/(RACN*3.0*z4) term=1 tom=0 j=0 do ti=j+0.5 term=6*z6+2*z4+PI2*(2*z4-5*z2)*ti*ti +PI4*(1-2*z2)*ti*ti*ti*ti term*=exp(-ti*ti*w) tom +=term j++ while(j<=JMAX && abs(term)>EPS*abs(tom)) theSum +=tom*C2/(n*36.0*z*z6) term=1 tom=0 j=1 do ti=j term=PI2*ti*ti*exp(-ti*ti*w) tom +=term j++ while(j<=JMAX && term>EPS*tom) theSum -=tom*C2/(n*18.0*z*z2) term=1 tom=0 j=0 do ti=j+0.5 ti=ti*ti term=-30*z6-90*z6*z2+PI2*(135*z4-96*z6)*ti+PI4*(212*z4-60*z2)*ti*ti+PI2*PI4*ti*ti*ti*(5 -30*z2) term*=exp(-ti*w) tom +=term j++ while(j<=JMAX && abs(term)>EPS*abs(tom)) theSum +=tom*C2/(RACN*n*3240.0*z4*z6) term=1 tom=0 j=1 do ti=j*j term=(3*PI2*ti*z2-PI4*ti*ti)*exp(-ti*w) tom +=term j++ while(j<=JMAX && abs(term)>EPS*abs(tom)) theSum +=tom*C2/(RACN*n*108.0*z6) return theSum end //========================================================================= // Precompute A_i,floors,and ceilings for limits of sums in the Pomeranz // algorithm. // using trunc(i/2) to reproduce in Igor the equivalent for integer // truncation in C/C++. //========================================================================= function CalcFloorCeil(int n,double tt,Wave A,Wave Atflo,Wave Atcei) int i int ell=trunc(tt) // floor(tt)*/ double z=tt-ell // tt-floor(tt)*/ double w=ceil(tt)-tt if(z>0.5) for(i=2; i<=2*n+2; i +=2) Atflo[i]=trunc(i/2)-2-ell endfor for(i=1; i<=2*n+2; i +=2) Atflo[i]=trunc(i/2)-1-ell endfor for(i=2; i<=2*n+2; i +=2) Atcei[i]=trunc(i/2)+ell endfor for(i=1; i<=2*n+2; i +=2) Atcei[i]=trunc(i/2)+1+ell endfor elseif(z>0.0) for(i=1; i<=2*n+2; i++) Atflo[i]=trunc(i/2)-1-ell endfor for(i=2; i<=2*n+2; i++) Atcei[i]=trunc(i/2)+ell endfor Atcei[1]=1+ell else // z ==0*/ for(i=2; i<=2*n+2; i +=2) Atflo[i]=trunc(i/2)-1-ell endfor for(i=1; i<=2*n+2; i +=2) Atflo[i]=trunc(i/2)-ell endfor for(i=2; i<=2*n+2; i +=2) Atcei[i]=trunc(i/2)-1+ell endfor for(i=1; i<=2*n+2; i +=2) Atcei[i]=trunc(i/2)+ell endfor endif if(w< z) z=w endif A[1]=0 A[0]=A[1] A[2]=z A[3]=1-A[2] for(i=4; i<=2*n+1; i++) A[i]=A[i-2]+1 endfor A[2*n+2]=n End //======================================================================== // The Pomeranz algorithm to compute the KS distribution //======================================================================== Function Pomeranz(int n,double x) double EPS=1.0e-15 int ENO=350 double RENO=ldexp(1.0,ENO)// for renormalization of V*/ int coreno // counter: how many renormalizations*/ double tt=n*x double w,theSum,minsum int i,j,k,s int r1,r2 // Indices i and i-1 for V[i][]*/ int jlow,jup,klow,kup,kup0 Make/FREE/D/N=(2*n+3) A,Atflo,Atcei Make/O/FREE/N=(2,n+2)/D V Make/O/FREE/N=(4,n+2)/D H CalcFloorCeil(n,tt,A,Atflo,Atcei) for(j=1; j<=n+1; j++) V[0][j]=0 endfor for(j=2; j<=n+1; j++) V[1][j]=0 endfor V[1][1]=RENO coreno=1 // Precompute H[][]=(A[j]-A[j-1]^k/k! for speed H[0][0]=1 w=2.0*A[2]/n for(j=1; j<=n+1; j++) H[0][j]=w*H[0][j-1]/j endfor H[1][0]=1 w=(1.0-2.0*A[2])/n for(j=1; j<=n+1; j++) H[1][j]=w*H[1][j-1]/j endfor H[2][0]=1 w=A[2]/n for(j=1; j<=n+1; j++) H[2][j]=w*H[2][j-1]/j endfor H[3][0]=1 for(j=1; j<=n+1; j++) H[3][j]=0 endfor r1=0 r2=1 for(i=2; i<=2*n+2; i++) jlow=2+Atflo[i] if(jlow< 1) jlow=1 endif jup=Atcei[i] if(jup>n+1) jup=n+1 endif klow=2+Atflo[i-1] if(klow< 1) klow=1 endif kup0=Atcei[i-1] // Find to which case it corresponds w=(A[i]-A[i-1])/n s=-1 for(j=0; j< 4; j++) if(abs(w-H[j][1])<=EPS) s=j break endif endfor // assert(s >=0,"Pomeranz: s< 0"); minsum=RENO r1=(r1+1) & 1 // i-1 r2=(r2+1) & 1 // i for(j=jlow; j<=jup; j++) kup=kup0 if(kup>j) kup=j endif theSum=0 for(k=kup; k >=klow; k--) theSum +=V[r1][k]*H[s][j-k] endfor V[r2][j]=theSum if(theSum< minsum) minsum=theSum endif endfor if(minsum< 1.0e-280) // V is too small: renormalize to avoid underflow of probabilities for(j=jlow; j<=jup; j++) V[r2][j]*=RENO endfor coreno++ // keep track of ln of RENO endif endfor theSum=V[r2][n+1] w=getLogFactorial(n)-coreno*ENO*num_Ln2+ln(theSum) if(w >=0.) return 1. endif return exp(w) End //======================================================================== //======================================================================== function cdfSpecial(int n,double x) // The KS distribution is known exactly for these cases // For nx^2>18,KSfbar(n,x) is smaller than 5e-16 if((n*x*x >=18.0) ||(x >=1.0)) return 1.0 endif if(x<=0.5/n) return 0.0 endif if(n ==1) return 2.0*x-1.0 endif if(x<=1.0/n) double tt=2.0*x-1.0/n double w if(n<=NFACT) w=factorial(n) return w*(tt^n) endif w=getLogFactorial(n)+n*ln(tt) return exp(w) endif if(x >=1.0-1.0/n) return 1.0-2.0*(1.0-x)^n endif return -1.0 End //======================================================================== // Return the Kolmogorov-Smirnov CDF for n and x //======================================================================== Function KScdf(int n,double x) Variable w=n*x*x double u=cdfSpecial(n,x) if(u >=0.0) return u endif if(n<=NEXACT) if(w< 0.754693) return DurbinMatrix(n,x) endif if(w< 4.0) return Pomeranz(n,x) endif return 1.0-KSfbar(n,x) endif // if(n*x*sqrt(x)<=1.4) if((w*x*n<=2.0) &&(n<=NKOLMO)) return DurbinMatrix(n,x) endif return Pelz(n,x) End //========================================================================= Function fbarSpecial(int n,double x) variable w=n*x*x if((w >=370.0) ||(x >=1.0)) return 0.0 endif if((w<=0.0274) ||(x<=0.5/n)) return 1.0 endif if(n ==1) return 2.0-2.0*x endif if(x<=1.0/n) double z double tt=2.0*x-1.0/n if(n<=NFACT) z=factorial(n) return 1.0-z*(tt^n) endif z=getLogFactorial(n)+n*ln(tt) return 1.0-exp(z) endif if(x >=1.0-1.0/n) return 2.0*((1.0-x)^n) endif return -1.0 end //========================================================================= function KSfbar(int n,double x) double w=n*x*x double v=fbarSpecial(n,x) if(v >=0.0) return v endif if(n<=NEXACT) if(w< 4.0) return 1.0-KScdf(n,x) else return 2.0*KSPlusbarUpper(n,x) endif endif if(w >=2.2) return 2.0*KSPlusbarUpper(n,x) endif return 1.0-KScdf(n,x) end /*========================================================================= The following implements the Durbin matrix algorithm and was programmed by G. Marsaglia,Wai Wan Tsang and Jingbo Wong. I have made small modifications in their program.(Richard Simard) =========================================================================*/ /* The C program to compute Kolmogorov's distribution K(n,d)=Prob(D_n< d), where D_n=max(x_1-0/n,x_2-1/n...,x_n-(n-1)/n,1/n-x_1,2/n-x_2,...,n/n-x_n) with x_10 ?((2*hv-1)^m) : 0) for(i=0; i< m; i++) for(j=0; j< m; j++) if(i-j+1>0) for(g=1; g<=i-j+1; g++) H[i*m+j]/=g endfor endif endfor endfor eH=0 mPower(H,eH,Qw,eQ,m,n) s=Qw[(k-1)*m+k-1] for(i=1; i<=n; i++) s=s*i/n if(s< INORM) s*=kNORM eQ -=LOGNORM endif endfor s*=(10.^eQ) return s end //========================================================================= // The following is a simple matrix multiplication for square matrices of // size mxm. All matrices are stored in 1D waves. function mMultiply(wave A,wave B,wave C,int m) int i,j,k double s for(i=0; i< m; i++) for(j=0; j< m; j++) s=0. for(k=0; k< m; k++) s +=A[i*m+k]*B[k*m+j] endfor C[i*m+j]=s endfor endfor End //========================================================================= function renormalize(wave V,int m,int& pp) int i for(i=0; i< m*m; i++) V[i]*=INORM endfor pp +=LOGNORM End //========================================================================= static Function mPower(Wave A,int& eA,wave V,int& eV,int m,int n) int eB,i if(n ==1) for(i=0; i< m*m; i++) V[i]=A[i] endfor eV=eA return 0 // return value is never used endif mPower(A,eA,V,eV,m,n/2) Make/FREE/N=(m*m)/D B mMultiply(V,V,B,m) eB=2*(eV) if(B[(m/2)*m+(m/2)]>kNORM) renormalize(B,m,eB) endif if(mod(n,2) ==0) for(i=0; i< m*m; i++) V[i]=B[i] endfor eV=eB else mMultiply(A,B,V,m) eV=eA+eB endif if(V[(m/2)*m+(m/2)]>kNORM) renormalize(V,m,eV) endif end //========================================================================= // The function log1p() is built in ISO C99 but not in IP10. //========================================================================= static function log1p(double x) // returns a value equivalent to ln(1+x) accurate also for small x. if(abs(x)>0.1) return ln(1.0+x) else double term=x double thesum=x int s=2 if(((abs(term)>EPSILON*abs(thesum)) &&(s< 50))) do term*=-x thesum +=term/s s++ while((abs(term)>EPSILON*abs(thesum)) &&(s< 50)) endif return thesum endif End //========================================================================= // this standard function should be built-into Igor. //========================================================================= static Function ldExp(double x,int ex) return x*(2^ex) End //========================================================================= function getKSCritical(variable n,variable alpha) Make/N=(2)/FREE cw cw[0]=n cw[1]=1-alpha Variable lowEnd=1e-9 FindRoots /B=0/L=(lowEnd) /H=1 igorRootFinderFunc,cw return V_root End //========================================================================= Function igorRootFinderFunc(w,x) Wave w Variable x return KScdf(w[0],x)-w[1] // n is being passed in w[0], 1-alpha in w[1]; End //=========================================================================