#pragma TextEncoding="UTF-8"
#pragma rtGlobals=3 // Use modern global access method and strict wave access
#pragma DefaultTab={3,20,4} // Set default tab width in Igor Pro 9 and later
// Original Copyright:
/**********************************************************************
*
* File: KolmogorovSmirnovDist.c
* Environment: ISO C99 or ANSI C89
* Author: Richard Simard
* Organization: DIRO,Université de Montréal
* Version: 10 december 2010
* Copyright 1 march 2010 by Université de Montréal,
* Richard Simard and Pierre L'Ecuyer
=====================================================================
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation,version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not,see.
Igor translation:
Date: 15JAN26
Igor translation by A.G. with minor changes to use Igor's internal
functions instead of using factorial arrays. Additional changes
required the use of the trunc() function to simulate integer division.
Also added a short root-finder method for computing the critical values.
Retaining licensing under GPLv3 per
the original copyright notice above.
Original code is described in:
Simard, R., & L’Ecuyer, P. (2011).
"Computing the Two-Sided Kolmogorov-Smirnov Distribution"
Journal of Statistical Software, 39(11), 1–18.
https://doi.org/10.18637/jss.v039.i11
=====================================================================*/
constant num_Ln2=0.69314718055994530941 // ln(2)
constant NEXACT=140
constant NKOLMO=100000
constant NFACT=20
constant MFACT=30
constant EPSILON=1.0E-12
/* For x close to 0 or 1,we use the exact formulae of Ruben-Gambino in all
cases. For n<=NEXACT,we use exact algorithms: the Durbin matrix and
the Pomeranz algorithms. For n>NEXACT,we use asymptotic methods
except for x close to 0 where we still use the method of Durbin
for n<=NKOLMO. For n>NKOLMO,we use asymptotic methods only and
so the precision is less for x close to 0.
We could increase the limit NKOLMO to 10^6 to get better precision
for x close to 0,but at the price of a slower speed.
*/
// The Durbin matrix algorithm for the Kolmogorov-Smirnov distribution
/*========================================================================
// Returns the natural logarithm of factorial n!
static function getLogFactorial(int n)
if(n<=MFACT)
return ln(factorial(n))
else
double x=(n+1)
double y=1.0/(x*x)
double z=((-(5.95238095238E-4*y)+7.936500793651E-4)*y -2.7777777777778E-3)*y+8.3333333333333E-2
z=((x-0.5)*ln(x)-x)+9.1893853320467E-1+z/x
return z
endif
end
//========================================================================
static Function KSPlusbarAsymp(int n,double x)
// Compute the probability of the KS+ distribution using an asymptotic formula
double tt=(6.0*n*x+1)
double z=tt*tt/(18.0*n)
double v=1.0-(2.0*z*z-4.0*z-1.0)/(18.0*n)
if(v<=0.0)
return 0.0
endif
v=v*exp(-z)
if(v >=1.0)
return 1.0
endif
return v
End
//========================================================================
// Compute the probability of the KS+ distribution in the upper tail using
// Smirnov's stable formula
static Function KSPlusbarUpper(int n,double x)
double q
double theSum=0.0
double term
double tt
double LogCom
double LOGJMAX
int j
int jdiv
int jmax=trunc(n*(1.0-x))
if(n>200000)
return KSPlusbarAsymp(n,x)
endif
// Avoid ln(0) for j=jmax and q ~ 1.0
if((1.0-x-jmax/n)<=0.0)
jmax--
endif
if(n>3000)
jdiv=2
else
jdiv=3
endif
j=jmax/jdiv+1
LogCom=getLogFactorial(n)-getLogFactorial(j)-getLogFactorial(n-j)
LOGJMAX=LogCom
if(j<=jmax)
do
q=j/n+x
term=LogCom+(j-1)*ln(q)+(n-j)*log1p(-q)
tt=exp(term)
theSum +=tt
LogCom +=ln((n-j)/(j+1))
if(tt<=theSum*EPSILON)
break
endif
j++
while(j<=jmax)
endif
j=jmax/jdiv
LogCom=LOGJMAX+ln((j+1)/(n-j))
if(j<=0)
do
q=j/n+x
term=LogCom+(j-1)*ln(q)+(n-j)*log1p(-q)
tt=exp(term)
theSum +=tt
LogCom +=ln(j/(n-j+1))
if(tt<=theSum*EPSILON)
break
endif
j--
while(j>0)
endif
theSum*=x
//add the term j=0
theSum +=exp(n*log1p(-x))
return theSum
End
/*========================================================================
Approximating the Lower Tail-Areas of the Kolmogorov-Smirnov One-Sample
Statistic,
Wolfgang Pelz and I. J. Good,
Journal of the Royal Statistical Society,Series B.
Vol. 38,No. 2(1976),pp. 152-156
========================================================================*/
static function Pelz(int n,double x)
Variable JMAX=20
Variable EPS=1.0e-10
Variable C=2.506628274631001 // sqrt(2*Pi)
Variable C2=1.2533141373155001 // sqrt(Pi/2)
Variable PI2=pi*pi
Variable PI4=PI2*PI2
Variable RACN=sqrt(n)
Variable z=RACN*x
Variable z2=z*z
Variable z4=z2*z2
Variable z6=z4*z2
Variable w=PI2/(2.0*z*z)
double ti,term,tom
double theSum
int j
term=1
j=0
theSum=0
do
ti=j+0.5
term=exp(-ti*ti*w)
theSum +=term
j++
while(j<=JMAX && term>EPS*theSum)
theSum*=C/z
term=1
tom=0
j=0
do
ti=j+0.5
term=(PI2*ti*ti-z2)*exp(-ti*ti*w)
tom +=term
j++
while(j<=JMAX && abs(term)>EPS*abs(tom))
theSum +=tom*C2/(RACN*3.0*z4)
term=1
tom=0
j=0
do
ti=j+0.5
term=6*z6+2*z4+PI2*(2*z4-5*z2)*ti*ti +PI4*(1-2*z2)*ti*ti*ti*ti
term*=exp(-ti*ti*w)
tom +=term
j++
while(j<=JMAX && abs(term)>EPS*abs(tom))
theSum +=tom*C2/(n*36.0*z*z6)
term=1
tom=0
j=1
do
ti=j
term=PI2*ti*ti*exp(-ti*ti*w)
tom +=term
j++
while(j<=JMAX && term>EPS*tom)
theSum -=tom*C2/(n*18.0*z*z2)
term=1
tom=0
j=0
do
ti=j+0.5
ti=ti*ti
term=-30*z6-90*z6*z2+PI2*(135*z4-96*z6)*ti+PI4*(212*z4-60*z2)*ti*ti+PI2*PI4*ti*ti*ti*(5 -30*z2)
term*=exp(-ti*w)
tom +=term
j++
while(j<=JMAX && abs(term)>EPS*abs(tom))
theSum +=tom*C2/(RACN*n*3240.0*z4*z6)
term=1
tom=0
j=1
do
ti=j*j
term=(3*PI2*ti*z2-PI4*ti*ti)*exp(-ti*w)
tom +=term
j++
while(j<=JMAX && abs(term)>EPS*abs(tom))
theSum +=tom*C2/(RACN*n*108.0*z6)
return theSum
end
//=========================================================================
// Precompute A_i,floors,and ceilings for limits of sums in the Pomeranz
// algorithm.
// using trunc(i/2) to reproduce in Igor the equivalent for integer
// truncation in C/C++.
//=========================================================================
function CalcFloorCeil(int n,double tt,Wave A,Wave Atflo,Wave Atcei)
int i
int ell=trunc(tt) // floor(tt)*/
double z=tt-ell // tt-floor(tt)*/
double w=ceil(tt)-tt
if(z>0.5)
for(i=2; i<=2*n+2; i +=2)
Atflo[i]=trunc(i/2)-2-ell
endfor
for(i=1; i<=2*n+2; i +=2)
Atflo[i]=trunc(i/2)-1-ell
endfor
for(i=2; i<=2*n+2; i +=2)
Atcei[i]=trunc(i/2)+ell
endfor
for(i=1; i<=2*n+2; i +=2)
Atcei[i]=trunc(i/2)+1+ell
endfor
elseif(z>0.0)
for(i=1; i<=2*n+2; i++)
Atflo[i]=trunc(i/2)-1-ell
endfor
for(i=2; i<=2*n+2; i++)
Atcei[i]=trunc(i/2)+ell
endfor
Atcei[1]=1+ell
else // z ==0*/
for(i=2; i<=2*n+2; i +=2)
Atflo[i]=trunc(i/2)-1-ell
endfor
for(i=1; i<=2*n+2; i +=2)
Atflo[i]=trunc(i/2)-ell
endfor
for(i=2; i<=2*n+2; i +=2)
Atcei[i]=trunc(i/2)-1+ell
endfor
for(i=1; i<=2*n+2; i +=2)
Atcei[i]=trunc(i/2)+ell
endfor
endif
if(w< z)
z=w
endif
A[1]=0
A[0]=A[1]
A[2]=z
A[3]=1-A[2]
for(i=4; i<=2*n+1; i++)
A[i]=A[i-2]+1
endfor
A[2*n+2]=n
End
//========================================================================
// The Pomeranz algorithm to compute the KS distribution
//========================================================================
Function Pomeranz(int n,double x)
double EPS=1.0e-15
int ENO=350
double RENO=ldexp(1.0,ENO)// for renormalization of V*/
int coreno // counter: how many renormalizations*/
double tt=n*x
double w,theSum,minsum
int i,j,k,s
int r1,r2 // Indices i and i-1 for V[i][]*/
int jlow,jup,klow,kup,kup0
Make/FREE/D/N=(2*n+3) A,Atflo,Atcei
Make/O/FREE/N=(2,n+2)/D V
Make/O/FREE/N=(4,n+2)/D H
CalcFloorCeil(n,tt,A,Atflo,Atcei)
for(j=1; j<=n+1; j++)
V[0][j]=0
endfor
for(j=2; j<=n+1; j++)
V[1][j]=0
endfor
V[1][1]=RENO
coreno=1
// Precompute H[][]=(A[j]-A[j-1]^k/k! for speed
H[0][0]=1
w=2.0*A[2]/n
for(j=1; j<=n+1; j++)
H[0][j]=w*H[0][j-1]/j
endfor
H[1][0]=1
w=(1.0-2.0*A[2])/n
for(j=1; j<=n+1; j++)
H[1][j]=w*H[1][j-1]/j
endfor
H[2][0]=1
w=A[2]/n
for(j=1; j<=n+1; j++)
H[2][j]=w*H[2][j-1]/j
endfor
H[3][0]=1
for(j=1; j<=n+1; j++)
H[3][j]=0
endfor
r1=0
r2=1
for(i=2; i<=2*n+2; i++)
jlow=2+Atflo[i]
if(jlow< 1)
jlow=1
endif
jup=Atcei[i]
if(jup>n+1)
jup=n+1
endif
klow=2+Atflo[i-1]
if(klow< 1)
klow=1
endif
kup0=Atcei[i-1]
// Find to which case it corresponds
w=(A[i]-A[i-1])/n
s=-1
for(j=0; j< 4; j++)
if(abs(w-H[j][1])<=EPS)
s=j
break
endif
endfor
// assert(s >=0,"Pomeranz: s< 0");
minsum=RENO
r1=(r1+1) & 1 // i-1
r2=(r2+1) & 1 // i
for(j=jlow; j<=jup; j++)
kup=kup0
if(kup>j)
kup=j
endif
theSum=0
for(k=kup; k >=klow; k--)
theSum +=V[r1][k]*H[s][j-k]
endfor
V[r2][j]=theSum
if(theSum< minsum)
minsum=theSum
endif
endfor
if(minsum< 1.0e-280)
// V is too small: renormalize to avoid underflow of probabilities
for(j=jlow; j<=jup; j++)
V[r2][j]*=RENO
endfor
coreno++ // keep track of ln of RENO
endif
endfor
theSum=V[r2][n+1]
w=getLogFactorial(n)-coreno*ENO*num_Ln2+ln(theSum)
if(w >=0.)
return 1.
endif
return exp(w)
End
//========================================================================
//========================================================================
function cdfSpecial(int n,double x)
// The KS distribution is known exactly for these cases
// For nx^2>18,KSfbar(n,x) is smaller than 5e-16
if((n*x*x >=18.0) ||(x >=1.0))
return 1.0
endif
if(x<=0.5/n)
return 0.0
endif
if(n ==1)
return 2.0*x-1.0
endif
if(x<=1.0/n)
double tt=2.0*x-1.0/n
double w
if(n<=NFACT)
w=factorial(n)
return w*(tt^n)
endif
w=getLogFactorial(n)+n*ln(tt)
return exp(w)
endif
if(x >=1.0-1.0/n)
return 1.0-2.0*(1.0-x)^n
endif
return -1.0
End
//========================================================================
// Return the Kolmogorov-Smirnov CDF for n and x
//========================================================================
Function KScdf(int n,double x)
Variable w=n*x*x
double u=cdfSpecial(n,x)
if(u >=0.0)
return u
endif
if(n<=NEXACT)
if(w< 0.754693)
return DurbinMatrix(n,x)
endif
if(w< 4.0)
return Pomeranz(n,x)
endif
return 1.0-KSfbar(n,x)
endif
// if(n*x*sqrt(x)<=1.4)
if((w*x*n<=2.0) &&(n<=NKOLMO))
return DurbinMatrix(n,x)
endif
return Pelz(n,x)
End
//=========================================================================
Function fbarSpecial(int n,double x)
variable w=n*x*x
if((w >=370.0) ||(x >=1.0))
return 0.0
endif
if((w<=0.0274) ||(x<=0.5/n))
return 1.0
endif
if(n ==1)
return 2.0-2.0*x
endif
if(x<=1.0/n)
double z
double tt=2.0*x-1.0/n
if(n<=NFACT)
z=factorial(n)
return 1.0-z*(tt^n)
endif
z=getLogFactorial(n)+n*ln(tt)
return 1.0-exp(z)
endif
if(x >=1.0-1.0/n)
return 2.0*((1.0-x)^n)
endif
return -1.0
end
//=========================================================================
function KSfbar(int n,double x)
double w=n*x*x
double v=fbarSpecial(n,x)
if(v >=0.0)
return v
endif
if(n<=NEXACT)
if(w< 4.0)
return 1.0-KScdf(n,x)
else
return 2.0*KSPlusbarUpper(n,x)
endif
endif
if(w >=2.2)
return 2.0*KSPlusbarUpper(n,x)
endif
return 1.0-KScdf(n,x)
end
/*=========================================================================
The following implements the Durbin matrix algorithm and was programmed by
G. Marsaglia,Wai Wan Tsang and Jingbo Wong.
I have made small modifications in their program.(Richard Simard)
=========================================================================*/
/*
The C program to compute Kolmogorov's distribution
K(n,d)=Prob(D_n< d), where
D_n=max(x_1-0/n,x_2-1/n...,x_n-(n-1)/n,1/n-x_1,2/n-x_2,...,n/n-x_n)
with x_10 ?((2*hv-1)^m) : 0)
for(i=0; i< m; i++)
for(j=0; j< m; j++)
if(i-j+1>0)
for(g=1; g<=i-j+1; g++)
H[i*m+j]/=g
endfor
endif
endfor
endfor
eH=0
mPower(H,eH,Qw,eQ,m,n)
s=Qw[(k-1)*m+k-1]
for(i=1; i<=n; i++)
s=s*i/n
if(s< INORM)
s*=kNORM
eQ -=LOGNORM
endif
endfor
s*=(10.^eQ)
return s
end
//=========================================================================
// The following is a simple matrix multiplication for square matrices of
// size mxm. All matrices are stored in 1D waves.
function mMultiply(wave A,wave B,wave C,int m)
int i,j,k
double s
for(i=0; i< m; i++)
for(j=0; j< m; j++)
s=0.
for(k=0; k< m; k++)
s +=A[i*m+k]*B[k*m+j]
endfor
C[i*m+j]=s
endfor
endfor
End
//=========================================================================
function renormalize(wave V,int m,int& pp)
int i
for(i=0; i< m*m; i++)
V[i]*=INORM
endfor
pp +=LOGNORM
End
//=========================================================================
static Function mPower(Wave A,int& eA,wave V,int& eV,int m,int n)
int eB,i
if(n ==1)
for(i=0; i< m*m; i++)
V[i]=A[i]
endfor
eV=eA
return 0 // return value is never used
endif
mPower(A,eA,V,eV,m,n/2)
Make/FREE/N=(m*m)/D B
mMultiply(V,V,B,m)
eB=2*(eV)
if(B[(m/2)*m+(m/2)]>kNORM)
renormalize(B,m,eB)
endif
if(mod(n,2) ==0)
for(i=0; i< m*m; i++)
V[i]=B[i]
endfor
eV=eB
else
mMultiply(A,B,V,m)
eV=eA+eB
endif
if(V[(m/2)*m+(m/2)]>kNORM)
renormalize(V,m,eV)
endif
end
//=========================================================================
// The function log1p() is built in ISO C99 but not in IP10.
//=========================================================================
static function log1p(double x)
// returns a value equivalent to ln(1+x) accurate also for small x.
if(abs(x)>0.1)
return ln(1.0+x)
else
double term=x
double thesum=x
int s=2
if(((abs(term)>EPSILON*abs(thesum)) &&(s< 50)))
do
term*=-x
thesum +=term/s
s++
while((abs(term)>EPSILON*abs(thesum)) &&(s< 50))
endif
return thesum
endif
End
//=========================================================================
// this standard function should be built-into Igor.
//=========================================================================
static Function ldExp(double x,int ex)
return x*(2^ex)
End
//=========================================================================
function getKSCritical(variable n,variable alpha)
Make/N=(2)/FREE cw
cw[0]=n
cw[1]=1-alpha
Variable lowEnd=1e-9
FindRoots /B=0/L=(lowEnd) /H=1 igorRootFinderFunc,cw
return V_root
End
//=========================================================================
Function igorRootFinderFunc(w,x)
Wave w
Variable x
return KScdf(w[0],x)-w[1] // n is being passed in w[0], 1-alpha in w[1];
End
//=========================================================================