Average data according to a category (or key or index) wave

Natural categories often arise in data due to the date of measurement, the on/off state of a specific detector, or some other value.

This function averages over such categories and returns the mean and standard error of the mean, in waves described below. Other quantities can be retrieved by adding code after // get the desired summary statistics.

#pragma rtGlobals=3     // Use modern global access method and strict wave access.
function /WAVE averageByKey(waveToAvg, dataKeys[, keyTol])
// Given a wave waveToAvg and a wave dataKeys,
// determines the number of unique entries (categories/keys/...) in dataKeys
// and averages waveToAvg according to those entries.
// The standard error of the mean, # of points averaged, and corresponding
// categories are also saved.
// e.g.
//  averageByKey(calibrationData, calibrationSolution)  // Key by entry in calibrationSolution
//  make /n=(numpnts(signal)) signalInt=round(signal); averageByKey(signal,signalInt) // Average to nearest integer
//
// OUTPUT:
    // averageByKey(data, keys), produces:
    // wave data_Keys -- a list of the unique data keys found in keyWave
    // wave data_Kavg -- the mean of the data corresponding to the keys
    // wave data_KSE  -- the standard error of the mean
    // wave data_Knum -- the # of points used for calculating the Avg & SD
   
wave waveToAvg
wave dataKeys
variable keyTol // Optional: the tolerance of keys, e.g. is 1.001 a "1" or not?
if (ParamIsDefault(keyTol))
    keyTol= 1e-07
endif
   
    // get list of unique keys
    wave keys = getKeys(dataKeys, tol=keyTol, wname=nameOfWave(waveToAvg) + "_Keys")
    variable nkeys = numpnts(keys)
   
    // try to predict memory overflow
    variable np=numpnts(waveToAvg)
    if (np*nkeys > 2147000000)
        abort "averageByKey() was given too many keys / data points. Make a smaller selection."
    endif
   
    // prepare waves for saving summary stats
    make /d/n=(nkeys)/o $(nameOfWave(waveToAvg) + "_KAvg")      /WAVE=out_avg
    make /d/n=(nkeys)/o $(nameOfWave(waveToAvg) + "_KSE")       /WAVE=out_SE
    make /d/n=(nkeys)/o $(nameOfWave(waveToAvg) + "_Knum")      /WAVE=out_n
    out_avg=nan
    out_SE=nan
    out_n=nan
   
    // prepare output matrix
    make /FREE /d/n=(np,nkeys) outmat=nan // each column represents one key & contains only that key's data
    variable i,colkey
    for (i=0; i<nkeys; i+=1)
        SetDimLabel 1, i, $num2str(keys[i]), outmat  // label for later sorting
    endfor
   
    // go through the data, insert appropriate results into outmat
    string Sthiskey
    for (i=0; i<np; i+=1)
        Sthiskey = num2str(dataKeys[i])
        outmat[i][% $Sthiskey]=waveToAvg[i]
    endfor
   
    // get the desired summary statistics
    for (i=0; i<nkeys; i+=1)
        ImageStats /G={0, np-1, i,i} outmat
        if (V_flag < 0) // probably unnecessary: avoid saving the previous loop's stats
            abort "averageByKey(): unknown error"
        endif
        out_n[i]=V_npnts
        if (V_npnts>0) // for V_npnts==1, V_sdev is set to NaN
            out_avg[i]=V_avg
            out_SE[i]=V_sdev / sqrt(V_npnts)
        endif
    endfor
    return keys
end
 
function /WAVE getKeys(keys[, tol, wname])
// Given a list of keys (e.g. laser on/off = 0/1),
// return a wave of the unique keys only (in e.g. above, {0,1})
 
wave keys
variable tol // optional, allow 1.0001==1 to be true.
string wname // optional name of output wave
if (ParamIsDefault(tol))
    tol= 1e-07
endif
if (ParamIsDefault(wname))
    wname= nameOfWave(keys) + "_uniqKeys"
endif
 
    // create output wave
    make /d/o/n=0 $wname /WAVE=outw
    outw=nan
 
    // special case of an empty key wave, or one with only NaN/inf
    wavestats /Q/M=1 keys
    if (V_npnts==0)
        return outw
    endif
 
    // go through input wave & save any new entries
    variable i=0, np=numpnts(keys)
    do
        if (getIndex(keys[i], outw, tol=tol) == -1)
            appendWave(outw, {keys[i]})
            SetDimLabel 0, (numpnts(outw)-1), $num2str(keys[i]), outw // label for ease of use (but tol!=0 must be considered)
        endif
        i += 1
    while (i<np)
 
    return outw
end
 
function getIndex(v, w, [tol, rev, start])
// Finds the index of the given variable var in the given wave w.
// Optional variable tolerance gives the ± acceptance range. Default 1.0E-7.
// Optional variable rev searches in reverse if set.
// Capable of finding NaNs.
// Returns -1 if not found.
// see also FindValue
 
    variable v
    wave w
    variable tol
    variable rev
    variable start // default == 0
 
 
    if (paramIsDefault(tol)) // TODO: change this to an autodetected spacing from w
        tol = 1.0E-7
    elseif (tol < 0)
        tol = abs(tol)
    endif
 
    variable i, n = numpnts(w)
 
    if ((rev))
        if (ParamIsDefault(start))
            start = n-1
        endif
        if (start < 0)
            abort "Cannot start at a negative index."
            // Strictly speaking getIndex() is not responsible for this, but it helps with debugging.
        endif
 
        for (i =start; i >= 0; i -= 1)
            if (abs(w[i] - v) < tol)
                return i
            endif
        endfor
 
    else
 
        for (i =start; i < n; i += 1)
            if (abs(w[i] - v) < tol)
                return i
            endif
        endfor
 
    endif
 
    return -1
end
 
function appendWave(taker, appendThis)
// Concatenate alias. Given a wave "appendThis", appends its data to "taker". 1D waves only.
    wave taker, appendThis
    concatenate /NP=0 {appendThis}, taker
end

Forum

Support

Gallery

Igor Pro 9

Learn More

Igor XOP Toolkit

Learn More

Igor NIDAQ Tools MX

Learn More