Average data according to a category (or key or index) wave

Natural categories often arise in data due to the date of measurement, the on/off state of a specific detector, or some other value.

This function averages over such categories and returns the mean and standard error of the mean, in waves described below. Other quantities can be retrieved by adding code after
// get the desired summary statistics
.


#pragma rtGlobals=3		// Use modern global access method and strict wave access.
function /WAVE averageByKey(waveToAvg, dataKeys[, keyTol])
// Given a wave waveToAvg and a wave dataKeys,
// determines the number of unique entries (categories/keys/...) in dataKeys
// and averages waveToAvg according to those entries.
// The standard error of the mean, # of points averaged, and corresponding
// categories are also saved.
// e.g. 
//	averageByKey(calibrationData, calibrationSolution)	// Key by entry in calibrationSolution
//	make /n=(numpnts(signal)) signalInt=round(signal); averageByKey(signal,signalInt) // Average to nearest integer
//
// OUTPUT:
	// averageByKey(data, keys), produces:
	// wave data_Keys -- a list of the unique data keys found in keyWave
	// wave data_Kavg -- the mean of the data corresponding to the keys
	// wave data_KSE  -- the standard error of the mean
	// wave data_Knum -- the # of points used for calculating the Avg & SD
	
wave waveToAvg
wave dataKeys
variable keyTol // Optional: the tolerance of keys, e.g. is 1.001 a "1" or not?
if (ParamIsDefault(keyTol))
	keyTol= 1e-07
endif
	
	// get list of unique keys
	wave keys = getKeys(dataKeys, tol=keyTol, wname=nameOfWave(waveToAvg) + "_Keys")
	variable nkeys = numpnts(keys)
	
	// try to predict memory overflow
	variable np=numpnts(waveToAvg) 
	if (np*nkeys > 2147000000)
		abort "averageByKey() was given too many keys / data points. Make a smaller selection."
	endif
	
	// prepare waves for saving summary stats
	make /d/n=(nkeys)/o $(nameOfWave(waveToAvg) + "_KAvg")		/WAVE=out_avg
	make /d/n=(nkeys)/o $(nameOfWave(waveToAvg) + "_KSE")		/WAVE=out_SE
	make /d/n=(nkeys)/o $(nameOfWave(waveToAvg) + "_Knum")		/WAVE=out_n
	out_avg=nan
	out_SE=nan
	out_n=nan
	
	// prepare output matrix
	make /FREE /d/n=(np,nkeys) outmat=nan // each column represents one key & contains only that key's data
	variable i,colkey
	for (i=0; i<nkeys; i+=1)
		SetDimLabel 1, i, $num2str(keys[i]), outmat  // label for later sorting
	endfor
	
	// go through the data, insert appropriate results into outmat
	string Sthiskey
	for (i=0; i<np; i+=1)
		Sthiskey = num2str(dataKeys[i])
		outmat[i][% $Sthiskey]=waveToAvg[i]
	endfor
	
	// get the desired summary statistics
	for (i=0; i<nkeys; i+=1)
		ImageStats /G={0, np-1, i,i} outmat
		if (V_flag < 0) // probably unnecessary: avoid saving the previous loop's stats
			abort "averageByKey(): unknown error" 
		endif
		out_n[i]=V_npnts
		if (V_npnts>0) // for V_npnts==1, V_sdev is set to NaN
			out_avg[i]=V_avg
			out_SE[i]=V_sdev / sqrt(V_npnts)
		endif
	endfor
	return keys
end
 
function /WAVE getKeys(keys[, tol, wname])
// Given a list of keys (e.g. laser on/off = 0/1),
// return a wave of the unique keys only (in e.g. above, {0,1})
 
wave keys
variable tol // optional, allow 1.0001==1 to be true.
string wname // optional name of output wave
if (ParamIsDefault(tol))
	tol= 1e-07
endif
if (ParamIsDefault(wname))
	wname= nameOfWave(keys) + "_uniqKeys"
endif
 
	// create output wave
	make /d/o/n=0 $wname /WAVE=outw
	outw=nan
 
	// special case of an empty key wave, or one with only NaN/inf
	wavestats /Q/M=1 keys
	if (V_npnts==0)
		return outw
	endif
 
	// go through input wave & save any new entries
	variable i=0, np=numpnts(keys)
	do
		if (getIndex(keys[i], outw, tol=tol) == -1)
			appendWave(outw, {keys[i]})
			SetDimLabel 0, (numpnts(outw)-1), $num2str(keys[i]), outw // label for ease of use (but tol!=0 must be considered)
		endif
		i += 1
	while (i<np)
 
	return outw
end
 
function getIndex(v, w, [tol, rev, start])
// Finds the index of the given variable var in the given wave w.
// Optional variable tolerance gives the ± acceptance range. Default 1.0E-7.
// Optional variable rev searches in reverse if set.
// Capable of finding NaNs.
// Returns -1 if not found.
// see also FindValue
 
	variable v
	wave w
	variable tol
	variable rev
	variable start // default == 0
 
 
	if (paramIsDefault(tol)) // TODO: change this to an autodetected spacing from w
		tol = 1.0E-7
	elseif (tol < 0)
		tol = abs(tol)
	endif
 
	variable i, n = numpnts(w)
 
	if ((rev))
		if (ParamIsDefault(start))
			start = n-1
		endif
		if (start < 0)
			abort "Cannot start at a negative index." 
			// Strictly speaking getIndex() is not responsible for this, but it helps with debugging.
		endif
 
		for (i =start; i >= 0; i -= 1)
			if (abs(w[i] - v) < tol)
				return i
			endif
		endfor
 
	else
 
		for (i =start; i < n; i += 1)
			if (abs(w[i] - v) < tol)
				return i
			endif
		endfor
 
	endif
 
	return -1
end
 
function appendWave(taker, appendThis)
// Concatenate alias. Given a wave "appendThis", appends its data to "taker". 1D waves only.
	wave taker, appendThis
	concatenate /NP=0 {appendThis}, taker
end

Forum

Support

Gallery

Igor Pro 10

Learn More

Igor XOP Toolkit

Learn More

Igor NIDAQ Tools MX

Learn More