.abi / .ab1 reader

Hi all,

Does anyone know of an abi file format reader for Igor? (i.e. sequencing data + electropherograms)

Thanks!

Tom
No, but I did once write a reader for sequencing chromatograms in the SCF format. This was a long time ago and I've forgotten pretty much everything about it. I'm including the code below in case it can help you out.

Some quick searching revealed that the ABI format specification can be found online. I think you could use that to make your own reader in Igor but it depends on your familiarity with programming.


#pragma rtGlobals=1		// Use modern global access method.


// SCF file format specification: http://staden.sourceforge.net/scf-rfc.html


// On-disk header found at byte 0 of an SCF file. ReadSCFHeader fills it with
// a single FBinRead, so the field order and sizes must not be changed.
Structure SCFHeader
	uint32 magic_number		// First four bytes of the file; the reader expects the characters ".scf"
	uint32 samples				// Number of elements in Samples matrix
	uint32 samples_offset		// Byte offset from start of file
	uint32 bases				// Number of bases in Bases matrix
	uint32 bases_left_clip		// OBSOLETE: No. bases in left clip (vector)
	uint32 bases_right_clip	// OBSOLETE: No. bases in right clip (qual)
	uint32 bases_offset			// Byte offset from start of file
	uint32 comments_size		// Number of bytes in Comment section
	uint32 comments_offset		// Byte offset from start of file
	char version[4]			// "version.revision", eg '3' '.' '0' '0'
	uint32 sample_size		// Presumably bytes per sample point; the reader only accepts 2
	uint32 code_set			// Not used by the code in this file
	uint32 private_size		// Not used by the code in this file; presumably size of a private data section
	uint32 private_offset		// Not used by the code in this file; presumably byte offset of that section
	uint32 spare[18]			// Padding; not used by the code in this file
EndStructure

// One base-call record: a 32-bit peak position followed by eight single-byte
// fields (12 bytes in total). GetMostLikelyBase takes one of these by reference.
Structure SequenceEntry
	uint32 peak_index		// Position of the base within the trace; stored in BasePositions
	uchar prob_A			// Relative probability that the base is A
	uchar prob_C			// Relative probability that the base is C
	uchar prob_G			// Relative probability that the base is G
	uchar prob_T			// Relative probability that the base is T
	char   base			// Presumably the called base character (per the SCF specification) -- TODO confirm
	uchar prob_sub			// Presumably substitution probability (SCF 3.1 naming); verify against the spec
	uchar prob_ins			// Presumably insertion probability (SCF 3.1 naming); verify against the spec
	uchar prob_del			// Presumably deletion probability (SCF 3.1 naming); verify against the spec
EndStructure

// Prompts for an SCF chromatogram file, loads it into waves in the current
// data folder and shows the traces in the "ChromatogramViewer" graph.
//
// Waves created (overwritten if present):
//	ChromatogramA/C/G/T	16-bit integer traces, one point per sample
//	BaseAssignments		text wave, one base letter per called base
//	BasePositions		sample index of each called base
//
// Returns 0 if the user cancels the file dialog. Aborts (after closing the
// file) if the file is not an SCF file, is older than SCF version 3, or does
// not use 2-byte samples.
Function ReadSCFHeader()
	
	variable refNum, i
	
	// /D only shows the dialog and sets S_fileName; nothing is opened yet
	Open /D/R /F="Chromatograms:.scf" refNum
	if (strlen(S_fileName) == 0)
		return 0
	endif
	
	Open /R refNum as S_fileName
	
	// FBinRead fills a string with as many bytes as it currently holds,
	// so the 4-character placeholder makes it read exactly the magic number.
	string magicString = "1234"
	FBinRead refNum, magicString
	
	// The magic number is a byte sequence, so it reads the same regardless of
	// byte order. If it is not ".scf" the file is simply not an SCF file.
	if (StringMatch(magicString, ".scf") != 1)
		Close refNum
		Abort "This file is not an SCF file"
	endif
	
	// SCF stores every multi-byte value big-endian (/B=2)
	Struct SCFHeader header
	FSetPos refNum, 0
	FBinRead /B=2 refNum, header
	
	// The code below assumes the version 3 layout: each trace stored as one
	// contiguous, delta-encoded array and the base data stored field by field.
	// Older versions interleave the data and would be decoded into garbage.
	variable majorVersion = str2num(num2char(header.version[0]))
	if (!(majorVersion >= 3))		// also true when the version is not a digit (NaN)
		Close refNum
		Abort "Only SCF version 3 files are supported"
	endif
	
	if (header.sample_size != 2)
		Close refNum
		Abort "The sample size is not 2"
	endif
	
	Make /W/O/N=(header.samples) ChromatogramA, ChromatogramC, ChromatogramG, ChromatogramT
	
	FSetPos refNum, header.samples_offset
	
	// read the raw chromatogram data: all of A, then C, then G, then T
	FBinRead /B=2 refNum, ChromatogramA
	FBinRead /B=2 refNum, ChromatogramC
	FBinRead /B=2 refNum, ChromatogramG
	FBinRead /B=2 refNum, ChromatogramT
	
	ConvertSamplesToAbsoluteValues(ChromatogramA)
	ConvertSamplesToAbsoluteValues(ChromatogramC)
	ConvertSamplesToAbsoluteValues(ChromatogramG)
	ConvertSamplesToAbsoluteValues(ChromatogramT)
	
	// read the base assignments
	Make /T/O/N=(header.bases) BaseAssignments
	Make /O/N=(header.bases) BasePositions
	
	// In version 3 the base section is a series of arrays, in this order:
	// peak indices (4 bytes each), prob_A, prob_C, prob_G, prob_T, called
	// bases (1 byte each), then 3 spare bytes per base which are not needed.
	Make /I/U/O/N=(header.bases) SCFTempPeakIndex
	Make /B/U/O/N=(header.bases) SCFTempProbA, SCFTempProbC, SCFTempProbG, SCFTempProbT
	Make /B/O/N=(header.bases) SCFTempBase
	
	FSetPos refNum, header.bases_offset
	FBinRead /B=2 refNum, SCFTempPeakIndex
	FBinRead refNum, SCFTempProbA
	FBinRead refNum, SCFTempProbC
	FBinRead refNum, SCFTempProbG
	FBinRead refNum, SCFTempProbT
	FBinRead refNum, SCFTempBase
	
	Close refNum
	
	// Reassemble one record per base so GetMostLikelyBase can be reused
	Struct SequenceEntry sequence
	for (i = 0; i < header.bases; i+=1)
		sequence.peak_index = SCFTempPeakIndex[i]
		sequence.prob_A = SCFTempProbA[i]
		sequence.prob_C = SCFTempProbC[i]
		sequence.prob_G = SCFTempProbG[i]
		sequence.prob_T = SCFTempProbT[i]
		sequence.base = SCFTempBase[i]
		BaseAssignments[i] = GetMostLikelyBase(sequence)
		BasePositions[i] = sequence.peak_index
	endfor
	
	KillWaves SCFTempPeakIndex, SCFTempProbA, SCFTempProbC, SCFTempProbG, SCFTempProbT, SCFTempBase
	
	// Display the chromatogram. If the graph already exists it is only brought
	// to the front; it shows the same (overwritten) waves.
	DoWindow /F ChromatogramViewer
	if (V_flag != 1)
		Display /N=ChromatogramViewer as "Chromatogram"
		AppendToGraph ChromatogramA, ChromatogramC, ChromatogramT, ChromatogramG
		// A = green, C = blue, G = black; T keeps the default trace colour
		ModifyGraph rgb(ChromatogramA)=(0,65535,0),rgb(ChromatogramC)=(0,0,65535);DelayUpdate
		ModifyGraph rgb(ChromatogramG)=(0,0,0)
	endif
	
End

// Replaces the contents of a wave, in place, with its running sum taken twice.
// ReadSCFHeader calls this on each trace right after reading it from the file;
// the two passes appear to undo a second-order difference encoding of the
// stored samples (verify against the SCF specification).
//
//	samples		wave to convert; only the row dimension is walked
Function ConvertSamplesToAbsoluteValues(samples)
	wave samples
	
	variable count = DimSize(samples, 0)
	variable pass, idx, running
	
	// Each pass is one cumulative sum. The previous value is read back from
	// the wave after it has been stored, so it has the wave's own number type.
	for (pass = 0; pass < 2; pass += 1)
		running = 0
		for (idx = 0; idx < count; idx += 1)
			samples[idx] = samples[idx] + running
			running = samples[idx]
		endfor
	endfor
End
	
// Returns the one-letter base call ("A", "T", "C" or "G") for a base record.
//
// The base with the highest probability wins; ties are resolved in the order
// A, T, C, G. If all four probabilities are zero the record carries no
// probability information, so the called base stored in the record is
// returned instead, or "N" when that field is empty as well.
//
//	sequence	base record to evaluate (passed by reference, not modified)
Function /S GetMostLikelyBase(sequence)
	Struct SequenceEntry &sequence
	
	variable probA, probC, probT, probG
	
	probA = sequence.prob_A
	probT = sequence.prob_T
	probC = sequence.prob_C
	probG = sequence.prob_G
	
	variable highestProbability = max(max(probA, probT), max(probC, probG))
	
	// Without this check every base of a file lacking probabilities would
	// come out as "A", because 0 == probA is the first comparison to succeed.
	if (highestProbability == 0)
		if (sequence.base > 0)
			return num2char(sequence.base)
		endif
		return "N"
	endif
	
	if (highestProbability == probA)
		return "A"
	elseif (highestProbability == probT)
		return "T"
	elseif (highestProbability == probC)
		return "C"
	else
		// the maximum is always one of the four values, so only G is left
		return "G"
	endif
End