.abi / .ab1 reader

Hi all,

Does anyone know of an ABI (.abi / .ab1) file format reader for Igor, i.e. one that loads the sequencing data and electropherograms?

Thanks!

Tom
No, but I did once write a reader for sequencing chromatograms in the SCF format. This was a long time ago and I've forgotten pretty much everything about it. I'm including the code below in case it can help you out.

Some quick searching revealed that the ABI format specification can be found online. I think you could use that to make your own reader in Igor but it depends on your familiarity with programming.

#pragma rtGlobals=1     // Use modern global access method.


// SCF file format specification: staden.sourceforge.net/scf-rfc.html


// Fixed-size header found at the start of an SCF file (9 + 1 + 4 + 18 four-byte
// fields = 128 bytes). ReadSCFHeader reads the whole structure with one FBinRead,
// so the field order and sizes must not be changed.
// Field descriptions follow the SCF specification referenced above.
Structure SCFHeader
    uint32 magic_number     // File signature; ReadSCFHeader compares the first four file bytes with ".scf"
    uint32 samples              // Number of elements in Samples matrix
    uint32 samples_offset       // Byte offset from start of file
    uint32 bases                // Number of bases in Bases matrix
    uint32 bases_left_clip      // OBSOLETE: No. bases in left clip (vector)
    uint32 bases_right_clip // OBSOLETE: No. bases in right clip (qual)
    uint32 bases_offset         // Byte offset from start of file
    uint32 comments_size        // Number of bytes in Comment section
    uint32 comments_offset      // Byte offset from start of file
    char version[4]             // "version.revision", eg '3' '.' '0' '0'
    uint32 sample_size          // Size of one sample in bytes; ReadSCFHeader only accepts 2
    uint32 code_set             // Not used by the code in this file (per the SCF spec: code set used)
    uint32 private_size         // Not used by the code in this file (per the SCF spec: bytes of private data)
    uint32 private_offset       // Not used by the code in this file (per the SCF spec: byte offset from start of file)
    uint32 spare[18]            // Padding; not used by the code in this file
EndStructure

// One base call: a 4-byte sample index followed by eight single-byte fields
// (12 bytes in total). GetMostLikelyBase picks a base letter from the four
// prob_A/C/G/T scores; ReadSCFHeader stores peak_index in BasePositions.
Structure SequenceEntry
    uint32 peak_index   // Sample index of the base; copied into BasePositions
    uchar prob_A        // Score for the base being A (compared in GetMostLikelyBase)
    uchar prob_C        // Score for the base being C (compared in GetMostLikelyBase)
    uchar prob_G        // Score for the base being G (compared in GetMostLikelyBase)
    uchar prob_T        // Score for the base being T (compared in GetMostLikelyBase)
    char   base         // Not read by the code in this file; presumably the called base character -- confirm against the SCF spec
    uchar prob_sub      // Not read by the code in this file; presumably a substitution score -- confirm against the SCF spec
    uchar prob_ins      // Not read by the code in this file; presumably an insertion score -- confirm against the SCF spec
    uchar prob_del      // Not read by the code in this file; presumably a deletion score -- confirm against the SCF spec
EndStructure

// Prompts for an SCF (version 3) chromatogram file, loads it and displays it.
//
// Creates (or overwrites) these waves in the current data folder:
//   ChromatogramA/C/G/T  - 16-bit integer traces, one per base channel
//   BaseAssignments      - text wave, one base letter per called base
//   BasePositions        - sample index of each called base
// and shows the four traces in a graph window named ChromatogramViewer.
//
// Returns 0 if the user cancels the file dialog. Aborts (after closing the
// file) if the file is not an SCF file, is older than version 3, or does not
// use 2-byte samples.
Function ReadSCFHeader()

    variable refNum, i

    // /D only shows the Open File dialog and sets S_fileName; nothing is opened yet.
    Open /D/R /F="Chromatograms:.scf" refNum
    if (strlen(S_fileName) == 0)
        return 0    // user cancelled
    endif

    Open /R refNum as S_fileName

    Struct SCFHeader header

    // The four-character placeholder makes FBinRead fetch exactly the four
    // signature bytes at the start of the file.
    string magicString = "1234"
    variable byteOrder      // FBinRead /B value: 2 = big-endian, 3 = little-endian

    FBinRead refNum, magicString
    if (StringMatch(magicString, ".scf") == 1)
        byteOrder = 2
    else
        byteOrder = 3
    endif

    FSetPos refNum, 0
    FBinRead /B=(byteOrder) refNum, header

    // 779314022 = 0x2E736366, the bytes '.', 's', 'c', 'f' as one 32-bit value.
    // Anything else means this is not an SCF file in either byte order, and the
    // size/offset fields read above cannot be trusted.
    if (header.magic_number != 779314022)
        Close refNum
        Abort "The selected file is not an SCF file"
    endif

    // The version text follows the nine uint32 fields at the start of the header
    // (byte offset 36). The sample and base decoding below is only valid for the
    // version 3 layout, so older files are rejected instead of being misread.
    string versionString = "1234"
    FSetPos refNum, 36
    FBinRead refNum, versionString
    variable fileVersion = str2num(versionString)
    if (numtype(fileVersion) != 0 || fileVersion < 3)
        Close refNum
        Abort "Only SCF files of version 3 or later are supported"
    endif

    if (header.sample_size != 2)
        Close refNum
        Abort "The sample size is not 2"
    endif

    Make /W/O/N=(header.samples) ChromatogramA, ChromatogramC, ChromatogramG, ChromatogramT

    FSetPos refNum, header.samples_offset

    // read the raw chromatogram data: four consecutive arrays, one per channel
    FBinRead /B=(byteOrder) refNum, ChromatogramA
    FBinRead /B=(byteOrder) refNum, ChromatogramC
    FBinRead /B=(byteOrder) refNum, ChromatogramG
    FBinRead /B=(byteOrder) refNum, ChromatogramT

    ConvertSamplesToAbsoluteValues(ChromatogramA)
    ConvertSamplesToAbsoluteValues(ChromatogramC)
    ConvertSamplesToAbsoluteValues(ChromatogramG)
    ConvertSamplesToAbsoluteValues(ChromatogramT)

    // read the base assignments
    Make /T/O/N=(header.bases) BaseAssignments
    Make /O/N=(header.bases) BasePositions

    // In a version 3 file the base section is stored column-wise: all peak
    // indices (4 bytes each) come first, followed by one byte per base for each
    // of the A, C, G and T scores. Scratch waves receive each column in one read.
    Make /O/I/U/N=(header.bases) scfTmpPeakIndex
    Make /O/B/U/N=(header.bases) scfTmpProbA, scfTmpProbC, scfTmpProbG, scfTmpProbT

    if (header.bases > 0)
        FSetPos refNum, header.bases_offset
        FBinRead /B=(byteOrder) refNum, scfTmpPeakIndex
        FBinRead refNum, scfTmpProbA
        FBinRead refNum, scfTmpProbC
        FBinRead refNum, scfTmpProbG
        FBinRead refNum, scfTmpProbT
    endif

    Close refNum

    // Reassemble one record per base so GetMostLikelyBase can pick the letter.
    Struct SequenceEntry sequence

    for (i = 0; i < header.bases; i+=1)
        sequence.peak_index = scfTmpPeakIndex[i]
        sequence.prob_A = scfTmpProbA[i]
        sequence.prob_C = scfTmpProbC[i]
        sequence.prob_G = scfTmpProbG[i]
        sequence.prob_T = scfTmpProbT[i]
        BaseAssignments[i] = GetMostLikelyBase(sequence)
        BasePositions[i] = sequence.peak_index
    endfor

    KillWaves /Z scfTmpPeakIndex, scfTmpProbA, scfTmpProbC, scfTmpProbG, scfTmpProbT

    // Display the chromatogram. If the viewer already exists it is only brought
    // to the front; its traces follow the overwritten waves automatically.
    DoWindow /F ChromatogramViewer
    if (V_flag != 1)
        Display /N=ChromatogramViewer as "Chromatogram"
        AppendToGraph ChromatogramA, ChromatogramC, ChromatogramT, ChromatogramG
        ModifyGraph rgb(ChromatogramA)=(0,65535,0),rgb(ChromatogramC)=(0,0,65535);DelayUpdate
        ModifyGraph rgb(ChromatogramG)=(0,0,0)
    endif

End

// Replaces the contents of a 1D wave, in place, with the result of two
// successive running-sum passes (each pass starts its running total at zero).
// ReadSCFHeader calls this on each channel after reading it from the file;
// presumably this reverses the delta-delta encoding described in the SCF
// specification -- confirm against the spec.
//
// samples: wave to convert; it is modified in place.
Function ConvertSamplesToAbsoluteValues(samples)
    wave samples

    variable count = DimSize(samples, 0)
    variable pass, idx, previous

    for (pass = 0; pass < 2; pass += 1)
        previous = 0
        for (idx = 0; idx < count; idx += 1)
            samples[idx] += previous
            // Read the value back from the wave so the running total is
            // exactly what the wave actually stored.
            previous = samples[idx]
        endfor
    endfor
End
   
// Returns the letter ("A", "T", "C" or "G") whose score in the given record is
// the highest. When several scores tie for the highest value, the first one in
// the order A, T, C, G wins.
//
// sequence: base-call record whose prob_A, prob_T, prob_C and prob_G are compared.
Function /S GetMostLikelyBase(sequence)
    Struct SequenceEntry &sequence

    // Start with A and only switch on a strictly larger score, so that an
    // earlier candidate keeps the win on a tie.
    string winner = "A"
    variable best = sequence.prob_A

    if (sequence.prob_T > best)
        winner = "T"
        best = sequence.prob_T
    endif

    if (sequence.prob_C > best)
        winner = "C"
        best = sequence.prob_C
    endif

    if (sequence.prob_G > best)
        winner = "G"
    endif

    return winner
End