Hierarchical clustering 2: Dendrogram

The code constructs a dendrogram from a dissimilarity matrix (see Hierarchical clustering for a code to generate the matrix).

Function BuildDendrogram iterates over cells in the matrix and finds the cell with the lowest distance. The cell is removed from the matrix and added to a branch of a dendrogram. The output is to wave: sortedDissimilaryMatrix which is the sorted / final output and DendPosX and DendPosY waves which construct the dendrogram. Old cell position is saved in ClusXTxT wave.

Example usage:

make/o/n=(100,10) testWave=gnoise(1)+mod(q,2)//---one cluster in even columnsand another in odd columns
MakeDisSimilarityMatrix(testWave)//---See Hierarchical clustering for the code. The resulting disSimilaryMatrix looks pixelated
BuildDendrogram(DisSimilarityMatrix)//---The sorted matrix is stored in sortedDissimilaryMatrix 
Display/K=0 DendPosY vs DendPosX//---show the dendrogram
ModifyGraph userticks(bottom)={ClusX,ClusXTxT}//---show the original column information
//---helper function
Function DendMeanLoc(DendTxT,matchSt)
    wave/t DendTxT
    string matchSt
    variable i,pos=0,numpnt=0
    for(i=0;i<numpnts(DendTxT) ;i++)
        if(whichListItem(DendTxT[i],matchSt)>-1)
            pos+=i
            numpnt++
        endif
    endfor
    return pos/numpnt
end

//---computes the dendrogram and similarity matrix
Function BuildDendrogram(wave DisSimilarityMatrix)
    duplicate/o DisSimilarityMatrix,TempDisSimilarityMatrix     //---matrix that is reduced in size for each step
    make/o/n=(dimsize(DisSimilarityMatrix,0))/t DendTxT=num2str(p),OrigDendTxT=num2str(p)   //---original positions
    make/o/n=(1,3)/t DendClusTxT=""                                 //---saves the number of the original (unsorted) values
    make/o/n=(1,3) DendClus=nan,DendClusX=nan                       //---the magnitude of the clusters (y axis on a dendrogram)
    make/o/n=(0)/t ClusXTxT
    make/o/n=0 DendPosX,DendPosY                                        //---waves that draw the dendrogram
    variable run,clus  
    //---iterate over the dissimilarity matrix to find the smallest distance and remove that cell from the matrix
    for(run=0;run<dimsize(DisSimilarityMatrix,0)-1;run++)       //---over all cells in the dissimilarity matrix
        //---for each iteration find the cell with the smallest distance (value)
        imagestats/m=1 TempDisSimilarityMatrix
        variable minloc=min(V_minColLoc,V_minRowLoc)                //---removed row
        variable maxloc=max(V_minColLoc,V_minRowLoc)                //---removed col
        //---set the columns and the rows on the smallest value cell to the max value of the corresponding row and column
        TempDisSimilarityMatrix[minloc][]=max(TempDisSimilarityMatrix[minloc][q],TempDisSimilarityMatrix[maxloc][q])        //---find the max distance
        TempDisSimilarityMatrix[][minloc]=TempDisSimilarityMatrix[minloc][p]
        //---save position information and start building the dendrogram
        insertpoints 0,1, DendClusTxT,DendClus,DendClusX
        DendClusTxT[0][0]=DendTxT[minloc]                               //---info about child 1 (all the cells #)
        DendClusTxT[0][1]=DendTxT[maxloc]                               //---info about child 2
        DendClusTxT[0][2]=DendClusTxT[0][0]+";"+DendClusTxT[0][1]   //---parent - a list of all the cells in child1 and child 2
        //---modify the positions for the dendrogram
        for(clus=0;clus<numpnts(OrigDendTxT);clus++)
            if(stringmatch(OrigDendTxT[clus],DendClusTxT[0][0]))    //---child 1 is a terminal branch
                insertpoints numpnts(ClusXTxT),1,ClusXTxT
                ClusXTxT[ numpnts(ClusXTxT)-1 ]=OrigDendTxT[clus]   //---save the original position
                DendClus[0][0]=0
            endif
            if(stringmatch(OrigDendTxT[clus],DendClusTxT[0][1]))    //---child 2 is a terminal branch
                insertpoints numpnts(ClusXTxT),1,ClusXTxT
                ClusXTxT[ numpnts(ClusXTxT)-1 ]=OrigDendTxT[clus]   //---save the original position
                DendClus[0][1]=0
            endif          
        endfor
        for(clus=0;clus<dimsize(DendClusTxT,0);clus++)
            if(stringmatch(DendClusTxT[0][0],DendClusTxT[clus][2]))
                DendClus[0][0]=DendClus[clus][2]
            endif
            if(stringmatch(DendClusTxT[0][1],DendClusTxT[clus][2]))
                DendClus[0][1]=DendClus[clus][2]
            endif
        endfor     
        //---ready to remove the cell
        DendClus[0][2]=v_min
        DendTxT[minloc]=DendClusTxT[0][2]
        DendClusX[0][0]=DendMeanLoc(ClusXTxT,DendClusTxT[0][0])     //---X position info (for display only)
        deletepoints /m=0 maxloc,1, TempDisSimilarityMatrix,DendTxT
        deletepoints /m=1 maxloc,1, TempDisSimilarityMatrix
    endfor
    killwaves/z TempDisSimilarityMatrix
    //---normalize the dendrogram
    variable PeakVal=wavemax(DendClus)
    DendClus=DendClus/PeakVal*100
    variable i
    //---sort the dendrogram
    make/o/n=(itemsinlist(DendClusTxT[0][2]))/t ClusXTxT
    string/g ClusSortedLocation=""
    for(i=0;i<itemsinlist(DendClusTxT[0][2]);i++)
        ClusXTxT[i]=stringfromlist(i,DendClusTxT[0][2])             //---new position
        ClusSortedLocation+=ClusXTxT[i]+";"
    endfor
    //---drawing utilities
    for(clus=0;clus<dimsize(DendClus,0);clus++)
        insertpoints 0,5,DendPosX,DendPosY                              //---position of the branches
        DendPosX[0,1]=DendMeanLoc(ClusXTxT,DendClusTxT[clus][0])
        DendPosX[2,3]=DendMeanLoc(ClusXTxT,DendClusTxT[clus][1])
        DendPosX[4]=nan
        DendPosY[0]=DendClus[clus][0]
        DendPosY[1,2]=DendClus[clus][2]
        DendPosY[3]=DendClus[clus][1]
        DendPosY[4]=nan
    endfor     
    //---sorting
    make/o/n=(numpnts(ClusXTxT)) ClusX=p                                //---just the position (p)
    duplicate/o DisSimilarityMatrix,SortedDisSimilarityMatrix
    SortedDisSimilarityMatrix[][]=DisSimilarityMatrix[str2num(ClusXTxT[p])][str2num(ClusXTxT[q])]
end

Forum

Support

Gallery

Igor Pro 9

Learn More

Igor XOP Toolkit

Learn More

Igor NIDAQ Tools MX

Learn More