Implementing the QT Algorithm using C#


Introduction

Quality Threshold (or QT) is an algorithm used in cluster analysis which is described in this Wikipedia article (http://en.wikipedia.org/wiki/Cluster_analysis).

The basic idea of cluster analysis is to partition a set of points into clusters which have some relationship to each other. In the case of QT the user chooses the maximum diameter (i.e. distance between points) that a cluster can have and each point is then considered in turn, along with its neighbors, and allocated to a cluster.

I was recently asked if I could implement this algorithm in C# as there appears to be no other implementation which is freely available.

The source code is listed below in case it is of interest to anyone else involved in this specialized field.

Source code including sample data

using System;
using System.Collections.Generic;

struct Point
{
    public int X, Y;

    public Point(int x, int y)
    {
        this.X = x;
        this.Y = y;
    }

    public override string ToString()
    {
        return String.Format("({0}, {1})", X, Y);
    }

    public static int DistanceSquared(Point p1, Point p2)
    {
        int diffX = p2.X - p1.X;
        int diffY = p2.Y - p1.Y;
        return diffX * diffX + diffY * diffY;
    }
}

static class QT
{
    static void Main()
    {
        List<Point> points = new List<Point>();

        // sample data
        points.Add(new Point(0,100));
        points.Add(new Point(0,200));
        points.Add(new Point(0,275));
        points.Add(new Point(100, 150));
        points.Add(new Point(200,100));
        points.Add(new Point(250,200));

        double maxDiameter = 150.0;

        List<List<Point>> clusters = GetClusters(points, maxDiameter);
        Console.Clear();

         // print points to console
        Console.WriteLine("The {0} points are :\n", points.Count);
        foreach(Point p in points) Console.Write(" {0} ", p);
        Console.WriteLine();

        // print clusters to console
        for(int i = 0; i < clusters.Count; i++)
        {
           int count = clusters[i].Count;
           string plural = (count != 1) ? "s" : "";
           Console.WriteLine("\nCluster {0} consists of the following {1} point{2} :\n", i + 1, count, plural);
           foreach(Point p in clusters[i]) Console.Write(" {0} ", p);
           Console.WriteLine();
        }

        Console.ReadKey();
    }

    static List<List<Point>> GetClusters(List<Point> points, double maxDiameter)
    {
        if (points == null) return null;
        points = new List<Point>(points); // leave original List unaltered
        List<List<Point>> clusters = new List<List<Point>>();

        while (points.Count > 0)
        {
            List<Point> bestCandidate = GetBestCandidate(points, maxDiameter);
            clusters.Add(bestCandidate);          
        }
 
        return clusters;
    }

    /*
        GetBestCandidate() returns first candidate cluster encountered if there is more than one
        with the maximum number of points.
    */
    static List<Point> GetBestCandidate(List<Point> points, double maxDiameter)
    {
        double maxDiameterSquared = maxDiameter * maxDiameter; // square maximum diameter       
        List<List<Point>> candidates = new List<List<Point>>(); // stores all candidate clusters
        List<Point> currentCandidate = null; // stores current candidate cluster
        int[] candidateNumbers = new int[points.Count]; // keeps track of candidate numbers to which points have been allocated
        int totalPointsAllocated = 0; // total number of points already allocated to candidates
        int currentCandidateNumber = 0;
// current candidate number

        for(int i = 0; i < points.Count; i++)
        {
            if (totalPointsAllocated == points.Count) break; // no need to continue further
            if (candidateNumbers[i] > 0) continue; // point already allocated to a candidate
            currentCandidateNumber++;
            currentCandidate = new List<Point>(); // create a new candidate cluster
            currentCandidate.Add(points[i]); // add the current point to it
            candidateNumbers[i] = currentCandidateNumber;
            totalPointsAllocated++;

            Point latestPoint = points[i]; // latest point added to current cluster
            int[] diametersSquared = new int[points.Count];
// diameters squared of each point when added to current cluster

            // iterate through any remaining points
            // successively selecting the point closest to the group until the threshold is exceeded
            while (true)
            {
                if (totalPointsAllocated == points.Count) break; // no need to continue further               
                int closest = -1; // index of closest point to current candidate cluster
                int minDiameterSquared = Int32.MaxValue;
// minimum diameter squared, initialized to impossible value 

                for (int j = i + 1; j < points.Count; j++)
                { 
                   if(candidateNumbers[j] > 0) continue;
// point already allocated to a candidate           

                   // update diameters squared to allow for latest point added to current cluster
                   int distSquared  = Point.DistanceSquared(latestPoint, points[j]);
                   if (distSquared > diametersSquared[j]) diametersSquared[j] = distSquared;

                   // check if closer than previous closest point
                   if (diametersSquared[j] < minDiameterSquared)
                   {
                       minDiameterSquared = diametersSquared[j];
                       closest = j;
                   }
                }

                // if closest point is within maxDiameter, add it to the current candidate and mark it accordingly
                if ((double)minDiameterSquared <= maxDiameterSquared)
                {
                   currentCandidate.Add(points[closest]);
                   candidateNumbers[closest] = currentCandidateNumber;
                   totalPointsAllocated++;
                }
                else // otherwise finished with current candidate
                {
                   break;
                }                 
            }

            // add current candidate to candidates list
            candidates.Add(currentCandidate);
        }

        // now find the candidate cluster with the largest number of points
        int maxPoints = -1; // impossibly small value
        int bestCandidateNumber = 0; // ditto
        for(int i = 0; i < candidates.Count; i++)
        {
           if (candidates[i].Count > maxPoints)
           {
               maxPoints = candidates[i].Count;
               bestCandidateNumber = i + 1; // counting from 1 rather than 0
           }
        }

        // iterating backwards to avoid indexing problems, remove points in best candidate from points list
        // this will automatically be persisted to caller as List<Point> is a reference type
        for(int i = candidateNumbers.Length - 1; i >= 0; i--)
        {           
            if (candidateNumbers[i] == bestCandidateNumber) points.RemoveAt(i);
        }

        // return best candidate to caller                
        return candidates[bestCandidateNumber - 1];
    }
}

Test results

The results of running this code should be:

The 6 points are :

(0, 100) (0, 200) (0, 275) (100, 150) (200, 100) (250, 200)

Cluster 1 consists of the following 3 points :

(0, 100) (0, 200) (100, 150)

Cluster 2 consists of the following 2 points :

(200, 100) (250, 200)

Cluster 3 consists of the following 1 point :

(0, 275)

erver'>