# Implementing the QT Algorithm using C#

Introduction

Quality Threshold (or QT) is an algorithm used in cluster analysis which is described in this Wikipedia article (http://en.wikipedia.org/wiki/Cluster_analysis).

The basic idea of cluster analysis is to partition a set of points into clusters which have some relationship to each other. In the case of QT the user chooses the maximum diameter (i.e. distance between points) that a cluster can have and each point is then considered in turn, along with its neighbors, and allocated to a cluster.

I was recently asked if I could implement this algorithm in C# as there appears to be no other implementation which is freely available.

The source code is listed below in case it is of interest to anyone else involved in this specialized field.

Source code including sample data

using System;
using System.Collections.Generic;

struct Point
{
public int X, Y;

public Point(int x, int y)
{
this.X = x;
this.Y = y;
}

public override string ToString()
{
return String.Format("({0}, {1})", X, Y);
}

public static int DistanceSquared(Point p1, Point p2)
{
int diffX = p2.X - p1.X;
int diffY = p2.Y - p1.Y;
return diffX * diffX + diffY * diffY;
}
}

static class QT
{
static void Main()
{
List<Point> points = new List<Point>();

// sample data

double maxDiameter = 150.0;

List<List<Point>> clusters = GetClusters(points, maxDiameter);
Console.Clear();

// print points to console
Console.WriteLine("The {0} points are :\n", points.Count);
foreach(Point p in points) Console.Write(" {0} ", p);
Console.WriteLine();

// print clusters to console
for(int i = 0; i < clusters.Count; i++)
{
int count = clusters[i].Count;
string plural = (count != 1) ? "s" : "";
Console.WriteLine("\nCluster {0} consists of the following {1} point{2} :\n", i + 1, count, plural);
foreach(Point p in clusters[i]) Console.Write(" {0} ", p);
Console.WriteLine();
}

}

static List<List<Point>> GetClusters(List<Point> points, double maxDiameter)
{
if (points == null) return null;
points = new List<Point>(points); // leave original List unaltered
List<List<Point>> clusters = new List<List<Point>>();

while (points.Count > 0)
{
List<Point> bestCandidate = GetBestCandidate(points, maxDiameter);
}

return clusters;
}

/*
GetBestCandidate() returns first candidate cluster encountered if there is more than one
with the maximum number of points.
*/
static List<Point> GetBestCandidate(List<Point> points, double maxDiameter)
{
double maxDiameterSquared = maxDiameter * maxDiameter; // square maximum diameter
List<List<Point>> candidates = new List<List<Point>>(); // stores all candidate clusters
List<Point> currentCandidate = null; // stores current candidate cluster
int[] candidateNumbers = new int[points.Count]; // keeps track of candidate numbers to which points have been allocated
int totalPointsAllocated = 0; // total number of points already allocated to candidates
int currentCandidateNumber = 0;
// current candidate number

for(int i = 0; i < points.Count; i++)
{
if (totalPointsAllocated == points.Count) break; // no need to continue further
if (candidateNumbers[i] > 0) continue; // point already allocated to a candidate
currentCandidateNumber++;
currentCandidate = new List<Point>(); // create a new candidate cluster
candidateNumbers[i] = currentCandidateNumber;
totalPointsAllocated++;

Point latestPoint = points[i]; // latest point added to current cluster
// diameters squared of each point when added to current cluster

// iterate through any remaining points
// successively selecting the point closest to the group until the threshold is exceeded
while (true)
{
if (totalPointsAllocated == points.Count) break; // no need to continue further
int closest = -1; // index of closest point to current candidate cluster
int minDiameterSquared = Int32.MaxValue;
// minimum diameter squared, initialized to impossible value

for (int j = i + 1; j < points.Count; j++)
{
if(candidateNumbers[j] > 0) continue;
// point already allocated to a candidate

// update diameters squared to allow for latest point added to current cluster
int distSquared  = Point.DistanceSquared(latestPoint, points[j]);

// check if closer than previous closest point
{
closest = j;
}
}

// if closest point is within maxDiameter, add it to the current candidate and mark it accordingly
if ((double)minDiameterSquared <= maxDiameterSquared)
{
candidateNumbers[closest] = currentCandidateNumber;
totalPointsAllocated++;
}
else // otherwise finished with current candidate
{
break;
}
}

// add current candidate to candidates list
}

// now find the candidate cluster with the largest number of points
int maxPoints = -1; // impossibly small value
int bestCandidateNumber = 0; // ditto
for(int i = 0; i < candidates.Count; i++)
{
if (candidates[i].Count > maxPoints)
{
maxPoints = candidates[i].Count;
bestCandidateNumber = i + 1; // counting from 1 rather than 0
}
}

// iterating backwards to avoid indexing problems, remove points in best candidate from points list
// this will automatically be persisted to caller as List<Point> is a reference type
for(int i = candidateNumbers.Length - 1; i >= 0; i--)
{
if (candidateNumbers[i] == bestCandidateNumber) points.RemoveAt(i);
}

// return best candidate to caller
return candidates[bestCandidateNumber - 1];
}
}

Test results

The results of running this code should be:

The 6 points are :

(0, 100) (0, 200) (0, 275) (100, 150) (200, 100) (250, 200)

Cluster 1 consists of the following 3 points :

(0, 100) (0, 200) (100, 150)

Cluster 2 consists of the following 2 points :

(200, 100) (250, 200)

Cluster 3 consists of the following 1 point :

(0, 275)

erver'>