SIGN UP MEMBER LOGIN:    
ARTICLE

Implementing the DBSCAN Algorithm using C#

Posted by Vulpes Articles | C# Language March 17, 2011
I was recently asked if I could implement DBSCAN algorithm in C# as there appears to be no other implementation which is freely available. Here I am showing to implement this algorithm.
Reader Level:

Introduction

Density-Based Spatial Clustering of Applications with Noise (or DBSCAN) is an algorithm used in cluster analysis which is described in this Wikipedia article (http://en.wikipedia.org/wiki/DBSCAN). 

The basic idea of cluster analysis is to partition a set of points into clusters which have some relationship to each other. In the case of DBSCAN the user chooses the minimum number of points required to form a cluster and the maximum distance between points in each cluster. Each point is then considered in turn, along with its neighbours, and allocated to a cluster.

I was recently asked if I could implement this algorithm in C# as there appears to be no other implementation which is freely available.

The source code is listed below in case it is of interest to anyone else involved in this specialized field.

Source Code

using System;
using System.Collections.Generic;
using System.Linq;

class Point
{
    public const int NOISE = -1;
    public const int UNCLASSIFIED = 0;
    public int X, Y, ClusterId;
    public Point(int x, int y)
    {
        this.X = x;
        this.Y = y;
    }
    public override string ToString()
    {
        return String.Format("({0}, {1})", X, Y);
    }
    public static int DistanceSquared(Point p1, Point p2)
    {
        int diffX = p2.X - p1.X;
        int diffY = p2.Y - p1.Y;
        return diffX * diffX + diffY * diffY;
    }
}
static class DBSCAN
{
    static void Main()
    {
        List<Point> points = new List<Point>();
        // sample data
        points.Add(new Point(0, 100));
        points.Add(new Point(0, 200));
        points.Add(new Point(0, 275));
        points.Add(new Point(100, 150));
        points.Add(new Point(200, 100));
        points.Add(new Point(250, 200));
        points.Add(new Point(0, 300));
        points.Add(new Point(100, 200));
        points.Add(new Point(600, 700));
        points.Add(new Point(650, 700));
        points.Add(new Point(675, 700));
        points.Add(new Point(675, 710));
        points.Add(new Point(675, 720));
        points.Add(new Point(50, 400));
        double eps = 100.0;
        int minPts = 3;
        List<List<Point>> clusters = GetClusters(points, eps, minPts);
        Console.Clear();
        // print points to console
        Console.WriteLine("The {0} points are :\n", points.Count);
        foreach (Point p in points) Console.Write(" {0} ", p);
        Console.WriteLine();
        // print clusters to console
        int total = 0;
        for (int i = 0; i < clusters.Count; i++)
        {
            int count = clusters[i].Count;
            total += count;
            string plural = (count != 1) ? "s" : "";
            Console.WriteLine("\nCluster {0} consists of the following {1} point{2} :\n", i + 1, count, plural);
            foreach (Point p in clusters[i]) Console.Write(" {0} ", p);
            Console.WriteLine();
        }
        // print any points which are NOISE
        total = points.Count - total;
        if (total > 0)
        {
            string plural = (total != 1) ? "s" : "";
            string verb = (total != 1) ? "are" : "is";
            Console.WriteLine("\nThe following {0} point{1} {2} NOISE :\n", total, plural, verb);
            foreach (Point p in points)
            {
                if (p.ClusterId == Point.NOISE) Console.Write(" {0} ", p);
            }
            Console.WriteLine();
        }
        else
        {
            Console.WriteLine("\nNo points are NOISE");
        }
        Console.ReadKey();
    }
    static List<List<Point>> GetClusters(List<Point> points, double eps, int minPts)
    {
        if (points == null) return null;
        List<List<Point>> clusters = new List<List<Point>>();
        eps *= eps; // square eps
        int clusterId = 1;
        for (int i = 0; i < points.Count; i++)
        {
            Point p = points[i];
            if (p.ClusterId == Point.UNCLASSIFIED)
            {
                if (ExpandCluster(points, p, clusterId, eps, minPts)) clusterId++;
            }
        }
        // sort out points into their clusters, if any
        int maxClusterId = points.OrderBy(p => p.ClusterId).Last().ClusterId;
        if (maxClusterId < 1) return clusters; // no clusters, so list is empty
        for (int i = 0; i < maxClusterId; i++) clusters.Add(new List<Point>());
        foreach (Point p in points)
        {
            if (p.ClusterId > 0) clusters[p.ClusterId - 1].Add(p);
        }
        return clusters;
    }
    static List<Point> GetRegion(List<Point> points, Point p, double eps)
    {
        List<Point> region = new List<Point>();
        for (int i = 0; i < points.Count; i++)
        {
            int distSquared = Point.DistanceSquared(p, points[i]);
            if (distSquared <= eps) region.Add(points[i]);
        }
        return region;
    }
    static bool ExpandCluster(List<Point> points, Point p, int clusterId, double eps, int minPts)
    {
        List<Point> seeds = GetRegion(points, p, eps);
        if (seeds.Count < minPts) // no core point
        {
            p.ClusterId = Point.NOISE;
            return false;
        }
        else // all points in seeds are density reachable from point 'p'
        {
            for (int i = 0; i < seeds.Count; i++) seeds[i].ClusterId = clusterId;
            seeds.Remove(p);
            while (seeds.Count > 0)
            {
                Point currentP = seeds[0];
                List<Point> result = GetRegion(points, currentP, eps);
                if (result.Count >= minPts)
                {
                    for (int i = 0; i < result.Count; i++)
                    {
                        Point resultP = result[i];
                        if (resultP.ClusterId == Point.UNCLASSIFIED || resultP.ClusterId == Point.NOISE)
                        {
                            if (resultP.ClusterId == Point.UNCLASSIFIED) seeds.Add(resultP);
                            resultP.ClusterId = clusterId;
                        }
                    }
                }
                seeds.Remove(currentP);
            }
            return true;
        }
    }
}

Test Results

The results of running this code should be:

The 14 points are :

(0, 100)  (0, 200)  (0, 275)  (100, 150)  (200, 100)  (250, 200)  (0, 300)  (100, 200)
(600, 700)  (650, 700)  (675, 700)  (675, 710)  (675, 720)  (50, 400)

Cluster 1 consists of the following 6 points :

(0, 100)  (0, 200)  (0, 275)  (100, 150)  (0, 300)  (100, 200)

Cluster 2 consists of the following 5 points :

(600, 700)  (650, 700)  (675, 700)  (675, 710)  (675, 720)

The following 3 points are NOISE :

(200, 100)  (250, 200)  (50, 400)

Login to add your contents and source code to this article
share this article :
post comment
 

Glad to hear you've found this useful. The only other clustering algorithms I've implemented are QT (http://www.c-sharpcorner.com/UploadFile/b942f9/6167/) and Fixed-Width (http://www.c-sharpcorner.com/Forums/Thread/123093/implementation-of-fixed-width-clustering-algorithm.aspx). These should be faster than DBSCAN for larger datasets, as they're not as complicated, but the quality of the results may not be as good.

Posted by Vulpes Jul 10, 2011

Thanks so much for your genius code . I was searching for it for the last 2 weeks . Thanks,but are there update to include rough dbscan . I used it with large dataset of 10000 point , and it tooks 25 seconds. Are there a faster updated. Thanks, Waleed.makarem@gmail.com

Posted by waleed makarem Jul 08, 2011
Nevron Gauge for SharePoint
Become a Sponsor
PREMIUM SPONSORS
  • Finally – a virtual platform that delivers next-generation Windows Server 2008 Hyper-V virtualization technology from a managed hosting partner you can truly depend on. Visit www.maximumasp.com/max for a FREE 30 day trial. Hurry offer ends soon. Climb aboard the MaxV platform and take advantage of High Availability, Intelligent Monitoring, Recurrent Backups, and Scalability – with no hassle or hidden fees. As a managed hosting partner focused solely on Microsoft technologies since 2000, MaximumASP is uniquely qualified to provide the superior support that our business is built on. Unparalleled expertise with Microsoft technologies lead to working directly with Microsoft as first to offer IIS 7 and SQL 2008 betas in a hosted environment; partnering in the Go Live Program for Hyper-V; and product co-launches built on WS 2008 with Hyper-V technology.
    The leading .NET charting control now features PDF, Flash and Silverlight export, visualization of large datasets and more. Deliver true charting functionality to your BI, Scorecard, Presentation or Scientific apps. Download evaluation now.
Become a Sponsor