Source Code by qpY3i8

VIEWS: 40 PAGES: 33

									                          Source Code

        Project Report : Comparing Clustering Algorithms

                          Participants:

                         Joyesh Mishra
                   Vasanth Prabhu Sundararaj
                    Gnana Sundar Rajendiran

                 CIS 6930 Data Mining Fall 2007

        Department of Computer and Information Science &
                         Engineering

                      University of Florida

Index

Source Code:

  1. K-Means
  2. Agglomerative Clustering
  3. DBSCAN Using KD Trees
  4. CURE
1. K-Means




2. Agglomerative
Single Link
% Single-link agglomerative clustering of the points stored in data1.dat.
%
% Every pairwise distance is sorted once; pairs are then visited from the
% closest upwards and joined with a union-find structure (union by rank,
% path compression inside findOperation) until a single cluster remains.
% Cluster memberships are written to clusters/<iter>_Cluster_<n>.txt.
load data1.dat;
X = data1';
%X = [0.4 0.22 0.35 0.26 0.08 0.45;0.53 0.38 0.32 0.19 0.41 0.30];
[D,N] = size(X);

%MAKE_SET
% Assign whole vectors rather than indexing into them, so that leftovers
% from an earlier run on a larger data set cannot survive in the workspace.
tempVec = 1:N;            % tempVec(i) == i; a node is a root while parentVec(i) == i
parentVec = tempVec;      % every point starts as its own cluster
rankVec = zeros(1,N);
mergingDataPoints = zeros(0,2);

% Pairwise Euclidean distances via |a|^2 + |b|^2 - 2*a'*b.
X2 = sum(X.^2,1);
dist = repmat(X2,N,1) + repmat(X2',1,N) - 2*X'*X;
%corre = corre - diag(diag(corre));
% Round-off can push squared distances slightly below zero; clamp before the
% square root so the matrix stays real.
dist = sqrt(max(dist,0));
% Mask the diagonal and the upper triangle AFTER the square root. The old
% code wrote 1000 before sqrt, turning the mask into ~31.6, which sorted
% ahead of any genuine distance larger than that.
dist(triu(true(N))) = Inf;
vect = reshape(dist,1,N*N);
[sortVect,IX] = sort(vect);

% The snapshot files need their directory to exist.
if ~exist('clusters','dir')
    mkdir('clusters');
end

iterNum = 0;
k = 0;
tic
% Each successful union removes exactly one root, so N-1 unions leave a
% single cluster; counting them avoids rescanning parentVec every pass.
while(k < N-1)
    iterNum = iterNum + 1;
    % Convert the linear index back into (row, column) of dist.
    pos2 = floor((IX(iterNum)-1)/N)+1;
    pos1 = IX(iterNum) - (pos2-1)*N;
    %fprintf('Iternation Number..%d...Closest data points %d %d\n',iterNum,pos1,pos2);
    %Find operation%
    [parentVal1,parentVec] = findOperation(pos1,parentVec);
    [parentVal2,parentVec] = findOperation(pos2,parentVec);
    if(parentVal1~=parentVal2)
        k = k + 1;
        mergingDataPoints(k,1) = pos1;
        mergingDataPoints(k,2) = pos2;
        %Union Operation% (union by rank; ties attach root 2 under root 1)
        if(rankVec(parentVal1)==rankVec(parentVal2))
            parentVec(parentVal2) = parentVal1;
            rankVec(parentVal1) = rankVec(parentVal1) + 1;
        elseif(rankVec(parentVal1)>rankVec(parentVal2))
            parentVec(parentVal2) = parentVal1;
        else
            parentVec(parentVal1) = parentVal2;
        end
%     else
%          fprintf('%d and %d have the same parent %d...\n',pos1,pos2,parentVal1);
    end
    % Periodic snapshot of the current clusters.
    if(mod(iterNum,2000000)==0)
        for i = 1:N
            [parentVal,parentVec] = findOperation(i,parentVec);
        end
        clusterHeads = find(~(parentVec-tempVec));
        for i = 1:length(clusterHeads)
            dataPts = find(parentVec==clusterHeads(i));
            fileName = strcat('clusters/',num2str(iterNum),'_Cluster_',num2str(i),'.txt');
            fid = fopen(fileName,'w');
            if(fid == -1)
                error('Unable to open %s for writing.',fileName);
            end
            for pt = 1:length(dataPts)
                fprintf(fid,'%8.4f\t%8.4f\n',X(1,dataPts(pt)),X(2,dataPts(pt)));
            end
            % Close only this file; fclose('all') would also close files
            % opened by unrelated code.
            fclose(fid);
%              fprintf('Cluster %d data points...\n',i);
%              disp(X(:,dataPts)');
        end
        %pause();
    end
end
t = toc
% Flatten the forest so that every point refers directly to its root.
for i = 1:N
    [parentVal,parentVec] = findOperation(i,parentVec);
end
% NOTE(review): t is fractional, so MATLAB overrides %d with %e here.
fprintf('Elapsed Time - %d\n',t);
clusterHeads = find(~(parentVec-tempVec));
for i = 1:length(clusterHeads)
    dataPts = find(parentVec==clusterHeads(i));
    %disp(dataPts);
    fileName = strcat('clusters/',num2str(iterNum),'_Cluster_',num2str(i),'.txt');
    fid = fopen(fileName,'w');
    if(fid == -1)
        error('Unable to open %s for writing.',fileName);
    end
    for pt = 1:length(dataPts)
        fprintf(fid,'%8.4f\t%8.4f\n',X(1,dataPts(pt)),X(2,dataPts(pt)));
    end
    fclose(fid);
end
disp(mergingDataPoints);
% plot(X(1,:),X(2,:),'k*','MarkerSize',3);
% hold on;
% for i = 1:k
%     dPt1 = X(:,mergingDataPoints(i,1));
%     dPt2 = X(:,mergingDataPoints(i,2));
%     line([dPt1(1,1) dPt2(1,1)],[dPt1(2,1) dPt2(2,1)],'Color','r');
% end
Complete Link

% Complete-link agglomerative clustering of the points stored in sample1.dat.
%
% The closest pair of live clusters is merged on every pass; the merged
% cluster's distance to every other cluster becomes the larger of the two
% former distances. Cluster membership is tracked with a union-find forest
% whose surviving root also owns the surviving row/column of dist.

% Loads the data file into the matrix "sample1"
load sample1.dat;
% Taking the transpose of the matrix and storing it in X
X = sample1';
% Marker for the diagonal and for retired (merged-away) rows/columns. A true
% infinity is used under its own name: the former "inf = 1000" shadowed the
% builtin and could be undercut by real distances of 1000 or more.
farAway = Inf;
% Getting the dimensions of the matrix X in D and N
[D,N] = size(X);
% Holds the initial number of clusters (singleton). Whole-vector assignment
% prevents stale entries from an earlier, larger run surviving.
tempVec = 1:N;
parentVec = tempVec;
rankVec = zeros(1,N);
mergingDataPoints = zeros(0,2);

% Pairwise Euclidean distances via |a|^2 + |b|^2 - 2*a'*b.
X2 = sum(X.^2,1);
dist = repmat(X2,N,1) + repmat(X2',1,N) - 2*X'*X;
% Clamp round-off negatives before the square root.
dist = sqrt(max(dist,0));
% Logical indexing, because Inf*eye(N) would create NaN off the diagonal.
dist(logical(eye(N))) = farAway;

% The snapshot files need their directory to exist.
if ~exist('clusters','dir')
    mkdir('clusters');
end

iterNum = 0;
k = 0;
tic
% N-1 successful merges leave exactly one cluster.
while(k < N-1)
    iterNum = iterNum + 1;
    % Column-wise minima first, then the overall minimum: pt1 is the column
    % and pt2 the row of the smallest remaining distance.
    [minVals argmin] = min(dist);
    [minDist pt1] = min(minVals);
    pt2 = argmin(pt1);
    %Find operation%
    [parentVal1,parentVec] = findOperation(pt1,parentVec);
    [parentVal2,parentVec] = findOperation(pt2,parentVec);
    if(parentVal1~=parentVal2)
        k = k + 1;
        mergingDataPoints(k,1) = pt1;
        mergingDataPoints(k,2) = pt2;
        fprintf('Merging clusters %d %d\n',parentVal1,parentVal2);
        %Union Operation% (union by rank; dPt1 survives, dPt2 is retired)
        if(rankVec(parentVal1)==rankVec(parentVal2))
            dPt1 = parentVal1;
            dPt2 = parentVal2;
            rankVec(parentVal1) = rankVec(parentVal1) + 1;
        elseif(rankVec(parentVal1)>rankVec(parentVal2))
            dPt1 = parentVal1;
            dPt2 = parentVal2;
        else
            dPt1 = parentVal2;
            dPt2 = parentVal1;
        end
        parentVec(dPt2) = dPt1;
        % Complete link: the distance to the merged cluster is the larger of
        % the two former distances.
        tempMat = [dist(dPt1,:);dist(dPt2,:)];
        maxVals = max(tempMat);
        dist(dPt1,:) = maxVals;
        dist(:,dPt1) = maxVals';
        % Retire the absorbed cluster so it can never be selected again.
        dist(dPt2,:) = farAway;
        dist(:,dPt2) = farAway;
%      else
%           fprintf('%d and %d have the same parent %d...\n',pos1,pos2,parentVal1);
    end
    % Periodic snapshot of the current clusters.
    if(mod(iterNum,250)==0)
        for i = 1:N
            [parentVal,parentVec] = findOperation(i,parentVec);
        end
        clusterHeads = find(~(parentVec-tempVec));
        for i = 1:length(clusterHeads)
            dataPts = find(parentVec==clusterHeads(i));
            fileName = strcat('clusters/',num2str(iterNum),'_Cluster_',num2str(i),'.txt');
            fid = fopen(fileName,'w');
            if(fid == -1)
                error('Unable to open %s for writing.',fileName);
            end
            for pt = 1:length(dataPts)
                fprintf(fid,'%8.4f\t%8.4f\n',X(1,dataPts(pt)),X(2,dataPts(pt)));
            end
            % Close only this file, not every open file in the session.
            fclose(fid);
%             fprintf('Cluster %d data points...\n',i);
%             disp(X(:,dataPts)');
        end
        %pause();
    end
end
t = toc
% Flatten the forest so that every point refers directly to its root.
for i = 1:N
    [parentVal,parentVec] = findOperation(i,parentVec);
end
% NOTE(review): t is fractional, so MATLAB overrides %d with %e here.
fprintf('Elapsed Time - %d\n',t);
clusterHeads = find(~(parentVec-tempVec));
for i = 1:length(clusterHeads)
    dataPts = find(parentVec==clusterHeads(i));
    %disp(dataPts);
    fileName = strcat('clusters/',num2str(iterNum),'_Cluster_',num2str(i),'.txt');
    fid = fopen(fileName,'w');
    if(fid == -1)
        error('Unable to open %s for writing.',fileName);
    end
    for pt = 1:length(dataPts)
        fprintf(fid,'%8.4f\t%8.4f\n',X(1,dataPts(pt)),X(2,dataPts(pt)));
    end
    fclose(fid);
end
disp(mergingDataPoints);
% plot(X(1,:),X(2,:),'k*','MarkerSize',3);
% hold on;
% for i = 1:k
%      dPt1 = X(:,mergingDataPoints(i,1));
%      dPt2 = X(:,mergingDataPoints(i,2));
%      line([dPt1(1,1) dPt2(1,1)],[dPt1(2,1) dPt2(2,1)],'Color','r');
% end
Find Operation function

% To find the root of the tree
function [parentVal parentVec] = findOperation(pos,parentVec)
    % FINDOPERATION  Union-find "find" with path compression.
    %   pos       - index of the element whose set representative is wanted
    %   parentVec - parent pointers; a root satisfies parentVec(i) == i
    % Returns the root of pos in parentVal and the updated parentVec in which
    % every node visited on the way points straight at that root.

    % Pass 1: climb the parent pointers until a self-referencing node is hit.
    parentVal = pos;
    while(parentVec(parentVal) ~= parentVal)
        parentVal = parentVec(parentVal);
    end

    % Pass 2: walk the same path again and re-attach each node to the root.
    node = pos;
    while(node ~= parentVal)
        upNext = parentVec(node);
        parentVec(node) = parentVal;
        node = upNext;
    end
end



3. DBSCAN Algorithm
Point.java
package dbscan;

/**
 * Class Point - Stores a Point in the 2 Dimensional Space
 * @version 1.0
 * @author jmishra
 */
public class Point {

        /** X coordinate. */
        public double x;
        /** Y coordinate. */
        public double y;
        /** Position of this point in the data set; used as the value stored in the KD tree. */
        public int index;
        /** Cluster this point has been assigned to, if any. */
        public Cluster cluster;
        /** Set once the point has been classified as a core point. */
        public boolean isCorePoint = false;

        /** Creates a point at the origin with index 0 and no cluster. */
        public Point() {
        }

        /**
         * Creates a point from its coordinates and data-set index.
         * @param x X coordinate
         * @param y Y coordinate
         * @param index position of the point in the data set
         */
        public Point(double x, double y, int index) {
                this.index = index;
                this.x = x;
                this.y = y;
        }

        /**
         * Copies the coordinates of another point. The index, cluster and
         * core-point flag are not copied and keep their defaults.
         * @param point point whose coordinates are copied
         */
        public Point(Point point) {
                x = point.x;
                y = point.y;
        }

        /**
         * Euclidean distance between this point and another one.
         * @param t the other point
         * @return the straight-line distance to t
         */
        public double calcDistanceFromPoint(Point t) {
                double squaredDx = Math.pow(x - t.x, 2);
                double squaredDy = Math.pow(y - t.y, 2);
                return Math.sqrt(squaredDx + squaredDy);
        }

        /**
         * Coordinate comparison. This is an overload taking a Point, not an
         * override of Object.equals(Object).
         * @param t point to compare with
         * @return true when both coordinates are exactly equal
         */
        public boolean equals(Point t) {
                if (x != t.x) {
                        return false;
                }
                return y == t.y;
        }

        public void setCluster(Cluster cluster) {
                this.cluster = cluster;
        }

        public Cluster getCluster() {
                return this.cluster;
        }

        /**
         * @return a fresh two-element array {x, y}
         */
        public double[] toDouble() {
                return new double[] {x, y};
        }
}


Cluster.java
package dbscan;

import java.util.ArrayList;
import java.util.Iterator;

/**
 * Class Cluster - Represents a Cluster in DBSCAN
 * @version 1.0
 * @author jmishra
 */
public class Cluster {

        /**
         * Points contained in the cluster (elements are Point instances).
         */
        public ArrayList points = new ArrayList();

        /**
         * @return the number of points currently held by the cluster
         */
        public int size() {
                return this.points.size();
        }

        /**
         * Tests whether a point lies within eps of every member of the cluster.
         * @param p point to test
         * @param eps maximum allowed distance (inclusive)
         * @return true when no member is farther than eps from p, which
         *         includes the case of an empty cluster; false otherwise
         */
        public boolean isInEPSProximityFromAllPoints(Point p, double eps) {
                for (Iterator members = points.iterator(); members.hasNext();) {
                        Point member = (Point) members.next();
                        // A single member beyond eps settles the answer.
                        if (member.calcDistanceFromPoint(p) > eps) {
                                return false;
                        }
                }
                return true;
        }
}

DBSCAN.java
package dbscan;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;

import edu.wlu.cs.levy.CG.KDTree;

/**
 * DBSCAN Algorithm
 * Input Parameters: DataFile NumberOfPointsInFile MinPointThreshHoldToClassifyAsCorePoint EPS
 *
 * The Algorithm uses KD Trees to store the data points and for searching Nearest Neighbors
 * @version 1.0
 * @author jmishra
 */
public class DBSCAN {

        /** Stores the input arguments to the algorithm **/
        private String dataFile;
        private int totalNumberOfPoints;
        private double eps;
        private int minPointThreshHold;

        private Point[] dataPoints;
        private HashMap dataPointsMap = new HashMap();
        private KDTree kdtree = new KDTree(2);

        /** Stores the points in three categories based on the number of points in their neighborhood within EPS
**/
       ArrayList corePoints = new ArrayList();
       ArrayList borderPoints = new ArrayList();
       ArrayList noisePoints = new ArrayList();

       /** Stores the final list of clusters **/
       ArrayList clusters = new ArrayList();


       /**
        * DBSCAN Algorithm Execution Begins Here
        * @param args The Initialization Parameters passed through Command Line Arguments
        */
       public DBSCAN(String[] args) {

               System.out.println("DBSCAN Clustering Algorithm");
               System.out.println("---------------------------\n");

               initializeParameters(args);
               readDataPoints();

               long beginTime = System.currentTimeMillis();

               initializeKDTree();
               classifyPoints();
               buildClustersFromCorePoints();
               assignBorderPointsToClusters();

               long time = System.currentTimeMillis() - beginTime;

                System.out.println(clusters.size() + " Clusters Found.");
                System.out.println("\nThe Algorithm took " + time + " milliseconds to complete.");
                System.out.println("\nPlease Use GNUPlot to show the clusters by rendering the file using
\"load plotdbscan.txt\" on GNUPlot Console");

               showClusters();
       }

       private void initializeParameters(String[] args) {
               dataFile = args[1];
               totalNumberOfPoints = Integer.parseInt(args[2]);
               minPointThreshHold = Integer.parseInt(args[3]);
               if(args.length > 3) eps = Double.parseDouble(args[4]);
               else calculateEPS();
               dataPoints = new Point[totalNumberOfPoints];

              System.out.println("FileName: " + dataFile + "\t\tNumber of Points to be Clustered: " +
totalNumberOfPoints + "\n");
       }

        private void calculateEPS() {
                /* Efficient Methods can be implemented here to calculate EPS based on MinPointThreshhold
Specified by the user.
                * This procedure has not been implemented and user is expected to pass EPS as one of the
parameter.
                * Finding EPS would require pre-processing of the data and calculating and estimated value of
EPS.
                */
       }

       /**
        * Read and Initialize the Data Points from User Specified Input File
        */
       private void readDataPoints() {
               int pointIndex = 0;
               FileReader fr = null;
               try {
                       fr = new FileReader(dataFile);
                       BufferedReader in = new BufferedReader(fr);
                       String data = in.readLine();
                       while(data != null) {
                                StringTokenizer st = new StringTokenizer(data);
                                double x = Double.parseDouble(st.nextToken());
                                double y = Double.parseDouble(st.nextToken());
                                dataPoints[pointIndex] = new Point(x,y,pointIndex);
                                dataPointsMap.put(pointIndex, dataPoints[pointIndex]);
                                pointIndex++;
                                data = in.readLine();
                       }
                       in.close();
               } catch(Exception e){
                       debug(e);
               }
       }

       /**
        * Initialize and build the KD Tree for the data points
        */
       private void initializeKDTree() {
                for(int i = 0; i < totalNumberOfPoints ; i++) {
                          try {
                                  kdtree.insert(dataPoints[i].toDouble(), dataPoints[i].index);
                          } catch(Exception e) {}
                }
       }

       /**
        * Returns the n nearest neighbors for a Point by looking up in the KD Tree where n = MinPoints
        * @param point Point P
        * @return
        * Object[] Index of the Nearest Points to Point P
        */
       private Object[] getNearestNeighbours(Point point) {
               Object[] nearestPoints = null;
               try {
                       nearestPoints = kdtree.nearest(point.toDouble(), minPointThreshHold);
               } catch(Exception e) {
                       debug(e);
               }
               return nearestPoints;
       }

       /**
        * Classify the data points into Core Points, Border Points and Noise Points based on the number of
neighbors within EPS radius
        */
       private void classifyPoints() {
               for(int index = 0; index < totalNumberOfPoints; index++) {
                        Object[] nearestPoints = getNearestNeighbours(dataPoints[index]);
                        int satisyingPoints = 0;
                        for(int i = 0; i < nearestPoints.length ; i++) {
                                  int nearestPointIndex = (Integer)nearestPoints[i];
                                  if(dataPoints[nearestPointIndex].calcDistanceFromPoint(dataPoints[index]) <
eps) {
                                           satisyingPoints++;
                                  }
                                  else if(satisyingPoints != 1) break;
                        }
                        if(satisyingPoints == 1) noisePoints.add(dataPoints[index]);
                        else if(satisyingPoints == minPointThreshHold) {
                                  corePoints.add(dataPoints[index]);
                                  dataPoints[index].isCorePoint = true;
                        }
                        else borderPoints.add(dataPoints[index]);
               }
       }

       /**
        * Creates a new Cluster and adds the point P
        * When a core point is far enough from all remaining core points, a new cluster if formed for it.
        * @param p Point P
        */
       private void createNewClusterAndAddPoint(Point p) {
               Cluster cluster = new Cluster();
               cluster.points.add(p);
               p.cluster = cluster;
               clusters.add(cluster);
       }

       /**
        * Builds all possible clusters from the list of Core Points
        */
       private void buildClustersFromCorePoints() {
               for(int i = 0; i < corePoints.size() ; i++) {
                         Point p = (Point)corePoints.get(i);
                         if(i == 0) {
                                 createNewClusterAndAddPoint(p);
                                 continue;
                         }
                        boolean isAssigned = false;
                        for(int index = 0; index < clusters.size() ; index++) {
                                 Cluster c = (Cluster)clusters.get(index);
                                 if(c.isInEPSProximityFromAllPoints(p, eps)) {
                                          c.points.add(p);
                                          p.cluster = c;
                                          isAssigned = true;
                                          break;
                                 }
                        }
                        if(isAssigned == false) {
                                 createNewClusterAndAddPoint(p);
                        }
                }
        }

        /**
         * Returns the KD Tree Index of Nth Nearest Neighbor for a Point P
         * @param point Point P
         * @param n Nth Nearest Neighbor
         * @return
         * int KD Tree Index of Nth Nearest Neighbor for a Point P
         */
        private int getNthNearestNeighbour(Point point, int n) {
                 int nearestPointIndex = 0;
                 try {
                         Object[] nearestNeighbours = kdtree.nearest(point.toDouble(), n);
                         nearestPointIndex = (Integer)nearestNeighbours[n-1];
                 } catch(Exception e) {
                         debug(e);
                 }
                 return nearestPointIndex;
        }

       /**
        * Assigns all border points to the clusters formed from the list of core points in
buildClustersFromCorePoints method
        */
       private void assignBorderPointsToClusters() {
               for(int i = 0; i < borderPoints.size() ; i++) {
                         Point p = (Point)borderPoints.get(i);
                         int j = 2;
                         Point q = dataPoints[getNthNearestNeighbour(p,j)];
                         while(!q.isCorePoint){
                                  j++;
                                  q = dataPoints[getNthNearestNeighbour(p,j)];
                         }
                         q.cluster.points.add(p);
                         p.cluster = q.cluster;
               }
       }

        /**
 * Shows all clusters generated
 */
private void showClusters() {
        for(int i = 0; i < clusters.size() ; i++) {
                  Cluster c = (Cluster)clusters.get(i);
                  logCluster(c, "cluster" + i);
        }
        logOutliers("outliers");
        logPlotScript(clusters.size());
}

/**
 * Logs the cluster to a file
 */
private void logCluster(Cluster c, String filename) {
        BufferedWriter out = getWriterHandle(filename);
        try {
                 out.write("#\tX\tY\n");
                 for(int i = 0; i < c.size() ; i++) {
                           Point p = (Point)c.points.get(i);
                           out.write("\t" + p.x + "\t" + p.y + "\n");
                 }
                 out.flush();
                 out.close();
        } catch(Exception e) {
                 debug(e);
        }
}

/**
 * Logs the Set of Outliers to the File
 */
private void logOutliers(String filename) {
        BufferedWriter out = getWriterHandle(filename);
        try {
                 out.write("#\tX\tY\n");
                 for(int i = 0; i < noisePoints.size() ; i++) {
                           Point p = (Point)noisePoints.get(i);
                           out.write("\t" + p.x + "\t" + p.y + "\n");
                 }
                 out.flush();
                 out.close();
        } catch(Exception e) {
                 debug(e);
        }
}

/**
 * Writes a GNUPlot Script for the DBSCAN result to be shown.
 */
private void logPlotScript(int totalClusters) {
        BufferedWriter out = getWriterHandle("plotdbscan.txt");
        try {
                    setPlotStyle(out);
                    out.write("plot");
                    for(int i = 0; i < totalClusters ; i++) {
                              out.write(" \"cluster" + i + "\",");
                    }
                    out.write(" \"outliers\"");
                    out.flush();
                    out.close();
            } catch(Exception e){
                    debug(e);
            }
    }

    /**
     * Returns a file writer handle given the filename
     */
    private BufferedWriter getWriterHandle(String filename) {
            BufferedWriter out = null;
            try {
                     FileWriter fw = new FileWriter(filename, true);
                     out = new BufferedWriter(fw);
            } catch(Exception e) {
                     debug(e);
            }
            return out;
    }

    /**
     * Sets the Plot Style for the GNUPlot Script
     */
    private void setPlotStyle(BufferedWriter out) {
             try {
                     out.write("reset\n");
                     out.write("set size ratio 2\n");
                     out.write("unset key\n");
                     out.write("set title \"DBSCAN\"\n");
             } catch(Exception e) {
                     debug(e);
             }
    }

    /**
     * Prints the Exception to standard output.
     * @param e Exception thrown
     */
    private void debug(Exception e) {
            e.printStackTrace(System.out);
    }
}
4. CURE Algorithm

Point.java
package cure;
import java.util.StringTokenizer;

/**
 * Represents a Point Class. Also stores the KD Tree index for search.
 * @version 1.0
 * @author jmishra
 */
public class Point {
        /** X coordinate. */
        public double x;
        /** Y coordinate. */
        public double y;
        /** Index under which the point is stored in the KD tree. */
        public int index;

        /** Creates a point at the origin with index 0. */
        public Point() {
        }

        /**
         * Creates a point from its coordinates and KD tree index.
         * @param x X coordinate
         * @param y Y coordinate
         * @param index KD tree index
         */
        public Point(double x, double y, int index) {
                this.x = x;
                this.y = y;
                this.index = index;
        }

        /**
         * Copies the coordinates of another point; the index is not copied
         * and stays 0.
         * @param point point whose coordinates are copied
         */
        public Point(Point point) {
                this.x = point.x;
                this.y = point.y;
        }

        /**
         * @return a fresh two-element array {x, y}
         */
        public double[] toDouble() {
                double[] xy = {x,y};
                return xy;
        }

        /**
         * Parses a point from a string holding two whitespace-separated
         * numbers ("x y"). Previously the tokenizer was created but never
         * read, so every input produced the point (0,0).
         *
         * @param str text to parse
         * @return a point with the parsed coordinates and index 0; when the
         *         string holds fewer than two tokens the default point (0,0)
         *         is returned, as before
         * @throws NumberFormatException when one of the first two tokens is not a number
         */
        public static Point parseString(String str) {
                Point point = new Point();
                StringTokenizer st = new StringTokenizer(str);
                if (st.countTokens() >= 2) {
                        point.x = Double.parseDouble(st.nextToken());
                        point.y = Double.parseDouble(st.nextToken());
                }
                return point;
        }

        /**
         * Calculates the Euclidean Distance from a Point t
         * @param t Point t
         * @return
         * double Euclidean Distance from a Point t
         */
        public double calcDistanceFromPoint(Point t) {
                return Math.sqrt(Math.pow(x-t.x, 2) + Math.pow(y-t.y, 2));
        }

        /**
         * @return the point formatted as "{x,y}"
         */
        public String toString() {
                return "{" + x + "," + y + "}";
        }

        /**
         * Coordinate comparison. This is an overload taking a Point, not an
         * override of Object.equals(Object).
         * @param t point to compare with
         * @return true when both coordinates are exactly equal
         */
        public boolean equals(Point t) {
                return (x == t.x) && (y == t.y);
        }
}

CompareCluster.java
package cure;
import java.util.Comparator;

/**
 * Defines a class CompareCluster which helps the MinHeap (Implemented using Priority Queue)
 * to compare two clusters and store accordingly in the heap.
 *
 * The 2 clusters are compared based on the distance from their closest Cluster. The cluster pair which has the
lowest such distance
 * is stored at the root of the min heap.
 * @version 1.0
 * @author jmishra
 */
public class CompareCluster implements Comparator{

        /**
         * Orders two clusters by the distance to their respective closest
         * cluster, smaller distance first.
         * @param CLUSTER1 first Cluster
         * @param CLUSTER2 second Cluster
         * @return -1 when the first distance is smaller, 0 when both are
         *         equal, 1 in every other case
         */
        public int compare(Object CLUSTER1, Object CLUSTER2) {
                Cluster first = (Cluster)CLUSTER1;
                Cluster second = (Cluster)CLUSTER2;
                double firstGap = first.distanceFromClosest;
                double secondGap = second.distanceFromClosest;
                if(firstGap < secondGap) {
                         return -1;
                }
                return (firstGap == secondGap) ? 0 : 1;
        }
}


Cluster.java
package cure;
import java.util.ArrayList;

/**
 * Class Cluster represents a collection of points and its set of representative points.
 * It also stores the distance from its closest neighboring cluster.
 *
 * @version 1.0
 * @author jmishra
 */
public class Cluster {

        /**
         * Value returned by {@link #computeDistanceFromCluster(Cluster)} when
         * no pair of representative points could be compared. Kept at the
         * historical value so existing callers see no change in that case.
         */
        private static final double NO_REPRESENTATIVE_DISTANCE = 1000000;

        /** Representative points of the cluster (elements are Point instances). */
        public ArrayList rep = new ArrayList();
        /** All points that belong to the cluster (elements are Point instances). */
        public ArrayList pointsInCluster = new ArrayList();
        /** Distance to {@link #closestCluster}. */
        public double distanceFromClosest = 0;
        /** The closest neighbouring cluster. */
        public Cluster closestCluster;
        public ArrayList closestClusterRep = new ArrayList();

        /**
         * Distance between two clusters, defined as the smallest distance
         * between any representative of this cluster and any representative
         * of the other one.
         *
         * The search used to start from the constant 1000000, which silently
         * clipped every real distance above that value; it now starts from
         * positive infinity.
         *
         * @param cluster the other cluster
         * @return the minimum representative-to-representative distance, or
         *         1000000 when either cluster has no representatives
         */
        public double computeDistanceFromCluster(Cluster cluster) {
                double minDistance = Double.POSITIVE_INFINITY;
                for(int i = 0; i<rep.size(); i++) {
                         Point p1 = (Point)rep.get(i);
                         for(int j = 0; j<cluster.rep.size() ; j++) {
                                  Point p2 = (Point)cluster.rep.get(j);
                                  double distance = p1.calcDistanceFromPoint(p2);
                                  if(minDistance > distance) minDistance = distance;
                         }
                }
                if(minDistance == Double.POSITIVE_INFINITY) {
                         return NO_REPRESENTATIVE_DISTANCE;
                }
                return minDistance;
        }

        /**
         * @return the number of points in the cluster
         */
        public int getClusterSize() {
                return pointsInCluster.size();
        }

        /**
         * @return the live list of points in the cluster (not a copy)
         */
        public ArrayList getPointsInCluster() {
                return pointsInCluster;
        }
}

ClusterSet.java
package cure;

import java.io.*;
import java.util.*;

import edu.wlu.cs.levy.CG.KDTree;

/**
 * Creates a set of clusters for a given number of data points or reduces the number of clusters to a fixed number
of clusters as specified
 * using CURE's hierarchical clustering algorithm.
 *
 * The ClusterSet uses 2 data structures. The KD Tree is initialized and used to store points across clusters.
 * The Min Heap (Uses java.util.PriorityQueue) is used to store the clusters and repetitively perform clustering.
The Min Heap is rearranged
 * in every step to bring the closest pair of clusters to the root of the heap and also change the closest distance
measures for all clusters.
 *
 * Please refer to the CURE Hierarchical Clustering Algorithm for more details. This class works only with
 * sampled, partitioned data or with an already-formed set of clusters. The computation of a set of clusters
 * can be done remotely on another machine, hence adding concurrency to the overall algorithm.
 *
 * @version 1.0
 * @author jmishra
 */
public class ClusterSet {

       int numberOfPoints;
       Point[] points;
       CompareCluster cc;
       PriorityQueue heap;
       KDTree kdtree;
       int clustersToBeFound;
       int numberofRepInCluster;
       double shrinkFactor;
       int newPointCount;
       HashMap dataPointMap;

        /**
         * Initialize the Containers
         * @param numberOfPoints Number of Data Points
         * @param clustersToBeFound Clusters to be found after clustering
         * @param numberOfRepInCluster Number of Representative Points for every Cluster
         * @param shrinkFactor Shrink Factor for a Cluster
         */
        public void initializeContainers(int numberOfPoints, int clustersToBeFound, int numberOfRepInCluster,
                        double shrinkFactor) {
                 // Point storage is sized up front; the slots are filled later.
                 this.numberOfPoints = numberOfPoints;
                 this.points = new Point[numberOfPoints];

                 // Clustering parameters.
                 this.clustersToBeFound = clustersToBeFound;
                 this.numberofRepInCluster = numberOfRepInCluster;
                 this.shrinkFactor = shrinkFactor;
                 // Counter starts just past the indices of the input points.
                 this.newPointCount = numberOfPoints;

                 // Search structures: a min heap ordered by CompareCluster and a 2-d KD tree.
                 this.cc = new CompareCluster();
                 this.heap = new PriorityQueue(1000, this.cc);
                 this.kdtree = new KDTree(2);
        }

       /**
        * Reduce the number of clusters to the specified numberOfClusters
        *
        * @param clusters Set of Clusters
        * @param numberOfClusters Number of Clusters to be found
        * @param numberOfRepInCluster Number of Representative Points in a Cluster
        * @param shrinkFactor Shrink Factor for Representative Points in a new Cluster formed
        * @param dataPointMap Data Point Map
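        * @param clusterMerge Must be true for the points of the given clusters to be copied and for the clustering parameters (numberOfClusters, numberOfRepInCluster, shrinkFactor, dataPointMap) to be stored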
        */
       public ClusterSet(ArrayList clusters, int numberOfClusters, int numberOfRepInCluster,
                       double shrinkFactor, HashMap dataPointMap, boolean clusterMerge) {
               // The point array must hold every member of every incoming cluster.
               numberOfPoints = 0;
               for (Iterator sizes = clusters.iterator(); sizes.hasNext();) {
                       numberOfPoints += ((Cluster) sizes.next()).getClusterSize();
               }
               points = new Point[numberOfPoints];
               cc = new CompareCluster();
               heap = new PriorityQueue(1000, cc);
               kdtree = new KDTree(2);
               // NOTE(review): newPointCount is left at its default here, whereas
               // initializeContainers sets it to numberOfPoints - confirm intended.

               if (clusterMerge) {
                       // Flatten the members of all clusters into the point array.
                       int slot = 0;
                       for (int c = 0; c < clusters.size(); c++) {
                               Cluster source = (Cluster) clusters.get(c);
                               for (int m = 0; m < source.getClusterSize(); m++) {
                                       points[slot++] = (Point) source.pointsInCluster.get(m);
                               }
                       }
                       this.clustersToBeFound = numberOfClusters;
                       this.numberofRepInCluster = numberOfRepInCluster;
                       this.shrinkFactor = shrinkFactor;
                       this.dataPointMap = dataPointMap;
               }
               // NOTE(review): with clusterMerge == false the point array stays
               // empty (all null) while buildKDTree still walks every slot.
               buildKDTree();
               buildHeapForClusters(clusters);
       }

       /**
        * Build the heap for set of clusters specified
        * @param clusters Set of Clusters
        */
       public void buildHeapForClusters(ArrayList clusters) {
               // Push every cluster, in list order, onto the min heap.
               for (Iterator pending = clusters.iterator(); pending.hasNext();) {
                       Cluster next = (Cluster) pending.next();
                       heap.add(next);
               }
       }

       /**
        * Creates a set of clusters from the given number of data points and other CURE parameters
        * @param dataPoints Data Points to be clustered
        * @param numberOfClusters Number of Clusters to be formed
        * @param numberOfRepInCluster Number of Representative points in a new cluster
        * @param shrinkFactor Shrink factor for Representative points
        * @param dataPointMap The HashMap to store the data points
        */
       public ClusterSet(ArrayList dataPoints, int numberOfClusters, int numberOfRepInCluster, double
shrinkFactor, HashMap dataPointMap) {
               initializeContainers(dataPoints.size(), numberOfClusters, numberOfRepInCluster,
shrinkFactor);
               initializePoints(dataPoints,dataPointMap);
               buildKDTree();
               buildHeap();
               startClustering();
       }
/**
 * Remembers the shared point map and copies the supplied data points into the
 * internal {@code points} array, keeping the order of the list.
 * @param dataPoints Data points list (elements are cast to {@code Point})
 * @param dataPointMap Map of data points
 */
public void initializePoints(ArrayList dataPoints, HashMap dataPointMap) {
        this.dataPointMap = dataPointMap;
        int slot = 0;
        for(Object element : dataPoints) {
                points[slot++] = (Point)element;
        }
}

/**
 * Runs the hierarchical merge and hands back whatever clusters are left.
 * Note that the heap is drained in the process: after this call it is empty.
 * @return
 * ArrayList of the remaining clusters, in the order the heap released them
 */
public ArrayList mergeClusters() {
        startClustering();
        ArrayList survivors = new ArrayList();
        while(heap.size() > 0) {
                survivors.add(heap.remove());
        }
        return survivors;
}

/**
 * Removes every cluster from the heap and returns them as an array.
 * The array is sized from the heap before draining; the heap is empty afterwards.
 * @return
 * Cluster[] the clusters in the order the heap released them
 */
public Cluster[] getAllClusters() {
        Cluster[] drained = new Cluster[heap.size()];
        for(int slot = 0; heap.size() != 0; slot++) {
                drained[slot] = (Cluster)heap.remove();
        }
        return drained;
}

/**
 * Inserts every stored data point into the KD tree, keyed by its coordinates
 * ({@code toDouble()}) with the point's index as the stored value.
 * Any exception raised for a point is passed to {@code debug} and the loop moves on.
 */
public void buildKDTree() {
        int position = 0;
        while(position < numberOfPoints) {
                try {
                        // the array access stays inside the try so its failures are handled like insert failures
                        Point current = points[position];
                        kdtree.insert(current.toDouble(), current.index);
                } catch(Exception e) {
                        debug(e);
                }
                position++;
        }
}

        /**
         * Builds the initial min heap. When the algorithm begins every point is its own
         * cluster: the point is both the sole member and the sole representative.
         * A first pass creates the clusters and records each point's nearest neighbour;
         * a second pass links each cluster to its neighbour's cluster and adds it to the heap.
         */
        public void buildHeap() {
                HashMap clusterByPointIndex = new HashMap();
                ArrayList singletons = new ArrayList();
                // Pass 1: one cluster per point, remembering the index of its nearest neighbour.
                for(int p=0; p<numberOfPoints; p++) {
                        Cluster singleton = new Cluster();
                        Point seed = points[p];
                        singleton.rep.add(seed);
                        singleton.pointsInCluster.add(seed);
                        int neighbourIndex = getNearestNeighbour(seed);
                        // the neighbour is resolved through the point map rather than by array position
                        Point neighbour = (Point)dataPointMap.get(neighbourIndex);
                        singleton.distanceFromClosest = seed.calcDistanceFromPoint(neighbour);
                        singleton.closestClusterRep.add(neighbourIndex);
                        singletons.add(singleton);
                        clusterByPointIndex.put(seed.index, singleton);
                }
                // Pass 2: every cluster exists now, so neighbour indices can be turned into cluster links.
                Iterator pending = singletons.iterator();
                while(pending.hasNext()) {
                        Cluster singleton = (Cluster)pending.next();
                        int neighbourIndex = (Integer)singleton.closestClusterRep.get(0);
                        singleton.closestCluster = (Cluster)clusterByPointIndex.get(neighbourIndex);
                        heap.add(singleton);
                }
        }

        /**
         * Looks up the nearest neighbour of a point in the KD tree.
         * Two neighbours are requested and the second one is returned
         * (presumably the first is the query point itself - confirm against the KD tree's ordering).
         * @param point Point to search around
         * @return
         * int value stored in the KD tree for the neighbour, or -1 if the lookup threw
         */
        public int getNearestNeighbour(Point point) {
                try {
                        Object[] neighbours = kdtree.nearest(point.toDouble(), 2);
                        int neighbourIndex = (Integer)neighbours[1];
                        return neighbourIndex;
                } catch(Exception e) {
                        debug(e);
                        return -1;
                }
        }

        /**
         * Runs the merge loop. It stops once the heap holds no more than
         * {@code clustersToBeFound} clusters.
         * In every iteration the cluster at the head of the heap is merged with its recorded
         * closest cluster, the representative points of both old clusters are deleted from the
         * KD tree, the representative points of the merged cluster are inserted, and the heap
         * is rearranged through {@code adjustHeap}.
         */
        public void startClustering() {
                 while(heap.size() > clustersToBeFound) {
                         // head of the heap; presumably the cluster with the smallest
                         // distanceFromClosest (ordering is defined outside this view - confirm)
                         Cluster minCluster = (Cluster)heap.remove();
                         Cluster closestCluster = minCluster.closestCluster;
                         // take the partner out as well so neither merged cluster stays in the heap
                         heap.remove(closestCluster);
                         Cluster newCluster = merge(minCluster,closestCluster);
                         // swap the old representatives for the new ones in the KD tree
                         deleteAllRepPointsForCluster(minCluster);
                         deleteAllRepPointsForCluster(closestCluster);
                         insertAllRepPointsForCluster(newCluster);
                         // Pointing the new cluster at an already-removed cluster makes adjustHeap
                         // treat it as having lost its neighbour, so its real closest cluster is
                         // recomputed there.
                         newCluster.closestCluster = minCluster;
                         heap.add(newCluster);
                         adjustHeap(newCluster, minCluster, closestCluster);
                 }
        }

       /**
        * Adjusts the heap after the new merged cluster has been added.
        * The heap is drained first so that the fields used for ordering can be changed safely,
        * then every cluster is re-inserted:
        * <ul>
        * <li>a cluster whose closest cluster was one of the two merged clusters gets its
        *     closest cluster recomputed against all other clusters;</li>
        * <li>any other cluster keeps its neighbour unless the merged cluster is now nearer,
        *     in which case it is re-pointed at the merged cluster (as in the CURE procedure).</li>
        * </ul>
        * If a cluster has no other cluster to compare against, its {@code closestCluster}
        * is left unchanged and its distance is {@code Double.MAX_VALUE}.
        * @param newCluster The merged cluster
        * @param oldcluster1 The Cluster 1 which was merged
        * @param oldCluster2 The Closest Cluster, Cluster2, to Cluster 1 which was merged
        */
       public void adjustHeap(Cluster newCluster, Cluster oldcluster1, Cluster oldCluster2) {
               ArrayList clusters = new ArrayList();
               int initialHeapSize = heap.size();
               for(int i=0; i<initialHeapSize; i++) {
                       clusters.add(heap.remove());
               }
               for(int i=0; i<clusters.size(); i++) {
                       Cluster cluster1 = (Cluster)clusters.get(i);
                       boolean lostNeighbour = cluster1.closestCluster == oldcluster1
                                       || cluster1.closestCluster == oldCluster2;
                       if(!lostNeighbour) {
                               // The neighbour still exists, but the merged cluster may have moved closer.
                               if(newCluster != null && cluster1 != newCluster) {
                                       double distanceToMerged = cluster1.computeDistanceFromCluster(newCluster);
                                       if(distanceToMerged < cluster1.distanceFromClosest) {
                                               cluster1.distanceFromClosest = distanceToMerged;
                                               cluster1.closestCluster = newCluster;
                                       }
                               }
                               heap.add(cluster1);
                               continue;
                       }
                       // Start from the largest double instead of a fixed 100000 so that widely
                       // spaced data still finds a live neighbour.
                       cluster1.distanceFromClosest = Double.MAX_VALUE;
                       for(int j=0; j<clusters.size(); j++) {
                               if(i==j) continue;
                               Cluster cluster2 = (Cluster)clusters.get(j);
                               double distance = cluster1.computeDistanceFromCluster(cluster2);
                               if(distance < cluster1.distanceFromClosest) {
                                       cluster1.distanceFromClosest = distance;
                                       cluster1.closestCluster = cluster2;
                               }
                       }
                       heap.add(cluster1);
               }
       }
/**
 * Adds each representative point of the given cluster to the KD tree,
 * keyed by its coordinates with the point's index as the stored value.
 * A failing insert is ignored and the remaining points are still processed.
 * @param cluster Merged cluster whose representatives are to be indexed
 */
public void insertAllRepPointsForCluster(Cluster cluster) {
        Iterator reps = cluster.rep.iterator();
        while(reps.hasNext()) {
                Point rep = (Point)reps.next();
                try {
                        kdtree.insert(rep.toDouble(), rep.index);
                } catch(Exception ignored) {
                        // insert failures are intentionally not reported here
                }
        }
}

/**
 * Removes each representative point of the given cluster from the KD tree,
 * using the point's coordinates as the key.
 * A failing delete is ignored and the remaining points are still processed.
 * @param cluster Cluster which got merged
 */
public void deleteAllRepPointsForCluster(Cluster cluster) {
        Iterator reps = cluster.rep.iterator();
        while(reps.hasNext()) {
                Point rep = (Point)reps.next();
                try {
                        kdtree.delete(rep.toDouble());
                } catch(Exception ignored) {
                        // delete failures are intentionally not reported here
                }
        }
}

/**
 * Computes the mean point of a cluster by summing the x and y coordinates of
 * all member points into a fresh {@code Point} and dividing by the member count.
 * There is no guard for an empty cluster.
 * @param cluster Cluster whose members are averaged
 * @return
 * Point The mean point of the cluster
 */
public Point computeMeanOfCluster(Cluster cluster) {
        Point mean = new Point();
        Iterator members = cluster.pointsInCluster.iterator();
        while(members.hasNext()) {
                Point member = (Point)members.next();
                mean.x += member.x;
                mean.y += member.y;
        }
        int memberCount = cluster.pointsInCluster.size();
        mean.x /= memberCount;
        mean.y /= memberCount;
        return mean;
}

/**
 * Merges two clusters. The new cluster contains the points of both inputs; its
 * representative points are chosen as well-scattered members and then shrunk
 * towards the mean by {@code shrinkFactor}.
 * <p>
 * Selection: the first representative is the member farthest from the mean; each
 * following one is the member whose minimum distance to the already chosen
 * representatives is largest. A member that has already been chosen is never chosen
 * again, so a cluster with fewer distinct members than {@code numberofRepInCluster}
 * simply ends up with fewer representatives.
 * @param cluster1 Cluster 1 to be merged
 * @param cluster2 Cluster 2 to be merged
 * @return
 * Cluster The merged cluster
 */
public Cluster merge(Cluster cluster1, Cluster cluster2) {
        Cluster newCluster = new Cluster();
        for(int i=0; i<cluster1.pointsInCluster.size(); i++) {
                newCluster.pointsInCluster.add(cluster1.pointsInCluster.get(i));
        }
        for(int i=0; i<cluster2.pointsInCluster.size(); i++) {
                newCluster.pointsInCluster.add(cluster2.pointsInCluster.get(i));
        }
        Point mean = computeMeanOfCluster(newCluster);
        ArrayList tempset = new ArrayList();
        for(int i=0; i<numberofRepInCluster; i++) {
                double maxDist = 0;
                Point maxPoint = null;
                for(int j=0; j<newCluster.pointsInCluster.size(); j++) {
                        Point p = (Point)newCluster.pointsInCluster.get(j);
                        // computeMinDistanceFromGroup ignores group members equal to p, so a point
                        // that is already a representative would be scored by its distance to the
                        // OTHER representatives and could win again; exclude it explicitly.
                        if(tempset.contains(p)) continue;
                        double minDist;
                        if(i==0) {
                                minDist = p.calcDistanceFromPoint(mean);
                        }
                        else {
                                minDist = computeMinDistanceFromGroup(p, tempset);
                        }
                        // ">=" keeps the original tie-breaking: the last candidate with the largest score wins
                        if(minDist >= maxDist) {
                                maxDist = minDist;
                                maxPoint = p;
                        }
                }
                if(maxPoint == null) {
                        // no unchosen member left (or the cluster is empty)
                        break;
                }
                tempset.add(maxPoint);
        }

        for(int i=0; i<tempset.size(); i++) {
                Point p = (Point) tempset.get(i);
                Point rep = new Point();
                // move the scattered point towards the mean by the shrink factor
                rep.x = p.x + shrinkFactor*(mean.x - p.x);
                rep.y = p.y + shrinkFactor*(mean.y - p.y);
                // fresh index so the representative does not clash with existing KD tree entries
                rep.index = Cure.getCurrentRepCount();
                newCluster.rep.add(rep);
        }
        return newCluster;
}

/**
 * Computes the minimum distance of a point from a group of points.
 * Group members for which {@code p.equals(q)} holds are ignored.
 * @param p Point p
 * @param group Group of points
 * @return
 * double The minimum distance to any other group member, or 0 when the group
 * holds no other point to compare against
 */
public double computeMinDistanceFromGroup(Point p, ArrayList group) {
        // Track the minimum from +infinity rather than a fixed 100000, so that genuine
        // distances of 100000 or more are no longer reported as 0.
        double minDistance = Double.POSITIVE_INFINITY;
        for(int i = 0; i< group.size(); i++) {
                Point q = (Point)group.get(i);
                if(p.equals(q)) continue;
                double distance = p.calcDistanceFromPoint(q);
                if(minDistance > distance) {
                        minDistance = distance;
                }
        }
        if(minDistance == Double.POSITIVE_INFINITY) return 0;
        else return minDistance;
}

    /**
     * Writes the clusters that were formed to files named "cluster0", "cluster1", ...
     * Exactly {@code clustersToBeFound} clusters are removed from the heap, one per file.
     */
    public void showClusters() {
            int shown = 0;
            while(shown < clustersToBeFound) {
                    Cluster next = (Cluster)heap.remove();
                    logCluster(next, "cluster" + shown);
                    shown++;
            }
    }

    /**
     * Hook for reporting an exception.
     * The stack-trace call is commented out, so at present this method does nothing
     * and exceptions passed to it are silently discarded.
     * @param e Exception e
     */
    public void debug(Exception e) {
             //e.printStackTrace(System.out);
    }

    /**
     * Logs the points of a cluster to a file, one "x y" pair per line (tab separated)
     * after a "#\tX\tY" header line. The file is opened in append mode.
     * The writer is always closed, including when writing fails; exceptions are
     * passed to {@code debug}.
     * @param cluster Cluster
     * @param filename Name of the file
     */
    public void logCluster(Cluster cluster, String filename) {
            BufferedWriter out = null;
            try {
                    out = new BufferedWriter(new FileWriter(filename, true));
                    out.write("#\tX\tY\n");
                    for(int j=0; j<cluster.pointsInCluster.size(); j++) {
                            Point p = (Point)cluster.pointsInCluster.get(j);
                            out.write("\t" + p.x + "\t" + p.y + "\n");
                    }
                    out.flush();
            } catch(Exception e){
                    debug(e);
            } finally {
                    // closing the BufferedWriter also closes the underlying FileWriter
                    if(out != null) {
                            try {
                                    out.close();
                            } catch(Exception e) {
                                    debug(e);
                            }
                    }
            }
    }
}
Cure.java
package cure;

import java.io.*;
import java.util.*;

/**
 * CURE Clustering Algorithm
 * The algorithm follows the six steps specified in the original paper by Guha et al.
 *
 * @version 1.0
 * @author jmishra
 */
public class Cure {

        /** The Input Parameters to the algorithm **/
        // path of the data file opened by readDataPoints
        private String dataFile;
        // declared number of points; sizes the dataPoints array and bounds the random sampling
        private int totalNumberOfPoints;
        // number of clusters requested for the final clustering
        private int numberOfClusters;
        // passed to ClusterSet as the number of representative points per cluster
        private int minRepresentativeCount;
        // passed to ClusterSet as the shrink factor for representative points
        private double shrinkFactor;
        // probability used in calculateSampleSize (enters the formula as 1/probability)
        private double requiredRepresentationProbablity;
        // number of partitions the sampled points are split into
        private int numberOfPartitions;
        // divisor (together with numberOfPartitions) for the per-partition cluster count;
        // a value >= 10 also raises the first-stage outlier threshold from 0 to 1
        private int reducingFactorForEachPartition;


        // all points read from the data file, positioned by their index
        private Point[] dataPoints;
        // points of clusters that were eliminated as outliers
        private ArrayList outliers;
        // point index -> Point, filled while reading the data file
        private HashMap dataPointsMap;

        // static counter behind getCurrentRepCount; initialised to totalNumberOfPoints
        private static int currentRepAdditionCount;
        // keys are the indices of the points chosen by selectRandomPoints (values are "")
        private Hashtable integerTable;


        /**
         * Runs the complete CURE pipeline: read parameters and data, draw a random sample,
         * partition it, pre-cluster each partition, eliminate outlier clusters, cluster the
         * remaining pre-clusters, label the unsampled points and finally write the results.
         * Only the steps between reading the data and writing the output are timed.
         * @param args command-line arguments, forwarded to {@code initializeParameters}
         */
        public Cure(String[] args) {

                System.out.println("CURE Clustering Algorithm");
                System.out.println("-------------------------\n");

                initializeParameters(args);
                readDataPoints();

                long beginTime = System.currentTimeMillis();

                int sampleSize = calculateSampleSize();
                ArrayList randomPointSet = selectRandomPoints(sampleSize);
                ArrayList[] partitionedPointSet = partitionPointSet(randomPointSet);
                ArrayList subClusters = clusterSubPartitions(partitionedPointSet);
                // with a large reducing factor, pre-clusters of size <= 1 count as outliers; otherwise size <= 0
                int outlierEligibilityCount = (reducingFactorForEachPartition >= 10) ? 1 : 0;
                eliminateOutliersFirstStage(subClusters, outlierEligibilityCount);
                ArrayList clusters = clusterAll(subClusters);
                clusters = labelRemainingDataPoints(clusters);

                long time = System.currentTimeMillis() - beginTime;

                System.out.println("The Algorithm took " + time + " milliseconds to complete.");
                // the message is one logical line; it is split with "+" because a string literal
                // may not span physical lines
                System.out.println("\nPlease Use GNUPlot to show the clusters by rendering the file using "
                                + "\"load plotcure.txt\" on GNUPlot Console");

                showClusters(clusters);
        }

       /**
        * Initializes the parameters and the containers that depend on them.
        * With no arguments a built-in default configuration (data file "spaeth2_05.txt")
        * is used. Otherwise the eight parameters are read from positions 1 to 8:
        * data file, total number of points, number of clusters, representative count,
        * shrink factor, representation probability, number of partitions, reducing factor.
        * NOTE(review): position 0 is not read here - confirm what the launcher puts there.
        * @param args The command line arguments
        * @throws IllegalArgumentException if arguments are given but fewer than nine
        */
       private void initializeParameters(String[] args) {
               if(args == null || args.length == 0) {
                       dataFile = "spaeth2_05.txt";
                       totalNumberOfPoints = 59;
                       numberOfClusters = 5;
                       minRepresentativeCount = 6;
                       shrinkFactor = 0.5;
                       requiredRepresentationProbablity = 0.1;
                       numberOfPartitions = 2;
                       reducingFactorForEachPartition = 2;
               }
               else {
                       // fail with a readable message instead of an ArrayIndexOutOfBoundsException
                       if(args.length < 9) {
                               throw new IllegalArgumentException("Expected 9 arguments (parameters are read from"
                                               + " positions 1-8: dataFile totalNumberOfPoints numberOfClusters"
                                               + " minRepresentativeCount shrinkFactor requiredRepresentationProbablity"
                                               + " numberOfPartitions reducingFactorForEachPartition) but got "
                                               + args.length);
                       }
                       dataFile = args[1];
                       totalNumberOfPoints = Integer.parseInt(args[2]);
                       numberOfClusters = Integer.parseInt(args[3]);
                       minRepresentativeCount = Integer.parseInt(args[4]);
                       shrinkFactor = Double.parseDouble(args[5]);
                       requiredRepresentationProbablity = Double.parseDouble(args[6]);
                       numberOfPartitions = Integer.parseInt(args[7]);
                       reducingFactorForEachPartition = Integer.parseInt(args[8]);
               }
               dataPoints = new Point[totalNumberOfPoints];
               dataPointsMap = new HashMap();
               // representative indices continue after the data point indices
               currentRepAdditionCount = totalNumberOfPoints;
               integerTable = new Hashtable();
               outliers = new ArrayList();
       }

       /**
        * Reads the data points from {@code dataFile}. Each non-blank line supplies the x and y
        * coordinate as its first two whitespace-separated tokens. Points are stored in
        * {@code dataPoints} and {@code dataPointsMap} under a running index.
        * Blank lines are skipped, reading stops once {@code dataPoints} is full, and the
        * reader is always closed. Exceptions are passed to {@code debug}.
        */
       private void readDataPoints() {
               int pointIndex = 0;
               BufferedReader in = null;
               try {
                       in = new BufferedReader(new FileReader(dataFile));
                       String data = in.readLine();
                       // stop at end of file or when the declared number of points has been read
                       while(data != null && pointIndex < dataPoints.length) {
                               StringTokenizer st = new StringTokenizer(data);
                               if(st.hasMoreTokens()) {
                                       double x = Double.parseDouble(st.nextToken());
                                       double y = Double.parseDouble(st.nextToken());
                                       dataPoints[pointIndex] = new Point(x,y,pointIndex);
                                       dataPointsMap.put(pointIndex, dataPoints[pointIndex]);
                                       pointIndex++;
                               }
                               data = in.readLine();
                       }
               } catch(Exception e){
                       debug(e);
               } finally {
                       if(in != null) {
                               try {
                                       in.close();
                               } catch(Exception e) {
                                       debug(e);
                               }
                       }
               }
       }

        /**
         * Calculates the sample size based on the Chernoff bounds mentioned in the CURE algorithm:
         * 0.5*N + k*L + k*sqrt(L^2 + (N/k)*L), where N is the total number of points,
         * k the number of clusters and L = log10(1/requiredRepresentationProbablity).
         * N/k is evaluated in floating point, and the result is capped at N because a sample
         * cannot contain more distinct points than the data set.
         * @return
         * int The sample data size to be worked on
         * @throws ArithmeticException if numberOfClusters is 0
         */
        private int calculateSampleSize() {
                if(numberOfClusters == 0) {
                        // keep the failure mode of the former integer division
                        throw new ArithmeticException("/ by zero: numberOfClusters is 0");
                }
                double logTerm = Math.log10(1/requiredRepresentationProbablity);
                double averageClusterSize = (double)totalNumberOfPoints / numberOfClusters;
                double estimate = (0.5 * totalNumberOfPoints)
                                + (numberOfClusters * logTerm)
                                + (numberOfClusters * Math.sqrt(Math.pow(logTerm, 2) + averageClusterSize * logTerm));
                return Math.min((int)estimate, totalNumberOfPoints);
        }

       /**
        * Selects distinct random points from the data set. Every chosen index is recorded in
        * {@code integerTable} so that it is not picked twice and can later be recognised as sampled.
        * The request is capped at the number of indices not yet recorded, so the method
        * always terminates.
        * @param sampleSize The sample size requested
        * @return
        * ArrayList The selected random points (at most {@code sampleSize} of them)
        */
       private ArrayList selectRandomPoints(int sampleSize) {
               ArrayList randomPointSet = new ArrayList();
               // never ask for more distinct indices than are still available
               int available = totalNumberOfPoints - integerTable.size();
               int target = Math.min(sampleSize, available);
               Random random = new Random();
               while(randomPointSet.size() < target) {
                       int index = random.nextInt(totalNumberOfPoints);
                       if(integerTable.containsKey(index)) {
                               continue;
                       }
                       randomPointSet.add(dataPoints[index]);
                       integerTable.put(index, "");
               }
               return randomPointSet;
       }

       /**
        * Splits the sampled points into p consecutive partitions (p = numberOfPartitions).
        * The first p-1 partitions each receive size/p points (integer division);
        * the last partition receives all points that remain.
        * @param pointSet Sample data point set
        * @return
        * ArrayList[] the partitions, in the order of the input list
        */
       private ArrayList[] partitionPointSet(ArrayList pointSet) {
               ArrayList[] parts = new ArrayList[numberOfPartitions];
               final int lastIndex = numberOfPartitions - 1;
               Iterator remaining = pointSet.iterator();
               for(int p = 0; p < lastIndex; p++) {
                       ArrayList part = new ArrayList();
                       for(int taken = 0; taken < pointSet.size() / numberOfPartitions; taken++) {
                               part.add(remaining.next());
                       }
                       parts[p] = part;
               }
               // whatever the equal-sized partitions did not consume goes into the last one
               ArrayList tail = new ArrayList();
               while(remaining.hasNext()) {
                       tail.add(remaining.next());
               }
               parts[lastIndex] = tail;
               return parts;
       }

        /**
         * Pre-clusters every partition and collects the resulting clusters in one list.
         * The target cluster count per partition is
         * totalNumberOfPoints / (numberOfPartitions * reducingFactorForEachPartition).
         * @param partitionedSet the partitions produced from the sampled points
         * @return
         * ArrayList all pre-clusters, partition by partition
         */
        private ArrayList clusterSubPartitions(ArrayList partitionedSet[]) {
                ArrayList clusters = new ArrayList();
                int divisor = numberOfPartitions * reducingFactorForEachPartition;
                int targetPerPartition = totalNumberOfPoints / divisor;
                for(ArrayList partition : partitionedSet) {
                        ClusterSet preClustered = new ClusterSet(partition, targetPerPartition,
                                        minRepresentativeCount, shrinkFactor, dataPointsMap);
                        for(Cluster subCluster : preClustered.getAllClusters()) {
                                clusters.add(subCluster);
                        }
                }
                return clusters;
        }

       /**
        * Eliminates outlier clusters after pre-clustering. A cluster whose size is at most
        * {@code outlierEligibilityCount} has its points recorded as outliers and is removed
        * from the given list. Removal happens in a second pass so the list is not modified
        * while it is being scanned.
        * @param clusters Clusters present (modified in place)
        * @param outlierEligibilityCount Largest cluster size that is still treated as an outlier
        */
       private void eliminateOutliersFirstStage(ArrayList clusters, int outlierEligibilityCount) {
               ArrayList rejected = new ArrayList();
               for(int i = 0; i < clusters.size(); i++) {
                       Cluster candidate = (Cluster)clusters.get(i);
                       if(candidate.getClusterSize() > outlierEligibilityCount) {
                               continue;
                       }
                       updateOutlierSet(candidate);
                       rejected.add(candidate);
               }
               for(int i = 0; i < rejected.size(); i++) {
                       clusters.remove(rejected.get(i));
               }
       }

        /**
         * Clusters all remaining pre-clusters: a {@code ClusterSet} is created from them and
         * merged with CURE's hierarchical clustering until the specified number of clusters remains.
         * @param clusters Pre-clusters formed
         * @return
         * ArrayList Set of clusters
         */
        private ArrayList clusterAll(ArrayList clusters) {
                return new ClusterSet(clusters, numberOfClusters, minRepresentativeCount,
                                shrinkFactor, dataPointsMap, true).mergeClusters();
        }

        /**
         * Assigns every data point that was not part of the random sample to the cluster
         * owning the representative point nearest to it. Points are appended to the
         * {@code pointsInCluster} list of that cluster.
         * Array slots that were never filled (null) are skipped, and a point is left
         * unassigned only when there is no representative to compare against.
         * @param clusters Set of clusters
         * @return
         * ArrayList the same list, with the clusters modified in place
         */
        private ArrayList labelRemainingDataPoints(ArrayList clusters) {

                for(int index = 0; index < dataPoints.length; index++) {
                        // sampled points were already handled by the clustering itself
                        if(integerTable.containsKey(index)) continue;
                        Point p = dataPoints[index];
                        if(p == null) continue;
                        // start from +infinity so that no finite distance is ever rejected
                        double smallestDistance = Double.POSITIVE_INFINITY;
                        int nearestClusterIndex = -1;
                        for(int i = 0; i < clusters.size(); i++) {
                                ArrayList rep = ((Cluster)clusters.get(i)).rep;
                                for(int j=0; j<rep.size(); j++) {
                                        double distance = p.calcDistanceFromPoint((Point)rep.get(j));
                                        if(distance < smallestDistance) {
                                                smallestDistance = distance;
                                                nearestClusterIndex = i;
                                        }
                                }
                        }
                        if(nearestClusterIndex != -1) {
                                ((Cluster)clusters.get(nearestClusterIndex)).pointsInCluster.add(p);
                        }
                }
                return clusters;
        }

          /**
           * Appends all points of a cluster that was identified as an outlier
           * to the {@code outliers} list.
           * @param cluster Outlier cluster
           */
          private void updateOutlierSet(Cluster cluster) {
                  outliers.addAll(cluster.getPointsInCluster());
          }

          /**
           * Hook for reporting an exception.
           * The stack-trace call is commented out, so at present this method does nothing
           * and exceptions passed to it are silently discarded.
           * @param e the exception to report
           */
          private void debug(Exception e) {
                  //e.printStackTrace(System.out);
          }

          /**
           * Advances the shared representative counter by one and returns the new value,
           * so that newly added representative points do not conflict with older KD tree indices.
           * @return
           * int Next representative count
           */
          public static int getCurrentRepCount() {
                  currentRepAdditionCount += 1;
                  return currentRepAdditionCount;
          }

          /**
           * Writes the result files: one file "cluster&lt;i&gt;" per cluster in the list,
           * the "outliers" file and the GNUPlot script.
           * The number of files written follows the actual size of the list, which is also
           * the count handed to the plot script, so both always agree.
           * @param clusters final clusters to write
           */
          public void showClusters(ArrayList clusters) {
                  int totalClusters = clusters.size();
                  for(int i=0; i<totalClusters; i++) {
                          Cluster cluster = (Cluster)clusters.get(i);
                          logCluster(cluster, "cluster" + i);
                  }
                  logOutlier();
                  logPlotScript(totalClusters);
          }

          /**
           * Opens the named file for appending and wraps it in a buffered writer.
           * @param filename file to open
           * @return the writer, or null if opening failed (the exception goes to {@code debug})
           */
          private BufferedWriter getWriterHandle(String filename) {
                  try {
                          return new BufferedWriter(new FileWriter(filename, true));
                  } catch(Exception e) {
                          debug(e);
                          return null;
                  }
          }
/**
 * Flushes and closes a writer obtained from {@code getWriterHandle}.
 * A null handle (the file could not be opened) is ignored. The writer is closed
 * even if the flush fails; exceptions are passed to {@code debug}.
 * @param out writer to close, may be null
 */
private void closeWriterHandle(BufferedWriter out) {
        if(out == null) {
                return;
        }
        try {
                try {
                        out.flush();
                } finally {
                        // must run even when flush throws, otherwise the file handle leaks
                        out.close();
                }
        } catch(Exception e) {
                debug(e);
        }
}

/**
 * Appends the points of a cluster to the named file: a "#\tX\tY" header line
 * followed by one tab-separated x/y pair per point.
 * Exceptions while writing go to {@code debug}; the writer is closed afterwards either way.
 * @param cluster cluster whose points are written
 * @param filename target file
 */
private void logCluster(Cluster cluster, String filename) {
        BufferedWriter out = getWriterHandle(filename);
        try {
                out.write("#\tX\tY\n");
                Iterator members = cluster.pointsInCluster.iterator();
                while(members.hasNext()) {
                        Point p = (Point)members.next();
                        out.write("\t" + p.x + "\t" + p.y + "\n");
                }
        } catch(Exception e){
                debug(e);
        }
        closeWriterHandle(out);
}

/**
 * Appends all recorded outlier points to the file "outliers": a "#\tX\tY" header line
 * followed by one tab-separated x/y pair per point.
 * Exceptions while writing go to {@code debug}; the writer is closed afterwards either way.
 */
private void logOutlier() {
        BufferedWriter out = getWriterHandle("outliers");
        try {
                out.write("#\tX\tY\n");
                Iterator rejected = outliers.iterator();
                while(rejected.hasNext()) {
                        Point p = (Point)rejected.next();
                        out.write("\t" + p.x + "\t" + p.y + "\n");
                }
        } catch(Exception e){
                debug(e);
        }
        closeWriterHandle(out);
}

/**
 * Writes the GNUPlot script "plotcure.txt": the style preamble followed by a single
 * plot command that lists every cluster file and the outliers file.
 * The plot command is terminated with a newline. The file is opened in append mode,
 * so without the terminator a later run would glue its "reset" onto this line.
 * @param totalClusters number of cluster files ("cluster0" .. "cluster&lt;n-1&gt;") to plot
 */
private void logPlotScript(int totalClusters) {
        BufferedWriter out = getWriterHandle("plotcure.txt");
        try {
                setPlotStyle(out);
                out.write("plot");
                for(int i = 0; i< totalClusters; i++) {
                        out.write(" \"cluster" + i + "\",");
                }
                out.write(" \"outliers\"\n");
        } catch(Exception e){
                debug(e);
        }
        closeWriterHandle(out);
}

/**
 * Writes the fixed GNUPlot preamble (reset, size ratio, no key, title) to the writer.
 * Exceptions are passed to {@code debug}.
 * @param out writer for the plot script
 */
private void setPlotStyle(BufferedWriter out) {
        String[] preamble = {
                "reset\n",
                "set size ratio 2\n",
                "unset key\n",
                "set title \"CURE\"\n"
        };
        try {
                for(int i = 0; i < preamble.length; i++) {
                        out.write(preamble[i]);
                }
        } catch(Exception e) {
                debug(e);
        }
}
}


KD – Tree Implementation
An open-source KD Tree implementation has been used; it is distributable under the GNU General Public License.

Please refer to http://www.cs.wlu.edu/~levy/software/kd/ for the source/executable files.

Please add kd.jar to the classpath for successful execution of the CURE and DBSCAN packages.

http://www.cs.wlu.edu/~levy/software/kd/doc/ contains the Java Documentation for KD Tree Implementation.

								
To top