package orisun;

import java.io.File;
import java.util.ArrayList;
import java.util.Vector;
import java.util.Iterator;

/**
 * Density-based clustering (DBSCAN) over a list of {@link DataObject}s.
 *
 * <p>Cluster ids are stored on the objects themselves via {@code setCid}:
 * {@code 0} means "not classified yet", a positive number is the id of the
 * cluster the object belongs to, and {@code -1} marks noise.
 */
public class DBScan {

    /** Neighborhood radius: two objects are neighbors when their distance is {@code <= Eps}. */
    double Eps = 3;

    /** Density threshold: an object with at least this many neighbors is a core point. */
    int MinPts = 4;

    /**
     * Collects every object in {@code objects} whose distance to {@code p} is at most {@link #Eps}.
     *
     * <p>When {@code p} itself is contained in {@code objects} it is expected to be part of
     * the result, because the distance from an object to itself is 0.
     *
     * @param p       the object whose neighborhood is requested
     * @param objects all candidate objects
     * @return the neighbors of {@code p}, in the iteration order of {@code objects}
     */
    public Vector<DataObject> getNeighbors(DataObject p, ArrayList<DataObject> objects) {
        Vector<DataObject> neighbors = new Vector<DataObject>();
        // p's vector does not depend on the candidate, so fetch it once.
        double[] center = p.getVector();
        int len = center.length;
        for (DataObject q : objects) {
            double[] other = q.getVector();
            if (Global.calEditDist(center, other, len) <= Eps) {        // edit distance
            // if (Global.calEuraDist(center, other, len) <= Eps) {      // Euclidean distance
            // if (Global.calCityBlockDist(center, other, len) <= Eps) { // city-block distance
            // if (Global.calSinDist(center, other, len) <= Eps) {       // sine of the angle between the vectors
                neighbors.add(q);
            }
        }
        return neighbors;
    }

    /**
     * Runs the clustering over {@code objects}, writing the visited flag and the
     * cluster id of every object in place.
     *
     * <p>A single pass is sufficient: every object is either already visited when the
     * scan reaches it or is marked visited by the scan itself. An empty list (or a list
     * whose objects are all visited already) therefore returns immediately.
     *
     * @param objects the objects to cluster
     * @return the number of clusters created by this call
     */
    public int dbscan(ArrayList<DataObject> objects) {
        int clusterID = 0;
        for (DataObject p : objects) {
            if (p.isVisited()) {
                continue;
            }
            // Once visited, it is settled whether p is a core point or not.
            p.setVisited(true);
            Vector<DataObject> neighbors = getNeighbors(p, objects);
            if (neighbors.size() < MinPts) {
                // Not a core point: provisionally noise, unless it already carries a cluster id.
                // A later cluster expansion may still claim it as a border point.
                if (p.getCid() <= 0) {
                    p.setCid(-1);
                }
            } else if (p.getCid() <= 0) {
                // Unlabelled core point: it seeds a new cluster.
                clusterID++;
                expandCluster(p, neighbors, clusterID, objects);
            } else {
                // Core point that already carries a cluster id: keep growing that cluster.
                expandCluster(p, neighbors, p.getCid(), objects);
            }
        }
        return clusterID;
    }

    /**
     * Assigns {@code clusterID} to {@code p} and to every object that is
     * density-reachable from it.
     *
     * <p>The seed list starts as {@code p}'s neighborhood and grows while it is being
     * processed: whenever a seed turns out to be a core point, its own neighbors are
     * appended. Finishing the whole expansion here keeps the result independent of the
     * order of {@code objects}; leaving reachable core points for the outer scan would
     * let it open a second cluster for them before this cluster reaches them.
     *
     * @param p         the core point the expansion starts from
     * @param neighbors the neighborhood of {@code p}
     * @param clusterID the id to assign
     * @param objects   all objects, used for neighborhood queries
     */
    private void expandCluster(DataObject p, Vector<DataObject> neighbors,
            int clusterID, ArrayList<DataObject> objects) {
        p.setCid(clusterID);
        ArrayList<DataObject> seeds = new ArrayList<DataObject>(neighbors);
        // Index loop on purpose: seeds is appended to while it is being traversed.
        for (int i = 0; i < seeds.size(); i++) {
            DataObject q = seeds.get(i);
            if (!q.isVisited()) {
                q.setVisited(true);
                Vector<DataObject> qNeighbors = getNeighbors(q, objects);
                if (qNeighbors.size() >= MinPts) {
                    // q is a core point, so its neighborhood belongs to this cluster too.
                    for (DataObject candidate : qNeighbors) {
                        // Skip objects that are already fully handled (visited and labelled)
                        // so the seed list stays bounded.
                        if (!candidate.isVisited() || candidate.getCid() <= 0) {
                            seeds.add(candidate);
                        }
                    }
                }
            }
            if (q.getCid() <= 0) { // q is not a member of any cluster yet (unclassified or noise)
                q.setCid(clusterID);
            }
        }
    }

    /**
     * Reads the sample matrix and its row labels, clusters them with the default
     * parameters and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DataSource datasource = new DataSource();
        //Eps=3,MinPts=4
        datasource.readMatrix(new File("/home/orisun/test/dot.mat"));
        datasource.readRLabel(new File("/home/orisun/test/dot.rlabel"));
        //Eps=2.5,MinPts=4
        // datasource.readMatrix(new File("/home/orisun/text.normalized.mat"));
        // datasource.readRLabel(new File("/home/orisun/text.rlabel"));
        DBScan ds = new DBScan();
        int clunum = ds.dbscan(datasource.objects);
        datasource.printResult(datasource.objects, clunum);
    }
}