码迷,mamicode.com
首页 > 其他好文 > 详细

我的相似度一次迭代聚类

时间:2014-12-18 18:14:00      阅读:189      评论:0      收藏:0      [点我收藏+]

标签:blog   ar   io   os   sp   for   java   on   div   

package MyCluster;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Vector;

/**
 * Single-pass, similarity-threshold clustering of feature vectors.
 *
 * <p>Seed clusters are read from {@code ori/original.txt}. Each line of the data file is then
 * compared, by cosine similarity, with the centre of every current cluster. The vector joins the
 * most similar cluster when that similarity exceeds the threshold; otherwise it founds a new
 * cluster. The index of the chosen cluster is written to the output file, one line per input line.
 *
 * <p>Input lines are split on single spaces; the first token is skipped and the following
 * {@code Featurelenth} tokens are parsed as doubles.
 *
 * <p>Original author's note (translated): intended for long feature dimensions and more than
 * k classes.
 */
public class SimpleCluster {

	/**
	 * Runs the clustering with the built-in paths ({@code raw/data.txt} as input,
	 * {@code raw/clusterresult.txt} as output) and a similarity threshold of 0.1.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) throws Exception, IOException {
		int Featurelenth = GetLenth();
		double Theta = 0.1;
		String Addr = "raw/data.txt";
		String OutPutAddr = "raw/clusterresult.txt";

		ArrayList<ClusterObject> ResultSet = Ini_ClusterLIST(Featurelenth);
		Get_Cluster_Result(Theta, ResultSet, Addr, OutPutAddr, Featurelenth);
	}

	/**
	 * Reads every line of {@code Addr}, assigns it to a cluster and writes the cluster index for
	 * that line to {@code OutPutAddr}.
	 *
	 * @param Theta        similarity threshold a vector must exceed to join an existing cluster
	 * @param ResultSet    current clusters; modified in place
	 * @param Addr         path of the UTF-8 input file
	 * @param OutPutAddr   path of the output file (overwritten)
	 * @param Featurelenth number of feature values parsed from each line
	 * @return the same {@code ResultSet} instance, after all lines have been distributed
	 */
	public static ArrayList<ClusterObject> Get_Cluster_Result(double Theta,ArrayList<ClusterObject> ResultSet,String Addr,String OutPutAddr,int Featurelenth) throws Exception, FileNotFoundException
	{
		// The reader is opened first so that a missing input file does not truncate the output file.
		BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(Addr), "utf-8"));
		try {
			PrintWriter pw = new PrintWriter(OutPutAddr);
			try {
				String line;
				while ((line = reader.readLine()) != null) {
					Vector<Double> Line_Vector = StringToVector(line, Featurelenth);
					Distribute_OneVecter_to_Cluster(Theta, ResultSet, Line_Vector, Featurelenth, pw, line);
				}
			} finally {
				// Close (and flush) the writer even when a line fails to parse.
				pw.close();
			}
		} finally {
			reader.close();
		}
		return ResultSet;
	}

	/**
	 * Assigns one vector to the most similar cluster, or creates a new cluster for it, and writes
	 * the resulting cluster index followed by {@code "\r\n"} to {@code pw}.
	 *
	 * <p>When the vector joins an existing cluster, that cluster's centre is replaced by the
	 * updated mean and the vector is appended to its {@code LIST}. The cluster's {@code totalsum}
	 * field is not touched here.
	 *
	 * @param Theta        similarity threshold that must be exceeded to join an existing cluster
	 * @param ResultSet    current clusters; modified in place
	 * @param ToBeComfined the vector to place
	 * @param Featurelenth number of components used in the similarity and centre computations
	 * @param pw           writer receiving the chosen cluster index
	 * @param line         the raw input line (not used by this method)
	 * @return the same {@code ResultSet} instance
	 */
	public static ArrayList<ClusterObject> Distribute_OneVecter_to_Cluster(double Theta,ArrayList<ClusterObject> ResultSet,Vector<Double> ToBeComfined,int Featurelenth,PrintWriter pw,String line) 
	{
		// Start below every possible similarity and with "no cluster" so that an empty cluster
		// list or a negative threshold can never select a non-existent / arbitrary cluster 0.
		double similaritymax = Double.NEGATIVE_INFINITY;
		int NO_max = -1;
		for (int i = 0; i < ResultSet.size(); i++) {
			double Similarity_cur = Similarity(ToBeComfined, ResultSet.get(i).center, Featurelenth);
			if (Similarity_cur > similaritymax) {
				NO_max = i;
				similaritymax = Similarity_cur;
			}
		}

		if (NO_max >= 0 && similaritymax > Theta) {
			ClusterObject best = ResultSet.get(NO_max);
			// The new centre must be computed before the member is added: GetNewCenter uses the
			// current LIST size as the weight of the old centre.
			best.setcenter(GetNewCenter(best, Featurelenth, ToBeComfined));
			best.LIST.add(ToBeComfined);
			pw.write(NO_max + "\r\n");
		} else {
			ClusterObject NewClu = IniClusterOj(1, ResultSet.size(), ToBeComfined);
			ResultSet.add(NewClu);
			pw.write(ResultSet.size() - 1 + "\r\n");
		}
		return ResultSet;
	}

	/**
	 * Computes the centre a cluster would have after adding one more member, as the weighted mean
	 * of the current centre (weight = current {@code LIST} size) and the new member (weight 1).
	 * The cluster itself is not modified.
	 *
	 * @param Cluster      cluster whose current centre and member count are used
	 * @param Featurelenth number of components to compute
	 * @param NewMenber    the vector about to be added
	 * @return a new vector holding the updated centre
	 */
	public static Vector<Double> GetNewCenter(ClusterObject Cluster,int Featurelenth,Vector<Double> NewMenber)
	{
		Vector<Double> Pre = Cluster.getcenter();
		int count = Cluster.LIST.size();
		Vector<Double> Fresh = new Vector<Double>();
		for (int i = 0; i < Featurelenth; i++) {
			Fresh.add((Pre.get(i) * count + NewMenber.get(i)) / (count + 1));
		}
		return Fresh;
	}

	/**
	 * Cosine similarity: the dot product over the first {@code Featurelenth} components divided
	 * by the product of the two vectors' Euclidean lengths.
	 *
	 * @param A            first vector
	 * @param B            second vector
	 * @param Featurelenth number of components included in the dot product
	 * @return the similarity, or 0 when either vector has zero length (avoids a NaN result)
	 */
	public static double Similarity(Vector<Double> A,Vector<Double> B,int Featurelenth)
	{
		double Vec_Multi_Sum = 0;
		for (int i = 0; i < Featurelenth; i++) {
			Vec_Multi_Sum += A.get(i) * B.get(i);
		}
		double norms = VectorLength(A) * VectorLength(B);
		if (norms == 0) {
			// 0/0 would yield NaN; treat a zero vector as having no similarity to anything.
			return 0;
		}
		return Vec_Multi_Sum / norms;
	}

	/**
	 * Euclidean length (L2 norm) over all components of {@code A}.
	 *
	 * @param A the vector to measure
	 * @return the square root of the sum of squared components; 0 for an empty vector
	 */
	public static double VectorLength(Vector<Double> A)
	{
		double sum = 0;
		for (int i = 0; i < A.size(); i++) {
			sum += A.get(i) * A.get(i);
		}
		return Math.sqrt(sum);
	}

	/**
	 * Builds the initial cluster list from {@code ori/original.txt}: each line becomes one
	 * single-member cluster, numbered from 0 in file order.
	 *
	 * @param Featurelenth number of feature values parsed from each line
	 * @return the list of seed clusters (empty when the file has no lines)
	 */
	public static ArrayList<ClusterObject> Ini_ClusterLIST(int Featurelenth) throws IOException, FileNotFoundException
	{
		ArrayList<ClusterObject> ori = new ArrayList<ClusterObject>();
		BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("ori/original.txt"), "utf-8"));
		try {
			String line;
			int i = 0;
			while ((line = reader.readLine()) != null) {
				Vector<Double> v = StringToVector(line, Featurelenth);
				ori.add(IniClusterOj(1, i++, v));
			}
		} finally {
			// Release the file handle even when a line fails to parse.
			reader.close();
		}
		return ori;
	}

	/**
	 * Creates a cluster whose only member, and centre, is {@code center}.
	 *
	 * @param totalsum value stored in the cluster's {@code totalsum} field
	 * @param NO       cluster number
	 * @param center   the founding vector
	 * @return the new cluster
	 */
	public static ClusterObject IniClusterOj(int totalsum,int NO,Vector<Double> center)
	{
		ArrayList<Vector<Double>> LIST = new ArrayList<Vector<Double>>();
		LIST.add(center);
		return new ClusterObject(totalsum, NO, LIST, center);
	}

	/**
	 * Determines the feature count from the first line of {@code raw/data.txt}: the number of
	 * space-separated tokens minus one (the leading token is not a feature).
	 *
	 * @return the feature count, or 0 when the file is empty
	 */
	public static int GetLenth() throws IOException
	{
		BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("raw/data.txt"), "utf-8"));
		try {
			// Read exactly once: a second readLine() here would skip the first line and
			// return null (NullPointerException) for a one-line file.
			String line = reader.readLine();
			if (line == null) {
				return 0;
			}
			// split can return an empty array (e.g. for " "), so clamp at 0.
			return Math.max(0, line.split(" ").length - 1);
		} finally {
			reader.close();
		}
	}

	/**
	 * Parses one input line into a feature vector. The line is split on single spaces; token 0 is
	 * skipped and tokens 1..{@code Featurelenth} are converted with {@link Double#valueOf(String)}.
	 *
	 * @param line         the raw input line
	 * @param Featurelenth number of feature values to read
	 * @return the parsed vector, of size {@code Featurelenth}
	 * @throws ArrayIndexOutOfBoundsException if the line has fewer than {@code Featurelenth + 1} tokens
	 * @throws NumberFormatException          if a token is not a valid double
	 */
	public static Vector<Double> StringToVector (String line,int Featurelenth)
	{
		Vector<Double> res = new Vector<Double>();
		String[] value = line.split(" ");
		for (int i = 1; i <= Featurelenth; i++) {
			res.add(Double.valueOf(value[i]));
		}
		return res;
	}
}

package MyCluster;

import java.util.ArrayList;
import java.util.Vector;


/**
 * A single cluster: its number, its member vectors and its current centre.
 *
 * <p>The fields are public and mutable; the references passed to the constructor are stored
 * as-is, without copying.
 */
public class ClusterObject {

	/** Value supplied at construction; this class never changes it. */
	public int totalsum;
	/** Cluster number. */
	public int NO;
	/** Member vectors of this cluster. */
	public ArrayList<Vector<Double>> LIST;
	/** Current centre of this cluster. */
	public Vector<Double> center;

	/**
	 * Creates a cluster from the given values. All four fields are assigned directly from the
	 * arguments, so no default initialisation is needed.
	 *
	 * @param totalsum value for the {@code totalsum} field
	 * @param NO       cluster number
	 * @param LIST     member vectors (stored by reference)
	 * @param center   centre vector (stored by reference)
	 */
	public ClusterObject(int totalsum, int NO, ArrayList<Vector<Double>> LIST, Vector<Double> center) {
		this.totalsum = totalsum;
		this.NO = NO;
		this.LIST = LIST;
		this.center = center;
	}

	/** @return the cluster number */
	public int getNO() {
		return NO;
	}

	/** @return the current centre vector (the stored reference, not a copy) */
	public Vector<Double> getcenter() {
		return center;
	}

	/** @param X the new cluster number */
	public void setNO(int X) {
		this.NO = X;
	}

	/** @param X the new centre vector (stored by reference) */
	public void setcenter(Vector<Double> X) {
		this.center = X;
	}

}

 

 

我的相似度一次迭代聚类

标签:blog   ar   io   os   sp   for   java   on   div   

原文地址:http://www.cnblogs.com/limpek/p/4172077.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!