码迷,mamicode.com
首页 > 其他好文 > 详细

计算簇间距离

时间:2017-06-29 20:26:58      阅读:135      评论:0      收藏:0      [点我收藏+]

标签:efi   rgs   next   led   file   inpu   config   throw   oid   

<strong><span style="font-size:18px;">/***
 * @author YangXin
 * @info 计算簇间距
 */
package unitTen;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.common.distance.CosineDistanceMeasure;
import org.apache.mahout.common.distance.DistanceMeasure;
/**
 * Reads every cluster stored in a Hadoop {@code SequenceFile} and prints the
 * maximum, minimum and min-max scaled average of the pairwise cosine
 * distances between the cluster centers.
 */
public class InterClusterDistances {

    /** Cluster file that is read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_FILE =
        "reuters-kmeans-clusters/clusters-6/part-r-00000";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the cluster
     *             sequence file. When absent, {@link #DEFAULT_INPUT_FILE} is used.
     * @throws Exception if the file cannot be opened or read, or if a key/value
     *                   object cannot be instantiated reflectively
     */
    public static void main(String args[]) throws Exception {
        String inputFile =
            (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_FILE;

        Configuration conf = new Configuration();
        Path path = new Path(inputFile);
        System.out.println("Input Path: " + path);
        FileSystem fs = FileSystem.get(path.toUri(), conf);

        List<Cluster> clusters = readClusters(fs, path, conf);

        // A distance needs a pair of centers; with fewer than two clusters the
        // statistics below would be meaningless (min would stay at
        // Double.MAX_VALUE and the average would be 0/0).
        if (clusters.size() < 2) {
            System.err.println(
                "Need at least two clusters to compute intercluster distances, found "
                    + clusters.size());
            return;
        }

        DistanceMeasure measure = new CosineDistanceMeasure();
        double max = 0;
        double min = Double.MAX_VALUE;
        double sum = 0;
        int count = 0;
        // Visit every unordered pair (i, j) with i < j exactly once.
        for (int i = 0; i < clusters.size(); i++) {
            for (int j = i + 1; j < clusters.size(); j++) {
                double d = measure.distance(
                    clusters.get(i).getCenter(), clusters.get(j).getCenter());
                min = Math.min(d, min);
                max = Math.max(d, max);
                sum += d;
                count++;
            }
        }

        System.out.println("Maximum Intercluster Distance: " + max);
        System.out.println("Minimum Intercluster Distance: " + min);
        System.out.println("Average Intercluster Distance(Scaled): "
            + scaledAverage(sum, count, min, max));
    }

    /**
     * Loads all values of the sequence file as clusters, always closing the reader.
     */
    private static List<Cluster> readClusters(FileSystem fs, Path path, Configuration conf)
            throws Exception {
        List<Cluster> clusters = new ArrayList<Cluster>();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            Writable key = (Writable) reader.getKeyClass().newInstance();
            Writable value = (Writable) reader.getValueClass().newInstance();
            while (reader.next(key, value)) {
                clusters.add((Cluster) value);
                // The reader fills the object it is handed, so a fresh value
                // instance is needed for each record that is kept in the list.
                value = (Writable) reader.getValueClass().newInstance();
            }
        } finally {
            reader.close();
        }
        return clusters;
    }

    /**
     * Returns the mean distance rescaled to the [min, max] range, or 0 when all
     * distances are equal (the range is empty and the ratio would be 0/0).
     */
    private static double scaledAverage(double sum, int count, double min, double max) {
        double range = max - min;
        if (range == 0) {
            return 0.0;
        }
        return (sum / count - min) / range;
    }
}
</span></strong>

计算簇间距离

标签:efi   rgs   next   led   file   inpu   config   throw   oid   

原文地址:http://www.cnblogs.com/jzdwajue/p/7096011.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!