标签:
分类: java 算法 2012-05-07 16:58 4988人阅读
一,k-means算法介绍:
k-means算法接受输入量 k;然后将 n 个数据对象划分为 k 个聚类,以便使所获得的聚类满足:同一聚类中的对象相似度较高,而不同聚类中的对象相似度较小。聚类相似度是利用各聚类中对象的均值所获得的一个“中心对象”(引力中心)来进行计算的。k 个聚类具有以下特点:各聚类本身尽可能的紧凑,而各聚类之间尽可能的分开。
k-means算法的工作过程说明如下:首先从 n 个数据对象中任意选择 k 个对象作为初始聚类中心;而对于所剩下的其它对象,则根据它们与这些聚类中心的相似度(距离),分别将它们分配给与其最相似的(聚类中心所代表的)聚类;然后再计算每个所获新聚类的聚类中心(该聚类中所有对象的均值);不断重复这一过程,直到标准测度函数开始收敛为止。一般都采用均方差作为标准测度函数。
二,k-means算法基本步骤:
(1) 从 n个数据对象任意选择 k 个对象作为初始聚类中心;
(2) 根据每个聚类对象的均值(中心对象),计算每个对象与这些中心对象的距离;并根据最小距离重新对相应对象进行划分;
(3) 重新计算每个(有变化)聚类的均值(中心对象);
(4) 计算标准测度函数,当满足一定条件,如函数收敛时,则算法终止;如果条件不满足则回到步骤(2),不断重复直到标准测度函数开始收敛为止。(一般都采用均方差作为标准测度函数。)
三,k-means算法的java实现:
一共有七个类,General.java代表武将对象,
Distance.java距离类计算各个武将到中心武将之间的距离, Cluster.java聚类对象包含一个中心武将和该聚类中所有武将,
Kmeans.java核心的聚类算法类, Tool.java工具类用于转换武将的星级为数字等操作,
TestKmeans.java测试类即入口文件, DomParser.java用于读取xml中的681个武将。
具体思路:先从general.xml文件中读取681个武将,然后随机选取初始类中心,计算各个武将到中心武将的距离,根据最小的距离进行聚类;然后根据平均值重新计算新聚类的类中心,再重新计算各个武将到新的中心武将的距离并重新聚类,直到更新后的聚类与原来的聚类包含的武将不再改变,即收敛时结束。
具体代码如下:
1,General.java
- package kmeans;
-
- public class General {
-
- private String name;
- private int render;
- private int tongshai;
- private int wuli;
- private int zhili;
- private int polic;
- private int qiangbin;
- private int jibin;
- private int nubin;
- private int qibin;
- private int binqi;
- private int tongwu;
- private int tongzhi;
- private int tongwuzhi;
- private int tongwuzhizheng;
- private int salary;
-
- public General(int render, String name, int tongshai, int wuli, int zhili,
- int polic, int qiangbin, int jibin, int nubin, int qibin,
- int binqi, int tongwu, int tongzhi, int tongwuzhi,
- int tongwuzhizheng, int salary) {
- super();
- this.name = name;
- this.render = render;
- this.tongshai = tongshai;
- this.wuli = wuli;
- this.zhili = zhili;
- this.polic = polic;
- this.qiangbin = qiangbin;
- this.jibin = jibin;
- this.nubin = nubin;
- this.qibin = qibin;
- this.binqi = binqi;
- this.tongwu = tongwu;
- this.tongzhi = tongzhi;
- this.tongwuzhi = tongwuzhi;
- this.tongwuzhizheng = tongwuzhizheng;
- this.salary = salary;
- }
-
- public General(int render, int tongshai, int wuli, int zhili, int polic,
- int qiangbin, int jibin, int nubin, int qibin, int binqi,
- int tongwu, int tongzhi, int tongwuzhi, int tongwuzhizheng,
- int salary) {
- super();
- this.name = "聚类中心";
- this.render = render;
- this.tongshai = tongshai;
- this.wuli = wuli;
- this.zhili = zhili;
- this.polic = polic;
- this.qiangbin = qiangbin;
- this.jibin = jibin;
- this.nubin = nubin;
- this.qibin = qibin;
- this.binqi = binqi;
- this.tongwu = tongwu;
- this.tongzhi = tongzhi;
- this.tongwuzhi = tongwuzhi;
- this.tongwuzhizheng = tongwuzhizheng;
- this.salary = salary;
- }
-
- public General() {
- }
-
- @Override
- public String toString() {
- return "武将 [name=" + name + ", render=" + Tool.dxingji(render)
- + ", tongshai=" + tongshai + ", wuli=" + wuli + ", zhili="
- + zhili + ", polic=" + polic + ", qiangbin="
- + Tool.dchange(qiangbin) + ", jibin=" + Tool.dchange(jibin)
- + ", nubin=" + Tool.dchange(nubin) + ", qibin="
- + Tool.dchange(qibin) + ", binqi=" + Tool.dchange(binqi)
- + ", tongwu=" + tongwu + ", tongzhi=" + tongzhi
- + ", tongwuzhi=" + tongwuzhi + ", tongwuzhizheng="
- + tongwuzhizheng + ", salary=" + salary + "]";
- }
-
- public String getName() {
- return name;
- }
-
- public void setName(String name) {
- this.name = name;
- }
-
- public int getRender() {
- return render;
- }
-
- public void setRender(int render) {
- this.render = render;
- }
-
- public int getTongshai() {
- return tongshai;
- }
-
- public void setTongshai(int tongshai) {
- this.tongshai = tongshai;
- }
-
- public int getWuli() {
- return wuli;
- }
-
- public void setWuli(int wuli) {
- this.wuli = wuli;
- }
-
- public int getZhili() {
- return zhili;
- }
-
- public void setZhili(int zhili) {
- this.zhili = zhili;
- }
-
- public int getPolic() {
- return polic;
- }
-
- public void setPolic(int polic) {
- this.polic = polic;
- }
-
- public int getQiangbin() {
- return qiangbin;
- }
-
- public void setQiangbin(int qiangbin) {
- this.qiangbin = qiangbin;
- }
-
- public int getJibin() {
- return jibin;
- }
-
- public void setJibin(int jibin) {
- this.jibin = jibin;
- }
-
- public int getNubin() {
- return nubin;
- }
-
- public void setNubin(int nubin) {
- this.nubin = nubin;
- }
-
- public int getQibin() {
- return qibin;
- }
-
- public void setQibin(int qibin) {
- this.qibin = qibin;
- }
-
- public int getBinqi() {
- return binqi;
- }
-
- public void setBinqi(int binqi) {
- this.binqi = binqi;
- }
-
- public int getTongwu() {
- return tongwu;
- }
-
- public void setTongwu(int tongwu) {
- this.tongwu = tongwu;
- }
-
- public int getTongzhi() {
- return tongzhi;
- }
-
- public void setTongzhi(int tongzhi) {
- this.tongzhi = tongzhi;
- }
-
- public int getTongwuzhi() {
- return tongwuzhi;
- }
-
- public void setTongwuzhi(int tongwuzhi) {
- this.tongwuzhi = tongwuzhi;
- }
-
- public int getTongwuzhizheng() {
- return tongwuzhizheng;
- }
-
- public void setTongwuzhizheng(int tongwuzhizheng) {
- this.tongwuzhizheng = tongwuzhizheng;
- }
-
- public int getSalary() {
- return salary;
- }
-
- public void setSalary(int salary) {
- this.salary = salary;
- }
-
- }
2,Distance.java
- package kmeans;
/**
 * A measured distance between two items identified by integer ids:
 * {@code source} (where the measurement starts) and {@code dest}
 * (what it was measured against).
 */
public class Distance {
    int dest;
    int source;
    double dist;

    /** Creates a record with both ids at 0 and a distance of 0.0. */
    public Distance() {
    }

    /**
     * Creates a fully populated record.
     *
     * @param dest   id of the destination item
     * @param source id of the source item
     * @param dist   distance between the two
     */
    public Distance(int dest, int source, double dist) {
        setDest(dest);
        setSource(source);
        setDist(dist);
    }

    /** @return id of the destination item */
    public int getDest() {
        return this.dest;
    }

    /** @param dest id of the destination item */
    public void setDest(int dest) {
        this.dest = dest;
    }

    /** @return id of the source item */
    public int getSource() {
        return this.source;
    }

    /** @param source id of the source item */
    public void setSource(int source) {
        this.source = source;
    }

    /** @return the stored distance */
    public double getDist() {
        return this.dist;
    }

    /** @param dist the distance to store */
    public void setDist(double dist) {
        this.dist = dist;
    }

}
3,Cluster.java
- package kmeans;
-
- import java.util.ArrayList;
-
- public class Cluster {
- private int center;
- private ArrayList<General> ofCluster = new ArrayList<General>();
-
- public int getCenter() {
- return center;
- }
-
- public void setCenter(int center) {
- this.center = center;
- }
-
- public ArrayList<General> getOfCluster() {
- return ofCluster;
- }
-
- public void setOfCluster(ArrayList<General> ofCluster) {
- this.ofCluster = ofCluster;
- }
-
- public void addGeneral(General general) {
- if (!(this.ofCluster.contains(general)))
- this.ofCluster.add(general);
- }
- }
4,Kmeans.java
- package kmeans;
-
- import java.util.*;
-
- public class Kmeans {
- public ArrayList<General> allGenerals = null;
- public int totalNumber = 0;
- public int K = 0;
-
- public Kmeans() {
- allGenerals = new DomParser().prepare();
- totalNumber = allGenerals.size();
- K = 3;
- }
-
-
- public Set<Integer> firstRandom() {
- Set<Integer> center = new HashSet<Integer>();
- Random ran = new Random();
- int roll = ran.nextInt(totalNumber);
- while (center.size() < K) {
- roll = ran.nextInt(totalNumber);
- center.add(roll);
- }
- return center;
- }
-
-
- public ArrayList<Cluster> init(Set<Integer> center) {
- ArrayList<Cluster> cluster = new ArrayList<Cluster>();
- Iterator<Integer> it = center.iterator();
- while (it.hasNext()) {
- Cluster c = new Cluster();
- c.setCenter(it.next());
- cluster.add(c);
- }
- return cluster;
- }
-
-
- public ArrayList<Cluster> juLei(Set<Integer> center,
- ArrayList<Cluster> cluster) {
- ArrayList<Distance> distence = new ArrayList<Distance>();
- General source = null;
- General dest = null;
- int id = 0;
- int id2 = 0;
- Object[] p = center.toArray();
- boolean flag = false;
-
- for (int i = 0; i < totalNumber; i++) {
-
- distence.clear();
-
- for (int j = 0; j < center.size(); j++) {
-
- if (!(center.contains(i))) {
- flag = true;
-
- source = allGenerals.get(i);
- dest = allGenerals.get((Integer) p[j]);
-
- distence.add(new Distance((Integer) p[j], i, Tool.juli(
- source, dest)));
- } else {
- flag = false;
- }
- }
-
- if (flag == true) {
-
-
- double min = distence.get(0).getDist();
-
- int minid = 0;
- for (int k = 1; k < distence.size(); k++) {
- if (min > distence.get(k).getDist()) {
- min = distence.get(k).getDist();
- id = distence.get(k).getDest();
- id2 = distence.get(k).getSource();
- minid = k;
- } else {
- id = distence.get(minid).getDest();
- id2 = distence.get(minid).getSource();
- }
- }
-
- for (int n = 0; n < cluster.size(); n++) {
-
- if (cluster.get(n).getCenter() == id) {
- cluster.get(n).addGeneral(allGenerals.get(id2));
- break;
- }
- }
- }
- }
- return cluster;
- }
-
-
- public Set<Integer> updateCenter() {
- Set<Integer> center = new HashSet<Integer>();
- for (int i = 0; i < K; i++) {
- center.add(i);
- }
- return center;
- }
-
-
- public ArrayList<Cluster> updateCluster(ArrayList<Cluster> cluster) {
- ArrayList<Cluster> result = new ArrayList<Cluster>();
-
-
- for (int j = 0; j < K; j++) {
- ArrayList<General> ps = cluster.get(j).getOfCluster();
-
- ps.add(allGenerals.get(cluster.get(j).getCenter()));
- int size = ps.size();
-
- int sumrender = 0, sumtongshai = 0, sumwuli = 0, sumzhili = 0, sumjibin = 0, sumnubin = 0, sumqibin = 0, sumpolic = 0, sumqiangbin = 0, sumbinqi = 0, sumtongwu = 0, sumtongzhi = 0, sumtongwuzhi = 0, sumtongwuzhizheng = 0, sumsalary = 0;
- for (int k1 = 0; k1 < size; k1++) {
- sumrender += ps.get(k1).getRender();
- sumtongshai += ps.get(k1).getRender();
- sumwuli += ps.get(k1).getWuli();
- sumzhili += ps.get(k1).getZhili();
- sumjibin += ps.get(k1).getJibin();
- sumnubin += ps.get(k1).getNubin();
- sumqibin += ps.get(k1).getQibin();
- sumpolic += ps.get(k1).getPolic();
- sumqiangbin += ps.get(k1).getQiangbin();
- sumbinqi += ps.get(k1).getBinqi();
- sumtongwu += ps.get(k1).getTongwu();
- sumtongzhi += ps.get(k1).getTongzhi();
- sumtongwuzhi += ps.get(k1).getTongwuzhi();
- sumtongwuzhizheng += ps.get(k1).getTongwuzhizheng();
- sumsalary += ps.get(k1).getSalary();
- }
-
- Cluster newCluster = new Cluster();
- newCluster.setCenter(j);
-
- newCluster.addGeneral(new General(sumrender / size, sumtongshai
- / size, sumwuli / size, sumzhili / size, sumjibin / size,
- sumnubin / size, sumqibin / size, sumpolic = 0,
- sumqiangbin = 0, sumbinqi / size, sumtongwu / size,
- sumtongzhi / size, sumtongwuzhi / size, sumtongwuzhizheng
- / size, sumsalary / size));
- result.add(newCluster);
- }
- return result;
-
- }
-
-
- public ArrayList<Cluster> updateJuLei(ArrayList<Cluster> update,
- ArrayList<Cluster> cluster) {
- ArrayList<Distance> distence = new ArrayList<Distance>();
- General source = null;
- General dest = null;
- int id = 0;
- int id2 = 0;
-
- boolean flag = false;
-
- for (int i = 0; i < totalNumber; i++) {
-
- distence.clear();
-
-
- for (int j = 0; j < update.size(); j++) {
-
-
- flag = true;
-
- source = allGenerals.get(i);
-
- dest = update.get(j).getOfCluster().get(0);
-
-
- distence.add(new Distance(update.get(j).getCenter(), i, Tool.juli(
- source, dest)));
-
- }
-
- if (flag == true) {
-
-
- double min = distence.get(0).getDist();
-
- int mid = 0;
- for (int k = 1; k < distence.size(); k++) {
- if (min > distence.get(k).getDist()) {
- min = distence.get(k).getDist();
- id = distence.get(k).getDest();
- id2 = distence.get(k).getSource();
- mid = k;
- } else {
- id = distence.get(mid).getDest();
- id2 = distence.get(mid).getSource();
- }
- }
-
- for (int n = 0; n < cluster.size(); n++) {
-
- if (cluster.get(n).getCenter() == id) {
- cluster.get(n).addGeneral(allGenerals.get(id2));
- }
- }
- }
- }
- return cluster;
- }
-
-
- public ArrayList<Cluster> getResult() {
- ArrayList<Cluster> result = new ArrayList<Cluster>();
- ArrayList<Cluster> temp = new ArrayList<Cluster>();
- boolean flag = false;
-
- Set<Integer> center = firstRandom();
- result = juLei(center, init(center));
- print(result);
- do {
-
- ArrayList<Cluster> up = updateCluster(result);
- ArrayList<Cluster> cluster = init(updateCenter());
- temp = updateJuLei(up, cluster);
-
- flag = isEquals(temp, result);
- result = temp;
- } while (!flag);
- return result;
- }
-
- public boolean isEquals(ArrayList<Cluster> temp, ArrayList<Cluster> result){
- boolean flag = false;
- if(temp.size() != result.size()){
- return flag;
- }
- for(Cluster tem : temp){
- for(Cluster res : result){
- if(tem.getCenter() == res.getCenter()){
- flag = true;
- }
- }
-
- if(flag == false){
- return false;
- }else{
- flag = false;
- }
- }
-
- flag = true;
- return flag;
- }
-
-
- public void print(ArrayList<Cluster> cs) {
- System.out.println("***************************************");
- for (int i = 0; i < cs.size(); i++) {
- Cluster c = cs.get(i);
- System.out.println("-----------------------------------------------------");
- System.out.println("center: " + allGenerals.get(c.getCenter()));
- ArrayList<General> p = c.getOfCluster();
- for (int j = 0; j < p.size(); j++) {
- System.out.println("general:"+p.get(j)+"\n");
- }
- }
- }
-
- }
5,Tool.java
- package kmeans;
-
- public class Tool {
-
- public static int change(String str) {
- int result = str.equals("精") ? 4 : (str.equals("神") ? 3 : (str
- .equals("通") ? 2 : 1));
- return result;
- }
-
- public static int xingji(String str) {
- int result = str.equals("★★★★★") ? 5 : (str.equals("★★★★") ? 4 : (str
- .equals("★★★") ? 3 : (str.equals("★★") ? 2 : 1)));
- return result;
- }
-
- public static String dchange(int str) {
- String result = str== 4 ? "精" : (str== 3 ? "神" : (str== 2 ? "通" : "疏"));
- return result;
- }
-
- public static String dxingji(int str) {
- String result = str== 5 ? "★★★★★" : (str== 4 ? "★★★★" : (str== 3 ? "★★★" : (str == 2 ? "★★" : "★")));
- return result;
- }
-
- public static double juli(General g1, General g2) {
- double result = (Double) Math.sqrt(StrictMath.pow(g1.getRender() - g2.getRender(), 2)
- + StrictMath.pow(g1.getTongshai() - g2.getTongshai(), 2)
- + StrictMath.pow(g1.getWuli() - g2.getWuli(), 2)
- + StrictMath.pow(g1.getZhili() - g2.getZhili(), 2)
- + StrictMath.pow(g1.getPolic() - g2.getPolic(), 2)
- + StrictMath.pow(g1.getQiangbin() - g2.getQiangbin(), 2)
- + StrictMath.pow(g1.getQibin() - g2.getQibin(), 2)
- + StrictMath.pow(g1.getJibin() - g2.getJibin(), 2)
- + StrictMath.pow(g1.getNubin() - g2.getNubin(), 2)
- + StrictMath.pow(g1.getBinqi() - g2.getBinqi(), 2)
- + StrictMath.pow(g1.getTongwu() - g2.getTongwu(), 2)
- + StrictMath.pow(g1.getTongzhi() - g2.getTongzhi(), 2)
- + StrictMath.pow(g1.getTongwuzhizheng() - g2.getTongwuzhizheng(), 2)
- + StrictMath.pow(g1.getTongwuzhi() - g2.getTongwuzhi(), 2)
- + StrictMath.pow(g1.getSalary() - g2.getSalary(), 2)
- );
- return result;
- }
- }
6,DomParser.java
- package kmeans;
-
- import javax.xml.parsers.*;
-
- import java.io.*;
- import java.util.ArrayList;
-
- import org.w3c.dom.*;
- import org.xml.sax.SAXException;
-
- public class DomParser {
-
- private ArrayList<General> generals = new ArrayList<General>();
-
- public ArrayList<General> prepare(){
-
- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
- factory.setIgnoringElementContentWhitespace(true);
-
- DocumentBuilder builder = null;
- try {
- builder = factory.newDocumentBuilder();
- } catch (ParserConfigurationException e) {
- e.printStackTrace();
- }
-
- Document doc = null;
- try {
- doc = builder.parse(new File("general.xml"));
- } catch (SAXException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- Element generalList = doc.getDocumentElement();
-
- NodeList nodeList = generalList.getElementsByTagName("Row");
-
- for (int i = 1; i < nodeList.getLength(); i++) {
-
- System.out.println("------------the " + i
- + " element--------------");
-
- Node row = nodeList.item(i);
-
- NodeList attList = row.getChildNodes();
-
- generals.add(new General(Tool.xingji(attList.item(1)
- .getTextContent()), attList.item(3).getTextContent(),
- Integer.parseInt(attList.item(5).getTextContent()),
- Integer.parseInt(attList.item(7).getTextContent()),
- Integer.parseInt(attList.item(9).getTextContent()),
- Integer.parseInt(attList.item(11).getTextContent()),
- Tool.change(attList.item(13).getTextContent()),
- Tool.change(attList.item(15).getTextContent()),
- Tool.change(attList.item(17).getTextContent()),
- Tool.change(attList.item(19).getTextContent()),
- Tool.change(attList.item(21).getTextContent()),
- Integer.parseInt(attList.item(23).getTextContent()),
- Integer.parseInt(attList.item(25).getTextContent()),
- Integer.parseInt(attList.item(27).getTextContent()),
- Integer.parseInt(attList.item(29).getTextContent()),
- Integer.parseInt(attList.item(31).getTextContent())));
-
- System.out.println(" 星级:"
- + Tool.xingji(attList.item(1).getTextContent()) + " 姓名:"
- + attList.item(3).getTextContent() + " 统率:"
- + attList.item(5).getTextContent() + " 武力:"
- + attList.item(7).getTextContent() + " 智力:"
- + attList.item(9).getTextContent() + " 政治:"
- + attList.item(11).getTextContent() + "枪兵:"
- + Tool.change(attList.item(13).getTextContent()) + " 戟兵:"
- + Tool.change(attList.item(15).getTextContent()) + " 弩兵:"
- + Tool.change(attList.item(17).getTextContent()) + " 骑兵:"
- + Tool.change(attList.item(19).getTextContent()) + " 兵器:"
- + Tool.change(attList.item(21).getTextContent()) + " 统武:"
- + attList.item(23).getTextContent() + " 统智:"
- + attList.item(25).getTextContent() + " 统武智:"
- + attList.item(27).getTextContent() + " 统武智政:"
- + attList.item(29).getTextContent() + " 50级工资:"
- + attList.item(31).getTextContent() + " ");
-
- }
- return generals;
-
- }
- }
7,TestKmeans.java
- package kmeans;
-
- public class TestKmeans {
-
- public static void main(String[] args) {
- Kmeans kmeans = new Kmeans();
- kmeans.print(kmeans.getResult());
- }
-
- }
五、附件
附部分general.xml(已上传到百度云盘,点此下载,完整下载链接:http://pan.baidu.com/s/1qW6SWOkf):
六、结果截图
最终运行结果截图如下:
读取到的武将数据如图,
运行后的部分截图:
K-means算法的java实现,聚类分析681个三国武将
标签:
原文地址:http://www.cnblogs.com/bb3q/p/4526402.html