java weka.filters.supervised.attribute.Discretize -i my_arff_save_path/data_vsm.arff -o my_arff_save_path/data_D_vsm.arff -c first
得到data_D_vsm.arff
测试
import java.io.File;
import weka.classifiers.Classifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ArffLoader;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.*;
/**
 * Trains a J48 decision tree on an ARFF file and evaluates it on a second ARFF
 * file, echoing every prediction and the final accuracy to both standard output
 * and a text report.
 *
 * <p>NOTE(review): the training and test paths below point at the same file, so
 * the reported figure is accuracy on the training data, not on held-out data.
 * Point {@link #TEST_PATH} at a separate file for a meaningful evaluation.
 *
 * <p>NOTE(review): because this class is itself named {@code weka}, the simple
 * name {@code weka} refers to this class inside its body; Weka types must be
 * reached through imports rather than fully-qualified {@code weka.…} names.
 */
public class weka {

    /** Text report that mirrors everything printed to standard output. */
    private static final String REPORT_PATH = "E://data.txt";

    /** Discretized ARFF file used to train the classifier. */
    private static final String TRAIN_PATH = "E://data_D_vsm.arff";

    /** Discretized ARFF file used to test the classifier. */
    private static final String TEST_PATH = "E://data_D_vsm.arff";

    /** Visual separator emitted after each test instance. */
    private static final String SEPARATOR =
            "--------------------------------------------------------------";

    /**
     * Runs the train / classify / report cycle.
     *
     * @param args ignored
     * @throws Exception if an ARFF file cannot be read, the classifier cannot be
     *     built or applied, or the report cannot be written
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources guarantees the report is flushed and closed even when
        // loading, training, or classification throws.
        try (BufferedWriter reportWriter = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(REPORT_PATH))))) {
            Classifier classifier = new J48();

            ArffLoader loader = new ArffLoader();

            // Load the training corpus.
            loader.setFile(new File(TRAIN_PATH));
            Instances instancesTrain = loader.getDataSet();

            // Load the test corpus.
            loader.setFile(new File(TEST_PATH));
            Instances instancesTest = loader.getDataSet();

            // The class label is the first attribute (attribute index 0) in both sets;
            // instancesTest.numAttributes() would give the total attribute count.
            instancesTest.setClassIndex(0);
            instancesTrain.setClassIndex(0);

            // Train.
            classifier.buildClassifier(instancesTrain);

            int total = instancesTest.numInstances();
            int correct = 0;

            for (int i = 0; i < total; i++) {
                // Classify once and reuse the result for both the report and the
                // accuracy tally.
                double predicted = classifier.classifyInstance(instancesTest.instance(i));
                double actual = instancesTest.instance(i).classValue();

                report(reportWriter, "预测某条记录的分类id:" + predicted + ", 分类值:"
                        + instancesTest.classAttribute().value((int) predicted));
                report(reportWriter, "测试文件的分类值: " + actual + ", 记录:"
                        + instancesTest.instance(i));
                report(reportWriter, SEPARATOR);

                // The test file's class column must hold the true labels for this
                // comparison to be meaningful.
                if (predicted == actual) {
                    correct++;
                }
            }

            // Division is done in double; an empty test set yields NaN, as before.
            report(reportWriter, "J48 classification precision:" + ((double) correct / total));
        }
    }

    /**
     * Prints one line to standard output and appends the same line to the report.
     *
     * @param reportWriter open writer for the report file
     * @param line text to emit, without a trailing line separator
     * @throws java.io.IOException if writing to the report fails
     */
    private static void report(BufferedWriter reportWriter, String line)
            throws java.io.IOException {
        System.out.println(line);
        reportWriter.write(line);
        reportWriter.newLine();
    }
}