标签:
首先,感谢台湾林智仁先生的开源工具包libsvm,它使SVM算法更加普及。大家可以到下面的libsvm官网了解相关信息。
Libsvm官方网站->https://www.csie.ntu.edu.tw/~cjlin/libsvm/
其次,我在使用过程中发现,林先生的svm_scale无法将规约(缩放)后的数据直接输出到本地txt文件中,只能在控制台通过重定向保存,而我并不想在程序运行中打开控制台进行较为繁琐的操作。
所以我改造了svm_scale文件,实现了文件的写入,在这里可以和大家分享一下。
改造后新增参数“-z [filename]” 参数filename表示输出文件的路径。
public static void main(String args[])
{
String[] testArgs_Test = {"-l","0", "-u","1","-z","C:\\db\\svm\\scale_testData.txt","-s","C:\\db\\svm\\chao-test-scale.txt","C:\\db\\svm\\UCI-breast-cancer-tra"};
//-l [] 规约文件属性值的下限
//-u [] 规约文件属性值的下限
//-s [] 规约标准存放的位置
//-z [] 对训练样本进行规约后输出到本地的文件
//C:\\db\\svm\\UCI-breast-cancer-tra 参数最后的训练样本的Path路径
String[] testArgs_Predict = {"-l","0", "-u","1","-z","C:\\db\\svm\\scale_predictData.txt","-s","C:\\db\\svm\\chao-predict-scale.txt","C:\\db\\svm\\UCI-breast-cancer-test"};
getTest_scale(testArgs_Test);
getPredict_scale(testArgs_Predict);
try {
Thread.sleep(1000);
} catch (InterruptedException e2) {
// TODO Auto-generated catch block
e2.printStackTrace();
}
String[] arg = { "C:\\db\\svm\\scale_testData.txt", // 存放SVM训练模型用的数据的路径
"C:\\db\\svm\\\\model_r.txt" };
String[] parg = { "C:\\db\\svm\\scale_predictData.txt", // 这个是存放测试数据
"C:\\db\\svm\\model_r.txt", // 调用的是训练以后的模型
"C:\\db\\svm\\out_r.txt" }; // 生成的结果的文件的路径
System.out.println("........SVM运行开始..........");
// 创建一个训练对象
svm_train t = new svm_train();
// 创建一个预测或者分类的对象
svm_predict p = new svm_predict();
try {
t.main(arg);// 调用
p.main(parg); // 调用
} catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
}
});
}
/**
 * Scales the training samples on a separate thread so that the scaled file
 * is available locally for the later training and prediction steps.
 *
 * <p>The method only starts the thread and returns; it does not wait for the
 * scaling to finish.
 *
 * @param args command-line style arguments handed to {@code svm_scaleUpdate.main}
 */
public void getTest_scale(String[] args) {
    Runnable scaleJob = () -> {
        try {
            svm_scaleUpdate.main(args);
        } catch (IOException scaleFailure) {
            // Runs on the worker thread, so the failure is only reported.
            scaleFailure.printStackTrace();
        }
    };
    Thread worker = new Thread(scaleJob);
    worker.start();
}
/**
 * Scales the prediction samples on a separate thread.
 *
 * <p>The method only starts the thread and returns; it does not wait for the
 * scaling to finish.
 *
 * @param args command-line style arguments handed to {@code svm_scaleUpdate.main}
 */
public void getPredict_scale(String[] args) {
    Runnable scaleJob = () -> {
        try {
            svm_scaleUpdate.main(args);
        } catch (IOException scaleFailure) {
            // Runs on the worker thread, so the failure is only reported.
            scaleFailure.printStackTrace();
        }
    };
    Thread worker = new Thread(scaleJob);
    worker.start();
}
}
运行结果:
........SVM运行开始..........
*
optimization finished, #iter = 50
nu = 0.11329630723694926
obj = -58.4674213525147, rho = -0.8102576113199965
nSV = 76, nBSV = 70
Total nSV = 76
Accuracy = 92.3076923076923% (36/39) (classification)
找了找没发现怎么上传文件,直接将代码复制出来吧!
svm_scaleUpdate.java—–>>>>>>
package demo;
import java.io.*;
import java.util.*;
/**
 * Variant of libsvm's {@code svm_scale} tool that can also write the scaled
 * data to a local file.
 *
 * <p>In addition to the stock options, {@code -z filename} names a file that
 * receives a copy of the scaled data, so that training and prediction can read
 * it without redirecting the console. The scaled data is always printed to
 * standard output as well; {@code -z} is optional.
 */
class svm_scaleUpdate
{
    private String line = null;
    private double lower = -1.0;
    private double upper = 1.0;
    private double y_lower;
    private double y_upper;
    private boolean y_scaling = false;
    private double[] feature_max;
    private double[] feature_min;
    private double y_max = -Double.MAX_VALUE;
    private double y_min = Double.MAX_VALUE;
    private int max_index;
    private long num_nonzeros = 0;
    private long new_num_nonzeros = 0;
    // Receives the -z copy of the scaled data; stays null when -z is not given.
    private Writer fWriter = null;

    /** Prints the usage text and terminates the JVM with exit status 1. */
    private static void exit_with_help()
    {
        System.out.print(
            "Usage: svm-scale [options] data_filename\n"
            +"options:\n"
            +"-l lower : x scaling lower limit (default -1)\n"
            +"-u upper : x scaling upper limit (default +1)\n"
            +"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
            +"-s save_filename : save scaling parameters to save_filename\n"
            +"-r restore_filename : restore scaling parameters from restore_filename\n"
            +"-z output_filename : also write the scaled data to output_filename\n"
        );
        System.exit(1);
    }

    /** Closes {@code fp} and returns a fresh reader positioned at the start of {@code filename}. */
    private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
    {
        fp.close();
        return new BufferedReader(new FileReader(filename));
    }

    /** Closes {@code c} if it is non-null, ignoring any failure; used only on cleanup paths. */
    private static void closeQuietly(Closeable c)
    {
        if(c == null)
            return;
        try {
            c.close();
        } catch(IOException ignored) {
            // cleanup only: the interesting error, if any, is already propagating
        }
    }

    /** Sends {@code text} to standard output and, when -z was given, to the output file. */
    private void emit(String text) throws IOException
    {
        System.out.print(text);
        if(fWriter != null)
            fWriter.write(text);
    }

    /**
     * Emits the (optionally scaled) target value followed by a space.
     *
     * @param value target value read from the data file
     * @throws IOException if writing to the output file fails
     */
    private void output_target(double value) throws IOException
    {
        if(y_scaling)
        {
            // the end points are mapped exactly to avoid rounding error
            if(value == y_min)
                value = y_lower;
            else if(value == y_max)
                value = y_upper;
            else
                value = y_lower + (y_upper-y_lower) *
                    (value-y_min) / (y_max-y_min);
        }
        emit(value + " ");
    }

    /**
     * Emits one scaled feature as {@code index:value} followed by a space.
     * Features whose minimum equals their maximum, and features that scale
     * to exactly zero, are not emitted.
     *
     * @param index feature index
     * @param value unscaled feature value
     * @throws IOException if writing to the output file fails
     */
    private void output(int index, double value) throws IOException
    {
        /* skip single-valued attribute */
        if(feature_max[index] == feature_min[index])
            return;

        if(value == feature_min[index])
            value = lower;
        else if(value == feature_max[index])
            value = upper;
        else
            value = lower + (upper-lower) *
                (value-feature_min[index])/
                (feature_max[index]-feature_min[index]);

        if(value != 0)
        {
            emit(index + ":" + value + " ");
            new_num_nonzeros++;
        }
    }

    /** Reads the next line into {@link #line} and returns it ({@code null} at end of file). */
    private String readline(BufferedReader fp) throws IOException
    {
        line = fp.readLine();
        return line;
    }

    /**
     * Parses the options, determines (or restores) the scaling parameters and
     * emits the scaled data.
     *
     * <p>Invalid usage and files that cannot be opened terminate the JVM with
     * exit status 1.
     *
     * @param argv options followed by the data file name
     * @throws IOException if reading the input or writing the output fails
     */
    private void run(String []argv) throws IOException
    {
        int i,index;
        BufferedReader fp = null, fp_restore = null;
        String save_filename = null;
        String restore_filename = null;
        String data_filename = null;
        String save_file = null;

        for(i=0;i<argv.length;i++)
        {
            if(argv[i].isEmpty() || argv[i].charAt(0) != '-') break;
            // every option is a dash, one letter and at least one value
            if(argv[i].length() < 2 || i+1 >= argv.length)
                exit_with_help();
            ++i;
            switch(argv[i-1].charAt(1))
            {
                case 'l': lower = Double.parseDouble(argv[i]); break;
                case 'u': upper = Double.parseDouble(argv[i]); break;
                case 'y':
                    // -y takes two values
                    if(i+1 >= argv.length)
                        exit_with_help();
                    y_lower = Double.parseDouble(argv[i]);
                    ++i;
                    y_upper = Double.parseDouble(argv[i]);
                    y_scaling = true;
                    break;
                case 's': save_filename = argv[i]; break;
                case 'r': restore_filename = argv[i]; break;
                case 'z': save_file = argv[i]; break;
                default:
                    System.err.println("unknown option");
                    exit_with_help();
            }
        }

        if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
        {
            System.err.println("inconsistent lower/upper specification");
            System.exit(1);
        }
        if(restore_filename != null && save_filename != null)
        {
            System.err.println("cannot use -r and -s simultaneously");
            System.exit(1);
        }

        if(argv.length != i+1)
            exit_with_help();

        data_filename = argv[i];
        try {
            fp = new BufferedReader(new FileReader(data_filename));
        } catch (Exception e) {
            System.err.println("can't open file " + data_filename);
            System.exit(1);
        }

        try
        {
            // -z is optional: without it the scaled data only goes to stdout
            if(save_file != null)
                fWriter = new BufferedWriter(new FileWriter(save_file));

            /* assumption: min index of attributes is 1 */
            /* pass 1: find out max index of attributes */
            max_index = 0;

            if(restore_filename != null)
            {
                int idx;

                try {
                    fp_restore = new BufferedReader(new FileReader(restore_filename));
                }
                catch (Exception e) {
                    System.err.println("can't open file " + restore_filename);
                    System.exit(1);
                }
                if(fp_restore.read() == 'y')
                {
                    // skip the rest of the 'y' line and the two y-range lines
                    fp_restore.readLine();
                    fp_restore.readLine();
                    fp_restore.readLine();
                }
                // skip the 'x' line and the x-range line
                fp_restore.readLine();
                fp_restore.readLine();

                String restore_line = null;
                while((restore_line = fp_restore.readLine())!=null)
                {
                    StringTokenizer st2 = new StringTokenizer(restore_line);
                    idx = Integer.parseInt(st2.nextToken());
                    max_index = Math.max(max_index, idx);
                }
                fp_restore = rewind(fp_restore, restore_filename);
            }

            while (readline(fp) != null)
            {
                StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
                st.nextToken(); // target value, not needed in this pass
                while(st.hasMoreTokens())
                {
                    index = Integer.parseInt(st.nextToken());
                    max_index = Math.max(max_index, index);
                    st.nextToken(); // feature value, not needed in this pass
                    num_nonzeros++;
                }
            }

            try {
                feature_max = new double[(max_index+1)];
                feature_min = new double[(max_index+1)];
            } catch(OutOfMemoryError e) {
                System.err.println("can't allocate enough memory");
                System.exit(1);
            }

            for(i=0;i<=max_index;i++)
            {
                feature_max[i] = -Double.MAX_VALUE;
                feature_min[i] = Double.MAX_VALUE;
            }

            fp = rewind(fp, data_filename);

            /* pass 2: find out min/max value */
            while(readline(fp) != null)
            {
                int next_index = 1;
                double target;
                double value;

                StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
                target = Double.parseDouble(st.nextToken());
                y_max = Math.max(y_max, target);
                y_min = Math.min(y_min, target);

                while (st.hasMoreTokens())
                {
                    index = Integer.parseInt(st.nextToken());
                    value = Double.parseDouble(st.nextToken());

                    // indices missing from the sparse line count as zero
                    for (i = next_index; i<index; i++)
                    {
                        feature_max[i] = Math.max(feature_max[i], 0);
                        feature_min[i] = Math.min(feature_min[i], 0);
                    }

                    feature_max[index] = Math.max(feature_max[index], value);
                    feature_min[index] = Math.min(feature_min[index], value);
                    next_index = index + 1;
                }

                for(i=next_index;i<=max_index;i++)
                {
                    feature_max[i] = Math.max(feature_max[i], 0);
                    feature_min[i] = Math.min(feature_min[i], 0);
                }
            }

            fp = rewind(fp, data_filename);

            /* pass 2.5: save/restore feature_min/feature_max */
            if(restore_filename != null)
            {
                // fp_restore was rewound after finding max_index
                int idx;
                double fmin, fmax;

                fp_restore.mark(2); // for reset
                if(fp_restore.read() == 'y')
                {
                    fp_restore.readLine(); // pass the '\n' after 'y'
                    StringTokenizer st = new StringTokenizer(fp_restore.readLine());
                    y_lower = Double.parseDouble(st.nextToken());
                    y_upper = Double.parseDouble(st.nextToken());
                    st = new StringTokenizer(fp_restore.readLine());
                    y_min = Double.parseDouble(st.nextToken());
                    y_max = Double.parseDouble(st.nextToken());
                    y_scaling = true;
                }
                else
                    fp_restore.reset();

                if(fp_restore.read() == 'x') {
                    fp_restore.readLine(); // pass the '\n' after 'x'
                    StringTokenizer st = new StringTokenizer(fp_restore.readLine());
                    lower = Double.parseDouble(st.nextToken());
                    upper = Double.parseDouble(st.nextToken());
                    String restore_line = null;
                    while((restore_line = fp_restore.readLine())!=null)
                    {
                        StringTokenizer st2 = new StringTokenizer(restore_line);
                        idx = Integer.parseInt(st2.nextToken());
                        fmin = Double.parseDouble(st2.nextToken());
                        fmax = Double.parseDouble(st2.nextToken());
                        if (idx <= max_index)
                        {
                            feature_min[idx] = fmin;
                            feature_max[idx] = fmax;
                        }
                    }
                }
                fp_restore.close();
            }

            if(save_filename != null)
            {
                // Locale.ROOT keeps '.' as the decimal separator so that the
                // file can be parsed again by Double.parseDouble under -r.
                Formatter formatter = new Formatter(new StringBuilder(), Locale.ROOT);
                BufferedWriter fp_save = null;

                try {
                    fp_save = new BufferedWriter(new FileWriter(save_filename));
                } catch(IOException e) {
                    System.err.println("can't open file " + save_filename);
                    System.exit(1);
                }

                try
                {
                    if(y_scaling)
                    {
                        formatter.format("y\n");
                        formatter.format("%.16g %.16g\n", y_lower, y_upper);
                        formatter.format("%.16g %.16g\n", y_min, y_max);
                    }
                    formatter.format("x\n");
                    formatter.format("%.16g %.16g\n", lower, upper);
                    for(i=1;i<=max_index;i++)
                    {
                        if(feature_min[i] != feature_max[i])
                            formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
                    }
                    fp_save.write(formatter.toString());
                }
                finally
                {
                    fp_save.close();
                }
            }

            /* pass 3: scale */
            while(readline(fp) != null)
            {
                int next_index = 1;
                double target;
                double value;

                StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
                target = Double.parseDouble(st.nextToken());
                output_target(target);
                while(st.hasMoreElements())
                {
                    index = Integer.parseInt(st.nextToken());
                    value = Double.parseDouble(st.nextToken());
                    for (i = next_index; i<index; i++)
                        output(i, 0);
                    output(index, value);
                    next_index = index + 1;
                }

                for(i=next_index;i<= max_index;i++)
                    output(i, 0);
                System.out.print("\n");
                if(fWriter != null)
                    fWriter.write("\r\n");
            }
            if (new_num_nonzeros > num_nonzeros)
                System.err.print(
                    "WARNING: original #nonzeros " + num_nonzeros+"\n"
                    +" new #nonzeros " + new_num_nonzeros+"\n"
                    +"Use -l 0 if many original feature values are zeros\n");

            // close() flushes; a failure here means the output file is
            // incomplete, so it must reach the caller
            if(fWriter != null)
                fWriter.close();
        }
        finally
        {
            // release whatever is still open, also when a pass failed
            closeQuietly(fp);
            closeQuietly(fp_restore);
            closeQuietly(fWriter);
        }
    }

    /**
     * Command-line entry point.
     *
     * @param argv options followed by the data file name; see the usage text
     * @throws IOException if reading the input or writing the output fails
     */
    public static void main(String argv[]) throws IOException
    {
        svm_scaleUpdate s = new svm_scaleUpdate();
        s.run(argv);
    }
}
标签:
原文地址:http://blog.csdn.net/qq_18149897/article/details/51887017