标签:image die ict pen character display model 需要 list
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
import pydot
from sklearn.externals.six import StringIO
from IPython.display import Image
import pydotplus
# Load the training set; "Age" is forced to float64 so missing ages are read
# as NaN and can be imputed later.
train = pd.read_csv("train2.csv", dtype={"Age": np.float64})
# Call-form print works on both Python 2 and Python 3 and matches the
# print(results) call used later in this script.
print(train.head(10))
def _encode_categories(column, codes):
    """Return *column* with every value found in *codes* replaced by its code.

    Values that are not keys of *codes* (including values that were already
    encoded by an earlier call) are passed through unchanged.
    """
    return column.map(lambda value: codes.get(value, value))


def harmonize_data(titanic):
    """Impute missing values and numerically encode categorical columns.

    The frame is modified in place and also returned, so callers may either
    use the return value or ignore it.

    Steps performed:
      * "Age" and "Fare": missing values are replaced by the column median.
      * "Sex": "male" -> 0, "female" -> 1.
      * "Embarked": missing values are treated as "S", then
        "S" -> 0, "C" -> 1, "Q" -> 2.

    Args:
        titanic: DataFrame containing the columns "Age", "Sex", "Embarked"
            and "Fare".

    Returns:
        The same DataFrame object that was passed in.
    """
    sex_codes = {"male": 0, "female": 1}
    embarked_codes = {"S": 0, "C": 1, "Q": 2}

    titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())

    # Mapping whole columns (rather than assigning ints into a string column
    # through boolean .loc masks) lets pandas infer an integer dtype when
    # every value is recognised.
    titanic["Sex"] = _encode_categories(titanic["Sex"], sex_codes)
    titanic["Embarked"] = _encode_categories(
        titanic["Embarked"].fillna("S"), embarked_codes
    )

    titanic["Fare"] = titanic["Fare"].fillna(titanic["Fare"].median())
    return titanic
# Impute and encode the training frame; harmonize_data mutates `train`
# in place, so the return value is not needed here.
harmonize_data(train)
print("ok")

# Feature columns fed to the classifier.
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
# Collected (min_samples_leaf, n_estimators, accuracy) tuples.
results = []
# Candidate hyper-parameter grids; they are defined here but not consumed
# in this section of the script.
sample_leaf_options = list(range(1, 500, 3))
n_estimators_options = list(range(1, 1000, 5))

# Hold-out split: rows [0, 600) train the model, rows [600, end) evaluate it.
# The evaluation slice starts at 600 (not 601) so that row 600 is not
# silently excluded from both sets.
groud_truth = train['Survived'][600:]
alg = RandomForestClassifier(min_samples_leaf=50, n_estimators=5, random_state=50)
alg.fit(train[predictors][:600], train['Survived'][:600])
predict = alg.predict(train[predictors][600:])

# Accuracy = fraction of hold-out rows whose prediction equals the label.
results.append(
    (alg.min_samples_leaf, alg.n_estimators, (groud_truth == predict).mean())
)
print(results)
# Render every fitted tree of the forest to its own PDF file
# (iris_0.pdf, iris_1.pdf, ...). Requires the graphviz binaries.
# NOTE(review): the "iris_" prefix looks like a leftover from another example;
# it is kept so the output file names do not change.
Estimators = alg.estimators_
for index, model in enumerate(Estimators):
    filename = 'iris_' + str(index) + '.pdf'
    # out_file=None makes export_graphviz return the DOT source as a string.
    dot_data = tree.export_graphviz(
        model,
        out_file=None,
        feature_names=predictors,
        class_names=["die", "live"],
        filled=True,
        rounded=True,
        special_characters=True,
    )
    graph = pydotplus.graph_from_dot_data(dot_data)
    # NOTE(review): the Image object is created and then discarded, so nothing
    # is displayed from inside this loop; kept as in the original.
    Image(graph.create_png())
    graph.write_pdf(filename)
前提:运行上述代码前需要先安装 graphviz,例如在 CentOS/RHEL 上执行:
yum install graphviz
代码中用到的训练集(train2.csv)请参考上一篇文章。
标签:image die ict pen character display model 需要 list
原文地址:http://blog.51cto.com/12597095/2160408