# Tags: rom pipeline bio span info from 好处 pcl txt
# Train and evaluate a decision-tree classifier on the Titanic passenger data.
#
# Steps: download the data set, keep three features (pclass, age, sex),
# impute missing ages with the column mean, split into train/test sets,
# vectorize the per-row feature dicts and fit a DecisionTreeClassifier inside
# a scikit-learn Pipeline, then print the accuracy and a per-class report.
import numpy as np
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import classification_report
# `sklearn.cross_validation` was removed from scikit-learn; `train_test_split`
# now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

titanic = pd.read_csv('http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt')
# A bare `titanic.head()` displays nothing outside a notebook, so print it.
print(titanic.head())
titanic.info()

# Take an explicit copy so the imputation below modifies X itself rather than
# a temporary slice of `titanic` (chained `fillna(..., inplace=True)` triggers
# SettingWithCopyWarning and is a no-op under pandas copy-on-write).
X = titanic[['pclass', 'age', 'sex']].copy()
y = titanic['survived']

# Fill missing ages with the mean age.
X['age'] = X['age'].fillna(X['age'].mean())
X.info()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=33
)

# DictVectorizer consumes one dict per sample; 'records' is the full orient
# name (the abbreviated 'record' is rejected by recent pandas versions).
X_train = X_train.to_dict(orient='records')
X_test = X_test.to_dict(orient='records')

# Convert non-numeric data into numeric data: the vectorizer step of the
# pipeline turns the feature dicts into a dense numeric matrix before the
# decision tree sees them.
clf = Pipeline([
    ('vecd', DictVectorizer(sparse=False)),
    ('dtc', DecisionTreeClassifier()),
])
# Standalone vectorizer kept from the original script; the pipeline above
# uses its own DictVectorizer instance and does not reference this one.
vec = DictVectorizer(sparse=False)

clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)

# Mean accuracy on the held-out test set.
print(clf.score(X_test, y_test))
# classification_report expects (y_true, y_pred); passing them the other way
# round swaps precision and recall in the printed table.
print(classification_report(y_test, y_predict, target_names=['died', 'survived']))
# Tags: rom pipeline bio span info from 好处 pcl txt
# Original article: https://www.cnblogs.com/gwzz/p/13254993.html