# Tags: class ssi jobs data date ble criterion open parallel
# Train several scikit-learn classifiers on the census data set and print the
# accuracy of each one on a held-out 20% test split.
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

path = r'F:\数据分析专用\机器学习\监督学习项目\census.csv'
with open(path, 'r', encoding='utf-8') as f:
    data = pd.read_csv(f)


def replace_str(data):
    """Encode every object-dtype column of *data* as ordinal string codes.

    Within each object column, the distinct values are numbered in order of
    first appearance and replaced by ``"1"``, ``"2"``, ... (strings, not
    integers). Columns of any other dtype are left untouched.

    The mapping for a column is built completely before it is applied, so a
    raw value that happens to look like an already assigned code (for
    example a column containing the strings ``"2"`` and ``"1"``) cannot be
    re-mapped a second time and merged with another category.

    Args:
        data: ``pandas.DataFrame`` to encode. It is modified in place.

    Returns:
        The same ``DataFrame`` object that was passed in.
    """
    for index in range(data.shape[1]):
        # Select by position so duplicated column labels cannot turn the
        # selection into a DataFrame.
        column = data.iloc[:, index]
        if column.dtype != object:
            continue
        # NOTE(review): a missing value is numbered like any other distinct
        # value here; confirm that is what callers want for NaN.
        codes = {
            value: str(code)
            for code, value in enumerate(column.drop_duplicates(), start=1)
        }
        data.iloc[:, index] = column.map(codes)
    return data


data = replace_str(data)

# Every column except the last is a feature; the last column is the label.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

# Ridge classifier (original author's note: accuracy about 0.80).
regressor = RidgeClassifier()
regressor.fit(X_train, y_train)
print(regressor.score(X_test, y_test))

# Decision tree (original author's note: accuracy about 0.818).
regressor = DecisionTreeClassifier()
regressor.fit(X_train, y_train)
print(regressor.score(X_test, y_test))

# Support vector machine (left disabled, as in the original script).
# from sklearn.svm import SVC
# regressor = SVC(kernel='linear', degree=3)
# regressor.fit(X_train, y_train)
# print(regressor.score(X_test, y_test))

# Random forest (original author's note: accuracy about 0.84), compared
# against a single decision tree built with the same random seed.
estimators = {}
# criterion: the split-quality measure ("gini" or "entropy").
estimators['tree'] = sklearn.tree.DecisionTreeClassifier(
    criterion='gini', random_state=8)
# n_estimators: number of trees in the forest.
# bootstrap: whether each tree is fit on a sample drawn with replacement.
# n_jobs: number of jobs to run in parallel.
estimators['forest'] = RandomForestClassifier(
    n_estimators=20, criterion='gini', bootstrap=True, n_jobs=2,
    random_state=8)

for k, estimator in estimators.items():
    # fit() returns the estimator itself, so the dict entry stays valid.
    estimator.fit(X_train, y_train)
    print("%s Score: %0.2f" % (k, estimator.score(X_test, y_test)))
    # 10-fold cross-validation on the training split only.
    scores = cross_val_score(
        estimator, X_train, y_train, scoring='accuracy', cv=10)
    print("%s Cross Avg. Score: %0.2f (+/- %0.2f)"
          % (k, scores.mean(), scores.std() * 2))

# K-nearest neighbours (original author's note: accuracy about 0.82).
regressor = KNeighborsClassifier()
regressor.fit(X_train, y_train)
print(regressor.score(X_test, y_test))

# Neural network / multi-layer perceptron (original author's note: about 0.80).
regressor = MLPClassifier()
regressor.fit(X_train, y_train)
print(regressor.score(X_test, y_test))

# Linear classifier trained with stochastic gradient descent
# (original author's note: accuracy about 0.77).
regressor = SGDClassifier(max_iter=1000)
regressor.fit(X_train, y_train)
print(regressor.score(X_test, y_test))
# Tags: class ssi jobs data date ble criterion open parallel
# Original article: https://www.cnblogs.com/pandaboy1123/p/10286586.html