码迷,mamicode.com
首页 > 编程语言 > 详细

慈善人数预测(不同算法简单暴力代码)

时间:2019-01-18 12:25:48      阅读:182      评论:0      收藏:0      [点我收藏+]

标签:class   ssi   jobs   data   date   ble   criterion   open   并行   

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Location of the census CSV; the raw-string prefix keeps the Windows
# backslashes literal instead of treating them as escape sequences.
path = r"F:\数据分析专用\机器学习\监督学习项目\census.csv"
# Open the file explicitly as UTF-8 text and let pandas parse the handle.
with open(path, "r", encoding="utf-8") as f:
    data = pd.read_csv(f)


def replace_str(data):
    """Encode every object-dtype column of *data* as 1-based code strings.

    Within each object column the distinct values are numbered in order of
    first appearance, and every cell is replaced by its number rendered as a
    string ("1", "2", ...). Columns of any other dtype are left untouched.

    Args:
        data: DataFrame to encode; it is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for position, name in enumerate(data):
        column = data[name]
        if column.dtype != object:
            continue
        # Build the complete value -> code table first and apply it in a
        # single pass. Replacing one value at a time would re-map cells whose
        # freshly written code equals a later original value (for example a
        # column that holds the strings "2" and "1").
        codes = {
            value: str(code)
            for code, value in enumerate(column.drop_duplicates(), start=1)
        }
        if codes:  # nothing to write back for an empty column
            data.iloc[:, position] = column.map(codes)
    return data


# Encode the object columns, then use every column except the last as the
# feature matrix and the last column as the target.
data = replace_str(data)
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values
# Hold out 20% of the rows for scoring; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
# Ridge classifier (author-reported score: 0.80)
from sklearn.linear_model import RidgeClassifier
regressor = RidgeClassifier().fit(X_train, y_train)
print(regressor.score(X_test, y_test))
# Decision tree (author-reported score: 0.818)
from sklearn.tree import DecisionTreeClassifier
regressor = DecisionTreeClassifier().fit(X_train, y_train)
print(regressor.score(X_test, y_test))
# Support vector machine (left commented out)
# from sklearn.svm import SVC
# regressor = SVC(kernel='linear', degree=3)
# regressor.fit(X_train, y_train)
# print(regressor.score(X_test, y_test))
# Random forest (author-reported score: 0.84), compared against a single
# decision tree on the same split and with 10-fold cross-validation.
from sklearn.ensemble import RandomForestClassifier
import sklearn.tree
estimators = {}
# criterion: the split criterion (gini/entropy)
estimators["tree"] = sklearn.tree.DecisionTreeClassifier(criterion="gini", random_state=8)  # decision tree
# n_estimators: number of trees
# bootstrap: whether to sample with replacement
# n_jobs: number of jobs that may run in parallel
estimators["forest"] = RandomForestClassifier(n_estimators=20, criterion="gini", bootstrap=True, n_jobs=2, random_state=8)  # random forest
from sklearn.model_selection import cross_val_score
import datetime
for k in estimators:
    # Fit on the training split and store the fitted model back under its key.
    estimators[k] = estimators[k].fit(X_train, y_train)
    # Test-set predictions; not used again in this script.
    pred = estimators[k].predict(X_test)
    print("%s Score: %0.2f" % (k, estimators[k].score(X_test, y_test)))
    # Accuracy over 10 folds of the training split; reported as mean and 2 * std.
    scores = cross_val_score(estimators[k], X_train, y_train, scoring="accuracy", cv=10)
    print("%s Cross Avg. Score: %0.2f (+/- %0.2f)" % (k, scores.mean(), scores.std() * 2))
# k-nearest neighbours (author-reported score: 0.82)
from sklearn.neighbors import KNeighborsClassifier
regressor = KNeighborsClassifier().fit(X_train, y_train)
print(regressor.score(X_test, y_test))
# Neural network, multi-layer perceptron (author-reported score: 0.80)
from sklearn.neural_network import MLPClassifier
regressor = MLPClassifier().fit(X_train, y_train)
print(regressor.score(X_test, y_test))
# SGD-trained linear model (author-reported score: 0.77)
from sklearn.linear_model import SGDClassifier
regressor = SGDClassifier(max_iter=1000).fit(X_train, y_train)
print(regressor.score(X_test, y_test))

 

慈善人数预测(不同算法简单暴力代码)

标签:class   ssi   jobs   data   date   ble   criterion   open   并行   

原文地址:https://www.cnblogs.com/pandaboy1123/p/10286586.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!