标签:stat mat red ros nts warning imei cross imp
import timeit

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import cross_val_score
# Load the digit data set from the author's local CSV file.
csv_path = r"C:\work\learnbetter\micro-class\week 3 Preprocessing\digit recognizor.csv"
data = pd.read_csv(csv_path)

# Column 0 is used as the target; every remaining column is a feature.
y = data.iloc[:, 0]
X = data.iloc[:, 1:]

# Notebook-style inspection of the feature matrix dimensions.
X.shape
# Fit a PCA with default settings on X and plot the running total of the
# explained-variance ratios against the component index.
pca_line = PCA()
pca_line.fit(X)
cumulative_ratio = np.cumsum(pca_line.explained_variance_ratio_)

plt.figure(figsize=[20, 5])
plt.plot(cumulative_ratio)
plt.xlabel("number of components after dimension reduction")
plt.ylabel("cumulative explained variance ratio")
plt.show()
# TIME WARNING: about 2 min 30 s (original author's estimate).
#
# Coarse sweep: for n_components = 1, 11, ..., 91, project X with PCA and
# record the mean 5-fold CV accuracy of a 10-tree random forest.
# NOTE(review): PCA is fitted on all of X before cross-validation, so the
# held-out folds also shape the projection; confirm that is acceptable here.
component_grid = range(1, 101, 10)
score = []
for n_components in component_grid:
    X_dr = PCA(n_components).fit_transform(X)
    forest = RFC(n_estimators=10, random_state=0)
    fold_accuracies = cross_val_score(forest, X_dr, y, cv=5)
    score.append(fold_accuracies.mean())

plt.figure(figsize=[20, 5])
plt.plot(component_grid, score)
plt.show()
# TIME WARNING: about 2 min 30 s (original author's estimate).
#
# Fine sweep: repeat the PCA + random-forest evaluation for every
# n_components from 10 through 24.
component_grid = range(10, 25)
score = []
for n_components in component_grid:
    X_dr = PCA(n_components).fit_transform(X)
    forest = RFC(n_estimators=10, random_state=0)
    fold_accuracies = cross_val_score(forest, X_dr, y, cv=5)
    score.append(fold_accuracies.mean())

plt.figure(figsize=[20, 5])
plt.plot(component_grid, score)
plt.show()
# Reduce X to 23 principal components (presumably the value picked from the
# sweep above -- confirm) and keep the projection for the following cells.
pca_23 = PCA(n_components=23)
X_dr = pca_23.fit_transform(X)

# TIME WARNING: about 1 min 30 s (original author's estimate).
# Mean 5-fold CV accuracy of a 100-tree random forest on the reduced data.
forest_100 = RFC(n_estimators=100, random_state=0)
cross_val_score(forest_100, X_dr, y, cv=5).mean()
from sklearn.neighbors import KNeighborsClassifier as KNN

# Mean 5-fold CV accuracy of a default-configured KNN on the reduced data.
knn_fold_accuracies = cross_val_score(KNN(), X_dr, y, cv=5)
knn_fold_accuracies.mean()
# TIME WARNING: duration not recorded by the original author.
#
# Sweep KNN's n_neighbors over 1..10 on the 23-component projection and plot
# the mean 5-fold CV accuracy for each value.
#
# The projection does not depend on k, so it is computed once up front rather
# than being refitted inside the loop on every iteration.
X_dr = PCA(23).fit_transform(X)

neighbor_grid = range(1, 11)
score = [cross_val_score(KNN(k), X_dr, y, cv=5).mean() for k in neighbor_grid]

plt.figure(figsize=[20, 5])
# Plot against the real n_neighbors values (1..10); plotting against
# range(10) would shift the curve one step left of the k actually used.
plt.plot(neighbor_grid, score)
plt.show()
# Mean 5-fold CV accuracy of KNN with n_neighbors=4 on the reduced data.
cross_val_score(KNN(4), X_dr, y, cv=5).mean()

# TIME WARNING: about 3 mins (original author's estimate).
#
# Time the same evaluation. The original used the IPython cell magic
# `%%timeit`, which is not Python syntax and is only accepted as the first
# line of a notebook cell; the standard-library timeit module does the same
# job in a plain script. Seven single-call repeats match what `%%timeit`
# typically does for a statement this slow.
knn_cv_timings = timeit.repeat(
    lambda: cross_val_score(KNN(4), X_dr, y, cv=5).mean(),
    repeat=7,
    number=1,
)
print(
    "KNN(4) 5-fold CV: best {:.2f} s, mean {:.2f} s over {} runs".format(
        min(knn_cv_timings),
        sum(knn_cv_timings) / len(knn_cv_timings),
        len(knn_cv_timings),
    )
)
机器学习sklearn(51): 特征工程(十八) 特征降维(九)降维算法PCA和SVD(四)PCA对手写数字数据集的降维
标签:stat mat red ros nts warning imei cross imp
原文地址:https://www.cnblogs.com/qiu-hua/p/14933135.html