标签:分析 时间 获取 图像 get 取数据 网站 tran 查看
import numpy as np
from sklearn.datasets import fetch_openml

# Load the MNIST handwritten-digit data set.
# The original post called fetch_mldata("MNIST original"); that loader has
# been removed from scikit-learn and mldata.org is no longer reachable, so
# the OpenML copy ("mnist_784") is fetched instead.  as_frame=False requests
# plain NumPy arrays so that mnist["data"] / mnist["target"] can be sliced
# positionally by the code that follows.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)

# Inspect the returned container.
# Output reported in the original post (produced by the legacy loader; with
# the OpenML loader the extra keys and the label dtype may differ):
# {'COL_NAMES': ['label', 'data'],
#  'DESCR': 'mldata.org dataset: mnist-original',
#  'data': array([[0, 0, 0, ..., 0, 0, 0],
#                 [0, 0, 0, ..., 0, 0, 0],
#                 [0, 0, 0, ..., 0, 0, 0],
#                 ...,
#                 [0, 0, 0, ..., 0, 0, 0],
#                 [0, 0, 0, ..., 0, 0, 0],
#                 [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
#  'target': array([0., 0., 0., ..., 9., 9., 9.])}
print(mnist)
# Separate the pixel matrix from the label vector.
X, y = mnist["data"], mnist["target"]
print(X.shape)  # Output reported in the original post: (70000, 784)

# The first 60000 rows are used for training and the remaining rows for
# testing.  Everything is cast to float, as in the original post.
n_train = 60000
X_train = np.array(X[:n_train], dtype=float)
y_train = np.array(y[:n_train], dtype=float)
X_test = np.array(X[n_train:], dtype=float)
y_test = np.array(y[n_train:], dtype=float)
import time

from sklearn.neighbors import KNeighborsClassifier

# Baseline: k-nearest-neighbours on the raw 784-dimensional pixel vectors.
knn_clf = KNeighborsClassifier()

# Wall-clock timing with the standard library; the IPython-only `%time`
# magic used in the original post is not valid in a plain Python file.
start = time.perf_counter()
knn_clf.fit(X_train, y_train)
elapsed = time.perf_counter() - start
print("fit wall time: {:.1f} s".format(elapsed))  # original post reported 44.9 s

# Accuracy on the held-out test rows.
print(knn_clf.score(X_test, y_test))  # original post reported 0.9688
import time

from sklearn.decomposition import PCA

# Keep only 90% of the information in the samples: a float in (0, 1) tells
# PCA to retain as many components as are needed to explain that fraction
# of the variance.
pca = PCA(0.9)
pca.fit(X_train)
X_train_reduction = pca.transform(X_train)
print(X_train_reduction.shape)  # original post reported (60000, 87)

# Same classifier as the baseline, now trained on the reduced features.
knn_clf = KNeighborsClassifier()

# Wall-clock timing with the standard library; the IPython-only `%time`
# magic used in the original post is not valid in a plain Python file.
start = time.perf_counter()
knn_clf.fit(X_train_reduction, y_train)
elapsed = time.perf_counter() - start
# Original post reported "602 s".
# NOTE(review): the full 784-feature fit was reported as 44.9 s, so this
# figure may have been 602 ms -- confirm against the original article.
print("fit wall time: {:.1f} s".format(elapsed))

# The test set must be projected with the PCA fitted on the training set.
X_test_reduction = pca.transform(X_test)

start = time.perf_counter()
accuracy = knn_clf.score(X_test_reduction, y_test)
elapsed = time.perf_counter() - start
print("score wall time: {:.1f} s".format(elapsed))  # original post reported 1 min 27 s
print(accuracy)  # original post reported 0.9728
标签:分析 时间 获取 图像 get 取数据 网站 tran 查看
原文地址:https://www.cnblogs.com/volcao/p/9257578.html