标签:设计 type 一个 数据 方法 matrix get 偏差 判断
import numpy as np
from sklearn import datasets

digits = datasets.load_digits()
X = digits.data

# Build a heavily skewed binary problem: digit 9 is the positive class (1),
# every other digit is the negative class (0).
#
# NOTE: a plain `y = digits.target` would only bind a second name to the same
# array, so writing into `y` would also overwrite `digits.target`.  Taking a
# copy keeps the original labels intact, which matters here because the masks
# below are computed from `digits.target`.
y = digits.target.copy()
is_nine = digits.target == 9
y[is_nine] = 1
y[~is_nine] = 0
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Fixed random_state so the split (and the scores quoted below) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# fit() returns the estimator itself, so construction and training are chained.
log_reg = LogisticRegression().fit(X_train, y_train)

log_reg.score(X_test, y_test)
# Accuracy: 0.9755555555555555
y_log_predict = log_reg.predict(X_test)


def TN(y_true, y_predict):
    """Return the number of true negatives (actual 0, predicted 0).

    Both arguments are expected to be NumPy arrays of 0/1 labels with the
    same length.
    """
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))


def FP(y_true, y_predict):
    """Return the number of false positives (actual 0, predicted 1)."""
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))


def FN(y_true, y_predict):
    """Return the number of false negatives (actual 1, predicted 0)."""
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))


def TP(y_true, y_predict):
    """Return the number of true positives (actual 1, predicted 1)."""
    assert len(y_true) == len(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))


def confusion_matrix(y_true, y_predict):
    """Return the 2x2 confusion matrix ``[[TN, FP], [FN, TP]]``.

    Rows are the actual class (0, then 1); columns are the predicted class
    (0, then 1).
    """
    # Use the arguments, not the module-level y_test / y_log_predict, so the
    # function gives the right answer for any pair of label arrays.
    return np.array([
        [TN(y_true, y_predict), FP(y_true, y_predict)],
        [FN(y_true, y_predict), TP(y_true, y_predict)],
    ])


confusion_matrix(y_test, y_log_predict)
# Confusion matrix: array([[403, 2], [9, 36]])
def precision_score(y_true, y_predict):
    """Return the precision TP / (TP + FP) of the predictions.

    Returns 0.0 when nothing was predicted positive (TP + FP == 0).
    """
    # Use the arguments rather than the module-level y_test / y_log_predict.
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positive = tp + fp
    # The counts are NumPy integers: dividing by zero yields nan plus a
    # RuntimeWarning instead of raising, so a try/except never catches it.
    # Check the denominator explicitly.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive


precision_score(y_test, y_log_predict)
# Precision: 0.9473684210526315
def recall_score(y_true, y_predict):
    """Return the recall TP / (TP + FN) of the predictions.

    Returns 0.0 when there are no actual positives (TP + FN == 0).
    """
    # Use the arguments rather than the module-level y_test / y_log_predict.
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positive = tp + fn
    # The counts are NumPy integers: dividing by zero yields nan plus a
    # RuntimeWarning instead of raising, so a try/except never catches it.
    # Check the denominator explicitly.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive


recall_score(y_test, y_log_predict)
# Recall: 0.8
# The same three metrics as provided by scikit-learn.  From here on these
# imported names replace the hand-written functions of the same name.
from sklearn.metrics import confusion_matrix, precision_score, recall_score

confusion_matrix(y_test, y_log_predict)
# Confusion matrix: array([[403, 2], [9, 36]], dtype=int64)

precision_score(y_test, y_log_predict)
# Precision: 0.9473684210526315

recall_score(y_test, y_log_predict)
# Recall: 0.8
标签:设计 type 一个 数据 方法 matrix get 偏差 判断
原文地址:https://www.cnblogs.com/volcao/p/9394909.html