logistic 回归（线性和非线性）

时间：2018-10-23 23:06:24 阅读：217 评论：0 收藏：0 [点我收藏+]

标签：通过 step 创建测试 prime lib pyplot val class

一：线性logistic 回归

代码如下：

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt
import seaborn as sns

# Load the data set; the file has no header row, so column names are supplied here
path = 'ex2data1.txt'
data = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])

# Split the rows by label: Admitted == 1 (positive) and Admitted == 0 (negative)
positive = data[data['Admitted'].isin([1])]
negative = data[data['Admitted'].isin([0])]

'''
# Inspect the distribution of the two classes (disabled: kept inside a string)
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive['Exam 1'], positive['Exam 2'], s=60, c='b', marker='o', label='Admitted')
ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='UnAdmitted')
ax.legend()
ax.set_xlabel('Exam 1 Score')
ax.set_ylabel('Exam 2 Score')
plt.show()
'''

# Sigmoid (logistic) function
def sigmoid(h):
    """Return 1 / (1 + e^(-h)), applied element-wise when ``h`` is an array."""
    exp_neg = np.exp(-h)
    return 1 / (1 + exp_neg)


'''
# Visual check of the sigmoid function (disabled: kept inside a string)
nums = np.arange(-10, 11, step=1)
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(nums, sigmoid(nums), 'k')
plt.show()
'''

# Compute the value of the loss function
def cost(theta, X, y):
    """Return the mean logistic (cross-entropy) loss of ``theta`` on ``(X, y)``.

    All three arguments are converted with ``np.matrix``; a 1-D ``theta``
    therefore becomes a 1 x n row and is transposed before the product with
    ``X``. ``y`` is expected to have one row per row of ``X`` (the script
    passes an (m, 1) column).
    """
    weights = np.matrix(theta)
    features = np.matrix(X)
    labels = np.matrix(y)

    # Predicted probability for every sample, computed once and reused
    hypothesis = sigmoid(features * weights.T)

    positive_term = np.multiply(-labels, np.log(hypothesis))
    negative_term = np.multiply(1 - labels, np.log(1 - hypothesis))
    return np.sum(positive_term - negative_term) / len(features)

# Insert a column of ones as the first column (intercept term)
data.insert(0, 'ones', 1)

cols = data.shape[1]

# Every column except the last is a feature; the last column is the label
X = data.iloc[:, 0:cols-1]
y = data.iloc[:, cols-1:cols]

X = np.array(X.values)
y = np.array(y.values)
# One parameter per feature column, as a 1-D array of zeros.
# Sized from X so it stays correct if the number of columns changes.
theta = np.zeros(X.shape[1])


# Return the gradient of the cost -- a vector with one entry per parameter
def gradient(theta, X, y):
    """Return the gradient of the logistic loss with respect to ``theta``.

    Arguments are converted with ``np.matrix``; a 1-D ``theta`` becomes a
    1 x n row. The result is a 1 x n ``np.matrix`` equal to
    ``(sigmoid(X * theta.T) - y).T * X / m`` where ``m = len(X)``.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)

    # Residual between predicted probability and label, one row per sample
    error = sigmoid(X * theta.T) - y

    return error.T.dot(X) / len(X)

# Find the best theta with SciPy's truncated Newton (TNC) optimiser,
# supplying both the cost function and its analytic gradient
result = opt.fmin_tnc(
    func=cost,
    x0=theta,
    fprime=gradient,
    args=(X, y),
)

# print(cost(result[0], X, y))

# Evaluate the fitted theta:
# compute the predicted class for each sample of the given data set
def predict(theta, X):
    """Return a list of 0/1 predictions, one per row of ``X``.

    A row is labelled 1 when ``sigmoid(x . theta)`` is strictly greater than
    0.5 and 0 otherwise.
    """
    weights = np.matrix(theta)
    samples = np.matrix(X)

    scores = sigmoid(samples * weights.T)

    labels = []
    for score in scores:
        labels.append(1 if score > 0.5 else 0)
    return labels


theta_min = result[0]
predictions = predict(theta_min, X)

# A prediction counts as correct when it equals the label of the same row
correct = [1 if a == b else 0 for a, b in zip(predictions, y.ravel())]
# Percentage of correct predictions. This must be a ratio, not `sum % len`:
# the modulo only looks right when there are exactly 100 samples.
accuracy = 100 * sum(correct) / len(correct)
print('accuracy = {0}%'.format(accuracy))  # the author reports 89% on the training set


# Plot the decision boundary.
# The boundary is theta0 + theta1 * x1 + theta2 * x2 = 0; dividing by theta2
# lets x2 be written as -(theta0 + theta1 * x1) with the rescaled parameters.
theta_temp = theta_min / theta_min[2]

# Separate names are used so the label array `y` is not overwritten
boundary_x = np.arange(130, step=0.1)
boundary_y = -(theta_temp[0] + theta_temp[1] * boundary_x)

# Draw the original data points
sns.set(context='notebook', style='ticks', font_scale=1.5)
sns.lmplot(x='Exam 1', y='Exam 2', hue='Admitted', data=data,
           height=6,
           fit_reg=False,
           scatter_kws={"s": 25}
           )
# Draw the separating line
plt.plot(boundary_x, boundary_y, 'grey')
plt.xlim(0, 130)
plt.ylim(0, 130)
plt.title('Decision Boundary')
plt.show()

二：非线性logistic 回归（正则化）

代码如下：

import pandas as pd
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt


# Load the second data set; the file has no header row, so column names are supplied here
path = 'ex2data2.txt'
data = pd.read_csv(path, header=None, names=['Test 1', 'Test 2', 'Accepted'])

# Split the rows by label: Accepted == 1 (positive) and Accepted == 0 (negative)
positive = data[data['Accepted'].isin([1])]
negative = data[data['Accepted'].isin([0])]

'''
# Show the distribution of the raw data (disabled: kept inside a string)
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive['Test 1'], positive['Test 2'], s=50, c='b', marker='o', label='Accepted')
ax.scatter(negative['Test 1'], negative['Test 2'], s=50, c='r', marker='x', label='Unaccepted')
ax.legend() # shows the Accepted / Unaccepted legend
ax.set_xlabel('Test 1 Score')
ax.set_ylabel('Test 2 Score')
plt.show()
'''
# Exclusive upper bound of the loop below, so the highest total degree is degree - 1 = 4
degree = 5
x1 = data['Test 1']
x2 = data['Test 2']
# Insert a column of ones at position 3 (right after the three original columns)
data.insert(3, 'Ones', 1)

# Build the polynomial features x1^(i-j) * x2^j for every total degree i = 1..4.
# j must run up to and including i; stopping at i - 1 would drop every pure
# power of x2 (x2, x2^2, ...), so x2 would never appear on its own.
for i in range(1, degree):
    for j in range(0, i + 1):
        data['F' + str(i) + str(j)] = np.power(x1, i - j) * np.power(x2, j)

# Remove the raw Test 1 / Test 2 columns; only the derived features remain
data.drop('Test 1', axis=1, inplace=True)
data.drop('Test 2', axis=1, inplace=True)


# Sigmoid (logistic) function
def sigmoid(h):
    """Map ``h`` to the interval (0, 1) via 1 / (1 + e^(-h)), element-wise for arrays."""
    denominator = 1 + np.exp(-h)
    return 1 / denominator


def cost(theta, X, y, learnRate):
    """Return the regularised mean logistic loss of ``theta`` on ``(X, y)``.

    Arguments are converted with ``np.matrix``; a 1-D ``theta`` becomes a
    1 x n row. ``learnRate`` scales an L2 penalty,
    ``learnRate / (2 * m) * sum(theta[1:] ** 2)``, which skips the first
    parameter. Despite its name, the code uses it as a regularisation
    strength rather than as a step size.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)

    m = len(X)
    hypothesis = sigmoid(X * theta.T)

    # Cross-entropy terms for the positive and negative labels
    first = np.multiply(-y, np.log(hypothesis))
    second = np.multiply(1 - y, np.log(1 - hypothesis))
    data_loss = np.sum(first - second) / m

    # L2 penalty on every parameter except the first one
    penalised = theta[:, 1:]
    reg = (learnRate / (2 * m)) * np.sum(np.power(penalised, 2))

    return data_loss + reg


# Regularisation strength passed to cost / gradientReg
learnRate = 1
cols = data.shape[1]

# Column 0 holds the label; every remaining column is a feature
y = np.array(data.iloc[:, 0:1])
X = np.array(data.iloc[:, 1:cols])

# One parameter per feature column, initialised to zero
theta = np.zeros(X.shape[1])


# Compute the predicted class for each sample of the given data set
def predict(theta, X):
    """Return a list with 1 where ``sigmoid(x . theta) > 0.5`` and 0 elsewhere.

    The list has one entry per row of ``X``.
    """
    theta_row = np.matrix(theta)
    design = np.matrix(X)

    probability = sigmoid(design * theta_row.T)

    def to_label(p):
        # Strict comparison: a probability of exactly 0.5 is labelled 0
        if p > 0.5:
            return 1
        return 0

    return [to_label(p) for p in probability]


def gradientReg(theta, X, y, learnRate):
    """Return the gradient of the regularised logistic loss.

    Arguments are converted with ``np.matrix``; a 1-D ``theta`` becomes a
    1 x n row. The result is a 1 x n ``np.matrix``:

        (sigmoid(X * theta.T) - y).T * X / m + (learnRate / m) * theta

    with the penalty removed from the first component, matching ``cost``,
    whose L2 term skips the first parameter. ``learnRate`` is the
    regularisation strength and ``m = len(X)``.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)

    m = len(X)
    error = sigmoid(X * theta.T) - y

    # Data term plus the L2 penalty applied to every parameter ...
    grad = error.T * X / m + (learnRate / m) * theta
    # ... then undo the penalty for the first parameter only. `grad` is a
    # 1 x n matrix, so the element is [0, 0]; `grad[0]` would be the whole row.
    grad[0, 0] -= (learnRate / m) * theta[0, 0]
    return grad

# Fit theta with SciPy's truncated Newton (TNC) optimiser using the regularised
# cost and gradient; `result` is the tuple returned by fmin_tnc
result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradientReg, args=(X, y, learnRate))
print(result)

theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)
# A prediction counts as correct when it equals the label of the same row
correct = [1 if a == b else 0 for a, b in zip(predictions, y.ravel())]
# Percentage of correct predictions. This must be a ratio, not `sum % len`:
# the modulo only looks right when there are exactly 100 samples.
accuracy = 100 * sum(correct) / len(correct)

print('accuracy = {0}%'.format(accuracy))

logistic 回归（线性和非线性）

标签：通过 step 创建测试 prime lib pyplot val class

原文地址：https://www.cnblogs.com/qiang-wei/p/9839458.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行