Reference: http://blog.csdn.net/lu597203933/article/details/38468303
Logistic regression code:
# Logistic regression classification: use gradient ascent to find the best parameters
import numpy as np


def loadDataSet():
    datMat = []
    labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split('\t')
        # prepend x0 = 1.0 so weights[0] acts as the constant (intercept) term
        datMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
        labelMat.append(int(lineArr[2]))
    fr.close()
    return datMat, labelMat


def sigmoid(inx):
    return 1.0 / (1 + np.exp(-inx))


# One way to picture the update rule (an analogy with squared-error descent;
# it is not literally what this code optimizes):
#   f(x) = a*x1 + b*x2 + c*x3
#   minimize L(a, b, c) = (1/2) * sum_{i=1..100} (f(xi) - yi)^2
#   with step size rate = 0.1 and initial weights T0 = [1, 1, 1]
#   dL/da = sum_{i=1..100} (f(xi) - yi) * xi1 = ra, the gradient along a
#   likewise rb, rc for b and c, so the descent steps are
#   a = a - ra*rate,  b = b - rb*rate,  c = c - rc*rate
# The code below instead ascends the log-likelihood, whose gradient
# X^T (y - h) happens to have the same form.


# Gradient ascent
def gradAscent(dataMatIn, classLabels):
    # dataMatrix: m*n data matrix (here 100*3)
    dataMatrix = np.mat(dataMatIn)
    # class labels transposed into a 100*1 column vector
    labelMat = np.mat(classLabels).transpose()
    m, n = np.shape(dataMatrix)
    alpha = 0.001
    maxCycles = 500
    # weights: n*1 (here 3*1)
    weights = np.ones((n, 1))
    for k in range(maxCycles):
        # gradient over the whole data set (100*1);
        # every step processes all samples
        h = sigmoid(dataMatrix * weights)
        # difference between the true class and the predicted class
        error = labelMat - h
        # update the weight vector: 3*1 = 3*1 + 0.001 * [(3*100)(100*1)]
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights


# Stochastic gradient ascent
# supports incremental (online) updates
def stoGradAscent0(dataMatrix, classLabels):
    m, n = np.shape(dataMatrix)
    alpha = 0.01
    # array([1., 1., 1.])
    weights = np.ones(n)
    for i in range(m):
        # update with one sample at a time
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * dataMatrix[i]
    return weights


# Improved stochastic gradient ascent
def stoGradAscent1(dataMatrix, classLabels, numIter=150):
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))
        for i in range(m):
            # decay alpha on every iteration
            alpha = 4 / (1.0 + j + i) + 0.01
            # draw a random sample without replacement: pick a position in
            # dataIndex, map it to the actual sample index, then remove it
            index = int(np.random.uniform(0, len(dataIndex)))
            randIndex = dataIndex[index]
            h = sigmoid(sum(dataMatrix[randIndex] * weights))
            error = classLabels[randIndex] - h
            weights = weights + alpha * error * dataMatrix[randIndex]
            del dataIndex[index]
    return weights


# Plot the data set and the best-fit line
def plotBestFit(wei):
    import matplotlib.pyplot as plt
    weights = wei.getA()
    # weights = wei
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)
    n = np.shape(dataArr)[0]
    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i][1])
            ycord1.append(dataArr[i][2])
        else:
            xcord2.append(dataArr[i][1])
            ycord2.append(dataArr[i][2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # z = w0*1 + w1*x1 + w2*x2 and h = 1/(1 + exp(-z));
    # at z = 0, h = 0.5, exactly the boundary between class 1 and class 0,
    # so z = 0 is the best-fit line: w0*1 + w1*x1 + w2*x2 = 0.
    # With x2 on the y axis this gives y = (-weights[0] - weights[1]*x) / weights[2]
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()


# Improved stochastic gradient ascent
# dataArra, labelMat = loadDataSet()
# weights = stoGradAscent1(np.array(dataArra), labelMat, numIter=150)
# print(weights)
# weights = np.matrix(weights).transpose()
# plotBestFit(weights)

# Stochastic gradient ascent
# dataArra, labelMat = loadDataSet()
# weights = stoGradAscent0(np.array(dataArra), labelMat)
# print(weights)
# weights = np.matrix(weights).transpose()
# plotBestFit(weights)

# Gradient ascent
dataArra, labelMat = loadDataSet()
weights = gradAscent(dataArra, labelMat)
print(weights)
weights = np.matrix(weights)
plotBestFit(weights)

# weights = gradAscent(dataArra, labelMat)
# weights = stoGradAscent1(np.array(dataArra), labelMat)
# print(weights)
# plotBestFit(np.mat(weights.transpose()))
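A practical caveat on the sigmoid above: for strongly negative inputs, np.exp(-inx) overflows and NumPy emits a RuntimeWarning (the value still saturates to 0.0, so training usually proceeds, but the warnings are noisy). Below is a minimal sketch of a numerically stable variant, assuming plain NumPy arrays as input; the name sigmoid_stable is ours, not part of the original script.

import numpy as np

def sigmoid_stable(inx):
    # Evaluate 1/(1 + e^(-x)) without overflow: for x >= 0 use the usual
    # form; for x < 0 use the equivalent e^x / (1 + e^x), so np.exp is
    # only ever called on non-positive values.
    x = np.atleast_1d(np.asarray(inx, dtype=float))
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    ex = np.exp(x[~pos])
    out[~pos] = ex / (1.0 + ex)
    return out

If SciPy is available, scipy.special.expit computes the same function.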
# Standalone matplotlib demo, unrelated to the logistic regression example above:
# import matplotlib.pyplot as plt
#
# plt.plot([1, 2, 3, 4], [1, 4, 9, 16], 'r')
# plt.axis([0, 5, 0, 20])
# t = np.arange(0, 5, 0.2)
# plt.plot(t, t, 'r--', t, t ** 2, 'bs', t, t ** 3, 'g^')
# plt.ylabel('some numbers')
# plt.show()
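To sanity-check the learned weights beyond the plot, one can threshold the sigmoid output at 0.5 and measure the error rate on the training set. This is a short sketch under the same assumptions as the script above (testSet.txt present, the functions above defined); classifyVector is a hypothetical helper name, not from the original code.

# Hypothetical helper: classify one sample by thresholding sigmoid(w.x) at 0.5
def classifyVector(inX, weights):
    prob = sigmoid(np.sum(inX * weights))
    return 1.0 if prob > 0.5 else 0.0

# Train with the improved stochastic gradient ascent and report training error
dataArr2, labelMat2 = loadDataSet()
w = stoGradAscent1(np.array(dataArr2), labelMat2, numIter=150)
errorCount = sum(classifyVector(np.array(dataArr2[i]), w) != labelMat2[i]
                 for i in range(len(labelMat2)))
print('training error rate: %.3f' % (errorCount / float(len(labelMat2))))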