标签:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | # ========================== # 读入文本记录,转换为NumPy,便于其他函数使用 # 输入:文本记录的路径 # ========================== def file2matrix(filename): fr = open (filename) arrayOLines = fr.readlines() numberOfLines = len (arrayOLines) returnMat = zeros((numberOfLines, 3 )) classLabelVector = [] index = 0 for line in arrayOLines: line = line.strip() # 删除字符串首尾的空白符(包括'\n', '\r', '\t', ' ') listFromLines = line.split( "\t" ) returnMat[index, :] = listFromLines[ 0 : 3 ] classLabelVector.append( int (listFromLines[ - 1 ])) index + = 1 return returnMat, classLabelVector |
1 2 3 4 5 6 7 8 9 10 | def plotSca(datingDataMat, datingLabels): import matplotlib.pyplot as plt fig = plt.figure() ax1 = fig.add_subplot( 121 ) # 玩网游所消耗的时间比(横轴)与每年消耗的冰淇淋公升数(纵轴)的散点图 ax1.scatter(datingDataMat[:, 1 ], datingDataMat[:, 2 ], 15.0 * array(datingLabels), 5.0 * array(datingLabels)) ax2 = fig.add_subplot( 122 ) # 每年获得的飞行常客里程数(横轴)与 玩网游所消耗的时间比(纵轴)的散点图 ax2.scatter(datingDataMat[:, 0 ], datingDataMat[:, 1 ], 15.0 * array(datingLabels), 5.0 * array(datingLabels)) plt.show() |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | # ===================================== # 如果不同特征值量级相差太大, # 而他们在模型中占的权重又并不比其他特征大, # 这个时候就需要对特征值进行归一化, # 也就是将取值范围处理为0到1或者-1到1之间 # 本函数就是对数据集归一化特征值 # dataset: 输入数据集 # ===================================== def autoNorm(dataset): minVals = dataset. min ( 0 ) maxVals = dataset. max ( 0 ) ranges = maxVals - minVals normDataset = zeros(shape(dataset)) m = dataset.shape[ 0 ] normDataset = dataset - tile(minVals, (m, 1 )) normDataset = normDataset / tile(ranges, (m, 1 )) # 矩阵中对应数值相除 return normDataset, ranges, minVals |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | ''' 分类器针对约会网站的测试代码 ''' def datingClassTest(): hoRatio = 0.10 # 数据集中用于测试的比例 filePath = "E:\ml\machinelearninginaction\Ch02\datingTestSet2.txt" datingDataMat, datingLabels = file2matrix(filePath) # plotSca(datingDataMat, datingLabels) normMat, ranges, minVals = autoNorm(datingDataMat) m = normMat.shape[ 0 ] numTestVecs = int (hoRatio * m) errcounter = 0.0 for i in range (numTestVecs): classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :], datingLabels[numTestVecs:m], 3 ) print "the classifier came back with: %d, the real answer is: %d" % \ (classifierResult, datingLabels[i]) if (classifierResult ! = datingLabels[i]): errcounter + = 1.0 print "the total error rate is: %f" % (errcounter / float (numTestVecs)) print errcounter |
标签:
原文地址:http://www.cnblogs.com/mooba/p/5412592.html