标签:orm filename getter 功能 set 机器 line python diff
dict.get(key, default=None):返回键 key 对应的值;若键不存在,则返回 default(默认为 None)。
add_subplot()基础用法
import matplotlib.pyplot as plt
from numpy import *
# Create a new, empty figure to draw on.
fig = plt.figure()
# 349 is the three-digit shorthand for (3, 4, 9): a grid of 3 rows by
# 4 columns, with the axes placed in the 9th cell.
ax = fig.add_subplot(349)
# NOTE(review): x and y are not defined anywhere in this snippet; the data
# to plot must be supplied before this line can run.
ax.plot(x,y)
将画布分成三行四列,在第九个分区画图
from numpy import *
from os import listdir
import operator
def createDataSet():
    """Build a tiny, hard-coded sample data set for trying out the classifier.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4 x 2 array of sample
        points and ``labels`` is the list of class labels, one per row of
        ``group``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# 对数据进行分类
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: The sample to classify; anything that broadcasts against one
            row of ``dataSet`` (a flat sequence or a 1 x n array).
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie, the label that was met
        first while walking the neighbours from nearest to farthest wins.

    Raises:
        IndexError: If ``k`` is less than 1 or larger than the number of
            training samples.
    """
    # Broadcasting subtracts every training row from inX in one step, so no
    # explicit tiling of inX is needed.
    diffMat = inX - dataSet
    # Euclidean distance: square root of the per-row sum of squared
    # differences (axis=1 sums across each row).
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    # argsort returns the row indices ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()
    # Tally how often each label occurs among the k nearest rows.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        # dict.get supplies 0 the first time a label is seen.
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # sorted() is stable (also with reverse=True), so labels with equal
    # counts stay in nearest-first order.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
# 读取文本文件数据
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and a label list.

    Every non-blank line is split on tabs; its first three fields become one
    row of the matrix and its last field is parsed as the integer label.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(train_matrix, label_vector)`` - an ``(n, 3)`` float array
        holding the first three columns of each record, and a list with the
        ``n`` integer labels taken from the last column.

    Raises:
        ValueError: If a record has fewer than three feature fields, or a
            feature or label field cannot be parsed as a number.
    """
    # The context manager closes the file even when parsing fails later.
    with open(filename) as fr:
        # Blank lines (for example a trailing newline at end of file) carry
        # no record and are skipped.
        records = [line.strip().split('\t') for line in fr if line.strip()]
    train_matrix = zeros((len(records), 3))
    label_vector = []
    for index, fields in enumerate(records):
        train_matrix[index, :] = [float(value) for value in fields[0:3]]
        label_vector.append(int(fields[-1]))
    return train_matrix, label_vector
#归一化函数
def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` to [0, 1] with min-max scaling.

    Args:
        dataSet: 2-D array, one sample per row and one feature per column.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` - the rescaled data, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so it is 0 for a constant column.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has range 0; divide it by 1 instead so that it maps
    # to 0 rather than NaN.
    safeRanges = where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column minimum and range to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals
# 分类器测试函数
def datingClassTest(hoRatio=0.10, filename='datingTestSet2.txt', k=2):
    """Measure the classifier's error rate on a hold-out slice of a data file.

    The file is loaded with ``file2matrix`` and normalised with ``autoNorm``.
    The first ``int(m * hoRatio)`` rows are classified with ``classify0``
    against the remaining rows; every prediction and the final error rate
    are printed.

    Args:
        hoRatio: Fraction of the rows (taken from the top) used as test data.
        filename: Path of the tab-separated data file.
        k: Number of neighbours passed to ``classify0``.

    Raises:
        ZeroDivisionError: If the hold-out slice is empty, i.e.
            ``int(m * hoRatio)`` is 0.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    # Only the normalised matrix is needed here; ranges and minimums are not.
    normMat = autoNorm(datingDataMat)[0]
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    # The training part is the same for every test row, so slice it once.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]
    errcount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], trainMat, trainLabels, k)
        print("the classifier came back with :%d ,the real answer is :%d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errcount += 1.0
    print("the total error rate is: %f" % (errcount / float(numTestVecs)))
# 手写字符文件转换成向量
def img2vector(filename):
    """Read a 32 x 32 text image of digit characters into a 1 x 1024 vector.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and stored row after row.

    Args:
        filename: Path of the text image file.

    Returns:
        A ``(1, 1024)`` float array; element ``32 * i + j`` holds the value
        of character ``j`` on line ``i``.

    Raises:
        IndexError: If a line has fewer than 32 characters or the file has
            fewer than 32 lines.
        ValueError: If a character is not a digit.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file; this function runs once per image,
    # so an unclosed handle would leak one descriptor per call.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect
# 手写字符识别测试
def handwritingClassTest(trainingDir='digits/trainingDigits', testDir='digits/testDigits', k=3):
    """Run the kNN classifier on the handwritten-digit files and print the errors.

    Every file in ``trainingDir`` is converted with ``img2vector`` into one
    training row. Every file in ``testDir`` is then classified with
    ``classify0``; each prediction, the number of errors and the error rate
    are printed.

    Args:
        trainingDir: Directory holding the training images.
        testDir: Directory holding the test images.
        k: Number of neighbours passed to ``classify0``.

    Raises:
        ValueError: If the part of a file name before the first ``_`` is not
            an integer.
        ZeroDivisionError: If ``testDir`` contains no files.
    """
    trainingFileList = listdir(trainingDir)
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    hwlabels = []
    for i, fileNameStr in enumerate(trainingFileList):
        # The label is the text before the first "_" of the base name
        # (presumably names look like "<digit>_<index>.txt" - verify against
        # the data set). It is stored as int so training and test labels
        # share one type.
        hwlabels.append(int(fileNameStr.split('.')[0].split('_')[0]))
        # Turn the image file into a vector and store it as training row i.
        trainingMat[i, :] = img2vector('%s/%s' % (trainingDir, fileNameStr))
    testFileList = listdir(testDir)
    errcount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2vector('%s/%s' % (testDir, fileNameStr))
        classifierResult = classify0(vectorUnderTest, trainingMat, hwlabels, k)
        print('the classifier came back with : %d, the real answer is %d' % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errcount += 1
    print('\nthe total number of errors is %d' % errcount)
    print('\nthe total error rate is: %f' % float(errcount/mTest))
标签:orm filename getter 功能 set 机器 line python diff
原文地址:https://www.cnblogs.com/myblog1993/p/8886459.html