Python 实现的PageRank算法,纯粹使用python原生模块,没有使用numpy、scipy。这个程序实现还比较原始,可优化的地方较多。
# -*- coding: utf-8 -*-
# PageRank by power iteration, implemented with plain Python lists only
# (no numpy / scipy).
import random

N = 8  # number of pages in the demo graph built by main()
d = 0.85  # damping factor
delt = 0.00001  # convergence threshold on the L1 norm between iterations


def matrix_multi(A, B):
    """Return the matrix product ``A * B`` as a new list of lists.

    ``A`` is an m x k matrix and ``B`` a k x n matrix, both given as lists
    of rows. The inputs are not modified.
    """
    inner = len(B)
    n_cols = len(B[0])
    return [
        [sum(row[k] * B[k][j] for k in range(inner)) for j in range(n_cols)]
        for row in A
    ]


def matrix_multiN(n, A):
    """Return a new matrix with every element of ``A`` multiplied by ``n``."""
    return [[n * value for value in row] for row in A]


def matrix_add(A, B):
    """Return the element-wise sum of ``A`` and ``B``.

    Returns ``None`` when the two matrices do not have the same number of
    rows or the same number of columns (the column count is taken from the
    first row).
    """
    # The shapes differ as soon as EITHER dimension differs, hence ``or``.
    if len(A) != len(B) or (A and len(A[0]) != len(B[0])):
        return None
    return [[a + b for a, b in zip(row_a, row_b)] for row_a, row_b in zip(A, B)]


def pageRank(A):
    """Compute PageRank scores for the column-stochastic matrix ``A``.

    ``A`` is the transposed transition matrix as produced by
    ``tran_and_convert``: ``A[i][j]`` is the probability of moving from
    page ``j`` to page ``i``. Starting from a random vector, the update
    ``P = (1 - d) / n + d * A * P`` is repeated until the L1 norm of the
    change between two iterations is at most ``delt``.

    The resulting n x 1 column vector is printed and also returned.

    Raises:
        ValueError: if ``A`` is not a square matrix.
    """
    # Size everything from the matrix itself, not from the global N.
    n = len(A)
    if n == 0:
        print([])
        return []
    if any(len(row) != n for row in A):
        raise ValueError("pageRank expects a square matrix")

    teleport = [[(1 - d) / n] for _ in range(n)]
    New_P = [[random.random()] for _ in range(n)]
    norm = float("inf")  # guarantees at least one iteration
    while norm > delt:
        P = New_P
        # Core PageRank update: P = (1 - d) * e / n + d * M' * P
        New_P = matrix_add(teleport, matrix_multiN(d, matrix_multi(A, P)))
        # L1 norm of the difference between successive iterates.
        norm = sum(abs(New_P[i][0] - P[i][0]) for i in range(n))
    # Parenthesised form works on both Python 2 and Python 3.
    print(New_P)
    return New_P


def tran_and_convert(A):
    """Turn adjacency matrix ``A`` into a transposed transition matrix.

    Each row of ``A`` is divided by its sum so that it becomes a
    probability distribution over out-links, then the matrix is
    transposed. A row whose sum is zero (a page without out-links) is
    replaced by a uniform distribution over all columns instead of
    dividing by zero.
    """
    transition = []
    for row in A:
        total = sum(row)  # computed once per row, not once per cell
        if total == 0:
            # Dangling page: spread its weight evenly over every page.
            transition.append([1.0 / len(row) for _ in row])
        else:
            transition.append([value * 1.0 / total for value in row])
    return [list(column) for column in zip(*transition)]


def main():
    """Run PageRank on a fixed 8-page example graph and print the scores."""
    A = [
        [0, 1, 1, 0, 0, 1, 0, 0],
        [0, 0, 0, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0],
        [1, 0, 0, 1, 0, 0, 1, 1],
        [0, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 1, 0],
    ]
    M = tran_and_convert(A)
    pageRank(M)


if __name__ == '__main__':
    main()
原文地址:http://blog.csdn.net/nersie/article/details/44024869