标签:
# -*- coding: utf-8 -*-
"""Fill missing spreadsheet values with Lagrange interpolation.

The script reads an Excel sheet without a header row, replaces every
missing cell with a value interpolated from its neighbours in the same
column, and writes the completed table to a second Excel file.
"""
import pandas as pd
from scipy.interpolate import lagrange

inputfile = 'F:\\python数据挖掘\\chapter6\\chapter6\\demo\\data\\missing_data.xls'
outputfile = 'F:\\python数据挖掘\\chapter6\\chapter6\\demo\\tmp\\missing_data_sale.xls'


def ployinterp(s: pd.Series, n: int, k: int = 5):
    """Interpolate the value of ``s`` at label ``n`` from its neighbours.

    Up to ``k`` labels before ``n`` and ``k`` labels after ``n`` are used
    as sample points; ``n`` itself is excluded. Neighbours that are null,
    or whose label does not exist in ``s`` (for example negative labels
    near the top of the column, or labels past the end), are ignored.

    Args:
        s: Column to interpolate. Labels are treated as integers because
            the window is built with ``range`` arithmetic around ``n``.
        n: Label of the position whose value is wanted.
        k: Number of neighbours taken on each side of ``n``.

    Returns:
        The Lagrange polynomial through the usable neighbours, evaluated
        at ``n``.
    """
    window = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() yields NaN for labels absent from ``s`` instead of raising
    # KeyError, which plain ``s[window]`` does on current pandas versions.
    y = s.reindex(window)
    print(y)
    y = y[y.notnull()]
    return lagrange(list(y.index), list(y))(n)


def fill_missing(data: pd.DataFrame) -> pd.DataFrame:
    """Replace every null cell of ``data`` in place with an interpolated value.

    Columns are processed one after another and rows from top to bottom,
    so a value filled earlier in a column can serve as a neighbour for a
    later gap in that same column.

    Args:
        data: Table to complete.
            NOTE(review): assumes the default 0..len-1 integer row index
            (as produced by ``read_excel(..., header=None)``) - confirm
            before using with other indexes.

    Returns:
        The same ``data`` object, for convenience.
    """
    for col in data.columns:
        # Compute the null mask once per column; filling one cell does not
        # change which other cells were missing.
        missing_rows = data.index[data[col].isnull()]
        for row in missing_rows:
            # .loc writes into the frame itself; chained ``data[col][row] = ...``
            # may write to a temporary copy and leave the frame unchanged.
            data.loc[row, col] = ployinterp(data[col], row)
            print(data.loc[row, col])
    return data


def main() -> None:
    """Read the input workbook, fill its gaps and write the result."""
    data = pd.read_excel(inputfile, header=None)
    # Optional outlier filter, disabled in the original script
    # (u'销量' is the "sales volume" column name):
    # data[u'销量'][(data[u'销量'] < 400) | (data[u'销量'] > 5000)] = None
    fill_missing(data)
    # NOTE(review): writing ".xls" depends on an Excel engine supported by
    # the installed pandas version - confirm in the target environment.
    data.to_excel(outputfile, header=None, index=False)


if __name__ == "__main__":
    main()
标签:
原文地址:http://www.cnblogs.com/caicaihong/p/5847165.html