每天的数据差不多是这样的:
抓取网页数据,进行保存:
# Download the daily bandwidth-channel XML reports and save one file per day.
#
# For 2015 every day from July through December is fetched; for any other
# year, June from the 7th to the end of the month. The script sleeps for
# PAUSE_SECONDS after each download.
import calendar
import os
import time

try:  # Python 2, which the script was originally written for
    from urllib import urlretrieve
except ImportError:  # Python 3 moved the function into urllib.request
    from urllib.request import urlretrieve

year_list = [2016]
month_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

# NOTE(security): the account name and password are sent in the query string
# and are hard-coded here exactly as in the original script. Consider loading
# them from configuration instead of keeping them in the source.
BASE_URL = "https://myview.chinanetcenter.com/api/bandwidth-channel.action?u=howbuy&p=Howbuy123&date="
OUTPUT_ROOT = "E:\\xml\\"  # modify path
PAUSE_SECONDS = 5


def iter_dates(year):
    """Yield the zero-padded ``YYYY-MM-DD`` strings fetched for *year*.

    2015 covers every day of the months in ``month_list[6:]`` (July to
    December); any other year covers June from the 7th to the last day.
    """
    if year == 2015:
        schedule = [(month, 1) for month in month_list[6:]]
    else:
        schedule = [(6, 7)]
    for month, first_day in schedule:
        last_day = calendar.monthrange(year, month)[1]
        for day in range(first_day, last_day + 1):
            # Direct formatting gives the same zero padding the original got
            # from a strptime/strftime round-trip.
            yield "%04d-%02d-%02d" % (year, month, day)


def build_url(datestring):
    """Return the report URL for the day *datestring*."""
    return BASE_URL + datestring


def target_path(year, datestring):
    """Return the path the report for *datestring* is saved to.

    The directory is derived from *year*, so each year's files land in its
    own folder (identical to the original paths for 2015 and 2016).
    """
    return OUTPUT_ROOT + str(year) + "\\" + datestring + ".xml"


def main():
    """Download every scheduled day for each year in ``year_list``."""
    for year in year_list:
        year_dir = OUTPUT_ROOT + str(year)
        # urlretrieve cannot create missing folders, so make sure the
        # destination exists before the first download.
        if not os.path.isdir(year_dir):
            os.makedirs(year_dir)
        for datestring in iter_dates(year):
            urlretrieve(build_url(datestring),
                        filename=target_path(year, datestring))
            time.sleep(PAUSE_SECONDS)


if __name__ == "__main__":
    main()
统计每天数据的平均值、最大值、最小值,并对持续大于130的值进行统计处理,最后生成xls表格:
# Build a per-day summary (maximum and average bandwidth) from the saved XML
# reports and write it to an .xls workbook, one row per file.
import calendar
import os
import time
import urllib
import xml.dom.minidom

SOURCE_DIR = "E:\\zh\\"  # modify path
REPORT_PATH = 'E:\\mini6.xls'  # modify path
SHEET_NAME = 'tongji'


def day_label(filename):
    """Return *filename* without a trailing ``.xml`` extension."""
    # str.strip(".xml") removes any of the characters '.', 'x', 'm', 'l' from
    # both ends, so it can also eat part of the name; cut the suffix instead.
    if filename.endswith(".xml"):
        return filename[:-len(".xml")]
    return filename


def read_bandwidths(xml_path):
    """Return the text of every ``bandwidth`` element in *xml_path* as floats."""
    root = xml.dom.minidom.parse(xml_path).documentElement
    return [float(node.firstChild.data)
            for node in root.getElementsByTagName('bandwidth')]


def collect_daily_stats(source_dir):
    """Return a ``(day, maximum, average)`` tuple per file under *source_dir*.

    Files that contain no ``bandwidth`` values are skipped, because neither a
    maximum nor an average can be computed for them.
    """
    rows = []
    for path, dirnames, filenames in os.walk(source_dir):
        print(filenames)
        print(path)
        print(dirnames)
        for filename in filenames:
            # Join with the directory os.walk is visiting so files found in
            # sub-directories are opened from where they actually live.
            values = read_bandwidths(os.path.join(path, filename))
            if not values:
                continue
            rows.append((day_label(filename),
                         max(values),
                         sum(values) / len(values)))
    return rows


def write_rows(rows, sheet_name, out_path):
    """Write *rows* into one sheet of a new workbook saved at *out_path*.

    Each tuple in *rows* becomes one spreadsheet row, one cell per item.
    """
    # Third-party package; imported here so the helpers above can be used
    # without it being installed.
    import xlwt

    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(sheet_name)
    for row_index, row in enumerate(rows):
        for col_index, cell in enumerate(row):
            sheet.write(row_index, col_index, cell)
    workbook.save(out_path)


if __name__ == "__main__":
    # Imports kept from the original script; nothing above uses them directly.
    import xlrd
    from xlwt import *

    write_rows(collect_daily_stats(SOURCE_DIR), SHEET_NAME, REPORT_PATH)
# Dump every bandwidth value from the saved 2016 XML reports into an .xls
# workbook: one row per value, labelled with the day it came from.
import calendar
import os
import time
import urllib
import xml.dom.minidom

SOURCE_DIR = "E:\\zh\\2016\\"  # modify path
REPORT_PATH = 'E:\\mini14.xls'  # modify path
SHEET_NAME = 'tongji'


def day_label(filename):
    """Return *filename* without a trailing ``.xml`` extension."""
    # str.strip(".xml") removes any of the characters '.', 'x', 'm', 'l' from
    # both ends, so it can also eat part of the name; cut the suffix instead.
    if filename.endswith(".xml"):
        return filename[:-len(".xml")]
    return filename


def read_bandwidths(xml_path):
    """Return the text of every ``bandwidth`` element in *xml_path* as floats."""
    root = xml.dom.minidom.parse(xml_path).documentElement
    return [float(node.firstChild.data)
            for node in root.getElementsByTagName('bandwidth')]


def collect_samples(source_dir):
    """Return a ``(day, value)`` tuple for every value in every file found.

    Values keep the order in which they appear inside each file.
    """
    rows = []
    for path, _dirnames, filenames in os.walk(source_dir):
        for filename in filenames:
            label = day_label(filename)
            # Join with the directory os.walk is visiting so files found in
            # sub-directories are opened from where they actually live.
            xml_path = os.path.join(path, filename)
            rows.extend((label, value) for value in read_bandwidths(xml_path))
    return rows


def write_rows(rows, sheet_name, out_path):
    """Write *rows* into one sheet of a new workbook saved at *out_path*.

    Each tuple in *rows* becomes one spreadsheet row, one cell per item.
    """
    # Third-party package; imported here so the helpers above can be used
    # without it being installed.
    import xlwt

    # NOTE(review): the .xls format caps a sheet at 65536 rows; confirm the
    # total number of values stays below that for the chosen date range.
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(sheet_name)
    for row_index, row in enumerate(rows):
        for col_index, cell in enumerate(row):
            sheet.write(row_index, col_index, cell)
    workbook.save(out_path)


if __name__ == "__main__":
    # Imports kept from the original script; nothing above uses them directly.
    import xlrd
    from xlwt import *

    write_rows(collect_samples(SOURCE_DIR), SHEET_NAME, REPORT_PATH)
本文出自 “12109115” 博客,请务必保留此出处http://12119115.blog.51cto.com/12109115/1859896
原文地址:http://12119115.blog.51cto.com/12109115/1859896