每天的数据差不多是这样的:
抓取网页数据,进行保存:
import urllib
import time
import calendar
year_list = [2016]
month_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

# NOTE(review): the account name and password are embedded in the query
# string (and therefore in the source file); consider loading them from
# configuration or the environment instead.
REPORT_URL = "https://myview.chinanetcenter.com/api/bandwidth-channel.action?u=howbuy&p=Howbuy123&date="
XML_ROOT = "E:\\xml\\"  # modify path
PAUSE_SECONDS = 5  # delay between two consecutive requests


def dates_to_fetch(year):
    """Return the zero-padded ``YYYY-MM-DD`` strings to download for *year*.

    For 2015 every day of July through December is returned; for any
    other year only 7 June through the end of June is returned, which is
    the selection the original flat script made.
    """
    if year == 2015:
        spans = [(month, 1) for month in month_list[6:]]
    else:
        spans = [(6, 7)]

    dates = []
    for month, first_day in spans:
        last_day = calendar.monthrange(year, month)[1]
        for day in range(first_day, last_day + 1):
            dates.append("%04d-%02d-%02d" % (year, month, day))
    return dates


def target_path(year, datestring):
    """Return the file the report for *datestring* is saved to.

    The per-year folder follows *year*. The original script hard-coded
    the ``2016`` folder for every year other than 2015; for the years it
    actually handled (2015 and 2016) the resulting path is unchanged.
    """
    return XML_ROOT + str(year) + "\\" + datestring + ".xml"


def download_year(year):
    """Download one XML report per selected day of *year*.

    Sleeps ``PAUSE_SECONDS`` after every request. Errors raised by the
    download (network failure, missing target folder) propagate.
    """
    # urlretrieve lives in different modules on Python 2 and Python 3.
    try:
        from urllib import urlretrieve
    except ImportError:
        from urllib.request import urlretrieve

    for datestring in dates_to_fetch(year):
        urlretrieve(REPORT_URL + datestring,
                    filename=target_path(year, datestring))
        time.sleep(PAUSE_SECONDS)


if __name__ == "__main__":
    for year in year_list:
        download_year(year)

# Next step: process each day's data (average, maximum, minimum and the
# statistics of values that stay above 130) and generate an .xls sheet.
import xml.dom.minidom
import urllib
import time
import calendar
import os
import xlrd
import xlwt
from xlwt import *
datalist = []

DAILY_INPUT_DIR = "E:\\zh\\"  # modify path
DAILY_OUTPUT_XLS = 'E:\\mini6.xls'  # modify path
DAILY_SHEET_NAME = 'tongji'


def day_label(filename):
    """Return *filename* without a trailing ``.xml`` extension.

    ``str.strip(".xml")`` removes any of the characters ``.``, ``x``,
    ``m`` and ``l`` from both ends, so it mangles names such as
    ``max.xml``; only the suffix is removed here.
    """
    if filename.lower().endswith(".xml"):
        return filename[:-4]
    return filename


def parse_bandwidth_values(xml_path):
    """Return the float value of every ``<bandwidth>`` element in a file.

    Elements without any child node are skipped instead of raising
    ``AttributeError``.
    """
    root = xml.dom.minidom.parse(xml_path).documentElement
    values = []
    for node in root.getElementsByTagName('bandwidth'):
        if node.firstChild is not None:
            values.append(float(node.firstChild.data))
    return values


def summarize_values(values):
    """Return ``(maximum, average)`` of *values*, or ``None`` if empty."""
    if not values:
        return None
    return max(values), sum(values) / len(values)


def collect_daily_stats(root_dir):
    """Walk *root_dir* and return ``(day, maximum, average)`` per XML file.

    Each file is opened relative to the directory ``os.walk`` found it
    in, so files inside sub-directories resolve correctly. Files that do
    not end in ``.xml`` and files without any bandwidth value are
    skipped. File names are visited in sorted order within a directory.
    """
    rows = []
    for path, dirnames, filenames in os.walk(root_dir):
        # Progress output kept from the original script.
        print(filenames)
        print(path)
        print(dirnames)
        for filename in sorted(filenames):
            if not filename.lower().endswith(".xml"):
                continue
            values = parse_bandwidth_values(os.path.join(path, filename))
            summary = summarize_values(values)
            if summary is None:
                continue
            day_max, day_average = summary
            rows.append((day_label(filename), day_max, day_average))
    return rows


def write_daily_stats(rows, out_path):
    """Write *rows* to sheet ``tongji`` of a new workbook at *out_path*.

    Columns are: day label, maximum, average.
    """
    book = xlwt.Workbook()
    sheet = book.add_sheet(DAILY_SHEET_NAME)
    for row_index, (day, day_max, day_average) in enumerate(rows):
        sheet.write(row_index, 0, day)
        sheet.write(row_index, 1, day_max)
        sheet.write(row_index, 2, day_average)
    book.save(out_path)


if __name__ == "__main__":
    datalist.extend(collect_daily_stats(DAILY_INPUT_DIR))
    write_daily_stats(datalist, DAILY_OUTPUT_XLS)

# This import was fused into the trailing comment of the original paste;
# it belongs to the import list of the script that follows.
import xml.dom.minidom
import urllib
import time
import calendar
import os
import xlrd
import xlwt
from xlwt import *
datalist = []

SAMPLES_INPUT_DIR = "E:\\zh\\2016\\"  # modify path
SAMPLES_OUTPUT_XLS = 'E:\\mini14.xls'  # modify path
SAMPLES_SHEET_NAME = 'tongji'


def sample_label(filename):
    """Return *filename* without a trailing ``.xml`` extension.

    Only the suffix is removed; ``str.strip(".xml")`` would also eat
    leading or trailing ``x``, ``m``, ``l`` and ``.`` characters.
    """
    if filename.lower().endswith(".xml"):
        return filename[:-4]
    return filename


def read_bandwidth_samples(xml_path):
    """Return the float value of every ``<bandwidth>`` element in a file.

    Elements without any child node are skipped instead of raising
    ``AttributeError``.
    """
    root = xml.dom.minidom.parse(xml_path).documentElement
    samples = []
    for node in root.getElementsByTagName('bandwidth'):
        if node.firstChild is not None:
            samples.append(float(node.firstChild.data))
    return samples


def collect_samples(root_dir):
    """Walk *root_dir* and return one ``(day, value)`` row per sample.

    Each file is opened relative to the directory ``os.walk`` found it
    in. Files that do not end in ``.xml`` are skipped. File names are
    visited in sorted order within a directory.
    """
    rows = []
    for path, _dirnames, filenames in os.walk(root_dir):
        for filename in sorted(filenames):
            if not filename.lower().endswith(".xml"):
                continue
            label = sample_label(filename)
            for value in read_bandwidth_samples(os.path.join(path, filename)):
                rows.append((label, value))
    return rows


def write_samples(rows, out_path):
    """Write *rows* to sheet ``tongji`` of a new workbook at *out_path*.

    Columns are: day label, sample value.
    """
    # NOTE(review): the legacy .xls format is limited to 65536 rows per
    # sheet; with one row per sample a long date range may exceed that.
    # Confirm the expected volume.
    book = xlwt.Workbook()
    sheet = book.add_sheet(SAMPLES_SHEET_NAME)
    for row_index, (day, value) in enumerate(rows):
        sheet.write(row_index, 0, day)
        sheet.write(row_index, 1, value)
    book.save(out_path)


if __name__ == "__main__":
    datalist.extend(collect_samples(SAMPLES_INPUT_DIR))
    write_samples(datalist, SAMPLES_OUTPUT_XLS)

# This article comes from the "12109115" blog; please keep this
# attribution: http://12119115.blog.51cto.com/12109115/1859896
原文地址:http://12119115.blog.51cto.com/12109115/1859896