
Fetching the World Cup match schedule with a Python crawler

Posted: 2018-06-22 19:58


#!/usr/bin/python
# -*- coding:utf8 -*-

import requests
import re
import os  
import time 
# from urllib
import json
from bs4 import BeautifulSoup
from datetime import date

def getTimeExpire(time_play,time_gap):
	# shift a "YYYY-mm-dd HH:MM:SS" timestamp by time_gap seconds
	try:
		time_arr=time.strptime(time_play,"%Y-%m-%d %H:%M:%S")
	except:
		print('time conversion failed')
		return ''
	else:
		t1=time.mktime(time_arr)
		x = time.localtime(t1+time_gap)  # time_gap is in seconds, not milliseconds
		return time.strftime('%Y-%m-%d %H:%M:%S',x)
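# Illustrative check of getTimeExpire (example values, not from the original post):
# shifting a kickoff time by -10*60 seconds gives the moment ten minutes before the
# match, which is what the time_expire column stores. Uncomment to try it:
# print(getTimeExpire('2018-06-14 18:00:00', -10*60))  # -> 2018-06-14 17:50:00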
def getHtml():
	# TODO: fetch the page from the site directly instead of a saved copy; the site paginates the schedule
	with open('F:\\test\\python\\worldcup.html', 'r', encoding='utf-8') as f:
		content = f.read()
		soup = BeautifulSoup(content,'lxml')
		nodes=soup.select('.b-pull-refresh-content > div')
		# write the CSV header row
		filename = "F:\\test\\python\\worldcup.csv"
		csv_f = open(filename,'a')
		csv_f.writelines('team1,team2,time_expire,time_play \n')
		csv_f.close()
		for node in nodes:
			date = node.select('.wa-match-schedule-list-title')[0].get_text().strip()
			datas = node.select('.sfc-contacts-list .wa-match-schedule-list-item')
			for d in datas:
				obj={'team1':'','team2':'','time':''}
				obj['team1']=d.select('.wa-tiyu-schedule-item-name.c-line-clamp1')[0].get_text().strip()
				obj['team2']=d.select('.wa-tiyu-schedule-item-name.c-line-clamp1')[1].get_text().strip()
				obj['time_play']='2018-'+date[2:8]+''+d.select('.status-text')[0].get_text().strip()+':00'
				obj['time_expire']=getTimeExpire(obj['time_play'],-10*60)
				csv_f = open(filename,'a')
				csv_f.writelines(obj['team1']+','+obj['team2']+','+obj['time_expire']+','+obj['time_play']+'\n')
				csv_f.close()
#getHtml()
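# Sketch for the TODO in getHtml(): download the schedule page with requests instead
# of reading a saved copy. The URL below is an assumption for illustration only, and
# the live page is paginated, which this simple fetch does not handle.
def fetchHtml(url='http://tiyu.baidu.com/match/%E4%B8%96%E7%95%8C%E6%9D%AF'):
	resp = requests.get(url, timeout=3)
	resp.encoding = 'utf-8'
	return BeautifulSoup(resp.text, 'lxml')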


def getFromAPI():
	month=6
	day=11
	# covers matches from 2018-06-14 through 07-15
	for d in range(0,15):
		day+=2
		if day>30:
			month+=1
			day=1
		# %E4%B8%96%E7%95%8C%E6%9D%AF is the URL-encoded "世界杯" (World Cup)
		url="http://tiyu.baidu.com/api/match/%E4%B8%96%E7%95%8C%E6%9D%AF/live/date/2018-"+str(month)+'-'+str(day)+"/direction/after?from=self"
		time.sleep(1)
		data = json.loads(requests.get(url,timeout=3).text)
		if(data['status']=='0'):
			print('status is 0, request ok')
			for matches in data['data']:
				for m in matches['list']:
					filename = "F:\\test\\python\\worldcupFromAPI.csv"
					f = open(filename,'a')
					# only keep matches that have not started yet
					if m['startTime']>time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()):
						f.writelines(m['leftLogo']['name']+','+m['rightLogo']['name']+','+getTimeExpire(m['startTime'],-10*60)+','+m['startTime']+'\n')
					f.close()


getFromAPI()
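
If you want to sanity-check the output, a minimal read-back along these lines should work; it assumes getFromAPI() has already written worldcupFromAPI.csv to the path used above, and it opens the file with the platform default encoding, the same way the script wrote it.

import csv

with open("F:\\test\\python\\worldcupFromAPI.csv", 'r') as f:
	for row in csv.reader(f):
		print(row)  # [team1, team2, time_expire, time_play]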

  


Original post: https://www.cnblogs.com/cao-zhen/p/9215222.html
