标签:break exce ota ade http div raw 强制 频道
通过学习喜马拉雅(xmly)的爬取方法,
自己琢磨出了蜻蜓FM(qingtingfm)频道音频的爬取脚本,
特此记录一下。
# -*- coding: utf-8 -*-
"""Download every audio track of a Qingting FM channel into a local folder.

The script asks for a numeric channel id, looks the channel up through the
Qingting web API, and saves each track as ``<track name>.m4a`` inside a
per-channel folder. Live channels and paid channels are refused.
"""
import os
import re
import sys
import time

import requests

_PY2 = sys.version_info[0] == 2
if _PY2:
    # Kept from the original script: makes UTF-8 the default codec for
    # implicit str <-> unicode conversions on Python 2.
    reload(sys)  # noqa: F821 (Python 2 builtin)
    sys.setdefaultencoding('utf-8')
    _read_line = raw_input  # noqa: F821 (Python 2 builtin)
else:
    _read_line = input

hd = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:55.0) Gecko/20100101 Firefox/55.0'}
url_top = 'http://i.qingting.fm/wapi/channels/'  # base URL of Qingting's JSON endpoints
m4aurl_top = 'http://od.qingting.fm/'  # prefix joined with each track's file_path
save_root = 'E:\\蜻蜓fm下载\\'  # parent folder of the per-channel download folders

# Characters that Windows does not allow inside a file or folder name.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _safe_filename(name):
    """Return *name* with characters Windows forbids in file names replaced by '_'."""
    return _ILLEGAL_FILENAME_CHARS.sub('_', name)


def getChannelInfo(channelid):
    """Fetch the JSON record of a channel.

    Returns the decoded JSON dict when its ``code`` field is 0, otherwise None.
    """
    res = requests.get(url_top + str(channelid), headers=hd).json()
    # Per the original author's notes: code 0 means OK and 1 means "no data"
    # (a numeric id that does not exist yields 1); a non-numeric id gets a 404
    # from the server and fails later.
    if int(res['code']) == 0:
        return res
    return None


def getChannelName(channelid):
    """Return the channel's name, or None when the lookup reports no data."""
    res = getChannelInfo(channelid)
    if res is None:
        return None
    return res['data']['name']


def getM4aList(channelid, page):
    """Fetch one page of the channel's track list and return the decoded JSON.

    ``page`` is the page number; the driver below starts at page 1.
    """
    url = url_top + '%s/programs/page/%d' % (str(channelid), page)
    return requests.get(url, headers=hd).json()


def mkdir(name):
    """Create the download folder for *name* under ``save_root`` if it is missing.

    Returns True when the folder was created and False when it already existed.
    """
    path = save_root + name
    if os.path.exists(path):
        print('已存在***%s***文件夹!开始下载' % name)
        return False
    os.makedirs(path)
    print('创建***%s***文件夹成功!开始下载' % name)
    return True


def download(url, name):
    """Download *url* and save it as ``<name>.m4a`` in the current directory.

    Returns True when the file was written. Returns False, writing nothing,
    when the server answers with an HTTP error status, so that an error page
    is never stored as an audio file.
    """
    resp = requests.get(url, headers=hd)
    if not resp.ok:
        return False
    with open(name + '.m4a', 'wb') as f:
        f.write(resp.content)
    return True


def _ask_channel_id():
    """Prompt until the user enters an integer channel id and return it."""
    prompt = u'请输入要下载的频道ID(纯数字): '
    if _PY2:
        # Original author's note: raw_input shows garbled Chinese in the
        # Windows cmd console unless the prompt is passed as GBK bytes.
        prompt = prompt.encode('gbk')
    while True:
        try:
            return int(_read_line(prompt))
        except ValueError:
            # Only a non-numeric answer is retried; EOF and Ctrl-C propagate
            # so the prompt can be aborted.
            print(u'ID是数字不是字母!OK?!')


def main():
    """Ask for a channel id and download all of that channel's tracks."""
    channelid = _ask_channel_id()

    res = getChannelInfo(channelid)
    if res is None:
        print(u'频道ID不存在!')
        return

    data = res['data']
    if data['type'] == 'channel_live':
        print(u'在线节目,不能下载!')
        return
    if data['sale_type'] == 5:
        print(u'付费频道,不能下载!')
        return

    # The channel title becomes a folder name, so strip characters Windows rejects.
    folder = _safe_filename(data['name'])
    mkdir(folder)
    os.chdir(save_root + folder)

    page = 1   # page numbers start at 1
    count = 1  # running index of the track being downloaded
    while True:
        m4alist = getM4aList(channelid, page)
        page += 1
        m4ainfos = m4alist.get('data')
        # Stop on any non-zero code (the author notes that a page past the
        # end returns 1) and on an empty page, which would otherwise make
        # this loop request pages forever.
        if int(m4alist['code']) != 0 or not m4ainfos:
            break
        total = m4alist['total']
        for m4ainfo in m4ainfos:
            m4aname = m4ainfo['name']
            m4aurl = m4aurl_top + m4ainfo['file_path']
            print(u'*********下载第%s个 共%s个*********' % (str(count), total))
            if download(m4aurl, _safe_filename(m4aname)):
                print(m4aname + u' 下载成功!~')
            else:
                print(m4aname + u' 下载失败!')
            time.sleep(1)  # one-second pause between tracks
            count += 1


if __name__ == '__main__':
    main()
标签:break exce ota ade http div raw 强制 频道
原文地址:http://www.cnblogs.com/feiyusir/p/7615889.html