# 标签:apple session image cep requests 列表 lxml html 存放位置
#!/usr/bin/env python
# _*_ coding: utf-8 _*_
# @Time: 2020/1/29 11:31
# @Author: lixing
# @File: getWangYiYunMusic.py
# @desc: 爬取网易云上的音乐文件 原文链接:https://blog.csdn.net/t8116189520/article/details/80251473
import requests
from bs4 import BeautifulSoup
import urllib.request
# 爬取歌单中的所有音乐名字和下载链接地址
def getAllMusicList(play_url, timeout=30):
    """Scrape a playlist page for song names and their download URLs.

    Args:
        play_url: URL of the playlist page to fetch.
        timeout: Seconds to wait for the server before the request is
            aborted. Without a timeout the request can block forever.

    Returns:
        A list of ``[song_name, download_url]`` lists, one for every ``<a>``
        tag (that has an ``href``) inside the page's ``<ul class="f-hide">``
        element. The list is empty when the page has no such element.

    Raises:
        requests.RequestException: if the page cannot be fetched (connection
            failure, timeout, ...).
    """
    headers = {
        'Referer': 'http://music.163.com/',
        'Host': 'music.163.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    }
    # Fetch the page. The context manager releases the session's connections
    # once the (eagerly read) response body is in memory.
    with requests.session() as session:
        page = session.get(play_url, headers=headers, timeout=timeout).content

    # Locate the list element that holds one <a> per song.
    soup = BeautifulSoup(page, 'lxml')
    song_list = soup.find('ul', {'class': 'f-hide'})
    # find() returns None when the element is absent; treat that as "no songs"
    # instead of failing with AttributeError.
    anchors = song_list.find_all('a') if song_list is not None else []

    lists = []
    for music in anchors:
        print(music)
        href = music.get('href')
        if not href:
            # An anchor without a target cannot be turned into a download URL.
            continue
        # Drop the first five characters of the href and append the rest to
        # the download endpoint (presumably href is "/song?id=<n>", leaving
        # "?id=<n>" -- verify against the live page).
        musicUrl = 'http://music.163.com/song/media/outer/url' + href[5:] + '.mp3'
        musicName = music.text
        # Each song is stored as a [name, url] pair.
        lists.append([musicName, musicUrl])
    print(lists)
    return lists
# 下载歌单lists中的所有音乐
def downloadMusicList(lists, save_dir='./oldMusic'):
    """Download every song in ``lists`` into ``save_dir`` as ``<name>.mp3``.

    Downloading is best-effort: a song that fails is reported on stdout and
    the loop continues with the next one.

    Args:
        lists: Iterable of ``[song_name, download_url]`` pairs.
        save_dir: Directory that receives the files. It is created when it
            does not exist yet.

    Returns:
        None.

    Raises:
        OSError: if ``save_dir`` cannot be created.
    """
    import os  # local import: this file's top-level imports do not provide os

    # urlretrieve does not create missing directories, so make sure the
    # target exists before the first download.
    os.makedirs(save_dir, exist_ok=True)

    # Song names are scraped from a web page; characters that act as path
    # separators or are illegal in file names on common platforms are replaced
    # so a name can neither break the path nor escape save_dir.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    for item in lists:
        name = item[0]
        url = item[1]
        target = os.path.join(save_dir, '%s.mp3' % str(name).translate(unsafe_chars))
        try:
            print('正在下载', name)
            urllib.request.urlretrieve(url, target)
        except Exception as err:
            # Deliberately broad: one bad song must not abort the batch.
            # Unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit
            # propagate, and the reason for the failure is shown.
            print('下载失败', err)
        else:
            print('下载成功')
def main(url='https://music.163.com/playlist?id=22341433'):
    """Scrape one playlist and download all of its songs.

    Args:
        url: Playlist page URL. Note that the ``#/`` that appears in front of
            ``playlist`` in the browser address bar has to be removed.
    """
    # Collect the song names and their download URLs, then fetch the files.
    lists = getAllMusicList(url)
    downloadMusicList(lists)


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
# 标签:apple session image cep requests 列表 lxml html 存放位置
# 原文地址:https://www.cnblogs.com/paopaolx/p/12258435.html