码迷,mamicode.com
首页 > 编程语言 > 详细

python爬取网易云音乐歌单音乐

时间:2017-05-14 10:48:27      阅读:359      评论:0      收藏:0      [点我收藏+]

标签:download   blank   res   idt   odi   span   exce   splay   并且   

在网易云音乐中第一页歌单的url:http://music.163.com/#/discover/playlist/

依次第二页:http://music.163.com/#/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=35

依次第三页:http://music.163.com/#/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=70

然后查看歌单页面框架(iframe)的源代码:

技术分享

从图中的源代码可以得到每个歌单的url:eg:http://music.163.com/#/playlist?id=696806036

然后相应的歌单页面中可以得到歌单中每首歌的名字和歌手:

然后用歌名和歌手,通过百度音乐搜索api接口获得songid,api的地址是url = "http://sug.music.baidu.com/info/suggestion"

截图来自:贴吧

技术分享技术分享

然后用获取到的songid调用百度音乐免费API接口:http://music.baidu.com/data/music/fmlink,获取songLink进行下载,并且将songLink指向的音频保存为本地的.flac文件

eg:http://music.baidu.com/data/music/fmlink?rate=320&songIds=242078437&type=flac


下载结果:

技术分享

代码:

# -*- coding: utf-8 -*-
import re
import urllib
import urllib2
import os
import stat
import itertools
import re
import sys
import requests
import json
import time
import socket
import urlparse
import csv
import random
from datetime import datetime, timedelta
import lxml.html

from zipfile import ZipFile
from StringIO import StringIO
from downloader import Downloader
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser
from itertools import product
import sys
# Python 2 only: make UTF-8 the implicit codec used when byte strings and
# unicode text are mixed (song titles and artist names are Chinese).
# Python 3 has neither a builtin ``reload`` nor ``sys.setdefaultencoding``,
# so the hack is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)  # site.py removes setdefaultencoding; reloading restores it
    sys.setdefaultencoding("utf8")

# Site root that relative playlist links (e.g. /playlist?id=...) are appended to.
URL = "http://music.163.com"
# NOTE(review): NUM is not referenced by any code visible in this file.
NUM = 5
def download(url, user_agent="wswp", num_try=2):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address to request.
        user_agent: Value sent in the ``User-Agent`` request header.
        num_try: How many more times a 5xx server error may be retried.

    Returns:
        The response body as returned by ``read()``, or ``None`` when the
        request failed and was not (successfully) retried.
    """
    # The header name needs a hyphen. With the previous "User_agent" key the
    # value was sent as an unrelated header and urllib2's default agent
    # string was still used.
    headers = {"User-Agent": user_agent}
    request = urllib2.Request(url, headers=headers)
    try:
        return urllib2.urlopen(request).read()
    except urllib2.URLError as e:
        print("Download error %s" % (e.reason,))
        # Only HTTP 5xx responses are retried; errors without a status code
        # (e.g. connection failures) give up immediately.
        if num_try > 0 and 500 <= getattr(e, "code", 0) < 600:
            return download(url, user_agent, num_try - 1)
        return None


def get_song_list(url):
    """Return the song titles found on the playlist page at ``url``.

    Args:
        url: Address of a playlist page.

    Returns:
        A list with the text captured from every ``<li><a ...>text</a></li>``
        item in the page, in document order. Empty when the page could not
        be downloaded.
    """
    html = download(url)
    if html is None:
        # download() already reported the error; there is nothing to parse.
        return []
    # NOTE(review): an earlier version also matched <ul class="f-hide">...</ul>
    # but never used the result, so that dead search was dropped. The item
    # pattern is applied to the whole page, exactly as before.
    item_pattern = r'<li><a .*?>(.*?)</a></li>'
    return re.findall(item_pattern, html, re.S | re.M)

# Collect NetEase Cloud Music playlist links, e.g. /playlist?id=706469943
def get_play_list(html):
    """Extract the playlist links from a playlist-discovery page.

    Args:
        html: Markup of the discovery page, or ``None``/empty when the
            download failed.

    Returns:
        A list with the ``href`` value of every ``<a>`` element whose class
        attribute is ``tit f-thide s-fc0``. Empty when ``html`` is empty.
    """
    if not html:
        return []
    soup = BeautifulSoup(html, "html.parser")
    anchors = soup.find_all(name="a", attrs={"class": "tit f-thide s-fc0"})
    return [anchor.get("href") for anchor in anchors]

def download_music(url, song_name):
    """Download ``url`` and save it as ``songs/<song_name>.flac``.

    Args:
        url: Direct link to the audio data.
        song_name: Name used for the output file (without extension).

    Nothing is written when the download fails.
    """
    print("Downloading song_name:" + song_name)
    data = download(url)
    if data is None:
        # Fetch first so a failed download does not leave an empty file.
        print("Download failed, nothing saved for " + song_name)
        return
    path = "songs"
    if not os.path.isdir(path):
        os.mkdir(path)
    # A path separator inside the title would be read as a sub-directory.
    file_name = song_name.replace("/", "_").replace("\\", "_") + ".flac"
    with open(os.path.join(path, file_name), "wb") as f:
        f.write(data)

def download_song(song_name, singer):
    """Look a song up on Baidu Music and hand its link to ``download_music``.

    Args:
        song_name: Title of the song.
        singer: Artist name; it is appended to the title to form both the
            search term and the output file name.

    Returns:
        ``0`` when any lookup step fails, ``None`` once the song link has
        been passed to ``download_music``.
    """
    # Step 1: Baidu Music search-suggestion endpoint, used to get the songid.
    url = "http://sug.music.baidu.com/info/suggestion"
    mess = song_name + singer
    payload = {"word": mess, "version": "2.1.1", "from": 0}
    r = requests.get(url, params=payload)
    try:
        # r.text is already decoded, so json.loads needs no encoding argument.
        d = json.loads(r.text)
        song_id = d["data"]["song"][0]["songid"]
    except (ValueError, KeyError, IndexError, TypeError):
        # Covers a non-JSON reply, a missing "data"/"song" key and an empty
        # result list.
        print("do not have flac")
        return 0

    print("song_id:%s" % song_id)

    # Step 2: Baidu Music free API endpoint, used to resolve the song link.
    # Example of a full request:
    # http://music.baidu.com/data/music/fmlink?rate=320&songIds=242078437&type=&callback=cb_download&_t=1468380564513&format=json
    url = "http://music.baidu.com/data/music/fmlink"
    payload = {"songIds": song_id, "type": "mp3"}
    r = requests.get(url, params=payload)
    try:
        d = json.loads(r.text)
        songlink = d["data"]["songList"][0]["songLink"]
    except (ValueError, KeyError, IndexError, TypeError):
        # TypeError covers a null reply and "data" being an empty string;
        # the old combined condition indexed d even when it was None.
        return 0
    if not songlink or len(songlink) < 10:
        print("do not have flac")
        return 0
    print("Song Source: " + songlink)
    download_music(songlink, mess)

def get_song_singer(url):
    """Return the first artist name of every song on a playlist page.

    The page carries its track data as JSON inside a hidden
    ``<textarea style="display:none;">`` element; that JSON is parsed here.

    Args:
        url: Address of a playlist page.

    Returns:
        A list with one artist name per song, in the order of the JSON data.
        Empty when the page could not be downloaded, has no such textarea,
        or the textarea does not hold valid JSON.
    """
    html = download(url)
    if not html:
        return []
    soup = BeautifulSoup(html, "html.parser")
    results = soup.find_all(name="textarea", attrs={"style": "display:none;"})
    if not results:
        return []
    # Read the element's text directly instead of slicing the opening and
    # closing tags off str(tag), which depended on the exact tag spelling.
    try:
        songs = json.loads(results[0].get_text())
    except ValueError:
        return []
    singer_list = []
    for song in songs:
        artists = song.get("artists") or []
        # The caller pairs this list with the song titles by position, so a
        # song without artists still gets an (empty) entry.
        singer_list.append(artists[0]["name"] if artists else "")
    return singer_list



def main():
    """Crawl the first four hot-playlist pages and try to download each song.

    For every playlist linked from a page, the song titles and artist names
    are fetched and paired by position, and each pair is passed to
    ``download_song``. A final line reports how many songs were handed off
    for download.
    """
    num = 0
    for flag in range(1, 5):
        if flag > 1:
            # Each listing page holds 35 playlists; later pages use an offset.
            page = (flag - 1) * 35
            url = "http://music.163.com/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=" + str(page)
        else:
            url = "http://music.163.com/discover/playlist"
        print(url)
        html = download(url)
        if html is None:
            # The page could not be fetched; move on to the next one.
            continue
        for link in get_play_list(html):
            if not link:
                continue
            song_list_url = URL + link
            print(song_list_url)
            song_names = get_song_list(song_list_url)
            singer_names = get_song_singer(song_list_url)
            # zip stops at the shorter list, like the former min() index loop.
            for song_name, singer in zip(song_names, singer_names):
                print(singer)
                print(song_name)
                # download_song returns 0 on failure. Count only the songs it
                # did not give up on, rather than every artist listed.
                if download_song(song_name, singer) != 0:
                    num += 1
                print("\n")

    print("Download " + str(num) + " music\n")


if __name__ == "__main__":
    main()

 

python爬取网易云音乐歌单音乐

标签:download   blank   res   idt   odi   span   exce   splay   并且   

原文地址:http://www.cnblogs.com/chenyang920/p/6851486.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!