首页 > 技术文章 > python 抓取百度音乐

tsw123 2016-03-06 13:30 原文

# coding:utf-8

import urllib2
import re
import urllib
import chardet
from json import *

# Scrape one Baidu Music tag page, resolve every song id found on it to a
# download link through the songlink endpoint, and save each track as
# "<song name>.mp3" in the current working directory.
category = '经典老歌'
url = 'http://music.baidu.com/tag/' + category
url_songs = 'http://play.baidu.com/data/music/songlink'
try:
    # Fetch the tag page; always release the connection, even if read() fails.
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        content = response.read()
    finally:
        response.close()

    # Each song <li> carries HTML-escaped JSON in its data-songitem attribute;
    # the capture group is the value that follows the "sid" key.
    patt_str = '<li data-songitem = \'{&quot;songItem&quot;:{&quot;sid&quot;:(.*?),.*?</li>'
    pattern = re.compile(patt_str, re.S)
    songIds = pattern.findall(content)

    song_data_list = []
    if songIds:
        # Passing a data argument makes urllib2 issue a POST; the endpoint
        # receives the ids as one comma-separated "songIds" form field.
        data_encoded = urllib.urlencode({"songIds": ",".join(songIds)})
        songList = urllib2.urlopen(url_songs, data_encoded)
        try:
            songListJson = songList.read()
        finally:
            songList.close()
        song_dict = JSONDecoder().decode(songListJson)
        # Tolerate a missing or null "data"/"songList" instead of raising
        # AttributeError/TypeError on an unexpected response.
        song_data_list = (song_dict.get('data') or {}).get('songList') or []

    for song_data in song_data_list:
        song_name = song_data.get('songName')
        song_link = song_data.get('songLink')
        if not song_name or not song_link:
            # Nothing to download, or no name to save it under.
            continue
        download = urllib.urlopen(song_link)
        try:
            music = download.read()
        finally:
            download.close()
        # A '/' in the title would otherwise be interpreted as a directory.
        file_name = song_name.replace('/', '_') + '.mp3'
        # Binary mode: text mode corrupts audio bytes on platforms that
        # translate line endings. The with-block closes the file handle.
        with open(file_name, 'wb') as mp3_file:
            mp3_file.write(music)

except urllib2.URLError as e:
    # HTTPError carries .code; a plain URLError carries .reason.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)

 

推荐阅读