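# Python crawler: download a Douban playlist
#
# When run, enter the playlist URL (for example
# https://music.douban.com/programme/9574867?sid=#play); the crawler
# automatically collects the download links of every track on the page.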

import requests
import simplejson as json
from bs4 import BeautifulSoup
import queue
import threading
import os

# Shared work queue: GetDown() puts Music objects on it and the worker
# threads started by reptile() take them off.
myQueue = queue.Queue()
# Module-level list; nothing in the visible code reads or writes it.
MusicList = list()
class Music:
    """One track to download: its display title and its download URL."""

    def __init__(self, title, DownUri):
        # Plain value holder; both fields are stored exactly as given.
        self.title, self.DownUri = title, DownUri

class MyThread(threading.Thread):
    """Worker thread that downloads queued tracks until the queue is empty.

    Args:
        records: A ``queue.Queue`` whose items expose ``title`` and
            ``DownUri`` attributes.
        page_num: Identifier used (as a string) for the thread's name.
    """

    def __init__(self, records, page_num):
        # Thread names are strings; convert explicitly so that any
        # identifier (including 0) becomes the name.
        threading.Thread.__init__(self, name=str(page_num))
        self._records = records

    def run(self):
        """Take items off the queue one at a time and download each."""
        while True:
            # A qsize() check followed by a blocking get() is racy: another
            # worker can take the last item in between, leaving this thread
            # blocked forever. A non-blocking get makes "queue is empty" an
            # explicit, safe exit condition.
            try:
                ll = self._records.get_nowait()
            except queue.Empty:
                return
            print(ll.title,"正在下載")
            DownMusicFile(ll.DownUri,ll.title)
            print(ll.title,"下載完成")


def Resulturi(songid, ssid, timeout=30):
    """Look up a song's download URL through Douban's songlist endpoint.

    Args:
        songid: Value sent as the ``sid`` query parameter.
        ssid: Value sent as the ``ssid`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        The value stored under the ``'r'`` key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON response has no ``'r'`` key.
    """
    uri = 'https://music.douban.com/j/songlist/get_song_url'
    # Passing the query as ``params`` lets requests URL-encode the values.
    response = requests.get(
        uri, params={'sid': songid, 'ssid': ssid}, timeout=timeout
    )
    try:
        response.raise_for_status()
        return json.loads(response.content)['r']
    finally:
        # Release the connection instead of leaving an unclosed session.
        response.close()

def GetDown(MusicUri, timeout=30):
    """Fetch a playlist page and queue every track found on it.

    Each ``<div class="song-item">`` element is read for its
    ``data-songid``, ``data-ssid`` and ``data-title`` attributes; the
    download URL is resolved with ``Resulturi`` and a ``Music`` object is
    put on the module-level ``myQueue``.

    Args:
        MusicUri: URL of the playlist page.
        timeout: Seconds to wait for the page before giving up.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        KeyError: If a song item lacks one of the expected attributes.
    """
    page = requests.get(MusicUri, timeout=timeout)
    try:
        # Fail early on an error page rather than parsing it for songs.
        page.raise_for_status()
        content = page.content
    finally:
        page.close()

    soup = BeautifulSoup(content, "html5lib")
    for item in soup.find_all("div", "song-item"):
        DownUri = Resulturi(item['data-songid'], item['data-ssid'])
        myQueue.put(Music(item['data-title'], DownUri))

def DownMusicFile(uri, title, timeout=30):
    """Stream the resource at *uri* into ``<cwd>/<title>.mp4``.

    Args:
        uri: URL of the file to download.
        title: Track title, used as the file name (without extension).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the target file cannot be written.
    """
    # Titles come from scraped page attributes. A path separator inside one
    # would make open() point into another directory (or fail), so replace
    # separators before building the file name.
    safe_title = title
    for sep in (os.sep, os.altsep):
        if sep:
            safe_title = safe_title.replace(sep, '_')
    target = os.path.join(os.getcwd(), safe_title + ".mp4")

    r = requests.get(uri, stream=True, timeout=timeout)
    try:
        # Do not save an HTTP error page as if it were the track.
        r.raise_for_status()
        with open(target, 'wb') as fd:
            # iter_content() defaults to 1-byte chunks; use a real buffer
            # size so the file is not written one byte at a time.
            for chunk in r.iter_content(chunk_size=64 * 1024):
                fd.write(chunk)
    finally:
        r.close()


def reptile(records, threadNum):
    """Start worker threads on *records* and wait for all of them to finish.

    Args:
        records: Queue of items to download, passed to every ``MyThread``.
        threadNum: Number of worker threads to start. Zero or a negative
            number starts no workers.

    Returns:
        None, once every started worker has finished.
    """
    tasks = []

    # Honour the requested worker count; workers are numbered from 1.
    for page_num in range(1, threadNum + 1):
        worker = MyThread(records, page_num)
        worker.daemon = False
        worker.start()
        tasks.append(worker)

    # join() blocks until each worker ends. The previous loop polled the
    # removed isAlive() method and appended to the list it was iterating.
    for task in tasks:
        task.join()

if __name__ == '__main__':
    # Ask for the playlist page, queue every track found on it, then start
    # the download workers on the shared queue.
    album_url = input("請輸入須要下載的專輯網址")
    GetDown(album_url)
    reptile(myQueue, 10)
# Related articles
# Related tags / search