Python 之網絡爬蟲爬取網易雲音樂

#!/usr/bin/env python
# -*- coding: utf-8 -*- # @Time : 2019/9/9 18:16
# @Author : 小J
# @File : text.py
# @Software: PyCharm
#coding:utf-8
#導入第三方庫
import os
import re

import requests
from lxml import etree

# Request headers sent with every call so the crawler presents itself as a
# regular desktop browser.
header = {
    # Referer pointing at the music site itself.
    "referer": "https://music.163.com/",
    # Desktop Chrome user-agent string (split only for line length).
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.142 Safari/537.36"
    ),
}  # two of what the original author calls the crawler's "three essentials"
#第一步:請求網絡HTML數據
def _safe_file_name(name):
    """Return *name* with characters that Windows forbids in file names replaced by '_'."""
    # \ / : * ? " < > | and control characters are rejected by Windows, and a
    # file name may not end with a dot or a space there.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name or '')
    return cleaned.strip().rstrip('. ')


def get_request_data(url, save_dir=r'E:\網絡爬取', timeout=30):
    """Download every song linked from a playlist page and save it as MP3.

    The page at *url* is fetched with the module-level ``header`` dict, and
    every ``<a href=...>`` under ``<ul class="f-hide">`` is treated as one
    song: the text after the last ``=`` in the href is used as the song id
    and the link text as the file name.

    Args:
        url: Address of the playlist page.
        save_dir: Directory the ``.mp3`` files are written to; created when
            it does not exist yet. Defaults to the originally hard-coded
            location.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: The playlist page itself could not be
            fetched (connection error, timeout or HTTP error status).
        OSError: The target directory or a file could not be created.

    A song whose download fails is reported and skipped so that the
    remaining songs are still processed.
    """
    page = requests.get(url, headers=header, timeout=timeout)
    page.raise_for_status()
    page.encoding = 'utf-8'
    tree = etree.HTML(page.text)

    # Read href and title from the same element so they can never get out of
    # step, which two independent XPath result lists could.
    anchors = tree.xpath('//ul[@class="f-hide"]/li/a[@href]')

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    for index, anchor in enumerate(anchors, start=1):
        song_id = anchor.get('href').split('=')[-1]
        # Fall back to the id (or the position) when the title is empty or
        # consists only of characters that cannot appear in a file name.
        file_name = (_safe_file_name(anchor.text)
                     or _safe_file_name(song_id)
                     or str(index))
        song_url = 'http://music.163.com/song/media/outer/url?id=%s' % song_id
        file_path = os.path.join(save_dir, '%s.mp3' % file_name)

        # Download before opening the file so a failed request does not
        # leave an empty .mp3 behind.
        try:
            song = requests.get(song_url, headers=header, timeout=timeout)
            song.raise_for_status()
        except requests.RequestException as error:
            print('第' + str(index) + '數據保存失敗: ' + str(error))
            continue

        with open(file_path, 'wb') as file:
            file.write(song.content)

        # Report success only once the bytes are actually on disk.
        print('第' + str(index) + '數據保存成功')
        print(file_path)
# Script entry: crawl this playlist page and download its songs.
get_request_data(url='http://music.163.com/playlist?id=2948214773')

相關文章
相關標籤/搜索