戴上耳機, 這個世界與我無關...
讓咱們用音樂洗滌心靈吧...
咱們從哪一個網站爬取資源呢?
專治各類不服...
打開酷狗官網, 可以看到搜索框, 咱們要爬取的數據就是搜索歌曲後,
酷狗後臺返回的歌曲列表以及每首歌的歌曲信息(歌詞、作者、url等)
敲F12鍵進入開發者模式, 選擇Network - All (這裏就是酷狗前後臺交互的全部請求列表)
這麼多請求, 我應該選哪一個? 一個一個試?
然後查看返回的json數據
{
"status": 1,
"error_code": 0,
"data": {
"page": 1,
"tab": "所有",
"lists": [
{
"SongName": "雨一直下",
"OwnerCount": 61910,
"MvType": 2,
"TopicRemark": "",
"SQFailProcess": 4,
"Source": "",
"Bitrate": 128,
"HQExtName": "mp3",
"SQFileSize": 37489904,
"ResFileSize": 0,
"AudioCdn": 100,
"MvTrac": 3,
"SQDuration": 293,
"ExtName": "mp3",
"Auxiliary": "",
"SongLabel": "",
"Scid": 339102,
"OriSongName": "雨一直下",
"FailProcess": 4,
"SQBitrate": 1022,
"HQBitrate": 320,
"Audioid": 339102,
"HiFiQuality": 2,
"Grp": [
{
"SongName": "雨一直下",
"OwnerCount": 31,
"MvType": 2,
"TopicRemark": "",
"SQFailProcess": 4,
"Source": "",
"Bitrate": 128,
"HQExtName": "mp3",
"SQFileSize": 37489904,
"ResFileSize": 0,
"AudioCdn": 100,
"MvTrac": 3,
"SQDuration": 293,
"ExtName": "mp3",
"Auxiliary": "",
"SongLabel": "",
"Scid": 339102,
"OriSongName": "雨一直下",
"FailProcess": 4,
"SQBitrate": 1022,
"HQBitrate": 320,
"Audioid": 339102,
"HiFiQuality": 2,
"OriOtherName": "",
"AlbumPrivilege": 8,
"TopicUrl": "",
"SuperFileHash": "",
"ASQPrivilege": 10,
"M4aSize": 1225472,
"IsOriginal": 0,
"Privilege": 8,
"ResBitrate": 0,
"FileHash": "8AA6B442D0541FE6645611A108E6FD32",
"SQPayType": 3,
"HQPrice": 200,
"Type": "audio",
"trans_param": {
"cid": 5111971,
"pay_block_tpl": 1,
"musicpack_advance": 0,
"display_rate": 0,
"display": 0
},
"SourceID": 0,
"A320Privilege": 10,
"FileName": "張宇 - 雨一直下",
"AlbumID": "973971",
"ID": "32163792",
"SuperFileSize": 0,
"QualityLevel": 3,
"SQFileHash": "7F0933165786A9BC9858A8B56EED0D43",
"AlbumName": "男人的好 新歌+精選",
"HQPrivilege": 10,
"SuperBitrate": 0,
"SuperDuration": 0,
"MixSongID": "32163792",
"ResFileHash": "",
"PublishAge": 255,
"SuperExtName": "",
"HQFileHash": "32CB70DDFF57B235550C75B5FC46D030",
"HQPkgPrice": 1,
"Duration": 293,
"FileSize": 4694905,
"OtherName": "",
"SQPkgPrice": 1,
"PkgPrice": 1,
"HQFileSize": 11735431,
"HQFailProcess": 4,
"OldCpy": 1,
"SQPrivilege": 10,
"SQPrice": 200,
"ResDuration": 0,
"SingerId": [
3537
],
"Price": 200,
"HQPayType": 3,
"SingerName": "張宇",
"Publish": 1,
"MvHash": "E8BD1926CF6298D452E16F8904CB795A",
"SQExtName": "flac",
"HQDuration": 293,
"PayType": 3,
"HasAlbum": 1,
"mvTotal": 0,
"Accompany": 1
},
{
"SongName": "雨一直下",
"OwnerCount": 12,
"MvType": 2,
"TopicRemark": "",
"SQFailProcess": 4,
"Source": "",
"Bitrate": 128,
"HQExtName": "mp3",
"SQFileSize": 37489904,
"ResFileSize": 0,
"AudioCdn": 100,
"MvTrac": 3,
"SQDuration": 293,
"ExtName": "mp3",
"Auxiliary": "",
"SongLabel": "",
"Scid": 339102,
"OriSongName": "雨一直下",
"FailProcess": 4,
"SQBitrate": 1022,
"HQBitrate": 320,
"Audioid": 339102,
"HiFiQuality": 2,
"OriOtherName": "",
"AlbumPrivilege": 8,
"TopicUrl": "",
"SuperFileHash": "",
"ASQPrivilege": 10,
"M4aSize": 1225472,
"IsOriginal": 0,
"Privilege": 8,
"ResBitrate": 0,
"FileHash": "8AA6B442D0541FE6645611A108E6FD32",
"SQPayType": 3,
"HQPrice": 200,
"Type": "audio",
"trans_param": {
"cid": 31864779,
"pay_block_tpl": 1,
"musicpack_advance": 0,
"display_rate": 0,
"display": 0
},
"SourceID": 0,
"A320Privilege": 10,
"FileName": "張宇 - 雨一直下",
"AlbumID": "2400135",
"ID": "62076604",
"SuperFileSize": 0,
"QualityLevel": 3,
"SQFileHash": "7F0933165786A9BC9858A8B56EED0D43",
"AlbumName": "重拾男人心",
"HQPrivilege": 10,
"SuperBitrate": 0,
"SuperDuration": 0,
"MixSongID": "62076604",
"ResFileHash": "",
"PublishAge": 255,
"SuperExtName": "",
"HQFileHash": "32CB70DDFF57B235550C75B5FC46D030",
"HQPkgPrice": 1,
"Duration": 293,
"FileSize": 4694905,
"OtherName": "",
"SQPkgPrice": 1,
"PkgPrice": 1,
"HQFileSize": 11735431,
"HQFailProcess": 4,
"OldCpy": 1,
"SQPrivilege": 10,
"SQPrice": 200,
"ResDuration": 0,
"SingerId": [
3537
],
"Price": 200,
"HQPayType": 3,
"SingerName": "張宇",
"Publish": 1,
"MvHash": "E8BD1926CF6298D452E16F8904CB795A",
"SQExtName": "flac",
"HQDuration": 293,
"PayType": 3,
"HasAlbum": 1,
"mvTotal": 0,
"Accompany": 1
}
],
"OriOtherName": "",
"AlbumPrivilege": 8,
"TopicUrl": "",
"SuperFileHash": "",
"ASQPrivilege": 10,
"M4aSize": 1225472,
"IsOriginal": 1,
"Privilege": 8,
"ResBitrate": 0,
"FileHash": "8AA6B442D0541FE6645611A108E6FD32",
"SQPayType": 3,
"HQPrice": 200,
"trans_param": {
"cid": 2456823,
"pay_block_tpl": 1,
"musicpack_advance": 0,
"display_rate": 0,
"display": 0
},
"Type": "audio",
"FoldType": 0,
"SourceID": 0,
"A320Privilege": 10,
"FileName": "張宇 - 雨一直下",
"AlbumID": "982663",
"ID": "32243475",
"SuperFileSize": 0,
"QualityLevel": 3,
"SQFileHash": "7F0933165786A9BC9858A8B56EED0D43",
"AlbumName": "雨一直下",
"HQPrivilege": 10,
"SuperBitrate": 0,
"SuperDuration": 0,
"MixSongID": "32243475",
"ResFileHash": "",
"PublishAge": 255,
"SuperExtName": "",
"HQFileHash": "32CB70DDFF57B235550C75B5FC46D030",
"HQPkgPrice": 1,
"Duration": 293,
"FileSize": 4694905,
"OtherName": "",
"SQPkgPrice": 1,
"PkgPrice": 1,
"HQFileSize": 11735431,
"HQFailProcess": 4,
"OldCpy": 1,
"SQPrivilege": 10,
"SQPrice": 200,
"ResDuration": 0,
"SingerId": [
3537
],
"Price": 200,
"HQPayType": 3,
"SingerName": "張宇",
"Publish": 1,
"MvHash": "E8BD1926CF6298D452E16F8904CB795A",
"SQExtName": "flac",
"HQDuration": 293,
"PayType": 3,
"HasAlbum": 1,
"mvTotal": 0,
"Accompany": 1
}
],
"chinesecount": 4,
"searchfull": 1,
"correctiontype": 0,
"subjecttype": 0,
"aggregation": [
{
"key": "DJ",
"count": 0
},
{
"key": "現場",
"count": 0
},
{
"key": "廣場舞",
"count": 0
},
{
"key": "伴奏",
"count": 0
},
{
"key": "鈴聲",
"count": 0
}
],
"allowerr": 0,
"correctionsubject": "",
"correctionforce": 0,
"total": 36,
"istagresult": 0,
"istag": 0,
"correctiontip": "",
"pagesize": 20
}
}
複製代碼
requests的json()方法, 可以把json字符串, 轉成python可以識別的dict或者list
這些數據是咱們通過訪問以下連結得到的
https://songsearch.kugou.com/song_search_v2?callback=jQuery112409264783558861354_1559273651647&keyword=%E9%9B%A8%E4%B8%80%E7%9B%B4%E4%B8%8B&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1559273651658
複製代碼
這也太長了, 有些參數是不是沒有必要啊?
咱們經過postman篩一下...
postman, 接口測試工具, 最好的接口測試工具, 不怕違反廣告法...
安裝以後打開, 粘貼咱們以前的url
然後點擊音樂詳情頁
咱們使用hash值
8AA6B442D0541FE6645611A108E6FD32
來搜索請求...
咱們找到了請求數據的url
https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery19101729051683512821_1559276678103&hash=8AA6B442D0541FE6645611A108E6FD32&album_id=982663&dfid=3d7kV00OqOe70A00N74FV8Ue&mid=15b47ea8a82b0a8111b91cccb1c52055&platid=4&_=1559276678104
複製代碼
參數依舊很多, 咱們使用postman過濾一下參數...
咱們找到了想要的參數, play_url
mid是從cookie中得到的, 變化的頻率不高
整理一下思路:
完整源碼
import requests
res = ''
def get_music_list(music_name):
    """Search Kugou for ``music_name`` and print a numbered list of the hits.

    Args:
        music_name: Search keyword entered by the user.

    Returns:
        A list with one dict per search hit, in response order. Each dict
        has the keys ``name`` (the hit's ``FileName``), ``hash`` (its
        ``FileHash``) and ``aid`` (its ``AlbumID``).
    """
    print('歌曲[{}]的列表以下, 請選擇序號:'.format(music_name))
    # Hand the keyword to requests via ``params`` so it is percent-encoded;
    # concatenating it into the URL breaks on keywords containing characters
    # such as '&', '#' or '+'.
    hits = requests.get(
        'https://songsearch.kugou.com/song_search_v2',
        params={'keyword': music_name},
    ).json()['data']['lists']
    music_list = []
    # Numbering is 1-based because that is what the user is asked to type.
    for number, music in enumerate(hits, start=1):
        print('{} -- {}'.format(number, music['FileName']))
        music_list.append({
            'name': music['FileName'],
            'hash': music['FileHash'],
            'aid': music['AlbumID'],
        })
    return music_list
def get_play_url(music_hash,music_aid):
    """Fetch the playable audio URL for one song.

    Args:
        music_hash: Hash identifying the song; sent as the ``hash`` query
            parameter.
        music_aid: Album id of the song. When falsy, no ``album_id``
            parameter is sent. Strings and integers are both accepted.

    Returns:
        The ``play_url`` value found under ``data`` in the JSON response.
    """
    query = {
        'hash': music_hash,
        # Hard-coded ``mid`` value; the same one is sent on every call.
        'mid': '4d9f1c937f33674bb55a4fa9096e97e8',
    }
    if music_aid:
        query['album_id'] = music_aid
    # "r=play/getdata" stays in the URL verbatim; requests percent-encodes
    # ``params`` and appends them after it, instead of the query string being
    # glued together by hand (which also failed for non-string album ids).
    response = requests.get(
        'https://wwwapi.kugou.com/yy/index.php?r=play/getdata',
        params=query,
    )
    return response.json()['data']['play_url']
def download_music(name,url):
    """Download the audio at ``url`` and save it as ``<name>.mp3``.

    The file is written relative to the current working directory.

    Args:
        name: Base name of the output file (also used in progress messages).
        url: Address of the audio file to fetch.

    Raises:
        ValueError: If ``url`` is empty.
        requests.HTTPError: If the server answers with an error status.
    """
    # An empty URL can never be fetched; fail with a clear message before
    # touching the file system.
    if not url:
        raise ValueError('no download url available for [{}]'.format(name))
    print('[{}] 正在下載...'.format(name))
    # Fetch first and open the file only once the response is known to be
    # good, so a failed request does not leave an empty or bogus .mp3 behind.
    response = requests.get(url)
    response.raise_for_status()
    with open('{}.mp3'.format(name),'wb') as file:
        file.write(response.content)
    print('[{}] 已經下載完畢!'.format(name))
if __name__ == "__main__":
    # Interactive flow: search for a song, let the user pick one hit by its
    # 1-based number, resolve that hit's audio URL, then download it.
    music_name = input('請輸入你想聽的歌曲: ')
    music_list = get_music_list(music_name)
    if not music_list:
        # Nothing to choose from, so asking for a number would be pointless.
        raise SystemExit('沒有找到歌曲[{}]'.format(music_name))
    music_index = input('請輸入序號: ')
    try:
        position = int(music_index) - 1
    except ValueError:
        raise SystemExit('序號無效: {}'.format(music_index))
    # Check the range explicitly: without this, an input of 0 becomes index
    # -1 and silently selects the last hit instead of being rejected.
    if not 0 <= position < len(music_list):
        raise SystemExit('序號必須在 1 到 {} 之間'.format(len(music_list)))
    chosen = music_list[position]
    music_play_url = get_play_url(chosen['hash'], chosen['aid'])
    music_name = chosen['name']
    download_music(music_name, music_play_url)
複製代碼
貓哥教你寫爬蟲 000--開篇.md
貓哥教你寫爬蟲 001--print()函數和變量.md
貓哥教你寫爬蟲 002--作業-打印皮卡丘.md
貓哥教你寫爬蟲 003--數據類型轉換.md
貓哥教你寫爬蟲 004--數據類型轉換-小練習.md
貓哥教你寫爬蟲 005--數據類型轉換-小作業.md
貓哥教你寫爬蟲 006--條件判斷和條件嵌套.md
貓哥教你寫爬蟲 007--條件判斷和條件嵌套-小作業.md
貓哥教你寫爬蟲 008--input()函數.md
貓哥教你寫爬蟲 009--input()函數-人工智能小愛同學.md
貓哥教你寫爬蟲 010--列表,字典,循環.md
貓哥教你寫爬蟲 011--列表,字典,循環-小作業.md
貓哥教你寫爬蟲 012--布爾值和四種語句.md
貓哥教你寫爬蟲 013--布爾值和四種語句-小作業.md
貓哥教你寫爬蟲 014--pk小遊戲.md
貓哥教你寫爬蟲 015--pk小遊戲(全新改版).md
貓哥教你寫爬蟲 016--函數.md
貓哥教你寫爬蟲 017--函數-小作業.md
貓哥教你寫爬蟲 018--debug.md
貓哥教你寫爬蟲 019--debug-作業.md
貓哥教你寫爬蟲 020--類與對象(上).md
貓哥教你寫爬蟲 021--類與對象(上)-作業.md
貓哥教你寫爬蟲 022--類與對象(下).md
貓哥教你寫爬蟲 023--類與對象(下)-作業.md
貓哥教你寫爬蟲 024--編碼&&解碼.md
貓哥教你寫爬蟲 025--編碼&&解碼-小作業.md
貓哥教你寫爬蟲 026--模塊.md
貓哥教你寫爬蟲 027--模塊介紹.md
貓哥教你寫爬蟲 028--模塊介紹-小作業-廣告牌.md
貓哥教你寫爬蟲 029--爬蟲初探-requests.md
貓哥教你寫爬蟲 030--爬蟲初探-requests-作業.md
貓哥教你寫爬蟲 031--爬蟲基礎-html.md
貓哥教你寫爬蟲 032--爬蟲初體驗-BeautifulSoup.md
貓哥教你寫爬蟲 033--爬蟲初體驗-BeautifulSoup-作業.md
貓哥教你寫爬蟲 034--爬蟲-BeautifulSoup實踐.md
貓哥教你寫爬蟲 035--爬蟲-BeautifulSoup實踐-作業-電影top250.md
貓哥教你寫爬蟲 036--爬蟲-BeautifulSoup實踐-作業-電影top250-作業解析.md
貓哥教你寫爬蟲 037--爬蟲-寶寶要聽歌.md
貓哥教你寫爬蟲 038--帶參數請求.md
貓哥教你寫爬蟲 039--存儲數據.md
貓哥教你寫爬蟲 040--存儲數據-作業.md
貓哥教你寫爬蟲 041--模擬登錄-cookie.md
貓哥教你寫爬蟲 042--session的用法.md
貓哥教你寫爬蟲 043--模擬瀏覽器.md
貓哥教你寫爬蟲 044--模擬瀏覽器-作業.md
貓哥教你寫爬蟲 045--協程.md
貓哥教你寫爬蟲 046--協程-實踐-吃什麼不會胖.md
貓哥教你寫爬蟲 047--scrapy框架.md
貓哥教你寫爬蟲 048--爬蟲和反爬蟲.md
貓哥教你寫爬蟲 049--完結撒花.md