# -*- coding: utf-8 -*-
"""
Created on Fri Aug 01 18:09:21 2014

@author: omom

Fetch track metadata for the song ids embedded in a xiami.com widget URL.

For every id the playlist XML is downloaded, the obfuscated ``location``
field is decoded into a media URL, and the collected records are serialised
to JSON.  The download only happens when the file is run as a script.
"""
import json
import time
import urllib
from pprint import pprint

try:
    import urllib2  # Python 2
    _unquote = urllib2.unquote
except ImportError:
    # Python 3 moved these names into urllib.request / urllib.parse.
    import urllib.request as urllib2
    from urllib.parse import unquote as _unquote

src="http://www.xiami.com/widget/40537093_376239,185689,1769863609,3351090,1769863610,3562321,183942,1769736296,3418502,1770127750,1770201852,3351083,3351088,3351082,1769496545,1769496547,1769496546,3418497,_235_346_FF8719_494949_0/multiPlayer.swf"
# Alternative widget URLs, kept for reference:
#src="http://www.xiami.com/widget/40537093_1771331004,1771331002,55553,3478385,1769187978,380807,3478389,1770464110,55552,1771331001,380865,3478386,380834,380869,55670,55823,1772165872,55549,1769187987,380818,_235_346_FF8719_494949_0/multiPlayer.swf"
#src="http://www.xiami.com/widget/40537093_380863,380832,55550,380830,380837,380861,380799,380866,380808,1770464109,55559,380860,1771512727,3478391,1771331023,55711,55556,380797,1769074612,380788,380810,3478387,380852,55705,55865,1769187981,380787,380862,1770464107,1771360882,55700,1770464108,55869,55867,3478388,380835,1769187983,3364419,1769115993,1771331005,_235_346_FF8719_494949_0/multiPlayer.swf"

# The second "_"-separated field of the widget URL is a comma-separated list
# of song ids with a trailing comma.
a, b, c = src.split("_", 2)
b = b.rstrip(",")
ids = b.split(",")

music_base="http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id"

# (result key, XML tag name) pairs read from the playlist document.
_FIELD_TAGS = (
    ("title", "title"),
    ("song_id", "song_id"),
    ("url", "location"),
    ("lyric", "lyric"),
    ("background", "background"),
    ("album_id", "album_id"),
    ("album_pic_s", "pic"),
    ("album_pic", "album_pic"),
    ("album_name", "album_name"),
    ("artist_id", "artist_id"),
    ("artist", "artist"),
)


def decrypt_url(s):
    """Decode an obfuscated ``location`` string into a URL.

    The first character is a digit giving the number of rows.  The remaining
    characters are cut into that many consecutive rows; when the length does
    not divide evenly, the leading rows are one character longer.  Reading
    the rows column by column yields a percent-encoded string, which is then
    unquoted.  ``^`` is replaced by ``0`` both before and after unquoting.

    Args:
        s: the encoded string.

    Returns:
        The decoded string, or ``''`` when ``s`` is empty.

    Raises:
        ValueError: if the first character is not a digit or is zero.
    """
    s = s.replace('^', '0')
    if not s:
        return ''

    rows_count = int(s[0])
    if rows_count < 1:
        raise ValueError("row count must be at least 1, got %d" % rows_count)

    body = s[1:]
    row_len, remainder = divmod(len(body), rows_count)

    rows = []
    start = 0
    for index in range(rows_count):
        # The first `remainder` rows absorb the characters left over by the
        # integer division.
        stop = start + row_len + (1 if index < remainder else 0)
        rows.append(body[start:stop])
        start = stop

    # Column-major read; the shorter trailing rows simply have no character
    # in the final column.
    chars = []
    for col in range(row_len + 1):
        for row in rows:
            if col < len(row):
                chars.append(row[col])

    url = ''.join(chars)
    return _unquote(url).replace('^', '0')


def collect(mid=376239,high_quality=False):
    """Download and parse the playlist XML for one song id.

    Args:
        mid: song id substituted into ``music_base``.
        high_quality: when true, strip the ``?auth_key`` query string from
            the decoded URL and replace every ``_l`` in it with ``_h``.

    Returns:
        A dict with the keys listed in ``_FIELD_TAGS`` plus ``"xiami": True``.
        An empty dict is returned when the request fails with an HTTP error
        five times in a row, or when an expected tag is missing from the
        response.
    """
    # Third-party import kept local so the pure helpers in this module stay
    # importable on machines without bs4 installed.
    from bs4 import BeautifulSoup

    req = urllib2.Request(music_base % str(mid))
    # NOTE(review): the cookie below is a hard-coded browser session captured
    # in 2014; it is credential-like data and should be confirmed/removed or
    # loaded from configuration.
    headers = (
        ("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"),
        ("Accept-Language","zh-CN,zh;q=0.8"),
        ("Cache-Control","no-cache"),
        ("Connection","close"),
        ("Pragma","no-cache"),
        ("Referer","http://www.baidu.com/"),
        ("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"),
        ("cookie","_unsign_token=d0e87c7230b44e116e3b8e96c48c9b62; __gads=ID=3e9c72b9e0b3e7ba:T=1407824092:S=ALNI_MYxedT1iMAiA-IXbcgEu4Ss_XiRaw; box_opened=1; bdshare_firstime=1409207591670; __utma=251084815.350459004.1409209135.1409209135.1409209135.1; __utmz=251084815.1409209135.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); member_auth=2WydGIYavmhn16fDTt9ldyYb5%2BHTT2eFyY9Yjb4ovwQnIooIY9H%2Bx6uVQg5L3yCaq2HKtwNJXYSZg3aFgGLx8Kg; user=40534293%22%E5%93%8E%E5%B0%8F%E7%AC%A8%E8%9B%8Ba%22%220%221%22%3Ca+href%3D%27%2Fwebsitehelp%23help9_3%27+%3Edo%3C%2Fa%3E%220%220%220%22ee71485989%221409217771; ahtena_is_show=false; recent_tags=%E6%BF%80%E6%83%85+%E7%97%9B%E8%8B%A6+%E4%BC%A4%E5%BF%83+%E5%BF%A7%E4%BC%A4+; user_from=1; t_sign_auth=0; __guestplay=MTc3MjEzMDMyMywxOzE3NzE0MTkwNTQsMjsxNzY5OTI0MjQ0LDE%3D; pnm_cku822=187n%2BqZ9mgNqgJnCG0WtMC8x7vAtsC0zrXQcNA%3D%7CnOiH84T3i%2FOL%2F4zwi%2FyG9VU%3D%7CneiHGXz6UeRW5k4rRCFXLkskQdt3xmHTad%2B6Gro%3D%7Cmu6b9JHlkuGd5Z3pmuad6pDjnu2c65%2Fkneef5JjhluyX7JjhmuCFJQ%3D%3D%7Cm%2B%2BT%2FGIXeAx4D2AUbwBl1mcbhfZW1n3Fv8F03GvTZte00XHR%7CmO6BH2wDdg11Gm4bbht0B2gcYBVmCX0OdQZpHWEUZwh8D3gDowM%3D%7Cme6d7oHyneiH84Twn%2BmR64TzUw%3D%3D; CNZZDATA921624=cnzz_eid%3D6125411959-1407824089-%26ntime%3D1409550640; CNZZDATA2629111=cnzz_eid%3D1781743730-1407814089-%26ntime%3D1409450640; _xiamitoken=7cec7fe673a3672812c4b714a31d6687; isg=67257CF91c74F3297A603C00A816D262; sec=5401410089735bee8e0075e0b6825e0ba6a0a485"),
    )
    for header_name, header_value in headers:
        req.add_header(header_name, header_value)

    # Up to five attempts; only HTTP errors are retried, anything else
    # (e.g. a connection failure) propagates to the caller.
    for _attempt in range(5):
        try:
            page = urllib2.urlopen(req)
        except urllib2.HTTPError:
            continue
        # Pause after every successful request (presumably to throttle).
        time.sleep(0.5)
        break
    else:
        print('id is: %s' % (mid,))
        return {}

    try:
        payload = page.read()
    finally:
        page.close()
    dom = BeautifulSoup(payload, features="xml")

    info = {}
    for key, tag in _FIELD_TAGS:
        node = dom.find(tag)
        if node is None:
            # Incomplete response: report the id and skip this song instead
            # of carrying on with missing fields.
            print('id is: %s' % (mid,))
            return {}
        info[key] = node.text
    info["url"] = decrypt_url(info["url"])

    print(info["title"])
    if high_quality:
        # Reverse / replace "l_" / reverse back: rewrites each "_l" in the
        # URL (query string removed) to "_h".
        info["url"] = info["url"].split("?auth_key")[0][::-1].replace("l_","h_")[::-1]
    info["xiami"] = True
    return info

#from pprint import pprint
#pprint(collect())


def split_var(s):
    """Print code snippets generated from lines of ``name=value`` text.

    Blank lines are ignored and the text before the first ``=`` of each
    remaining line is taken as a variable name.  Two snippets are printed:
    a dict literal of the form ``{"name":name,...}`` and one
    ``name=i["name"]`` assignment per variable.

    Args:
        s: newline-separated assignment statements.
    """
    names = []
    for raw_line in s.split("\n"):
        line = raw_line.strip()
        if line:
            names.append(line.split("=")[0])

    dict_literal = "{" + "".join('"%s":%s,' % (name, name) for name in names) + "}"
    assignments = "".join('%s=i["%s"]\n' % (name, name) for name in names)

    print('')
    print(dict_literal)
    print('')
    print(assignments)
    print('')
    print('')


s='''
title=dom.find("title").text
song_id=dom.find("song_id").text
url=decrypt_url(url)
lyric=dom.find("lyric").text
background=dom.find("background").text
album_id=dom.find("album_id").text
album_pic_s=dom.find("pic").text
album_pic=dom.find("album_pic").text
album_name=dom.find("album_name").text
artist_id=dom.find("artist_id").text
artist=dom.find("artist").text
'''
## No longer valid. Must be fetched in real time.

if __name__ == "__main__":
    split_var(s)

    dst = []
    for i in ids:
        data = collect(i)
        if data:
            dst.append(data)

    data = json.dumps(dst)
    dst = {"data": data}
    #print urllib.urlopen("http://localhost/music/upload",data=urllib.urlencode(dst)).read()