
# coding=utf-8

import urllib,re,sys,time

# Scrape a Google Music (google.cn) topic listing page, follow each song's
# download page, collect every mp3 link found there and write the links,
# one per line, to a text file.
#
# This script uses the Python 2 urllib API (urllib.urlopen / urllib.unquote).

# Listing page that is scanned for song entries.
url = 'http://www.google.cn/music/topiclisting?q=top100_duet_love_songs&cat=song'

# A song id is appended to this prefix to form the song's download page URL.
prefix = 'http://www.google.cn/music/top100/musicdownload?id='

# Output file that receives the collected links.
downname = 'songlist.txt'

html = urllib.urlopen(url).read()

# Matches a listing entry that ends in the literal text "\x26resnum".
# Original author's note: the page shows a single '\' before "x26resnum", yet
# a non-raw string literal needed four backslashes to match it.  In this raw
# string the regex "\\" matches that one literal backslash; the pattern value
# is the same as before.
regx = r'下載.*window.*http.*\\x26resnum'
reobj = re.compile(regx)

# Matches the file download address on a song's download page.
reg = 'http.*mp3'
rej = re.compile(reg)

matches = [match.group() for match in reobj.finditer(html)]

# Every match ends with the 10-character literal "\x26resnum"; the 17
# characters in front of it are taken as the song id.
# NOTE(review): the fixed 17-character id width is assumed from the slice
# bounds; confirm against the live page markup.
song_urls = [prefix + urllib.unquote(entry[-27:-10]) for entry in matches]

down = []
for song_url in song_urls:
    page = urllib.urlopen(song_url).read()
    # Pause one second after each request (presumably to avoid hammering
    # the server).
    time.sleep(1)
    for match in rej.finditer(page):
        # Unquoted twice; presumably the link is percent-encoded twice on
        # the page -- TODO confirm.
        down.append(urllib.unquote(urllib.unquote(match.group())))

# NOTE(review): the original indentation was lost; the file is written once,
# after all download pages have been visited.
# The output file is only created when at least one link was found.
if down:
    # "with" guarantees the file is closed even if a write fails.
    with open(downname, 'w') as out:
        out.writelines(link + '\n' for link in down)

# Parenthesised form prints the same text under Python 2.
print('finish')