# -*- coding: utf-8 -*-
"""Small Tkinter GUI that scrapes budejie.com video pages and downloads MP4s.

Python 2 script (Tkinter / urllib2).  Pressing the button starts a worker
thread that fetches listing pages one after another, downloads every video
found into the current working directory and logs each download in the
text area.
"""
from Tkinter import *
from ScrolledText import ScrolledText
import urllib
import urllib2
import re
import threading
import time

# Browser-like headers sent with every listing-page request.
_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'}

# One listing entry; re.S lets '.' match across newlines.
_ITEM_BLOCK_RE = re.compile(r'(<div class="j-r-list-c">.*?</div>.*?</div>)', re.S)
# Direct MP4 address carried by an entry (absent for non-video entries).
_MP4_URL_RE = re.compile(r'data-mp4="(.*?)">')
# Text of the anchors inside an entry, used as the video name.
_TITLE_RE = re.compile(r'<a .*?>(.*?)</a>', re.S)

url_name = []  # pending [name, url] pairs shared between get() and write()
a = 1  # number of the listing page that the next get() call will fetch


def get():
    """Fetch listing page ``a`` and queue every video found on it.

    Each ``[name, url]`` pair found is appended to the module-level
    ``url_name`` list and printed.  After a successful fetch the page
    counter ``a`` is advanced so the next call reads the following page.

    Returns:
        The shared ``url_name`` list (the same object on every call).

    Raises:
        urllib2.URLError: propagated unchanged if the page cannot be fetched.
    """
    global a
    page_url = 'http://www.budejie.com/video/' + str(a)
    varl.set('已經獲取到第%s的視頻' % a)
    opener = urllib2.build_opener()
    request = urllib2.Request(page_url, None, _HEADERS)
    response = opener.open(request)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    for block in _ITEM_BLOCK_RE.findall(html):
        video_urls = _MP4_URL_RE.findall(block)
        if not video_urls:
            # Entry holds no MP4 (some other format): skip it.
            continue
        names = _TITLE_RE.findall(block)
        for name, video_url in zip(names, video_urls):
            url_name.append([name, video_url])
            print('%s %s' % (name, video_url))

    # Move on to the next page; previously the same page was refetched forever.
    a += 1
    return url_name


id = 1  # running number of the next video (name kept for compatibility; shadows builtin id)


def write():
    """Download queued videos page by page and log them in the text widget.

    Pages are fetched until the running counter ``id`` reaches 10; the
    limit is only checked between pages, so every video of a fetched page
    is downloaded.  Stops early when a page yields no videos.
    """
    # NOTE(review): the widgets are updated from the worker thread started by
    # start(); Tkinter is not guaranteed to be thread-safe -- confirm.
    global id
    while id < 10:
        pending = get()
        if not pending:
            # Nothing found on this page: stop instead of requesting pages
            # in an endless loop.
            break
        # Drain the queue from the front.  The original popped items while a
        # for-loop iterated the same list, which skipped every other video.
        while pending:
            name, video_url = pending[0]
            # The name is transcoded from UTF-8 to GBK for the file name
            # (presumably for a Chinese-locale Windows file system -- confirm).
            urllib.urlretrieve(video_url, '%s.mp4' % (name.decode('utf-8').encode('gbk')))
            text.insert(END, str(id) + '.' + video_url + '\n' + name + '\n')
            pending.pop(0)
            id += 1
    varl.set('抓取完成')


def start():
    """Button callback: run write() in a new thread."""
    th = threading.Thread(target=write)
    th.start()


root = Tk()
# Call title(); the original assigned to the attribute, which replaced the
# method and never set the window title.
root.title('視頻下載')
text = ScrolledText(root, font=('微軟雅黑', 10))
text.grid()  # place the widget with the grid layout manager
button = Button(root, text='開始爬取', font=('微軟雅黑', 10), command=start)
button.grid()
varl = StringVar()  # Tk variable bound to the status label below
label = Label(root, font=('微軟雅黑', 10), fg='red', textvariable=varl)
varl.set('熊貓已準備....')
label.grid()
root.mainloop()