# 樣本
# 網絡來源
# 作者: hehao
# 原文抓取linux520網站的***測試視頻,無意侵犯linux520網站權益。
# 源碼分享學習
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#version 0.1
#author:hehao
#python version:2.7.2
# requires the psutil library to be installed
#
from os.path import basename
from urlparse import urlsplit
import os
import urllib2
import sys
try:
    import psutil
except ImportError:
    # psutil is required by getpid() and killpid(); without it the script
    # cannot work, so explain how to install it and stop with a failure
    # status. Only ImportError is handled so unrelated errors stay visible.
    print("please install psutil ex:pypm install psutil")
    sys.exit(1)
import re
def url2name(url):
    """Return the file name portion of a URL.

    Only the path component is used, so a query string or fragment never
    ends up in the name. The result is an empty string when the path is
    empty or ends with a slash.

    >>> url2name("http://example.com/videos/clip.flv?id=7")
    'clip.flv'
    """
    return basename(urlsplit(url).path)
def download(url, localFileName=None):
    """Download ``url`` and save the response body to a local file.

    The local file name is chosen in this order:

    1. ``localFileName`` when it is given (non-empty);
    2. the ``filename=`` value of a ``Content-Disposition`` response header;
    3. the last path segment of the final URL if the request was redirected;
    4. the last path segment of ``url``.

    Returns the name of the file that was written. Errors raised while
    opening the URL or writing the file propagate to the caller.
    """
    # Imported here so the function is self-contained with respect to the
    # module-level import list.
    import shutil
    from contextlib import closing

    # closing() releases the HTTP response even if saving the file fails.
    with closing(urllib2.urlopen(urllib2.Request(url))) as r:
        localName = localFileName
        if not localName:
            disposition = r.info().get('Content-Disposition') or ''
            if 'filename=' in disposition:
                # Keep only the value itself: drop parameters that follow
                # it, surrounding quotes, and any directory part sent by
                # the server so the file is created in the working dir.
                value = disposition.split('filename=', 1)[1].split(';', 1)[0]
                localName = basename(value.strip().strip('"\''))
        if not localName and r.geturl() != url:
            # Redirected: the final URL names the real file.
            localName = url2name(r.geturl())
        if not localName:
            localName = url2name(url)

        with open(localName, 'wb') as f:
            # Stream in chunks instead of holding the whole body in memory.
            shutil.copyfileobj(r, f)
    return localName
def getpid(process_name):
    """Return the PID of the first process whose description mentions a name.

    Every process reported by psutil is converted with ``str()`` and checked
    for ``process_name`` as a substring.

    Returns 0 when no process matches.
    """
    try:
        processes = psutil.process_iter()
    except AttributeError:
        # Fall back to the older call for psutil builds that lack
        # process_iter().
        processes = psutil.get_process_list()
    for proc in processes:
        if process_name in str(proc):
            return proc.pid
    # Report "not found" only after every process has been examined.
    return 0
def killpid(pid):
    """Kill the process with the given PID on a best-effort basis.

    Returns None when the kill request was issued, and 0 when nothing was
    killed: either ``pid`` is falsy (``getpid`` returns 0 for "not found")
    or psutil reported an error such as a vanished process or denied access.
    """
    if not pid:
        return 0
    try:
        # Looking the process up can fail too, so it belongs inside the try.
        psutil.Process(pid).kill()
    except Exception:
        return 0
def analy_swf(swf_path):
    """Extract the real file name from a Flash file using swfdump.

    Runs ``swfdump.exe -a <swf_path>`` and scans its output line by line for
    the first ``<uri>...</uri>`` element.

    Returns the text inside that element, or None when no output line
    contains one. Raises OSError if ``swfdump.exe`` cannot be started.
    """
    import subprocess

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    proc = subprocess.Popen(
        ["swfdump.exe", "-a", swf_path],
        stdout=subprocess.PIPE,
        universal_newlines=True
    )
    try:
        for line in proc.stdout:
            found = re.findall(r"""<uri>([\S\s]*?)</uri>""", line)
            if found:
                return found[0]
        return None
    finally:
        # Clean up our own child process (it may still be running when we
        # return early) rather than searching for any process by name.
        proc.stdout.close()
        if proc.poll() is None:
            try:
                proc.kill()
            except OSError:
                # The process exited between poll() and kill().
                pass
        proc.wait()
def download_realvideo(swf_url, url, id):
    """Download the video that a Flash player file points at.

    Saves ``swf_url`` as ``tmp.swf`` in the working directory, reads the real
    file name out of it with ``analy_swf``, then downloads ``url + <name>``
    to a local file called ``<id>_<name>``.

    ``id`` is only used as the file-name prefix. The parameter name shadows
    the builtin but is kept so existing callers keep working.

    Raises ValueError when no file name is found in the SWF. Errors from
    the downloads or from swfdump propagate to the caller.
    """
    tmp_swf = 'tmp.swf'
    try:
        download(swf_url, tmp_swf)
        r_name = analy_swf(tmp_swf)
        if not r_name:
            raise ValueError("no <uri> entry found in " + swf_url)
        download(url + r_name, str(id) + "_" + r_name)
    finally:
        # Remove the temporary file on failure as well; it may not exist if
        # the first download never got as far as creating it.
        if os.path.exists(tmp_swf):
            os.remove(tmp_swf)
# Single-video usage, kept as an example (the addresses are withheld in this
# copy of the script):
#     download_realvideo(swf_url, base_url, 138)

# Base address of the numbered video pages. The literal is a placeholder
# left where the real address was withheld; replace it before running.
url = "該url不公開(地址)"

# Compiled once and reused for every page.
# Value of the player's "src" parameter on a video page.
_SRC_PARAM_RE = re.compile(r"""<param name\=\"src\"\svalue\=\"(.*?)\"\/>""")
# File name listed in a <uri> element of a config file.
_URI_RE = re.compile(r"""<uri>([\S\s]*?)</uri>""")


def _fetch(address):
    """Return the response body of ``address``, always closing the response."""
    response = urllib2.urlopen(urllib2.Request(address))
    try:
        return response.read()
    finally:
        response.close()


def _grab_page_video(u, x):
    """Download the video referenced by page ``u`` (page number ``x``).

    Returns False when the page has no ``src`` player parameter and True
    after a download. Network and parsing errors propagate to the caller.
    """
    found = _SRC_PARAM_RE.search(_fetch(u))
    if found is None:
        return False
    s = found.group(1)
    if '#' in s:
        # The file name is read from "<prefix>_config.xml" under the page.
        config = _fetch(u + s.split('_')[0] + '_config.xml')
        real_name = _URI_RE.findall(config)
        if not real_name:
            raise ValueError("no <uri> entry in config for " + u)
        download(u + real_name[0], str(x) + '_' + real_name[0])
    elif 'swf' in s:
        # The player is a SWF file; the file name is read out of it.
        download_realvideo(u + s, u, x)
    else:
        # The parameter is the media file itself.
        download(u + s, str(x) + '_' + s)
    return True


def main():
    """Walk page numbers 1-199 under ``url`` and download each page's video."""
    for x in range(1, 200):
        u = url + str(x) + "/"
        print(u)
        try:
            _grab_page_video(u, x)
        except Exception as exc:
            # Keep going after a per-page failure, but report it.
            # KeyboardInterrupt is not an Exception, so Ctrl-C still stops
            # the run.
            sys.stderr.write("skipped %s: %s\n" % (u, exc))


if __name__ == "__main__":
    main()