# 實現抓圖的工具（HTML）
# -*- coding: utf-8 -*-
"""Small image-scraping helper.

``GetUrlContent`` reads an index page and calls ``GetUrl`` for every
``<a target="_blank">`` link that has no ``title`` attribute.  ``GetUrl``
downloads each ``<img>`` on a page whose ``src`` matches
``uploads/allimg`` into a local directory.

The script targets Python 2 (``urllib2`` and the ``BeautifulSoup`` package);
the standard-library imports fall back to their Python 3 locations.
"""
import os
import re
from contextlib import closing

try:  # Python 2 module layout
    import urllib2
    from urllib import urlretrieve
    from urlparse import urljoin, urlsplit
except ImportError:  # Python 3 moved these into urllib.request / urllib.parse
    import urllib.request as urllib2
    from urllib.request import urlretrieve
    from urllib.parse import urljoin, urlsplit

# Socket timeout, in seconds, applied to every page request.
_TIMEOUT_SECONDS = 5

# Only <img> tags whose src matches this pattern are downloaded.
_IMAGE_SRC_PATTERN = re.compile("(.*)uploads/allimg(.*)")


def _parse_html(markup):
    """Return a BeautifulSoup tree for ``markup``.

    BeautifulSoup is imported on first use so that the URL and path helpers
    in this module remain usable when the parser is not installed.
    """
    from BeautifulSoup import BeautifulSoup
    return BeautifulSoup(markup)


def _fetch(url):
    """Return the body of ``url``, closing the connection afterwards."""
    with closing(urllib2.urlopen(url, None, _TIMEOUT_SECONDS)) as response:
        return response.read()


def GetUrlContent(url, path):
    """Download the images of every page linked from the index page ``url``.

    Args:
        url: Address of the index page.
        path: Local directory the images are saved into.

    Raises:
        ImportError: If BeautifulSoup is not installed.
        Exception: Whatever ``urlopen`` raises when the index page itself
            cannot be fetched; failures on linked pages are only printed.
    """
    soup = _parse_html(_fetch(url))
    # Anchors that open in a new window and carry no title attribute.
    anchors = soup.findAll('a', attrs={"target": "_blank"}, title=None)
    for anchor in anchors:
        href = anchor.get('href')
        if not href:
            continue  # an anchor without a target has nothing to follow
        # An href may be relative to the index page; urlopen needs it absolute.
        page_url = urljoin(url, href)
        print(page_url)
        GetUrl(page_url, path)
        print(" ")
        print(" ")


def createFileWithFileName(localPathParam, fileName):
    """Make sure ``fileName`` exists inside ``localPathParam``.

    The directory is created when it is missing, and an empty file is created
    when none exists yet; an existing file is left untouched.

    Args:
        localPathParam: Directory that should contain the file.  An empty
            string means the current working directory.
        fileName: Name of the file inside that directory.

    Returns:
        The joined path of the file.
    """
    if localPathParam and not os.path.isdir(localPathParam):
        os.makedirs(localPathParam)
    # os.path.join picks the separator of the running platform.
    totalPath = os.path.join(localPathParam, fileName)
    if not os.path.exists(totalPath):
        # Append mode creates the file without truncating anything.
        with open(totalPath, 'a'):
            pass
    return totalPath


def GetFileName(url):
    """Return the file-name part of ``url``, ignoring query and fragment."""
    return os.path.basename(urlsplit(url).path)


def GetUrl(myUrl, localPath):
    """Download every matching image of the page ``myUrl`` into ``localPath``.

    Fetch, parse and download failures are printed and skipped so that one
    bad page or image does not stop the rest of a crawl.

    Args:
        myUrl: Address of the page to scan for images.
        localPath: Local directory the images are saved into.

    Raises:
        ImportError: If BeautifulSoup is not installed.
    """
    try:
        soup = _parse_html(_fetch(myUrl))
        images = soup.findAll("img", attrs={"src": _IMAGE_SRC_PATTERN})
    except ImportError:
        # A missing parser is a setup problem, not a per-page failure.
        raise
    except Exception as e:
        # Best-effort crawl: report the page and move on.
        print("Error fetching %s: %r" % (myUrl, e))
        return

    for image in images:
        # Each image gets its own handler so one failure does not skip the
        # remaining images of the page.
        try:
            # src may be relative to the page it appears on.
            imgUrl = urljoin(myUrl, image["src"])
            print(imgUrl)
            fileName = GetFileName(imgUrl)
            print(fileName)
            urlretrieve(imgUrl, createFileWithFileName(localPath, fileName))
        except Exception as e:
            print("Error downloading image from %s: %r" % (myUrl, e))


if __name__ == '__main__':
    print(GetFileName("http://www.2cto.com/meinv/sexmv/1810.jpg"))