"""Download every .jpg image referenced by a Baidu Tieba post.

Run as a script to fetch the default post and save its images as
0.jpg, 1.jpg, ... in the default output directory. The helper functions
can also be imported without triggering any network or file I/O.
"""
import os
import re
import urllib.request

# Defaults preserved from the original script.
DEFAULT_URL = "https://tieba.baidu.com/p/5352556650"
DEFAULT_DEST_DIR = 'D:\\Temp\\'
DEFAULT_TIMEOUT = 30.0

# Capture the value of a src="..." attribute that ends in ".jpg".
# Quotes and whitespace are excluded from the capture so a match can never
# run past the closing quote into a following attribute.
_IMG_SRC_RE = re.compile(r'src="([^"\s]*\.jpg)"')


def getHtml(url, timeout=DEFAULT_TIMEOUT):
    """Fetch *url* and return the raw response body as bytes.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on blocking network operations before
            giving up.

    Returns:
        The undecoded response body.

    Raises:
        urllib.error.URLError: If the request fails.
        ValueError: If *url* is not a valid URL.
    """
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url, timeout=timeout) as page:
        return page.read()


def getImg(html):
    """Return every .jpg address found in a src="..." attribute of *html*.

    Args:
        html: Decoded page source (str).

    Returns:
        A list of address strings in document order; empty if none match.
    """
    return _IMG_SRC_RE.findall(html)


def saveImages(imgList, destDir=DEFAULT_DEST_DIR, timeout=DEFAULT_TIMEOUT):
    """Download each address in *imgList* into *destDir* as <index>.jpg.

    A failure on one image is reported on stdout and does not stop the
    remaining downloads. The index advances for every address, so a failed
    download leaves a gap in the numbering.

    Args:
        imgList: Iterable of image URLs.
        destDir: Directory that receives the files; created if missing.
        timeout: Per-request timeout in seconds.

    Returns:
        The number of images saved successfully.
    """
    os.makedirs(destDir, exist_ok=True)
    saved = 0
    for index, imgPath in enumerate(imgList):
        target = os.path.join(destDir, str(index) + ".jpg")
        try:
            # Download before opening the output file so that a failed
            # request does not leave an empty file behind.
            data = getHtml(imgPath, timeout)
            with open(target, 'wb') as f:
                f.write(data)
        except Exception as e:
            # Best-effort batch: report the cause and move on to the next.
            print(imgPath + " error: " + repr(e))
        else:
            print(imgPath)
            saved += 1
    return saved


def main(url=DEFAULT_URL, destDir=DEFAULT_DEST_DIR):
    """Fetch the post at *url* and save all of its .jpg images to *destDir*."""
    # The page bytes are decoded as UTF-8 before pattern matching.
    html = getHtml(url).decode('UTF-8')
    saveImages(getImg(html), destDir)
    print("All Done!")


if __name__ == "__main__":
    main()