from urllib import request

def f(url):
    print("GET: %s" % url)
    # open the URL (returns a response object)
    resp = request.urlopen(url)
    # data is the downloaded page
    data = resp.read()
    # save the page to a local file (use a separate name so the function f is not shadowed)
    with open("url.html", "wb") as fh:
        fh.write(data)
    print('%d bytes received from %s.' % (len(data), url))

# the page to crawl
f("http://www.cnblogs.com/alex3714/articles/5248247.html")
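For large pages, reading the whole response into memory before writing it out can be wasteful. Below is a minimal sketch of the same download written with a context manager and chunked reads; download_to and its chunk_size parameter are hypothetical names introduced here for illustration, not part of the original example.

from urllib import request

def download_to(url, path, chunk_size=64 * 1024):
    # stream the response in fixed-size chunks instead of holding the whole page in memory
    total = 0
    with request.urlopen(url) as resp, open(path, "wb") as fh:
        while True:
            chunk = resp.read(chunk_size)
            if not chunk:
                break
            fh.write(chunk)
            total += len(chunk)
    return total

# usage: same page as above, written to url.html
# print(download_to("http://www.cnblogs.com/alex3714/articles/5248247.html", "url.html"))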
from gevent import monkey
# patch all blocking I/O calls in this program so gevent can switch between
# greenlets whenever one of them is waiting (the switch works much like gevent.sleep);
# patch as early as possible, before the other imports
monkey.patch_all()

import time
import gevent
from urllib import request

def f(url):
    print("GET: %s" % url)
    # open the URL
    resp = request.urlopen(url)
    # data is the downloaded page
    data = resp.read()
    print('%d bytes received from %s.' % (len(data), url))

# --------------------------- serial ------------------------------ #
# the pages to fetch
urls = ['https://www.python.org/',
        'https://www.yahoo.com/',
        'https://github.com/']

# time the synchronous run
time_start = time.time()
# fetch each page one after another
for url in urls:
    f(url)
print("sync cost", time.time() - time_start)

# ------------------------- concurrent ---------------------------- #
# time the asynchronous run
async_time_start = time.time()
# run the coroutines and wait for all of them to finish
gevent.joinall([
    # spawn three greenlets, each running f with one URL as its argument
    gevent.spawn(f, 'https://www.python.org/'),
    gevent.spawn(f, 'https://www.yahoo.com/'),
    gevent.spawn(f, 'https://github.com/'),
])
print("async cost", time.time() - async_time_start)
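The example above only prints inside each greenlet. gevent.spawn returns a Greenlet object, and after gevent.joinall finishes, each greenlet's return value is available on its .value attribute. Here is a minimal sketch of collecting results that way; fetch is a hypothetical variant of f that returns the page size instead of printing it.

from gevent import monkey
monkey.patch_all()

import gevent
from urllib import request

def fetch(url):
    # like f above, but returns the number of bytes downloaded
    data = request.urlopen(url).read()
    return len(data)

urls = ['https://www.python.org/', 'https://www.yahoo.com/', 'https://github.com/']

# spawn one greenlet per URL, wait for all of them, then read each result
jobs = [gevent.spawn(fetch, url) for url in urls]
gevent.joinall(jobs)
for url, job in zip(urls, jobs):
    print(url, "->", job.value, "bytes")

Spawning from a list comprehension also avoids repeating gevent.spawn once per hard-coded URL, which keeps the concurrent version in step with the urls list used in the serial run.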