Thread instance:
#!/usr/bin/env python #coding:utf-8 #author:Bing from threading import Thread import time def test(arg): for i in range(10): print i #time.sleep(1) t1 = Thread(target=test, args=("11",)) #t1.setDaemon(True) don't waiting for subprogress t1.start() print t1.getName() t1.join(5) #t1.join() print "after" print "after" class MyThread(Thread): def run(self): #time.sleep(15) print "test" Thread.run(self) #call bone excute def bone(): print "bone" t2 = MyThread(target=bone) t2.start() print "over"
Producer and consumer mode:
#!/usr/bin/env python #coding:utf-8 #author:Bing from threading import Thread import time,Queue,random que = Queue.Queue(maxsize=100) class Product(Thread): def __init__(self,name,que): super(Product,self).__init__() self.name = name self.que = que def run(self): while True: if self.que.full(): time.sleep(1) else: self.que.put("baozi") print "%s:made a baozi...===" % self.name class Consumer(Thread): def __init__(self,name,que): super(Consumer,self).__init__() self.name = name self.que = que def run(self): while True: if self.que.empty(): time.sleep(1) else: self.que.get() print "%s:got a baozi" % self.name #three productor t1 = Product("chef1",que) t2 = Product("chef1",que) t3 = Product("chef1",que) t1.start() t2.start() t3.start() #twenty consumer for item in range(20): name = "customer%d"%(item,) temp = Consumer(name,que) temp.start() ''' def Pro(name,que): while True: if que.qsize() <3: que.put("baozi") print "%s:made a lion...=====" % name else: print "%s:we have three lions..." % name time.sleep(random.randrange(2)) #control made speed def Con(name,que): while True: try: que.get_nowait() print "%s:got a lion" % name except Exception: print "%s:don't have a lion" % name time.sleep(random.randrange(3)) #control consumer speed p1 = Thread(target=Pro,args=("chef1", que)) p2 = Thread(target=Pro,args=("chef2", que)) p1.start() p2.start() c1 = Thread(target=Con,args=("customer1", que)) c2 = Thread(target=Con,args=("customer2", que)) c1.start() c2.start() '''
threading.Event:
#!/usr/bin/env python #coding:utf-8 #author:Bing from threading import Thread,Event import time,Queue,random que = Queue.Queue(maxsize=100) def Pro(): print "chef:waiting for get a baozi of people." event.wait() event.clear() print "chef:a person comming for baozi" print "chef:make a baozi for a person" time.sleep(3) print "chef:your baozi is ok!" event.set() def Con(): print "I go to buy a baozi." event.set() time.sleep(2) print "waiting for a baozi" event.wait() print "think you!" event = Event() p = Thread(target=Pro) c = Thread(target=Con) p.start() c.start()
For example:
#!/usr/bin/python # encoding=utf-8 # Filename: port.py # Author:Bing import sys,re reload(sys) sys.setdefaultencoding('utf-8') import urllib,time,threading import urllib2,random,Queue from bs4 import BeautifulSoup USER_AGENTS = [ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", "Opera/9.80 (Macintosh; Intel 
Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", ] class Spiderdomain: def __init__(self,target,page,threads_num): self.target = target self.page = page self.lock = threading.Lock() self.threads_num = threads_num self._que(self.target) self.lock = threading.Lock() def random_useragent(self): global USER_AGENTS return random.choice(USER_AGENTS) def _que(self,target): self.queue = Queue.Queue() for sub in target: if sub:self.queue.put(sub) def baidu(self): count = 0 pages = self.page*10 urls = [] reg = r'((\w+\.)+(com|edu|cn|gov|net|org|hk){1,2})' thread_id = int( threading.currentThread().getName() ) while self.queue.qsize() > 0: sub = self.queue.get() while count <= pages: url = "http://www.baidu.com/s?wd=%s&pn=%s" % (urllib.quote(str(sub)),count) request = urllib2.Request(url) request.add_header('User-Agent', self.random_useragent()) request.add_header('Referer', request.get_full_url()) htmlpage = urllib2.urlopen(request).read() soup = BeautifulSoup(htmlpage,"html5lib") #tags_a =soup.findAll(name='a',attrs={'href':re.compile("^https?://")}) Tags = soup.findAll(name='a',attrs={'class':'c-showurl'}) for i in Tags: result = re.findall(reg,str(i)) try: urls.append(result[1][0]) except Exception,e: pass count += 10 result = {}.fromkeys(urls).keys() print result def run(self): threads = [] # start threads for i in range(self.threads_num): t = threading.Thread(target=self.baidu, name=str(i)) t.start() threads.append(t) # wait for all for i in range(self.threads_num): threads[i].join() d = Spiderdomain(target=["site:"+"sina.com","site:"+"baidu.com","site:"+"qq.com","site:"+"360.com","site:"+"58.com"],page=20,threads_num=20)#,"site:"+"qq.com","site:"+"360.com","site:"+"58.com" d.run() #27s
The following is a thread pool instance (多線程):
#!/usr/bin/python # encoding=utf-8 # Filename: port.py # Author:Bing import sys,re reload(sys) sys.setdefaultencoding('utf-8') import urllib,time,threading import urllib2,random,Queue from bs4 import BeautifulSoup from multiprocessing.dummy import Pool as ThreadPool USER_AGENTS = [ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) 
Chrome/19.0.1036.7 Safari/535.20", "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", ] def baidu(target): count = 0 pages = 20*10 urls = [] reg = r'((\w+\.)+(com|edu|cn|gov|net|org|hk){1,2})' while count <= pages: url = "http://www.baidu.com/s?wd=%s&pn=%s" % (urllib.quote(str(target)),count) request = urllib2.Request(url) request.add_header('User-Agent', USER_AGENTS) request.add_header('Referer', request.get_full_url()) htmlpage = urllib2.urlopen(request).read() soup = BeautifulSoup(htmlpage,"html5lib") #tags_a =soup.findAll(name='a',attrs={'href':re.compile("^https?://")}) Tags = soup.findAll(name='a',attrs={'class':'c-showurl'}) for i in Tags: result = re.findall(reg,str(i)) try: urls.append(result[1][0]) except Exception,e: pass count += 10 result = {}.fromkeys(urls).keys() return result urls = ["site:"+"sina.com","site:"+"baidu.com","site:"+"qq.com","site:"+"360.com","site:"+"58.com"] pool = ThreadPool(20) # pool size res = map(baidu, urls) print res pool.close() pool.join() #46s
Pool和threading 進程池和多線程:併發
# Disabled example, kept inside a string literal so that none of it runs:
# a multiprocessing Pool whose tasks each start several threads.
'''
from multiprocessing import Pool
import threading
import traceback
#from gevent.pool import Pool
import time

def t(li,n):
    #li.append(n)
    print "thread number:",n

#一個子進程運行5個線程 (each child process runs 5 threads)
def p(x):
    info_list = []
    for i in range(5):
        # The thread object must not be named `t`: assigning to `t` here
        # would make it local to p() and `target=t` would then raise
        # UnboundLocalError instead of referring to the function t().
        worker = threading.Thread(target=t,args=[info_list,i])
        worker.start()
    print info_list,"********"

#最多一次運行5個子進程 (at most 5 child processes run at a time)
pls = Pool(processes=5)
res_list = []
#生產10個子進程,一次運行最多5個子進程,異步運行(相似於一次併發5個)
# (submit 10 tasks, at most 5 running at once, asynchronously)
for i in range(10):
    res = pls.apply_async(p,[i,])
    print '------------:',i
    res_list.append(res)

for r in res_list:
    print r.get(timeout=1)

pls.close()
pls.join()
'''