day6-threading

Thread instance example:

#!/usr/bin/env python
#coding:utf-8
#author:Bing

from threading import Thread
import time

def test(arg):
    # Demo worker: ignores *arg* and prints the numbers 0..9.
    for i in range(10):
        print i
        #time.sleep(1)  # uncomment to slow the loop down and observe interleaving

# Run test() in a separate thread; args must be a tuple.
t1 = Thread(target=test, args=("11",))
#t1.setDaemon(True)  # daemon thread: interpreter exit does not wait for it
t1.start()
print t1.getName()
# Wait at most 5 seconds for the thread to finish.
t1.join(5)
#t1.join()  # join() with no timeout blocks until the thread exits

print "after"
print "after"

class MyThread(Thread):
    """Subclassing Thread: override run(); the base Thread.run()
    invokes the callable passed as target= to the constructor."""
    def run(self):
        #time.sleep(15)
        print "test"
        Thread.run(self)  # delegate to the base class, which calls bone()

def bone():
    # Target callable executed by MyThread through Thread.run().
    print "bone"

t2 = MyThread(target=bone)
t2.start()
print "over"

Producer and consumer pattern:

#!/usr/bin/env python
#coding:utf-8
#author:Bing

from threading import Thread
import time,Queue,random

# Shared bounded queue (capacity 100) linking producers to consumers.
que = Queue.Queue(maxsize=100)

class Product(Thread):
    """Producer thread: pushes "baozi" items onto the shared queue forever."""
    def __init__(self,name,que):
        super(Product,self).__init__()
        self.name = name  # NOTE(review): overrides Thread's own name attribute
        self.que = que

    def run(self):
        while True:
            if self.que.full():
                # Queue full: back off for a second instead of spinning.
                # (A blocking que.put() would achieve the same without polling.)
                time.sleep(1)
            else:
                self.que.put("baozi")
                print "%s:made a baozi...===" % self.name

class Consumer(Thread):
    """Consumer thread: takes items off the shared queue forever."""
    def __init__(self,name,que):
        super(Consumer,self).__init__()
        self.name = name  # NOTE(review): overrides Thread's own name attribute
        self.que = que

    def run(self):
        while True:
            if self.que.empty():
                # Queue empty: back off for a second instead of spinning.
                time.sleep(1)
            else:
                self.que.get()
                print "%s:got a baozi" % self.name

# Three producer threads. Bug fix: the original named all three "chef1"
# (copy-paste error), making their output indistinguishable.
t1 = Product("chef1",que)
t2 = Product("chef2",que)
t3 = Product("chef3",que)
t1.start()
t2.start()
t3.start()

# Twenty consumer threads, named customer0..customer19.
for item in range(20):
    name = "customer%d"%(item,)
    temp = Consumer(name,que)
    temp.start()

# Disabled alternative: function-based producer/consumer using qsize(),
# get_nowait() and random sleeps instead of Thread subclasses.
'''

def Pro(name,que):
    while True:
        if que.qsize() <3:
            que.put("baozi")
            print "%s:made a lion...=====" % name
        else:
            print "%s:we have three lions..." % name
        time.sleep(random.randrange(2))  #control made speed

def Con(name,que):
    while True:
        try:
            que.get_nowait()
            print "%s:got a lion" % name
        except Exception:
            print "%s:don't have a lion" % name
        time.sleep(random.randrange(3))  #control consumer speed

p1 = Thread(target=Pro,args=("chef1", que))
p2 = Thread(target=Pro,args=("chef2", que))
p1.start()
p2.start()

c1 = Thread(target=Con,args=("customer1", que))
c2 = Thread(target=Con,args=("customer2", que))
c1.start()
c2.start()
'''

threading.Event example:

#!/usr/bin/env python
#coding:utf-8
#author:Bing

from threading import Thread,Event
import time,Queue,random

# NOTE(review): this queue is never used in the Event demo below.
que = Queue.Queue(maxsize=100)

def Pro():
    # Chef: wait until a customer announces an order (event set by Con),
    # "make a baozi", then signal completion by setting the event again.
    print "chef:waiting for get a baozi of people."
    event.wait()   # blocks until Con() calls event.set()
    event.clear()  # reset so the final set() below is a fresh signal
    print "chef:a person comming for baozi"
    print "chef:make a baozi for a person"
    time.sleep(3)

    print "chef:your baozi is ok!"
    event.set()    # wakes the customer waiting in Con()
def Con():
    # Customer: announce the order (set the event), then wait for the
    # chef to signal that the baozi is ready.
    print "I go to buy a baozi."
    event.set()

    time.sleep(2)
    print "waiting for a baozi"
    # NOTE(review): correctness relies on Pro() clearing the event before
    # this wait() runs; otherwise the wait returns immediately — confirm.
    event.wait()

    print "think you!"

# Shared event plus one chef (producer) and one customer (consumer) thread.
event = Event()
p = Thread(target=Pro)
c = Thread(target=Con)

p.start()
c.start()

A practical example (search-result crawler):

#!/usr/bin/python
# encoding=utf-8
# Filename: port.py
# Author:Bing
import sys,re
reload(sys)
sys.setdefaultencoding('utf-8')
import urllib,time,threading
import urllib2,random,Queue
from bs4 import BeautifulSoup

# Pool of browser User-Agent strings; one is picked at random per request
# to make the crawler's traffic look less uniform.
USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]

class Spiderdomain:
    def __init__(self,target,page,threads_num):
        self.target = target
        self.page = page
        self.lock = threading.Lock()
        self.threads_num = threads_num
        self._que(self.target)
        self.lock = threading.Lock()

    def random_useragent(self):
        global USER_AGENTS
        return random.choice(USER_AGENTS)

    def _que(self,target):
        self.queue = Queue.Queue()
        for sub in target:
            if sub:self.queue.put(sub)

    def baidu(self):
        count = 0
        pages = self.page*10
        urls = []
        reg = r'((\w+\.)+(com|edu|cn|gov|net|org|hk){1,2})'
        thread_id = int( threading.currentThread().getName() )
        while self.queue.qsize() > 0:
            sub = self.queue.get()
            while count <= pages:
                url = "http://www.baidu.com/s?wd=%s&pn=%s" % (urllib.quote(str(sub)),count)
                request = urllib2.Request(url)
                request.add_header('User-Agent', self.random_useragent())
                request.add_header('Referer', request.get_full_url())
                htmlpage = urllib2.urlopen(request).read()
                soup = BeautifulSoup(htmlpage,"html5lib")           #tags_a =soup.findAll(name='a',attrs={'href':re.compile("^https?://")})
                Tags = soup.findAll(name='a',attrs={'class':'c-showurl'})
                for i in Tags:
                    result = re.findall(reg,str(i))
                    try:
                        urls.append(result[1][0])
                    except Exception,e:
                        pass
                count += 10
            result = {}.fromkeys(urls).keys()
            print result

    def run(self):
        threads = []
        # start threads
        for i in range(self.threads_num):
            t = threading.Thread(target=self.baidu, name=str(i))
            t.start()
            threads.append(t)
        # wait for all
        for i in range(self.threads_num):
        	threads[i].join()

# Crawl five "site:" queries, 20 result pages each, across 20 worker threads.
d = Spiderdomain(target=["site:"+"sina.com","site:"+"baidu.com","site:"+"qq.com","site:"+"360.com","site:"+"58.com"],page=20,threads_num=20)#,"site:"+"qq.com","site:"+"360.com","site:"+"58.com"
d.run()

#27s

The following is a thread-pool variant (multithreading via multiprocessing.dummy):

#!/usr/bin/python
# encoding=utf-8
# Filename: port.py
# Author:Bing
import sys,re
reload(sys)
sys.setdefaultencoding('utf-8')
import urllib,time,threading
import urllib2,random,Queue
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool as ThreadPool

# Pool of browser User-Agent strings used to randomize request headers.
USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]

def baidu(target):
    """Fetch up to 20 Baidu result pages for *target* and return the
    de-duplicated list of domains found in the "c-showurl" links."""
    count = 0
    pages = 20*10  # Baidu's pn parameter advances 10 hits per page
    urls = []
    reg = r'((\w+\.)+(com|edu|cn|gov|net|org|hk){1,2})'
    while count <= pages:
        url = "http://www.baidu.com/s?wd=%s&pn=%s" % (urllib.quote(str(target)),count)
        request = urllib2.Request(url)
        # Bug fix: the original sent the entire USER_AGENTS list as the
        # header value; pick one agent at random instead.
        request.add_header('User-Agent', random.choice(USER_AGENTS))
        request.add_header('Referer', request.get_full_url())
        htmlpage = urllib2.urlopen(request).read()
        soup = BeautifulSoup(htmlpage,"html5lib")
        Tags = soup.findAll(name='a',attrs={'class':'c-showurl'})
        for i in Tags:
            result = re.findall(reg,str(i))
            try:
                # result[1][0]: full domain of the second match in the tag
                urls.append(result[1][0])
            except IndexError:  # fewer than two matches in this tag
                pass
        count += 10
    # De-duplicate the collected domains via dict keys.
    result = {}.fromkeys(urls).keys()
    return result

urls = ["site:"+"sina.com","site:"+"baidu.com","site:"+"qq.com","site:"+"360.com","site:"+"58.com"]
pool = ThreadPool(20) # pool size
res  = map(baidu, urls)
print res
pool.close()
pool.join()

#46s

Pool plus threading — a process pool combined with multithreading (concurrency):

# Disabled example combining a process Pool with threads: ten async tasks,
# at most five processes at a time, each task spawning five threads.
# NOTE(review): inside p() the assignment `t = threading.Thread(target=t, ...)`
# makes `t` a local name, so reading it as target= before the first
# assignment would apparently fail — verify before re-enabling this code.
'''     
from multiprocessing import Pool
import threading
import traceback
#from gevent.pool import Pool
import time

def t(li,n):
    #li.append(n)
    print "thread number:",n
 
#一個子進程運行5個線程
def p(x):
    info_list = []
    for i in range(5):
        t = threading.Thread(target=t,args=[info_list,i])
        t.start()
    print info_list,"********"
    
#最多一次運行5個子進程
pls = Pool(processes=5)

res_list = []
#生產10個子進程,一次運行最多5個子進程,異步運行(相似於一次併發5個)
for i in range(10):
    res = pls.apply_async(p,[i,])
    print '------------:',i
    res_list.append(res)

for r in res_list:
    print r.get(timeout=1)

pls.close()
pls.join()
'''
相關文章
相關標籤/搜索
本站公眾號
   歡迎關注本站公眾號,獲取更多信息