day 35

s15day34 進程 
內容回顧:
    1. GIL鎖 
    2. 進程和線程的區別?
        第一:
            進程是cpu資源分配的最小單元。
            線程是cpu計算的最小單元。
        第二:
            一個進程中可以有多個線程。
        第三:
            對於Python來說,它的進程和線程與其他語言有差別:Python有GIL鎖。
            GIL鎖保證一個進程中同一時刻只有一個線程被cpu調度。
            
        注意:IO密集型操作可以使用多線程;計算密集型操作可以使用多進程。
    
    3. Lock和RLock
    
    4. 線程池 
        
    5. threading.local 
    
    6. 常用方法
    
    7. 面向對象補充:
        class Foo(object):
            """Object that keeps every assigned attribute in its ``info`` dict.

            ``obj.x = 1`` is stored as ``obj.info['x'] = 1`` and ``obj.x`` is read
            back from the same dict.
            """

            def __init__(self):
                # Go through object.__setattr__ so the backing dict lands in the
                # instance __dict__ instead of being routed into itself by our
                # own __setattr__ below.
                object.__setattr__(self, 'info', {})

            def __setattr__(self, key, value):
                """Record ``value`` under ``key`` in the ``info`` dict."""
                self.info[key] = value

            def __getattr__(self, item):
                """Look up ``item`` in ``info``; called only when normal lookup fails.

                Raises:
                    AttributeError: if ``item`` was never assigned. The attribute
                        protocol (``hasattr``, ``getattr`` with a default, copy,
                        pickle) relies on this exception type, not ``KeyError``.
                """
                print(item)
                if item == 'info':
                    # ``info`` itself is missing (e.g. instance built without
                    # __init__); reading self.info here would recurse forever.
                    raise AttributeError(item)
                try:
                    return self.info[item]
                except KeyError:
                    raise AttributeError(item) from None

        obj = Foo()
        obj.name = 'alex'
        print(obj.name)

        
        
今日內容:
    1. 進程
    2. 數據共享
    3. 鎖 
    4. 進程池 
    5. 模塊(爬蟲)
        - requests 
        - bs4(beautifulsoup)
    6. 協程
    
內容詳細:
    1. 進程
        - 進程間數據不共享
            data_list = []
            def task(arg):
                data_list.append(arg)
                print(data_list)


            def run():
                for i in range(10):
                    p = multiprocessing.Process(target=task,args=(i,))
                    # p = threading.Thread(target=task,args=(i,))
                    p.start()

            if __name__ == '__main__':
                run()
        - 常用功能: 
            - join
            - daemon
            - name
            - multiprocessing.current_process()
            - multiprocessing.current_process().ident/pid
        
        - 通過類繼承方式創建進程
            class MyProcess(multiprocessing.Process):
                """Process subclass whose work is defined by overriding ``run``."""

                def run(self):
                    """Print the label followed by the current process object."""
                    print('當前進程',multiprocessing.current_process())


            # The launcher must live at module level: nested inside the class it
            # would replace MyProcess.run and the call under the guard below
            # would fail with NameError.
            def run():
                """Create and start two ``MyProcess`` children."""
                p1 = MyProcess()
                p1.start()

                p2 = MyProcess()
                p2.start()

            if __name__ == '__main__':
                run()
                
    2. 進程間數據共享
        Queue:
            linux:
                q = multiprocessing.Queue()

                def task(arg,q):
                    q.put(arg)

                def run():
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i, q,))
                        p.start()

                    while True:
                        v = q.get()
                        print(v)

                run()
            windows:    
                def task(arg,q):
                    q.put(arg)

                if __name__ == '__main__':
                    q = multiprocessing.Queue()
                    for i in range(10):
                        p = multiprocessing.Process(target=task,args=(i,q,))
                        p.start()
                    while True:
                        v = q.get()
                        print(v)
    
        Manager:(*)
            Linux:
                m = multiprocessing.Manager()
                dic = m.dict()

                def task(arg):
                    dic[arg] = 100

                def run():
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i,))
                        p.start()

                    input('>>>')
                    print(dic.values())
                    
                if __name__ == '__main__':
                    
                    run()
            windows:
                def task(arg,dic):
                    time.sleep(2)
                    dic[arg] = 100

                if __name__ == '__main__':
                    m = multiprocessing.Manager()
                    # Manager-backed dict proxy: writes made by the children are
                    # visible to the parent.
                    dic = m.dict()

                    process_list = []
                    for i in range(10):
                        p = multiprocessing.Process(target=task, args=(i,dic,))
                        p.start()

                        process_list.append(p)

                    # Block until every child has finished. join() sleeps while
                    # waiting, unlike polling is_alive() in a tight loop, which
                    # keeps a CPU core busy for the whole wait.
                    for p in process_list:
                        p.join()
                    print(dic)
    
    3. 進程鎖 
            import time
            import threading
            import multiprocessing


            lock = multiprocessing.RLock()

            def task(arg):
                print('鬼子來了')
                lock.acquire()
                time.sleep(2)
                print(arg)
                lock.release()

            if __name__ == '__main__':
                p1 = multiprocessing.Process(target=task,args=(1,))
                p1.start()

                p2 = multiprocessing.Process(target=task, args=(2,))
                p2.start()
    
        為什麼要加鎖?
    
    4. 進程池
        import time
        from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

        def task(arg):
            time.sleep(2)
            print(arg)

        if __name__ == '__main__':

            pool = ProcessPoolExecutor(5)
            for i in range(10):
                pool.submit(task,i)

    5. 初識爬蟲:
        安裝:
            pip3 install requests 
            pip3 install beautifulsoup4 
        
        問題:
            找不到內部指令?
                方式一:
                    C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3  install requests 
                方式二:
                    先將以下目錄加入系統環境變量 PATH(或先 cd 進入該目錄):
                    C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts
                    
                    然後再執行:
                    pip3  install requests 
                
        
        示例:
            import requests
            from bs4 import BeautifulSoup
            from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor


            # 模擬瀏覽器發送請求
            # 內部創建 sk = socket.socket()
            # 和抽屜進行socket連接 sk.connect(...)
            # sk.sendall('...')
            # sk.recv(...)

            def task(url):
                print(url)
                r1 = requests.get(
                    url=url,
                    headers={
                        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
                    }
                )

                # 查看下載下來的文本信息
                soup = BeautifulSoup(r1.text,'html.parser')
                print(soup.text)
                # content_list = soup.find('div',attrs={'id':'content-list'})
                # for item in content_list.find_all('div',attrs={'class':'item'}):
                #     title = item.find('a').text.strip()
                #     target_url = item.find('a').get('href')
                #     print(title,target_url)

            def run():
                pool = ThreadPoolExecutor(5)
                for i in range(1,50):
                    pool.submit(task,'https://dig.chouti.com/all/hot/recent/%s' %i)


            if __name__ == '__main__':
                run()
    
        相關:
            a. 以上示例用進程和線程哪個好?
                - 線程好(爬蟲屬於IO密集型操作)
            b. requests模塊模擬瀏覽器發送請求
                - 本質 requests.get(...):
                    - 創建socket客戶端
                    - 連接 【阻塞】
                    - 發送請求
                    - 接收響應【阻塞】
                    - 斷開連接 
                
            c. 線程和進程池 
            
    
重點總結:
    1. 進程    *****
        - windows 
        - linux 
    2. 進程數據共享 *****
        - Queue
        - Manager
    3. 進程鎖   ***
    4. 進程池    *****
    5. 爬蟲(進程池/線程池的應用)

 

# by luffycity.com
"""
面向對象補充
"""

"""
class Foo(object):
    def __init__(self):
        self.info = {}

    def __setitem__(self, key, value):
        self.info[key] = value

    def __getitem__(self, item):
        return self.info.get(item)


obj = Foo()
obj['x'] = 123
print(obj['x'])
"""
from flask import globals
# class Foo(object):
#
#     def __init__(self):
#         object.__setattr__(self, 'info', {}) # 在對象中設置值的本質
#
#     def __setattr__(self, key, value):
#         self.info[key] = value
#
#     def __getattr__(self, item):
#         print(item)
#         return self.info[item]
#
# obj = Foo()
# obj.name = 'alex'
# print(obj.name)
# Build the list 0..9999 directly from the range instead of appending one
# element at a time, then print it.
v = list(range(10000))

print(v)
View Code
# by luffycity.com
import multiprocessing
import threading




# ##################### 進程間的數據不共享 #####################
"""
data_list = []

def task(arg):
    data_list.append(arg)
    print(data_list)


def run():
    for i in range(10):
        p = multiprocessing.Process(target=task,args=(i,))
        # p = threading.Thread(target=task,args=(i,))
        p.start()

if __name__ == '__main__':
    run()
"""
# ##################### 進程常用功能 #####################
"""
import time
def task(arg):
    time.sleep(2)
    print(arg)


def run():
    print('111111111')
    p1 = multiprocessing.Process(target=task,args=(1,))
    p1.name = 'pp1'
    p1.start()
    print('222222222')

    p2 = multiprocessing.Process(target=task, args=(2,))
    p2.name = 'pp2'
    p2.start()
    print('333333333')

if __name__ == '__main__':
    run()
"""

# ##################### 通過繼承方式創建進程 #####################

class MyProcess(multiprocessing.Process):
    """Process subclass that defines its work by overriding ``run``."""

    def run(self):
        """Print the label followed by the process object executing this call."""
        me = multiprocessing.current_process()
        print('當前進程', me)


def run():
    """Create two ``MyProcess`` instances, starting each right after creation.

    The function returns without waiting for the children to finish.
    """
    for _ in range(2):
        MyProcess().start()

# Entry-point guard: launch the child processes only when this file is run as
# a script, not when it is imported as a module.
if __name__ == '__main__':
    run()
# by luffycity.com
import multiprocessing
import threading
import queue
import time
# ##################### 進程間的數據共享:multiprocessing.Queue #####################
"""
q = multiprocessing.Queue()

def task(arg,q):
    q.put(arg)


def run():
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q,))
        p.start()

    while True:
        v = q.get()
        print(v)
run()
"""
# ##################### 進程間的數據共享:Manager #####################
"""
def task(arg,dic):
    time.sleep(2)
    dic[arg] = 100

if __name__ == '__main__':
    m = multiprocessing.Manager()
    
    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,dic,))
        p.start()

        process_list.append(p)

    while True:
        count = 0
        for p in process_list:
            if not p.is_alive():
                count += 1
        if count == len(process_list):
            break
    print(dic)
    # ...
"""
# ##################### 進程間的數據其他電腦 #####################
"""
def task(arg,dic):
    pass

if __name__ == '__main__':
    while True:
        # 鏈接上指定的服務器
        # 去機器上獲取url
        url = 'adfasdf'
        p = multiprocessing.Process(target=task, args=(url,))
        p.start()

"""

 

# by luffycity.com
import time
import threading
import multiprocessing


# Module-level re-entrant lock that task() holds around its sleep-and-print step.
# NOTE(review): a module-level lock is inherited by children only under the
# fork start method; under spawn each child re-imports this module and gets
# its own lock — confirm the intended platform or pass the lock via args.
lock = multiprocessing.RLock()

def task(arg):
    """Print a banner, then sleep two seconds and print ``arg`` under the lock.

    Args:
        arg: Value printed once the lock has been obtained.
    """
    print('鬼子來了')
    # ``with`` releases the lock even if the body raises; a bare
    # acquire()/release() pair would leave it held and block every other
    # process waiting on it.
    with lock:
        time.sleep(2)
        print(arg)


if __name__ == '__main__':
    # Start two worker processes that run task with 1 and 2 respectively;
    # each one is started immediately after it is built.
    p1 = multiprocessing.Process(target=task, kwargs={'arg': 1})
    p1.start()
    p2 = multiprocessing.Process(target=task, kwargs={'arg': 2})
    p2.start()
# by luffycity.com
import time
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor

def task(arg):
    """Pause for two seconds, then print ``arg``."""
    pause_seconds = 2
    time.sleep(pause_seconds)
    print(arg)

if __name__ == '__main__':
    # Five worker processes handle the ten submitted jobs. The ``with`` block
    # calls shutdown(wait=True) on exit, so the workers are always reclaimed
    # once the jobs finish instead of relying on interpreter teardown.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
# by luffycity.com
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor


# 模擬瀏覽器發送請求
# 內部創建 sk = socket.socket()
# 和抽屜進行socket連接 sk.connect(...)
# sk.sendall('...')
# sk.recv(...)

def task(url):
    """Download ``url`` with a browser-like User-Agent and print the page text.

    Args:
        url: Address passed to ``requests.get``.

    Raises:
        requests.exceptions.Timeout: if the server does not respond within
            the timeout (other ``requests`` errors propagate as well).
    """
    print(url)
    r1 = requests.get(
        url=url,
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
        },
        # requests has no default timeout: without one a stalled server
        # would block this call, and the pool worker running it, forever.
        timeout=10,
    )

    # Parse the downloaded HTML and print its text content.
    soup = BeautifulSoup(r1.text,'html.parser')
    print(soup.text)
    # content_list = soup.find('div',attrs={'id':'content-list'})
    # for item in content_list.find_all('div',attrs={'class':'item'}):
    #     title = item.find('a').text.strip()
    #     target_url = item.find('a').get('href')
    #     print(title,target_url)

def run():
    """Submit ``task`` for the URLs numbered 1 through 49 to a 5-thread pool.

    The executor is used as a context manager so it is shut down (waiting for
    all submitted jobs) before this function returns, rather than being left
    open until interpreter exit.
    """
    with ThreadPoolExecutor(5) as pool:
        for i in range(1, 50):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


# Entry-point guard: start the crawl only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()

 

 

# by luffycity.com
import multiprocessing
import time

def task(arg, dic):
    """Wait two seconds, then store 100 in ``dic`` under the key ``arg``.

    Args:
        arg: Key to write.
        dic: Mapping that receives the entry (mutated in place).
    """
    delay_seconds, marker = 2, 100
    time.sleep(delay_seconds)
    dic[arg] = marker


if __name__ == '__main__':
    m = multiprocessing.Manager()
    # Use the manager-backed proxy: a plain dict is copied into each child, so
    # the children's writes would never reach the parent and the Manager
    # created above would go unused.
    dic = m.dict()

    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic,))
        p.start()
        process_list.append(p)

    # Wait for every child before the parent moves on: the manager process is
    # shut down when its parent exits, and children still writing to the proxy
    # at that point would fail.
    for p in process_list:
        p.join()

    print('end')
相關文章
相關標籤/搜索