Python常用模塊（4）—— re、logging、hashlib、subprocess

時間 2019-11-19

標籤 python 經常使用模塊 logging hashlib subprocess 欄目 Python 简体版

原文原文鏈接

re 模塊：與正則相關的模塊

在使用 re 模塊以前，需要先了解正則表達式（regular expression）。正則表達式描述了一種字符串匹配的模式（pattern），可以用來檢查一個字符串是否含有某個子字符串、將匹配的子字符串替換，或者從某個字符串中取出符合某個條件的子字符串等。

import re

# Regular-expression syntax tour. Every call prints the list returned by
# re.findall for the given pattern and subject.
#
# Patterns containing regex escapes are written as raw strings so that the
# backslash reaches the regex engine unchanged; a plain '\w' or '\d' is an
# invalid *string* escape and produces a warning on current Python versions.

# --- Character classes -------------------------------------------------------
# \w matches a letter, digit or underscore
print(re.findall(r'\w', 'hello 123_ */-='))
# \W matches any character that is NOT a letter, digit or underscore
print(re.findall(r'\W', 'hello 123_ */-='))
# \s matches any whitespace character (space, \n, \t, \r, \f)
print(re.findall(r'\s', 'hell\no 12\t3_ */-='))
# \S matches any non-whitespace character
print(re.findall(r'\S', 'hell\no 12\t3_ */-='))
# \d matches any digit (0-9)
print(re.findall(r'\d', 'hell\no 12\t3_ */-='))
# \D matches any non-digit
print(re.findall(r'\D', 'hell\no 12\t3_ */-='))
# \n matches a newline
print(re.findall(r'\n', 'hell\no 1\n2\t3_ */-='))
# \t matches a tab
print(re.findall(r'\t', 'hell\no 12\t3_ */-='))

# --- Anchors -----------------------------------------------------------------
# ^ anchors the match at the start of the string
print(re.findall('qiu', 'my name is qiu, qiu like music'))
print(re.findall('^qiu', 'my name is qiu, qiu like music'))
print(re.findall('^qiu', 'qiu my name is qiu, qiu like music'))
# $ anchors the match at the end of the string (a trailing newline is not
# part of the match)
print(re.findall('qiu$', 'qiu my name is qiu, qiu like qiu'))
print("=" * 50)

# --- The dot -----------------------------------------------------------------
# . matches any single character except a newline
print(re.findall('a.c', 'abc a*c a\ncfdsa cds alc a+c a\tcdjsh'))
# -> ['abc', 'a*c', 'a c', 'alc', 'a+c', 'a\tc']
# With re.DOTALL the dot matches every character, newline included
print(re.findall('a.c', 'abc a*c a\ncfdsa cds alc a+c a\tcdjsh', re.DOTALL))
# Two dots: any two characters other than newline
print(re.findall('a..c', 'abc alc aac asd aaaaac a *c a+c adsadsa ='))

ac_sample = 'abc alc aac aAc aBc asd aaaaac a *c a+c a-c a/c adsadsa = a1c a2c'
print(re.findall('a.c', ac_sample))

# --- Character sets ----------------------------------------------------------
# [] matches exactly one character out of those listed inside the brackets.
# Lower-case letter between a and c
print(re.findall('a[a-z]c', ac_sample))
# Upper-case letter between a and c
print(re.findall('a[A-Z]c', ac_sample))
# One of + - * / between a and c. A literal '-' must be first or last in the
# set; in the middle it denotes a range unless it is escaped as \-
print(re.findall('a[-+/*]c', ac_sample))
print(re.findall(r'a[+\-/*]c', ac_sample))
# A leading ^ inside [] negates the set: anything except a lower-case letter
print(re.findall('a[^a-z]c', ac_sample))
print("=" * 50)

# --- Quantifiers -------------------------------------------------------------
# * ? + {n,m} cannot stand alone; each applies to the item on its left.
ab_sample = 'a ab abbb abbbb a1bbbb a-123'

# * : the item on the left occurs zero or more times (takes every b present)
print(re.findall('ab*', ab_sample))
# -> ['a', 'ab', 'abbb', 'abbbb', 'a', 'a']
# Same thing spelled with {n,}
print(re.findall('ab{0,}', ab_sample))
print("=" * 50)

# ? : the item on the left occurs zero or one time (takes at most one b)
print(re.findall('ab?', ab_sample))
# -> ['a', 'ab', 'ab', 'ab', 'a', 'a']
# Same thing spelled with {n,m}
print(re.findall('ab{0,1}', ab_sample))
print("=" * 50)

# + : the item on the left occurs one or more times (no b, no match)
print(re.findall('ab+', ab_sample))
# -> ['ab', 'abbb', 'abbbb']
# Same thing spelled with {n,}
print(re.findall('ab{1,}', ab_sample))
print("=" * 50)

# {n,m} : at least n and at most m repetitions, n <= m
print(re.findall('ab{1,3}', ab_sample))
# -> ['ab', 'abbb', 'abbb']
print("=" * 50)

# --- Greedy vs. non-greedy ---------------------------------------------------
# .* is greedy: as many characters as possible
print(re.findall('a.*c', 'ab123dsac32155sdadasc'))
# .*? is non-greedy: as few characters as possible
print(re.findall('a.*?c', 'ab123dsac32155sdadasc'))

# --- Groups ------------------------------------------------------------------
# With a capturing group (), findall returns the group's text, not the whole
# match.
print(re.findall('expression=".*"', 'expression="1+2+3/4*5" qiu="beautiful"'))
print(re.findall('expression=".*?"', 'expression="1+2+3/4*5" qiu="beautiful"'))
print(re.findall('(expression)=".*?"', 'expression="1+2+3/4*5" qiu="beautiful"'))
# Extract the expression 1+2+3/4*5
print(re.findall('expression="(.*?)"', 'expression="1+2+3/4*5" qiu="beautiful"'))
# Extract the URLs from the href attributes
print(re.findall('href="(.*?)"', '<p>段落</p><a href="https://www.baidu.com">點我啊</a><h1>標題</h1><a href="https://www.qiuxirufeng.com">點我啊</a>'))

# --- Alternation -------------------------------------------------------------
# a|b matches a or b
print(re.findall('a|b', 'ab123adbhsga'))
# Words related to "company"
print(re.findall('companies|company', 'Too many companies have gone bankrupt, and the next one is my company'))
# Capturing group: only the group content is returned -> ['ies', 'y']
print(re.findall('compan(ies|y)', 'Too many companies have gone bankrupt, and the next one is my company'))
# (?:...) groups without capturing, so the whole match is returned
# -> ['companies', 'company']
print(re.findall('compan(?:ies|y)', 'Too many companies have gone bankrupt, and the next one is my company'))

# --- Matching a literal backslash ---------------------------------------------
# Goal: match the three characters a, backslash, c.
# r'a\\c': the raw string hands a\\c to the re module, which reads \\ as one
# literal backslash.
print(re.findall(r'a\\c', 'a\\c alc aAc aac'))
# 'a\\\\c': the Python parser first reduces it to a\\c, then the re module
# reads that as a, backslash, c.
print(re.findall('a\\\\c', 'a\\c alc aAc aac'))

正則表達式

# -*- coding: utf-8 -*-

import re

# Tour of the main re functions: findall, search, match, split, sub, compile.

print(re.findall('qiu', 'qiu like music, qiu like folk music'))
# search stops at the first occurrence: it returns a match object, or None
# when nothing matches
print(re.search('qiu', 'qiu like music, qiu like folk music'))
# Call group() on the match object to get the matched text
print(re.search('qiu', 'qiu like music, qiu like folk music').group())
print(re.findall('qi(u)', 'qiu like music, qiu like folk music'))
print(re.search('qi(u)', 'qiu like music, qiu like folk music').group())
print("=" * 50)

print(re.search('qiu', '123qiu like music, qiu like folk music'))
# match only looks at the beginning of the string, like search with a
# leading ^ in the pattern
print(re.match('qiu', '123qiu like music, qiu like folk music'))
print(re.search('^qiu', '123qiu like music, qiu like folk music'))
print("=" * 50)

li = 'qiu:22:male'.split(":")
print(li)
# re.split can cut on several delimiters at once: here ':' or a space
l1 = re.split(':| ', 'qiu:22:male xxx')
print(l1)
print("=" * 50)

# Replace qiu with xi; count limits how many replacements are made. count is
# passed by keyword because newer Python versions deprecate passing it
# positionally.
print(re.sub('qiu', 'xi', 'qiu is nice, qiu qiu qiu'))
print(re.sub('qiu', 'xi', 'qiu is nice, qiu qiu qiu', count=1))
# Replace every word ending in xx with qiu
s = 'lxx is good, lllxx wxx cxx are good'
print(re.sub('[a-z]+xx', 'qiu', s))
print("=" * 50)

# A compiled pattern offers the same findall/search as methods, so the
# pattern text is written only once.
pattern = re.compile('qiu')
print(pattern.findall('qiu like music, qiu like folk music'))
print(pattern.search('qiu like music, qiu like folk music'))

re模塊的使用

logging 模塊：與日誌相關操作的模塊

logging模塊主要可以根據自定義日誌信息，在程序運行的時候將日誌打印在終端及記錄日誌到文件中。logging支持的日誌有五個級別：

　　debug() 調試級別，一般用於記錄程序運行的詳細信息，對應數字級別10

　　info() 事件級別，一般用於記錄程序的運行過程，對應數字級別20

　　warning() 警告級別，一般用於記錄程序出現潛在錯誤的情形，對應數字級別30

　　error() 錯誤級別，一般用於記錄程序出現錯誤，但不影響整體運行，對應數字級別40

　　critical() 嚴重錯誤級別，出現該錯誤已經影響到整體運行，對應數字級別50

import logging

# Levels from lowest to highest:
# debug --> info --> warning --> error --> critical
# Nothing is configured here, so only records at WARNING and above are
# printed (see the output below).
logging.debug('調試信息')
logging.info('正常信息')
logging.warning('警告信息')
logging.error('錯誤信息')
logging.critical('嚴重錯誤信息')
# Output when run:
WARNING:root:警告信息 ERROR:root:錯誤信息 CRITICAL:root:嚴重錯誤信息

簡單用法，將日誌打印到終端

但是這樣的輸出存在一定的問題：

　　一、沒有指定日誌級別

　　二、沒有指定日誌格式

　　三、只能在屏幕上打印，沒有寫入文件

因此要進行基本的日誌配置

logging.basicConfig() 函數可以通過具體參數來更改 logging 模塊的默認行爲，可用參數有：
filename：用指定的文件名創建 FileHandler（後邊會具體講解 handler 的概念），這樣日誌會被存儲在指定的文件中。
filemode：文件打開方式，在指定了 filename 時使用這個參數，默認值爲「a」，還可指定爲「w」。
format：指定 handler 使用的日誌顯示格式。
datefmt：指定日期時間格式。
level：設置 rootlogger（後邊會講解具體概念）的日誌級別。
stream：用指定的 stream 創建 StreamHandler。可以指定輸出到 sys.stderr、sys.stdout 或者文件，默認爲 sys.stderr。filename 和 stream 兩個參數不能同時指定，否則會拋出 ValueError（見下文的示例）。

format 參數中可能用到的格式化串：
%(name)s Logger的名字
%(levelno)s 數字形式的日誌級別
%(levelname)s 文本形式的日誌級別
%(pathname)s 調用日誌輸出函數的模塊的完整路徑名，可能沒有
%(filename)s 調用日誌輸出函數的模塊的文件名
%(module)s 調用日誌輸出函數的模塊名
%(funcName)s 調用日誌輸出函數的函數名
%(lineno)d 調用日誌輸出函數的語句所在的代碼行
%(created)f 當前時間，用UNIX標準的表示時間的浮點數表示
%(relativeCreated)d 輸出日誌信息時，自Logger創建以來的毫秒數
%(asctime)s 字符串形式的當前時間。默認格式是「2003-07-08 16:49:45,896」，逗號後面的是毫秒
%(thread)d 線程ID。可能沒有
%(threadName)s 線程名。可能沒有
%(process)d 進程ID。可能沒有
%(message)s 用戶輸出的消息

基本的日誌配置

import logging

# Basic configuration: send every record from level 10 (DEBUG) upward to
# access.log, using the given record and timestamp formats.
logging.basicConfig(
    filename='access.log',
    format='%(asctime)s - %(name)s - %(levelname)s - %(module)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=10,
)

logging.debug('調試信息')
logging.info('正常信息')
logging.warning('警告信息')
logging.error('錯誤信息')
logging.critical('嚴重錯誤信息')
# After running, the records are stored in the file (written with GBK
# encoding in the author's environment):
2018-10-21 08:43:53 - root - DEBUG - logging模塊: 調試信息 2018-10-21 08:43:53 - root - INFO - logging模塊: 正常信息 2018-10-21 08:43:53 - root - WARNING - logging模塊: 警告信息 2018-10-21 08:43:53 - root - ERROR - logging模塊: 錯誤信息 2018-10-21 08:43:53 - root - CRITICAL - logging模塊: 嚴重錯誤信息

View Code

現在解決了上面出現的三個問題，但是又出現了新的問題：

　　一、不能指定字符編碼

　　二、只能在文件中打印

basicConfig 有一個參數叫 stream，但它不能與 filename 同時指定，否則會拋出 ValueError，也就是說不能同時打印到屏幕和文件裏；另外，寫入文件的信息使用的是系統默認編碼（此處爲 GBK），查看時只能以 GBK 編碼打開文件

import logging

# Deliberate failure demo: filename and stream are given together, which
# makes basicConfig raise ValueError (see the traceback below). The logging
# calls after it are therefore never reached.
logging.basicConfig(
    filename='access.log',
    format='%(asctime)s - %(name)s - %(levelname)s - %(module)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=10,
    stream=True
)

logging.debug('調試信息')
logging.info('正常信息')
logging.warning('警告信息')
logging.error('錯誤信息')
logging.critical('嚴重錯誤信息')
# Output when run:
Traceback (most recent call last): File "E:/Python日誌模塊的使用/logging模塊.py", line 10, in <module> stream=True File "D:\Program Files\Python36\lib\logging\__init__.py", line 1797, in basicConfig raise ValueError("'stream' and 'filename' should not be " ValueError: 'stream' and 'filename' should not be specified together

View Code

logging模塊包含四種角色：logger，filter，formatter，handler

logger 負責產生日誌信息；filter 負責篩選日誌，這一步不是我們所需要處理的；formatter 用來控制日誌的輸出格式；handler 負責日誌輸出的目標。然後通過綁定 logger 對象與 handler 對象，讓產生的日誌信息可以同時在文件和控制檯上打印輸出；接着綁定 handler 對象與 formatter 對象，讓輸出信息按照指定的格式輸出；最後設置日誌級別，可以在 logger 與 handler 兩層關卡進行設置

import logging

# 1. logger: produces the log records
logger1 = logging.getLogger('交易日誌')

# 2. filter: selects which records pass -- nothing to do for this step here

# 3. formatter: controls the layout of each output line
formatter1 = logging.Formatter(
    fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %X',
)
formatter2 = logging.Formatter(
    fmt='%(asctime)s - %(message)s',
    datefmt='%Y-%m-%d %X',
)

# 4. handler: decides where the output goes
# Two file targets
h1 = logging.FileHandler(filename='a1.log', encoding='utf-8')
h2 = logging.FileHandler(filename='a2.log', encoding='utf-8')
# One console target
s = logging.StreamHandler()

# 5. Bind the logger to its handlers: every record from logger1 goes to the
#    two files and to the console.
logger1.addHandler(h1)
logger1.addHandler(h2)
logger1.addHandler(s)

# 6. Bind each handler to a formatter: the files use the detailed layout,
#    the console the short one.
h1.setFormatter(formatter1)
h2.setFormatter(formatter1)
s.setFormatter(formatter2)

# 7. Set levels. There are two gates: the logger first, then each handler.
#    If the logger were set to 30 here, the info record below would already
#    be stopped at the first gate and the handler levels would not matter.
logger1.setLevel(10)
h1.setLevel(10)
h2.setLevel(10)
s.setLevel(10)

logger1.info('qiu轉帳給xi一個億')

logging模塊的四種角色

上面這個寫法步驟有些繁瑣，因此有一個寫好的配置文件，使用的時候經過執行文件直接導入便可

# -*- coding: utf-8 -*-

# Log line layouts. Only the first two are referenced by LOGGING_DIC below;
# id_simple_format is defined but not used in this configuration.
standard_format = '%(asctime)s - %(filename)s - 第%(lineno)d行 - %(name)s - %(levelname)s - %(message)s'
simple_format = '%(asctime)s - %(levelname)s - %(message)s'
id_simple_format = '%(asctime)s - %(message)s'

# Full paths of the log files
logfile_path1 = r'E:\Python\Python Fullstack\day22\日誌模塊的使用\a1.log'
logfile_path2 = r'E:\Python\Python Fullstack\day22\日誌模塊的使用\a2.log'

# Configuration dictionary in the layout expected by logging.config.dictConfig
LOGGING_DIC = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'formatter1': {
            'format': standard_format
        },
        'formatter2': {
            'format': simple_format
        },
    },
    'filters': {},
    'handlers': {
        # Console output
        's': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
            'formatter': 'formatter2'
        },
        # File output; level DEBUG, so every record is written
        'h1': {
            'level': 'DEBUG',
            'class': 'logging.FileHandler',
            'formatter': 'formatter1',
            'filename': logfile_path1,  # log file
            'encoding': 'utf-8',  # encoding of the log file
        },
        'h2': {
            'level': 'DEBUG',
            'class': 'logging.FileHandler',
            'formatter': 'formatter1',
            'filename': logfile_path2,  # log file
            'encoding': 'utf-8',  # encoding of the log file
        },
    },
    'loggers': {
        'logger1': {
            # All three handlers: records go to both files and to the console
            'handlers': ['h1', 'h2', 's'],
            'level': 'DEBUG',
            # Do not pass records on to ancestor loggers
            'propagate': False,
        },
    },
}

logging配置文件

import logging.config

import settings

# Apply the LOGGING_DIC configuration defined in the settings module
logging.config.dictConfig(settings.LOGGING_DIC)

# 'logger1' is the logger name declared in that configuration
logger1 = logging.getLogger('logger1')
logger1.debug("調試日誌")

執行文件

上面這個只能獲取 logger1 的信息，因爲此時只定義了一個 logger1。但當需要記錄的日誌種類多起來，就需要獲取多個 logger，因此這裏不能寫死：可以在配置文件中將 logger 的名字定義爲空字符串，這樣使用時可以任意指定 logger 名，獲取到的都是相同的配置

# -*- coding: utf-8 -*-

# Log line layouts. Only the first two are referenced by LOGGING_DIC below;
# id_simple_format is defined but not used in this configuration.
standard_format = '%(asctime)s - %(filename)s - 第%(lineno)d行 - %(name)s - %(levelname)s - %(message)s'
simple_format = '%(asctime)s - %(levelname)s - %(message)s'
id_simple_format = '%(asctime)s - %(message)s'

# Full paths of the log files
logfile_path1 = r'E:\Python\Python Fullstack\day22\日誌模塊的使用\a1.log'
logfile_path2 = r'E:\Python\Python Fullstack\day22\日誌模塊的使用\a2.log'

# Configuration dictionary in the layout expected by logging.config.dictConfig
LOGGING_DIC = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'formatter1': {
            'format': standard_format
        },
        'formatter2': {
            'format': simple_format
        },
    },
    'filters': {},
    'handlers': {
        # Console output
        's': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
            'formatter': 'formatter2'
        },
        # File output; level DEBUG, so every record is written
        'h1': {
            'level': 'DEBUG',
            'class': 'logging.FileHandler',
            'formatter': 'formatter1',
            'filename': logfile_path1,  # log file
            'encoding': 'utf-8',  # encoding of the log file
        },
        'h2': {
            'level': 'DEBUG',
            'class': 'logging.FileHandler',
            'formatter': 'formatter1',
            'filename': logfile_path2,  # log file
            'encoding': 'utf-8',  # encoding of the log file
        },
    },
    # The section must be named 'loggers'; dictConfig does not read a section
    # stored under any other key, so no handler would be attached.
    'loggers': {
        # The empty name configures the root logger. Loggers obtained under
        # any other name are not listed here and hand their records up to it,
        # so they all share this one configuration.
        '': {
            # All three handlers: records go to both files and to the console
            'handlers': ['h1', 'h2', 's'],
            'level': 'DEBUG',
            # Do not pass records on to ancestor loggers
            'propagate': False,
        },
    },
}

logging配置文件

# -*- coding: utf-8 -*-

import logging.config

import settings

# Apply the LOGGING_DIC configuration defined in the settings module
logging.config.dictConfig(settings.LOGGING_DIC)

# Two loggers under freely chosen names
logger1 = logging.getLogger('用戶相關')
logger2 = logging.getLogger('交易日誌')
logger1.info("調試日誌")
logger2.info('qiu轉帳給xi一個億')

執行文件

hashlib 模塊：用來進行 hash 或者 md5 加密，且這種加密是不可逆的

hash：是一種算法，該算法接受傳入的內容，經過運算得到一串 hash 值。如果把 hash 算法比喻爲一座工廠，那傳給 hash 算法的內容就是原材料，生成的 hash 值就是生產出的產品

hash 值有三大特性：

　　一、只要傳入的內容同樣，獲得的 hash 值必然同樣

　　二、只要咱們使用的 hash 算法固定，不管傳入的內容有多大，獲得的hash值的長度是固定的

　　三、不能夠用 hash 值逆推出原來的內容

基於 1 和 2 能夠在下載文件時作文件一致性校驗，基於 1 和 3 能夠對密碼進行加密

import hashlib

# Feeding the data in several update() calls gives the same digest as feeding
# it in one piece: only the concatenated bytes matter.

# 1. Build the hash "factory"
m = hashlib.md5()
# 2. Deliver the raw material
m.update('你好啊'.encode('utf-8'))
m.update('啊哈'.encode('utf-8'))
# 3. Produce the hash value
print(m.hexdigest())  # a4be72e57bc198333ed98188c48b2f85

# ==================================================================
# Same bytes, split differently: the first part goes to the constructor
# 1. Build the hash "factory"
m = hashlib.md5('你'.encode('utf-8'))
# 2. Deliver the raw material
m.update('好啊啊哈'.encode('utf-8'))
# 3. Produce the hash value
print(m.hexdigest())  # a4be72e57bc198333ed98188c48b2f85

# ==================================================================
# Application 1: file consistency check
# 1. Build the hash "factory" (a different algorithm this time: SHA-512)
m = hashlib.sha512('你'.encode('utf-8'))
# 2. Deliver the raw material
m.update('好啊sadfsadf啊哈asdfsafdadsadsadfsadfsadfsadfasdff的張銘言'.encode('utf-8'))
# 3. Produce the hash value
print(m.hexdigest())  # 977c7a4f13e76f3f026e45540c72c6ba5dbfc41357bc452fba9d9824b71c5e074298c3f62f57c6a42dd769e6a03c26be44742e4e77f284e19c106e7fba3093da

# ===================================================================
# MD5 of a file's content, e.g. to verify a download.
# 1. Build the hash "factory"
m = hashlib.md5()
# 2. Deliver the raw material. The file is binary, so it is read in
#    fixed-size chunks: iterating it "line by line" would split on whatever
#    newline bytes happen to occur and could yield arbitrarily large pieces.
#    The digest is the same either way.
with open(r'E:\01.mp4', 'rb') as f:
    for chunk in iter(lambda: f.read(65536), b''):
        m.update(chunk)
# 3. Produce the hash value
print(m.hexdigest())  # b5672ac47a068231f2c56da5df652f47

hashlib模塊

密碼加鹽：對現有的密碼進行處理，比如加一行文字，再對其所有內容進行加密

import hashlib

# NOTE(review): a fixed salt with one round of MD5 illustrates salting only;
# it is not a suitable way to store real passwords.
password = input('>>>: ')
m = hashlib.md5()
# The salt is fed in first, then the password, so the digest covers both
m.update('天王蓋地虎'.encode('utf-8'))
m.update(password.encode('utf-8'))
print(m.hexdigest())

密碼加鹽

Python 還有一個 hmac 模塊，它內部對我們創建的 key 和內容進行進一步的處理，然後再加密

import hmac

# Keyed hash: the first argument is the secret key, update() adds the message.
# digestmod is named explicitly because newer Python versions raise TypeError
# when it is omitted; 'md5' is the algorithm that was used when it could be
# left out.
m = hmac.new('小雞燉蘑菇'.encode('utf-8'), digestmod='md5')
m.update('hello'.encode('utf-8'))
print(m.hexdigest())

hmac

subprocess 模塊：用來執行系統命令

subprocess 是子進程，導入模塊後使用 subprocess 的 Popen() 方法調用 shell 終端執行 tasklist 命令，用來顯示運行在本地計算機上的所有進程。但是這時執行的 Python 程序相當於一個父進程，在子進程還沒有運行結束時父進程便已經結束，所以無法顯示結果，可以通過 time 模塊讓 Python 程序睡眠一段時間，從而查看 subprocess 的執行結果

import subprocess
import time

# Start the `tasklist` command through the shell in a child process
subprocess.Popen("tasklist", shell=True)
# Keep the parent alive for three seconds so the child's output can appear.
# NOTE(review): waiting on the Popen object would not depend on a fixed
# delay; the sleep is kept because the accompanying text describes it.
time.sleep(3)

View Code

但是上面只能輸出在控制檯，假如要輸出到文件或者別的地方，就需要在子進程與父進程之間創建一個共享數據的地方，叫做管道，它是在內存中創建的。使用這些數據時，直接從管道中取，取出的是命令正確執行時的輸出，因爲這是存放正確數據的管道。不過命令也有可能輸入錯誤，於是還會產生錯誤的數據，所以應該再使用一個存放錯誤數據的管道，這樣在取數據的時候就有了區分，得到的結果也可以做存放於文件的操作

import subprocess

obj = subprocess.Popen(
    "tasklist",
    shell=True,
    stdout=subprocess.PIPE,  # pipe for the normal output
    stderr=subprocess.PIPE,  # pipe for the error output
)
# communicate() reads both pipes to the end and waits for the child. Reading
# only stdout while stderr is also a pipe can block if the child fills the
# stderr pipe, and it leaves the child un-waited.
stdout_res, stderr_res = obj.communicate()
print(stdout_res.decode('gbk'))
# If the command was mistyped, the message is in the error pipe instead:
# decode stderr_res with 'gbk' in the same way to read it.