import re

# Demo script: contrast re.match() with re.search() on the same text.
#
# re.match(pattern, text) only tries to match at the very beginning of
# `text`; if the text does not match from its first character, the match
# fails.
# Regex patterns are written as raw strings (r"...") -- strongly recommended,
# so backslashes reach the regex engine untouched.
# The return value is a match object; call .group() on it to get the matched
# text.
res = re.match(r'ello', 'ello meizi hello')

# A successful match returns a match object (truthy); no match returns None.
if res:
    print(res.group())
else:
    print("沒有匹配結果")

# re.search(pattern, text) scans forward: if the pattern does not match at
# the start, it keeps trying later positions and returns the first hit.
res = re.search(r'hello', 'ello meizi hello')

# Same contract as above: a match object on success, None otherwise.
if res:
    print(res.group())
else:
    print("沒有匹配結果")

# Study question kept from the original notes (a no-op string statement):
# what do match() and search() do, how do they differ, and how are they
# related?
"""match函數和search函數的功能 區別和聯繫"""
In [1]: import re In [2]: re.match(r"hello","hello").group() # ----------1. .匹配一個任意字符(除去\n)----------------------------------------- In [3]: re.match(r".ello","hello").group() In [4]: re.match(r".ello","Hello").group() In [5]: re.match(r".ello","1ello").group() In [6]: re.match(r".ello","&ello").group() In [7]: re.match(r".ello","\nello").group() # 出錯 In [14]: re.match(r".ello",".ello").group() In [15]: re.match(r"\.ello",".ello").group() ------------2. []匹配集合中任意一個字符---------------------------------------- In [8]: re.match(r"[Hh]ello","Hello").group() In [9]: re.match(r"[Hh]ello","hello").group() In [10]: re.match(r"[Hh]ello","1ello").group() # 出錯 In [11]: re.match(r"[0123456789]ello","1ello").group() In [12]: re.match(r"[0123456789]ello","9ello").group() In [13]: re.match(r"[0123456789]ello","Hello").group() # 出錯 In [16]: re.match(r"[0123456789]ello","Hello").group() # 出錯 In [17]: re.match(r"[0123456789]ello","1ello").group() ------------3. [-]匹配範圍內部的任意一個字符----------------- In [18]: re.match(r"[0-9]ello","1ello").group() In [19]: re.match(r"[0-35-9]ello","1ello").group() In [20]: re.match(r"[0-35-9]ello","9ello").group() In [21]: re.match(r"[0-35-9]ello","4ello").group() # 出錯 In [22]: re.match(r"[0-9a-zA-Z]ello","4ello").group() In [23]: re.match(r"[0-9a-zA-Z]ello","aello").group() In [24]: re.match(r"[0-9a-zA-Z]ello","Hello").group() In [25]: re.match(r"[0-35-9]ello","9ello").group() In [26]: re.match(r"[0-35-9]ello","4ello").group() # 出錯 ------------4. [^] 禁止匹配 範圍內部的任意一個字符----------------- In [27]: re.match(r"[^4]ello","4ello").group() In [28]: re.match(r"[^4]ello","0ello").group() In [29]: re.match(r"[^4]ello","9ello").group() ------------5. 
'\d'匹配一個任意數字字符 '\D'匹配一個任意非數字字符 ----------------- In [30]: re.match(r"\dello","9ello").group() In [31]: re.match(r"\dello","0ello").group() In [32]: re.match(r"\dello","@ello").group() In [33]: re.match(r"\Dello","0ello").group() In [34]: re.match(r"\Dello","@ello").group() In [35]: re.match(r"\Dello","?ello").group() ------------6. '\s'匹配一個任意空白字符 '\S'匹配一個任意非空白字符------------------------------- In [36]: re.match(r"\Dello"," ello").group() In [37]: re.match(r"[\t\r\n\v\f ]ello"," ello").group() In [38]: re.match(r"\sello"," ello").group() In [39]: re.match(r"\Sello"," ello").group() In [40]: re.match(r"\Sello","1ello").group() -----------7. '\w'匹配一個任意單詞字符 '\W'匹配一個任意非單詞字符----------------------------- In [41]: re.match(r"\wello","1ello").group() In [42]: re.match(r"\wello","hello").group() In [43]: re.match(r"\wello","Hello").group() In [44]: re.match(r"\wello","_ello").group() In [45]: re.match(r"\Wello","_ello").group() In [46]: re.match(r"\Wello","?ello").group() ------------'\w'語義拓展 In [2]: re.match(r"\wBC","ABC") In [3]: re.match(r"\wBC","ABC").group() In [4]: re.match(r"\wBC","呵BC").group() In [5]: re.match(r"\wBC","呵BC",re.ASCII).group() In [6]: re.match(r"\wBC","呵BC",re.UNICODE).group() In [7]: re.match(r"\wBC","呵BC").group() -----------二 量詞 匹配多個字符--------------------------- In [47]: re.match(r"嫦娥號\d升空了","嫦娥1號升空了").group() In [48]: re.match(r"嫦娥\d號升空了","嫦娥1號升空了").group() In [49]: re.match(r"嫦娥\d號升空了","嫦娥9號升空了").group() In [50]: re.match(r"嫦娥\d號升空了","嫦娥10號升空了").group() In [51]: re.match(r"嫦娥\d\d號升空了","嫦娥10號升空了").group() In [52]: re.match(r"嫦娥\d\d號升空了","嫦娥99號升空了").group() In [53]: re.match(r"嫦娥\d\d號升空了","嫦娥100號升空了").group() In [54]: re.match(r"嫦娥\d\d\d號升空了","嫦娥100號升空了").group() In [55]: re.match(r"嫦娥\d\d\d\d\d號升空了","嫦娥10000號升空了").group() In [56]: re.match(r"嫦娥\d{5}號升空了","嫦娥10000號升空了").group() In [57]: re.match(r"嫦娥\d{3}號升空了","嫦娥10000號升空了").group() In [58]: re.match(r"嫦娥\d{3}號升空了","嫦娥100號升空了").group() In [59]: re.match(r"嫦娥\d{1,3}號升空了","嫦娥100號升空了").group() In 
[60]: re.match(r"嫦娥\d{1,3}號升空了","嫦娥1號升空了").group() In [61]: re.match(r"嫦娥\d{1,3}號升空了","嫦娥10號升空了").group() In [62]: re.match(r"嫦娥\d{1,1}號升空了","嫦娥10號升空了").group() In [64]: re.match(r"嫦娥\d{0,3}號升空了","嫦娥號升空了").group() In [65]: re.match(r"嫦娥\d{0,}號升空了","嫦娥號升空了").group() In [66]: re.match(r"嫦娥\d*號升空了","嫦娥號升空了").group() In [67]: re.match(r"嫦娥\d{1,}號升空了","嫦娥1號升空了").group() In [68]: re.match(r"嫦娥\d{1,}號升空了","嫦娥號升空了").group() In [69]: re.match(r"嫦娥\d{1,}號升空了","嫦娥1000號升空了").group() In [70]: re.match(r"嫦娥\d+號升空了","嫦娥1000號升空了").group() In [8]: re.match(r"\w?BC","ABC").group() In [9]: re.match(r"\w?BC","BC").group() ---------------三 匹配開始^ 和結束位置$-------------------------- In [71]: re.match(r"\w{4,20}@163.com","hello@163.com").group() In [72]: re.match(r"\w{4,20}@163.com","hello@163Acom").group() In [73]: re.match(r"\w{4,20}@163\.com","hello@163Acom").group() In [74]: re.match(r"\w{4,20}@163\.com","hello@163.com").group() In [75]: re.match(r"\w{4,20}@163\.com","hello@163.com.cn").group() In [76]: re.match(r"\w{4,20}@163\.com","cc.hello@163.com").group() In [77]: re.search(r"\w{4,20}@163\.com","cc.hello@163.com").group() In [78]: re.search(r"\w{4,20}@163\.com","cc.hello@163.com.cn").group() In [79]: re.search(r"^\w{4,20}@163\.com","cc.hello@163.com.cn").group() In [80]: re.search(r"^\w{4,20}@163\.com","hello@163.com.cn").group() In [81]: re.search(r"^\w{4,20}@163\.com$","hello@163.com.cn").group() In [82]: re.search(r"^\w{4,20}@163\.com$","hello@163.com").group() In [83]: re.match(r"^\w{4,20}@163\.com","hello@163.com.cn").group() In [84]: re.match(r"^\w{4,20}@163\.com$","hello@163.com.cn").group() In [85]: re.match(r"^\w{4,20}@163\.com$","hello@163.com").group() -------------四 匹配分組 ()將感興趣的數據進行提取------ In [86]: re.match(r"嫦娥(\d+)號升空了","嫦娥1000號升空了").group() In [87]: re.match(r"嫦娥(\d+)號升空了","嫦娥1000號升空了").group(0) In [88]: re.match(r"嫦娥(\d+)號升空了","嫦娥1000號升空了").group(1) In [89]: re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group() In [90]: 
re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group(1) In [91]: re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group(2) -------(|)匹配其中任何一個表達式而且放入分組中---- In [92]: re.match(r"^(\w{4,20})@(163|qq)\.com$","hello@263.com").group(2) In [93]: re.match(r"^(\w{4,20})@(163|qq)\.com$","hello@qq.com").group(2) In [94]: re.match(r"^(\w*)hello(\w*)$","hellohello@qq.com").group(1) ---------------------'\分組編號' 使用某個分組的數據在後面某個位置繼續匹配 ---- In [11]: re.match(r"^\w{4,20}@163.com$|^\w{4,20}@qq.com$","hello@163.com").group() In [12]: re.match(r"^\w{4,20}@163.com$|^\w{4,20}@qq.com$","hello@qq.com").group() In [13]: re.match(r"^\w{4,20}@(163|qq).com$","hello@qq.com").group() In [14]: re.match(r"^\w{4,20}@(163|qq).com$","hello@163.com").group() In [15]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group() In [16]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group(1) In [17]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group(2) In [18]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-1234567").group(2) In [19]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-12345678").group() In [20]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-1234567").group() In [21]: re.match(r"<(\w+)>","<html>aaa</html>").group(1) In [22]: re.match(r"<(\w+)>(.*)</\1>","<html>aaa</html>").group(1) In [23]: re.match(r"<(\w+)>(.*)</\1>","<html>aaa</html2>").group(1) 匹配多個標籤中的數據 <html><body>hello</body></html> """思考問題 如何建立有名分組 如何引用有名分組""" In [24]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group() In [25]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group(1) In [26]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group(2) In [27]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html1>").group(2) In [28]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html1>").group() In [29]: 
re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group() In [30]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(1) In [31]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(2) In [32]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(3) In [34]: re.match(r"((\d{3,4})-(\d{6,8})) \2-\3","0755-12345678 0755-12345678").group() In [35]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group() In [36]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group(1) In [37]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group(2) In [38]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group('quhao') In [39]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group('zuoji') In [40]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8}) (?P=quhao)-(?P=zuoji)","0755-12345678 07 ...: 55-12345678").group() In [41]: re.match(r"((?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})) (?P=quhao)-(?P=zuoji)","0755-12345678 ...: 0755-12345678").group() ---------------------------re模塊高級函數-------------------------- In [42]: ret = re.search(r"\d+", "閱讀次數爲 9999").group() In [43]: re.search(r"\d+", "閱讀次數爲 9999").group() Out[43]: '9999' In [46]: re.findall(r"\d+", "python = 9999, c = 7890, c++ = 12345") Out[46]: ['9999', '7890', '12345'] In [47]: re.sub(r"\d+","998","python=997") Out[47]: 'python=998' In [48]: re.sub(r"\d+","998","python=997 c=988") Out[48]: 'python=998 c=998' In [49]: re.sub(r"\d+","998","python=997 c=988",1) Out[49]: 'python=998 c=988' In [52]: def func(matchobj): ...: data = matchobj.group() ...: str_data = str( int(data) + 1) ...: return str_data ...: In [53]: re.sub(r"\d+",func, "age=17") Out[53]: 'age=18' In [54]: data = """ ...: <div> ...: <p>崗位職責:</p> ...: <p>完成推薦算法、數據統計、接口、後臺等服務器端相關工做</p> ...: <p><br></p> ...: <p>必備要求:</p> ...: <p>良好的自我驅動力和職業素養,工做積極主動、結果導向</p> ...: <p> <br></p> ...: <p>技術要求:</p> ...: <p>一、一年以上 Python 開發經驗,掌握面向對象分析和設計,瞭解設計模式</p> ...: 
<p>二、掌握HTTP協議,熟悉MVC、MVVM等概念以及相關WEB開發框架</p> ...: <p>三、掌握關係數據庫開發設計,掌握 SQL,熟練使用 MySQL/PostgreSQL 中的一種<br></p> ...: <p>四、掌握NoSQL、MQ,熟練使用對應技術解決方案</p> ...: <p>五、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js</p> ...: <p> <br></p> ...: <p>加分項:</p> ...: <p>大數據,數理統計,機器學習,sklearn,高性能,大併發。</p> ...: ...: </div>""" In [55]: re.sub(r"<.*>","",data) Out[55]: '\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n ' In [56]: re.sub(r"<\w+>","",data) Out[56]: '\n\n 崗位職責:</p>\n完成推薦算法、數據統計、接口、後臺等服務器端相關工做</p>\n</p>\n必備要求:</p>\n良好的自我驅動力和職業素養,工做積極主動、結果導向</p>\n </p>\n技術要求:</p>\n一、一年以上 Python 開發經驗,掌握面向對象分析和設計,瞭解設計模式</p>\n二、掌握HTTP協議,熟悉MVC、MVVM等概念以及相關WEB開發框架</p>\n三、掌握關係數據庫開發設計,掌握 SQL,熟練使用 MySQL/PostgreSQL 中的一種</p>\n四、掌握NoSQL、MQ,熟練使用對應技術解決方案</p>\n五、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js</p>\n </p>\n加分項:</p>\n大數據,數理統計,機器學習,sklearn,高性能,大併發。</p>\n\n </div>' In [57]: re.sub(r"</?\w+>","",data) Out[57]: '\n\n 崗位職責:\n完成推薦算法、數據統計、接口、後臺等服務器端相關工做\n\n必備要求:\n良好的自我驅動力和職業素養,工做積極主動、結果導向\n \n技術要求:\n一、一年以上 Python 開發經驗,掌握面向對象分析和設計,瞭解設計模式\n二、掌握HTTP協議,熟悉MVC、MVVM等概念以及相關WEB開發框架\n三、掌握關係數據庫開發設計,掌握 SQL,熟練使用 MySQL/PostgreSQL 中的一種\n四、掌握NoSQL、MQ,熟練使用對應技術解決方案\n五、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js\n \n加分項:\n大數據,數理統計,機器學習,sklearn,高性能,大併發。\n\n ' In [58]: re.sub(r"</?\w+>|\n","",data) Out[58]: ' 崗位職責:完成推薦算法、數據統計、接口、後臺等服務器端相關工做必備要求:良好的自我驅動力和職業素養,工做積極主動、結果導向 技術要求:一、一年以上 Python 開發經驗,掌握面向對象分析和設計,瞭解設計模式二、掌握HTTP協議,熟悉MVC、MVVM等概念以及相關WEB開發框架三、掌握關係數據庫開發設計,掌握 SQL,熟練使用 MySQL/PostgreSQL 中的一種四、掌握NoSQL、MQ,熟練使用對應技術解決方案五、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js 加分項:大數據,數理統計,機器學習,sklearn,高性能,大併發。 ' In [59]: re.sub(r"</?\w+>|\n| ","",data) Out[59]: ' 崗位職責:完成推薦算法、數據統計、接口、後臺等服務器端相關工做必備要求:良好的自我驅動力和職業素養,工做積極主動、結果導向;技術要求:一、一年以上 Python 開發經驗,掌握面向對象分析和設計,瞭解設計模式二、掌握HTTP協議,熟悉MVC、MVVM等概念以及相關WEB開發框架三、掌握關係數據庫開發設計,掌握 SQL,熟練使用 MySQL/PostgreSQL 中的一種四、掌握NoSQL、MQ,熟練使用對應技術解決方案五、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js;加分項:大數據,數理統計,機器學習,sklearn,高性能,大併發。 ' In [60]: re.split(r" ","age=18 name=tom") Out[60]: ['age=18', 'name=tom'] In [61]: re.split(r" |=","age=18 
name=tom") Out[61]: ['age', '18', 'name', 'tom'] ----------------------?號將正則轉化爲非貪婪模式(懶惰模式)------------------------------- In [62]: re.match(r"(\d+)(\d?)","12345678").group() Out[62]: '12345678' In [63]: re.match(r"(\d+)(\d?)","12345678").group(1) Out[63]: '12345678' In [64]: re.match(r"(\d+)(\d?)","12345678").group(2) Out[64]: '' In [65]: re.match(r"(\d+?)(\d?)","12345678").group(2) Out[65]: '2' In [66]: re.match(r"(\d+?)(\d+)","12345678").group(2) Out[66]: '2345678' In [67]: re.match(r"(\d+?)(\d+)","12345678").group(1) Out[67]: '1' In [68]: re.match(r"(\d+)(\d+)","12345678").group(1) Out[68]: '1234567' In [69]: re.match(r"(\d+)(\d+)","12345678").group(2) Out[69]: '8' In [70]: url = """<img alt="丁叮c的直播" data-original="https://rpic.douyucdn.cn/live-cover/appCov ...: ers/2017/12/27/462253_20171227014914_big.jpg" src="https://rpic.douyucdn.cn/live-cover/ap ...: pCovers/2017/12/27/462253_20171227014914_big.jpg" width="283" height="163" style="display ...: : block;">""" In [72]: re.search(r"http.*jpg",url).group() Out[72]: 'https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg" src="https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg' In [73]: re.search(r"http.*?jpg",url).group() Out[73]: 'https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg' ----------------r原生字符串 能夠自動將其中的反斜線 進行轉義---------------------- In [74]: path = "c:\\a\\b" In [75]: print(path) c:\a\b In [76]: path = "c:\a\b\n" In [77]: print(path) c: In [78]: re.match("c:\\a","c:\\a\\b\\c").group() --------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-78-c8bbf045c2be> in <module>() ----> 1 re.match("c:\\a","c:\\a\\b\\c").group() AttributeError: 'NoneType' object has no attribute 'group' In [79]: re.match("c:\\\\a","c:\\a\\b\\c").group() Out[79]: 'c:\\a' In [80]: re.match("c:\\\\a\\\\b\\\\c","c:\\a\\b\\c").group() Out[80]: 
'c:\\a\\b\\c' In [81]: re.match(r"c:\\a\\b\\c","c:\\a\\b\\c").group() Out[81]: 'c:\\a\\b\\c' In [82]: r"c:\\a\\b\\c" Out[82]: 'c:\\\\a\\\\b\\\\c'