1、正則表達式
1.使用正則表達式的動機
1. 文本處理已經成爲計算機常見工作之一
2. 對文本內容的搜索,定位,提取是邏輯比較複雜的工作
import re

# Sample text: two "name:year" records separated by a comma.
s = "Levi:1994,Sunny:1993"
# Two capture groups, so findall() yields one (name, year) tuple per record.
pattern = r"(\w+):(\d+)"

# The module-level equivalent of the call below is: re.findall(pattern, s)

# Compile the pattern first, then search through the compiled object;
# pos/endpos limit the part of s that is scanned.
regex = re.compile(pattern)
l = regex.findall(s, pos=0, endpos=55)
print(l)
[('Levi', '1994'), ('Sunny', '1993')]
import re

s = "hello world how are you"

# Split the string on runs of non-word characters.
# r"[^\w]+" is an equivalent way to write the same pattern.
pattern = r"\W+"
l = re.split(pattern, s)
print(l)
import re

s = "時間:2019/10/12"

# Replace every "/" in the string with "-".
ns = re.sub(r'/', '-', s)
print(ns)
時間:2019-10-12
import re

s = "時間:2019/10/12"

# subn() works like sub() but returns (new_string, number_of_replacements).
# count caps the number of replacements; it is passed by keyword because
# passing it positionally is deprecated since Python 3.13.
ns = re.subn(r'/', '-', s, count=4)
print(ns)
# Output:
# ('時間:2019-10-12', 2)
import re

s = '2019年,建國70週年'
pattern = r"\d+"

# finditer() returns an iterator that yields one match object per match.
ite = re.finditer(pattern, s)

# Approach 1: pull the match objects one at a time with next().
print("ite類型", type(ite))
print(next(ite).group())
print(next(ite).group())

# Approach 2: walk the iterator with a for loop.
# NOTE: the two next() calls above already consumed both matches, so this
# loop prints nothing; call re.finditer() again to iterate from the start.
print("=========")
for i in ite:
    print(i.group())
ite類型 <class 'callable_iterator'>
2019
70
=========
import re

# fullmatch() succeeds only when the whole string matches the pattern.
m = re.fullmatch(r'\w+', "hello1973")
print(m.group())
import re

# Check that a password is well-formed: letters and digits only.
m = re.fullmatch(r'[0-9A-Za-z]+', "hello1973")
print(m.group())
# Output:
# hello1973
import re

# match() only tries the pattern at the start of the string: here an
# uppercase letter followed by any number of word characters.
m = re.match(r'[A-Z]\w*', "Hello1973")
print(m.group())
# Output:
# Hello1973
import re

# search() returns the first match anywhere in the string: here the first
# run of non-whitespace characters, which ends at the newline.
m = re.search(r'\S+', "好\n嗨 喲")
print(m.group())
# Output:
# 好
import re

pattern = r"(ab)cd(?P<pig>ef)"
regex = re.compile(pattern)

# Build a match object; pos/endpos limit the search to the first 7 characters.
obj = regex.search("abcdefghi", pos=0, endpos=7)

# Demonstrate the attributes of a match object.
print(obj.pos)        # pos value passed to search()
print(obj.endpos)     # endpos value passed to search()
print(obj.re)         # compiled pattern that produced the match
print(obj.string)     # string that was searched
print(obj.lastgroup)  # name of the last matched group
print(obj.lastindex)  # index of the last matched group
# Output:
# 0
# 7
# re.compile('(ab)cd(?P<pig>ef)')
# abcdefghi
# pig
# 2
import re

pattern = r"(ab)cd(?P<pig>ef)"
regex = re.compile(pattern)

# Build a match object; pos/endpos limit the search to the first 7 characters.
obj = regex.search("abcdefghi", pos=0, endpos=7)

# Demonstrate the methods of a match object.
print(obj.start())       # start index of the match
print(obj.end())         # end index of the match
print(obj.span())        # (start, end) pair
print(obj.groupdict())   # named groups as a dict
print(obj.groups())      # all groups as a tuple
print(obj.group())       # text of the whole match
print(obj.group(1))      # text of the first group
print(obj.group('pig'))  # text of the group named "pig"
# Output:
# 0
# 6
# (0, 6)
# {'pig': 'ef'}
# ('ab', 'ef')
# abcdef
# ab
# ef
import re

s = """hello world
你好,北京
"""

# re.A makes \w match ASCII characters only, so the Chinese text is skipped.
regex = re.compile(r'\w+', flags=re.A)
l = regex.findall(s)
print(l)
['hello', 'world']
import re

s = """Hello world
你好,北京
"""

# re.I ignores letter case while matching, so [A-Z] also matches lowercase.
regex = re.compile(r'[A-Z]+', flags=re.I)
l = regex.findall(s)
print(l)
# Output:
# ['Hello', 'world']
import re

s = """Hello world
你好,北京
"""

# Without flags "." does not match a newline, so each line matches separately.
regex = re.compile(r'.+')
l = regex.findall(s)
print(l)
# Output:
# ['Hello world', '你好,北京']
import re

s = """Hello world
你好,北京
"""

# re.S lets "." match a newline too, so the whole text is a single match.
regex = re.compile(r'.+', flags=re.S)
l = regex.findall(s)
print(l)
# Output:
# ['Hello world\n你好,北京\n']
import re

s = """Hello world
你好,北京
"""

# re.M makes ^ and $ match at the start and end of every line, so "world$"
# matches at the end of the first line.
regex = re.compile(r'world$', flags=re.M)
l = regex.findall(s)
print(l)
# Output:
# ['world']
import re

s = """Hello world
你好,北京
"""

# re.X (verbose mode) ignores whitespace inside the pattern and treats "#"
# to end-of-line as a comment, so each part must sit on its own line.
pattern = r"""\w+ # 第一部分
\s+ # 第二部分
\w+ # 第三部分
"""
regex = re.compile(pattern, flags=re.X)
l = regex.findall(s)
print(l)
# Output:
# ['Hello world']
""" 匹配每段IP地址,要求: 根據輸入的每段首單詞,獲取IP地址 """ import re import sys port = sys.argv[1] f = open('1.txt') # 找到port段落 while True: data = '' for line in f: if line != '\n': # 不是空行 data += line else: break if not data: # 文件結尾 print("Not Found the %s"%port) break # 匹配字符串首個單詞 key_word = re.match(r'\S+',data).group() if port == key_word: # 匹配目標內容 # pattern = r"[0-9a-f]{4}\.[0-9a-f]{4}\.[0-9a-f]{4}" pattern=r"(\d{1,3}\.){3}\d{1,3}/\d+|Unknow" try: address = re.search(pattern,data).group() print(address) except: print("No address") break