Regular Expression 匹配中文,全部中文標點符號

import re

import requests

text=requests.get("https://movie.douban.com").text

#1.匹配漢字 \u4E00-\u9FA5

re.findall('[ \u4E00-\u9FA5]+',text)

#參考unicode

https://www.qqxiuzi.cn/zh/hanzi-unicode-bianma.php

#2.匹配全部中文標點符號  [\u3002|\uff1f|\uff01|\uff0c|\u3001|\uff1b|\uff1a|\u201c|\u201d|\u2018|\u2019|\uff08|\uff09|\u300a|\u300b|\u3008|\u3009|\u3010|\u3011|\u300e|\u300f|\u300c|\u300d|\ufe43|\ufe44|\u3014|\u3015|\u2026|\u2014|\uff5e|\ufe4f|\uffe5]

re.findall('[\u3002|\uff1f|\uff01|\uff0c|\u3001|\uff1b|\uff1a|\u201c|\u201d|\u2018|\u2019|\uff08|\uff09|\u300a|\u300b|\u3008|\u3009|\u3010|\u3011|\u300e|\u300f|\u300c|\u300d|\ufe43|\ufe44|\u3014|\u3015|\u2026|\u2014|\uff5e|\ufe4f|\uffe5]',text)

#參考

https://blog.csdn.net/cysear/article/details/80435756

相關文章
相關標籤/搜索