利用 xpath 建立標籤樹之後,雖然提升了元素匹配效率,可是 etree.tostring 預設以 ASCII 編碼輸出,中文會被轉成字元實體,因此直接 tostring 之後看起來會是亂碼。
解決方法:
import requests
from requests.exceptions import RequestException
from lxml import etree

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15',
}


def get_one_page(url, headers, timeout=10):
    """Fetch *url* and return its body as text, or ``None`` on failure.

    Args:
        url: Address of the page to download.
        headers: HTTP request headers passed through to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled server.

    Returns:
        The response body decoded with the encoding detected from the
        content, or ``None`` if the request raised a ``RequestException``
        (including a timeout) or the status code was not 200.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    # Decode using the encoding detected from the body rather than the one
    # derived from the HTTP headers.
    response.encoding = response.apparent_encoding
    return response.text


# NOTE(review): `html` and `exp` are not defined in this snippet; presumably
# `html` is page text returned by get_one_page() and `exp` is an XPath
# expression string -- confirm against the caller.
tree = etree.HTML(html)
aim = tree.xpath(exp)
for i in aim:
    # Serialize as UTF-8 bytes and decode them back to str so non-ASCII
    # text (e.g. Chinese) stays readable instead of being emitted as ASCII
    # character references. `content` is rebound on every iteration, so
    # consume it inside the loop.
    content = etree.tostring(
        i, encoding='utf-8', pretty_print=True, method="html"
    ).decode('utf-8')