"""Look up a word or phrase on Youdao Dictionary and print its translations."""
from urllib.parse import quote

import requests
from lxml.html import etree

# NOTE: no custom User-Agent or Cookie headers are sent. The original author
# expected Youdao to have anti-scraping measures but found that a plain
# request works, so the captured browser headers were dropped.

# XPath candidates, tried in order: the translation may show up under any of
# them depending on what was looked up.
TRANSLATION_XPATHS = (
    '//*[@id="phrsListTab"]/div/ul/li/text()',
    '//*[@id="phrsListTab"]/div/ul/p/span[2]/a/text()',
    # Used when a long passage (rather than a single word) is translated.
    '//*[@id="fanyiToggle"]/div/p[2]/text()',
)

a = input('請輸入你翻譯的內容')

# The URL pattern was captured with Fiddler. The query is percent-encoded as a
# single path segment so characters such as "/", "?", "#" or "%" in the input
# cannot change the structure of the URL.
encoded_query = quote(a, safe='')
# Without a timeout, requests would wait indefinitely on a stalled connection.
rp = requests.get(f'https://dict.youdao.com/w/{encoded_query}/', timeout=10)

html = etree.HTML(rp.text)

# Stop at the first XPath that yields any text; later ones are only fallbacks.
data = []
for translation_xpath in TRANSLATION_XPATHS:
    data = html.xpath(translation_xpath)
    if data:
        break

# Number the results purely to make the output easier to read.
for count, english in enumerate(data, start=1):
    print(f'翻譯{count}:{english}')
我還發現了一個有趣的 JS(JSONP)請求:
https://dict.youdao.com/word/wordarticle?query={這裏填入要查詢翻譯的內容}
&jsonp=jQuery191018231021198201125_1565783847667&_=1565783847668 HTTP/1.1
很多查詢內容會匹配不到結果,你可以輸入「天才」試試。
這個請求返回的是與查詢內容相關的文章的 HTML。