spider: from mobile search, to mobile map pages, to mobile html
The full spider: build a task dictionary from the JMTool CSV, then, for each place name, search Sogou mobile for 百度地圖, drill into the Baidu Map mobile results, and save every result page as HTML.

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.firefox.options import Options
import time
from time import sleep
import math
import random
import sys

# Separators marking the start of a suffix to strip from a place name
# (fullwidth '(', ASCII '(' and '-').
tag_jmtool_list = ['(', '(', '-']

ua_list = []
with open('mobile_ua.txt', 'r', encoding='utf-8') as uafile:
    for i in uafile:
        if i.find('Mozilla') > -1:
            ua_list.append(i.replace('\n', '').strip())
ua_list_len_ = len(ua_list) - 1


def extract_name(name_):
    for i in tag_jmtool_list:
        name_ = name_.split(i)[0]
    return name_


target_type_list = ['住宅小區', '寫字樓']  # residential estates, office buildings
target_type_list = ['住宅小區']  # narrowed to residential estates only

target_dic = {}
with open('JMTool0819am/任務JMTool.csv', 'r', encoding='utf-8') as csvfile:
    for i in csvfile:
        l = i.replace(' ', '').replace('\n', '').split('";"')
        if l[0].replace('"', '') in target_type_list:
            type_, city, district, addr, name_ = l
            type_, name_ = type_.replace('"', ''), name_.replace('"', '')
            name_reduction = extract_name(name_)
            if city not in target_dic:
                target_dic[city] = {}
            if district not in target_dic[city]:
                target_dic[city][district] = {}
            if type_ not in target_dic[city][district]:
                target_dic[city][district][type_] = {}
            # was: "if name_reduction not in target_dic[city][district]", which
            # tested the wrong level and re-created the lists on every row
            if name_reduction not in target_dic[city][district][type_]:
                target_dic[city][district][type_][name_reduction] = {}
                target_dic[city][district][type_][name_reduction]['name_reduction_list'] = []
                target_dic[city][district][type_][name_reduction]['history_list'] = []
            target_dic[city][district][type_][name_reduction]['name_reduction_list'].append(name_)
            target_dic[city][district][type_][name_reduction]['history_list'].append(l)


def write_res_html(browser, dir_='baidu_map_html/'):
    # input_ is the module-level search string set by the driver loop below
    current_url_ = '%s%s%s%s' % ('<!--', input_, browser.current_url, '-->')
    page_source = '%s%s' % (current_url_, browser.page_source)
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    file_name = '%s%s%s%s' % (dir_, input_, localtime_, '.html')
    with open(file_name, 'w', encoding='utf-8') as fo:  # was "fo.closed", which never closed the file
        fo.write(page_source)


def gen_random_letter():
    return chr(random.randint(97, 122))  # 'a'..'z'


def gen_random_num():
    # was randint(0, 10): bounds are inclusive, so it occasionally emitted a two-digit '10'
    return random.randint(0, 9)


def gen_sougo_pid():
    # 16 characters: letters at positions 1, 3, 4 and 15, digits elsewhere
    res_ = ''
    for i in range(1, 17, 1):
        if i in [1, 3, 4, 15]:
            res_ = '%s%s' % (res_, gen_random_letter())
        else:
            res_ = '%s%s' % (res_, gen_random_num())
    return res_


def close_alert(browser, attitude='accept'):
    try:
        sleep(2)
        al = browser.switch_to.alert  # was "switch_to.alert()": it is a property, not a method
        sleep(1)
        if attitude == 'accept':
            al.accept()
        elif attitude == 'dismiss':
            al.dismiss()
        print(sys._getframe().f_lineno, 'alert-closed-ok')
    except Exception:
        print(sys._getframe().f_lineno, Exception, 'no-alert')


# input_ = '深圳市南山區薈芳園'
def mobile_mobile_pages_html(input_):
    # mobile_emulation = {
    #     "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
    #     "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"}
    ua_list_index = random.randint(0, ua_list_len_)
    mobile_emulation = {
        "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0}}
    mobile_emulation['userAgent'] = ua_list[ua_list_index]
    chrome_options = Options()
    chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
    browser = webdriver.Chrome(chrome_options=chrome_options)
    url_seed = 'http://m.sogou.com/web/searchList.jsp?pid=sogou-mobb-%s-0007&keyword=百度地圖' % (gen_sougo_pid())
    print(url_seed)
    browser.get(url_seed)
    js = '%s%s%s' % ('document.getElementsByClassName("input-default js_input")[0].value="', input_, '"')
    browser.execute_script(js)
    xp_newpage = '//*[@id="sogou_vr_21384401_1_wrap"]/div/div[1]/a'
    browser.find_element_by_xpath(xp_newpage).click()
    sleep(2)
    # xp = '//*[@id="common-bottombanner-widget-fis"]/div/div/div[2]'
    # sleep(1)
    # browser.find_element_by_xpath(xp).click()
    close_alert(browser)
    try:
        xp = '//*[@id="place-widget-placenewlist-showall"]/span[1]'
        sleep(2)
        close_alert(browser)
        browser.find_element_by_xpath(xp)
    except Exception:
        print(sys._getframe().f_lineno, Exception)
        return
    close_alert(browser)
    if browser.find_element_by_xpath(xp).text.find('所有') == -1:
        return
    res_num = browser.find_element_by_xpath(xp).text.split('所有')[1].split('條')[0]
    res_num = int(res_num)
    page_num = 10
    loop_breaker = math.ceil(res_num / page_num)
    close_alert(browser)
    if res_num <= page_num:
        write_res_html(browser)
        browser.quit()
        return
    close_alert(browser)
    xp = '//*[@id="place-widget-placenewlist-showall"]'
    browser.find_element_by_xpath(xp).click()
    write_res_html(browser)
    close_alert(browser)
    js = "window.scrollTo(0,document.body.scrollHeight)"
    browser.execute_script(js)
    sleep(1)
    try:
        xp_newpage = '//*[@id="fis_elm__7"]/div/div[2]/span[2]'
        browser.find_element_by_xpath(xp_newpage).click()
        sleep(1)
    except Exception:
        print(sys._getframe().f_lineno, Exception)
        write_res_html(browser)
        browser.quit()
        return
    for i in range(1, loop_breaker, 1):
        sleep(1)
        try:
            xp = '//*[@id="common-bottombanner-widget-fis"]/div/div/div[2]'
            sleep(3)
            browser.find_element_by_xpath(xp).click()
        except Exception:
            print(sys._getframe().f_lineno, Exception)
            sleep(10)
            break
        try:
            js = "window.scrollTo(0,document.body.scrollHeight)"
            browser.execute_script(js)
            sleep(1)
        except Exception:
            print(sys._getframe().f_lineno, Exception)
            sleep(10)
        try:
            xp_newpage = '//*[@id="fis_elm_pager__qk_7"]/div/div/span[2]'
            sleep(1)
            print(input_, i)
            browser.find_element_by_xpath(xp_newpage).click()
            write_res_html(browser)
        except Exception:
            print(sys._getframe().f_lineno, Exception)
            sleep(10)
    sleep(2)
    browser.quit()


for city in target_dic:
    for district in target_dic[city]:
        for type_ in target_dic[city][district]:
            for name_reduction in target_dic[city][district][type_]:
                input_ = '%s%s%s' % (city, district, name_reduction)
                mobile_mobile_pages_html(input_)
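Two of the helpers above are easier to follow with concrete inputs. A quick check, assuming the definitions above are loaded (the place names here are made up for illustration):

# extract_name truncates at the first fullwidth '(', ASCII '(' or '-'.
print(extract_name('薈芳園(一期)'))  # -> 薈芳園
print(extract_name('某大廈-B座'))     # -> 某大廈

# gen_sougo_pid yields 16 characters, letters at positions 1, 3, 4 and 15,
# digits elsewhere, e.g. 'a1bc2345678901d2' (random on every call).
print(gen_sougo_pid())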
An earlier iteration of the same flow, without the alert handling, retries, or the JMTool task loop:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from time import sleep
import math

url_seed = 'http://m.sogou.com/web/searchList.jsp?pid=sogou-mobb-123asd-0007&keyword=百度地圖'
mobile_emulation = {
    "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
    "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"}
chrome_options = Options()
chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
browser = webdriver.Chrome(chrome_options=chrome_options)
browser.get(url_seed)

input_ = '深圳市南山區薈芳園'
js = '%s%s%s' % ('document.getElementsByClassName("input-default js_input")[0].value="', input_, '"')
browser.execute_script(js)
xp_newpage = '//*[@id="sogou_vr_21384401_1_wrap"]/div/div[1]/a'
browser.find_element_by_xpath(xp_newpage).click()
sleep(1)

xp = '//*[@id="common-bottombanner-widget-fis"]/div/div/div[2]'
browser.find_element_by_xpath(xp).click()
xp = '//*[@id="place-widget-placenewlist-showall"]/span[1]'
browser.find_element_by_xpath(xp)
res_num = browser.find_element_by_xpath(xp).text.split('所有')[1].split('條')[0]
res_num = int(res_num)
page_num = 10
loop_breaker = math.ceil(res_num / page_num)


def write_res_html(browser, dir_='baidu_map_html/'):
    current_url_ = '%s%s%s%s' % ('<!--', input_, browser.current_url, '-->')
    page_source = '%s%s' % (current_url_, browser.page_source)
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    file_name = '%s%s%s%s' % (dir_, input_, localtime_, '.html')
    with open(file_name, 'w', encoding='utf-8') as fo:  # was "fo.closed", which never closed the file
        fo.write(page_source)


xp = '//*[@id="place-widget-placenewlist-showall"]'
browser.find_element_by_xpath(xp).click()
write_res_html(browser)

js = "window.scrollTo(0,document.body.scrollHeight)"
browser.execute_script(js)
sleep(1)
xp_newpage = '//*[@id="fis_elm__7"]/div/div[2]/span[2]'
browser.find_element_by_xpath(xp_newpage).click()
sleep(1)

for i in range(1, loop_breaker, 1):
    sleep(1)
    xp = '//*[@id="common-bottombanner-widget-fis"]/div/div/div[2]'
    browser.find_element_by_xpath(xp).click()
    js = "window.scrollTo(0,document.body.scrollHeight)"
    browser.execute_script(js)
    sleep(1)
    xp_newpage = '//*[@id="fis_elm_pager__qk_7"]/div/div/span[2]'
    browser.find_element_by_xpath(xp_newpage).click()
    write_res_html(browser)
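The paging arithmetic is easy to sanity-check by hand; a worked example with the page_num = 10 used above:

import math

# 23 results at 10 per page -> 3 pages (Python 3 true division keeps this exact).
res_num, page_num = 23, 10
loop_breaker = math.ceil(res_num / page_num)
print(loop_breaker)  # 3 -> the for-loop runs i = 1, 2: two "next page" clicks, covering pages 2 and 3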
And the first minimal proof of concept: emulate a mobile device, load the Sogou result page, fill the search box, click through.

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

url_seed = 'http://m.sogou.com/web/searchList.jsp?pid=sogou-mobb-123asd-0007&keyword=百度地圖'
mobile_emulation = {
    "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
    "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"}
chrome_options = Options()
chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
browser = webdriver.Chrome(chrome_options=chrome_options)
browser.get(url_seed)

js = 'document.getElementsByClassName("input-default js_input")[0].value="深圳市南山區海岸城"'
browser.execute_script(js)
xp = '//*[@id="sogou_vr_21384401_1_wrap"]/div/div[1]/a'
browser.find_element_by_xpath(xp).click()
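One way to confirm the emulation actually took effect is to ask the page itself for its user agent; a minimal sketch, run right after the webdriver.Chrome(...) call above:

# The emulated session should report the Nexus 5 UA from mobile_emulation,
# not the desktop Chrome UA of the host machine.
reported_ua = browser.execute_script('return navigator.userAgent')
print(reported_ua)
assert 'Mobile' in reported_ua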
The user-agent pool (mobile_ua.txt / ua_list.txt), one UA string per line:
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6
Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER
Opera/9.25 (Windows NT 5.1; U; en)
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)
Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20
Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0
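The spiders all consume this pool the same way: read the file once, keep plausible UA lines, then draw one per browser session. A minimal sketch of that pattern, assuming mobile_ua.txt sits in the working directory:

import random

# Load the UA pool once; keep only lines that look like real UA strings.
with open('mobile_ua.txt', 'r', encoding='utf-8') as uafile:
    ua_pool = [line.strip() for line in uafile if 'Mozilla' in line]

# Pick a fresh UA for each browser session.
ua = random.choice(ua_pool)
print(ua)

random.choice also sidesteps the "ua_list_len_ = len(ua_list) - 1" bookkeeping that randint needs.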
A related maintenance script: it pulls the currently active order urls from MySQL, checks each with requests (falling back to PhantomJS rendering) for the presence of our ad code, and writes failures to test_error, fanned out across threads.

import os, sys
import time
import logging
import requests
import threading
from random import choice
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

"""
Global convention, to ease later log analysis
os._exit(INT)
4001
4002
4003
4004
"""
os_sep = os.sep
this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), \
    os.path.abspath(__file__).split(os_sep)[-1]
base_dir = os.path.dirname(os_sep.join(os.path.abspath(__file__).split(os_sep)[0:-2]))
log_abspath = '%s%s%s' % (base_dir, os_sep, 'log')

"""
Logging at this point must not depend on the logging setup itself
"""
now_, e = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), 'script start'
logf, s = '%s%s%s%s' % (log_abspath, os_sep, this_file_name, now_), '%s%s%s%s' % (__file__, now_, os.getcwd(), e)
with open(logf, 'a') as fo:
    fo.write(s)
print(s)

try:
    sys.path.append(base_dir)
    from core.utils import MysqlHelper
except Exception as e:
    s = '%s%s%s' % ('from core.utils import MysqlHelper EXCEPTION ',
                    time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e)
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4001)

try:
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
        datefmt='%a, %d %b %Y %H:%M:%S',
        filename=logf,
        filemode='a')
except Exception as e:
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e)
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4002)

try:
    fua, lua = '%s%s%s' % (this_file_abspath, os_sep, 'ua_list.txt'), []
    with open(fua, 'r') as fo:
        for i in fo:
            lua.append(i.replace('\n', ''))
except Exception as e:
    s = '%s%s' % ('open file EXCEPTION, ua file path: ', fua)
    logging.error(s)
    print(s)
    os._exit(4003)

dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = choice(lua)
dcap['browserName'], dcap['platform'] = '', ''


class MyThread(threading.Thread):
    def __init__(self, func, args, name):
        threading.Thread.__init__(self)
        self.func, self.args, self.name = func, args, name

    def run(self):
        self.func(self.args)


ctrl_start, max_script_time = time.time(), 3600 * 4


def ctrl_runtime(exit_type=''):
    if time.time() - ctrl_start >= max_script_time:
        s = '%s%s%s%s%s%s%s%s%s' % (
            'script start time ', ctrl_start, ' runtime threshold ', max_script_time, ' terminating',
            ' exit_type =', exit_type, ' threadID ', threading.get_ident())
        logging.info(s)
        if exit_type == '':
            exit(s)
        elif exit_type == 'sys':
            sys.exit(s)
        elif exit_type == 'os':
            # an integer is required
            # Required argument 'status' (pos 1) not found
            os._exit(4004)


url_counter = 0


def main():
    """
    Restart without limit on any exception
    """
    try:
        mysql_obj = MysqlHelper()
        q = 'SELECT direct_order_id FROM test_error;'
        tuple_l = mysql_obj.select(q)
        pass_id_l = [i[0] for i in tuple_l]
        pass_id_l = [str(i) for i in pass_id_l]
        pass_id_l_s = ','.join(pass_id_l)
        del mysql_obj, tuple_l
        # Each currently active url has exactly one row in test_order.
        # Later task: once test_error has accumulated enough data, re-check those urls.
        # 3 features: last half hour, anomalies of currently active urls in test_order
        # (the 2 current features), plus the later test_error re-check task.
        # Superseded query, kept for reference:
        # q = 'SELECT url,id FROM test_order WHERE unix_timestamp(now()) - create_time<=3600*48 AND id NOT in ( %s ) ORDER BY id DESC ;' % (pass_id_l_s)
        mysql_obj = MysqlHelper()
        q = 'SELECT url,id FROM test_order WHERE unix_timestamp(now()) < expire_time AND id NOT in ( %s ) ORDER BY id DESC ;' % (
            pass_id_l_s)
        tuple_l = mysql_obj.select(q)
        del mysql_obj
        if len(tuple_l) == 0:
            s = 'no urls to check; exiting'  # note: only logs, it does not actually exit
            print(s)
            logging.info(s)
    except Exception as e:
        s = '%s%s%s' % ('initial data: database query exception, restarting this script indefinitely ', e,
                        time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())))
        print(s)
        logging.warning(s)
        cmd = 'python %s' % (__file__)
        os.system(cmd)
        os._exit(1024)

    # The script runs roughly hourly. Handling of an abnormal url: if the first
    # request looks as expected, stop; otherwise wait 30s, then make at most 2 more
    # requests, 10s apart.
    sleep_counter, sleep_step, sleep_seconds, mycode_l, repeat_times, repeat_sleep_times = \
        0, 20, 1, ['g3user.com', '51g3.com.cn'], 4, 10

    # TODO: refactor a generic "where list" into the base class;
    # the current version was changed for the f_l field-list requirement
    def get_onerow(url, f_l=['title', 'uid', 'money_total'], tab='test_order'):
        t = -1
        try:
            mysql_obj = MysqlHelper()
            f_s = ','.join(f_l)
            q = 'SELECT %s FROM %s WHERE url="%s" ORDER BY id DESC LIMIT 1' % (f_s, tab, url)
            s = '%s%s' % (' DB ', q)
            logging.info(s)
            t = mysql_obj.select(q)
            if t != -1:
                t = t[0]
            del mysql_obj
        except Exception as e:
            s = '%s%s' % (' DB ', e)
            logging.info(s)
            return t
        return t

    def chk_exception_url(url, sleep_seconds=0, http_tag='http://'):
        time.sleep(sleep_seconds)
        global url_counter
        ret = {}
        # db url status: 0 = cannot open, 1 = opens but no ad code, 2 = handled
        ret['ok'], ret['status_code'], s = -1, -1, '%s%s%s%s' % (
            time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())), ' threadID ',
            threading.get_ident(), url)
        try:
            if url.find('http') == -1:
                url = '%s%s' % (http_tag, url)
            r = requests.get(url)
            ret['status_code'], txt_pos = int(r.status_code), -1
            s = '%s,%s,%s,%s,%s' % (s, ret['status_code'], url, r, r.reason)
        except Exception as e:
            ret['ok'] = 0
            s = '%s %s %s' % (s, ' SPIDER ', e)
            logging.error(s)
            print(e, url)
        # For now, only a 200 from the target site is considered
        if ret['status_code'] == 200:
            for ii in mycode_l:
                if r.text.find(ii) > -1:
                    ret['ok'], txt_pos = 1, 1
                    break
            if txt_pos == -1:
                try:
                    driver = webdriver.PhantomJS(desired_capabilities=dcap,
                                                 executable_path='/usr/local/phantomjs/bin/phantomjs')
                    driver.get(url)
                    time.sleep(1)
                    page_source = driver.page_source
                    driver.quit()
                    for ii in mycode_l:
                        if page_source.find(ii) > -1:
                            ret['ok'] = 1
                            break
                    if ret['ok'] == -1:
                        s = '%s%s' % (s, 'returned 200, but our company code was not found in the html.')
                        ret['ok'], ret['info'] = 0, s
                except Exception as e:
                    s = '%s %s %s' % (s, ' SPIDER ', e)
                    logging.error(s)
                    print(e, url)
        # elif ret['status_code'] == 403:  # www.hsdcw.com/fenlei/41668214.html
        elif ret['status_code'] == 403:
            pass  # note: ret['info'] is never set on this path, so the INSERT below would KeyError
        else:
            ret['ok'], ret['info'] = 0, s
        url_counter += 1
        s = '%s/%s%s%s' % (url_counter, len(tuple_l), 'chk-ret', s)
        print(s)
        if ret['ok'] == 0:
            logging.warning(s)
        else:
            logging.info(s)
        return ret

    tn, tl, tstep = len(tuple_l), [], 4000

    def tf(ts):
        te = ts + tstep
        te = min(te, tn)
        for i in tuple_l[ts:te]:
            ctrl_runtime(exit_type='os')
            url, chk_id = i
            s = '%s%s%s%s' % (
                time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())), ' threadID ',
                threading.get_ident(), url)
            # NB: pass_id_l holds str values, so an int chk_id will never match here
            if chk_id in pass_id_l:
                s = '%s%s' % (s, ' skipped: this url was already written to test_error ')
                logging.info(s)
                print(s)
            # Rule for Sina iAsk: do not check
            if url.find('iask.sina.com') > -1:
                continue
            write_db_flag = 1
            for t in range(0, repeat_times, 1):
                ret = chk_exception_url(url, repeat_sleep_times)
                if ret['ok'] == 1:
                    write_db_flag = 0
                    break
            if write_db_flag == 1:
                try:
                    title, uid, money_total = get_onerow(url)
                except Exception as e:
                    s = '%s%s%s' % (s, ' DB Exception looking up test_order ', e)
                    logging.info(s)
                    print(s)
                    break
                # Multithreading: given the connection limits of the underlying package,
                # instantiate the database class per use and delete it afterwards
                try:
                    # could be moved into the class constructor
                    mysql_obj = MysqlHelper()
                except Exception as e:
                    s = '%s%s%s' % (s, ' DB Exception- ', e)
                    logging.error(s)
                    print(s)
                    break
                # Multi-process / multi-thread concurrency: to be improved, e.g. with a queue
                q = 'SELECT id FROM test_error WHERE url="%s" LIMIT 1' % (url)
                try:
                    r = mysql_obj.select(q)
                    s = '%s%s%s' % (s, ' -SQL- ', q)
                    logging.info(s)
                    print(q)
                except Exception as e:
                    s = '%s %s %s %s' % (s, ' DB Exception-', q, e)
                    logging.info(s)
                    print(s)
                    break
                ctime = int(time.time())
                # the database design here could be improved
                db_status = 1 if ret['status_code'] == 200 else 0
                if len(r) == 0:
                    q = 'INSERT INTO test_error (title,url,status,remarks,update_time,create_time,uid,money,direct_order_id) VALUES ("%s","%s","%s","%s","%s","%s","%s","%s","%s")' % (
                        title, url, db_status, ret['info'], ctime, ctime, uid, money_total, chk_id)
                    try:
                        mysql_obj.execute(q)
                        mysql_obj.commit()
                        del mysql_obj
                        s = '%s%s%s' % (s, ' DB SQL ok ', q)
                        logging.info(s)
                        print(s)
                    except Exception as e:
                        s = '%s%s%s%s' % (s, ' DB Exception- ', q, e)
                        logging.error(s)
                        print(s)
                elif len(r) == 1:
                    continue

    for i in range(0, tn, tstep):
        if i >= tn:
            break
        thread_instance = MyThread(tf, (i), tf.__name__)  # (i) is just i; MyThread passes it through as one argument
        tl.append(thread_instance)
    for t in tl:
        t.daemon = False  # was "t.setDaemon = False", which overwrote the method instead of calling it
        t.start()
    for t in tl:
        t.join()


if __name__ == '__main__':
    main()
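The fan-out at the bottom is a standard slice-per-thread pattern: cut the work list into tstep-sized slices, one thread each. The same pattern in isolation, with a toy list standing in for tuple_l:

import threading

work = list(range(10))  # toy stand-in for tuple_l
tstep = 4               # slice size, as in the script

def tf(ts):
    # each thread walks its own half-open slice [ts, ts + tstep)
    te = min(ts + tstep, len(work))
    for item in work[ts:te]:
        print(threading.get_ident(), item)

threads = [threading.Thread(target=tf, args=(i,)) for i in range(0, len(work), tstep)]
for t in threads:
    t.start()
for t in threads:
    t.join()

With tstep = 4000, most runs create a single thread; the slicing only matters once test_order returns more rows than one slice holds.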
A later experiment against Baidu's mobile related-search box (the 'rw-item' anchors); the browser is launched without emulation while the UA plumbing stays commented out:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.firefox.options import Options
import time
from time import sleep
import math
import random
import sys
import threading
from random import choice
# import urllib.parse
from bs4 import BeautifulSoup

ua_list = []
with open('mobile_ua.txt', 'r', encoding='utf-8') as uafile:
    for i in uafile:
        if i.find('Mozilla') > -1:
            ua_list.append(i.replace('\n', '').strip())
ua_list_len_ = len(ua_list) - 1


def close_alert(browser, attitude='accept'):
    # js='alert(window.alert=function(str){return;}'
    # browser.execute_script(js)
    # js= 'window.alert = function(str){return ;}'
    # browser.execute_script(js)
    return


# mobile_emulation = {
#     "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
#     "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"}
ua_list_index = random.randint(0, ua_list_len_)
# mobile_emulation = {
#     "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0}}
# mobile_emulation['userAgent'] = choice(ua_list)
# chrome_options = Options()
# chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
# browser = webdriver.Chrome(chrome_options=chrome_options)
browser = webdriver.Chrome()
s_wd = '長尾'
url_seed = 'https://m.baidu.com/s?word=s_wd'
url_seed = url_seed.replace('s_wd', s_wd)
print(url_seed)
browser.get(url_seed)
rd = BeautifulSoup(browser.page_source, 'html.parser').find_all('a', class_='rw-item')
res_d_l = [{'contents': d.contents, 'href': d.attrs['href']} for d in rd]
browser.quit()
d = 3
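The final BeautifulSoup step can be exercised without launching a browser at all; a minimal sketch against a handmade fragment shaped like the rw-item anchors the script expects (the HTML below is invented for illustration):

from bs4 import BeautifulSoup

# Stand-in for browser.page_source: two fake related-search anchors.
html = (
    '<div>'
    '<a class="rw-item" href="/s?word=kw1">長尾詞</a>'
    '<a class="rw-item" href="/s?word=kw2">長尾效應</a>'
    '</div>'
)
rd = BeautifulSoup(html, 'html.parser').find_all('a', class_='rw-item')
res_d_l = [{'contents': d.contents, 'href': d.attrs['href']} for d in rd]
print(res_d_l)
# [{'contents': ['長尾詞'], 'href': '/s?word=kw1'}, {'contents': ['長尾效應'], 'href': '/s?word=kw2'}]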