#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
from fake_useragent import UserAgent
from lxml import etree

agent = UserAgent()
url = 'http://search.51job.com/list/010000%252C020000%252C030200%252C040000%252C180200,000000,0000,00,9,11,python,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=21&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
response = requests.get(
    url,
    headers={'User-Agent': agent.random}
)
response.encoding = response.apparent_encoding
root = etree.HTML(response.text)

# Each job posting sits in a div.el row under div.dw_table.
div_list = root.xpath("//div[@class='dw_table']/div[@class='el']")
for div in div_list:
    # The salary cell may be empty; fall back to "面議" (salary negotiable).
    money = div.xpath("span[@class='t4']/text()")
    money = money[0] if money else "面議"
    print(money)

    # The job name is never empty, so no emptiness check is needed.
    a = div.xpath("p/span/a")[0]
    job_name = a.xpath("text()")[0].strip()
    job_href = a.xpath("@href")[0]
    print(job_name)

    # The posting date may also be missing; "沒有時間" means "no date".
    date_time = div.xpath("span[@class='t5']/text()")
    date_time = date_time[0] if date_time else "沒有時間"
    print(date_time)

    # Append one CSV row per job; gb18030 covers the site's Chinese text.
    with open('job.csv', 'a', encoding='gb18030') as f:
        f.write(','.join([job_name, date_time, money]) + '\n')
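
# Optional alternative (a sketch, not part of the original script): write rows
# with the standard-library csv module so fields that themselves contain commas
# or quotes are escaped correctly. It assumes the same job.csv output file and
# the same job_name/date_time/money values gathered in the loop above.
import csv

def append_job_row(job_name, date_time, money, path='job.csv'):
    # newline='' stops the csv module from adding extra blank lines on Windows.
    with open(path, 'a', encoding='gb18030', newline='') as f:
        csv.writer(f).writerow([job_name, date_time, money])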