Jsoup爬取職位信息

待爬取的牛客網的實習信息

https://www.nowcoder.com/job/center

 

首先在Eclipse新建一個Maven項目

一、在Maven的pom.xml文件中加入如下的代碼

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.wu</groupId>
  <artifactId>TopEssay</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <properties>
    <!-- The Java sources contain non-ASCII comments; pin the encoding so the
         build does not depend on the platform default charset. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- The crawler uses the diamond operator (Java 7+). Without an explicit
         level, the compiler plugin's default may be lower and reject it. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- jsoup: fetches the page and queries it with CSS selectors.
         NOTE(review): 1.11.3 is an old release; consider upgrading after
         checking the jsoup changelog. -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.11.3</version>
    </dependency>
  </dependencies>

</project>

  

二、提取所需要的信息

這裏編寫CSS選擇器規則有點麻煩,我們可以利用瀏覽器自帶的開發者工具,幫助我們快速選擇所需要的元素

比如我們這裏的標題,通過這種方法得到的選擇器爲 body > div.nk-container > div.nk-main.clearfix > div.nk-content > div > div.module-body > ul > li:nth-child(1) > div > div.reco-job-cont > a

然後我們可以在上面這個基礎上進行相應的修改,有效節省我們的時間。

 

 

package com.jsoup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.entity.JobInfo;

/**
 * Command-line crawler that downloads the Nowcoder job-center page and prints
 * every job posting found on it.
 *
 * <p>Each posting is located through the {@code reco-job-main} CSS class and
 * converted into a {@link JobInfo}.
 */
public class NiuKeSpider {
	/** Page that lists the job postings to scrape. */
	private static final String JOB_CENTER_URL = "https://www.nowcoder.com/job/center";

	/**
	 * Browser-like User-Agent sent with the request.
	 * NOTE(review): added because some sites reject generic Java clients;
	 * confirm it is actually required for this site.
	 */
	private static final String USER_AGENT =
			"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36";

	/** Upper bound, in milliseconds, for connecting to and reading from the site. */
	private static final int TIMEOUT_MILLIS = 10_000;

	/**
	 * Fetches the job-center page, prints the number of postings found and then
	 * one line per posting.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			// Download and parse the page's HTML.
			Document document = Jsoup.connect(JOB_CENTER_URL)
					.userAgent(USER_AGENT)
					.timeout(TIMEOUT_MILLIS)
					.get();

			List<JobInfo> jobs = parseJobs(document);
			System.out.println(jobs.size());
			for (JobInfo job : jobs) {
				System.out.println(job);
			}
		} catch (IOException e) {
			// Say which request failed before dumping the stack trace.
			System.err.println("Failed to fetch " + JOB_CENTER_URL);
			e.printStackTrace();
		}
	}

	/**
	 * Extracts one {@link JobInfo} per {@code reco-job-main} element.
	 *
	 * @param document parsed job-center page
	 * @return the postings in document order; empty when none match
	 */
	private static List<JobInfo> parseJobs(Document document) {
		Elements postings = document.getElementsByClass("reco-job-main");
		List<JobInfo> jobs = new ArrayList<>(postings.size());
		for (Element posting : postings) {
			jobs.add(toJobInfo(posting));
		}
		return jobs;
	}

	/**
	 * Reads description, link, company, location and salary from one posting.
	 *
	 * @param posting a single {@code reco-job-main} element
	 * @return the populated job record; fields whose selector matches nothing
	 *         are left as empty strings
	 */
	private static JobInfo toJobInfo(Element posting) {
		JobInfo jobInfo = new JobInfo();
		jobInfo.setJobContent(posting.getElementsByClass("reco-job-cont").text());
		// "abs:" asks jsoup for the link resolved against the page's base URI.
		jobInfo.setUrl(posting.select("div.reco-job-cont > a").attr("abs:href"));
		jobInfo.setCompany(posting.getElementsByClass("reco-job-com").text());
		jobInfo.setAddress(posting.getElementsByClass("job-address").text());
		// The selector takes the second span of the first info row as the salary.
		jobInfo.setSalary(
				posting.select("div.reco-job-info > div:nth-child(1) > span:nth-child(2)").text().trim());
		return jobInfo;
	}

}

  

三、封裝所需的信息

package com.entity;

/**
 * Mutable holder for the details of a single job posting: description, link,
 * company, location and salary.
 *
 * <p>All fields are plain strings and start out as {@code null} until the
 * corresponding setter is called.
 *
 * @author Administrator
 */
public class JobInfo {
	private String jobContent;
	private String url;
	private String company;
	private String address;
	// Lower-case so the field matches the "salary" bean property exposed by
	// getSalary()/setSalary().
	private String salary;

	/** @return the job description text, or {@code null} if not set */
	public String getJobContent() {
		return jobContent;
	}

	/** @param jobContent the job description text */
	public void setJobContent(String jobContent) {
		this.jobContent = jobContent;
	}

	/** @return the link to the posting, or {@code null} if not set */
	public String getUrl() {
		return url;
	}

	/** @param url the link to the posting */
	public void setUrl(String url) {
		this.url = url;
	}

	/** @return the hiring company, or {@code null} if not set */
	public String getCompany() {
		return company;
	}

	/** @param company the hiring company */
	public void setCompany(String company) {
		this.company = company;
	}

	/** @return the job location, or {@code null} if not set */
	public String getAddress() {
		return address;
	}

	/** @param address the job location */
	public void setAddress(String address) {
		this.address = address;
	}

	/** @return the salary text, or {@code null} if not set */
	public String getSalary() {
		return salary;
	}

	/** @param salary the salary text */
	public void setSalary(String salary) {
		this.salary = salary;
	}

	/**
	 * Renders all fields on one line. The label text, including the
	 * capitalised {@code Salary=}, is kept as-is so existing output does not
	 * change.
	 */
	@Override
	public String toString() {
		return "job [jobContent=" + jobContent + ", url=" + url + ", company=" + company + ", address=" + address
				+ ", Salary=" + salary + "]";
	}

}

  

四、運行結果:

 

 

總結:

相關文章
相關標籤/搜索