jsoup獲取文檔類示例

 

 1 import java.io.IOException;  2 
 3 import org.jsoup.Jsoup;  4 import org.jsoup.nodes.Document;  5 import org.jsoup.nodes.Element;  6 import org.jsoup.select.Elements;  7 
 8 
 9 public class WyCrawler { 10     public static void main(String[] args) { 11         try { 12             Document document = Jsoup.connect("http://某網頁").timeout(3000).get(); 13             String selector = "li>div[class=titleBar clearfix]>h3>a"; 14             Elements elements = document.select(selector); 15             for(Element element:elements){ 16 // System.out.println(element.text());
17                 String url = element.absUrl("href"); 18                 Document document2 = Jsoup.connect(url).get(); 19                 Elements elements2 = document2.select("#endText"); 20                 for(Element element2:elements2){ 21  System.out.println(element2.text()); 22  } 23  } 24         } catch (IOException e) { 25  e.printStackTrace(); 26  } 27  } 28 }

上面是如何爬取超連接裏的內容，下面的例子比較簡單。

 1 import java.io.IOException;  2 
 3 import org.jsoup.Jsoup;  4 import org.jsoup.nodes.Document;  5 import org.jsoup.nodes.Element;  6 import org.jsoup.select.Elements;  7 
 8 
 9 public class Test { 10     public static void main(String[] args) { 11         try { 12             Document document = Jsoup.connect("http://www.某網頁.com/").get(); 13             //獲取內容 14 // String selector = "div[class=panel panel20 post-item post-box]>div[class=item-detail]>div[class=item-content]"; 15 // Elements elements = document.select(selector); 16 // for(Element element:elements){ 17 // System.out.println(element.text()); 18 // } 19             
20             //獲取標題 21 // String selector2 = "div[class=panel panel20 post-item post-box]>div[class=item-detail]>h2[class=item-title]"; 22 // Elements elements = document.select(selector2); 23 // for(Element element:elements){ 24 // System.out.println(element.text()); 25 // } 26             
27             //綜合寫法,標題內容一塊兒獲取
28             String selector = "div[class=panel panel20 post-item post-box]>div[class=item-detail]"; 29             Elements elements = document.select(selector); 30             for(Element element:elements){ 31                 Elements titles = element.select("div[class=item-title]"); 32                 Elements content = element.select("h2[class=item-content]"); 33                 System.out.println(titles.text()+"\n"+content.text()); 34  } 35             
36             
37             
38         } catch (IOException e) { 39  e.printStackTrace(); 40  } 41  } 42 }
相關文章
相關標籤/搜索