Jsoup獲取全國地區數據(省市縣鎮村)

package com.soft.di.jsoup;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**node

  • 全國省市縣鎮村數據爬取url

  • @version 1.0.0 */ public class JsoupTest { private static Map<Integer, String> cssMap = new HashMap<Integer, String>(); private static BufferedWriter bufferedWriter = null;code

    static { cssMap.put(1, "provincetr");// 省 cssMap.put(2, "citytr");// 市 cssMap.put(3, "countytr");// 縣 cssMap.put(4, "towntr");// 鎮 cssMap.put(5, "villagetr");// 村 }遞歸

    public static void main(String[] args) throws IOException { int level = 1;ci

    initFile();
    
     // 獲取全國各個省級信息
     Document connect = connect("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2013/");
     Elements rowProvince = connect.select("tr." + cssMap.get(level));
     for (Element provinceElement : rowProvince)// 遍歷每一行的省份城市
     {
         Elements select = provinceElement.select("a");
         for (Element province : select)// 每個省份(四川省)
         {
             parseNextLevel(province, level + 1);
         }
     }
    
     closeStream();

    }element

    private static void initFile() { try { bufferedWriter = new BufferedWriter(new FileWriter(new File("d:\CityInfo.txt"), true)); } catch (IOException e) { e.printStackTrace(); } }get

    private static void closeStream() { if (bufferedWriter != null) { try { bufferedWriter.close(); } catch (IOException e) { e.printStackTrace(); } bufferedWriter = null; } }input

    private static void parseNextLevel(Element parentElement, int level) throws IOException { try { Thread.sleep(500);//睡眠一下,不然可能出現各類錯誤狀態碼 } catch (InterruptedException e) { e.printStackTrace(); }

    Document doc = connect(parentElement.attr("abs:href"));
     if (doc != null)
     {
         Elements newsHeadlines = doc.select("tr." + cssMap.get(level));//
         // 獲取表格的一行數據
         for (Element element : newsHeadlines)
         {
             printInfo(element, level + 1);
             Elements select = element.select("a");// 在遞歸調用的時候,這裏是判斷是不是村一級的數據,村一級的數據沒有a標籤
             if (select.size() != 0)
             {
                 parseNextLevel(select.last(), level + 1);
             }
         }
     }

    }

    /**

    • 寫一行數據到數據文件中去
    • @param element 爬取到的數據元素
    • @param level 城市級別 */ private static void printInfo(Element element, int level) { try { bufferedWriter.write(element.select("td").last().text() + "{" + level + "}[" + element.select("td").first().text() + "]"); bufferedWriter.newLine(); bufferedWriter.flush(); } catch (IOException e) { e.printStackTrace(); } }

    private static Document connect(String url) { if (url == null || url.isEmpty()) { throw new IllegalArgumentException("The input url('" + url + "') is invalid!"); } try { return Jsoup.connect(url).timeout(100 * 1000).get(); } catch (IOException e) { e.printStackTrace(); return null; } } } `

相關文章
相關標籤/搜索