1 在解析HTML之前,還需導入 jsoup-1.10.2.jar
2 解析HTML,代碼如下:
package com.od.cn; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.apache.log4j.Logger; import org.apache.log4j.PropertyConfigurator; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class JsoupParserHtml { private static final Logger LOGGER=Logger.getLogger(JsoupParserHtml.class); //從網上把天氣爬下來 private List<Weather> parserHtmlByHttp(String url){ List<Weather> weathers=new ArrayList<Weather>(); try { Document document=Jsoup.connect(url).get(); Elements classes=document.getElementsByClass("part_se"); for(Element ele:classes){ String data_role=ele.attr("data-role"); if("collapsible".equals(data_role)){ Elements h1=ele.select("h1"); Elements td=ele.select("td"); Weather weather=new Weather(); weather.setArea(h1.text()); weather.setAirTemperature(td.get(1).text()); weather.setRainFall(td.get(3).text()); weather.setRelativeWet(td.get(5).text()); weather.setWindPower(td.get(7).text()); weather.setWindDirection(td.get(9).text()); weather.setDate(td.get(11).text()); weathers.add(weather); } } } catch (IOException e) { LOGGER.error("解析網頁異常:"+e.getMessage()); } LOGGER.info("成功獲取網頁數據"); return weathers; } //以json的格式保存到文本中 private void saveFile(List<Weather> weathers){ if(weathers!=null){ SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd"); StringBuffer buffer=new StringBuffer(); buffer.append("{date:\""+sdf.format(new Date())+"\",data["); for(int i=0;i<weathers.size();i++){ Weather weather=weathers.get(i); if(i==weathers.size()-1){ buffer.append("{area:\""+weather.getArea()+"\",airTemperature:\""+weather.getAirTemperature()+"\",rainFall:\""+weather.getRainFall()+ "\",relativeWet:\""+weather.getRelativeWet()+"\",windPower:\""+weather.getWindPower()+"\",windDirection:\""+weather.getWindDirection()+"\",dateTime:\""+weather.getDate()+"\"}"); 
}else{ buffer.append("{area:\""+weather.getArea()+"\",airTemperature:\""+weather.getAirTemperature()+"\",rainFall:\""+weather.getRainFall()+ "\",relativeWet:\""+weather.getRelativeWet()+"\",windPower:\""+weather.getWindPower()+"\",windDirection:\""+weather.getWindDirection()+"\",dateTime:\""+weather.getDate()+"\"},"); } } buffer.append("]}"); BufferedWriter bw=null; try { bw=new BufferedWriter(new FileWriter("d:\\weather.txt")); bw.write(buffer.toString()); bw.flush(); LOGGER.info("已保存文件"); } catch (IOException e) { LOGGER.error("保存文件異常:"+e.getMessage()); }finally{ if(bw!=null){ try { bw.close(); } catch (IOException e) { LOGGER.error("關閉流異常:"+e.getMessage()); } } } } } public static void main(String[] args) { PropertyConfigurator.configure("WebRoot/conf/log4j.properties"); LOGGER.info("啓動程序"); JsoupParserHtml jph=new JsoupParserHtml(); List<Weather> weathers=jph.parserHtmlByHttp("http://www.zhpmsc.org.cn/WeChat/monitorController/zoneSk?winzoom=1#"); jph.saveFile(weathers); LOGGER.info("程序結束"); } }