需求分析:抓取 http://tools.2345.com/rili.htm 中的萬年曆信息(陽曆、陰曆等等)。
package com.wan.domain; public class Almanac { private String solar; /* 陽曆 e.g.2016年 4月11日 星期一 */ private String lunar; /* 陰曆 e.g. 猴年 三月初五*/ private String chineseAra; /* 天干地支紀年法 e.g.丙申年 壬辰月 癸亥日*/ private String should; /* 宜e.g. 求子 祈福 開光 祭祀 安牀*/ private String avoid; /* 忌 e.g. 玉堂(黃道)危日,忌出行*/ public String getSolar() { return solar; } public void setSolar(String solar) { this.solar = solar; } public String getLunar() { return lunar; } public void setLunar(String lunar) { this.lunar = lunar; } public String getChineseAra() { return chineseAra; } public void setChineseAra(String chineseAra) { this.chineseAra = chineseAra; } public String getShould() { return should; } public void setShould(String should) { this.should = should; } public String getAvoid() { return avoid; } public void setAvoid(String avoid) { this.avoid = avoid; } public Almanac(String solar, String lunar, String chineseAra, String should,String avoid) { this.solar = solar; this.lunar = lunar; this.chineseAra = chineseAra; this.should = should; this.avoid = avoid; } }
package com.wan.controller; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import org.apache.http.HttpEntity; import org.apache.http.ParseException; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.wan.domain.Almanac; public class AlmanacUtil { /** * 單例工具類 */ private AlmanacUtil() { } /** * 獲取萬年曆信息 * @return */ public static Almanac getAlmanac(){ String url="http://tools.2345.com/rili.htm"; String html=pickData(url); Almanac almanac=analyzeHTMLByString(html); return almanac; } /* * 爬取網頁信息 */ private static String pickData(String url) { CloseableHttpClient httpclient = HttpClients.createDefault(); try { HttpGet httpget = new HttpGet(url); CloseableHttpResponse response = httpclient.execute(httpget); try { // 獲取響應實體 HttpEntity entity = response.getEntity(); // 打印響應狀態 if (entity != null) { return EntityUtils.toString(entity); } } finally { response.close(); } } catch (ClientProtocolException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { // 關閉鏈接,釋放資源 try { httpclient.close(); } catch (IOException e) { e.printStackTrace(); } } return null; } /* * 使用jsoup解析網頁信息 */ private static Almanac analyzeHTMLByString(String html){ String solarDate,lunarDate,chineseAra,should,avoid=" "; Document document = Jsoup.parse(html); //公曆時間 solarDate=getSolarDate(document,"bjtime"); //農曆時間 Element eLunarDate=document.getElementById("info_nong"); lunarDate=eLunarDate.child(0).html().substring(1,3)+eLunarDate.html().substring(11); //天干地支紀年法 Element 
eChineseAra=document.getElementById("info_chang"); chineseAra=eChineseAra.text().toString(); //宜 should=getSuggestion(document,"yi"); //忌 avoid=getSuggestion(document,"ji"); Almanac almanac=new Almanac(solarDate,lunarDate,chineseAra,should,avoid); return almanac; } /* * 獲取忌/宜 */ private static String getSuggestion(Document doc,String id){ Element element=doc.getElementById(id); Elements elements=element.getElementsByTag("a"); StringBuffer sb=new StringBuffer(); for (Element e : elements) { sb.append(e.text()+" "); } return sb.toString(); } /* * 獲取公曆時間,用yyyy年MM月dd日 EEEE格式表示。 * @return yyyy年MM月dd日 EEEE */ private static String getSolarDate(Document doc,String id) { Calendar calendar = Calendar.getInstance(); Date solarDate = calendar.getTime(); SimpleDateFormat formatter = new SimpleDateFormat("yyyy年MM月dd日 EEEE"); return formatter.format(solarDate); } }
注:公曆時間沒有通過網頁抓取實現,而是直接獲取系統時間。
package com.wan.test; import com.wan.controller.AlmanacUtil; import com.wan.domain.Almanac; public class Test { public static void main(String[] args) { // TODO Auto-generated method stub Almanac almanac=AlmanacUtil.getAlmanac(); System.out.println("公曆時間"+almanac.getSolar()); System.out.println("農曆時間"+almanac.getLunar()); System.out.println("天干地支"+almanac.getChineseAra()); System.out.println("宜"+almanac.getShould()); System.out.println("忌"+almanac.getAvoid()); } }
最後在控制檯輸出: