轉載請標明出處:http://blog.csdn.net/lmj623565791/article/details/23532797
這篇博客接着上一篇(Android 使用Fragment,ViewPagerIndicator 製作csdn app主要框架)繼續實現接下來的功能,如果你想了解整個app的製作過程,你可以去看一下上一篇,當然如果你只對網頁信息的抓取感興趣,你可以直接閱讀本篇博客。我會把app功能分解,儘可能使每篇之間的耦合度不會過高。
好了,開始進入正題。這篇內容我新建一個java項目實現,一方面java調試比較方便,另一方面我會使用導入jar包的方式,把這個項目導入到android項目使用,大家如果在導jar方面沒有經驗,可以看下。
先看下項目結構:
定義了一個NewsItem,對應app中每一個ListView的Item;Constaint是個接口,存放了一些常量;還有就是一些輔助類。
NewsItem.java
package com.zhy.bean; public class NewsItem { private int id; /** * 標題 */ private String title; /** * 連接 */ private String link; /** * 發佈日期 */ private String date; /** * 圖片的連接 */ private String imgLink; /** * 內容 */ private String content; /** * 類型 * */ private int newsType; public int getNewsType() { return newsType; } public void setNewsType(int newsType) { this.newsType = newsType; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getLink() { return link; } public void setLink(String link) { this.link = link; } public int getId() { return id; } public void setId(int id) { this.id = id; } public String getDate() { return date; } public void setDate(String date) { this.date = date; } public String getImgLink() { return imgLink; } public void setImgLink(String imgLink) { this.imgLink = imgLink; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } @Override public String toString() { return "NewsItem [id=" + id + ", title=" + title + ", link=" + link + ", date=" + date + ", imgLink=" + imgLink + ", content=" + content + ", newsType=" + newsType + "]"; } }
package com.zhy.bean; public class CommonException extends Exception { public CommonException() { super(); // TODO Auto-generated constructor stub } public CommonException(String message, Throwable cause) { super(message, cause); // TODO Auto-generated constructor stub } public CommonException(String message) { super(message); // TODO Auto-generated constructor stub } public CommonException(Throwable cause) { super(cause); // TODO Auto-generated constructor stub } }
package com.zhy.csdn;

/**
 * Integer identifiers for the news categories used throughout the project.
 * Interface fields are implicitly {@code public static final}, so the
 * modifiers are not repeated on each constant.
 */
public interface Constaint {

    /** Industry news category. */
    int NEWS_TYPE_YEJIE = 1;

    /** Mobile news category. */
    int NEWS_TYPE_YIDONG = 2;

    /** Research-and-development news category. */
    int NEWS_TYPE_YANFA = 3;

    /** Programmer (magazine) news category. */
    int NEWS_TYPE_CHENGXUYUAN = 4;

    /** Cloud-computing news category. */
    int NEWS_TYPE_YUNJISUAN = 5;
}
// Next listing: DataUtil.java
package com.zhy.csdn;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import com.zhy.bean.CommonException;

/**
 * Helper for downloading the text of a web page over HTTP.
 */
public class DataUtil {

    /** Message carried by every {@link CommonException} raised from {@link #doGet(String)}. */
    private static final String ERROR_MESSAGE = "訪問網絡失敗!";

    /** Charset the response body is decoded with. */
    private static final String CHARSET = "UTF-8";

    /** Maximum time to wait for the connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 5000;

    /** Maximum time to wait for data once connected, so a stalled server cannot hang the caller. */
    private static final int READ_TIMEOUT_MS = 5000;

    /**
     * Returns the HTML data found at the given link address.
     *
     * <p>The body is decoded as UTF-8 only after it has been read completely.
     * Decoding each read buffer on its own corrupts any multi-byte character
     * that happens to straddle two buffers.
     *
     * @param urlStr address of the page to fetch
     * @return the response body decoded as UTF-8
     * @throws CommonException if the URL is invalid, the request fails, or the
     *         server answers with a status other than 200; the underlying
     *         failure, when there is one, is attached as the cause
     */
    public static String doGet(String urlStr) throws CommonException {
        HttpURLConnection conn = null;
        try {
            URL url = new URL(urlStr);
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);
            conn.setDoInput(true);
            // setDoOutput(true) is intentionally not called: a GET sends no
            // request body, and enabling output is meant for requests that do.
            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                throw new CommonException(ERROR_MESSAGE);
            }
            return readFully(conn.getInputStream());
        } catch (CommonException e) {
            throw e;
        } catch (Exception e) {
            // Callers only ever see CommonException; keep the root cause for diagnosis.
            throw new CommonException(ERROR_MESSAGE, e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /** Drains the stream, closes it, and decodes the collected bytes in a single step. */
    private static String readFully(InputStream in) throws IOException {
        try {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            int len;
            while ((len = in.read(buf)) != -1) {
                bytes.write(buf, 0, len);
            }
            return bytes.toString(CHARSET);
        } finally {
            in.close();
        }
    }
}
package com.zhy.csdn; public class URLUtil { public static final String NEWS_LIST_URL = "http://www.csdn.net/headlines.html"; public static final String NEWS_LIST_URL_YIDONG = "http://mobile.csdn.net/mobile"; public static final String NEWS_LIST_URL_YANFA = "http://sd.csdn.net/sd"; public static final String NEWS_LIST_URL_YUNJISUAN = "http://cloud.csdn.net/cloud"; public static final String NEWS_LIST_URL_ZAZHI = "http://programmer.csdn.net/programmer"; public static final String NEWS_LIST_URL_YEJIE = "http://news.csdn.net/news"; /** * 根據文章類型,和當前頁碼生成url * @param newsType * @param currentPage * @return */ public static String generateUrl(int newsType, int currentPage) { currentPage = currentPage > 0 ? currentPage : 1; String urlStr = ""; switch (newsType) { case Constaint.NEWS_TYPE_YEJIE: urlStr = NEWS_LIST_URL_YEJIE; break; case Constaint.NEWS_TYPE_YANFA: urlStr = NEWS_LIST_URL_YANFA; break; case Constaint.NEWS_TYPE_CHENGXUYUAN: urlStr = NEWS_LIST_URL_ZAZHI; break; case Constaint.NEWS_TYPE_YUNJISUAN: urlStr = NEWS_LIST_URL_YUNJISUAN; break; default: urlStr = NEWS_LIST_URL_YIDONG; break; } urlStr += "/" + currentPage; return urlStr; } }
package com.zhy.biz;

import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.zhy.bean.CommonException;
import com.zhy.bean.NewsItem;
import com.zhy.csdn.DataUtil;
import com.zhy.csdn.URLUtil;

/**
 * Business class that builds {@link NewsItem} objects from a news list page.
 *
 * @author zhy
 */
public class NewsItemBiz {

    /**
     * Downloads one page of a news category and converts every element with
     * class {@code unit} on it into a {@link NewsItem}.
     *
     * @param newsType    news category, passed to {@code URLUtil.generateUrl}
     *                    and stored on every returned item
     * @param currentPage page number, passed to {@code URLUtil.generateUrl}
     * @return the parsed items in document order; empty if the page has no
     *         {@code unit} elements
     * @throws CommonException if the page cannot be downloaded, or if a
     *         {@code unit} element lacks a part this parser requires
     */
    public List<NewsItem> getNewsItems(int newsType, int currentPage) throws CommonException {
        String urlStr = URLUtil.generateUrl(newsType, currentPage);
        String htmlStr = DataUtil.doGet(urlStr);

        Document doc = Jsoup.parse(htmlStr);
        Elements units = doc.getElementsByClass("unit");

        List<NewsItem> newsItems = new ArrayList<NewsItem>(units.size());
        for (Element unit : units) {
            newsItems.add(parseUnit(unit, newsType));
        }
        return newsItems;
    }

    /** Builds one item from a single {@code unit} element. */
    private static NewsItem parseUnit(Element unit, int newsType) throws CommonException {
        NewsItem item = new NewsItem();
        item.setNewsType(newsType);

        // Title text and link come from the first child of the first h1.
        Element heading = requireFirst(unit.getElementsByTag("h1"), "h1");
        Element headingLink = requireChild(heading, 0, "first child of h1");
        item.setLink(headingLink.attr("href"));
        item.setTitle(headingLink.text());

        // The date is the text of the first "ago" element inside the first h4.
        Element meta = requireFirst(unit.getElementsByTag("h4"), "h4");
        Element ago = requireFirst(meta.getElementsByClass("ago"), "element with class 'ago' in h4");
        item.setDate(ago.text());

        Element dl = requireFirst(unit.getElementsByTag("dl"), "dl");
        Element dt = requireChild(dl, 0, "first child of dl");

        // There may be no image: the link is only set when the first child of
        // dt itself has a first child, whose src attribute is then used.
        Elements dtChildren = dt.children();
        if (!dtChildren.isEmpty()) {
            Elements wrapped = dtChildren.get(0).children();
            if (!wrapped.isEmpty()) {
                item.setImgLink(wrapped.get(0).attr("src"));
            }
        }

        Element summary = requireChild(dl, 1, "second child of dl");
        item.setContent(summary.text());
        return item;
    }

    /** Returns the first matched element, or reports the missing part as a CommonException. */
    private static Element requireFirst(Elements found, String what) throws CommonException {
        if (found.isEmpty()) {
            throw new CommonException("Unexpected page structure: missing " + what);
        }
        return found.get(0);
    }

    /** Returns the child element at the given index, or reports the missing part as a CommonException. */
    private static Element requireChild(Element parent, int index, String what) throws CommonException {
        Elements children = parent.children();
        if (index >= children.size()) {
            throw new CommonException("Unexpected page structure: missing " + what);
        }
        return children.get(index);
    }
}
// Article note: with that done, the last step is testing. A unit test is used here; the test code and its output follow.
測試代碼:
package com.zhy.test;

import java.util.List;

import com.zhy.bean.CommonException;
import com.zhy.bean.NewsItem;
import com.zhy.biz.NewsItemBiz;
import com.zhy.csdn.Constaint;
import com.zhy.csdn.DataUtil;

/**
 * Fetches the first page of several news categories and prints every item.
 */
public class Test {

    /** Line printed after the items of each category. */
    private static final String SEPARATOR = "----------------------";

    /**
     * Prints page 1 of the industry, programmer magazine, research-and-development
     * and mobile categories, in that order, with a separator line after each.
     *
     * @throws CommonException if any category cannot be fetched or parsed. The
     *         exception is propagated rather than caught and printed, so that
     *         a failed fetch fails the test instead of letting it pass.
     */
    @org.junit.Test
    public void test01() throws CommonException {
        NewsItemBiz biz = new NewsItemBiz();
        int currentPage = 1;
        int[] newsTypes = {
            Constaint.NEWS_TYPE_YEJIE,       // industry
            Constaint.NEWS_TYPE_CHENGXUYUAN, // programmer magazine
            Constaint.NEWS_TYPE_YANFA,       // research and development
            Constaint.NEWS_TYPE_YIDONG       // mobile
        };

        for (int newsType : newsTypes) {
            List<NewsItem> newsItems = biz.getNewsItems(newsType, currentPage);
            for (NewsItem item : newsItems) {
                System.out.println(item);
            }
            System.out.println(SEPARATOR);
        }
    }
}
NewsItem [id=0, title=如何作到天天寫代碼?, date=2014-04-11 11:26, newsType=1] NewsItem [id=0, title=一週消息樹:超級充電器來襲,30秒可爲手機充滿電, date=2014-04-11 15:20, newsType=1] NewsItem [id=0, title=Google Glass於4月15日在美對外開放購買,售價爲1500美圓, date=2014-04-11 08:01, newsType=1] NewsItem [id=0, title=Cortana與Siri、Google Now的較量:支持功能更多, date=2014-04-10 16:30, newsType=1] NewsItem [id=0, title=優秀Unix管理員的七個習慣, date=2014-04-10 10:58, newsType=1] NewsItem [id=0, title=國外用戶也不幸福!Facebook強制用戶必須下載Messager, date=2014-04-10 09:10, newsType=1] NewsItem [id=0, title=ThoughtWorks CTO談IT職場女性:你並不奇怪, date=2014-04-09 18:18, newsType=1] NewsItem [id=0, title=微軟轉型之路:從Build 2014開始, date=2014-04-09 17:05, newsType=1] NewsItem [id=0, title=設計師爲何要學編程,開發者爲何要學設計?, date=2014-04-09 14:07, newsType=1] NewsItem [id=0, title=Windows 8.1 Update 1的下載地址和八點???知, date=2014-04-09 08:38, newsType=1] ---------------------- NewsItem [id=0, title=頁面仔和他的小創新, date=2014-04-11 11:09, newsType=4] NewsItem [id=0, title=將來兩年必須掌握的移動互聯網技術與能力, date=2014-04-10 14:43, newsType=4] NewsItem [id=0, title=互聯網思惟究竟是什麼——移動浪潮下的新商業邏輯, date=2014-04-09 13:05, newsType=4] NewsItem [id=0, title=虛擬現實之眼——Oculus與HMD關鍵技術, date=2014-04-09 12:47, newsType=4] NewsItem [id=0, title=如何實現團隊的自組織管理, date=2014-04-09 11:59, newsType=4] NewsItem [id=0, title=途牛網CTO湯崢嶸:互聯網思惟——光說不練遠遠不夠, date=2014-04-08 11:10, newsType=4] NewsItem [id=0, title=理解創客, date=2014-04-04 17:55, newsType=4] NewsItem [id=0, title=TypeScript:更好的JavaScript, date=2014-04-03 16:10, newsType=4] NewsItem [id=0, title=Chris Anderson:咱們正經歷一場真正的革命, date=2014-04-02 14:45, newsType=4] NewsItem [id=0, title=Cocos2d-x 3.0帶來了什麼, date=2014-04-02 14:09, newsType=4] ---------------------- NewsItem [id=0, title=研發週報:Perl創歷史新低, date=2014-04-11 14:13, newsType=3] NewsItem [id=0, title=代碼面試最經常使用的10大算法, date=2014-04-10 11:34, newsType=3] NewsItem [id=0, title=TIOBE 2014年4月編程語言排行榜:Perl跌至歷史最低點, date=2014-04-10 09:20, newsType=3] NewsItem [id=0, title=金蝶發佈Apusic智慧雲平臺 構建產業聯盟推進信息化建設, date=2014-04-09 10:38, newsType=3] NewsItem 
[id=0, title=OpenSSL究竟爲什麼物,爲什麼它的影響力如此之大?, date=2014-04-09 08:52, newsType=3] NewsItem [id=0, title=Airbnb的管理之道:產品設計的點評策略與技巧, date=2014-04-09 07:01, newsType=3] NewsItem [id=0, title=大勢所趨 HTML5成Web開發者最關心的技術, date=2014-04-08 14:30, newsType=3] NewsItem [id=0, title=研發週報:微軟Build2014精華彙總, date=2014-04-04 16:09, newsType=3] NewsItem [id=0, title=Facebook發佈PlanOut 開源部分A/B測試源碼, date=2014-04-04 11:02, newsType=3] NewsItem [id=0, title=撼動企業應用架構的十大技術趨勢, date=2014-04-08 14:40, newsType=3] ---------------------- NewsItem [id=0, title=2014移動開發者必備的十大應用測試工具, date=22小時前, newsType=2] NewsItem [id=0, title=前《連線》主編Chris Anderson:創客就要DIT, date=22小時前, newsType=2] NewsItem [id=0, title=創客天下——《Make》及Maker Faire創辦人、O'Reilly Media創始人Dale Dougherty專訪, date=2014-04-11 11:21, newsType=2] NewsItem [id=0, title=《近匠》aGlass團隊:透析眼控技術的價值, date=2014-04-11 10:51, newsType=2] NewsItem [id=0, title=UC多屏戰略 推出電腦版和電視版瀏覽器, date=2014-04-11 07:07, newsType=2] NewsItem [id=0, title=「顛覆醫療」 時雲醫療推三款硬件產品, date=2014-04-10 21:05, newsType=2] NewsItem [id=0, title=2014Unity亞洲開發者大會倒計時 乾貨內容日程彙總, date=2014-04-10 10:06, newsType=2] NewsItem [id=0, title=《近匠》棱鏡:手遊渠道SDK平臺的技術歷程, date=2014-04-09 10:27, newsType=2] NewsItem [id=0, title=絕對的超現實!Jaunt打造360°全景VR電影, date=2014-04-08 15:45, newsType=2] NewsItem [id=0, title=Unite China 2014課程解析:行業解決方案專場免費開放, date=2014-04-08 13:13, newsType=2] ----------------------
如果你覺得這篇文章對你有幫助,可以頂一個。