import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.List; import net.sf.json.JSONArray; import net.sf.json.JSONObject; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class aaaa { public static void main(String[] args) { aaaa t = new aaaa(); t.getExl(); } //獲得URL數據 public List<String> getText() throws IOException{ List<String>Url=new ArrayList<String>(); //批量導入網址 String file = "shop.txt"; BufferedReader in = new BufferedReader( new InputStreamReader( new BufferedInputStream( new FileInputStream(file)), "gbk")); String str; while((str= in.readLine())!=null){ Url.add(str); System.out.println(str); } in.close(); return Url; } //將抓取的數據組合成json對象 public List<JSONObject> parseUrl() { List<JSONObject>UTL=new ArrayList<JSONObject>(); try { for(String url:getText()){ //將url這個網頁解析成一個dom對象 Document doc = Jsoup.connect(url).get(); //篩選<a class="mall-icon">的全部對象 Elements hrefs = doc.select("a.mall-icon"); //遍歷改對象數組 for(Element href:hrefs){ //對該對象的元素進行處理,包裝成另外一個url進行二級網站的數據抓取 String number=href.attr("data-uid"); String jsonurl="http://rate.taobao.com/ShopService4C.htm?userNumId="+number; System.out.println(jsonurl); Document doc1 = Jsoup.connect(jsonurl).get(); Element jsons=doc1.body(); JSONObject json=JSONObject.fromObject(jsons.toString().replaceAll("<body>|</body>", "").replace(""", "")); UTL.add(json); } } } catch (IOException e) { e.printStackTrace(); } return UTL; } //導出excel表格 public void getExl(){ try { // 第一步,建立一個webbook,對應一個Excel文件 HSSFWorkbook wb = new HSSFWorkbook(); // 第二步,在webbook中添加一個sheet,對應Excel文件中的sheet HSSFSheet sheet = wb.createSheet("行業統計"); // 第三步,在sheet中添加表頭第0行,注意老版本poi對Excel的行數列數有限制short HSSFRow row = sheet.createRow((int) 0); // 第四步,建立單元格,並設置值表頭 設置表頭居中 HSSFCellStyle style = wb.createCellStyle(); style.setAlignment(HSSFCellStyle.ALIGN_CENTER); // 建立一個居中格式 HSSFCell cell = row.createCell((short) 0); cell.setCellValue("URL"); cell.setCellStyle(style); cell = row.createCell((short) 1); cell.setCellValue("行業均值"); cell.setCellStyle(style); cell = row.createCell((short) 2); cell.setCellValue("本店值"); cell.setCellStyle(style); List<JSONObject>list=parseUrl(); List<String>url=getText(); System.out.println(list.size()); System.out.println(url.size()); for (int i = 0; i < list.size(); i++) { row = sheet.createRow((int) i + 1); JSONObject json=(JSONObject)list.get(i); JSONObject ratRefund=json.optJSONObject("ratRefund"); // 第四步,建立單元格,並設置值 System.out.println(i); row.createCell((short) 0).setCellValue("淘寶淘寶"); row.createCell((short) 1).setCellValue(ratRefund.optString("indVal")); row.createCell((short) 2).setCellValue(ratRefund.optString("localVal")); } FileOutputStream fout = new FileOutputStream("E:/taobao.xls"); wb.write(fout); fout.close(); } catch (Exception e) { e.printStackTrace(); } } }