package cn.com.czj.front.service.common; import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import java.util.Date; import java.util.List; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.lang.StringUtils; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import cn.com.czj.base.entity.data.DataPinganerEntity; import cn.com.czj.base.entity.data.DataUrlEntity; import cn.com.czj.front.dao.DataPingAnerDao; import cn.com.czj.front.dao.DataUrlDao; import cn.com.czj.front.dao.UrlContentDao; import cn.com.czj.front.utils.http.SslUtils; import cn.com.easy.utils.HttpClientUtils; /** * 抓平安數據 * * [@author](https://my.oschina.net/arthor) linwk 2016年10月25日 * */ [@Service](https://my.oschina.net/service) public class CrawerUserPingAnService { /** * 網址 * */ @Autowired private DataUrlDao dataUrlDao; @Autowired private DataPingAnerDao dataPinganerDao; @Autowired private DownLoadPicService DownLoadPicService; /** * 內容 * */ @Autowired private UrlContentDao urlContentDao; public void doCrawerService() throws Exception { // 網頁 List<DataUrlEntity> urls = dataUrlDao.findAll(); if (CollectionUtils.isNotEmpty(urls)) { for (int i = 0; i < urls.size(); i++) { // 獲取分頁 循環 int pageSize = 10; // 加密 DataUrlEntity tDataUrlEntity = urls.get(i); // 地址 tDataUrlEntity.getUrl(); // 組合 // System.out.println(tDataUrlEntity.getUrl()); String pcString = StringUtils.replace(tDataUrlEntity.getUrl(), "http://life.pingan.com/kehufuwu/fuwugongju/return_select.shtml?", ""); System.out.println(pcString); String signatureSource = pcString + "®ionCode=&sex=&age=¤tTime=" + new Date().getTime() + "&roundRex=" + RandomStringUtils.random(5, "1234567890"); String urlString = "http://life.pingan.com/binfenxiari/signOfAgent.do?" + signatureSource + "&_=" + new Date().getTime(); // System.out.println(urlString); String aString = HttpClientUtils.get(urlString); if (StringUtils.isBlank(aString)) { continue; } // System.out.println(aString); JSONObject demoJson = new JSONObject(aString); String sign = getJsonName(demoJson, "sign"); String signature = sign; // System.out.println(sign); int j = 0; while(pageSize > j) { j++; try { String contentString = extracted(signatureSource, signature, j); if (StringUtils.equals(contentString, "success_jsoncallback({\"RESFLAG\":\"N\",\"errMsg\":\"無效連接\"})")) { System.out.println("無效連接"); continue; } if (StringUtils.equals(contentString, "success_jsoncallback({\"RESFLAG\":\"N\",\"errMsg\":\"連接超時\"})")) { System.out.println("連接超時"); signatureSource = pcString + "®ionCode=&sex=&age=¤tTime=" + new Date().getTime() + "&roundRex=" + RandomStringUtils.random(5, "1234567890"); urlString = "http://life.pingan.com/binfenxiari/signOfAgent.do?" + signatureSource + "&_=" + new Date().getTime(); // System.out.println(urlString); aString = HttpClientUtils.get(urlString); if (StringUtils.isNotBlank(aString)) { continue; } // System.out.println(aString); demoJson = new JSONObject(aString); sign = getJsonName(demoJson, "sign"); signature = sign; // System.out.println(sign); contentString = extracted(signatureSource, signature, j); } if (StringUtils.isNotBlank(contentString)) { // 解析保存到用戶 int start = contentString.indexOf("("); String newJson = contentString.substring(start + 1, contentString.lastIndexOf(")"));// 組裝成新的Json數據 //System.out.println("*******************************************新的數值***************************************************"); //System.out.println(newJson); //System.out.println("*******************************************新的數值***************************************************"); JSONObject jo = new JSONObject(newJson); JSONObject pageBean = jo.getJSONObject("pageBean"); // System.out.println("\n將Json數據解析爲Map:"); System.out.println("*******************************************totalPageSize***************************************************"); System.out.println("totalPageSize: " + pageBean.getInt("totalPageSize") + " totalResults: " + pageBean.getInt("totalResults")); System.out.println("*******************************************totalPageSize***************************************************"); if (pageBean.getInt("totalPageSize") > 0) { pageSize = pageBean.getInt("totalPageSize"); } else { pageSize = 0; continue; } JSONArray jsonStrArray = jo.getJSONArray("resultList"); for (int k = 0; k < jsonStrArray.length(); k++) { JSONObject dataPinganerEntity = jsonStrArray.getJSONObject(k); DataPinganerEntity dataPinganerEntity2 = new DataPinganerEntity(); dataPinganerEntity2.setDEPTNAME(dataPinganerEntity.get("DEPTNAME").toString()); // dataPinganerEntity2.setSELFINTRODUCE(dataPinganerEntity.get("SELFINTRODUCE").toString()); dataPinganerEntity2.setTEL(dataPinganerEntity.get("TEL").toString()); dataPinganerEntity2.setSEX(dataPinganerEntity.get("SEX").toString()); dataPinganerEntity2.setEMAIL(dataPinganerEntity.get("EMAIL").toString()); // dataPinganerEntity2.setAGENTID(dataPinganerEntity.get("AGENTID").toString()); dataPinganerEntity2.setNAME(dataPinganerEntity.get("NAME").toString()); dataPinganerEntity2.setDESCRIPTION(dataPinganerEntity.get("DESCRIPTION").toString()); dataPinganerEntity2.setHEADSHOT(dataPinganerEntity.get("HEADSHOT").toString()); // dataPinganerEntity2.setHOMEALIAS(dataPinganerEntity.get("HOMEALIAS").toString()); dataPinganerEntity2.setMOBILE(dataPinganerEntity.get("MOBILE").toString()); dataPinganerEntity2.setHOMEADDR(dataPinganerEntity.get("HOMEADDR").toString()); // System.out.println(dataPinganerEntity2.getNAME()); int count = dataPinganerDao.countByMobile(dataPinganerEntity2.getMOBILE()); if (count > 0) { System.out.println("重複了"); } else { dataPinganerDao.save(dataPinganerEntity2); System.out.println("保存進度**********************" + i * 100 / urls.size() + "%*********************"); } } } } catch (Exception e) { continue; } } System.out.println("保存進度**********************" + i * 100 / urls.size() + "%*********************"); } } } private String extracted(String signatureSource, String signature, int j) throws Exception { String urlString; // var // urls="http://sales.pa18.com/life/toolbox.queryAgentsManualSelection.shtml?provinceCode="+province+"&cityCode="+citys+"®ionCode="+area+"&sex="+sex+"&age="+age+"¤tTime="+currentTime*1+"&roundRex="+sui_num*1+"&signature="+signature; urlString = "https://sales.pa18.com/life/toolbox.queryAgentsManualSelection.shtml?" + signatureSource + "&signature=" + signature + "&pageSize=100" + "&jsoncallback=success_jsoncallback" + "&_=" + new Date().getTime() + "¤tPage=" + j; // // 獲取內容 System.out.println(urlString); String contentString = ""; // HttpUtils.doGet(urlString); // System.out.println(contentString); // contentString = doGet(urlString); contentString = HttpClientUtils.get(urlString); System.out.println(contentString); // 獲取內容 if (StringUtils.isBlank(contentString)) { // contentString = doGet(urlString); contentString = HttpClientUtils.get(urlString); System.out.println(contentString); // contentString = // DownLoadPicService.getHTML(urlString); // contentString=URLEncoder.encode(contentString, // "UTF-8"); // System.out.println(contentString); } return contentString; } private static String getJsonName(JSONObject demoJson, String tString) throws JSONException { return demoJson.getString(tString); } /** the connection connect time out in millionseconds */ private static final int CONNECT_TIME_OUT = 60000; /** the connection read time out in millionseconds */ private static final int READ_TIME_OUT = 60000; public static String doGet(String url) throws Exception { String result = ""; BufferedReader in = null; try { URL realUrl = new URL(url.trim()); if ("https".equalsIgnoreCase(realUrl.getProtocol())) { try { SslUtils.ignoreSsl(); } catch (Exception e) { } } // open connection URLConnection connection = realUrl.openConnection(); connection.setConnectTimeout(CONNECT_TIME_OUT); connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36"); // connection.setRequestProperty("Content-Type", // "text/json;charset=UTF-8"); connection.setReadTimeout(READ_TIME_OUT); // connect connection.connect(); // define BufferedReader to read input content in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "GBK")); String line; while ((line = in.readLine()) != null) { result += line; } StringBuffer buffer = new StringBuffer(); char[] buf = new char[64]; int count = 0; while ((count = in.read(buf)) != -1) { buffer.append(buffer, 0, count); } System.out.println("**************************************************************************************************************"); System.out.println(result); System.out.println("**************************************************************************************************************"); System.out.println(buffer.toString()); System.out.println("**************************************************************************************************************"); } finally { if (in != null) { in.close(); } } return result; } }