這個代理配置類單獨使用時沒有什麼作用,但移植到其他專案中就能派上用場。
package util; import java.util.Properties; public class HttpProxyConfiger { public static void configProxy(){ Properties prop=System.getProperties(); prop.setProperty("proxySet","true"); prop.setProperty("http.proxyHost","192.168.xx.xx"); prop.setProperty("http.proxyPort","port"); } }
工具類中包含多種方法,例如讀寫 txt 檔案、發送 POST 請求等。
package util; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.net.URLDecoder; import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringEscapeUtils; import com.google.gson.Gson; import com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.sun.org.apache.bcel.internal.generic.NEW; import dao.DataBaseDao; import dao.impl.DataBaseDaoImpl; import entity.AiDataBase; import entity.AiResultBai; import entity.AiResultFs; public class Tools { static Date datetime=new Date(); static Timestamp ts = new Timestamp(datetime.getTime()); //讀取本地文件pos.txt public static List<String> readFile02(String path) { // 使用一個字符串集合來存儲文本中的路徑 ,也可用String []數組 List<String> list = new ArrayList<String>(); try { FileInputStream fis = new FileInputStream(path); // 防止路徑亂碼 若是utf-8 亂碼 改GBK eclipse裏建立的txt 用UTF-8,在電腦上本身建立的txt 用GBK InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); BufferedReader br = new BufferedReader(isr); String line = ""; while ((line = br.readLine()) != null) { // 若是 t x t文件裏的路徑 不包含---字符串 這裏是對裏面的內容進行一個篩選 list.add(line); } br.close(); isr.close(); fis.close(); } catch (Exception e) { e.printStackTrace(); } return list; } /** * 分析fs數據結果時用 * @param path * @return */ public static List<String> readFiletofs(String path) { // 使用一個字符串集合來存儲文本中的路徑 ,也可用String []數組 List<String> list = new ArrayList<String>(); try { FileInputStream fis = new FileInputStream(path); // 防止路徑亂碼 若是utf-8 亂碼 改GBK eclipse裏建立的txt 用UTF-8,在電腦上本身建立的txt 用GBK InputStreamReader isr = new 
InputStreamReader(fis, "UTF-8"); BufferedReader br = new BufferedReader(isr); String line = ""; while ((line = br.readLine()) != null) { // 若是 t x t文件裏的路徑 不包含---字符串 這裏是對裏面的內容進行一個篩選 list.add(line.replace("+", "%2B")); } br.close(); isr.close(); fis.close(); } catch (Exception e) { e.printStackTrace(); } return list; } /* * 專門給百度去用的,把一份大文件拆分紅幾個小文件 * 本地讀取13W基礎數據,拆分紅每1W條存入一個list執行一次分析 */ public static Map readFilechaifen(String path) { Map map=new HashMap(); //存放總數的text List<String> listall=new ArrayList<String>(); try { FileInputStream fis = new FileInputStream(path); // 防止路徑亂碼 若是utf-8 亂碼 改GBK eclipse裏建立的txt 用UTF-8,在電腦上本身建立的txt 用GBK InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); BufferedReader br = new BufferedReader(isr); String line = ""; while ((line = br.readLine()) != null) { // 若是 t x t文件裏的路徑 不包含---字符串 這裏是對裏面的內容進行一個篩選 listall.add(line.replace("+", "%2B")); } br.close(); isr.close(); fis.close(); } catch (Exception e) { e.printStackTrace(); } //每讀取同樣,index加一,到了index % 5000 == 0 的時候,就用一個新的list //把if裏面的代碼放到後面來,list定義在for外面, 在你這個if裏面再次new一下 //就行了 List<String> listnew=null; for(int i=0;i<=listall.size();i++) { if(i%5000==0) { listnew=new ArrayList<String>(); listnew.add(listall.get(i)); } listnew.add(listall.get(i)); } return map; } public static List<AiDataBase> readFiletoadb(String path) { // 使用一個字符串集合來存儲文本中的路徑 ,也可用String []數組 List<AiDataBase> adblist = new ArrayList<AiDataBase>(); try { FileInputStream fis = new FileInputStream(path); // 防止路徑亂碼 若是utf-8 亂碼 改GBK eclipse裏建立的txt 用UTF-8,在電腦上本身建立的txt 用GBK InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); BufferedReader br = new BufferedReader(isr); String line = ""; //int i=0; while ((line = br.readLine()) != null) { //i++; //System.out.println(i+"==========="+line); // 若是 t x t文件裏的路徑 不包含---字符串 這裏是對裏面的內容進行一個篩選 if(line.contains("+")) { line=line.replace("+", "%2B"); //System.out.println("替換+後=="+line); } if(line.contains("'")) { line=line.replaceAll("'", ""); //System.out.println("替換'後=="+line); 
} if(line.contains("\\")){ line=line.replace("\\", ""); } AiDataBase aiDataBase=new AiDataBase(); aiDataBase.setText(line.replaceAll(" ", "").trim()); aiDataBase.setCreate_time(ts); adblist.add(aiDataBase); } br.close(); isr.close(); fis.close(); } catch (Exception e) { e.printStackTrace(); } return adblist; } /*public static void main(String[] args) { List<AiDataBase> adblist =readFiletoadb("E:\\360downloads\\111\\data_jd.txt"); System.out.println(adblist.size()); }*/ //讀取本地文件result_fs.txt public static List<AiResultFs> readFileforFs(String path) { // 使用一個字符串集合來存儲文本中的路徑 ,也可用String []數組 List<AiResultFs> list = new ArrayList<AiResultFs>(); try { FileInputStream fis = new FileInputStream(path); // 防止路徑亂碼 若是utf-8 亂碼 改GBK eclipse裏建立的txt 用UTF-8,在電腦上本身建立的txt 用GBK InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); BufferedReader br = new BufferedReader(isr); DataBaseDao dbd=new DataBaseDaoImpl(); String line = ""; int j=0; while ((line = br.readLine())!=null) { if(line.contains("…")) { line=StringEscapeUtils.escapeHtml(line); } if(line.contains("+")) { line=line.replace("+", "%2B"); //System.out.println("替換+後=="+line); } /*if(line.contains("'")) { line=line.replaceAll("'", ""); //System.out.println("替換'後=="+line); }*/ j++; if((dbd.getIdByText1(line.split("===")[0].trim().replaceAll(" ", "")))>0){ AiResultFs arf=new AiResultFs(); System.out.println(j+line); arf.setData_base_id(dbd.getIdByText1(line.split("===")[0].trim().replaceAll(" ", ""))); arf.setSentiment(Integer.parseInt(((((line.split("==="))[1]).split(":"))[1]))); arf.setScore(Double.parseDouble(((((line.split("==="))[2]).split(":"))[1]))); arf.setCreate_time(ts); list.add(arf); }else { System.out.println(line+"不存在"); } //} } br.close(); isr.close(); fis.close(); } catch (Exception e) { e.printStackTrace(); } return list; } /** * 給aibaidu.java用的,把13W的數據拆成5000一個list而後進行分析 * @param paramlist * @return */ public static Map<Integer, List<String>> getmapchafen(List<String> paramlist){ List<String> listnew=new 
ArrayList<String>(); Map<Integer , List<String>> lMap=new HashMap<Integer , List<String>>(); for(int i=0;i<paramlist.size();i++) { if(i==0 ||i%5000==0) { listnew=new ArrayList<String>(); listnew.add(paramlist.get(i)); lMap.put(i+1, listnew); }else { //listnew=new ArrayList<String>(); listnew.add(paramlist.get(i)); } } System.out.println(lMap.size()); return lMap; } //讀取本地文件result_baidu.txt,存儲爲List<AiResultBai> public static List<AiResultBai> readFileforbaidu(String path) { // 使用一個字符串集合來存儲文本中的路徑 ,也可用String []數組 List<AiResultBai> list = new ArrayList<AiResultBai>(); try { FileInputStream fis = new FileInputStream(path); // 防止路徑亂碼 若是utf-8 亂碼 改GBK eclipse裏建立的txt 用UTF-8,在電腦上本身建立的txt 用GBK InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); BufferedReader br = new BufferedReader(isr); DataBaseDao dbd=new DataBaseDaoImpl(); String line = ""; int j=0; while ((line = br.readLine())!=null) { if(!line.equals("")) { if(line.length()>=256) { line.substring(0, 256); } if(line.contains("+")) { line=line.replace("+", "%2B"); //System.out.println("替換+後=="+line); } /*if(line.contains("'")) { line=line.replaceAll("'", "''"); //System.out.println("替換'後=="+line); }*/ /*if(line.contains("…")) { line=StringEscapeUtils.escapeHtml(line); }*/ j++; System.out.println(j+line); ; AiResultBai arBai=new AiResultBai(); if((dbd.getIdByText1(((line.split("===")[0]).trim().replaceAll(" ", ""))))>0) { arBai.setData_base_id(dbd.getIdByText1((line.split("===")[0]).trim().replaceAll(" ", ""))); arBai.setSentiment(Integer.parseInt(((((line.split("==="))[1]).split(":"))[1]))); arBai.setPositive_prob(Double.parseDouble(((((line.split("==="))[2]).split(":"))[1]))); arBai.setNegative_prob(Double.parseDouble(((((line.split("==="))[3]).split(":"))[1]))); arBai.setCreate_time(ts); list.add(arBai); }else { System.out.println(line+"不存在"); } } } br.close(); isr.close(); fis.close(); } catch (Exception e) { e.printStackTrace(); } return list; } /*public static void main(String[] args) { String 
string="物流很快,手機殼也很精美,本身真的超喜歡啊……但河馬嘴巴上的灰漬是髒"; StringEscapeUtils.escapeHtml(string); System.out.println(string); //System.out.println(list.size()); }*/ //把字符串一行行寫入文件 public void writeTxt(String result,String resultfilepath) { //寫入中文字符時解決中文亂碼問題 try { FileOutputStream fos = new FileOutputStream(new File(resultfilepath),true); OutputStreamWriter osw = new OutputStreamWriter(fos, "UTF-8"); BufferedWriter bw = new BufferedWriter(osw); bw.write(result + "\t\n"); //注意關閉的前後順序,先打開的後關閉,後打開的先關閉 bw.close(); osw.close(); fos.close(); } catch (Exception e) { e.printStackTrace(); } } //2個文本文件一行一行對比 public static List<String> compare(String path1,String path2) { // 使用一個字符串集合來存儲文本中的路徑 ,也可用String []數組 List<String> strlist = new ArrayList<String>(); try { FileInputStream fis = new FileInputStream(path1); // 防止路徑亂碼 若是utf-8 亂碼 改GBK eclipse裏建立的txt 用UTF-8,在電腦上本身建立的txt 用GBK InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); BufferedReader br = new BufferedReader(isr); FileInputStream fis1 = new FileInputStream(path2); InputStreamReader isr1 = new InputStreamReader(fis1, "UTF-8"); BufferedReader br1 = new BufferedReader(isr1); String line1 = ""; String line2=""; while (((line1 = br.readLine()) != null)&&((line2 = br1.readLine()) != null)) { // 若是 t x t文件裏的路徑 不包含---字符串 這裏是對裏面的內容進行一個篩選 if(!((line1.trim().replace(" ", "")).equals(line2.trim().replace(" ", "")))) { strlist.add((line1.split("==="))[0].trim()+"=="+(line1.split("==="))[1]+"=="+(line2.split("==="))[1]); } } br.close(); isr.close(); fis.close(); } catch (Exception e) { e.printStackTrace(); } return strlist; } //post請求獲取結果 /** * sendUrl (遠程請求的URL) * param (遠程請求參數) * JSONObject (遠程請求返回的JSON) */ public String sendPostUrl(String url,String param){ PrintWriter out = null; BufferedReader in = null; Gson gson = new Gson(); JsonParser parser=new JsonParser(); String result = ""; List<String> reslutlist=new ArrayList<String>(); String qingganqingxiang=""; String jieguo=""; try { URL realUrl = new URL(url); // 打開和URL之間的鏈接 
URLConnection conn = realUrl.openConnection(); // 發送POST請求必須設置以下兩行 conn.setDoOutput(true); conn.setDoInput(true); // 獲取URLConnection對象對應的輸出流(設置請求編碼爲UTF-8) out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), "UTF-8")); // 發送請求參數 out.print(param); // flush輸出流的緩衝 out.flush(); // 獲取請求返回數據(設置返回數據編碼爲UTF-8) in = new BufferedReader( new InputStreamReader(conn.getInputStream(), "UTF-8")); String line; while ((line = in.readLine()) != null) { result += line; } JsonObject jsonObject = parser.parse(result).getAsJsonObject(); // System.out.println(jsonObject); //分析結果,得出字符串 裝了好幾臺機子了,一直都用這個===情感傾向:2===分值:0.632563 if((Double.parseDouble(jsonObject.get("score").toString()))>0.5){ qingganqingxiang="===2"; } else { qingganqingxiang="===0"; } String score="==="+jsonObject.get("score").toString(); String sentence=jsonObject.get("sentence").toString(); System.out.println("json獲取的值"+sentence); jieguo=sentence+qingganqingxiang+score; // jieguo=param+qingganqingxiang+score; } catch (IOException e) { e.printStackTrace(); } finally{ try{ if(out!=null){ out.close(); } if(in!=null){ in.close(); } } catch(IOException ex){ ex.printStackTrace(); } } return jieguo; } /** * 改變post方法,最終獲取的是一個list<airesultfs> * @param url * @param paramlist * @return */ public static List<AiResultFs> sendPostUrl1(String url,List<String> paramlist){ Date datetime=new Date(); Timestamp ts = new Timestamp(datetime.getTime()); PrintWriter out = null; BufferedReader in = null; Gson gson = new Gson(); JsonParser parser=new JsonParser(); DataBaseDao dBaseDao=new DataBaseDaoImpl(); List<AiResultFs> reslutlist=new ArrayList<AiResultFs>(); for (String param : paramlist) { try { HttpProxyConfiger.configProxy(); //System.out.println(param); URL realUrl = new URL(url); // 打開和URL之間的鏈接 HttpURLConnection conn = (HttpURLConnection)realUrl.openConnection(); //URLConnection conn = realUrl.openConnection(); // 發送POST請求必須設置以下兩行 conn.setDoOutput(true); conn.setDoInput(true); // 獲取URLConnection對象對應的輸出流(設置請求編碼爲UTF-8) out = 
new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), "UTF-8")); // 發送請求參數 out.print("sentence="+param); // flush輸出流的緩衝 out.flush(); // 獲取請求返回數據(設置返回數據編碼爲UTF-8) in = new BufferedReader( new InputStreamReader(conn.getInputStream(), "UTF-8")); String line; String result = ""; while ((line = in.readLine()) != null) { result += line; } JsonObject jsonObject =parser.parse(result.replace("'", "''")).getAsJsonObject(); //System.out.println("jsonObject=="+jsonObject); //分析結果,得出字符串 裝了好幾臺機子了,一直都用這個===情感傾向:2===分值:0.632563 int sentiment; if((Double.parseDouble(jsonObject.get("score").toString()))>0.5){ sentiment=2; } else { sentiment=0; } double score=Double.parseDouble(jsonObject.get("score").toString()); String sentence=jsonObject.get("sentence").toString(); System.out.println("param========"+param); int database_id=dBaseDao.getIdByText1(param.trim().replace(" ", "")); System.out.println(database_id); if(database_id>0) { AiResultFs arf=new AiResultFs(); arf.setData_base_id(database_id); arf.setSentiment(sentiment); arf.setScore(score); arf.setCreate_time(ts); reslutlist.add(arf); } //System.out.println("結束"); } catch (IOException e) { e.printStackTrace(); } finally{ try{ if(out!=null){ out.close(); } if(in!=null){ in.close(); } } catch(IOException ex){ ex.printStackTrace(); } } } return reslutlist; } }