親愛的開發者朋友們,知道百度網址翻譯嗎?他們為什麼可以翻譯源網頁呢?iframe 可是不能跨域操作的哦,那麼可以用代理來實現。直接上代碼:
本 Demo 是基於 MVC 寫的,非常簡單,copy 過去,簡單改改就可以用了哦。
//Action層 /** * 網址翻譯代理服務器接口層 * @Description: 此接口層可完成對所請求網址的代理,實現同域訪問 * @author zhanglongping * @CreateDate: 2016-8-23 上午10:52:49 */ @At("/proxy") public class ProxyModule { /** * 獲取網頁 * @return * @author zhanglongping * @date 2016-8-23 上午10:54:13 */ @At("/gethtml") @Ok("Raw") @Authority("") public Object gethtml(@Param("yeekit_proxy_url") String url,HttpServletRequest request, HttpServletResponse response){ try { String path = request.getContextPath(); String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; String html = new ProxyUtils().getUrlMap(url,basePath); // return html; InputStream is = new StringInputStream(html); BufferedReader in = new BufferedReader(new InputStreamReader(is,"UTF-8")); String line; PrintWriter out = response.getWriter(); while ((line = in.readLine()) != null) { out.println(line); } out.flush(); in.close(); } catch (Exception e) { e.printStackTrace(); } return null; } /** * 使用GET提交到目標服務器。 * * @param request * @param response * @param targetUrl * @throws IOException */ @At("/forward") @Ok("Raw") @Authority("") public Object urlRedirect(@Param("yeekit_proxy_url") String targetUrl,HttpServletRequest request, HttpServletResponse response) throws IOException { if(targetUrl.endsWith(".htm") || targetUrl.endsWith(".html") || targetUrl.endsWith(".shtml")){ try { String path = request.getContextPath(); String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; String html = new ProxyUtils().getUrlMap(targetUrl,basePath); // return html; InputStream is = new StringInputStream(html); BufferedReader in = new BufferedReader(new InputStreamReader(is,"UTF-8")); String line; PrintWriter out = response.getWriter(); while ((line = in.readLine()) != null) { out.println(line); } out.flush(); in.close(); } catch (Exception e) { e.printStackTrace(); // return null; } }else if(targetUrl.endsWith(".css") || targetUrl.endsWith(".js") || targetUrl.endsWith(".jpg")|| 
targetUrl.endsWith(".png") || targetUrl.endsWith(".svg") || targetUrl.endsWith(".gif")){ String fileName = targetUrl.split("/")[targetUrl.split("/").length-1]; // response.setHeader("Content-Disposition", "attachment; filename=" // + java.net.URLEncoder.encode(fileName, "UTF-8")); //圖片的名稱 String imgName = fileName; //名稱轉碼,避免中文亂碼 imgName = new String(imgName.getBytes("iso8859-1"),"UTF-8"); //圖片的資源地址,http://10.80.3.229:8081/mediaserver/574fe515e30ab97c9068d2e1 //這是媒體服務器返回的地址,由於是網絡地址,因此須要使用HttpURLConnection去獲取圖片 String imgUrl = targetUrl; //輸入流,用來讀取圖片 InputStream ins = null; HttpURLConnection httpURL = null; try{ URL url = new URL(imgUrl); //打開一個網絡鏈接 httpURL = (HttpURLConnection)url.openConnection(); //設置網絡鏈接超時時間 httpURL.setConnectTimeout(3000); //設置應用程序要從網絡鏈接讀取數據 httpURL.setDoInput(true); //設置請求方式 httpURL.setRequestMethod("GET"); //獲取請求返回碼 int responseCode = httpURL.getResponseCode(); if(responseCode == 200){ //若是響應爲「200」,表示成功響應,則返回一個輸入流 ins = httpURL.getInputStream(); //設置response響應頭 //encodeChineseDownloadFileName()用來解決文件名爲中文的問題,方法體在下面 if(fileName.indexOf(".css")>-1){ response.setContentType("text/css"); } response.setHeader("content-disposition", "attachment;filename="+ ProxyUtils.encodeChineseDownloadFileName(request,imgName)); //輸出流到response中 byte[] data = new byte[1024]; int len = 0; //輸出流 OutputStream out = response.getOutputStream(); while((len = ins.read(data)) > 0){ out.write(data, 0, len); } out.flush(); ins.close(); } }catch(Exception e){ System.out.println("下載附件圖片出錯!"+targetUrl); e.printStackTrace(); } } return null; }
工具類:
import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.servlet.http.HttpServletRequest; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.StringUtils; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; /** * 譯庫網址翻譯代理服務工具類 * * @Description:包含:提取HTML中網址,並轉換爲代理的網址服務地址; * @author zhanglongping * @CreateDate: 2016-8-23 上午10:15:08 * @UpdateUser: zhanglongping * @UpdateDate: 2016-8-23 上午10:15:08 * @UpdateRemark: 說明本次修改內容 */ public class ProxyUtils { // public static void main(String[] args) throws IOException { //// ProxyUtils pu = new ProxyUtils(); //// pu.getUrlMap("http://english.cas.cn"); // Connection conn = Jsoup.connect("http://www.bbc.com"); // Document doc_one = conn.get(); // System.out.println(doc_one); // } /** * 獲取url哈希:key:源url value:代理url * @param url * @author zhanglongping * @date 2016-8-23 上午10:42:41 */ public String getUrlMap(String url,String basePath){ // String url_protocol = "",url_host = ""; try { //特殊網址轉換 url = transformation(url); URL urlcurr = new URL(url); // url_protocol = urlcurr.getProtocol(); // url_host = urlcurr.getHost(); String hostname = urlcurr.getProtocol()+"://"+urlcurr.getHost(); // String proxyHost = basePath; // String proxyHostName = proxyHost+"proxy/forward?yeekit_proxy_url="; Document doc_one; Connection conn = Jsoup.connect(hostname); doc_one = conn.get(); doc_one.setBaseUri(hostname); // Elements links = doc_one.select("a[href]"); // Elements media = doc_one.select("[src]"); // Elements imports = doc_one.select("link[href]"); Elements head = doc_one.select("meta"); head.get(0).before("<base href=\""+hostname+"/"+"\" />"); //鼠標懸停翻譯js腳本注入 //懸停腳本引用 String hover_js = "<script src=\""+basePath+"/yeekit_translate_url/js/yeekit_hover_trans.js\" 
type=\"text/javascript\"></script>"; String jquery_js = "<script src=\"http://cdn.bootcss.com/jquery/3.1.0/jquery.min.js\" type=\"text/javascript\"></script>"; head.get(0).after(jquery_js + hover_js); // for (Element src : media) { // String key = src.attr("abs:src"); // src.attr("src", proxyHostName+key); // } // // for (Element link : imports) { // String key = link.attr("abs:href"); // link.attr("href", proxyHostName+key); // } // // for (Element link : links) { // String key = link.attr("abs:href"); // link.attr("href", proxyHostName+key); // } String dochtml = doc_one.html().toString(); //加強型處理 - 處理js腳本里靜態資源地址引用 // List<String> list_src_img = getImgSrc(dochtml); // for(String src:list_src_img){ // if(src.indexOf("./") > -1){ // dochtml = dochtml.replaceAll(src, proxyHostName+hostname+src.substring(1)); // } // } // System.out.println(dochtml); return dochtml; } catch (IOException e) { e.printStackTrace(); return null; } } /** * 內容獲取 * @return * @author zhanglongping * @throws IOException * @date 2016-8-30 下午5:44:31 */ public String get_https_html(String url) throws IOException{ URL urlcurr = new URL(url); String hostname = urlcurr.getProtocol()+"://"+urlcurr.getHost(); Document doc_one; Connection conn = Jsoup.connect(hostname); doc_one = conn.post(); doc_one.setBaseUri(hostname); Elements head = doc_one.select("meta"); head.get(0).before("<base href=\""+hostname+"/"+"\" />"); String dochtml = doc_one.html().toString(); return dochtml; } /* * 解決文件爲中文名的亂碼問題 */ public static String encodeChineseDownloadFileName(HttpServletRequest request, String pFileName) throws UnsupportedEncodingException{ String filename = null; //獲取請求頭中的瀏覽器標識 String agent = request.getHeader("USER-AGENT"); if(agent != null){ if(agent.indexOf("Firefox") != -1){ //Firefox filename = "=?UTF-8?B?" 
+ (new String(Base64.encodeBase64(pFileName.getBytes("UTF-8")))) + "?="; }else if(agent.indexOf("Chrome") != -1){ //Chrome filename = new String(pFileName.getBytes(), "ISO8859-1"); }else{ //IE7+ filename = URLEncoder.encode(pFileName, "UTF-8"); //替換空格 filename = StringUtils.replace(filename, "+", "%20"); } }else{ filename = pFileName; } return filename; } /** * 獲取img標籤中的src值 * @param content * @return */ public List<String> getImgSrc(String content){ List<String> list = new ArrayList<String>(); //目前img標籤標示有3種表達式 //<img alt="" src="1.jpg"/> <img alt="" src="1.jpg"></img> <img alt="" src="1.jpg"> //開始匹配content中的<img />標籤 Pattern p_img = Pattern.compile("<(img|IMG)(.*?)(/>|></img>|>)"); Matcher m_img = p_img.matcher(content); boolean result_img = m_img.find(); if (result_img) { while (result_img) { //獲取到匹配的<img />標籤中的內容 String str_img = m_img.group(2); //開始匹配<img />標籤中的src Pattern p_src = Pattern.compile("(src|SRC)=(\"|\')(.*?)(\"|\')"); Matcher m_src = p_src.matcher(str_img); if (m_src.find()) { String str_src = m_src.group(3); list.add(str_src); } //結束匹配<img />標籤中的src //匹配content中是否存在下一個<img />標籤,有則繼續以上步驟匹配<img />標籤中的src result_img = m_img.find(); } } return list; } /** * 特殊網址轉換 * @param url * @return * @author zhanglongping * @date 2016-8-30 下午6:18:48 */ public String transformation(String url){ //百度的二級域名www.baidu.com重定向存在問題 if(url.equals("http://www.baidu.com")){ url = "http://baidu.com"; } return url; } }