package image.images;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

/**
 * Downloads a batch of captcha images from a fixed URL and stores them as
 * numbered PNG files.
 *
 * @author FMm
 */
public class HttpClientImage {

    /** Endpoint that returns a captcha image on every GET. */
    private static final String CAPTCHA_URL =
            "http://con.monyun.cn:9960/aut_checkCode.hts?iden=1422083454409442236&t=0.06292891333169814";

    /** Directory the downloaded images are written to. */
    private static final String OUTPUT_DIR = "/Users/mac/Desktop/image/";

    /** Number of captcha images to fetch. */
    private static final int IMAGE_COUNT = 20;

    /** Pause before each request; the original author notes the image does not load without it. */
    private static final long REQUEST_DELAY_MILLIS = 2000L;

    /**
     * Fetches {@link #IMAGE_COUNT} captcha images and writes them to
     * {@code OUTPUT_DIR/<index>.png}.
     *
     * @param args unused
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException             if a request or a file write fails
     * @throws InterruptedException    if the thread is interrupted while pausing between requests
     */
    public static void main(String[] args) throws ClientProtocolException, IOException, InterruptedException {
        // try-with-resources guarantees the client is closed even when a request fails.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            for (int i = 0; i < IMAGE_COUNT; i++) {
                Thread.sleep(REQUEST_DELAY_MILLIS);

                HttpGet httpGet = new HttpGet(CAPTCHA_URL);
                // Closing the response releases the underlying connection on every path.
                try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                    int status = response.getStatusLine().getStatusCode();
                    if (status != 200) {
                        // Do not store an error page under a .png name.
                        System.out.println("Skipping image " + i + ": HTTP status " + status);
                        continue;
                    }

                    HttpEntity httpEntity = response.getEntity();
                    if (httpEntity == null) {
                        continue;
                    }

                    // Print the content type so the operator can confirm an image was returned.
                    // The header is optional, so guard against a missing value.
                    String contentType = httpEntity.getContentType() != null
                            ? httpEntity.getContentType().getValue()
                            : "unknown";
                    System.out.println("Content-Type:" + contentType);

                    // Close the stream ourselves rather than relying on the copy helper to do so.
                    try (InputStream inputStream = httpEntity.getContent()) {
                        FileUtils.copyToFile(inputStream, new File(OUTPUT_DIR + i + ".png"));
                    }
                }
            }
        }
    }
}
package image.images; import java.awt.Color; import java.awt.image.BufferedImage; import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import javax.imageio.ImageIO; import javax.imageio.ImageWriter; import javax.imageio.stream.ImageOutputStream; import javax.net.ssl.HttpsURLConnection; /** * 圖片背景處理,提升識別度 * @author FMm * */ public class ImageConverter { private static List<File> fileList = new ArrayList<File>(); public static void main(String[] args) { // 圖片所在的根目錄 , 圖片去除水印後的存儲目錄 // 支持批量去除圖片水印,第一個參數爲圖片存在目錄,第二爲新建圖片目錄 convertAllImages("/Users/mac/Desktop/image/", "/Users/mac/Desktop/images/"); } private static void convertAllImages(String dir, String saveDir) { File dirFile = new File(dir); File saveDirFile = new File(saveDir); dir = dirFile.getAbsolutePath(); saveDir = saveDirFile.getAbsolutePath(); loadImages(new File(dir)); for (File file : fileList) { String filePath = file.getAbsolutePath(); String dstPath = saveDir + filePath.substring(filePath.indexOf(dir) + dir.length(), filePath.length()); System.out.println("converting: " + filePath); replaceColor(file.getAbsolutePath(), dstPath); } } public static void loadImages(File f) { if (f != null) { if (f.isDirectory()) { File[] fileArray = f.listFiles(); if (fileArray != null) { for (int i = 0; i < fileArray.length; i++) { loadImages(fileArray[i]); } } } else { String name = f.getName(); if (name.endsWith("png") || name.endsWith("jpg")) { fileList.add(f); } } } } @SuppressWarnings("unused") private static void replaceFolderImages(String dir) { File dirFile = new File(dir); File[] files = dirFile.listFiles(new FileFilter() { public boolean accept(File file) { String name = file.getName(); if (name.endsWith("png") || name.endsWith("jpg")) { return true; } return false; } }); for (File img : files) { replaceColor(img.getAbsolutePath(), img.getAbsolutePath()); } } 
private static void replaceColor(String srcFile, String dstFile) { try { Color color = new Color(255, 195, 195); replaceImageColor(srcFile, dstFile, color, Color.WHITE); } catch (IOException e) { e.printStackTrace(); } } public static void replaceImageColor(String file, String dstFile, Color srcColor, Color targetColor) throws IOException { URL http; if (file.trim().startsWith("https")) { http = new URL(file); HttpsURLConnection conn = (HttpsURLConnection) http.openConnection(); conn.setRequestMethod("GET"); } else if (file.trim().startsWith("http")) { http = new URL(file); HttpURLConnection conn = (HttpURLConnection) http.openConnection(); conn.setRequestMethod("GET"); } else { http = new File(file).toURI().toURL(); } BufferedImage bi = ImageIO.read(http.openStream()); if (bi == null) { return; } Color wColor = new Color(255, 255, 255);// 白色 for (int i = 0; i < bi.getWidth(); i++) { for (int j = 0; j < bi.getHeight(); j++) { System.out.println(i + "---" + j); int color = bi.getRGB(i, j); Color oriColor = new Color(color); int red = oriColor.getRed(); int greed = oriColor.getGreen(); int blue = oriColor.getBlue(); // 如下if處是則改變圖片顏色 將圖片背景改變成純白色,提升識別度 if (i < 4 | i > 100) { bi.setRGB(i, j, wColor.getRGB()); //bi.setRGB()方法是把對應的xy座標的顏色改變成純白色Color wColor = new Color(255, 255, 255);// 白色 } if (j < 4 | j > 35) { bi.setRGB(i, j, wColor.getRGB()); } if (red < 107 && greed < 107 && blue < 107) { bi.setRGB(i, j, wColor.getRGB()); } if ((red <= 255 && red > 200) & (greed <= 255 && greed >= 200) & blue <= 255 && blue >= 200) { bi.setRGB(i, j, wColor.getRGB()); } } } String type = file.substring(file.lastIndexOf(".") + 1, file.length()); Iterator<ImageWriter> it = ImageIO.getImageWritersByFormatName(type); ImageWriter writer = it.next(); File f = new File(dstFile); f.getParentFile().mkdirs(); ImageOutputStream ios = ImageIO.createImageOutputStream(f); writer.setOutput(ios); writer.write(bi); bi.flush(); ios.flush(); ios.close(); } }
package image.images;

import java.io.File;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

/**
 * Runs OCR on a cleaned captcha image and prints the recognised text together
 * with the sum of its first and third characters.
 *
 * @author FMm
 */
public class ReadImage {

    /** Matches exactly one ASCII digit; compiled once instead of on every call. */
    private static final Pattern SINGLE_DIGIT = Pattern.compile("[0-9]");

    /**
     * Recognises the hard-coded captcha image and prints the result.
     *
     * @param args unused
     * @throws TesseractException if OCR fails
     */
    public static void main(String[] args) throws TesseractException {
        ITesseract instance = new Tesseract();
        // The tessdata directory (trained language data) must be given as an
        // absolute path when it is not located in the project root.
        instance.setDatapath("/Users/mac/Desktop/tessdata");
        // Languages other than English need their own traineddata file in tessdata.
        instance.setLanguage("eng");

        // Image to recognise.
        File imageFile = new File("/Users/mac/Desktop/images/8.png");
        long startTime = System.currentTimeMillis();
        String ocrResult = instance.doOCR(imageFile);
        System.out.println(ocrResult);

        // Ignore surrounding whitespace (such as a trailing newline) when parsing.
        String text = ocrResult == null ? "" : ocrResult.trim();

        // Expected layout: <digit><one character><digit>. Both operands are
        // validated up front so that substring/parseInt cannot throw.
        if (text.length() >= 3 && isStartWithNumber(text) && isStartWithNumber(text.substring(2))) {
            String start = text.substring(0, 1);
            String meg = text.substring(1, 2);
            String end = text.substring(2, 3);
            // Print the recognition result and the elapsed time.
            System.out.println("OCR Result: \n" + ocrResult + "\n 耗時:" + (System.currentTimeMillis() - startTime) + "ms");
            // Print the computed answer; the middle character is echoed but the
            // two digits are always added.
            System.out.println(start + meg + end + "=" + (Integer.parseInt(start) + Integer.parseInt(end)));
        } else {
            System.out.println("僅支持數字");
        }
    }

    /**
     * Tells whether the first character of {@code str} is an ASCII digit.
     *
     * @param str the text to inspect; may be {@code null} or empty
     * @return {@code true} if {@code str} starts with a character in {@code 0-9};
     *         {@code false} otherwise, including for {@code null} or empty input
     */
    public static boolean isStartWithNumber(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        Matcher firstChar = SINGLE_DIGIT.matcher(str.substring(0, 1));
        return firstChar.matches();
    }
}
Maven 配置文件(pom.xml):
<!-- Maven build descriptor for the image:images artifact (jar packaging, UTF-8 source encoding). -->
<!-- Declared dependencies: httpclient 4.5.8, jsoup 1.12.1, commons-io 2.5, fastjson 1.2.47, junit 3.8.1 (test scope), tess4j 4.4.1. -->
<!-- NOTE(review): the Java classes shown with this POM import only httpclient, commons-io and tess4j; jsoup and fastjson may be used elsewhere, so confirm before removing them. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>image</groupId> <artifactId>images</artifactId> <version>0.0.1-SNAPSHOT</version> <packaging>jar</packaging> <name>images</name> <url>http://maven.apache.org</url> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> <dependencies> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.5.8</version> </dependency> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.12.1</version> </dependency> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.5</version> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.47</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>3.8.1</version> <scope>test</scope> </dependency> <dependency> <groupId>net.sourceforge.tess4j</groupId> <artifactId>tess4j</artifactId> <version>4.4.1</version> </dependency> </dependencies> </project>
若不是 Maven 項目,則需要手動引入以下 jar 包:
org/apache/httpcomponents/httpclient/4.5.8/httpclient-4.5.8.jar
org/apache/httpcomponents/httpcore/4.4.11/httpcore-4.4.11.jar
commons-logging/commons-logging/1.2/commons-logging-1.2.jar
commons-codec/commons-codec/1.11/commons-codec-1.11.jar
org/jsoup/jsoup/1.12.1/jsoup-1.12.1.jar
commons-io/commons-io/2.5/commons-io-2.5.jar
com/alibaba/fastjson/1.2.47/fastjson-1.2.47.jar
junit/junit/3.8.1/junit-3.8.1.jar
net/sourceforge/tess4j/tess4j/4.4.1/tess4j-4.4.1.jar
net/java/dev/jna/jna/5.4.0/jna-5.4.0.jar
com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0.jar
org/ghost4j/ghost4j/1.0.1/ghost4j-1.0.1.jar
log4j/log4j/1.2.17/log4j-1.2.17.jar
commons-beanutils/commons-beanutils/1.9.2/commons-beanutils-1.9.2.jar
commons-collections/commons-collections/3.2.1/commons-collections-3.2.1.jar
org/apache/xmlgraphics/xmlgraphics-commons/1.4/xmlgraphics-commons-1.4.jar
com/lowagie/itext/2.1.7/itext-2.1.7.jar
org/apache/pdfbox/pdfbox/2.0.17/pdfbox-2.0.17.jar
org/apache/pdfbox/fontbox/2.0.17/fontbox-2.0.17.jar
org/apache/pdfbox/pdfbox-tools/2.0.17/pdfbox-tools-2.0.17.jar
org/apache/pdfbox/pdfbox-debugger/2.0.17/pdfbox-debugger-2.0.17.jar
org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2.jar
net/sourceforge/lept4j/lept4j/1.12.3/lept4j-1.12.3.jar
org/jboss/jboss-vfs/3.2.14.Final/jboss-vfs-3.2.14.Final.jar
org/jboss/logging/jboss-logging/3.1.4.GA/jboss-logging-3.1.4.GA.jar
ch/qos/logback/logback-classic/1.2.3/logback-classic-1.2.3.jar
ch/qos/logback/logback-core/1.2.3/logback-core-1.2.3.jar
org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar
org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28.jar
org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28.jar
org/slf4j/log4j-over-slf4j/1.7.28/log4j-over-slf4j-1.7.28.jar
所需的 tess4j 數據文件位於 tessdata/ 目錄。這個目錄可以放在項目中,也可以放在其他位置,但代碼中指定的路徑不能寫錯。
tessdata/
chi_sim.traineddata // 中文識別
eng.traineddata // 英文及數字識別,下載地址:https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata
osd.traineddata
pdf.ttf
pdf.ttx
readme
tess4j官網文件下載:https://sourceforge.net/projects/tess4j/