Windows 版本
這兩個(Tesseract 安裝程式與語言包)請自行搜尋下載即可,然後我們將其安裝在 D: 槽下,並將語言包放在安裝目錄下的 tessdata 目錄中。
Linux 版本
我使用的是 CentOS 7,下面給出安裝 tesseract 的前提條件。
# Build toolchain required to compile leptonica and tesseract from source.
yum install autoconf automake libtool
# Development headers for the image formats and compression used by the build.
yum install libjpeg-devel libpng-devel libtiff-devel zlib-devel
請注意上面其實是兩條 yum install 命令,需要分開執行;這些庫一般系統中都已經存在。
安裝依賴的 leptonica 庫
# Download, unpack and build the leptonica image library that tesseract links against.
wget http://www.leptonica.com/source/leptonica-1.72.tar.gz
tar -xvf leptonica-1.72.tar.gz
cd leptonica-1.72
# Stop at the first failing step so a broken build is never installed.
./configure --with-libpng && make && make install
這個依賴庫要注意,必須是 1.71 以上的版本。
下載編譯 tesseract
# Download the 3.04.00 source archive directly under the desired file name.
# (wget would otherwise save it as 3.04.00.tar.gz, so a later "mv 3.04.00 ..." fails.)
wget -O Tesseract3.04.00.tar.gz https://github.com/tesseract-ocr/tesseract/archive/3.04.00.tar.gz
tar -xvf Tesseract3.04.00.tar.gz
cd tesseract-3.04.00/
# Generate the configure script, then configure, build and install.
./autogen.sh
./configure
make && make install
# Refresh the shared-library cache so the newly installed libraries are found.
ldconfig
tesseract 我安裝在了 /usr/local 這個目錄下,名稱爲 tesseract-3.04.00。如果你使用的是 3.01 的版本,需要在 ./autogen.sh 後面執行 mkdir m4 這條命令,否則它會提示 m4 這個目錄不存在。
# Download the trained language data: English, Simplified Chinese, Traditional Chinese.
# NOTE(review): these URLs track the "master" branch of tessdata; confirm that the
# files there are still compatible with tesseract 3.04 before relying on them.
wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata
wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/chi_sim.traineddata
wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/chi_tra.traineddata
# NOTE(review): this archive is hosted on Google Code, which may no longer be reachable.
wget http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz
注意此處的語言包以及解壓出的語言包都要放在 /usr/local/share/tessdata/ 目錄下。
package com.zefun.common.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;
import org.jdesktop.swingx.util.OS;

/**
 * Runs the external {@code tesseract} command-line program on an image and
 * returns the recognized text.
 *
 * @author 高國藩
 * @date 2016-12-05 10:27:16
 */
public class Ocr {
    /** Language option flag: the lowercase letter "l", not the digit 1. */
    private static final String LANG_OPTION = "-l";
    /** Platform line separator, appended after every recognized line. */
    private static final String EOL = System.getProperty("line.separator");
    /** Platform file separator (only used by the commented-out Linux path below). */
    private static final String GNL = System.getProperty("file.separator");
    /** Logger. */
    private static final Logger LOGGER = Logger.getLogger(Ocr.class);
    /**
     * Tesseract installation directory. It is only used when the platform is
     * not Linux; on Linux the command is resolved through the PATH.
     */
    // private String tessPath = GNL + "usr" + GNL + "local" + GNL + "tesseract-3.04.00";
    private String tessPath = new File("D:\\Tesseract-OCR").getAbsolutePath();

    /**
     * Recognizes the text contained in an image.
     *
     * <p>The image is first converted by {@link ImageIOHelper#createImage},
     * then tesseract is run in the image's directory with the {@code eng}
     * language data. The temporary image and the text file produced by
     * tesseract are removed before this method returns, whether or not
     * recognition succeeded.
     *
     * @param imageFile image file to recognize
     * @param imageFormat ImageIO format name of {@code imageFile}, e.g. "jpg"
     * @return the recognized text, one platform line separator after each line
     * @throws Exception if the process cannot be started, the wait is
     *         interrupted, or the result cannot be read; a
     *         {@link RuntimeException} is thrown when the image cannot be
     *         converted or tesseract exits with a non-zero code
     */
    public String recognizeText(File imageFile, String imageFormat) throws Exception {
        File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
        if (tempImage == null) {
            // createImage reports conversion failures by returning null.
            throw new RuntimeException("Could not convert image for recognition: " + imageFile);
        }

        File workDir = imageFile.getParentFile();
        // Derive the output name from the temporary image so that concurrent
        // calls in one directory do not overwrite each other's result file.
        String outputBase = baseName(tempImage.getName()) + "_ocr";
        // tesseract appends ".txt" to the output base name it is given.
        File resultFile = new File(workDir, outputBase + ".txt");

        List<String> cmd = new ArrayList<String>();
        cmd.add(OS.isLinux() ? "tesseract" : new File(tessPath, "tesseract").getPath());
        cmd.add(tempImage.getName());
        cmd.add(outputBase);
        cmd.add(LANG_OPTION);
        // cmd.add("chi_sim"); switch the language data here
        cmd.add("eng");

        ProcessBuilder pb = new ProcessBuilder(cmd);
        pb.directory(workDir);
        pb.redirectErrorStream(true);
        // e.g. tesseract.exe 1.jpg 1 -l chi_sim
        LOGGER.info(cmd.toString());

        try {
            Process process = pb.start();
            // The merged stdout/stderr must be consumed, otherwise the child can
            // block on a full pipe and waitFor() never returns.
            String processOutput = drain(process.getInputStream());
            int exitCode;
            try {
                exitCode = process.waitFor();
            } catch (InterruptedException e) {
                process.destroy();
                Thread.currentThread().interrupt();
                throw e;
            }
            if (exitCode != 0) {
                LOGGER.error("tesseract exited with code " + exitCode + ": " + processOutput);
                throw new RuntimeException(describeExitCode(exitCode));
            }
            String text = readResult(resultFile);
            LOGGER.info("圖形識別結果 ====>>> " + text);
            return text;
        } finally {
            // Always remove the working files, on success and on failure.
            tempImage.delete();
            resultFile.delete();
        }
    }

    /** Returns the file name without its last extension (unchanged if it has none). */
    private static String baseName(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot > 0 ? fileName.substring(0, dot) : fileName;
    }

    /** Reads the whole stream as text in the platform charset and closes it. */
    private static String drain(InputStream stream) throws IOException {
        StringBuilder out = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(stream, Charset.defaultCharset()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                out.append(line).append(EOL);
            }
        }
        return out.toString();
    }

    /** Reads the UTF-8 result file written by tesseract, line by line. */
    private static String readResult(File resultFile) throws IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(resultFile), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append(EOL);
            }
        }
        return text.toString();
    }

    /** Maps a non-zero tesseract exit code to the error message reported to callers. */
    private static String describeExitCode(int exitCode) {
        switch (exitCode) {
            case 1:
                return "Errors accessing files.There may be spaces in your image's filename.";
            case 29:
                return "Cannot recongnize the image or its selected region.";
            case 31:
                return "Unsupported image format.";
            default:
                return "Errors occurred.";
        }
    }
}
此處要注意一下tesseract的命令目錄,Windows和Linux的目錄不一樣,尤爲分隔符。
package com.zefun.common.utils;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageOutputStream;

import org.apache.log4j.Logger;

import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;

/**
 * Converts verification-code images to uncompressed TIFF files.
 *
 * @author 高國藩
 * @date 2016-12-05 10:31:09
 */
public class ImageIOHelper {

    /** Logger. */
    private static final Logger LOGGER = Logger.getLogger(ImageIOHelper.class);

    /**
     * Converts an image file to an uncompressed TIFF file stored next to it.
     *
     * @param imageFile source image file
     * @param imageFormat ImageIO format name of the source image, e.g. "jpg"
     * @return the TIFF file that was written, or {@code null} when no
     *         reader/writer is available for the formats or the conversion
     *         failed (the failure is logged and no partial file is left behind)
     */
    public static File createImage(File imageFile, String imageFormat) {
        Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
        if (!readers.hasNext()) {
            LOGGER.error("No ImageIO reader available for format: " + imageFormat);
            return null;
        }
        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
        if (!writers.hasNext()) {
            LOGGER.error("No ImageIO writer available for format: tiff");
            return null;
        }

        ImageReader reader = readers.next();
        ImageWriter writer = writers.next();
        File tempFile = tempImageFile(imageFile);
        boolean converted = false;
        try {
            try (ImageInputStream iis = ImageIO.createImageInputStream(imageFile)) {
                if (iis == null) {
                    throw new IOException("Cannot open image input stream for " + imageFile);
                }
                reader.setInput(iis);
                // Read the stream metadata
                IIOMetadata streamMetadata = reader.getStreamMetadata();
                // Set up the writeParam: write the TIFF without compression
                TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE);
                tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

                BufferedImage bi = reader.read(0);
                IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

                // Start from a fresh file so bytes of an older, larger file with the
                // same name cannot survive past the end of the new image data.
                if (tempFile.exists() && !tempFile.delete()) {
                    throw new IOException("Cannot replace existing file " + tempFile);
                }
                try (ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile)) {
                    if (ios == null) {
                        throw new IOException("Cannot open image output stream for " + tempFile);
                    }
                    writer.setOutput(ios);
                    writer.write(streamMetadata, image, tiffWriteParam);
                }
            }
            converted = true;
        } catch (IOException e) {
            LOGGER.error("Failed to convert " + imageFile + " to TIFF", e);
        } finally {
            writer.dispose();
            reader.dispose();
            if (!converted) {
                // Streams are closed by now, so a partial file can be removed.
                tempFile.delete();
            }
        }
        return converted ? tempFile : null;
    }

    /**
     * Builds the path of the temporary TIFF file for an image: same directory,
     * the original base name followed by "0", and the extension "tif"
     * (for example {@code code.jpg} becomes {@code code0.tif}).
     *
     * @param imageFile source image file
     * @return the temporary TIFF file location (the file is not created here)
     */
    private static File tempImageFile(File imageFile) {
        String name = imageFile.getName();
        int dot = name.lastIndexOf('.');
        // A name without an extension is used as the base name as-is.
        String base = dot >= 0 ? name.substring(0, dot) : name;
        return new File(imageFile.getParentFile(), base + "0.tif");
    }
}
該程序會首先將圖片轉換爲 tif 類型文件,再從其中讀取出數據。
測試加載
// Resolve the sample image to an absolute location before handing it to the OCR helper.
File sampleImage = new File(path4).getAbsoluteFile();
// Recognize the image as a JPEG and log the text that was read from it.
String valCode = new Ocr().recognizeText(sampleImage, "jpg");
logger.info(valCode);
注意在測試中的文件路徑問題,Linux和Windows區別很大。
<!-- JNA, declared explicitly at a fixed version. -->
<dependency>
    <groupId>net.java.dev.jna</groupId>
    <artifactId>jna</artifactId>
    <version>4.2.1</version>
</dependency>
<!-- Tess4J; the com.sun.jna:jna artifact is excluded from its transitive dependencies. -->
<dependency>
    <groupId>net.sourceforge.tess4j</groupId>
    <artifactId>tess4j</artifactId>
    <version>2.0.1</version>
    <exclusions>
        <exclusion>
            <groupId>com.sun.jna</groupId>
            <artifactId>jna</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<!-- SwingX; presumably the source of org.jdesktop.swingx.util.OS used for platform detection (confirm). -->
<dependency>
    <groupId>com.kenai.nbpwr</groupId>
    <artifactId>org-jdesktop-swingx</artifactId>
    <version>1.6-201002261215</version>
</dependency>
package com.zefun.wechat.controller;

import java.io.File;
import java.io.InputStream;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.servlet.ModelAndView;

import com.zefun.common.consts.Url;
import com.zefun.common.utils.HttpClientUtil;
import com.zefun.common.utils.Ocr;
import com.zefun.web.controller.BaseController;

import net.sf.json.JSONObject;

/**
 * ImageUtilsController
 *
 * @author 高國藩
 * @date 2016-12-05 12:01:32
 */
@Controller
public class ImageUtilsController extends BaseController {

    /** logger */
    private Logger logger = Logger.getLogger(ImageUtilsController.class);

    /**
     * Downloads the verification-code image, reads it with {@link Ocr}, and
     * posts the login form together with the recognized code, reusing the
     * cookies received with the image.
     *
     * <p>Any failure is logged and swallowed. The downloaded image is always
     * deleted before returning.
     *
     * @author 高國藩
     * @date 2016-12-06 18:49:10
     * @param request request
     * @return always {@code null}
     */
    @RequestMapping(value = Url.MessagePushMember.VIEW_IMAGE, method = RequestMethod.GET)
    public ModelAndView appointView(HttpServletRequest request){
        String verPath = "http://vip1.sentree.com.cn/shair/vc";
        String loginAction = "http://vip1.sentree.com.cn/shair/loginAction!ajaxLogin.action";
        File tempImagePathFile = null;
        try {
            // Use the platform temp directory instead of a hard-coded drive letter.
            tempImagePathFile = File.createTempFile("vercode", ".jpg");

            BasicCookieStore cookieStore = new BasicCookieStore();
            try (CloseableHttpClient httpImageClientStore = HttpClientBuilder.create().build()) {
                HttpResponse imageResponse = httpImageClientStore.execute(new HttpGet(verPath));
                // Keep the cookies issued with the image so the login request can send them.
                cookieStore = HttpClientUtil.setCookieStore(imageResponse, cookieStore, "vip1.sentree.com.cn");
                try (InputStream is = imageResponse.getEntity().getContent()) {
                    FileUtils.copyInputStreamToFile(is, tempImagePathFile);
                }
            }

            String valCode = new Ocr().recognizeText(tempImagePathFile, "jpg");

            try (CloseableHttpClient httpClientLogin =
                    HttpClients.custom().setDefaultCookieStore(cookieStore).build()) {
                HttpPost httpPost = new HttpPost(loginAction);
                // NOTE(review): credentials are hard-coded in source; move them to
                // protected configuration and confirm this automated login is authorized.
                Map<String, String> params = new HashMap<>();
                params.put("login", "fs");
                params.put("passwd", "ab82443397");
                params.put("rand", valCode.trim());
                List<NameValuePair> pairs = HttpClientUtil.geneNameValPairs(params);
                httpPost.setEntity(new UrlEncodedFormEntity(pairs, "UTF-8"));
                RequestConfig reqConf = RequestConfig.DEFAULT;
                httpPost.setConfig(reqConf);

                HttpResponse loginResult = httpClientLogin.execute(httpPost);
                String loginCode = EntityUtils.toString(loginResult.getEntity());
                // String.valueOf tolerates a response that has no "code" field.
                Object code = JSONObject.fromObject(loginCode).get("code");
                if ("7".equals(String.valueOf(code))){
                    logger.info("系統侵入成功 ...");
                }
            }
        } catch (Exception e) {
            logger.error("Verification-code login failed", e);
        } finally {
            // deleteQuietly accepts null and never throws.
            FileUtils.deleteQuietly(tempImagePathFile);
        }
        return null;
    }
}