Java 圖片提取驗證碼

安裝 Tesseract-OCR

Windows 版本

  1. tesseract-ocr-setup-xx.xx.exe       
  2. chi_sim.traineddata.gz   中文語言包  

這兩個文件請自行搜索下載即可,然後我們將 Tesseract 安裝在 D: 盤下,並將語言包放在安裝目錄下的 tessdata 目錄中。

Linux 版本

我使用的是 CentOS 7,下面給出安裝 tesseract 的前提條件。

  1. 安裝編譯庫
    # Build toolchain needed by the autogen/configure steps further down.
    yum install autoconf automake libtool
    # Image-format development headers (JPEG, PNG, TIFF, zlib).
    yum install libjpeg-devel libpng-devel libtiff-devel zlib-devel

    請注意上面是兩條命令,每條命令安裝的多個庫之間用空格隔開;一般系統中大多已經存在這些庫。

  2. 安裝依賴的 leptonica 庫

    # Fetch and unpack the leptonica 1.72 sources.
    wget http://www.leptonica.com/source/leptonica-1.72.tar.gz
    tar -xvf leptonica-1.72.tar.gz  
    cd leptonica-1.72
    # Configure with libpng enabled, then build and install in one chain so a failed step stops the rest.
    ./configure --with-libpng && make && make install

    這個依賴庫要注意,必須是 1.71 以上的版本。

  3. 下載編譯 tesseract

    # Download the 3.04.00 source archive; wget names the file after the last
    # URL segment, i.e. 3.04.00.tar.gz.
    wget https://github.com/tesseract-ocr/tesseract/archive/3.04.00.tar.gz
    # Rename the archive to something recognisable (the source must include the
    # .tar.gz suffix, otherwise mv fails with "No such file or directory").
    mv 3.04.00.tar.gz Tesseract3.04.00.tar.gz
    tar -xvf Tesseract3.04.00.tar.gz
    cd tesseract-3.04.00/
    # Generate the configure script, then configure, build and install.
    ./autogen.sh
    ./configure
    make && make install
    # Refresh the shared-library cache so the newly installed libraries are found.
    ldconfig

    我把 tesseract 安裝在 /usr/local 目錄下,目錄名稱爲 tesseract-3.04.00。如果你使用的是 3.01 版本,需要在 ./autogen.sh 之後執行 mkdir m4 這條命令,否則會提示 m4 這個目錄不存在。

  4. 下載識別庫(語言包)
    # Trained data for English, Simplified Chinese and Traditional Chinese.
    # --no-check-certificate tells wget to skip TLS certificate validation.
    wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata
    wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/chi_sim.traineddata 
    wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/chi_tra.traineddata
    # English data bundle for the 3.02 release.
    # NOTE(review): this is a googlecode.com URL - verify it still resolves before relying on it.
    wget http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz

    注意此處下載的語言包以及解壓出的語言包都要放在 /usr/local/share/tessdata/ 目錄下。

Java 讀取數據 

  1. 啓動命令程序
    package com.zefun.common.utils;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.InputStreamReader;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.log4j.Logger;
    import org.jdesktop.swingx.util.OS;
    
    /**
     * Runs the external {@code tesseract} command-line program on an image and
     * returns the recognised text.
     *
     * <p>The image is first converted to a temporary TIFF file through
     * {@code ImageIOHelper.createImage}; tesseract is then started in the image's
     * directory and writes its result to {@code output.txt} there, which is read
     * back as UTF-8. Both temporary files are removed before the method returns,
     * whether recognition succeeded or not.
     *
     * @author 高國藩
     * @date 2016-12-05
     */
    public class Ocr {

        /** Tesseract language switch: the lowercase letter "l", not the digit one. */
        private static final String LANG_OPTION = "-l";
        /** Platform line separator, appended after every recognised line. */
        private static final String EOL = System.getProperty("line.separator");
        /** Platform file separator (only needed by the Linux install path shown below). */
        private static final String GNL = System.getProperty("file.separator");
        /** log */
        private Logger logger = Logger.getLogger(Ocr.class);

        /**
         * Tesseract install directory, used on every platform except Linux (where
         * {@code tesseract} is resolved through the PATH instead). A Linux install
         * directory would be:
         * {@code GNL + "usr" + GNL + "local" + GNL + "tesseract-3.04.00"}.
         */
        private String tessPath = new File("D:\\Tesseract-OCR").getAbsolutePath();

        /**
         * Recognises the text contained in an image.
         *
         * @param imageFile   the image to recognise
         * @param imageFormat ImageIO format name of {@code imageFile}, e.g. {@code "jpg"}
         * @return the recognised text, one platform line separator after each line
         * @throws Exception  if the image cannot be converted, tesseract cannot be
         *                    started or exits with a non-zero code, or the result
         *                    file cannot be read
         */
        public String recognizeText(File imageFile, String imageFormat) throws Exception {
            File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
            if (tempImage == null) {
                // createImage reports I/O failures by returning null; fail with a clear
                // message instead of a NullPointerException further down.
                throw new RuntimeException("Unable to convert image to TIFF: " + imageFile);
            }

            File workDir = imageFile.getParentFile();
            File outputFile = new File(workDir, "output");
            // tesseract appends ".txt" to the output base name it is given.
            File resultFile = new File(outputFile.getAbsolutePath() + ".txt");
            try {
                List<String> cmd = new ArrayList<String>();
                cmd.add(OS.isLinux() ? "tesseract" : tessPath + "//tesseract");
                cmd.add(tempImage.getName());
                cmd.add(outputFile.getName());
                cmd.add(LANG_OPTION);
                // Use "chi_sim" here to switch to the Simplified Chinese language pack.
                cmd.add("eng");

                // Equivalent command line: tesseract.exe 1.jpg 1 -l chi_sim
                ProcessBuilder pb = new ProcessBuilder(cmd);
                pb.directory(workDir);
                pb.redirectErrorStream(true);
                logger.info(cmd.toString());

                Process process = pb.start();
                // The merged stdout/stderr pipe must be drained before waiting: if the
                // child fills the pipe buffer it blocks and waitFor() never returns.
                String console = drainOutput(process);
                int exitCode;
                try {
                    exitCode = process.waitFor();
                }
                catch (InterruptedException e) {
                    process.destroy();
                    Thread.currentThread().interrupt();
                    throw e;
                }

                if (exitCode != 0) {
                    logger.warn("tesseract exited with code " + exitCode + ": " + console);
                    throw new RuntimeException(describeFailure(exitCode));
                }

                String text = readResult(resultFile);
                logger.info("圖形識別結果 ====>>> " + text);
                return text;
            }
            finally {
                // Clean up the working files on success and on every failure path.
                tempImage.delete();
                resultFile.delete();
            }
        }

        /** Reads everything the process prints (stdout and stderr are merged) until it closes the stream. */
        private static String drainOutput(Process process) throws java.io.IOException {
            StringBuilder console = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    console.append(line).append(EOL);
                }
            }
            return console.toString();
        }

        /** Reads the UTF-8 result file written by tesseract, terminating each line with {@link #EOL}. */
        private static String readResult(File resultFile) throws java.io.IOException {
            StringBuilder text = new StringBuilder();
            try (BufferedReader in = new BufferedReader(new InputStreamReader(
                    new FileInputStream(resultFile), "UTF-8"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    text.append(line).append(EOL);
                }
            }
            return text.toString();
        }

        /** Maps a non-zero tesseract exit code to the error message reported to the caller. */
        private static String describeFailure(int exitCode) {
            switch (exitCode) {
                case 1:
                    return "Errors accessing files.There may be spaces in your image's filename.";
                case 29:
                    return "Cannot recongnize the image or its selected region.";
                case 31:
                    return "Unsupported image format.";
                default:
                    return "Errors occurred.";
            }
        }

    }

    此處要注意 tesseract 命令所在的目錄:Windows 和 Linux 的路徑不同,尤其是路徑分隔符。

  2. 解析圖片程序
    package com.zefun.common.utils;
    
    import java.awt.image.BufferedImage;
    import java.io.File;
    import java.io.IOException;
    import java.util.Iterator;
    import java.util.Locale;
    
    import javax.imageio.IIOImage;
    import javax.imageio.ImageIO;
    import javax.imageio.ImageReader;
    import javax.imageio.ImageWriteParam;
    import javax.imageio.ImageWriter;
    import javax.imageio.metadata.IIOMetadata;
    import javax.imageio.stream.ImageInputStream;
    import javax.imageio.stream.ImageOutputStream;
    
    import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
    
    /**
     * Converts an image file into an uncompressed TIFF file that can be handed to
     * the tesseract command-line program.
     *
     * <p>The TIFF write parameters are taken from the TIFF writer itself
     * ({@code ImageWriter.getDefaultWriteParam()}), so this class only needs the
     * standard {@code javax.imageio} API plus whichever TIFF writer plugin is
     * registered at runtime.
     *
     * @author 高國藩
     * @date 2016-12-05
     */
    public class ImageIOHelper {

        /**
         * Converts an image file to an uncompressed TIFF file stored next to it.
         *
         * @param imageFile   the source image
         * @param imageFormat ImageIO format name of the source image, e.g. {@code "jpg"}
         * @return the TIFF file (source name with {@code "0.tif"} in place of the
         *         extension), or {@code null} if reading or writing failed with an
         *         I/O error
         * @throws java.util.NoSuchElementException if no ImageIO reader is registered
         *         for {@code imageFormat} or no writer is registered for TIFF
         */
        public static File createImage(File imageFile, String imageFormat) {
            Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
            if (!readers.hasNext()) {
                throw new java.util.NoSuchElementException(
                        "No ImageIO reader registered for format: " + imageFormat);
            }
            Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
            if (!writers.hasNext()) {
                throw new java.util.NoSuchElementException(
                        "No ImageIO writer registered for format: tiff");
            }
            ImageReader reader = readers.next();
            ImageWriter writer = writers.next();
            File tempFile = tempImageFile(imageFile);

            try (ImageInputStream iis = ImageIO.createImageInputStream(imageFile)) {
                if (iis == null) {
                    throw new IOException("Cannot open image for reading: " + imageFile);
                }
                reader.setInput(iis);
                IIOMetadata streamMetadata = reader.getStreamMetadata();
                BufferedImage bi = reader.read(0);
                IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

                // Write the TIFF without compression.
                ImageWriteParam tiffWriteParam = writer.getDefaultWriteParam();
                tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

                // A file-backed ImageOutputStream does not truncate an existing file, so
                // remove any leftover from an earlier run first.
                tempFile.delete();
                try (ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile)) {
                    if (ios == null) {
                        throw new IOException("Cannot open file for writing: " + tempFile);
                    }
                    writer.setOutput(ios);
                    writer.write(streamMetadata, image, tiffWriteParam);
                }
                return tempFile;
            }
            catch (IOException e) {
                // Callers are told about I/O failures through a null return value.
                e.printStackTrace();
                // Never leave (or hand back) a partially written file.
                tempFile.delete();
                return null;
            }
            finally {
                writer.dispose();
                reader.dispose();
            }
        }

        /**
         * Derives the temporary TIFF file for an image: same directory, the file name
         * without its extension followed by {@code "0.tif"} (e.g. {@code a.jpg} gives
         * {@code a0.tif}). A name without an extension simply gets {@code "0.tif"}
         * appended.
         *
         * @param imageFile  the source image
         * @return           the TIFF file to write
         */
        private static File tempImageFile(File imageFile) {
            // Work on the file name only, so a dot inside a directory name is never
            // mistaken for the extension separator.
            String name = imageFile.getName();
            int dot = name.lastIndexOf('.');
            String baseName = dot >= 0 ? name.substring(0, dot) : name;
            return new File(imageFile.getParentFile(), baseName + "0.tif");
        }
    }

    該程序會先將圖片轉換爲 tif 格式的文件,再從中讀取出數據。

  3. 測試加載

    // Resolve the captcha image to an absolute path, run OCR on it as a JPEG and log the text.
    File captchaFile = new File(path4).getAbsoluteFile();
    String valCode = new Ocr().recognizeText(captchaFile, "jpg");
    logger.info(valCode);

    注意在測試中的文件路徑問題,Linux和Windows區別很大。

  4. Maven 包管理
    <dependency>
    		<groupId>net.java.dev.jna</groupId>
    		<artifactId>jna</artifactId>
    		<version>4.2.1</version>
    	</dependency>
    	<dependency>
    		<groupId>net.sourceforge.tess4j</groupId>
    		<artifactId>tess4j</artifactId>
    		<version>2.0.1</version>
    		<exclusions>
    			<exclusion>
    				<groupId>com.sun.jna</groupId>
    				<artifactId>jna</artifactId>
    			</exclusion>
    		</exclusions>
    	</dependency>
    	<dependency>
    		<groupId>com.kenai.nbpwr</groupId>
    		<artifactId>org-jdesktop-swingx</artifactId>
    		<version>1.6-201002261215</version>
    	</dependency>

     

使用Java模擬系統登陸

package com.zefun.wechat.controller;

import java.io.File;
import java.io.InputStream;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.servlet.ModelAndView;

import com.zefun.common.consts.Url;
import com.zefun.common.utils.HttpClientUtil;
import com.zefun.common.utils.Ocr;
import com.zefun.web.controller.BaseController;
import net.sf.json.JSONObject;

/**
 * Controller that demonstrates a scripted login protected by an image captcha:
 * it downloads the captcha, recognises it with {@link Ocr} and posts the login
 * form together with the cookies issued alongside the captcha.
 *
 * @author 高國藩
 * @date 2016-12-05
 */
@Controller
public class ImageUtilsController extends BaseController {

    /** Host the cookies are bound to. */
    private static final String TARGET_DOMAIN = "vip1.sentree.com.cn";
    /** URL that serves the captcha image. */
    private static final String VER_PATH = "http://vip1.sentree.com.cn/shair/vc";
    /** URL of the AJAX login action. */
    private static final String LOGIN_ACTION = "http://vip1.sentree.com.cn/shair/loginAction!ajaxLogin.action";
    /** Value of the "code" field that the original code treats as a successful login. */
    private static final String LOGIN_OK_CODE = "7";
    // NOTE(review): credentials are hard-coded in source; move them to external
    // configuration before this is used outside a demo.
    /** Login name sent in the form. */
    private static final String LOGIN_NAME = "fs";
    /** Password sent in the form. */
    private static final String LOGIN_PASSWORD = "ab82443397";

    /** logger */
    private Logger logger = Logger.getLogger(ImageUtilsController.class);

    /**
     * Fetches the captcha, recognises it and attempts the login.
     *
     * <p>Failures are logged and not propagated. The downloaded captcha image is
     * always deleted and both HTTP clients are always closed.
     *
     * @author 高國藩
     * @date 2016-12-06
     * @param request    request
     * @return           always {@code null}
     */
    @RequestMapping(value = Url.MessagePushMember.VIEW_IMAGE, method = RequestMethod.GET)
    public ModelAndView appointView(HttpServletRequest request){
        File tempImagePathFile = null;
        try (CloseableHttpClient httpImageClientStore = HttpClientBuilder.create().build()) {
            HttpResponse imageResponse = httpImageClientStore.execute(new HttpGet(VER_PATH));

            // Carry the cookies issued with the captcha over to the login request.
            BasicCookieStore cookieStore = HttpClientUtil.setCookieStore(
                    imageResponse, new BasicCookieStore(), TARGET_DOMAIN);

            // Use the platform temp directory instead of a Windows-only drive letter.
            tempImagePathFile = File.createTempFile("captcha", ".jpg");
            try (InputStream is = imageResponse.getEntity().getContent()) {
                FileUtils.copyInputStreamToFile(is, tempImagePathFile);
            }
            String valCode = new Ocr().recognizeText(tempImagePathFile, "jpg");

            try (CloseableHttpClient httpClientLogin =
                    HttpClients.custom().setDefaultCookieStore(cookieStore).build()) {
                Map<String, String> params = new HashMap<>();
                params.put("login", LOGIN_NAME);
                params.put("passwd", LOGIN_PASSWORD);
                params.put("rand", valCode.trim());
                List<NameValuePair> pairs = HttpClientUtil.geneNameValPairs(params);

                HttpPost httpPost = new HttpPost(LOGIN_ACTION);
                httpPost.setEntity(new UrlEncodedFormEntity(pairs, "UTF-8"));
                httpPost.setConfig(RequestConfig.DEFAULT);

                HttpResponse loginResult = httpClientLogin.execute(httpPost);
                String loginCode = EntityUtils.toString(loginResult.getEntity());

                // String.valueOf tolerates a response without a "code" field.
                Object code = JSONObject.fromObject(loginCode).get("code");
                if (LOGIN_OK_CODE.equals(String.valueOf(code))) {
                    logger.info("系統侵入成功 ...");
                }
            }
        }
        catch (Exception e) {
            logger.error("Captcha login attempt failed", e);
        }
        finally {
            // deleteQuietly accepts null, so this is safe even if the download never started.
            FileUtils.deleteQuietly(tempImagePathFile);
        }
        return null;
    }

}
相關文章
相關標籤/搜索