最近在調研用 Java 識別圖片中的數字,查看了一些資料,在此轉載一篇識別簡單驗證碼的文章。該方法只適用於字體統一規整、沒有扭曲拉伸的簡單數字驗證碼的識別,形如(示例圖片缺失)這樣的圖片驗證碼。
引用jar包:jai-core-1.1.3.jar 、jai-codec-1.1.3.jar
算法
算法思路如下:
1. 根據驗證碼圖片的分析結果(主要是分析數字所在的像素位置),對其進行分割,分割成包含單個數字的圖片。
2. 對分割後的圖片先進行灰度化,然後二值化,生成單色位圖。
3. 讀取單色位圖的像素點,轉換爲 0、1 數組。
4. 把該數組和提前生成好的 0-9 的字模數組進行比對,取匹配率最大的那個字模所對應的數字。
package com;
import java.awt.Graphics;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import javax.imageio.ImageIO;
import javax.media.jai.JAI;
import javax.media.jai.RenderedOp;
/**
 * Recognizer for simple four-digit numeric verification codes (CAPTCHA images).
 *
 * <p>Algorithm: the image is cut into four fixed 8x11 cells, one per digit. Each cell is
 * converted to grayscale and thresholded into a 0/1 array (0 = light background,
 * 1 = dark glyph pixel). That array is compared against the built-in 0-9 glyph templates
 * and the template with the fewest differing pixels wins.
 *
 * <p>This only works for codes rendered in the one fixed font the templates were taken
 * from, at the fixed cell positions, without distortion.
 *
 * @version 1.0 2009-7-7
 * @author huangyuanmu
 * @since JDK 1.5.0_8
 */
public class NumberVerificationCodeIdentifier {
    static {
        // Retained from the original implementation so that loading this class has the same
        // JVM-wide side effect as before. The recognizer itself no longer calls JAI.
        System.setProperty("com.sun.media.jai.disableMediaLib", "true");
    }

    /** Width in pixels of one digit cell. */
    private static final int CELL_WIDTH = 8;

    /** Height in pixels of one digit cell. */
    private static final int CELL_HEIGHT = 11;

    /** Y coordinate of the top edge of every digit cell. */
    private static final int CELL_TOP = 1;

    /** X coordinate of the left edge of each of the four digit cells. */
    private static final int[] CELL_LEFT = { 2, 11, 20, 29 };

    /** Gray levels at or above this value count as background (white). */
    private static final int THRESHOLD = 100;

    // Glyph templates for the digits 0-9. Each template is an 8x11 cell stored row by row
    // (88 entries); 1 marks a dark glyph pixel, 0 marks background.
    static int[][] value = {
            // num 0
            { 0, 0, 0, 1, 1, 1, 0, 0,
              0, 0, 1, 0, 0, 0, 1, 0,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 0, 1, 0, 0, 0, 1, 0,
              0, 0, 0, 1, 1, 1, 0, 0 },
            // num 1
            { 0, 0, 0, 1, 1, 0, 0, 0,
              0, 0, 1, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 1, 1, 1, 1, 1, 0 },
            // num 2
            { 0, 0, 1, 1, 1, 1, 0, 0,
              0, 1, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 1, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 1, 0, 0, 0, 0,
              0, 0, 1, 0, 0, 0, 0, 0,
              0, 1, 0, 0, 0, 0, 0, 0,
              0, 1, 1, 1, 1, 1, 1, 0 },
            // num 3
            { 0, 0, 1, 1, 1, 1, 0, 0,
              0, 1, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 1, 0, 0,
              0, 0, 1, 1, 1, 0, 0, 0,
              0, 0, 0, 0, 0, 1, 0, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 1, 0, 0, 0, 0, 1, 0,
              0, 0, 1, 1, 1, 1, 0, 0 },
            // num 4
            { 0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 1, 1, 0,
              0, 0, 0, 0, 1, 0, 1, 0,
              0, 0, 0, 1, 0, 0, 1, 0,
              0, 0, 0, 1, 0, 0, 1, 0,
              0, 0, 1, 0, 0, 0, 1, 0,
              0, 1, 0, 0, 0, 0, 1, 0,
              0, 1, 1, 1, 1, 1, 1, 1,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0 },
            // num 5
            { 0, 1, 1, 1, 1, 1, 1, 0,
              0, 1, 0, 0, 0, 0, 0, 0,
              0, 1, 0, 0, 0, 0, 0, 0,
              0, 1, 0, 0, 0, 0, 0, 0,
              0, 1, 1, 1, 1, 0, 0, 0,
              0, 0, 0, 0, 0, 1, 0, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 1, 0, 0, 0, 1, 0, 0,
              0, 0, 1, 1, 1, 0, 0, 0 },
            // num 6
            { 0, 0, 0, 1, 1, 1, 1, 0,
              0, 0, 1, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 0,
              0, 1, 0, 0, 0, 0, 0, 0,
              0, 1, 0, 1, 1, 1, 0, 0,
              0, 1, 1, 0, 0, 0, 1, 0,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 0, 1, 0, 0, 0, 1, 0,
              0, 0, 0, 1, 1, 1, 0, 0 },
            // num 7
            { 0, 1, 1, 1, 1, 1, 1, 1,
              0, 0, 0, 0, 0, 0, 0, 1,
              0, 0, 0, 0, 0, 0, 1, 0,
              0, 0, 0, 0, 0, 1, 0, 0,
              0, 0, 0, 0, 0, 1, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 0, 1, 0, 0, 0,
              0, 0, 0, 1, 0, 0, 0, 0,
              0, 0, 0, 1, 0, 0, 0, 0,
              0, 0, 1, 0, 0, 0, 0, 0,
              0, 0, 1, 0, 0, 0, 0, 0 },
            // num 8
            { 0, 0, 1, 1, 1, 1, 1, 0,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 0, 1, 0, 0, 0, 1, 0,
              0, 0, 0, 1, 1, 1, 0, 0,
              0, 0, 1, 0, 0, 0, 1, 0,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 0, 1, 1, 1, 1, 1, 0 },
            // num 9
            { 0, 0, 0, 1, 1, 1, 0, 0,
              0, 0, 1, 0, 0, 0, 1, 0,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 0, 1,
              0, 0, 1, 0, 0, 0, 1, 1,
              0, 0, 0, 1, 1, 1, 0, 1,
              0, 0, 0, 0, 0, 0, 0, 1,
              0, 0, 0, 0, 0, 0, 0, 1,
              0, 1, 0, 0, 0, 0, 1, 0,
              0, 0, 1, 1, 1, 1, 0, 0 } };

    /**
     * Recognizes the four digits in an encoded image (any format ImageIO can decode).
     *
     * @author huangyuanmu 2009-7-14
     * @param byteArray the encoded image bytes; must not be null
     * @return the four recognized digits as a string
     * @throws Exception if the bytes cannot be read; a RuntimeException is thrown when the
     *         bytes do not decode to an image
     */
    public static String recognize(byte[] byteArray) throws Exception {
        if (byteArray == null) {
            throw new NullPointerException("byteArray");
        }
        InputStream is = new ByteArrayInputStream(byteArray);
        // ImageIO.read returns null when no registered reader understands the data;
        // the overload below turns that into an exception.
        BufferedImage image = ImageIO.read(is);
        return recognize(image);
    }

    /**
     * Recognizes the four digits in a decoded image.
     *
     * @author huangyuanmu 2009-7-14
     * @param image the verification-code image; must be at least 37x12 pixels
     * @return the four recognized digits as a string
     * @throws Exception declared for backward compatibility; a RuntimeException is thrown
     *         when {@code image} is null and a RasterFormatException when it is too small
     *         to contain the four digit cells
     */
    public static String recognize(BufferedImage image) throws Exception {
        if (null == image) {
            // Same runtime text as the original message; the non-ASCII character is
            // written as an escape so the source compiles under any file encoding.
            throw new RuntimeException("iamage\u7232null");
        }
        int requiredWidth = CELL_LEFT[CELL_LEFT.length - 1] + CELL_WIDTH;
        int requiredHeight = CELL_TOP + CELL_HEIGHT;
        if (image.getWidth() < requiredWidth || image.getHeight() < requiredHeight) {
            // getSubimage would throw this same exception type, but without saying what
            // size was expected.
            throw new java.awt.image.RasterFormatException("image is " + image.getWidth()
                    + "x" + image.getHeight() + " but at least " + requiredWidth + "x"
                    + requiredHeight + " is required");
        }

        StringBuilder digits = new StringBuilder();
        for (int k = 0; k < CELL_LEFT.length; k++) {
            BufferedImage cell = image.getSubimage(CELL_LEFT[k], CELL_TOP, CELL_WIDTH,
                    CELL_HEIGHT);
            digits.append(getMatchNum(toBitArray(cell)));
        }
        return digits.toString();
    }

    /**
     * Compares a 0/1 pixel array with every glyph template and returns the best match.
     *
     * @author huangyuanmu 2009-7-7
     * @param pix the 0/1 pixel array of one digit cell, row by row
     * @return the digit whose template differs from {@code pix} in the fewest positions;
     *         on a tie the smaller digit wins
     */
    private static int getMatchNum(int[] pix) {
        int best = -1;
        int bestDistance = Integer.MAX_VALUE;
        for (int k = 0; k < value.length; k++) {
            int[] template = value[k];
            if (template.length != pix.length) {
                throw new IllegalArgumentException("pixel array has " + pix.length
                        + " entries but template " + k + " has " + template.length);
            }
            int distance = 0;
            for (int i = 0; i < pix.length; i++) {
                distance += Math.abs(pix[i] - template[i]);
            }
            if (distance < bestDistance) {
                bestDistance = distance;
                best = k;
                if (distance == 0) {
                    break; // exact match, nothing can beat it
                }
            }
        }
        return best;
    }

    /**
     * Converts one digit cell to a 0/1 array: the cell is drawn onto a grayscale canvas and
     * every pixel whose gray level is below {@link #THRESHOLD} becomes 1, all others 0.
     *
     * @param colorImage the digit cell to convert
     * @return the row-by-row 0/1 array, CELL_WIDTH * CELL_HEIGHT entries long
     */
    private static int[] toBitArray(Image colorImage) {
        BufferedImage gray = new BufferedImage(CELL_WIDTH, CELL_HEIGHT,
                BufferedImage.TYPE_BYTE_GRAY);
        Graphics g = gray.getGraphics();
        try {
            g.drawImage(colorImage, 0, 0, null);
        } finally {
            g.dispose();
        }

        int[] bits = new int[CELL_WIDTH * CELL_HEIGHT];
        for (int y = 0; y < CELL_HEIGHT; y++) {
            for (int x = 0; x < CELL_WIDTH; x++) {
                int level = gray.getRaster().getSample(x, y, 0);
                bits[y * CELL_WIDTH + x] = (level >= THRESHOLD) ? 0 : 1;
            }
        }
        return bits;
    }

    /**
     * Command-line test driver: recognizes one image file and prints the result.
     *
     * @author huangyuanmu 2009-7-7
     * @param args optional; args[0] is the image path (defaults to D:\testNum2.jpeg)
     */
    public static void main(String args[]) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : "D:\\testNum2.jpeg";
        String s = recognize(ImageIO.read(new File(path)));
        System.out.println("recognize result" + s);
    }
}
複雜的驗證碼識別技術就相當複雜了,個人也沒有精力去研究了,使用現成的OCR軟件不失爲一個便捷的方法。Tesseract是一個不錯的選擇,它是惠普公code