您的位置:首页 > 其它

简单验证码识别

2015-12-02 19:57 537 查看
验证码识别主要包括两部分:去除干扰和识别。其中最麻烦的是去除干扰,而识别有现成的库:tesseract。在进行验证码识别之前,首先需要得到验证码数据。由于从网上下载实在太麻烦,就写了一个生成验证码的程序,用来生成各种随机验证码。
一、生成验证码数据集
1)验证码的随机性:包括字符串本身的随机性、字体及字体大小的随机性,以及字符旋转和各种随机干扰线。
2)简单的代码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace GenSecurityCode
{
    /// <summary>
    /// Form that generates random security-code (captcha) images, either one at a
    /// time or in a batch, and saves each image as "&lt;code&gt;.jpg".
    /// </summary>
    public partial class SecurityCodeGen : Form
    {
        public const int DEFAULT_FONT_SIZE = 20;

        /// <summary>Number of random noise pixels scattered over each image.</summary>
        private const int NoisePixelCount = 150;

        /// <summary>Index into <see cref="CodeFontNames"/>; font randomisation is not enabled yet.</summary>
        private const int FixedFontIndex = 1;

        /// <summary>Maximum rotation (degrees) per character; 0 means rotation is not enabled yet.</summary>
        private const int MaxRotationDegrees = 0;

        /// <summary>Palette used for interference lines and characters.</summary>
        private static readonly Color[] CodePalette =
        {
            Color.Black, Color.Red, Color.DarkBlue, Color.Green,
            Color.Orange, Color.Brown, Color.DarkCyan, Color.Purple
        };

        /// <summary>Candidate font families for the code characters.</summary>
        private static readonly string[] CodeFontNames =
        {
            "Verdana", "Microsoft Sans Serif", "Comic Sans MS", "Arial", "宋体"
        };

        // One generator for the lifetime of the form. Creating a new Random per call
        // (as before) can repeat the time-based seed when calls happen close together,
        // which is why the batch loop previously had to sleep between iterations.
        private readonly Random _rng = new Random();

        private int iCodeLen = 6;
        private int iFontSize = DEFAULT_FONT_SIZE;
        private int iInterferLines = 6;
        private int iCodeNums = 20;
        public string batchSavePath = string.Empty;

        public SecurityCodeGen()
        {
            InitializeComponent();
        }

        /// <summary>Initialises the option controls with the default settings.</summary>
        private void SecurityCodeGen_Load(object sender, EventArgs e)
        {
            checkNum.Checked = true;
            checkUpperCase.Checked = false;
            checkLowerCase.Checked = false;
            textCodeLength.Text = iCodeLen.ToString();
            txtCodeNums.Text = iCodeNums.ToString();
        }

        /// <summary>
        /// Generates a random code string of <c>iCodeLen</c> characters drawn from the
        /// character sets selected by the check boxes (digits, upper case, lower case).
        /// </summary>
        /// <returns>
        /// The generated code, or an empty string when no character set is selected.
        /// </returns>
        private string GenCodeString()
        {
            StringBuilder alphabet = new StringBuilder();
            if (checkNum.Checked)
            {
                alphabet.Append("0123456789");
            }
            if (checkUpperCase.Checked)
            {
                alphabet.Append("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
            }
            if (checkLowerCase.Checked)
            {
                alphabet.Append("abcdefghijklmnopqrstuvwxyz");
            }

            if (alphabet.Length == 0)
            {
                return string.Empty;
            }

            StringBuilder code = new StringBuilder();
            for (int i = 0; i < iCodeLen; i++)
            {
                code.Append(alphabet[_rng.Next(alphabet.Length)]);
            }
            return code.ToString();
        }

        /// <summary>
        /// Renders <paramref name="sCodeStr"/> onto a white bitmap together with random
        /// interference lines and noise pixels.
        /// </summary>
        /// <param name="sCodeStr">The code to draw; must contain at least one character.</param>
        /// <returns>A new bitmap; the caller owns it and is responsible for disposing it.</returns>
        /// <exception cref="ArgumentException"><paramref name="sCodeStr"/> is null or empty.</exception>
        private System.Drawing.Bitmap GenCodeImage(string sCodeStr)
        {
            if (string.IsNullOrEmpty(sCodeStr))
            {
                // A zero-width bitmap cannot be created; fail with a clear message instead.
                throw new ArgumentException("The code string must contain at least one character.", "sCodeStr");
            }

            Bitmap image = new Bitmap(sCodeStr.Length * iFontSize, iFontSize * 2);
            try
            {
                using (Graphics g = Graphics.FromImage(image))
                {
                    g.Clear(Color.White);
                    g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.AntiAlias;

                    DrawInterferenceLines(g, image.Width, image.Height);
                    DrawCodeCharacters(g, sCodeStr);
                }

                // Scatter noise pixels on top of the rendered text.
                for (int i = 0; i < NoisePixelCount; i++)
                {
                    int x = _rng.Next(image.Width);
                    int y = _rng.Next(image.Height);
                    image.SetPixel(x, y, Color.FromArgb(_rng.Next()));
                }

                return image;
            }
            catch
            {
                // Do not leak the bitmap when rendering fails part-way.
                image.Dispose();
                throw;
            }
        }

        /// <summary>Draws <c>iInterferLines</c> random straight lines in random palette colours.</summary>
        private void DrawInterferenceLines(Graphics g, int width, int height)
        {
            for (int i = 0; i < iInterferLines; i++)
            {
                int x1 = _rng.Next(width);
                int x2 = _rng.Next(width);
                int y1 = _rng.Next(height);
                int y2 = _rng.Next(height);

                using (Pen pen = new Pen(CodePalette[_rng.Next(CodePalette.Length)]))
                {
                    g.DrawLine(pen, x1, y1, x2, y2);
                }
            }
        }

        /// <summary>
        /// Draws the characters of <paramref name="code"/> left to right, each in a random
        /// palette colour. The graphics transform is advanced after every character, so the
        /// horizontal position accumulates from one character to the next.
        /// </summary>
        private void DrawCodeCharacters(Graphics g, string code)
        {
            int offset = iFontSize / 8;
            int advance = (int)Math.Ceiling(iFontSize * 0.75);

            using (Font font = new Font(CodeFontNames[FixedFontIndex], iFontSize, FontStyle.Bold))
            {
                foreach (char ch in code)
                {
                    float angle = _rng.Next(-MaxRotationDegrees, MaxRotationDegrees);

                    using (Brush brush = new SolidBrush(CodePalette[_rng.Next(CodePalette.Length)]))
                    {
                        g.TranslateTransform(offset, offset);
                        g.RotateTransform(angle);
                        g.DrawString(ch.ToString(), font, brush, 1, 1);
                        g.RotateTransform(-angle);
                        // Undo the vertical offset and move right to the next character slot.
                        g.TranslateTransform(advance, -offset);
                    }
                }
            }
        }

        /// <summary>
        /// Saves <paramref name="image"/> as "&lt;code&gt;.jpg" in <paramref name="directory"/>,
        /// or in the current directory when <paramref name="directory"/> is null or empty.
        /// </summary>
        private static void SaveAsJpeg(Image image, string directory, string codeStr)
        {
            string fileName = codeStr + ".jpg";
            string path = string.IsNullOrEmpty(directory) ? fileName : Path.Combine(directory, fileName);

            // Pass the format explicitly: Image.Save(string) does not look at the file
            // extension, so an in-memory bitmap would not be JPEG-encoded otherwise.
            image.Save(path, ImageFormat.Jpeg);
        }

        /// <summary>
        /// Parses <paramref name="text"/> as an integer not smaller than
        /// <paramref name="minimum"/>; shows a message box and returns false on failure.
        /// </summary>
        private static bool TryReadInt(string text, string label, int minimum, out int value)
        {
            if (int.TryParse(text, out value) && value >= minimum)
            {
                return true;
            }

            MessageBox.Show("Please enter a valid " + label + " (an integer >= " + minimum + ").");
            return false;
        }

        /// <summary>Generates one code, shows it in the preview control and saves it to disk.</summary>
        private void btnGen_Click(object sender, EventArgs e)
        {
            string codeStr = GenCodeString();
            if (codeStr.Length == 0)
            {
                MessageBox.Show("Please select at least one character set.");
                return;
            }

            System.Drawing.Bitmap image = GenCodeImage(codeStr);
            txtCodeStr.Text = codeStr;

            this.picShowCtrl.Width = image.Width;
            this.picShowCtrl.Height = image.Height;

            // The preview control keeps the new bitmap; release the one it showed before.
            Image previous = this.picShowCtrl.BackgroundImage;
            this.picShowCtrl.BackgroundImage = image;
            if (previous != null)
            {
                previous.Dispose();
            }

            SaveAsJpeg(image, string.Empty, codeStr);
        }

        /// <summary>
        /// Generates the requested number of codes and saves each image into the batch
        /// folder (or the current directory when no folder was chosen).
        /// </summary>
        private void btnBatchGen_Click(object sender, EventArgs e)
        {
            if (string.IsNullOrEmpty(batchSavePath))
            {
                using (FolderBrowserDialog folderDlg = new FolderBrowserDialog())
                {
                    folderDlg.ShowDialog();
                    batchSavePath = folderDlg.SelectedPath;
                }

                if (string.IsNullOrEmpty(batchSavePath))
                {
                    // Generation still proceeds; images go to the current directory.
                    MessageBox.Show("You didn't select path!");
                }
            }

            int codeCount;
            int codeLength;
            if (!TryReadInt(txtCodeNums.Text, "number of codes", 0, out codeCount) ||
                !TryReadInt(textCodeLength.Text, "code length", 1, out codeLength))
            {
                return;
            }
            iCodeNums = codeCount;
            iCodeLen = codeLength;

            txtCodeStr.Text = string.Empty;
            List<string> codes = new List<string>(iCodeNums);

            for (int i = 0; i < iCodeNums; i++)
            {
                string codeStr = GenCodeString();
                if (codeStr.Length == 0)
                {
                    MessageBox.Show("Please select at least one character set.");
                    break;
                }

                // Batch images are not displayed, so release each one right after saving.
                using (System.Drawing.Bitmap image = GenCodeImage(codeStr))
                {
                    SaveAsJpeg(image, batchSavePath, codeStr);
                }
                codes.Add(codeStr);
            }

            txtCodeStr.Text = string.Join(",", codes);
        }
    }
}
二、去除干扰
/*
 * Remove interference from a captcha image before recognition.
 *
 * Steps: grayscale -> inverted Otsu threshold (foreground becomes white) ->
 * erode -> dilate -> keep only the contours whose enclosing-circle centre lies
 * in the middle half of the image height -> erode again -> invert.
 *
 * img:     input colour image.
 * returns: single-channel image after the final bitwise_not.
 *
 * Side effects: writes the intermediate images binary.jpg, erode.jpg,
 * dilate.jpg, dilate1.jpg, mask.jpg and masked.jpg to the current directory.
 */
Mat PreProcess(Mat &img)
{
    Mat grayImg, binaryImg, erodeImg, dilateImg, dilatedCopy, maskedImg;

    // NOTE(review): CV_RGB2GRAY assumes RGB channel order; if img comes from
    // imread it is presumably BGR and CV_BGR2GRAY would be the matching code - confirm.
    cvtColor(img, grayImg, CV_RGB2GRAY);

    Mat mask(img.rows, img.cols, CV_8UC1, Scalar(0));

    // Structuring elements must be explicitly filled: Mat(rows, cols, type)
    // only allocates, so the kernels used to contain arbitrary values.
    const Mat kernel2x2 = Mat::ones(2, 2, CV_8U);
    const Mat kernel3x3 = Mat::ones(3, 3, CV_8U);

    /* Otsu threshold, inverted */
    threshold(grayImg, binaryImg, 0, 255, CV_THRESH_OTSU + CV_THRESH_BINARY_INV);
    imwrite("binary.jpg", binaryImg);

    /* erode to remove noise; erode and dilate only act on white pixels */
    erode(binaryImg, erodeImg, kernel2x2, Point(-1, -1), 1);
    imwrite("erode.jpg", erodeImg);

    /* dilate to link contours */
    dilate(erodeImg, dilateImg, kernel3x3, Point(-1, -1), 1);
    imwrite("dilate.jpg", dilateImg);

    // Keep an untouched copy: findContours modifies its source image in
    // older OpenCV versions.
    dilateImg.copyTo(dilatedCopy);

    /* find contours */
    vector<vector<Point> > contours;
    findContours(dilateImg, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
    imwrite("dilate1.jpg", dilatedCopy);

    /* remove interference: build a mask from the contours worth keeping */
    for (size_t i = 0; i < contours.size(); ++i)
    {
        Point2f center;
        float radius;
        minEnclosingCircle(contours[i], center, radius);

        /* more conditions can be added here to reject more interference */
        if (center.y > (img.rows / 4) && center.y < img.rows * 3 / 4)
        {
            rectangle(mask, boundingRect(contours[i]), Scalar(255), -1);
        }
    }
    imwrite("mask.jpg", mask);

    // Copy only the masked regions; everything outside the mask stays black.
    dilatedCopy.copyTo(maskedImg, mask);
    erode(maskedImg, erodeImg, kernel2x2, Point(-1, -1), 1);

    // Invert the result before returning it.
    bitwise_not(erodeImg, erodeImg);
    imwrite("masked.jpg", erodeImg);

    return erodeImg;
}
三、字符识别
字符识别最麻烦的其实是 tesseract 库的编译,使用起来其实很简单。
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

/* Snippet for recognition.
 * Assumes img is the single-channel 8-bit cv::Mat produced by the
 * preprocessing step - TODO confirm against the caller. */
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();

// Initialize tesseract-ocr with English, without specifying tessdata path
if (api->Init(NULL, "eng")) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    delete api;
    exit(1);
}

api->SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);

// 1 byte per pixel; use the real row stride (img.step) rather than img.cols
// so that a matrix with padded rows is still read correctly.
api->SetImage(img.data, img.cols, img.rows, 1, static_cast<int>(img.step));

char *out = api->GetUTF8Text();
if (out != NULL) {
    std::cout << "recognition:" << out << std::endl;
}

// The text buffer is owned by the caller and must be released with delete[];
// the engine is shut down with End() before the API object is destroyed.
delete[] out;
api->End();
delete api;
tesseract 的编译方法请参照这里:https://github.com/charlesw/tesseract-vs2012
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: