OCR简介

    技术2022-05-19  22

       汉字识别在将近20年的研究开发过程中,中国大陆先后有十几个 研究单位开展过汉字识别的研发工作,并得到了国家“七五攻 关”计划、863计划、国家自然科学基金的大力支持,目前已有 一些产品面向市场并取得了可喜的销售成绩,如“汉王笔”、 “文通笔”、“汉王 OCR”等。    Tesseract的OCR引擎最先由HP实验室于1985年开始研发,至1995年时已经成为OCR业内最准确的三款识别引擎之一。之后,HP将Tesseract贡 献给开源软件业,让其重焕新生--2005年,Tesseract由美国内华达州信息技术研究所获得,并求诸于Google对Tesseract进行改进、消除 Bug、优化工作。    本文是自己做的一个小程序,通过摄像头采集的数据,然后对图片进行处理,包括灰度二制化,然后用tesseract进行解析正确率可以达到70%以上。 using System;using System.Collections.Generic;using System.ComponentModel;using System.Data;using System.Drawing;using System.Text;using System.Windows.Forms;using System.IO;using System.Linq;using Emgu.CV;using Emgu.CV.CvEnum;using Emgu.CV.Structure;using Emgu.CV.UI; /*Trace*/using System.Diagnostics; namespace SoundVideo{    public partial class Form1 : Form    {        Camera wc;        string ImgFileName;        string TxtFileName;             string TxtMenuInfo;        int ImgNum = 0;        Image<Gray, Byte> img1;        public Form1()        {            InitializeComponent();        }         /************************************************************************/        /*                       读写ini文件函数                                */        /************************************************************************/         private void Form1_Load(object sender, EventArgs e)        {            try            {                //this.btnPlay.Enabled = false;                  //this.btnClose.Enabled = true;                  //panelPreview.Size = new Size(330,330);                  wc = new Camera(pictureBox1.Handle, pictureBox1.Width, pictureBox1.Height);                this.label1.Text = "状态显示";                wc.StartCamera(pictureBox1.Width, pictureBox1.Height);            }            catch            {                MessageBox.Show("打开摄像头有误,请检查", "系统消息", MessageBoxButtons.OK, MessageBoxIcon.Information);            }         }         private void btnPlay_Click(object sender, EventArgs e)        {            try            {                // this.btnPlay.Enabled = false; 
                 //this.btnClose.Enabled = true;                  wc = new Camera(pictureBox1.Handle, pictureBox1.Width, pictureBox1.Height);                wc.StartCamera(300, 200);            }            catch            {                MessageBox.Show("打开摄像头有误,请检查", "系统消息", MessageBoxButtons.OK, MessageBoxIcon.Information);            }         }         private void btnScroll_Click(object sender, EventArgs e)        {            try            {                wc.capImage(wc.lwndC, "d://test.bmp");             }            catch            {                MessageBox.Show("抓图有误,请检查", "系统消息", MessageBoxButtons.OK, MessageBoxIcon.Information);            }            img1 = new Image<Gray, Byte>("d://test.bmp");            imageBox1.Image = img1;        }         private void btnStopCap_Click(object sender, EventArgs e)        {            try            {                wc.stopCapScope(wc.lwndC);            }            catch            {                MessageBox.Show("停止摄像有误,请检查", "系统消息", MessageBoxButtons.OK, MessageBoxIcon.Information);            }         }         private void btnStartCap_Click(object sender, EventArgs e)        {            try            {                wc.capScope(wc.lwndC, "d://test.avi");            }            catch            {                MessageBox.Show("开始摄像有误,请检查", "系统消息", MessageBoxButtons.OK, MessageBoxIcon.Information);            }         }         private void btnClose_Click(object sender, EventArgs e)        {            try            {                //this.btnPlay.Enabled = true;                  //this.btnClose.Enabled = false;                  wc.CloseCamera();            }            catch            {                MessageBox.Show("关闭摄像头有误,请检查", "系统消息", MessageBoxButtons.OK, MessageBoxIcon.Information);            }         }         private void btnAnalys_Click(object sender, EventArgs e)         {             ImgFileName = "d://test.bmp";             if (File.Exists("d://test1.bmp"))             {        
         ImgFileName = "d://test1.bmp";             }             TxtFileName = "d://test";             TxtMenuInfo = "d://MenuInfo.txt";             this.txtBoxOutput.Text = " ";             this.label1.Text = "分析中......";             this.label1.Update();             System.Diagnostics.Process OCR = new System.Diagnostics.Process();             OCR.StartInfo.FileName = "tesseract.exe";             OCR.StartInfo.Arguments = ImgFileName + " " + TxtFileName + " -l chi_sim";             OCR.StartInfo.WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden;             OCR.Start();             while (!OCR.HasExited)             {             }             FileStream OCRResultFile = new FileStream(TxtFileName + ".txt", FileMode.Open, FileAccess.Read);             StreamReader OCRResult_streamReader = new StreamReader(OCRResultFile);             string strLineOCR = OCRResult_streamReader.ReadToEnd();             this.txtBoxOutput.Text = strLineOCR;             Trace.WriteLine("The OCR result is:");             Trace.WriteLine(strLineOCR);             OCRResult_streamReader.Close();             FileStream menuFile = new FileStream(TxtMenuInfo, FileMode.Open, FileAccess.Read);             StreamReader menu_streamReader = new StreamReader(menuFile);             string strLineMenu = menu_streamReader.ReadLine();             this.txtBoxMatchMenu.Text = "";             int ppnum = 0;             while (strLineMenu != null)             {                 int matchNum = 0;                 for (int i = 0; i < strLineMenu.Length; i++)                 {                     if (strLineOCR.Contains(strLineMenu.Substring(i, 1)))                     {                         Trace.WriteLine(strLineMenu.Substring(i, 1));                         matchNum = matchNum + 1;                     }                 }                 if (matchNum > strLineMenu.Length * 0.5)                 {                     this.txtBoxMatchMenu.Text += strLineMenu;                     
this.txtBoxMatchMenu.Text += "/r/n";                     ppnum++;                 }                 strLineMenu = menu_streamReader.ReadLine();             }             this.label1.Text = "跑完了......正确率是" + ((double)ppnum / 11) * 100 + "%";         }         private void ImgGrapTimer_Tick(object sender, EventArgs e)         {             this.labelCue.Text = "分析中……";             ImgFileName = "d://ImgAndTxt//Img" + ImgNum.ToString() + ".bmp";             TxtFileName = "d://ImgAndTxt//Txt" + ImgNum.ToString();             TxtMenuInfo = "d://MenuInfo.txt";             /*抓图*/             try             {                 wc.capImage(wc.lwndC, ImgFileName);             }             catch             {                 MessageBox.Show("抓图有误,请检查", "系统消息", MessageBoxButtons.OK, MessageBoxIcon.Information);             }             /*执行tesseract.exe"*/             System.Diagnostics.Process OCR = new System.Diagnostics.Process();             OCR.StartInfo.FileName = "tesseract.exe";             OCR.StartInfo.Arguments = ImgFileName + " " + TxtFileName + " -l chi_sim";             OCR.StartInfo.WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden;             OCR.Start();             /*wait直到OCR分析程序跑完*/             while (!OCR.HasExited)             {             }             /*Read OCR Result*/             FileStream OCRResultFile = new FileStream(TxtFileName + ".txt", FileMode.Open, FileAccess.Read);             StreamReader OCRResult_streamReader = new StreamReader(OCRResultFile);             string strLineOCR = OCRResult_streamReader.ReadToEnd();             this.txtBoxOutput.Text = strLineOCR;             Trace.WriteLine("The OCR result is:");             Trace.WriteLine(strLineOCR);             OCRResult_streamReader.Close();             /*Read Menu Files and Do Match*/             FileStream menuFile = new FileStream(TxtMenuInfo, FileMode.Open, FileAccess.Read);             StreamReader menu_streamReader = new StreamReader(menuFile);             string 
strLineMenu = menu_streamReader.ReadLine();             this.txtBoxMatchMenu.Text = "";             while (strLineMenu != null)             {                 int matchNum = 0;                 for (int i = 0; i < strLineMenu.Length; i++)                 {                     if (strLineOCR.Contains(strLineMenu.Substring(i, 1)))                     {                         Trace.WriteLine(strLineMenu.Substring(i, 1));                         matchNum = matchNum + 1;                     }                 }                 if (matchNum > strLineMenu.Length * 0.5)                 {                     this.txtBoxMatchMenu.Text += strLineMenu;                     this.txtBoxMatchMenu.Text += "/r/n";                 }                 strLineMenu = menu_streamReader.ReadLine();                 Trace.WriteLine(strLineMenu);             }             this.labelCue.Text = "跑完了......";             if (ImgNum < 10000)             {                 ImgNum = ImgNum + 1;             }             else             {                 ImgNum = 0;             }         }         /*          * 根据图片计算阈值          */         public double getThreshold()         {             img1 = new Image<Gray, Byte>("d://test.bmp");             int height = img1.Height;             Console.WriteLine(height);             int width = img1.Width;             double threshold = 0;             double sum = 0;             int i;             int j;             for (i = 0; i < height; i++)             {                 for (j = 0; j < width; j++)                 {                     Gray color = img1[i, j];                     sum += color.Intensity;                 }             }             threshold = sum / (width * height);             return threshold;         }         /*           滑动调整阈值          */         private void trackBar1_Scroll(object sender, EventArgs e)         {             int num = 0;             int i;             int j;             int height = img1.Height;             int width = 
img1.Width;             num += trackBar1.Value;             double threshold = getThreshold() * (num / 5);             img1 = new Image<Gray, Byte>("d://test.bmp");             txtBoxMatchMenu.Text = " ";             txtBoxOutput.Text = " ";             for (i = 0; i < height; i++)             {                 for (j = 0; j < width; j++)                 {                     Gray color = img1[i, j];                     if (color.Intensity > threshold)                         img1[i, j] = new Gray(255);                     else                         img1[i, j] = new Gray(0);                     if (j > 300)                     {                         img1[i, j] = new Gray(255);                     }                 }             }             imageBox1.Image = img1;             img1.Save("d://test1.bmp");         }     } }


    最新回复(0)