OpenCV Adaptive Threshold OCR

user3247146 picture user3247146 · Mar 2, 2014 · Viewed 11.8k times · Source

I am using OpenCV to prepare images for OCR from an iPhone camera, and I have been having trouble getting the results I need for an accurate OCR scan. Here is the code I am using now.

    cv::cvtColor(cvImage, cvImage, CV_BGR2GRAY);
    cv::medianBlur(cvImage, cvImage, 0);
    cv::adaptiveThreshold(cvImage, cvImage, 255, CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY, 5, 4);

This method takes a bit too long and does not provide me good results. enter image description here enter image description here

Any suggestions on how I could make this more effective? The images are coming from an iPhone camera.

After using Andry's suggestion.

enter image description here

    cv::Mat cvImage = [self cvMatFromUIImage:image];
    cv::Mat res;
    cv::cvtColor(cvImage, cvImage, CV_RGB2GRAY);
    cvImage.convertTo(cvImage,CV_32FC1,1.0/255.0);
    CalcBlockMeanVariance(cvImage,res);
    res=1.0-res;
    res=cvImage+res;
    cv::threshold(res,res, 0.85, 1, cv::THRESH_BINARY);
    cv::resize(res, res, cv::Size(res.cols/2,res.rows/2));
    image = [self UIImageFromCVMat:cvImage];

Method:

void CalcBlockMeanVariance(cv::Mat Img,cv::Mat Res,float blockSide=21) // blockSide - the parameter (set greater for larger font on image)
{
    cv::Mat I;
    Img.convertTo(I,CV_32FC1);
    Res=cv::Mat::zeros(Img.rows/blockSide,Img.cols/blockSide,CV_32FC1);
    cv::Mat inpaintmask;
    cv::Mat patch;
    cv::Mat smallImg;
    cv::Scalar m,s;

    for(int i=0;i<Img.rows-blockSide;i+=blockSide)
    {
        for (int j=0;j<Img.cols-blockSide;j+=blockSide)
        {
             patch=I(cv::Rect(j,i,blockSide,blockSide));
            cv::meanStdDev(patch,m,s);
            if(s[0]>0.01) // Thresholding parameter (set smaller for lower contrast image)
            {
                Res.at<float>(i/blockSide,j/blockSide)=m[0];
            }else
            {
                Res.at<float>(i/blockSide,j/blockSide)=0;
            }
        }
    }

    cv::resize(I,smallImg,Res.size());

    cv::threshold(Res,inpaintmask,0.02,1.0,cv::THRESH_BINARY);

    cv::Mat inpainted;
    smallImg.convertTo(smallImg,CV_8UC1,255);

    inpaintmask.convertTo(inpaintmask,CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, cv::INPAINT_TELEA);

    cv::resize(inpainted,Res,Img.size());
    Res.convertTo(Res,CV_32FC1,1.0/255.0);

}

Any idea why I am getting this result? The OCR results are pretty good, but would be better if I could get an image similar to the one you got. I am developing for iOS if that matters. I had to use cvtColor because the method expects a single channel image.

Answer

Andrey  Smorodov picture Andrey Smorodov · Mar 2, 2014

Here is my result: enter image description here

Here is the code:

#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdarg.h>
#include "opencv2/opencv.hpp"
#include "fstream"
#include "iostream"
using namespace std;
using namespace cv;

//-----------------------------------------------------------------------------------------------------
// 
//-----------------------------------------------------------------------------------------------------
void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide=21) // blockSide - the parameter (set greater for larger font on image)
{
    Mat I;
    Img.convertTo(I,CV_32FC1);
    Res=Mat::zeros(Img.rows/blockSide,Img.cols/blockSide,CV_32FC1);
    Mat inpaintmask;
    Mat patch;
    Mat smallImg;
    Scalar m,s;

    for(int i=0;i<Img.rows-blockSide;i+=blockSide)
    {       
        for (int j=0;j<Img.cols-blockSide;j+=blockSide)
        {
            patch=I(Range(i,i+blockSide+1),Range(j,j+blockSide+1));
            cv::meanStdDev(patch,m,s);
            if(s[0]>0.01) // Thresholding parameter (set smaller for lower contrast image)
            {
                Res.at<float>(i/blockSide,j/blockSide)=m[0];
            }else
            {
                Res.at<float>(i/blockSide,j/blockSide)=0;
            }           
        }
    }

    cv::resize(I,smallImg,Res.size());

    cv::threshold(Res,inpaintmask,0.02,1.0,cv::THRESH_BINARY);

    Mat inpainted;
    smallImg.convertTo(smallImg,CV_8UC1,255);

    inpaintmask.convertTo(inpaintmask,CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    cv::resize(inpainted,Res,Img.size());
    Res.convertTo(Res,CV_32FC1,1.0/255.0);

}
//-----------------------------------------------------------------------------------------------------
// 
//-----------------------------------------------------------------------------------------------------
int main( int argc, char** argv )
{
    namedWindow("Img");
    namedWindow("Edges");
    //Mat Img=imread("D:\\ImagesForTest\\BookPage.JPG",0);
    Mat Img=imread("Test2.JPG",0);
    Mat res;
    Img.convertTo(Img,CV_32FC1,1.0/255.0);
    CalcBlockMeanVariance(Img,res); 
    res=1.0-res;
    res=Img+res;
    imshow("Img",Img);
    cv::threshold(res,res,0.85,1,cv::THRESH_BINARY);
    cv::resize(res,res,cv::Size(res.cols/2,res.rows/2));
    imwrite("result.jpg",res*255);
    imshow("Edges",res);
    waitKey(0);

    return 0;
}

And Python port:

import cv2 as cv
import numpy as np 

#-----------------------------------------------------------------------------------------------------
# 
#-----------------------------------------------------------------------------------------------------
def CalcBlockMeanVariance(Img,blockSide=21): # blockSide - the parameter (set greater for larger font on image)            
    I=np.float32(Img)/255.0
    Res=np.zeros( shape=(int(Img.shape[0]/blockSide),int(Img.shape[1]/blockSide)),dtype=np.float)

    for i in range(0,Img.shape[0]-blockSide,blockSide):           
        for j in range(0,Img.shape[1]-blockSide,blockSide):        
            patch=I[i:i+blockSide+1,j:j+blockSide+1]
            m,s=cv.meanStdDev(patch)
            if(s[0]>0.001): # Thresholding parameter (set smaller for lower contrast image)
                Res[int(i/blockSide),int(j/blockSide)]=m[0]
            else:            
                Res[int(i/blockSide),int(j/blockSide)]=0

    smallImg=cv.resize(I,(Res.shape[1],Res.shape[0] ) )    
    _,inpaintmask=cv.threshold(Res,0.02,1.0,cv.THRESH_BINARY);    
    smallImg=np.uint8(smallImg*255)    

    inpaintmask=np.uint8(inpaintmask)
    inpainted=cv.inpaint(smallImg, inpaintmask, 5, cv.INPAINT_TELEA)    
    Res=cv.resize(inpainted,(Img.shape[1],Img.shape[0] ) )
    Res=np.float32(Res)/255    
    return Res

#-----------------------------------------------------------------------------------------------------
# 
#-----------------------------------------------------------------------------------------------------

cv.namedWindow("Img")
cv.namedWindow("Edges")
Img=cv.imread("F:\\ImagesForTest\\BookPage.JPG",0)
res=CalcBlockMeanVariance(Img)
res=1.0-res
Img=np.float32(Img)/255
res=Img+res
cv.imshow("Img",Img);
_,res=cv.threshold(res,0.85,1,cv.THRESH_BINARY);
res=cv.resize(res,( int(res.shape[1]/2),int(res.shape[0]/2) ))
cv.imwrite("result.jpg",res*255);
cv.imshow("Edges",res)
cv.waitKey(0)