I am trying to find the bounding boxes of text in an image and am currently using this approach:
// calculate the local variances of the grayscale image
Mat t_mean, t_mean_2;
Mat grayF;
outImg_gray.convertTo(grayF, CV_32F);
int winSize = 35;
blur(grayF, t_mean, cv::Size(winSize,winSize));
blur(grayF.mul(grayF), t_mean_2, cv::Size(winSize,winSize));
Mat varMat = t_mean_2 - t_mean.mul(t_mean);
varMat.convertTo(varMat, CV_8U);
// threshold the high variance regions
Mat varMatRegions = varMat > 100;
When given an image like this:
Then when I show varMatRegions
I get this image:
As you can see, it somewhat combines the left block of text with the header of the card. For most cards this method works great, but on busier cards it can cause problems.
The reason it is bad for those contours to connect is that it makes the bounding box of the contour nearly take up the entire card.
Can anyone suggest a different way I can find the text to ensure proper detection of text?
200 points to whoever can find the text in the card above these two.
You can detect text by finding edge elements that lie close together (an approach inspired by license plate detection, LPD):
#include <cstddef>
#include <iostream>
#include <vector>

#include "opencv2/opencv.hpp"
// Finds bounding boxes of text-like regions in an image.
//
// Pipeline: grayscale -> horizontal Sobel gradient -> Otsu binarisation ->
// morphological closing with a wide, short rectangle -> external contours ->
// bounding rectangles of the simplified contours.
//
// Parameters:
//   img - input image; it is converted with COLOR_BGR2GRAY, so a 3-channel
//         BGR image (as returned by cv::imread by default) is expected.
// Returns:
//   One rectangle per contour that has more than 100 boundary points and
//   whose bounding box is wider than it is tall. Returns an empty vector
//   when img is empty (e.g. a failed cv::imread).
std::vector<cv::Rect> detectLetters(cv::Mat img)
{
    std::vector<cv::Rect> boundRect;

    // An empty image would make cvtColor throw; there is nothing to detect.
    if (img.empty())
        return boundRect;

    cv::Mat img_gray;
    cv::cvtColor(img, img_gray, cv::COLOR_BGR2GRAY);

    // x-derivative only: this responds to vertical strokes.
    // NOTE(review): with an 8-bit output depth, negative derivatives are
    // saturated to 0, so only one edge polarity survives. Kept as-is to
    // preserve the detection results.
    cv::Mat img_sobel;
    cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);

    // With THRESH_OTSU the threshold value argument (0) is ignored and the
    // level is chosen automatically.
    cv::Mat img_threshold;
    cv::threshold(img_sobel, img_threshold, 0, 255,
                  cv::THRESH_OTSU + cv::THRESH_BINARY);

    // Closing with a 17x3 rectangle bridges horizontal gaps between nearby
    // edge responses so they merge into one blob. This is the key step.
    const cv::Mat element =
        cv::getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3));
    cv::morphologyEx(img_threshold, img_threshold, cv::MORPH_CLOSE, element);

    // Outer contours only, keeping every boundary point so that the point
    // count below reflects the contour length.
    std::vector< std::vector<cv::Point> > contours;
    cv::findContours(img_threshold, contours,
                     cv::RETR_EXTERNAL, cv::CHAIN_APPROX_NONE);

    std::vector<cv::Point> poly; // reused for each accepted contour
    for (std::size_t i = 0; i < contours.size(); ++i)
    {
        // Skip small blobs.
        if (contours[i].size() <= 100)
            continue;

        cv::approxPolyDP(cv::Mat(contours[i]), poly, 3, true);
        const cv::Rect appRect(cv::boundingRect(cv::Mat(poly)));

        // Keep only boxes that are wider than tall.
        if (appRect.width > appRect.height)
            boundRect.push_back(appRect);
    }
    return boundRect;
}
Usage:
int main(int argc,char** argv)
{
//Read
cv::Mat img1=cv::imread("side_1.jpg");
cv::Mat img2=cv::imread("side_2.jpg");
//Detect
std::vector<cv::Rect> letterBBoxes1=detectLetters(img1);
std::vector<cv::Rect> letterBBoxes2=detectLetters(img2);
//Display
for(int i=0; i< letterBBoxes1.size(); i++)
cv::rectangle(img1,letterBBoxes1[i],cv::Scalar(0,255,0),3,8,0);
cv::imwrite( "imgOut1.jpg", img1);
for(int i=0; i< letterBBoxes2.size(); i++)
cv::rectangle(img2,letterBBoxes2[i],cv::Scalar(0,255,0),3,8,0);
cv::imwrite( "imgOut2.jpg", img2);
return 0;
}
Results:
a. element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
b. element = getStructuringElement(cv::MORPH_RECT, cv::Size(30, 30) );
Results are similar for the other image mentioned.