Remove background noise for OCR with OpenCV

  • Last Update :
  • Techknowledgy :
  • too large or too small to be letters

  • not vertically centered with the rest of the text

import cv2 as cv
import numpy as np

# Load the input image and binarise it with a fixed global threshold.
im = cv.imread('ocr.png')
if im is None:
    # cv.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise surface later as an
    # opaque cvtColor assertion failure.
    raise FileNotFoundError("could not read image 'ocr.png'")
imgray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
# THRESH_BINARY is the named constant for the literal 0 used previously.
ret, thresh = cv.threshold(imgray, 127, 255, cv.THRESH_BINARY)

def size_threshold(bw, minimum, maximum):
    """Keep only the connected components whose area is within bounds.

    Args:
        bw: Image handed to ``cv.connectedComponentsWithStats``
            (presumably an 8-bit, single-channel binary image).
        minimum: Components with an area below this value are removed.
        maximum: Components with an area above this value are removed.

    Returns:
        A ``uint8`` array shaped like the label map: 255 on pixels that
        belong to a surviving component, 0 everywhere else.
    """
    _, labels, stats, _ = cv.connectedComponentsWithStats(bw)
    areas = stats[:, cv.CC_STAT_AREA]
    # One boolean per label; indexing it with the label map filters every
    # pixel in a single pass instead of rescanning the image per label.
    keep = ~((areas < minimum) | (areas > maximum))
    keep[0] = False  # label 0 is the background and is never foreground
    return keep[labels].astype(np.uint8) * 255

def y_centroid_threshold(bw, minimum, maximum):
    """Keep only the connected components whose centroid row is within bounds.

    Args:
        bw: Image handed to ``cv.connectedComponentsWithStats``
            (presumably an 8-bit, single-channel binary image).
        minimum: Components whose centroid y-coordinate is below this
            value are removed.
        maximum: Components whose centroid y-coordinate is above this
            value are removed.

    Returns:
        A ``uint8`` array shaped like the label map: 255 on pixels that
        belong to a surviving component, 0 everywhere else.
    """
    _, labels, _, centroids = cv.connectedComponentsWithStats(bw)
    centroid_y = centroids[:, 1]
    # One boolean per label; indexing it with the label map filters every
    # pixel in a single pass instead of rescanning the image per label.
    keep = ~((centroid_y < minimum) | (centroid_y > maximum))
    keep[0] = False  # label 0 is the background and is never foreground
    return keep[labels].astype(np.uint8) * 255

# First discard components by size, then by vertical centroid position,
# and write whatever is left to disk.
sized = size_threshold(thresh, minimum=60, maximum=300)
centered = y_centroid_threshold(sized, minimum=40, maximum=63)
cv.imwrite('ocr_out.png', centered)

Suggestion : 2

I'm trying to do OCR with Tesseract. To get a better result, I'd like to remove the background noise before sending the image to Tesseract. Starting with your first result, you could remove noise that is: I already knew the text has a fixed color and used cv2.inRange to remove the background noise, but the problem is that the background noise has a color similar to the text color, so I'm stuck in this situation. Here is my image for processing (original test):

  • I also tried to use bitwise_and to merge the white and black results together, but got a similar result, which is not good either. Can someone help me or recommend anything? Thank you in advance.
from PIL import Image
from pytesseract import *
import cv2
import numpy as np

def img_hsv_mask_white(img):
    """Black out every pixel of ``img`` that is not a bright, low-saturation colour.

    Args:
        img: Image converted with ``cv2.COLOR_BGR2HSV`` (so presumably BGR).

    Returns:
        A copy of ``img`` in which only the pixels selected by the blurred
        HSV range mask are kept; all others are zero.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # For HSV, OpenCV uses H: 0-179, S: 0-255, V: 0-255.
    lower_hsv = np.array([0, 0, 185])
    upper_hsv = np.array([179, 17, 235])
    mask = cv2.inRange(hsv, lower_hsv, upper_hsv)
    # The box blur leaves non-zero values around each selected pixel, so the
    # mask used below also covers the immediate neighbourhood of the range.
    blur = cv2.blur(mask, (3, 3))
    return cv2.bitwise_and(img, img, mask=blur)

def img_hsv_mask_black(img):
    """Black out every pixel of ``img`` that is not a dark colour.

    Args:
        img: Image converted with ``cv2.COLOR_BGR2HSV`` (so presumably BGR).

    Returns:
        A copy of ``img`` in which only the pixels selected by the blurred
        HSV range mask are kept; all others are zero.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # For HSV, OpenCV uses H: 0-179, S: 0-255, V: 0-255.
    lower_hsv = np.array([0, 0, 0])
    upper_hsv = np.array([60, 80, 70])
    mask = cv2.inRange(hsv, lower_hsv, upper_hsv)
    # The box blur leaves non-zero values around each selected pixel, so the
    # mask used below also covers the neighbourhood of the range.
    blur = cv2.blur(mask, (8, 8))
    return cv2.bitwise_and(img, img, mask=blur)

def immerge(img1, img2):
    """Return the per-element bitwise AND of ``img1`` and ``img2``.

    Args:
        img1: First image.
        img2: Second image (presumably the same shape and type as ``img1``).

    Returns:
        The result of ``cv2.bitwise_and(img1, img2)``.
    """
    return cv2.bitwise_and(img1, img2)

# Required modules: numpy, opencv-python, Pillow, pytesseract

if __name__ == "__main__":
    # Point pytesseract at the Tesseract executable of a default Windows install.
    pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
    # Process the sample images 0711/1.png ... 0711/8.png one by one.
    for x in range(1, 9):
        file = f"0711/{x}.png"
        srcimg = cv2.imread(file, cv2.IMREAD_UNCHANGED)
        if srcimg is None:
            # cv2.imread returns None for a missing or unreadable file;
            # skip it rather than failing inside cvtColor.
            print(f"could not read {file}, skipping")
            continue
        white = img_hsv_mask_white(srcimg)
        black = img_hsv_mask_black(srcimg)
        merged = immerge(white, black)
        code = pytesseract.image_to_string(merged, lang='eng')
        print(code)
        # Show the merged image and wait for a key press before continuing.
        cv2.imshow(file, merged)
        cv2.waitKey(0)
  • too large or too small to be letters

  • not vertically centered with the rest of the text

import cv2 as cv
import numpy as np

# Load the input image and binarise it with a fixed global threshold.
im = cv.imread('ocr.png')
if im is None:
    # cv.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise surface later as an
    # opaque cvtColor assertion failure.
    raise FileNotFoundError("could not read image 'ocr.png'")
imgray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
# THRESH_BINARY is the named constant for the literal 0 used previously.
ret, thresh = cv.threshold(imgray, 127, 255, cv.THRESH_BINARY)

def size_threshold(bw, minimum, maximum):
    """Keep only the connected components whose area is within bounds.

    Args:
        bw: Image handed to ``cv.connectedComponentsWithStats``
            (presumably an 8-bit, single-channel binary image).
        minimum: Components with an area below this value are removed.
        maximum: Components with an area above this value are removed.

    Returns:
        A ``uint8`` array shaped like the label map: 255 on pixels that
        belong to a surviving component, 0 everywhere else.
    """
    _, labels, stats, _ = cv.connectedComponentsWithStats(bw)
    areas = stats[:, cv.CC_STAT_AREA]
    # One boolean per label; indexing it with the label map filters every
    # pixel in a single pass instead of rescanning the image per label.
    keep = ~((areas < minimum) | (areas > maximum))
    keep[0] = False  # label 0 is the background and is never foreground
    return keep[labels].astype(np.uint8) * 255

def y_centroid_threshold(bw, minimum, maximum):
    """Keep only the connected components whose centroid row is within bounds.

    Args:
        bw: Image handed to ``cv.connectedComponentsWithStats``
            (presumably an 8-bit, single-channel binary image).
        minimum: Components whose centroid y-coordinate is below this
            value are removed.
        maximum: Components whose centroid y-coordinate is above this
            value are removed.

    Returns:
        A ``uint8`` array shaped like the label map: 255 on pixels that
        belong to a surviving component, 0 everywhere else.
    """
    _, labels, _, centroids = cv.connectedComponentsWithStats(bw)
    centroid_y = centroids[:, 1]
    # One boolean per label; indexing it with the label map filters every
    # pixel in a single pass instead of rescanning the image per label.
    keep = ~((centroid_y < minimum) | (centroid_y > maximum))
    keep[0] = False  # label 0 is the background and is never foreground
    return keep[labels].astype(np.uint8) * 255

# First discard components by size, then by vertical centroid position,
# and write whatever is left to disk.
sized = size_threshold(thresh, minimum=60, maximum=300)
centered = y_centroid_threshold(sized, minimum=40, maximum=63)
cv.imwrite('ocr_out.png', centered)

Suggestion : 3

Here are two potential approaches and a method to correct distorted text. Remove text contours: create a rectangular kernel with cv2.getStructuringElement and then perform morphological operations to remove noise. Maybe eroding the image at this step would produce a somewhat acceptable result; instead, here that image is dilated again and used as a mask to get a less noisy ROI from the perspective-transformed image. With the observation that the desired text to extract has a distinguishable contrast from the noise in the image, we can use color thresholding to isolate the text. The idea is to convert to HSV format, then color threshold to obtain a mask using a lower/upper color range. From there we use the same process to OCR with Pytesseract.

Result from OCR

YabVzu

Code

import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('2.png')
if image is None:
    # cv2.imread returns None for a missing or unreadable file.
    raise FileNotFoundError("could not read image '2.png'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morph open to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

# Find contours and remove small noise
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version;
# the contour list is the first of two or the second of three.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 50:
        # Paint the small contour with 0 (filled) to erase it.
        cv2.drawContours(opening, [c], -1, 0, -1)

# Invert and apply slight Gaussian blur
result = 255 - opening
result = cv2.GaussianBlur(result, (3, 3), 0)

# Perform OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey()

Updated code to include perspective transform

import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
if image is None:
    # cv2.imread returns None for a missing or unreadable file.
    raise FileNotFoundError("could not read image '1.png'")
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)

# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find rotated bounding box then perspective transform
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version;
# the contour list is the first of two or the second of three.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
if len(cnts) == 0:
    raise RuntimeError("no contour found in the colour-thresholded mask")
# If stray blobs survive the closing step, use the largest contour; with a
# single contour this is the same element as cnts[0].
text_contour = max(cnts, key=cv2.contourArea)
rect = cv2.minAreaRect(text_contour)
box = cv2.boxPoints(rect)
# np.int0 was an alias of np.intp and was removed in NumPy 2.0.
box = box.astype(np.intp)
cv2.drawContours(image, [box], 0, (36, 255, 12), 2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))

# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)

cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()

Suggestion : 4

by Adrian Rosebrock on October 20, 2021

To make this example more concrete, again consider Figure 2, where we have the following 5 x 5 grid of pixel values from the noisy image:

[
   [247 227 242 253 237]
   [244 228 225 212 219]
   [223 218 252 222 221]
   [242 244 228 240 230]
   [217 233 237 243 252]
]

We then flatten that into a single list of 5 x 5 = 25-d values:

[247 227 242 253 237 244 228 225 212 219 223 218 252 222 221 242 244 228
 240 230 217 233 237 243 252]

Now, let’s assume that we have the following 5 x 5 window from our gold standard/target image:

[
   [0 0 0 0 0]
   [0 0 0 0 1]
   [0 0 1 1 1]
   [0 0 1 1 1]
   [0 0 0 1 1]
]

Luckily, OpenCV is pip-installable:

$ pip install opencv-contrib-python

Before we get any farther, let’s familiarize ourselves with the files:

|-- pyimagesearch
|   |-- __init__.py
|   |-- denoising
|   |   |-- __init__.py
|   |   |-- helpers.py
|-- config
|   |-- __init__.py
|   |-- denoise_config.py
|-- build_features.py
|-- denoise_document.py
|-- denoiser.pickle
|-- denoising-dirty-documents
|   |-- test
|   |   |-- 1.png
|   |   |-- 10.png
|   |   |-- ...
|   |   |-- 94.png
|   |   |-- 97.png
|   |-- train
|   |   |-- 101.png
|   |   |-- 102.png
|   |   |-- ...
|   |   |-- 98.png
|   |   |-- 99.png
|   |-- train_cleaned
|   |   |-- 101.png
|   |   |-- 102.png
|   |   |-- ...
|   |   |-- 98.png
|   |   |-- 99.png
|-- train_denoiser.py

Suggestion : 5

Here is the code to remove the Gaussian noise from a color image using the Non-local Means Denoising algorithm. In this section, we'll use the cv2.fastNlMeansDenoisingColored() function, which is the implementation of the Non-local Means Denoising algorithm. If we apply the cv2.fastNlMeansDenoisingColored(img,None,10,10,7,21) function to the wiki's Gaussian sample above: Note: The function converts the image to the CIELAB colorspace and then separately denoises the L and AB components with the given h parameters using the fastNlMeansDenoising function.

In this section, we'll use cv2.fastNlMeansDenoisingColored() function which is the implementation of Non-local Means Denoising algorithm. It is defined like this:

cv2.fastNlMeansDenoisingColored(src[, dst[, h[, hColor[, templateWindowSize[, searchWindowSize]]]]])

Here is the code to remove the Gaussian noise from a color image using the Non-local Means Denoising algorithm:

import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('DiscoveryMuseum_NoiseAdded.jpg')
if img is None:
    # cv2.imread returns None for a missing or unreadable file.
    raise FileNotFoundError(
        "could not read image 'DiscoveryMuseum_NoiseAdded.jpg'")
# OpenCV loads images as BGR; switch to RGB for display with matplotlib.
rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Denoising
dst = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)

# Switch the denoised result to RGB as well.
rgb_dst = cv2.cvtColor(dst, cv2.COLOR_BGR2RGB)

# Noisy image on top, denoised image below.
plt.subplot(211)
plt.imshow(rgb_img)
plt.subplot(212)
plt.imshow(rgb_dst)
plt.show()