too large or too small to be letters
not vertically centered with the rest of the text
import cv2 as cv
import numpy as np
# Load the input image. cv.imread returns None instead of raising when the
# file is missing or unreadable, so fail early with a clear message.
im = cv.imread('ocr.png')
if im is None:
    raise FileNotFoundError("could not read 'ocr.png'")
# Binarize the grayscale image: pixels brighter than 127 become 255, all
# others become 0.
imgray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
ret, thresh = cv.threshold(imgray, 127, 255, cv.THRESH_BINARY)
def size_threshold(bw, minimum, maximum):
    """Keep only the connected components whose pixel area is within bounds.

    Args:
        bw: Binary image handed to ``cv.connectedComponentsWithStats``
            (that function expects an 8-bit single-channel image).
        minimum: Smallest component area, in pixels, that is kept.
        maximum: Largest component area, in pixels, that is kept.

    Returns:
        A ``uint8`` image in which the pixels of the kept components are 255
        and all other pixels are 0.
    """
    _, labels, stats, _ = cv.connectedComponentsWithStats(bw)
    areas = stats[:, cv.CC_STAT_AREA]
    # One flag per label: True when the component has to be removed.
    drop = (areas < minimum) | (areas > maximum)
    keep = ~drop
    # Label 0 is the background and never belongs to the output.
    keep[0] = False
    # Looking the flags up through the label image classifies every pixel in
    # one pass instead of rescanning the image once per rejected label.
    return keep[labels].astype(np.uint8) * 255
def y_centroid_threshold(bw, minimum, maximum):
    """Keep only the connected components whose centroid row is within bounds.

    Args:
        bw: Binary image handed to ``cv.connectedComponentsWithStats``
            (that function expects an 8-bit single-channel image).
        minimum: Smallest centroid y coordinate that is kept.
        maximum: Largest centroid y coordinate that is kept.

    Returns:
        A ``uint8`` image in which the pixels of the kept components are 255
        and all other pixels are 0.
    """
    _, labels, _, centroids = cv.connectedComponentsWithStats(bw)
    # Column 1 of the centroid table is the y coordinate (column 0 is x).
    y_centers = centroids[:, 1]
    # One flag per label: True when the component has to be removed.
    drop = (y_centers < minimum) | (y_centers > maximum)
    keep = ~drop
    # Label 0 is the background and never belongs to the output.
    keep[0] = False
    # Looking the flags up through the label image classifies every pixel in
    # one pass instead of rescanning the image once per rejected label.
    return keep[labels].astype(np.uint8) * 255
# Drop components whose area is outside 60-300 pixels (too large or too small
# to be letters), then those whose centroid row is outside 40-63 (not
# vertically centered with the rest of the text).
sized = size_threshold(thresh, 60, 300)
centered = y_centroid_threshold(sized, 40, 63)
# cv.imwrite reports failure through its return value rather than raising.
if not cv.imwrite('ocr_out.png', centered):
    raise OSError("could not write 'ocr_out.png'")
I'm trying to do OCR with Tesseract; to get a better result, I'd like to remove the background noise before sending the image to Tesseract.
Starting with your first result, you could remove noise that is:
I already know the text has a fixed color and use cv2.inRange to remove the background noise, but the problem is that the background noise has a color similar to the text color, so I'm stuck in this situation.
Here is my image for processing, the original test:
- I also tried to use bitwise_and to merge the white and black results together, but got a similar result, which is not good either. Can someone help me or recommend anything? Thank you in advance.
from PIL
import Image
from pytesseract
import *
import cv2
import numpy as np
def img_hsv_mask_white(img):
    """Black out every pixel of ``img`` outside a bright, low-saturation range.

    Args:
        img: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).

    Returns:
        A new image that equals ``img`` where the blurred mask is non-zero
        and is zero everywhere else.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # For HSV, OpenCV uses H: 0 - 179, S: 0 - 255, V: 0 - 255.
    lower_hsv = np.array([0, 0, 185])
    upper_hsv = np.array([179, 17, 235])
    mask = cv2.inRange(hsv, lower_hsv, upper_hsv)
    # Box-blurring the 0/255 mask also makes the neighbours of matching
    # pixels non-zero, so the mask grows slightly around each match.
    blur = cv2.blur(mask, (3, 3))
    return cv2.bitwise_and(img, img, mask=blur)
def img_hsv_mask_black(img):
    """Black out every pixel of ``img`` outside a dark HSV range.

    Args:
        img: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).

    Returns:
        A new image that equals ``img`` where the blurred mask is non-zero
        and is zero everywhere else.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # For HSV, OpenCV uses H: 0 - 179, S: 0 - 255, V: 0 - 255.
    lower_hsv = np.array([0, 0, 0])
    upper_hsv = np.array([60, 80, 70])
    mask = cv2.inRange(hsv, lower_hsv, upper_hsv)
    # Box-blurring the 0/255 mask also makes the neighbours of matching
    # pixels non-zero, so the mask grows around each match.
    blur = cv2.blur(mask, (8, 8))
    return cv2.bitwise_and(img, img, mask=blur)
def immerge(img1, img2):
    """Return the per-element bitwise AND of two images.

    NOTE(review): only bits set in both inputs survive, so combining two
    images that were masked with non-overlapping masks yields a mostly black
    result. If the goal is to keep the content of both inputs,
    ``cv2.bitwise_or`` may be what is intended -- confirm with the caller.
    """
    return cv2.bitwise_and(img1, img2)
#require module: numpy, opencv - python, Pillow, pytesseract
if __name__ == "__main__":
    # Path of the Tesseract executable (Windows install location).
    # NOTE(review): this relies on the name `pytesseract` referring to the
    # module that owns `tesseract_cmd` -- confirm against the file's imports.
    pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
    # Process the sample images 0711/1.png through 0711/8.png.
    for x in range(1, 9):
        file = "0711/{0}.png".format(x)
        srcimg = cv2.imread(file, cv2.IMREAD_UNCHANGED)
        if srcimg is None:
            # cv2.imread returns None instead of raising on a missing or
            # unreadable file; stop with a clear message.
            raise FileNotFoundError("could not read {0}".format(file))
        white = img_hsv_mask_white(srcimg)
        black = img_hsv_mask_black(srcimg)
        merged = immerge(white, black)
        code = pytesseract.image_to_string(merged, lang='eng')
        print(code)
        # Show the merged image and wait for a key press before continuing.
        cv2.imshow(file, merged)
        cv2.waitKey(0)
too large or too small to be letters
not vertically centered with the rest of the text
import cv2 as cv
import numpy as np
# Load the input image. cv.imread returns None instead of raising when the
# file is missing or unreadable, so fail early with a clear message.
im = cv.imread('ocr.png')
if im is None:
    raise FileNotFoundError("could not read 'ocr.png'")
# Binarize the grayscale image: pixels brighter than 127 become 255, all
# others become 0.
imgray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
ret, thresh = cv.threshold(imgray, 127, 255, cv.THRESH_BINARY)
def size_threshold(bw, minimum, maximum):
    """Keep only the connected components whose pixel area is within bounds.

    Args:
        bw: Binary image handed to ``cv.connectedComponentsWithStats``
            (that function expects an 8-bit single-channel image).
        minimum: Smallest component area, in pixels, that is kept.
        maximum: Largest component area, in pixels, that is kept.

    Returns:
        A ``uint8`` image in which the pixels of the kept components are 255
        and all other pixels are 0.
    """
    _, labels, stats, _ = cv.connectedComponentsWithStats(bw)
    areas = stats[:, cv.CC_STAT_AREA]
    # One flag per label: True when the component has to be removed.
    drop = (areas < minimum) | (areas > maximum)
    keep = ~drop
    # Label 0 is the background and never belongs to the output.
    keep[0] = False
    # Looking the flags up through the label image classifies every pixel in
    # one pass instead of rescanning the image once per rejected label.
    return keep[labels].astype(np.uint8) * 255
def y_centroid_threshold(bw, minimum, maximum):
    """Keep only the connected components whose centroid row is within bounds.

    Args:
        bw: Binary image handed to ``cv.connectedComponentsWithStats``
            (that function expects an 8-bit single-channel image).
        minimum: Smallest centroid y coordinate that is kept.
        maximum: Largest centroid y coordinate that is kept.

    Returns:
        A ``uint8`` image in which the pixels of the kept components are 255
        and all other pixels are 0.
    """
    _, labels, _, centroids = cv.connectedComponentsWithStats(bw)
    # Column 1 of the centroid table is the y coordinate (column 0 is x).
    y_centers = centroids[:, 1]
    # One flag per label: True when the component has to be removed.
    drop = (y_centers < minimum) | (y_centers > maximum)
    keep = ~drop
    # Label 0 is the background and never belongs to the output.
    keep[0] = False
    # Looking the flags up through the label image classifies every pixel in
    # one pass instead of rescanning the image once per rejected label.
    return keep[labels].astype(np.uint8) * 255
# Drop components whose area is outside 60-300 pixels (too large or too small
# to be letters), then those whose centroid row is outside 40-63 (not
# vertically centered with the rest of the text).
sized = size_threshold(thresh, 60, 300)
centered = y_centroid_threshold(sized, 40, 63)
# cv.imwrite reports failure through its return value rather than raising.
if not cv.imwrite('ocr_out.png', centered):
    raise OSError("could not write 'ocr_out.png'")
Here are two potential approaches and a method to correct distorted text:
Remove text contours. Create a rectangular kernel with cv2.getStructuringElement and then perform morphological operations to remove noise.
Maybe eroding the image at this step would produce a somewhat acceptable result. Instead, the image is dilated again here and used as a mask to get a less noisy ROI from the perspective-transformed image.
Given the observation that the text to extract has a distinguishable contrast from the noise in the image, we can use color thresholding to isolate the text. The idea is to convert the image to HSV format and then color threshold it to obtain a mask using a lower/upper color range. From there, we use the same process to OCR with Pytesseract.
Result from OCR
YabVzu
Code
import cv2
import pytesseract
import numpy as np

# Path of the Tesseract executable (Windows install location).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morph open to remove noise
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

# Find contours and remove small noise
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version; the
# contour list is the first of 2 or the second of 3.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 50:
        # Paint the small contour black (color 0, filled) to erase it.
        cv2.drawContours(opening, [c], -1, 0, -1)

# Invert and apply slight Gaussian blur
result = 255 - opening
result = cv2.GaussianBlur(result, (3, 3), 0)

# Perform OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey()
Updated code to include perspective transform
import cv2
import pytesseract
import numpy as np
from imutils.perspective import four_point_transform

# Path of the Tesseract executable (Windows install location).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, convert to HSV, color threshold to get mask
image = cv2.imread('1.png')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 0])
upper = np.array([100, 175, 110])
mask = cv2.inRange(hsv, lower, upper)

# Morph close to connect individual text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find rotated bounding box then perspective transform
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version; the
# contour list is the first of 2 or the second of 3.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
if len(cnts) == 0:
    raise RuntimeError("no contour found in the color mask")
# NOTE(review): the first contour is used as-is; this assumes the closing
# step merged all of the text into one contour.
rect = cv2.minAreaRect(cnts[0])
box = cv2.boxPoints(rect)
# np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
box = np.intp(box)
cv2.drawContours(image, [box], 0, (36, 255, 12), 2)
warped = four_point_transform(255 - mask, box.reshape(4, 2))

# OCR
data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
print(data)

cv2.imshow('mask', mask)
cv2.imshow('close', close)
cv2.imshow('warped', warped)
cv2.imshow('image', image)
cv2.waitKey()
by Adrian Rosebrock on October 20, 2021
To make this example more concrete, again consider Figure 2, where we have the following 5 x 5
grid of pixel values from the noisy image:
[[247 227 242 253 237]
 [244 228 225 212 219]
 [223 218 252 222 221]
 [242 244 228 240 230]
 [217 233 237 243 252]]
We then flatten that into a single list of 5 x 5 = 25-d
values:
[247 227 242 253 237 244 228 225 212 219 223 218 252 222 221 242 244 228 240 230 217 233 237 243 252 ]
Now, let’s assume that we have the following 5 x 5
window from our gold standard/target image:
[[0 0 0 0 0]
 [0 0 0 0 1]
 [0 0 1 1 1]
 [0 0 1 1 1]
 [0 0 0 1 1]]
Luckily, OpenCV is pip-installable:
$ pip install opencv-contrib-python
Before we get any farther, let’s familiarize ourselves with the files:
|-- pyimagesearch
|   |-- __init__.py
|   |-- denoising
|   |   |-- __init__.py
|   |   |-- helpers.py
|-- config
|   |-- __init__.py
|   |-- denoise_config.py
|-- build_features.py
|-- denoise_document.py
|-- denoiser.pickle
|-- denoising-dirty-documents
|   |-- test
|   |   |-- 1.png
|   |   |-- 10.png
|   |   |-- ...
|   |   |-- 94.png
|   |   |-- 97.png
|   |-- train
|   |   |-- 101.png
|   |   |-- 102.png
|   |   |-- ...
|   |   |-- 98.png
|   |   |-- 99.png
|   |-- train_cleaned
|   |   |-- 101.png
|   |   |-- 102.png
|   |   |-- ...
|   |   |-- 98.png
|   |   |-- 99.png
|-- train_denoiser.py
Here is the code to remove the Gaussian noise from a color image using the Non-local Means Denoising algorithm:
In this section, we'll use the cv2.fastNlMeansDenoisingColored() function, which is the implementation of the Non-local Means Denoising algorithm. It is defined like this:
If we apply the cv2.fastNlMeansDenoisingColored(img,None,10,10,7,21) function to the wiki's Gaussian sample above:
Note: The function converts the image to the CIELAB colorspace and then separately denoises the L and AB components with the given h parameters using the fastNlMeansDenoising function.
In this section, we'll use cv2.fastNlMeansDenoisingColored() function which is the implementation of Non-local Means Denoising algorithm. It is defined like this:
cv2.fastNlMeansDenoisingColored(src[, dst[, h[, hColor[, templateWindowSize[, searchWindowSize]]]]])
Here is the code to remove the Gaussian noise from a color image using the Non-local Means Denoising algorithm:
import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('DiscoveryMuseum_NoiseAdded.jpg')

# OpenCV loads images as BGR while matplotlib displays RGB, so swap the
# channel order before plotting.
b, g, r = cv2.split(img)  # get b, g, r
rgb_img = cv2.merge([r, g, b])  # switch it to rgb

# Denoising: h=10, hColor=10, templateWindowSize=7, searchWindowSize=21
dst = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)

b, g, r = cv2.split(dst)  # get b, g, r
rgb_dst = cv2.merge([r, g, b])  # switch it to rgb

# Noisy input on top, denoised result below.
plt.subplot(211)
plt.imshow(rgb_img)
plt.subplot(212)
plt.imshow(rgb_dst)
plt.show()