From 762a5e258a90387922d6c6eb3ecc9a7ca7c96144 Mon Sep 17 00:00:00 2001 From: Samo Penic <samo.penic@gmail.com> Date: Fri, 16 Nov 2018 18:53:49 +0000 Subject: [PATCH] refactored and debugged. --- aoiOcr.py | 4 + sid_process.py | 68 +++++++++++++++++++-------------- Ocr.py | 8 +++- 3 files changed, 48 insertions(+), 32 deletions(-) diff --git a/Ocr.py b/Ocr.py index 35c6729..970fb70 100644 --- a/Ocr.py +++ b/Ocr.py @@ -6,13 +6,14 @@ class Paper: - def __init__(self, filename=None, sid_classifier=None): + def __init__(self, filename=None, sid_classifier=None, settings=None): self.filename = filename self.invalid = None self.QRData = None + self.settings = settings self.errors = [] self.warnings = [] - self.sid_classifier=sid_classifier + self.sid_classifier = sid_classifier if filename is not None: self.loadImage(filename) self.runOcr() @@ -216,11 +217,14 @@ def get_enhanced_sid(self): if self.sid_classifier is None: return "x" + if self.settings is not None: + sid_mask=self.settings.get("sid_mask", None) es = getSID( self.img[ int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight), int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth), ], self.sid_classifier, + sid_mask ) return es diff --git a/aoiOcr.py b/aoiOcr.py index 72ed3b9..5a27df0 100644 --- a/aoiOcr.py +++ b/aoiOcr.py @@ -1,10 +1,12 @@ from Ocr import Paper from sklearn.externals import joblib + +settings={'sid_mask':'11xx0xxx',} classifier = joblib.load('filename.joblib') #p=Paper(filename='testpage300dpi_scan1.png') -p=Paper(filename='sizif111.tif', sid_classifier=classifier) +p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings=settings) #p=Paper(filename='processed_scans/20141016095134535_0028.tif') print(p.QRData) diff --git a/sid_process.py b/sid_process.py index 90d9b33..67c689a 100644 --- a/sid_process.py +++ b/sid_process.py @@ -1,9 +1,8 @@ import cv2 import numpy as np -from skimage import morphology,img_as_ubyte +from skimage import morphology, img_as_ubyte from sklearn import svm from sklearn.externals import joblib - """ @@ -61,48 +60,59 @@ return np.ones((x, y), np.uint8) -def getSID(image, classifier): - image=255-image - image=img_as_ubyte(image>100) +def segment_by_contours(image, sorted_ctrs, classifier): + sid_no = "" + for i, ctr in enumerate(sorted_ctrs): + # Get bounding box + x, y, w, h = cv2.boundingRect(ctr) + # Getting ROI + if w < h / 2: + sid_no = sid_no + "1" + continue + roi = image[y : y + h, x : x + w] + roi = img_as_ubyte(roi < 128) + roi = cv2.resize(roi, (32, 32)) + + # cv2.rectangle(image,(x,y),( x + w, y + h ),(0,255,0),2) + cv2.imwrite("sid_no_{}.png".format(i), roi) + sid_no = sid_no + str(classifier.predict(roi.reshape(1, -1) / 255.0)[0]) + return sid_no + + +def getSID(image, classifier, sid_mask): + image = 255 - image + image = img_as_ubyte(image > 100) cv2.imwrite("enSID0.png", image) # Remove noise - image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2,2), iterations=1) + image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=1) # Closing. Connect non connected parts image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4) # Again noise removal after closing - image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8,8), iterations=1) + image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1) # Skeletonization - image = img_as_ubyte(morphology.thin(image>128)) - cv2.imwrite("enSID1.png",image) + image = img_as_ubyte(morphology.thin(image > 128)) + cv2.imwrite("enSID1.png", image) # Stub removal (might not be necessary if thinning instead of skeletonize is used above # Making lines stronger image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 5), iterations=1) image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(10, 10)) # Thining again - image = img_as_ubyte(morphology.skeletonize(image>0.5)) + image = img_as_ubyte(morphology.skeletonize(image > 0.5)) image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10)) - im2,ctrs, hier = cv2.findContours(image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + im2, ctrs, hier = cv2.findContours( + image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0]) - #classifier = joblib.load('filename.joblib') - - sid_no="" - for i, ctr in enumerate(sorted_ctrs): - # Get bounding box - x, y, w, h = cv2.boundingRect(ctr) - # Getting ROI - if(w<h/2): - sid_no=sid_no+"1" - continue - roi = image[y:y+h, x:x+w] - roi = img_as_ubyte(roi < 128) - roi = cv2.resize(roi,(32,32)) - - #cv2.rectangle(image,(x,y),( x + w, y + h ),(0,255,0),2) - cv2.imwrite('sid_no_{}.png'.format(i), roi) - sid_no=sid_no+str(classifier.predict(roi.reshape(1,-1)/255.0)[0]) + sid_no = "" + sid_len = 0 + if sid_mask is not None: + if len(sid_mask)==len(sorted_ctrs): + sid_no=segment_by_contours(image,sorted_ctrs,classifier) + else: + print("Ooops have to find another way") print(sid_no) - return image + return sid_no -- Gitblit v1.9.3