From 5cb7c1dba78b025ff333a202b27f04a2230c9da9 Mon Sep 17 00:00:00 2001 From: Samo Penic <samo.penic@gmail.com> Date: Fri, 16 Nov 2018 21:12:32 +0000 Subject: [PATCH] recognition is a bit more robust.... --- aoiOcr.py | 2 sid_process.py | 56 ++++++++++++++++----------- Ocr.py | 4 + 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/Ocr.py b/Ocr.py index f680669..662cb0b 100644 --- a/Ocr.py +++ b/Ocr.py @@ -220,7 +220,7 @@ return "x" if self.settings is not None: sid_mask=self.settings.get("sid_mask", None) - es = getSID( + es,err,warn = getSID( self.img[ int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight), int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth), @@ -228,6 +228,8 @@ self.sid_classifier, sid_mask ) + [self.errors.append(e) for e in err] + [self.warnings.append(w) for w in warn] return es diff --git a/aoiOcr.py b/aoiOcr.py index 02eef68..41b45f0 100644 --- a/aoiOcr.py +++ b/aoiOcr.py @@ -2,7 +2,7 @@ from sklearn.externals import joblib -settings = {"sid_mask": "11xx0xxx", "answer_treshold": 0.25} +settings = {"sid_mask": "61xx0xxx", "answer_treshold": 0.25} classifier = joblib.load("filename.joblib") #p = Paper(filename="testpage300dpi_scan1.png") diff --git a/sid_process.py b/sid_process.py index 48326c0..14beb68 100644 --- a/sid_process.py +++ b/sid_process.py @@ -79,23 +79,32 @@ return sid_no -def segment_by_sid_len(image,sid_len, classifier): - sid_no="" - #find biggest block of pixels +def segment_by_sid_len(image, sid_mask, classifier): + sid_no = "" + sid_len = len(sid_mask) + if sid_mask[0] == "1": + move_left = 45 + elif sid_mask[0] == "x": + move_left = 55 + else: + move_left = 0 + # find biggest block of pixels - image1=cv2.morphologyEx(image,cv2.MORPH_DILATE, kernel(5,25), iterations=3) - cv2.imwrite("sidblock1.png",image1) + image1 = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 25), iterations=3) + cv2.imwrite("sidblock1.png", image1) im2, ctrs, hier = cv2.findContours( image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE ) - sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.contourArea(ctr)) #get bigges contour + sorted_ctrs = sorted( + ctrs, key=lambda ctr: cv2.contourArea(ctr) + ) # get bigges contour x, y, w, h = cv2.boundingRect(sorted_ctrs[-1]) - image=image[y:y+h,x+25:x+w-25] - cv2.imwrite("sidblock2.png",image) + image = image[y : y + h, x + 25 - move_left : x + w - 25] + cv2.imwrite("sidblock2.png", image) imgHeight, imgWidth = image.shape[0:2] - numWidth=int(imgWidth/(sid_len)) - for i in range(0,sid_len): - num=image[:,i*numWidth:(i+1)*numWidth] + numWidth = int(imgWidth / (sid_len)) + for i in range(0, sid_len): + num = image[:, i * numWidth : (i + 1) * numWidth] num = img_as_ubyte(num < 128) num = cv2.resize(num, (32, 32)) @@ -106,6 +115,7 @@ def getSID(image, classifier, sid_mask): + sid_warn = [] image = 255 - image image = img_as_ubyte(image > 100) cv2.imwrite("enSID0.png", image) @@ -115,7 +125,8 @@ image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4) # Again noise removal after closing - #image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1) + # image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1) + # don't do too much noise removal. image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1) # Skeletonization @@ -129,21 +140,20 @@ # Thining again image = img_as_ubyte(morphology.skeletonize(image > 0.5)) image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10)) - cv2.imwrite("enhancedSID.png",image) + cv2.imwrite("enhancedSID.png", image) im2, ctrs, hier = cv2.findContours( image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE ) sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0]) sid_no = "" - #sid_len = len(sid_mask) - #sid_no = segment_by_sid_len(image, sid_len, classifier) - #if sid_mask is not None: - print(len(sid_mask),len(sorted_ctrs)) - #if len(sid_mask)==len(sorted_ctrs): - sid_no=segment_by_contours(image,sorted_ctrs[1:],classifier) + print(len(sid_mask), len(sorted_ctrs)) + sid_no = segment_by_contours( + image, sorted_ctrs[1:], classifier + ) # we remove largest contour that surrounds whole image print(sid_no) - if(len(sid_no)!=len(sid_mask)): - print("Ooops have to find another way") - sid_no=segment_by_sid_len(image,len(sid_mask),classifier) - return sid_no + if len(sid_no) != len(sid_mask): + #print("Ooops have to find another way") + sid_warn.append("Trying second SID algorithm.") + sid_no = segment_by_sid_len(image, sid_mask, classifier) + return (sid_no, [], sid_warn) -- Gitblit v1.9.3