From 5cb7c1dba78b025ff333a202b27f04a2230c9da9 Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Fri, 16 Nov 2018 21:12:32 +0000
Subject: [PATCH] Make SID recognition more robust and report warnings from getSID

---
 aoiOcr.py      |    2 
 sid_process.py |   56 ++++++++++++++++-----------
 Ocr.py         |    4 +
 3 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/Ocr.py b/Ocr.py
index f680669..662cb0b 100644
--- a/Ocr.py
+++ b/Ocr.py
@@ -220,7 +220,7 @@
             return "x"
         if self.settings is not None:
             sid_mask=self.settings.get("sid_mask", None)
-        es = getSID(
+        es,err,warn = getSID(
             self.img[
                 int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
                 int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
@@ -228,6 +228,8 @@
             self.sid_classifier,
             sid_mask
         )
+        [self.errors.append(e) for e in err]
+        [self.warnings.append(w) for w in warn]
         return es
 
 
diff --git a/aoiOcr.py b/aoiOcr.py
index 02eef68..41b45f0 100644
--- a/aoiOcr.py
+++ b/aoiOcr.py
@@ -2,7 +2,7 @@
 from sklearn.externals import joblib
 
 
-settings = {"sid_mask": "11xx0xxx", "answer_treshold": 0.25}
+settings = {"sid_mask": "61xx0xxx", "answer_treshold": 0.25}
 classifier = joblib.load("filename.joblib")
 
 #p = Paper(filename="testpage300dpi_scan1.png")
diff --git a/sid_process.py b/sid_process.py
index 48326c0..14beb68 100644
--- a/sid_process.py
+++ b/sid_process.py
@@ -79,23 +79,32 @@
     return sid_no
 
 
-def segment_by_sid_len(image,sid_len, classifier):
-    sid_no=""
-    #find biggest block of pixels
+def segment_by_sid_len(image, sid_mask, classifier):
+    sid_no = ""
+    sid_len = len(sid_mask)
+    if sid_mask[0] == "1":
+        move_left = 45
+    elif sid_mask[0] == "x":
+        move_left = 55
+    else:
+        move_left = 0
+    # find biggest block of pixels
 
-    image1=cv2.morphologyEx(image,cv2.MORPH_DILATE, kernel(5,25), iterations=3)
-    cv2.imwrite("sidblock1.png",image1)
+    image1 = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 25), iterations=3)
+    cv2.imwrite("sidblock1.png", image1)
     im2, ctrs, hier = cv2.findContours(
         image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
     )
-    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.contourArea(ctr)) #get bigges contour
+    sorted_ctrs = sorted(
+        ctrs, key=lambda ctr: cv2.contourArea(ctr)
+    )  # sort ascending by area; the biggest contour ends up last
     x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])
-    image=image[y:y+h,x+25:x+w-25]
-    cv2.imwrite("sidblock2.png",image)
+    image = image[y : y + h, x + 25 - move_left : x + w - 25]
+    cv2.imwrite("sidblock2.png", image)
     imgHeight, imgWidth = image.shape[0:2]
-    numWidth=int(imgWidth/(sid_len))
-    for i in range(0,sid_len):
-        num=image[:,i*numWidth:(i+1)*numWidth]
+    numWidth = int(imgWidth / (sid_len))
+    for i in range(0, sid_len):
+        num = image[:, i * numWidth : (i + 1) * numWidth]
         num = img_as_ubyte(num < 128)
         num = cv2.resize(num, (32, 32))
 
@@ -106,6 +115,7 @@
 
 
 def getSID(image, classifier, sid_mask):
+    sid_warn = []
     image = 255 - image
     image = img_as_ubyte(image > 100)
     cv2.imwrite("enSID0.png", image)
@@ -115,7 +125,8 @@
     image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
     # Again noise removal after closing
 
-    #image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
+    # image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
+    # don't do too much noise removal.
     image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)
 
     # Skeletonization
@@ -129,21 +140,20 @@
     # Thining again
     image = img_as_ubyte(morphology.skeletonize(image > 0.5))
     image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
-    cv2.imwrite("enhancedSID.png",image)
+    cv2.imwrite("enhancedSID.png", image)
     im2, ctrs, hier = cv2.findContours(
         image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
     )
     sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
 
     sid_no = ""
-    #sid_len = len(sid_mask)
-    #sid_no = segment_by_sid_len(image, sid_len, classifier)
-    #if sid_mask is not None:
-    print(len(sid_mask),len(sorted_ctrs))
-    #if len(sid_mask)==len(sorted_ctrs):
-    sid_no=segment_by_contours(image,sorted_ctrs[1:],classifier)
+    print(len(sid_mask), len(sorted_ctrs))
+    sid_no = segment_by_contours(
+        image, sorted_ctrs[1:], classifier
+    )  # skip the first (leftmost) contour, presumably the one that surrounds the whole image
     print(sid_no)
-    if(len(sid_no)!=len(sid_mask)):
-        print("Ooops have to find another way")
-        sid_no=segment_by_sid_len(image,len(sid_mask),classifier)
-    return sid_no
+    if len(sid_no) != len(sid_mask):
+        #print("Ooops have to find another way")
+        sid_warn.append("Trying second SID algorithm.")
+        sid_no = segment_by_sid_len(image, sid_mask, classifier)
+    return (sid_no, [], sid_warn)

--
Gitblit v1.9.3