From d5c694ac711ca3b434bf16bd920b90d1a7e758c4 Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Sat, 17 Nov 2018 09:57:31 +0000
Subject: [PATCH] Improving the robustness of all three algorithms.

---
 aoiOcr.py      |   33 ++++++++++++----
 sid_process.py |   82 ++++++++++++++++++++++++++++++++++++-----
 Ocr.py         |    2 
 template-8.png |    0 
 4 files changed, 97 insertions(+), 20 deletions(-)

diff --git a/Ocr.py b/Ocr.py
index 662cb0b..f4447e5 100644
--- a/Ocr.py
+++ b/Ocr.py
@@ -222,7 +222,7 @@
             sid_mask=self.settings.get("sid_mask", None)
         es,err,warn = getSID(
             self.img[
-                int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
+                int(0.04 * self.imgHeight) : int(0.095 * self.imgHeight),
                 int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
             ],
             self.sid_classifier,
diff --git a/aoiOcr.py b/aoiOcr.py
index 0d59684..bb74503 100644
--- a/aoiOcr.py
+++ b/aoiOcr.py
@@ -3,14 +3,24 @@
 
 from glob import glob
 
-settings = {"sid_mask": "61xx0xxx", "answer_treshold": 0.25}
+settings = {"sid_mask": "64xx0xxx", "answer_treshold": 0.25}
 classifier = joblib.load("filename.joblib")
 
-#p = Paper(filename="testpage300dpi_scan1.png")
-#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings=settings)
+# p = Paper(filename="testpage300dpi_scan1.png")
+#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_treshold": 0.25})
 #p=Paper(filename='processed_scans/20141016095134535_0006.tif', sid_classifier=classifier, settings=settings)
-#p=Paper(filename='processed_scans/20151111080408825_0001.tif', sid_classifier=classifier, settings=settings)
-p=Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings)
+#p = Paper(filename="processed_scans/20151111080408825_0001.tif",sid_classifier=classifier,settings=settings,)
+#p=Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings)
+pa = [
+    "processed_scans/20141016095134535_0006.tif",
+    "processed_scans/20141016095134535_0028.tif",
+    "processed_scans/20141016095134535_0028.tif",
+    "processed_scans/20141016095134535_0037.tif",
+    "processed_scans/20141021095744144_0005.tif",
+    "processed_scans/20141021095744144_0009.tif",
+    "processed_scans/20141028095553745_0018.tif",
+]
+p=Paper(filename=pa[6], sid_classifier=classifier, settings=settings)
 
 # print(p.QRData)
 # print(p.errors)
@@ -23,8 +33,13 @@
 
 
 print(p.get_paper_ocr_data())
-exit(0)
-filelist = glob("processed_scans/*.tif")
-for f in filelist:
-    print(f,Paper(filename=f, sid_classifier=classifier, settings=settings).get_paper_ocr_data())
 
+filelist = glob("processed_scans/*.tif")
+for f in sorted(filelist):
+    print("processing: {}".format(f))
+    print(
+        f,
+        Paper(
+            filename=f, sid_classifier=classifier, settings=settings
+        ).get_paper_ocr_data(),
+    )
diff --git a/sid_process.py b/sid_process.py
index 4674c0e..1f93d3c 100644
--- a/sid_process.py
+++ b/sid_process.py
@@ -57,6 +57,13 @@
 def kernel(x, y):
     return np.ones((x, y), np.uint8)
 
+def sid_compare(sid_no, sid_mask):
+    """Return True if sid_no has the mask's length and equals it at every non-'x' mask position."""
+    if len(sid_no) != len(sid_mask):
+        return False  # zip() below would truncate and accept a short or empty SID
+    return all(s == "x" or s == es for s, es in zip(sid_mask, sid_no))
+
+
 
 def segment_by_contours(image, sorted_ctrs, classifier):
     sid_no = ""
@@ -77,7 +84,7 @@
     return sid_no
 
 
-def segment_by_sid_len(image, sid_mask, classifier):
+def segment_by_sid_len(image, original_image, sid_mask, classifier):
     sid_no = ""
     sid_len = len(sid_mask)
     if sid_mask[0] == "1":
@@ -86,9 +93,11 @@
         move_left = 55
     else:
         move_left = 0
+    # Remove noise (morphological opening on the original image)
+    image2 = cv2.morphologyEx(original_image, cv2.MORPH_OPEN, kernel(2, 2), iterations=7)
     # find biggest block of pixels
-
-    image1 = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
+    image1 = cv2.morphologyEx(image2, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
+    image1=img_as_ubyte(image1>50)
     cv2.imwrite("sidblock1.png", image1)
     im2, ctrs, hier = cv2.findContours(
         image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
@@ -97,7 +106,7 @@
         ctrs, key=lambda ctr: cv2.contourArea(ctr)
     )  # get bigges contour
     x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])
-    image = image[y : y + h, x + 25 - move_left : x + w - 25]
+    image = image[y : y + h, x + 25 - move_left : x + w - 40] #+25,-25
     cv2.imwrite("sidblock2.png", image)
     imgHeight, imgWidth = image.shape[0:2]
     numWidth = int(imgWidth / (sid_len))
@@ -111,14 +120,60 @@
         sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])
     return sid_no
 
+def segment_by_7segments(image,original_image,sid_mask,classifier):
+    """Read the SID by locating digit cells with the template-8.png template.
+
+    Args:
+        image: single-channel image the digit crops are cut from.
+        original_image: image that is closed and thresholded (< 50) for matching.
+        sid_mask: unused by this algorithm.
+        classifier: object whose predict() gets each flattened 32x32 crop scaled by 1/255.
+
+    Returns the digit string, or an empty string if nothing matched or a crop could not be resized.
+    """
+    block_image = cv2.morphologyEx(original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10)
+    block_image =img_as_ubyte(block_image<50)
+    cv2.imwrite("sid_3rd1.png", block_image)
+    template = cv2.imread("template-8.png", 0)
+    w, h = template.shape[::-1]
+    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
+    loc = np.where(res >= 0.75)
+    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+    # Match corners as (x, y), shifted 10 px up and left, ordered left to right.
+    corners = sorted((pt[0] - 10, pt[1] - 10) for pt in zip(*loc[::-1]))
+    if not corners:
+        return ""
+    loc_filtered_x, loc_filtered_y = (np.array(axis) for axis in zip(*corners))
+    # Collapse clusters of hits: keep a hit only if the next one lies more than
+    # half a template width to the right; the right-most hit is always kept.
+    a = np.append(np.diff(loc_filtered_x) > int(w/2), True)
+    points = [loc_filtered_y[a], loc_filtered_x[a]]
+    for pt in zip(*points[::-1]):
+        cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
+    cv2.imwrite("sid_3rd2.png", cimg)
+
+    digits = []
+    for i,pt in enumerate(zip(*points[::-1])):
+        num=image[pt[1]:pt[1] + h, pt[0]:pt[0]+w]
+        num = img_as_ubyte(num < 128)
+        try:
+            num = cv2.resize(num, (32, 32))
+        except cv2.error:
+            # OpenCV rejected the crop (e.g. it is empty); abort this algorithm.
+            return ""
+        cv2.imwrite("sid_3no_{}.png".format(i), num)
+        digits.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))
+    return "".join(digits)
 
 def getSID(image, classifier, sid_mask):
     sid_warn = []
+    sid_err=[]
     image = 255 - image
+    image_original=image.copy()
     image = img_as_ubyte(image > 100)
     cv2.imwrite("enSID0.png", image)
     # Remove noise
-    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=1)
+    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3)
     # Closing. Connect non connected parts
     image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
     # Again noise removal after closing
@@ -144,14 +199,21 @@
     )
     sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
 
-    sid_no = ""
     print(len(sid_mask), len(sorted_ctrs))
     sid_no = segment_by_contours(
         image, sorted_ctrs[1:], classifier
     )  # we remove largest contour that surrounds whole image
     print(sid_no)
-    if len(sid_no) != len(sid_mask):
-        #print("Ooops have to find another way")
+    if len(sid_no) != len(sid_mask) or not sid_compare(sid_no,sid_mask):
         sid_warn.append("Trying second SID algorithm.")
-        sid_no = segment_by_sid_len(image, sid_mask, classifier)
-    return (sid_no, [], sid_warn)
+        sid_no = segment_by_7segments(image, image_original, sid_mask, classifier)
+    print(sid_no)
+    if(len(sid_no))!=len(sid_mask):
+        sid_no = segment_by_sid_len(image, image_original, sid_mask, classifier)
+        sid_warn.append("Trying third SID algorithm.")
+
+
+    if not sid_compare(sid_no, sid_mask):
+            sid_err=['Wrong SID!']
+
+    return (sid_no, sid_err, sid_warn)
diff --git a/template-8.png b/template-8.png
new file mode 100644
index 0000000..cb2063b
--- /dev/null
+++ b/template-8.png
Binary files differ

--
Gitblit v1.9.3