From 762a5e258a90387922d6c6eb3ecc9a7ca7c96144 Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Fri, 16 Nov 2018 18:53:49 +0000
Subject: [PATCH] refactored and debugged.

---
 aoiOcr.py      |    4 +
 sid_process.py |   68 +++++++++++++++++++--------------
 Ocr.py         |    8 +++-
 3 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/Ocr.py b/Ocr.py
index 35c6729..970fb70 100644
--- a/Ocr.py
+++ b/Ocr.py
@@ -6,13 +6,14 @@
 
 
 class Paper:
-    def __init__(self, filename=None, sid_classifier=None):
+    def __init__(self, filename=None, sid_classifier=None, settings=None):
         self.filename = filename
         self.invalid = None
         self.QRData = None
+        self.settings = settings
         self.errors = []
         self.warnings = []
-        self.sid_classifier=sid_classifier
+        self.sid_classifier = sid_classifier
         if filename is not None:
             self.loadImage(filename)
             self.runOcr()
@@ -216,11 +217,14 @@
     def get_enhanced_sid(self):
+        """Run getSID on the fixed SID crop; returns "x" if no classifier is set."""
         if self.sid_classifier is None:
             return "x"
+        sid_mask = (self.settings or {}).get("sid_mask")  # None when not configured
         es = getSID(
             self.img[
                 int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
                 int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
             ],
             self.sid_classifier,
+            sid_mask,
         )
         return es
diff --git a/aoiOcr.py b/aoiOcr.py
index 72ed3b9..5a27df0 100644
--- a/aoiOcr.py
+++ b/aoiOcr.py
@@ -1,10 +1,12 @@
 from Ocr import Paper
 from sklearn.externals import joblib
 
+
+settings={'sid_mask':'11xx0xxx',}
 classifier = joblib.load('filename.joblib')
 
 #p=Paper(filename='testpage300dpi_scan1.png')
-p=Paper(filename='sizif111.tif', sid_classifier=classifier)
+p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings=settings)
 #p=Paper(filename='processed_scans/20141016095134535_0028.tif')
 
 print(p.QRData)
diff --git a/sid_process.py b/sid_process.py
index 90d9b33..67c689a 100644
--- a/sid_process.py
+++ b/sid_process.py
@@ -1,9 +1,8 @@
 import cv2
 import numpy as np
-from skimage import morphology,img_as_ubyte
+from skimage import morphology, img_as_ubyte
 from sklearn import svm
 from sklearn.externals import joblib
-
 
 
 """
@@ -61,48 +60,59 @@
     return np.ones((x, y), np.uint8)
 
 
-def getSID(image, classifier):
-    image=255-image
-    image=img_as_ubyte(image>100)
+def segment_by_contours(image, sorted_ctrs, classifier):
+    """Classify every contour in the given order and return the digits as one string."""
+    digits = []
+    for idx, contour in enumerate(sorted_ctrs):
+        left, top, width, height = cv2.boundingRect(contour)
+        # A box less than half as wide as it is tall is taken to be a "1".
+        if width < height / 2:
+            digits.append("1")
+            continue
+        # Crop the box, threshold it (pixels below 128 become 255), scale to 32x32.
+        glyph = img_as_ubyte(image[top : top + height, left : left + width] < 128)
+        glyph = cv2.resize(glyph, (32, 32))
+        # Dump the sample that is handed to the classifier.
+        cv2.imwrite("sid_no_{}.png".format(idx), glyph)
+        label = classifier.predict(glyph.reshape(1, -1) / 255.0)[0]
+        digits.append(str(label))
+    return "".join(digits)
+
+
+def getSID(image, classifier, sid_mask):
+    image = 255 - image
+    image = img_as_ubyte(image > 100)
     cv2.imwrite("enSID0.png", image)
     # Remove noise
-    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2,2), iterations=1)
+    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=1)
     # Closing. Connect non connected parts
     image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
     # Again noise removal after closing
 
-    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8,8), iterations=1)
+    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
     # Skeletonization
-    image = img_as_ubyte(morphology.thin(image>128))
-    cv2.imwrite("enSID1.png",image)
+    image = img_as_ubyte(morphology.thin(image > 128))
+    cv2.imwrite("enSID1.png", image)
     # Stub removal (might not be necessary if thinning instead of skeletonize is used above
     # Making lines stronger
     image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 5), iterations=1)
 
     image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(10, 10))
     # Thining again
-    image = img_as_ubyte(morphology.skeletonize(image>0.5))
+    image = img_as_ubyte(morphology.skeletonize(image > 0.5))
     image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
 
-    im2,ctrs, hier = cv2.findContours(image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    im2, ctrs, hier = cv2.findContours(
+        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+    )
     sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
 
-    #classifier = joblib.load('filename.joblib')
-
-    sid_no=""
-    for i, ctr in enumerate(sorted_ctrs):
-        # Get bounding box
-        x, y, w, h = cv2.boundingRect(ctr)
-        # Getting ROI
-        if(w<h/2):
-            sid_no=sid_no+"1"
-            continue
-        roi = image[y:y+h, x:x+w]
-        roi = img_as_ubyte(roi < 128)
-        roi = cv2.resize(roi,(32,32))
-
-        #cv2.rectangle(image,(x,y),( x + w, y + h ),(0,255,0),2)
-        cv2.imwrite('sid_no_{}.png'.format(i), roi)
-        sid_no=sid_no+str(classifier.predict(roi.reshape(1,-1)/255.0)[0])
+    sid_no = ""
+    # Without a mask there is no expected digit count to check against.
+    if sid_mask is None or len(sid_mask) == len(sorted_ctrs):
+        sid_no = segment_by_contours(image, sorted_ctrs, classifier)
+    else:
+        # Contour count disagrees with the mask; no fallback is implemented yet.
+        print("Ooops have to find another way")
     print(sid_no)
-    return image
+    return sid_no

--
Gitblit v1.9.3