From e0996e35862256114826a6314dc649972972a60c Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Sat, 17 Nov 2018 13:42:52 +0000
Subject: [PATCH] Multiple SID robustness..

---
 aoiOcr.py      |   30 ++++++---
 sid_process.py |    4 
 Ocr.py         |   97 ++++++++++++++++++-------------
 3 files changed, 77 insertions(+), 54 deletions(-)

diff --git a/Ocr.py b/Ocr.py
index 33d9457..6e9a082 100644
--- a/Ocr.py
+++ b/Ocr.py
@@ -10,10 +10,10 @@
         self.filename = filename
         self.invalid = None
         self.QRData = None
-        self.settings={'answer_treshold':0.25,} if settings is None else settings
+        self.settings = {"answer_threshold": 0.25} if settings is None else settings
         self.errors = []
         self.warnings = []
-        self.sid=None
+        self.sid = None
         self.sid_classifier = sid_classifier
         if filename is not None:
             self.loadImage(filename)
@@ -57,6 +57,11 @@
             self.data = None
             self.invalid = True
             return
+        if(len(d)>1): #if there are multiple codes, get first ean or qr code available.
+            for dd in d:
+                if(dd.type=="EAN13" or dd.type=="QR"):
+                    d[0]=dd
+                    break
         self.QRDecode = d
         self.QRData = d[0].data
         xpos = d[0].rect.left
@@ -66,11 +71,11 @@
             self.rotateAngle(180)
 
     def rotateAngle(self, angle=0):
-        #rot_mat = cv2.getRotationMatrix2D(
+        # rot_mat = cv2.getRotationMatrix2D(
         #    (self.imgHeight / 2, self.imgWidth / 2), angle, 1.0
-        #)
+        # )
         rot_mat = cv2.getRotationMatrix2D(
-            (self.imgWidth/2, self.imgHeight/2), angle, 1.0
+            (self.imgWidth / 2, self.imgHeight / 2), angle, 1.0
         )
         result = cv2.warpAffine(
             self.img,
@@ -222,58 +227,68 @@
         if self.sid_classifier is None:
             return "x"
         if self.settings is not None:
-            sid_mask=self.settings.get("sid_mask", None)
-        es,err,warn = getSID(
+            sid_mask = self.settings.get("sid_mask", None)
+        es, err, warn = getSID(
             self.img[
                 int(0.04 * self.imgHeight) : int(0.095 * self.imgHeight),
                 int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
             ],
             self.sid_classifier,
-            sid_mask
+            sid_mask,
         )
         [self.errors.append(e) for e in err]
         [self.warnings.append(w) for w in warn]
         return es
 
-
     def get_code_data(self):
         if self.QRData is None:
             self.errors.append("Could not read QR or EAN code! Not an exam?")
-            retval = {'exam_id': None,
-                      'page_no': None,
-                      'paper_id': None,
-                      'faculty_id': None,
-                      'sid':None
-                      }
-            return retval
-        qrdata = bytes.decode(self.QRData, 'utf8')
-        if self.QRDecode[0].type=='EAN13':
-            return {'exam_id': int(qrdata[0:7]),
-                    'page_no': int(qrdata[7]),
-                    'paper_id': int(qrdata[-5:-1]),
-                    'faculty_id': None,
-                    'sid': None
-                    }
-        else:
-            data=qrdata.split(',')
-            retval={'exam_id': int(data[1]),
-                    'page_no': int(data[3]),
-                    'paper_id':int(data[2]),
-                    'faculty_id':int(data[0]),
+            retval = {
+                "exam_id": None,
+                "page_no": None,
+                "paper_id": None,
+                "faculty_id": None,
+                "sid": None,
             }
-            if(len(data)>4):
-                retval['sid']=data[4]
+            return retval
+        qrdata = bytes.decode(self.QRData, "utf8")
+        if self.QRDecode[0].type == "EAN13":  # EAN13 carries no faculty id or SID
+            return {
+                "exam_id": int(qrdata[0:7]),
+                "page_no": int(qrdata[7]),
+                "paper_id": int(qrdata[-5:-1]),
+                "faculty_id": None,
+                "sid": None,
+            }
+        else:
+            data = qrdata.split(",")  # fields: faculty_id, exam_id, paper_id, page_no[, sid]
+            retval = {
+                "exam_id": int(data[1]),
+                "page_no": int(data[3]),
+                "paper_id": int(data[2]),
+                "faculty_id": int(data[0]),
+            }
+            # Always provide "sid" (like the other return paths) so callers never hit a KeyError.
+            retval["sid"] = data[4] if len(data) > 4 else None

             return retval
 
     def get_paper_ocr_data(self):
-        data=self.get_code_data()
-        data['qr']=self.QRData
-        data['errors']=self.errors
-        data['warnings']=self.warnings
-        data['up_position']=(list(self.xMarkerLocations[1]/self.imgWidth), list(self.yMarkerLocations[1]/self.imgHeight))
-        data['right_position']=(list(self.xMarkerLocations[1]/self.imgWidth), list(self.yMarkerLocations[1]/self.imgHeight))
-        data['ans_matrix']=((np.array(self.answerMatrix)>self.settings['answer_treshold'])*1).tolist()
-        if data['sid'] is None:
-            data['sid']=self.get_enhanced_sid()
+        data = self.get_code_data()  # exam/page/paper/faculty ids decoded from the QR or EAN code
+        data["qr"] = self.QRData
+        data["errors"] = self.errors
+        data["warnings"] = self.warnings
+        data["up_position"] = (  # marker locations divided by image width / height
+            list(self.xMarkerLocations[1] / self.imgWidth),
+            list(self.yMarkerLocations[1] / self.imgHeight),
+        )
+        data["right_position"] = (  # NOTE(review): same expression as up_position; confirm the marker index
+            list(self.xMarkerLocations[1] / self.imgWidth),
+            list(self.yMarkerLocations[1] / self.imgHeight),
+        )
+        data["ans_matrix"] = (  # 1 where a cell exceeds settings["answer_threshold"], else 0
+            (np.array(self.answerMatrix) > self.settings["answer_threshold"]) * 1
+        ).tolist()
+        if data["sid"] is None and data["page_no"] == 0:  # read the SID from the image only on page 0
+            data["sid"] = self.get_enhanced_sid()
         return data
diff --git a/aoiOcr.py b/aoiOcr.py
index 3554c92..9188b0a 100644
--- a/aoiOcr.py
+++ b/aoiOcr.py
@@ -3,11 +3,11 @@
 
 from glob import glob
 
-settings = {"sid_mask": "64xx0xxx", "answer_treshold": 0.25}
+settings = {"sid_mask": "64xx0xxx", "answer_threshold": 0.25}
 classifier = joblib.load("filename.joblib")
 
 #p = Paper(filename="testpage300dpi_scan1.png")
-#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_treshold": 0.25})
+#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_threshold": 0.25})
 #p=Paper(filename='processed_scans/20141016095134535_0006.tif', sid_classifier=classifier, settings=settings)
 #p = Paper(filename="processed_scans/20151111080408825_0001.tif",sid_classifier=classifier,settings=settings,)
 #p=Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings)
@@ -20,9 +20,10 @@
     "processed_scans/20141021095744144_0009.tif",
     "processed_scans/20141028095553745_0018.tif",
     "processed_scans/20151013180545275_0011.tif",
-    "processed_scans/20160408140801098_0004.tif"
+    "processed_scans/20160408140801098_0004.tif",
+    "processed_scans/20160510075445995_0026.tif"
 ]
-p=Paper(filename=pa[8], sid_classifier=classifier, settings=settings)
+p=Paper(filename=pa[9], sid_classifier=classifier, settings=settings)
 
 # print(p.QRData)
 # print(p.errors)
@@ -35,13 +36,20 @@
 
 
 print(p.get_paper_ocr_data())
-exit(0)
+
+
 filelist = glob("processed_scans/*.tif")
+wrong_sid = 0  # first pages whose OCR result reported at least one error
+total = 0  # first pages (page_no == 0) processed so far
 for f in sorted(filelist):
     print("processing: {}".format(f))
-    print(
-        f,
-        Paper(
-            filename=f, sid_classifier=classifier, settings=settings
-        ).get_paper_ocr_data(),
-    )
+    p=Paper(filename=f, sid_classifier=classifier, settings=settings).get_paper_ocr_data()
+    print(f,p)
+    if p['page_no'] == 0:  # count errors against the same population as total
+        total += 1
+        if p['errors']:
+            wrong_sid += 1
+        if total % 10 == 0:  # progress summary once per ten first pages, not on every file
+            print("Total:{}, wrong SID: {}".format(total,wrong_sid))
+
+print("Total:{}, wrong SID: {}".format(total,wrong_sid))
\ No newline at end of file
diff --git a/sid_process.py b/sid_process.py
index ab8aaa3..f21fafb 100644
--- a/sid_process.py
+++ b/sid_process.py
@@ -210,11 +210,11 @@
     cv2.imwrite("enhancedSID.png", image)
 
     sid_no = segment_by_contours(image, image_original, classifier, sid_mask)
-    print(sid_no)
+
     if len(sid_no) != len(sid_mask) or not sid_compare(sid_no, sid_mask):
         sid_warn.append("Trying second SID algorithm.")
         sid_no = segment_by_7segments(image, image_original, sid_mask, classifier)
-    print(sid_no)
+
     if (len(sid_no)) != len(sid_mask):
         sid_no = segment_by_sid_len(image, image_original, sid_mask, classifier)
         sid_warn.append("Trying third SID algorithm.")

--
Gitblit v1.9.3