From ac766ed5ec375a384da5c454103aef055aa9344a Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Fri, 16 Nov 2018 20:41:33 +0000
Subject: [PATCH] Make recognition a bit more robust

---
 Ocr.py |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/Ocr.py b/Ocr.py
index ee715f0..f680669 100644
--- a/Ocr.py
+++ b/Ocr.py
@@ -1,16 +1,20 @@
 from pyzbar.pyzbar import decode
+from sid_process import getSID
 import cv2
 import numpy as np
 import math
 
 
 class Paper:
-    def __init__(self, filename=None):
+    def __init__(self, filename=None, sid_classifier=None, settings=None):
         self.filename = filename
         self.invalid = None
         self.QRData = None
+        self.settings={'answer_treshold':0.25,} if settings is None else settings
         self.errors = []
         self.warnings = []
+        self.sid=None
+        self.sid_classifier = sid_classifier
         if filename is not None:
             self.loadImage(filename)
             self.runOcr()
@@ -136,8 +140,8 @@
             loc_filtered_x, loc_filtered_y = zip(
                 *sorted(zip(loc_filtered_x, loc_filtered_y))
             )
-        # loc=[loc_filtered_y,loc_filtered_x]
-        # remove duplicates
+            # loc=[loc_filtered_y,loc_filtered_x]
+            # remove duplicates
             a = np.diff(loc_filtered_x) > 40
             a = np.append(a, True)
             loc_filtered_x = np.array(loc_filtered_x)
@@ -210,3 +214,52 @@
                 black = totpx - cv2.countNonZero(roi)
                 oneline.append(black / totpx)
             self.answerMatrix.append(oneline)
+
+    def get_enhanced_sid(self):
+        """Read the SID from the page image using the SID classifier.
+
+        Returns "x" when no classifier was supplied; otherwise the result of
+        getSID() on the crop spanning 4.5%-8.5% of the image height and
+        70%-99% of its width, using the optional "sid_mask" setting.
+        """
+        if self.sid_classifier is None:
+            return "x"
+        # Default the mask so a missing settings dict cannot leave it unbound.
+        sid_mask = None if self.settings is None else self.settings.get("sid_mask")
+        rows = slice(int(0.045 * self.imgHeight), int(0.085 * self.imgHeight))
+        cols = slice(int(0.7 * self.imgWidth), int(0.99 * self.imgWidth))
+        return getSID(self.img[rows, cols], self.sid_classifier, sid_mask)
+
+
+    def get_code_data(self):
+        """Parse the decoded barcode/QR payload into the paper's identifiers.
+
+        EAN13 payloads are sliced by character position; any other type is
+        read as comma-separated "faculty_id,exam_id,paper_id,page_no[,sid]".
+        "sid" is always present (None when absent) so callers can index it.
+        """
+        qrdata = bytes.decode(self.QRData, 'utf8')
+        if self.QRDecode[0].type == 'EAN13':
+            return {'exam_id': int(qrdata[0:7]),
+                    'page_no': int(qrdata[7]),
+                    'paper_id': int(qrdata[-5:-1]),
+                    'faculty_id': None,
+                    'sid': None}
+        data = qrdata.split(',')
+        return {'exam_id': int(data[1]),
+                'page_no': int(data[3]),
+                'paper_id': int(data[2]),
+                'faculty_id': int(data[0]),
+                'sid': data[4] if len(data) > 4 else None}
+
+    def get_paper_ocr_data(self):
+        """Return the code data extended with diagnostics, marker positions, answers and a SID."""
+        data = dict(self.get_code_data(), qr=self.QRData, errors=self.errors, warnings=self.warnings)
+        # NOTE(review): 'right_position' repeats the index-1 markers of 'up_position' - confirm.
+        data['up_position'] = (list(self.xMarkerLocations[1] / self.imgWidth), list(self.yMarkerLocations[1] / self.imgHeight))
+        data['right_position'] = (list(self.xMarkerLocations[1] / self.imgWidth), list(self.yMarkerLocations[1] / self.imgHeight))
+        data['ans_matrix'] = ((np.array(self.answerMatrix) > self.settings['answer_treshold']) * 1).tolist()
+        # .get(): the code data may carry no 'sid' key; then fall back to get_enhanced_sid().
+        if data.get('sid') is None:
+            data['sid'] = self.get_enhanced_sid()
+        return data

--
Gitblit v1.9.3