# havoc/sizif-ocr.git

from pyzbar.pyzbar import decode
from sid_process import getSID
import cv2
import numpy as np
import math
 
 
class Paper:
    def __init__(self, filename=None, sid_classifier=None, settings=None):
        self.filename = filename
        self.invalid = None
        self.QRData = None
        self.settings={'answer_treshold':0.25,} if settings is None else settings
        self.errors = []
        self.warnings = []
        self.sid=None
        self.sid_classifier = sid_classifier
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()
 
    def loadImage(self, filename, rgbchannel=0):
        self.img = cv2.imread(filename, rgbchannel)
        if self.img is None:
            self.errors.append("File could not be loaded!")
            self.invalid = True
            return
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
 
    def saveImage(self, filename="debug_image.png"):
        cv2.imwrite(filename, self.img)
 
    def runOcr(self):
        if self.invalid == True:
            return
        self.decodeQRandRotate()
        self.imgTreshold()
        skewAngle = 0
        #         try:
        #             skewAngle=self.getSkewAngle()
        #         except:
        #             self.errors.append("Could not determine skew angle!")
        #         self.rotateAngle(skewAngle)
 
        self.generateAnswerMatrix()
 
        self.saveImage()
 
    def decodeQRandRotate(self):
        if self.invalid == True:
            return
        blur = cv2.blur(self.img, (3, 3))
        d = decode(blur)
        self.img = blur
        if len(d) == 0:
            self.errors.append("QR code could not be found!")
            self.data = None
            self.invalid = True
            return
        self.QRDecode = d
        self.QRData = d[0].data
        xpos = d[0].rect.left
        ypos = d[0].rect.top
        # check if image is rotated wrongly
        if xpos > self.imgHeight / 2.0 and ypost > self.imgWidth / 2.0:
            self.rotateAngle(180)
 
    def rotateAngle(self, angle=0):
        rot_mat = cv2.getRotationMatrix2D(
            (self.imgHeight / 2, self.imgWidth / 2), angle, 1.0
        )
        result = cv2.warpAffine(
            self.img,
            rot_mat,
            (self.imgHeight, self.imgWidth),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),
        )
 
        self.img = result
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
 
    # TODO: use a better thresholding method in imgTreshold.
 
    def imgTreshold(self):
        (self.thresh, self.bwimg) = cv2.threshold(
            self.img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )
 
    def getSkewAngle(self):
        neg = 255 - self.bwimg  # get negative image
        cv2.imwrite("debug_1.png", neg)
 
        angle_counter = 0  # number of angles
        angle = 0.0  # collects sum of angles
        cimg = cv2.cvtColor(self.img, cv2.COLOR_GRAY2BGR)
 
        # get all the Hough lines
        for line in cv2.HoughLinesP(neg, 1, np.pi / 180, 325):
            x1, y1, x2, y2 = line[0]
            cv2.line(cimg, (x1, y1), (x2, y2), (0, 0, 255), 2)
            # calculate the angle (in radians)
            this_angle = np.arctan2(y2 - y1, x2 - x1)
            if this_angle and abs(this_angle) <= 10:
                # filtered zero degree and outliers
                angle += this_angle
                angle_counter += 1
 
                # the skew is calculated of the mean of the total angles, #try block helps with division by zero.
        try:
            skew = np.rad2deg(
                angle / angle_counter
            )  # the 1.2 factor is just experimental....
        except:
            skew = 0
 
        cv2.imwrite("debug_2.png", cimg)
        return skew
 
    def locateUpMarkers(self, threshold=0.85, height=200):
        template = cv2.imread("template.png", 0)
        w, h = template.shape[::-1]
        crop_img = self.img[0:height, :]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
        # remove false matching of the squares in qr code
        loc_filtered_x = []
        loc_filtered_y = []
        if len(loc[0]) == 0:
            min_y = -1
        else:
            min_y = np.min(loc[0])
            for pt in zip(*loc[::-1]):
                if pt[1] < min_y + 20:
                    loc_filtered_y.append(pt[1])
                    loc_filtered_x.append(pt[0])
                    # order by x coordinate
            loc_filtered_x, loc_filtered_y = zip(
                *sorted(zip(loc_filtered_x, loc_filtered_y))
            )
            # loc=[loc_filtered_y,loc_filtered_x]
            # remove duplicates
            a = np.diff(loc_filtered_x) > 40
            a = np.append(a, True)
            loc_filtered_x = np.array(loc_filtered_x)
            loc_filtered_y = np.array(loc_filtered_y)
            loc = [loc_filtered_y[a], loc_filtered_x[a]]
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
 
        cv2.imwrite("debug_3.png", cimg)
 
        self.xMarkerLocations = loc
        return loc
 
    def locateRightMarkers(self, threshold=0.85, width=200):
        template = cv2.imread("template.png", 0)
        w, h = template.shape[::-1]
        crop_img = self.img[:, -width:]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
        # remove false matching of the squares in qr code
        loc_filtered_x = []
        loc_filtered_y = []
        if len(loc[1]) == 0:
            min_x = -1
        else:
            max_x = np.max(loc[1])
            for pt in zip(*loc[::-1]):
                if pt[1] > max_x - 20:
                    loc_filtered_y.append(pt[1])
                    loc_filtered_x.append(pt[0])
                    # order by y coordinate
            loc_filtered_y, loc_filtered_x = zip(
                *sorted(zip(loc_filtered_y, loc_filtered_x))
            )
            # loc=[loc_filtered_y,loc_filtered_x]
            # remove duplicates
            a = np.diff(loc_filtered_y) > 40
            a = np.append(a, True)
            loc_filtered_x = np.array(loc_filtered_x)
            loc_filtered_y = np.array(loc_filtered_y)
            loc = [loc_filtered_y[a], loc_filtered_x[a]]
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
 
        cv2.imwrite("debug_4.png", cimg)
 
        self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
        return self.yMarkerLocations
 
    def generateAnswerMatrix(self):
        self.locateUpMarkers()
        self.locateRightMarkers()
 
        roixoff = 10
        roiyoff = 5
        roiwidth = 50
        roiheight = roiwidth
        totpx = roiwidth * roiheight
 
        self.answerMatrix = []
        for y in self.yMarkerLocations[0]:
            oneline = []
            for x in self.xMarkerLocations[1]:
                roi = self.bwimg[
                    y - roiyoff : y + int(roiheight - roiyoff),
                    x - roixoff : x + int(roiwidth - roixoff),
                ]
                # cv2.imwrite('ans_x'+str(x)+'_y_'+str(y)+'.png',roi)
                black = totpx - cv2.countNonZero(roi)
                oneline.append(black / totpx)
            self.answerMatrix.append(oneline)
 
    def get_enhanced_sid(self):
        if self.sid_classifier is None:
            return "x"
        if self.settings is not None:
            sid_mask=self.settings.get("sid_mask", None)
        es,err,warn = getSID(
            self.img[
                int(0.04 * self.imgHeight) : int(0.095 * self.imgHeight),
                int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
            ],
            self.sid_classifier,
            sid_mask
        )
        [self.errors.append(e) for e in err]
        [self.warnings.append(w) for w in warn]
        return es
 
 
    def get_code_data(self):
        qrdata = bytes.decode(self.QRData, 'utf8')
        if self.QRDecode[0].type=='EAN13':
            return {'exam_id': int(qrdata[0:7]),
                    'page_no': int(qrdata[7]),
                    'paper_id': int(qrdata[-5:-1]),
                    'faculty_id': None,
                    'sid': None
                    }
        else:
            data=qrdata.split(',')
            retval={'exam_id': int(data[1]),
                    'page_no': int(data[3]),
                    'paper_id':int(data[2]),
                    'faculty_id':int(data[0]),
            }
            if(len(data)>4):
                retval['sid']=data[4]
 
            return retval
 
    def get_paper_ocr_data(self):
        data=self.get_code_data()
        data['qr']=self.QRData
        data['errors']=self.errors
        data['warnings']=self.warnings
        data['up_position']=(list(self.xMarkerLocations[1]/self.imgWidth), list(self.yMarkerLocations[1]/self.imgHeight))
        data['right_position']=(list(self.xMarkerLocations[1]/self.imgWidth), list(self.yMarkerLocations[1]/self.imgHeight))
        data['ans_matrix']=((np.array(self.answerMatrix)>self.settings['answer_treshold'])*1).tolist()
        if data['sid'] is None:
            data['sid']=self.get_enhanced_sid()
        return data