havoc/sizif-ocr.git

from pyzbar.pyzbar import decode
from .sid_process import getSID
import cv2
import numpy as np
import os
import pkg_resources
 
markerfile = '/template-sq.png'  # always use slash
markerfilename = pkg_resources.resource_filename(__name__, markerfile)
 
 
 
class Paper:
    def __init__(self, filename=None, sid_classifier=None, settings=None, output_path="/tmp"):
        self.filename = filename
        self.output_path=output_path
        self.invalid = None
        self.QRData = None
        self.settings = {"answer_threshold": 0.25} if settings is None else settings
        self.errors = []
        self.warnings = []
        self.sid = None
        self.sid_classifier = sid_classifier
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()
 
    def loadImage(self, filename, rgbchannel=0):
        self.img = cv2.imread(filename, rgbchannel)
        if self.img is None:
            self.errors.append("File could not be loaded!")
            self.invalid = True
            return
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
 
    def saveImage(self, filename="/tmp/debug_image.png"):
        cv2.imwrite(filename, self.img)
 
    def runOcr(self):
        if self.invalid == True:
            return
        self.decodeQRandRotate()
        self.imgTreshold()
        cv2.imwrite('/tmp/debug_threshold.png', self.bwimg)
        skewAngle = 0
        #         try:
        #             skewAngle=self.getSkewAngle()
        #         except:
        #             self.errors.append("Could not determine skew angle!")
        #         self.rotateAngle(skewAngle)
 
        self.generateAnswerMatrix()
 
        self.saveImage()
 
    def decodeQRandRotate(self):
        if self.invalid == True:
            return
        blur = cv2.blur(self.img, (3, 3))
        d = decode(blur)
        self.img = blur
        if len(d) == 0:
            self.errors.append("QR code could not be found!")
            self.data = None
            self.invalid = True
            return
        if(len(d)>1): #if there are multiple codes, get first ean or qr code available.
            for dd in d:
                if(dd.type=="EAN13" or dd.type=="QR"):
                    d[0]=dd
                    break
        self.QRDecode = d
        self.QRData = d[0].data
        xpos = d[0].rect.left
        ypos = d[0].rect.top
        # check if image is rotated wrongly
        if xpos > self.imgHeight / 2.0 and ypos > self.imgWidth / 2.0:
            self.rotateAngle(180)
 
    def rotateAngle(self, angle=0):
        # rot_mat = cv2.getRotationMatrix2D(
        #    (self.imgHeight / 2, self.imgWidth / 2), angle, 1.0
        # )
        rot_mat = cv2.getRotationMatrix2D(
            (self.imgWidth / 2, self.imgHeight / 2), angle, 1.0
        )
        result = cv2.warpAffine(
            self.img,
            rot_mat,
            (self.imgWidth, self.imgHeight),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),
        )
 
        self.img = result
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
 
        # todo, make better tresholding
    def imgTreshold(self):
        (self.thresh, self.bwimg) = cv2.threshold(
            self.img, 128, 255,
            cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )
 
    def getSkewAngle(self):
        neg = 255 - self.bwimg  # get negative image
        cv2.imwrite("/tmp/debug_1.png", neg)
 
        angle_counter = 0  # number of angles
        angle = 0.0  # collects sum of angles
        cimg = cv2.cvtColor(self.img, cv2.COLOR_GRAY2BGR)
 
        # get all the Hough lines
        for line in cv2.HoughLinesP(neg, 1, np.pi / 180, 325):
            x1, y1, x2, y2 = line[0]
            cv2.line(cimg, (x1, y1), (x2, y2), (0, 0, 255), 2)
            # calculate the angle (in radians)
            this_angle = np.arctan2(y2 - y1, x2 - x1)
            if this_angle and abs(this_angle) <= 10:
                # filtered zero degree and outliers
                angle += this_angle
                angle_counter += 1
 
                # the skew is calculated of the mean of the total angles, #try block helps with division by zero.
        try:
            skew = np.rad2deg(
                angle / angle_counter
            )  # the 1.2 factor is just experimental....
        except:
            skew = 0
 
        cv2.imwrite("/tmp/debug_2.png", cimg)
        return skew
 
    def locateUpMarkers(self, threshold=0.85, height=200):
        template = cv2.imread(markerfilename, 0)
        w, h = template.shape[::-1]
        crop_img = self.bwimg[0:height, :]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
        # remove false matching of the squares in qr code
        loc_filtered_x = []
        loc_filtered_y = []
        if len(loc[0]) == 0:
            min_y = -1
        else:
            min_y = np.min(loc[0])
            for pt in zip(*loc[::-1]):
                if pt[1] < min_y + 20:
                    loc_filtered_y.append(pt[1])
                    loc_filtered_x.append(pt[0])
                    # order by x coordinate
            loc_filtered_x, loc_filtered_y = zip(
                *sorted(zip(loc_filtered_x, loc_filtered_y))
            )
            # loc=[loc_filtered_y,loc_filtered_x]
            # remove duplicates
            a = np.diff(loc_filtered_x) > 40
            a = np.append(a, True)
            loc_filtered_x = np.array(loc_filtered_x)
            loc_filtered_y = np.array(loc_filtered_y)
            loc = [loc_filtered_y[a], loc_filtered_x[a]]
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
 
        cv2.imwrite("/tmp/debug_3.png", cimg)
 
        self.xMarkerLocations = loc
        return loc
 
    def locateRightMarkers(self, threshold=0.85, width=200):
        template = cv2.imread(markerfilename, 0)
        w, h = template.shape[::-1]
        crop_img = self.bwimg[:, -width:]
        cv2.imwrite('/tmp/debug_right.png', crop_img)
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
        # remove false matching of the squares in qr code
        loc_filtered_x = []
        loc_filtered_y = []
        if len(loc[1]) == 0:
            min_x = -1
        else:
            max_x = np.max(loc[1])
            for pt in zip(*loc[::-1]):
                if pt[1] > max_x - 20:
                    loc_filtered_y.append(pt[1])
                    loc_filtered_x.append(pt[0])
                    # order by y coordinate
            try:
                loc_filtered_y, loc_filtered_x = zip(
                    *sorted(zip(loc_filtered_y, loc_filtered_x))
                )
            except:
                self.yMarkerLocations=[np.array([1,1]),np.array([1,2])]
                return self.yMarkerLocations
            # loc=[loc_filtered_y,loc_filtered_x]
            # remove duplicates
            a = np.diff(loc_filtered_y) > 40
            a = np.append(a, True)
            loc_filtered_x = np.array(loc_filtered_x)
            loc_filtered_y = np.array(loc_filtered_y)
            loc = [loc_filtered_y[a], loc_filtered_x[a]]
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
 
        cv2.imwrite("/tmp/debug_4.png", cimg)
 
        self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
        return self.yMarkerLocations
 
    def generateAnswerMatrix(self):
        self.locateUpMarkers()
        self.locateRightMarkers()
 
        roixoff = 10
        roiyoff = 5
        roiwidth = 50
        roiheight = roiwidth
        totpx = roiwidth * roiheight
 
        self.answerMatrix = []
        for y in self.yMarkerLocations[0]:
            oneline = []
            for x in self.xMarkerLocations[1]:
                roi = self.bwimg[
                    y - roiyoff : y + int(roiheight - roiyoff),
                    x - roixoff : x + int(roiwidth - roixoff),
                ]
                # cv2.imwrite('ans_x'+str(x)+'_y_'+str(y)+'.png',roi)
                black = totpx - cv2.countNonZero(roi)
                oneline.append(black / totpx)
            self.answerMatrix.append(oneline)
 
    def get_enhanced_sid(self):
        if self.sid_classifier is None:
            return "x"
        if self.settings is not None:
            sid_mask = self.settings.get("sid_mask", None)
        es, err, warn = getSID(
            self.img[
                int(0.04 * self.imgHeight) : int(0.095 * self.imgHeight),
                int(0.65 * self.imgWidth) : int(0.95 * self.imgWidth),
            ],
            self.sid_classifier,
            sid_mask,
        )
        [self.errors.append(e) for e in err]
        [self.warnings.append(w) for w in warn]
        return es
 
    def get_code_data(self):
        if self.QRData is None:
            self.errors.append("Could not read QR or EAN code! Not an exam?")
            retval = {
                "exam_id": None,
                "page_no": None,
                "paper_id": None,
                "faculty_id": None,
                "sid": None,
            }
            return retval
        qrdata = bytes.decode(self.QRData, "utf8")
        if self.QRDecode[0].type == "EAN13":
            return {
                "exam_id": int(qrdata[0:7]),
                "page_no": int(qrdata[7])+1,
                "paper_id": int(qrdata[-5:-1]),
                "faculty_id": None,
                "sid": None,
            }
        else:
            data = qrdata.split(",")
            retval = {
                "exam_id": int(data[1]),
                "page_no": int(data[3]),
                "paper_id": int(data[2]),
                "faculty_id": int(data[0]),
                "sid": None
            }
            if len(data) > 4:
                retval["sid"] = data[4]
 
            return retval
 
    def get_paper_ocr_data(self):
        data = self.get_code_data()
        data["qr"] = bytes.decode(self.QRData, 'utf8')
        data["errors"] = self.errors
        data["warnings"] = self.warnings
        data["up_position"] = (
            list(self.xMarkerLocations[0] / self.imgWidth),
            list(self.xMarkerLocations[1] / self.imgHeight),
        )
        data["right_position"] = (
            list(self.yMarkerLocations[0] / self.imgWidth),
            list(self.yMarkerLocations[1] / self.imgHeight),
        )
        data["ans_matrix"] = (
            (np.array(self.answerMatrix) > self.settings["answer_threshold"]) * 1
        ).tolist()
        if data["sid"] is None and data["page_no"] == 1:
            data["sid"] = self.get_enhanced_sid()
        output_filename=os.path.join(self.output_path, '.'.join(self.filename.split('/')[-1].split('.')[:-1])+".png")
        cv2.imwrite(output_filename, self.img)
        data['output_filename']=output_filename
        return data