Development of the OCR part of AOI
Samo Penic
2018-11-16 5cb7c1dba78b025ff333a202b27f04a2230c9da9
Ocr.py
from pyzbar.pyzbar import decode
from sid_process import getSID
import cv2
import numpy as np
import math
class Paper:
    def __init__(self, filename=None, sid_classifier=None, settings=None):
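        # filename: path to the scanned page; sid_classifier: optional model passed
        # on to sid_process.getSID(); settings: dict of OCR options such as
        # "answer_treshold" (fill ratio above which a box counts as marked) and
        # "sid_mask".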
        self.filename = filename
        self.invalid = None
        self.QRData = None
        self.settings = {"answer_treshold": 0.25} if settings is None else settings
        self.errors = []
        self.warnings = []
        self.sid = None
        self.sid_classifier = sid_classifier
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()
    def loadImage(self, filename, rgbchannel=0):
        self.img = cv2.imread(filename, rgbchannel)
        if self.img is None:
            self.errors.append("File could not be loaded!")
            self.invalid = True
            return
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
    def saveImage(self, filename="debug_image.png"):
        cv2.imwrite(filename, self.img)
    def runOcr(self):
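        # Pipeline: decode the QR/EAN code (rotating the page 180 degrees if it was
        # scanned upside down), binarise the image and extract the answer matrix;
        # skew correction is present but currently disabled.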
        if self.invalid == True:
            return
        self.decodeQRandRotate()
        self.imgTreshold()
        skewAngle = 0
        #       try:
        #          skewAngle=self.getSkewAngle()
        #       except:
        #          self.errors.append("Could not determine skew angle!")
        #       self.rotateAngle(skewAngle)
        self.generateAnswerMatrix()
        self.saveImage()
    def decodeQRandRotate(self):
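        # Blur slightly to help pyzbar, decode the code and, if it is found in the
        # lower-right half of the page, assume the sheet is upside down and rotate it.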
        if self.invalid == True:
            return
        blur = cv2.blur(self.img, (3, 3))
        d = decode(blur)
        self.img = blur
        if len(d) == 0:
            self.errors.append("QR code could not be found!")
            self.data = None
            self.invalid = True
            return
        self.QRDecode = d
        self.QRData = d[0].data
        xpos = d[0].rect.left
        ypos = d[0].rect.top
        # check if image is rotated wrongly
        if xpos > self.imgWidth / 2.0 and ypos > self.imgHeight / 2.0:
            self.rotateAngle(180)
    def rotateAngle(self, angle=0):
        # OpenCV expects the centre as (x, y) and the output size as (width, height)
        rot_mat = cv2.getRotationMatrix2D(
            (self.imgWidth / 2, self.imgHeight / 2), angle, 1.0
        )
        result = cv2.warpAffine(
            self.img,
            rot_mat,
            (self.imgWidth, self.imgHeight),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),
        )
        self.img = result
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
    # TODO: improve the thresholding
    def imgTreshold(self):
        (self.thresh, self.bwimg) = cv2.threshold(
            self.img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )
    def getSkewAngle(self):
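        # Estimate page skew as the mean angle of near-horizontal Hough lines found
        # on the negative of the binarised image; debug output goes to debug_1.png
        # and debug_2.png.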
        neg = 255 - self.bwimg  # get negative image
        cv2.imwrite("debug_1.png", neg)
        angle_counter = 0  # number of angles
        angle = 0.0  # collects sum of angles
        cimg = cv2.cvtColor(self.img, cv2.COLOR_GRAY2BGR)
        # get all the Hough lines
        for line in cv2.HoughLinesP(neg, 1, np.pi / 180, 325):
            x1, y1, x2, y2 = line[0]
            cv2.line(cimg, (x1, y1), (x2, y2), (0, 0, 255), 2)
            # calculate the angle (in radians)
            this_angle = np.arctan2(y2 - y1, x2 - x1)
            if this_angle and abs(this_angle) <= np.deg2rad(10):
                # keep only near-horizontal lines; drop exact zeros and outliers
                angle += this_angle
                angle_counter += 1
        # the skew is the mean of the collected angles; the except branch guards
        # against division by zero when no line passed the filter
        try:
            skew = np.rad2deg(angle / angle_counter)
        except ZeroDivisionError:
            skew = 0
        cv2.imwrite("debug_2.png", cimg)
        return skew
    def locateUpMarkers(self, threshold=0.85, height=200):
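        # Template-match the black marker squares in the top strip of the page,
        # discard false hits from the QR code (anything more than 20 px below the
        # topmost match) and collapse duplicates closer than 40 px in x.
        # Matches are drawn into debug_3.png.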
        template = cv2.imread("template.png", 0)
        w, h = template.shape[::-1]
        crop_img = self.img[0:height, :]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
        # remove false matching of the squares in qr code
        loc_filtered_x = []
        loc_filtered_y = []
        if len(loc[0]) == 0:
            min_y = -1
        else:
            min_y = np.min(loc[0])
            for pt in zip(*loc[::-1]):
                if pt[1] < min_y + 20:
                    loc_filtered_y.append(pt[1])
                    loc_filtered_x.append(pt[0])
                    # order by x coordinate
            loc_filtered_x, loc_filtered_y = zip(
                *sorted(zip(loc_filtered_x, loc_filtered_y))
            )
            # loc=[loc_filtered_y,loc_filtered_x]
            # remove duplicates
            a = np.diff(loc_filtered_x) > 40
            a = np.append(a, True)
            loc_filtered_x = np.array(loc_filtered_x)
            loc_filtered_y = np.array(loc_filtered_y)
            loc = [loc_filtered_y[a], loc_filtered_x[a]]
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
        cv2.imwrite("debug_3.png", cimg)
        self.xMarkerLocations = loc
        return loc
    def locateRightMarkers(self, threshold=0.85, width=200):
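        # Same idea along the right edge of the page: keep only hits close to the
        # rightmost match, order them by y and collapse duplicates closer than
        # 40 px. Matches are drawn into debug_4.png.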
        template = cv2.imread("template.png", 0)
        w, h = template.shape[::-1]
        crop_img = self.img[:, -width:]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
        # remove false matching of the squares in qr code
        loc_filtered_x = []
        loc_filtered_y = []
        if len(loc[1]) == 0:
            min_x = -1
        else:
            max_x = np.max(loc[1])
            for pt in zip(*loc[::-1]):
                # pt is (x, y); keep only hits whose x lies near the rightmost match
                if pt[0] > max_x - 20:
                    loc_filtered_y.append(pt[1])
                    loc_filtered_x.append(pt[0])
                    # order by y coordinate
            loc_filtered_y, loc_filtered_x = zip(
                *sorted(zip(loc_filtered_y, loc_filtered_x))
            )
            # loc=[loc_filtered_y,loc_filtered_x]
            # remove duplicates
            a = np.diff(loc_filtered_y) > 40
            a = np.append(a, True)
            loc_filtered_x = np.array(loc_filtered_x)
            loc_filtered_y = np.array(loc_filtered_y)
            loc = [loc_filtered_y[a], loc_filtered_x[a]]
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
        cv2.imwrite("debug_4.png", cimg)
        self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
        return self.yMarkerLocations
    def generateAnswerMatrix(self):
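        # For every (right-marker y, top-marker x) intersection cut a roughly
        # 50x50 px region from the binarised image and record the fraction of black
        # pixels; rows of answerMatrix follow the right-hand markers and columns
        # the top markers.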
        self.locateUpMarkers()
        self.locateRightMarkers()
        roixoff = 10
        roiyoff = 5
        roiwidth = 50
        roiheight = roiwidth
        totpx = roiwidth * roiheight
        self.answerMatrix = []
        for y in self.yMarkerLocations[0]:
            oneline = []
            for x in self.xMarkerLocations[1]:
                roi = self.bwimg[
                    y - roiyoff : y + int(roiheight - roiyoff),
                    x - roixoff : x + int(roiwidth - roixoff),
                ]
                # cv2.imwrite('ans_x'+str(x)+'_y_'+str(y)+'.png',roi)
                black = totpx - cv2.countNonZero(roi)
                oneline.append(black / totpx)
            self.answerMatrix.append(oneline)
    def get_enhanced_sid(self):
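        # Run the external SID classifier on the student-ID strip in the top-right
        # part of the page (roughly 4.5-8.5 % of the height, 70-99 % of the width)
        # and merge its errors and warnings into the paper's lists.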
        if self.sid_classifier is None:
            return "x"
        sid_mask = None
        if self.settings is not None:
            sid_mask = self.settings.get("sid_mask", None)
        es, err, warn = getSID(
            self.img[
                int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
                int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
            ],
            self.sid_classifier,
            sid_mask
        )
        self.errors.extend(err)
        self.warnings.extend(warn)
        return es
    def get_code_data(self):
        # EAN-13 barcodes encode exam_id, page_no and paper_id directly in the
        # digits; QR codes carry a comma-separated payload that may also include
        # the student ID.
        qrdata = bytes.decode(self.QRData, "utf8")
        if self.QRDecode[0].type == "EAN13":
            return {
                "exam_id": int(qrdata[0:7]),
                "page_no": int(qrdata[7]),
                "paper_id": int(qrdata[-5:-1]),
                "faculty_id": None,
                "sid": None,
            }
        else:
            data = qrdata.split(",")
            retval = {
                "exam_id": int(data[1]),
                "page_no": int(data[3]),
                "paper_id": int(data[2]),
                "faculty_id": int(data[0]),
            }
            if len(data) > 4:
                retval["sid"] = data[4]
            return retval
    def get_paper_ocr_data(self):
        # Assemble everything extracted from the page: decoded code data, marker
        # positions normalised to the page size, the thresholded answer matrix
        # and the student ID.
        data = self.get_code_data()
        data["qr"] = self.QRData
        data["errors"] = self.errors
        data["warnings"] = self.warnings
        data["up_position"] = (
            list(self.xMarkerLocations[1] / self.imgWidth),
            list(self.yMarkerLocations[1] / self.imgHeight),
        )
        data["right_position"] = (
            list(self.xMarkerLocations[1] / self.imgWidth),
            list(self.yMarkerLocations[1] / self.imgHeight),
        )
        data["ans_matrix"] = (
            (np.array(self.answerMatrix) > self.settings["answer_treshold"]) * 1
        ).tolist()
        if data.get("sid") is None:
            data["sid"] = self.get_enhanced_sid()
        return data
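For illustration, a minimal usage sketch of the Paper class: the scan path, the classifier pickle and the settings below are placeholder assumptions, and the classifier can be any object that sid_process.getSID accepts.

if __name__ == "__main__":
    import pickle

    # Hypothetical inputs: adjust the paths and settings to the local setup.
    with open("sid_classifier.pickle", "rb") as f:
        classifier = pickle.load(f)

    paper = Paper(
        filename="scan.png",
        sid_classifier=classifier,
        settings={"answer_treshold": 0.25},
    )
    if paper.invalid:
        print(paper.errors)
    else:
        print(paper.get_paper_ocr_data())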