Development of the ocr part of AOI
Samo Penic
2018-11-16 511c2e7452a37fd32e7e7e1b83d46277ea57b6e3
Works even if markers are not detected.
2 files modified
2 files added
382 ■■■■ changed files
.idea/sonarIssues.xml 19 ●●●●● patch | view | raw | blame | history
.idea/vcs.xml 6 ●●●●● patch | view | raw | blame | history
Ocr.py 353 ●●●● patch | view | raw | blame | history
aoiOcr.py 4 ●●●● patch | view | raw | blame | history
.idea/sonarIssues.xml
New file
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="issues">
    <option name="index">
      <map>
        <entry key="$PROJECT_DIR$/Ocr.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/aoiOcr.py">
          <value>
            <set />
          </value>
        </entry>
      </map>
    </option>
  </component>
</project>
.idea/vcs.xml
New file
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
Ocr.py
@@ -4,198 +4,209 @@
import math
class Paper:
    def __init__(self, filename=None):
        self.filename = filename
        self.invalid = None
        self.QRData = None
        self.errors = []
        self.warnings = []
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()
    def loadImage(self, filename, rgbchannel=0):
        self.img = cv2.imread(filename, rgbchannel)
        if self.img is None:
            self.errors.append("File could not be loaded!")
            self.invalid = True
            return
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
class Paper():
    def __init__(self, filename=None):
        self.filename=filename
        self.invalid=None
        self.QRData=None
        self.errors=[]
        self.warnings=[]
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()
    def saveImage(self, filename="debug_image.png"):
        cv2.imwrite(filename, self.img)
    def runOcr(self):
        if self.invalid == True:
            return
        self.decodeQRandRotate()
        self.imgTreshold()
        skewAngle = 0
        #         try:
        #             skewAngle=self.getSkewAngle()
        #         except:
        #             self.errors.append("Could not determine skew angle!")
        #         self.rotateAngle(skewAngle)
    def loadImage(self, filename, rgbchannel=0):
        self.img=cv2.imread(filename,rgbchannel)
        if self.img is None:
            self.errors.append("File could not be loaded!")
            self.invalid=True
            return
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
        self.generateAnswerMatrix()
    def saveImage(self, filename='debug_image.png'):
        cv2.imwrite(filename, self.img)
        self.saveImage()
    def runOcr(self):
        if self.invalid==True:
            return
        self.decodeQRandRotate()
        self.imgTreshold()
        skewAngle=0
#        try:
#            skewAngle=self.getSkewAngle()
#        except:
#            self.errors.append("Could not determine skew angle!")
#        self.rotateAngle(skewAngle)
    def decodeQRandRotate(self):
        if self.invalid == True:
            return
        blur = cv2.blur(self.img, (3, 3))
        d = decode(blur)
        self.img = blur
        if len(d) == 0:
            self.errors.append("QR code could not be found!")
            self.data = None
            self.invalid = True
            return
        self.QRDecode = d
        self.QRData = d[0].data
        xpos = d[0].rect.left
        ypos = d[0].rect.top
        # check if image is rotated wrongly
        if xpos > self.imgHeight / 2.0 and ypost > self.imgWidth / 2.0:
            self.rotateAngle(180)
        self.generateAnswerMatrix()
    def rotateAngle(self, angle=0):
        rot_mat = cv2.getRotationMatrix2D(
            (self.imgHeight / 2, self.imgWidth / 2), angle, 1.0
        )
        result = cv2.warpAffine(
            self.img,
            rot_mat,
            (self.imgHeight, self.imgWidth),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),
        )
        self.saveImage()
        self.img = result
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
    def decodeQRandRotate(self):
        if self.invalid == True:
            return
        blur = cv2.blur(self.img,(3,3))
        d=decode(blur)
        self.img=blur
        if len(d) == 0:
            self.errors.append("QR code could not be found!")
            self.data=None
            self.invalid=True
            return
        self.QRDecode=d
        self.QRData=d[0].data
        xpos=d[0].rect.left
        ypos=d[0].rect.top
        #check if image is rotated wrongly
        if xpos>self.imgHeight/2.0 and ypost>self.imgWidth/2.0:
            self.rotate(180)
        # todo, make better tresholding
    def rotateAngle(self,angle=0):
        rot_mat = cv2.getRotationMatrix2D((self.imgHeight/2, self.imgWidth/2), angle, 1.0)
        result = cv2.warpAffine(self.img,
                        rot_mat,
                        (self.imgHeight, self.imgWidth),
                        flags=cv2.INTER_CUBIC,
                        borderMode=cv2.BORDER_CONSTANT,
                        borderValue=(255, 255, 255))
    def imgTreshold(self):
        (self.thresh, self.bwimg) = cv2.threshold(
            self.img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )
        self.img=result
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
    def getSkewAngle(self):
        neg = 255 - self.bwimg  # get negative image
        cv2.imwrite("debug_1.png", neg)
        angle_counter = 0  # number of angles
        angle = 0.0  # collects sum of angles
        cimg = cv2.cvtColor(self.img, cv2.COLOR_GRAY2BGR)
    #todo, make better tresholding
    def imgTreshold(self):
        (self.thresh, self.bwimg) = cv2.threshold(self.img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # get all the Hough lines
        for line in cv2.HoughLinesP(neg, 1, np.pi / 180, 325):
            x1, y1, x2, y2 = line[0]
            cv2.line(cimg, (x1, y1), (x2, y2), (0, 0, 255), 2)
            # calculate the angle (in radians)
            this_angle = np.arctan2(y2 - y1, x2 - x1)
            if this_angle and abs(this_angle) <= 10:
                # filtered zero degree and outliers
                angle += this_angle
                angle_counter += 1
    def getSkewAngle(self):
        neg = 255 - self.bwimg  # get negative image
        cv2.imwrite('debug_1.png', neg)
                # the skew is calculated of the mean of the total angles, #try block helps with division by zero.
        try:
            skew = np.rad2deg(
                angle / angle_counter
            )  # the 1.2 factor is just experimental....
        except:
            skew = 0
        angle_counter = 0 # number of angles
        angle = 0.0 # collects sum of angles
        cimg = cv2.cvtColor(self.img,cv2.COLOR_GRAY2BGR)
        cv2.imwrite("debug_2.png", cimg)
        return skew
        # get all the Hough lines
        for line in cv2.HoughLinesP(neg, 1, np.pi/180, 325):
            x1, y1, x2, y2 = line[0]
            cv2.line(cimg,(x1,y1), (x2,y2), (0,0,255),2)
            # calculate the angle (in radians)
            this_angle = np.arctan2(y2 - y1, x2 - x1)
            if this_angle and abs(this_angle) <= 10:
                # filtered zero degree and outliers
                angle += this_angle
                angle_counter += 1
    def locateUpMarkers(self, threshold=0.85, height=200):
        template = cv2.imread("template.png", 0)
        w, h = template.shape[::-1]
        crop_img = self.img[0:height, :]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
        # remove false matching of the squares in qr code
        loc_filtered_x = []
        loc_filtered_y = []
        if len(loc[0]) == 0:
            min_y = -1
        else:
            min_y = np.min(loc[0])
            for pt in zip(*loc[::-1]):
                if pt[1] < min_y + 20:
                    loc_filtered_y.append(pt[1])
                    loc_filtered_x.append(pt[0])
                    # order by x coordinate
            loc_filtered_x, loc_filtered_y = zip(
                *sorted(zip(loc_filtered_x, loc_filtered_y))
            )
        # loc=[loc_filtered_y,loc_filtered_x]
        # remove duplicates
            a = np.diff(loc_filtered_x) > 40
            a = np.append(a, True)
            loc_filtered_x = np.array(loc_filtered_x)
            loc_filtered_y = np.array(loc_filtered_y)
            loc = [loc_filtered_y[a], loc_filtered_x[a]]
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
        # the skew is calculated of the mean of the total angles, #try block helps with division by zero.
        try:
            skew = np.rad2deg(angle / angle_counter) #the 1.2 factor is just experimental....
        except:
            skew=0
        cv2.imwrite("debug_3.png", cimg)
        cv2.imwrite('debug_2.png',cimg)
        return skew
        self.xMarkerLocations = loc
        return loc
    def locateRightMarkers(self, threshold=0.85, width=200):
        template = cv2.imread("template.png", 0)
        w, h = template.shape[::-1]
        crop_img = self.img[:, -width:]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
        # remove false matching of the squares in qr code
        loc_filtered_x = []
        loc_filtered_y = []
        if len(loc[1]) == 0:
            min_x = -1
        else:
            max_x = np.max(loc[1])
            for pt in zip(*loc[::-1]):
                if pt[1] > max_x - 20:
                    loc_filtered_y.append(pt[1])
                    loc_filtered_x.append(pt[0])
                    # order by y coordinate
            loc_filtered_y, loc_filtered_x = zip(
                *sorted(zip(loc_filtered_y, loc_filtered_x))
            )
            # loc=[loc_filtered_y,loc_filtered_x]
            # remove duplicates
            a = np.diff(loc_filtered_y) > 40
            a = np.append(a, True)
            loc_filtered_x = np.array(loc_filtered_x)
            loc_filtered_y = np.array(loc_filtered_y)
            loc = [loc_filtered_y[a], loc_filtered_x[a]]
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
    def locateUpMarkers(self, threshold=0.8, height=200):
        template = cv2.imread('template.png',0)
        w, h = template.shape[::-1]
        crop_img = self.img[0:height, :]
        res = cv2.matchTemplate(crop_img,template,cv2.TM_CCOEFF_NORMED)
        loc = np.where( res >= threshold)
        cimg = cv2.cvtColor(crop_img,cv2.COLOR_GRAY2BGR)
        #remove false matching of the squares in qr code
        loc_filtered_x=[]
        loc_filtered_y=[]
        min_y=np.min(loc[0])
        for pt in zip(*loc[::-1]):
            if(pt[1]<min_y+20):
                loc_filtered_y.append(pt[1])
                loc_filtered_x.append(pt[0])
        #order by x coordinate
        loc_filtered_x,loc_filtered_y = zip(*sorted(zip(loc_filtered_x, loc_filtered_y)))
        #loc=[loc_filtered_y,loc_filtered_x]
        #remove duplicates
        a=np.diff(loc_filtered_x)>40
        a=np.append(a,True)
        loc_filtered_x=np.array(loc_filtered_x)
        loc_filtered_y=np.array(loc_filtered_y)
        loc=[loc_filtered_y[a],loc_filtered_x[a]]
        for pt in zip(*loc[::-1]):
            cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0,255,255), 2)
        cv2.imwrite("debug_4.png", cimg)
        self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
        return self.yMarkerLocations
        cv2.imwrite('debug_3.png',cimg)
    def generateAnswerMatrix(self):
        self.locateUpMarkers()
        self.locateRightMarkers()
        self.xMarkerLocations=loc
        return loc
        roixoff = 10
        roiyoff = 5
        roiwidth = 50
        roiheight = roiwidth
        totpx = roiwidth * roiheight
    def locateRightMarkers(self, threshold=0.8, width=200):
        template = cv2.imread('template.png',0)
        w, h = template.shape[::-1]
        crop_img = self.img[:, -width:]
        res = cv2.matchTemplate(crop_img,template,cv2.TM_CCOEFF_NORMED)
        loc = np.where( res >= threshold)
        cimg = cv2.cvtColor(crop_img,cv2.COLOR_GRAY2BGR)
        #remove false matching of the squares in qr code
        loc_filtered_x=[]
        loc_filtered_y=[]
        max_x=np.max(loc[1])
        for pt in zip(*loc[::-1]):
            if(pt[1]>max_x-20):
                loc_filtered_y.append(pt[1])
                loc_filtered_x.append(pt[0])
        #order by y coordinate
        loc_filtered_y,loc_filtered_x = zip(*sorted(zip(loc_filtered_y, loc_filtered_x)))
        #loc=[loc_filtered_y,loc_filtered_x]
        #remove duplicates
        a=np.diff(loc_filtered_y)>40
        a=np.append(a,True)
        loc_filtered_x=np.array(loc_filtered_x)
        loc_filtered_y=np.array(loc_filtered_y)
        loc=[loc_filtered_y[a],loc_filtered_x[a]]
        for pt in zip(*loc[::-1]):
            cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0,255,255), 2)
        cv2.imwrite('debug_4.png',cimg)
        self.yMarkerLocations=[loc[0], loc[1]+self.imgWidth-width]
        return self.yMarkerLocations
    def generateAnswerMatrix(self):
        self.locateUpMarkers()
        self.locateRightMarkers()
        roixoff=10
        roiyoff=5
        roiwidth=50
        roiheight=roiwidth
        totpx=roiwidth*roiheight
        self.answerMatrix=[]
        for y in self.yMarkerLocations[0]:
            oneline=[]
            for x in self.xMarkerLocations[1]:
                roi=self.bwimg[ y-roiyoff:y+int(roiheight-roiyoff),x-roixoff:x+int(roiwidth-roixoff)]
                #cv2.imwrite('ans_x'+str(x)+'_y_'+str(y)+'.png',roi)
                black=totpx-cv2.countNonZero(roi)
                oneline.append(black/totpx)
            self.answerMatrix.append(oneline)
        self.answerMatrix = []
        for y in self.yMarkerLocations[0]:
            oneline = []
            for x in self.xMarkerLocations[1]:
                roi = self.bwimg[
                    y - roiyoff : y + int(roiheight - roiyoff),
                    x - roixoff : x + int(roiwidth - roixoff),
                ]
                # cv2.imwrite('ans_x'+str(x)+'_y_'+str(y)+'.png',roi)
                black = totpx - cv2.countNonZero(roi)
                oneline.append(black / totpx)
            self.answerMatrix.append(oneline)
aoiOcr.py
@@ -1,8 +1,8 @@
from Ocr import Paper
p=Paper(filename='testpage250dpi1.png')
#p=Paper(filename='sizif111.tif')
#p=Paper(filename='testpage300dpi_scan1.png')
p=Paper(filename='sizif111.tif')
print(p.QRData)
print(p.errors)