# Reconstructed post-patch contents of Ocr.py.
# Origin: patch e2fa6a35a6548a5acfd000af56ea333df8149b27 "More debugging cases",
# Samo Penic <samo.penic@gmail.com>, Fri, 16 Nov 2018 21:29:56 +0000
# (exported by Gitblit v1.9.3).
"""Scanned-paper processing built on OpenCV and pyzbar.

The :class:`Paper` class loads a grayscale scan, decodes the first barcode/QR
code found on it, thresholds the image, locates marker squares along the top
and right edges by template matching, and computes the fraction of dark pixels
in the region under every (row marker, column marker) pair.
"""
import math

import cv2
import numpy as np
from pyzbar.pyzbar import decode

from sid_process import getSID


class Paper:
    """One scanned page together with everything extracted from it.

    Attributes set by the constructor:
        filename: path passed to the constructor (may be None).
        invalid: None until a fatal problem is detected, then True.
        QRData: raw payload (bytes) of the first decoded code, or None.
        QRDecode: full list returned by ``pyzbar.decode``, or None.
        settings: dict of options; ``answer_treshold`` (sic) is the dark-pixel
            ratio above which a cell counts as marked, ``sid_mask`` is
            forwarded to ``getSID``.
        errors / warnings: lists of human-readable messages.
        sid_classifier: object forwarded to ``getSID``; None disables the
            enhanced SID lookup.
    """

    def __init__(self, filename=None, sid_classifier=None, settings=None):
        """Store the configuration and, if *filename* is given, process it."""
        self.filename = filename
        self.invalid = None
        self.QRData = None
        # Initialised here so get_code_data() never hits a missing attribute.
        self.QRDecode = None
        self.settings = {"answer_treshold": 0.25} if settings is None else settings
        self.errors = []
        self.warnings = []
        self.sid = None
        self.sid_classifier = sid_classifier
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()

    def loadImage(self, filename, rgbchannel=0):
        """Read *filename* with ``cv2.imread`` (flag 0 = grayscale).

        On failure an error is recorded and the paper is marked invalid.
        """
        self.img = cv2.imread(filename, rgbchannel)
        if self.img is None:
            self.errors.append("File could not be loaded!")
            self.invalid = True
            return
        self.imgHeight, self.imgWidth = self.img.shape[0:2]

    def saveImage(self, filename="debug_image.png"):
        """Write the current working image to *filename*."""
        cv2.imwrite(filename, self.img)

    def runOcr(self):
        """Run the whole pipeline: decode, threshold, build the answer matrix.

        Does nothing when the paper was already marked invalid.
        """
        if self.invalid:
            return
        self.decodeQRandRotate()
        self.imgTreshold()
        # TODO: skew correction (getSkewAngle + rotateAngle) is currently
        # disabled.
        self.generateAnswerMatrix()
        self.saveImage()

    def decodeQRandRotate(self):
        """Blur the image, decode the first code and fix upside-down scans.

        The blurred image replaces ``self.img``.  When no code is found an
        error is recorded and the paper is marked invalid.  When the code's
        top-left corner lies in the bottom-right quadrant the page is rotated
        by 180 degrees.
        """
        if self.invalid:
            return
        blur = cv2.blur(self.img, (3, 3))
        d = decode(blur)
        self.img = blur
        if len(d) == 0:
            self.errors.append("QR code could not be found!")
            self.data = None
            self.invalid = True
            return
        self.QRDecode = d
        self.QRData = d[0].data
        xpos = d[0].rect.left
        ypos = d[0].rect.top
        # rect.left is an x coordinate, so it is compared with the width and
        # rect.top with the height.  (The previous code had the two swapped
        # and referenced an undefined name ``ypost``.)
        if xpos > self.imgWidth / 2.0 and ypos > self.imgHeight / 2.0:
            self.rotateAngle(180)

    def rotateAngle(self, angle=0):
        """Rotate ``self.img`` by *angle* degrees around its centre.

        The canvas size is kept; uncovered borders are filled with white.
        """
        # OpenCV takes the centre as (x, y) and the output size as
        # (width, height).
        rot_mat = cv2.getRotationMatrix2D(
            (self.imgWidth / 2, self.imgHeight / 2), angle, 1.0
        )
        self.img = cv2.warpAffine(
            self.img,
            rot_mat,
            (self.imgWidth, self.imgHeight),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),
        )
        self.imgHeight, self.imgWidth = self.img.shape[0:2]

    # todo, make better tresholding
    def imgTreshold(self):
        """Binarise ``self.img`` into ``self.bwimg``.

        THRESH_OTSU is set, so OpenCV picks the threshold itself; the chosen
        value is stored in ``self.thresh``.
        """
        (self.thresh, self.bwimg) = cv2.threshold(
            self.img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )

    def getSkewAngle(self):
        """Estimate the page skew in degrees from probabilistic Hough lines.

        Returns the mean angle of all detected segments that are neither
        exactly horizontal nor further than 10 degrees from horizontal, or 0
        when no such segment exists.  Writes ``debug_1.png`` (negative image)
        and ``debug_2.png`` (detected segments).
        """
        neg = 255 - self.bwimg  # get negative image
        cv2.imwrite("debug_1.png", neg)
        cimg = cv2.cvtColor(self.img, cv2.COLOR_GRAY2BGR)

        # arctan2 yields radians; the former ``<= 10`` limit was therefore
        # always true and filtered nothing.
        max_skew = np.deg2rad(10)
        angles = []

        lines = cv2.HoughLinesP(neg, 1, np.pi / 180, 325)
        # HoughLinesP returns None (not an empty array) when nothing is found.
        for line in [] if lines is None else lines:
            x1, y1, x2, y2 = (int(v) for v in line[0])
            cv2.line(cimg, (x1, y1), (x2, y2), (0, 0, 255), 2)
            this_angle = np.arctan2(y2 - y1, x2 - x1)
            if this_angle and abs(this_angle) <= max_skew:
                angles.append(this_angle)

        skew = np.rad2deg(np.mean(angles)) if angles else 0

        cv2.imwrite("debug_2.png", cimg)
        return skew

    def _locateMarkers(self, crop_img, threshold, along_top, debug_filename):
        """Template-match marker squares inside *crop_img*.

        Args:
            crop_img: grayscale strip of the page to search.
            threshold: minimum TM_CCOEFF_NORMED score for a match.
            along_top: True keeps only matches within 20 px of the topmost
                match and orders them by x; False keeps only matches within
                20 px of the rightmost match and orders them by y.
            debug_filename: where the annotated strip is written.

        Returns:
            ``[ys, xs]`` – two equally long integer arrays in strip
            coordinates; both are empty when nothing was found (an error
            message is recorded in that case).
        """
        ys = np.array([], dtype=int)
        xs = np.array([], dtype=int)

        template = cv2.imread("template.png", 0)
        if template is None:
            self.errors.append("Marker template could not be loaded!")
            return [ys, xs]
        w, h = template.shape[::-1]

        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        match_y, match_x = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)

        if match_y.size == 0:
            self.errors.append("Markers could not be located!")
        else:
            # Keep only the outermost row/column of matches; this removes
            # false matches of the squares in the QR code.
            if along_top:
                keep = match_y < match_y.min() + 20
                primary, secondary = match_x[keep], match_y[keep]
            else:
                keep = match_x > match_x.max() - 20
                primary, secondary = match_y[keep], match_x[keep]
            # Sort by the running coordinate, ties broken by the other one.
            order = np.lexsort((secondary, primary))
            primary, secondary = primary[order], secondary[order]
            # Remove duplicates: neighbouring hits closer than 40 px belong to
            # the same marker; keep the last hit of each such run.
            last_of_run = np.append(np.diff(primary) > 40, True)
            primary, secondary = primary[last_of_run], secondary[last_of_run]
            if along_top:
                ys, xs = secondary, primary
            else:
                ys, xs = primary, secondary

        for x, y in zip(xs, ys):
            x, y = int(x), int(y)
            cv2.rectangle(cimg, (x, y), (x + w, y + h), (0, 255, 255), 2)
        cv2.imwrite(debug_filename, cimg)
        return [ys, xs]

    def locateUpMarkers(self, threshold=0.85, height=200):
        """Find the column markers in the top *height* pixels of the page.

        Stores and returns ``[ys, xs]`` (page coordinates, ordered by x) in
        ``self.xMarkerLocations``.  Writes ``debug_3.png``.
        """
        loc = self._locateMarkers(
            self.img[0:height, :], threshold, True, "debug_3.png"
        )
        self.xMarkerLocations = loc
        return loc

    def locateRightMarkers(self, threshold=0.85, width=200):
        """Find the row markers in the rightmost *width* pixels of the page.

        Stores and returns ``[ys, xs]`` (page coordinates, ordered by y) in
        ``self.yMarkerLocations``.  Writes ``debug_4.png``.

        NOTE(review): the previous filter compared each match's y coordinate
        with the maximum x, which incidentally dropped matches near the top of
        the strip.  It now keeps the rightmost column of matches, mirroring
        locateUpMarkers – verify against sample scans.
        """
        loc = self._locateMarkers(
            self.img[:, -width:], threshold, False, "debug_4.png"
        )
        # Translate strip x coordinates back to page coordinates.  A page
        # narrower than *width* is searched whole, hence no negative offset.
        x_offset = max(self.imgWidth - width, 0)
        self.yMarkerLocations = [loc[0], loc[1] + x_offset]
        return self.yMarkerLocations

    def generateAnswerMatrix(self):
        """Fill ``self.answerMatrix`` with dark-pixel ratios.

        One row per right-edge marker and one column per top-edge marker; each
        entry is the fraction of black pixels of ``self.bwimg`` inside a
        50x50 px window anchored at (column marker x - 10, row marker y - 5).
        """
        self.locateUpMarkers()
        self.locateRightMarkers()

        roixoff = 10
        roiyoff = 5
        roiwidth = 50
        roiheight = roiwidth

        self.answerMatrix = []
        for y in self.yMarkerLocations[0]:
            top = int(y) - roiyoff
            oneline = []
            for x in self.xMarkerLocations[1]:
                left = int(x) - roixoff
                # Clamp the start at 0: a negative start would make numpy
                # index from the opposite edge of the image.
                roi = self.bwimg[
                    max(top, 0) : top + roiheight,
                    max(left, 0) : left + roiwidth,
                ]
                # cv2.imwrite('ans_x'+str(x)+'_y_'+str(y)+'.png',roi)
                if roi.size == 0:
                    oneline.append(0.0)
                    continue
                # Relate to the actual window area so windows clipped by the
                # image border are not counted as partly black.
                black = roi.size - cv2.countNonZero(roi)
                oneline.append(black / roi.size)
            self.answerMatrix.append(oneline)

    def get_enhanced_sid(self):
        """Run ``getSID`` on the SID strip of the page.

        Returns "x" when no classifier was supplied, otherwise the first item
        returned by ``getSID``.  Errors and warnings it reports are appended
        to ``self.errors`` / ``self.warnings``.
        """
        if self.sid_classifier is None:
            return "x"
        sid_mask = None
        if self.settings is not None:
            sid_mask = self.settings.get("sid_mask", None)
        # Strip between 4.5 % and 8.5 % of the height, 70 % and 99 % of the
        # width.
        sid_region = self.img[
            int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
            int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
        ]
        es, err, warn = getSID(sid_region, self.sid_classifier, sid_mask)
        self.errors.extend(err)
        self.warnings.extend(warn)
        return es

    def get_code_data(self):
        """Parse the decoded code payload into a dict.

        EAN13: characters 0-6 are ``exam_id``, character 7 is ``page_no`` and
        the four characters before the last one are ``paper_id``.
        Otherwise the payload is comma separated:
        ``faculty_id,exam_id,paper_id,page_no[,sid]``.

        The result always contains ``exam_id``, ``page_no``, ``paper_id``,
        ``faculty_id`` and ``sid`` (None when not encoded).
        """
        qrdata = bytes.decode(self.QRData, "utf8")
        if self.QRDecode[0].type == "EAN13":
            return {
                "exam_id": int(qrdata[0:7]),
                "page_no": int(qrdata[7]),
                "paper_id": int(qrdata[-5:-1]),
                "faculty_id": None,
                "sid": None,
            }
        data = qrdata.split(",")
        return {
            "exam_id": int(data[1]),
            "page_no": int(data[3]),
            "paper_id": int(data[2]),
            "faculty_id": int(data[0]),
            # Always present so callers can test ``data['sid'] is None``.
            "sid": data[4] if len(data) > 4 else None,
        }

    def get_paper_ocr_data(self):
        """Collect everything extracted from the page into one dict.

        Adds to the output of :meth:`get_code_data`: the raw payload (``qr``),
        ``errors``, ``warnings``, the marker positions normalised by image
        width/height as ``(xs, ys)`` tuples (``up_position``,
        ``right_position``), the thresholded 0/1 ``ans_matrix`` and, when the
        code carried no SID, the result of :meth:`get_enhanced_sid`.
        """
        data = self.get_code_data()
        data["qr"] = self.QRData
        data["errors"] = self.errors
        data["warnings"] = self.warnings
        # Marker lists are stored as [ys, xs].  Both entries used to be the
        # same copy-pasted expression mixing x coordinates with the height.
        data["up_position"] = (
            list(self.xMarkerLocations[1] / self.imgWidth),
            list(self.xMarkerLocations[0] / self.imgHeight),
        )
        data["right_position"] = (
            list(self.yMarkerLocations[1] / self.imgWidth),
            list(self.yMarkerLocations[0] / self.imgHeight),
        )
        treshold = (self.settings or {}).get("answer_treshold", 0.25)
        data["ans_matrix"] = ((np.array(self.answerMatrix) > treshold) * 1).tolist()
        if data["sid"] is None:
            data["sid"] = self.get_enhanced_sid()
        return data