havoc/sizif-ocr.git

			@@ -1,201 +1,294 @@
			from pyzbar.pyzbar import decode
			from sid_process import getSID
			import cv2
			import numpy as np
			import math


			class Paper:
			def __init__(self, filename=None, sid_classifier=None, settings=None):
			self.filename = filename
			self.invalid = None
			self.QRData = None
			self.settings = {"answer_threshold": 0.25} if settings is None else settings
			self.errors = []
			self.warnings = []
			self.sid = None
			self.sid_classifier = sid_classifier
			if filename is not None:
			self.loadImage(filename)
			self.runOcr()

			def loadImage(self, filename, rgbchannel=0):
			    """Read *filename* into ``self.img`` and record its dimensions.

			    ``rgbchannel`` is passed straight through as the second argument of
			    ``cv2.imread``. When the read yields ``None`` an error message is
			    recorded, the paper is flagged invalid and the size attributes are
			    left unset.
			    """
			    image = cv2.imread(filename, rgbchannel)
			    self.img = image
			    if image is None:
			        self.errors.append("File could not be loaded!")
			        self.invalid = True
			        return
			    # shape starts with (rows, columns), i.e. height before width.
			    height, width = image.shape[0:2]
			    self.imgHeight = height
			    self.imgWidth = width

			class Paper():

			def __init__(self, filename=None):
			self.filename=filename
			self.invalid=None
			self.QRData=None
			self.errors=[]
			self.warnings=[]
			if filename is not None:
			self.loadImage(filename)
			self.runOcr()
			def saveImage(self, filename="debug_image.png"):
			    """Write the current working image (``self.img``) to *filename*."""
			    current_image = self.img
			    cv2.imwrite(filename, current_image)

			# Run the recognition steps on the loaded image: decode the QR/EAN code
			# (which may rotate the page by 180 degrees) and then binarise it.
			# Returns immediately when the paper is already flagged invalid.
			def runOcr(self):
			if self.invalid == True:
			return
			self.decodeQRandRotate()
			self.imgTreshold()
			# Deskewing is switched off: the angle stays 0 and the code that would
			# measure and apply it is commented out below.
			skewAngle = 0
			# try:
			# skewAngle=self.getSkewAngle()
			# except:
			# self.errors.append("Could not determine skew angle!")
			# self.rotateAngle(skewAngle)

			def loadImage(self, filename, rgbchannel=0):
			    """Load *filename* with ``cv2.imread`` and remember the image size.

			    On failure (``cv2.imread`` gave ``None``) the error list gets a
			    message and ``self.invalid`` becomes ``True``; the width and height
			    attributes are not set in that case.
			    """
			    self.img = cv2.imread(filename, rgbchannel)
			    if self.img is not None:
			        rows, cols = self.img.shape[0:2]
			        self.imgHeight, self.imgWidth = rows, cols
			        return
			    self.errors.append("File could not be loaded!")
			    self.invalid = True
			self.generateAnswerMatrix()

			def saveImage(self, filename='debug_image.png'):
			    """Dump ``self.img`` to *filename* using ``cv2.imwrite``."""
			    target_path = filename
			    cv2.imwrite(target_path, self.img)
			self.saveImage()

			# Run the recognition steps on the loaded image: decode the QR code
			# (which may rotate the page) and then binarise it. Does nothing when
			# the paper is already flagged invalid.
			def runOcr(self):
			if self.invalid==True:
			return
			self.decodeQRandRotate()
			self.imgTreshold()
			# Deskewing is disabled; the angle is fixed at 0 and the measuring code
			# below is commented out.
			skewAngle=0
			# try:
			# skewAngle=self.getSkewAngle()
			# except:
			# self.errors.append("Could not determine skew angle!")
			# self.rotateAngle(skewAngle)
			def decodeQRandRotate(self):
			if self.invalid == True:
			return
			blur = cv2.blur(self.img, (3, 3))
			d = decode(blur)
			self.img = blur
			if len(d) == 0:
			self.errors.append("QR code could not be found!")
			self.data = None
			self.invalid = True
			return
			if(len(d)>1): #if there are multiple codes, get first ean or qr code available.
			for dd in d:
			if(dd.type=="EAN13" or dd.type=="QR"):
			d[0]=dd
			break
			self.QRDecode = d
			self.QRData = d[0].data
			xpos = d[0].rect.left
			ypos = d[0].rect.top
			# check if image is rotated wrongly
			if xpos > self.imgHeight / 2.0 and ypos > self.imgWidth / 2.0:
			self.rotateAngle(180)

			self.generateAnswerMatrix()
			# Rotate self.img by `angle` degrees about the image centre and store the
			# result back in self.img, refreshing imgHeight/imgWidth afterwards.
			def rotateAngle(self, angle=0):
			# Earlier variant with height and width swapped in the centre point,
			# kept for reference:
			# rot_mat = cv2.getRotationMatrix2D(
			# (self.imgHeight / 2, self.imgWidth / 2), angle, 1.0
			# )
			rot_mat = cv2.getRotationMatrix2D(
			(self.imgWidth / 2, self.imgHeight / 2), angle, 1.0
			)
			# The output keeps the input size; areas uncovered by the rotation are
			# filled with the constant border value 255.
			result = cv2.warpAffine(
			self.img,
			rot_mat,
			(self.imgWidth, self.imgHeight),
			flags=cv2.INTER_CUBIC,
			borderMode=cv2.BORDER_CONSTANT,
			borderValue=(255, 255, 255),
			)

			# NOTE(review): this call writes the image from *before* the rotation
			# (self.img is replaced only on the next line); it looks like residue
			# from another method - confirm whether it belongs here.
			self.saveImage()
			self.img = result
			self.imgHeight, self.imgWidth = self.img.shape[0:2]

			def decodeQRandRotate(self):
			if self.invalid == True:
			return
			blur = cv2.blur(self.img,(3,3))
			d=decode(blur)
			self.img=blur
			if len(d) == 0:
			self.errors.append("QR code could not be found!")
			self.data=None
			self.invalid=True
			return
			self.QRDecode=d
			self.QRData=d[0].data
			xpos=d[0].rect.left
			ypos=d[0].rect.top
			#check if image is rotated wrongly
			if xpos>self.imgHeight/2.0 and ypost>self.imgWidth/2.0:
			self.rotate(180)
			# TODO: improve the thresholding

			# Rotate self.img by `angle` degrees; uncovered areas are filled with the
			# constant border value 255.
			def rotateAngle(self,angle=0):
			# NOTE(review): the centre is given as (height/2, width/2) and the output
			# size as (height, width). OpenCV documents the centre as (x, y) and the
			# size as (width, height), so these look swapped for non-square images -
			# confirm.
			rot_mat = cv2.getRotationMatrix2D((self.imgHeight/2, self.imgWidth/2), angle, 1.0)
			result = cv2.warpAffine(self.img,
			rot_mat,
			(self.imgHeight, self.imgWidth),
			flags=cv2.INTER_CUBIC,
			borderMode=cv2.BORDER_CONSTANT,
			borderValue=(255, 255, 255))
			def imgTreshold(self):
			    """Binarise ``self.img`` into ``self.bwimg``.

			    ``cv2.threshold`` is called with the binary and Otsu flags combined.
			    The threshold value it reports is stored in ``self.thresh`` and the
			    resulting 0/255 image in ``self.bwimg``.
			    """
			    # The two flags are OR-ed together with a plain "|".
			    mode = cv2.THRESH_BINARY | cv2.THRESH_OTSU
			    self.thresh, self.bwimg = cv2.threshold(self.img, 128, 255, mode)

			self.img=result
			self.imgHeight, self.imgWidth = self.img.shape[0:2]
			def getSkewAngle(self):
			neg = 255 - self.bwimg # get negative image
			cv2.imwrite("debug_1.png", neg)

			angle_counter = 0 # number of angles
			angle = 0.0 # collects sum of angles
			cimg = cv2.cvtColor(self.img, cv2.COLOR_GRAY2BGR)

			# TODO: improve the thresholding
			def imgTreshold(self):
			    """Binarise ``self.img`` into ``self.bwimg``.

			    ``cv2.threshold`` is called with the binary and Otsu flags combined.
			    The threshold value it reports is stored in ``self.thresh`` and the
			    resulting 0/255 image in ``self.bwimg``.
			    """
			    # The two flags are OR-ed together with a plain "|".
			    mode = cv2.THRESH_BINARY | cv2.THRESH_OTSU
			    self.thresh, self.bwimg = cv2.threshold(self.img, 128, 255, mode)

			# get all the Hough lines
			for line in cv2.HoughLinesP(neg, 1, np.pi / 180, 325):
			x1, y1, x2, y2 = line[0]
			cv2.line(cimg, (x1, y1), (x2, y2), (0, 0, 255), 2)
			# calculate the angle (in radians)
			this_angle = np.arctan2(y2 - y1, x2 - x1)
			if this_angle and abs(this_angle) <= 10:
			# filtered zero degree and outliers
			angle += this_angle
			angle_counter += 1

			def getSkewAngle(self):
			neg = 255 - self.bwimg # get negative image
			cv2.imwrite('debug_1.png', neg)
			# the skew is calculated of the mean of the total angles, #try block helps with division by zero.
			try:
			skew = np.rad2deg(
			angle / angle_counter
			) # the 1.2 factor is just experimental....
			except:
			skew = 0

			angle_counter = 0 # number of angles
			angle = 0.0 # collects sum of angles
			cimg = cv2.cvtColor(self.img,cv2.COLOR_GRAY2BGR)
			cv2.imwrite("debug_2.png", cimg)
			return skew

			# get all the Hough lines
			for line in cv2.HoughLinesP(neg, 1, np.pi/180, 325):
			x1, y1, x2, y2 = line[0]
			cv2.line(cimg,(x1,y1), (x2,y2), (0,0,255),2)
			# calculate the angle (in radians)
			this_angle = np.arctan2(y2 - y1, x2 - x1)
			if this_angle and abs(this_angle) <= 10:
			# filtered zero degree and outliers
			angle += this_angle
			angle_counter += 1
			# Find the column markers along the top edge by template matching in the
			# first `height` rows of self.img. `threshold` is the minimum normalised
			# correlation score for a hit.
			def locateUpMarkers(self, threshold=0.85, height=200):
			# NOTE(review): cv2.imread gives None for an unreadable file (see
			# loadImage); template.shape would then raise AttributeError.
			template = cv2.imread("template.png", 0)
			w, h = template.shape[::-1]
			crop_img = self.img[0:height, :]
			res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
			# np.where yields (row indices, column indices) of all hits.
			loc = np.where(res >= threshold)
			cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
			# Keep only hits within 20 px of the topmost one; this drops false
			# matches on the squares of the QR code.
			loc_filtered_x = []
			loc_filtered_y = []
			if len(loc[0]) == 0:
			min_y = -1
			else:
			min_y = np.min(loc[0])
			# loc[::-1] puts columns first, so each pt is (x, y).
			for pt in zip(*loc[::-1]):
			if pt[1] < min_y + 20:
			loc_filtered_y.append(pt[1])
			loc_filtered_x.append(pt[0])
			# Order by x coordinate.
			# NOTE(review): with no hits both lists are empty, zip(*...) yields
			# nothing and this two-name unpacking raises ValueError.
			loc_filtered_x, loc_filtered_y = zip(
			*sorted(zip(loc_filtered_x, loc_filtered_y))
			)
			# loc=[loc_filtered_y,loc_filtered_x]
			# Remove duplicates: a hit is kept only when the next one is more than
			# 40 px further right; the last hit is always kept.
			a = np.diff(loc_filtered_x) > 40
			a = np.append(a, True)
			loc_filtered_x = np.array(loc_filtered_x)
			loc_filtered_y = np.array(loc_filtered_y)
			loc = [loc_filtered_y[a], loc_filtered_x[a]]
			# Outline every surviving hit on the colour copy.
			for pt in zip(*loc[::-1]):
			cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)

			# the skew is calculated of the mean of the total angles, #try block helps with division by zero.
			try:
			skew = np.rad2deg(angle / angle_counter) #the 1.2 factor is just experimental....
			except:
			skew=0
			cv2.imwrite("debug_3.png", cimg)

			cv2.imwrite('debug_2.png',cimg)
			return skew
			self.xMarkerLocations = loc
			return loc

			# Find the row markers along the right edge by template matching in the
			# last `width` columns of self.img. `threshold` is the minimum normalised
			# correlation score for a hit.
			def locateRightMarkers(self, threshold=0.85, width=200):
			# NOTE(review): cv2.imread gives None for an unreadable file (see
			# loadImage); template.shape would then raise AttributeError.
			template = cv2.imread("template.png", 0)
			w, h = template.shape[::-1]
			crop_img = self.img[:, -width:]
			res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
			# np.where yields (row indices, column indices) of all hits.
			loc = np.where(res >= threshold)
			cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)
			# remove false matching of the squares in qr code
			loc_filtered_x = []
			loc_filtered_y = []
			# NOTE(review): the empty branch assigns min_x, but the name used below
			# is max_x. It is never read in that case because the loop has nothing
			# to iterate; probably a typo - confirm.
			if len(loc[1]) == 0:
			min_x = -1
			else:
			max_x = np.max(loc[1])
			# loc[::-1] puts columns first, so each pt is (x, y).
			# NOTE(review): pt[1] is the y coordinate, yet it is compared with
			# max_x (the largest x); pt[0] looks like the intended operand - confirm.
			for pt in zip(*loc[::-1]):
			if pt[1] > max_x - 20:
			loc_filtered_y.append(pt[1])
			loc_filtered_x.append(pt[0])
			# Order by y coordinate.
			# NOTE(review): with no hits both lists are empty, zip(*...) yields
			# nothing and this two-name unpacking raises ValueError.
			loc_filtered_y, loc_filtered_x = zip(
			*sorted(zip(loc_filtered_y, loc_filtered_x))
			)
			# loc=[loc_filtered_y,loc_filtered_x]
			# Remove duplicates: a hit is kept only when the next one is more than
			# 40 px further down; the last hit is always kept.
			a = np.diff(loc_filtered_y) > 40
			a = np.append(a, True)
			loc_filtered_x = np.array(loc_filtered_x)
			loc_filtered_y = np.array(loc_filtered_y)
			loc = [loc_filtered_y[a], loc_filtered_x[a]]
			# Outline every surviving hit on the colour copy.
			for pt in zip(*loc[::-1]):
			cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)

			# Find the column markers along the top edge by template matching in the
			# first `height` rows of self.img. `threshold` is the minimum normalised
			# correlation score for a hit.
			def locateUpMarkers(self, threshold=0.8, height=200):
			template = cv2.imread('template.png',0)
			w, h = template.shape[::-1]
			crop_img = self.img[0:height, :]
			res = cv2.matchTemplate(crop_img,template,cv2.TM_CCOEFF_NORMED)
			# np.where yields (row indices, column indices) of all hits.
			loc = np.where( res >= threshold)
			cimg = cv2.cvtColor(crop_img,cv2.COLOR_GRAY2BGR)
			# Keep only hits within 20 px of the topmost one; this drops false
			# matches on the squares of the QR code.
			loc_filtered_x=[]
			loc_filtered_y=[]
			# NOTE(review): np.min raises ValueError on an empty array, i.e. when
			# the template matched nowhere.
			min_y=np.min(loc[0])
			# loc[::-1] puts columns first, so each pt is (x, y).
			for pt in zip(*loc[::-1]):
			if(pt[1]<min_y+20):
			loc_filtered_y.append(pt[1])
			loc_filtered_x.append(pt[0])
			# Order by x coordinate.
			loc_filtered_x,loc_filtered_y = zip(*sorted(zip(loc_filtered_x, loc_filtered_y)))
			#loc=[loc_filtered_y,loc_filtered_x]
			# Remove duplicates: a hit is kept only when the next one is more than
			# 40 px further right; the last hit is always kept.
			a=np.diff(loc_filtered_x)>40
			a=np.append(a,True)
			loc_filtered_x=np.array(loc_filtered_x)
			loc_filtered_y=np.array(loc_filtered_y)
			loc=[loc_filtered_y[a],loc_filtered_x[a]]
			# Outline every surviving hit on the colour copy.
			for pt in zip(*loc[::-1]):
			cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0,255,255), 2)
			cv2.imwrite("debug_4.png", cimg)

			self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
			return self.yMarkerLocations

			cv2.imwrite('debug_3.png',cimg)
			def generateAnswerMatrix(self):
			self.locateUpMarkers()
			self.locateRightMarkers()

			self.xMarkerLocations=loc
			return loc
			roixoff = 10
			roiyoff = 5
			roiwidth = 50
			roiheight = roiwidth
			totpx = roiwidth * roiheight

			# Find the row markers along the right edge by template matching in the
			# last `width` columns of self.img. `threshold` is the minimum normalised
			# correlation score for a hit.
			def locateRightMarkers(self, threshold=0.8, width=200):
			template = cv2.imread('template.png',0)
			w, h = template.shape[::-1]
			crop_img = self.img[:, -width:]
			res = cv2.matchTemplate(crop_img,template,cv2.TM_CCOEFF_NORMED)
			# np.where yields (row indices, column indices) of all hits.
			loc = np.where( res >= threshold)
			cimg = cv2.cvtColor(crop_img,cv2.COLOR_GRAY2BGR)
			#remove false matching of the squares in qr code
			loc_filtered_x=[]
			loc_filtered_y=[]
			# NOTE(review): np.max raises ValueError on an empty array, i.e. when
			# the template matched nowhere.
			max_x=np.max(loc[1])
			# loc[::-1] puts columns first, so each pt is (x, y).
			# NOTE(review): pt[1] is the y coordinate, yet it is compared with
			# max_x (the largest x); pt[0] looks like the intended operand - confirm.
			for pt in zip(*loc[::-1]):
			if(pt[1]>max_x-20):
			loc_filtered_y.append(pt[1])
			loc_filtered_x.append(pt[0])
			# Order by y coordinate.
			loc_filtered_y,loc_filtered_x = zip(*sorted(zip(loc_filtered_y, loc_filtered_x)))
			#loc=[loc_filtered_y,loc_filtered_x]
			# Remove duplicates: a hit is kept only when the next one is more than
			# 40 px further down; the last hit is always kept.
			a=np.diff(loc_filtered_y)>40
			a=np.append(a,True)
			loc_filtered_x=np.array(loc_filtered_x)
			loc_filtered_y=np.array(loc_filtered_y)
			loc=[loc_filtered_y[a],loc_filtered_x[a]]
			# Outline every surviving hit on the colour copy.
			for pt in zip(*loc[::-1]):
			cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0,255,255), 2)
			self.answerMatrix = []
			for y in self.yMarkerLocations[0]:
			oneline = []
			for x in self.xMarkerLocations[1]:
			roi = self.bwimg[
			y - roiyoff : y + int(roiheight - roiyoff),
			x - roixoff : x + int(roiwidth - roixoff),
			]
			# cv2.imwrite('ans_x'+str(x)+'_y_'+str(y)+'.png',roi)
			black = totpx - cv2.countNonZero(roi)
			oneline.append(black / totpx)
			self.answerMatrix.append(oneline)

			def get_enhanced_sid(self):
			if self.sid_classifier is None:
			return "x"
			if self.settings is not None:
			sid_mask = self.settings.get("sid_mask", None)
			es, err, warn = getSID(
			self.img[
			int(0.04 * self.imgHeight) : int(0.095 * self.imgHeight),
			int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
			],
			self.sid_classifier,
			sid_mask,
			)
			[self.errors.append(e) for e in err]
			[self.warnings.append(w) for w in warn]
			return es

			cv2.imwrite('debug_4.png',cimg)
			# Turn the decoded code payload into a dict with the keys exam_id,
			# page_no, paper_id, faculty_id and (usually) sid.
			def get_code_data(self):
			# No payload at all: record an error and return a dict of Nones.
			if self.QRData is None:
			self.errors.append("Could not read QR or EAN code! Not an exam?")
			retval = {
			"exam_id": None,
			"page_no": None,
			"paper_id": None,
			"faculty_id": None,
			"sid": None,
			}
			return retval
			qrdata = bytes.decode(self.QRData, "utf8")
			# EAN13 layout: characters 0-6 are the exam id, character 7 the page
			# number and the four characters before the last one the paper id.
			# The final character is skipped - presumably the check digit; confirm.
			if self.QRDecode[0].type == "EAN13":
			return {
			"exam_id": int(qrdata[0:7]),
			"page_no": int(qrdata[7]),
			"paper_id": int(qrdata[-5:-1]),
			"faculty_id": None,
			"sid": None,
			}
			else:
			# Any other code type: comma-separated fields in the order
			# faculty_id, exam_id, paper_id, page_no and an optional fifth field sid.
			data = qrdata.split(",")
			retval = {
			"exam_id": int(data[1]),
			"page_no": int(data[3]),
			"paper_id": int(data[2]),
			"faculty_id": int(data[0]),
			}
			# NOTE(review): with four or fewer fields the dict has no "sid" key at
			# all, unlike the two branches above; code that indexes ["sid"] will
			# raise KeyError.
			if len(data) > 4:
			retval["sid"] = data[4]

			self.yMarkerLocations=[loc[0], loc[1]+self.imgWidth-width]
			return self.yMarkerLocations
			return retval


			def generateAnswerMatrix(self):
			    """Measure how dark every answer box is and store the grid.

			    The markers are located first. For each (row marker, column marker)
			    pair a box is cut from the binarised image ``self.bwimg``, and the
			    share of zero-valued pixels, relative to the nominal 50x50 box
			    area, is stored. ``self.answerMatrix`` ends up as a list of rows,
			    one per row marker.
			    """
			    self.locateUpMarkers()
			    self.locateRightMarkers()

			    box_size = 50
			    x_margin, y_margin = 10, 5
			    box_area = box_size * box_size

			    def dark_fraction(top, left):
			        # The box starts slightly above and left of the marker position.
			        patch = self.bwimg[
			            top - y_margin : top + int(box_size - y_margin),
			            left - x_margin : left + int(box_size - x_margin),
			        ]
			        return (box_area - cv2.countNonZero(patch)) / box_area

			    self.answerMatrix = []
			    for top in self.yMarkerLocations[0]:
			        row = [dark_fraction(top, left) for left in self.xMarkerLocations[1]]
			        self.answerMatrix.append(row)

			def get_paper_ocr_data(self):
			data = self.get_code_data()
			data["qr"] = self.QRData
			data["errors"] = self.errors
			data["warnings"] = self.warnings
			data["up_position"] = (
			list(self.xMarkerLocations[1] / self.imgWidth),
			list(self.yMarkerLocations[1] / self.imgHeight),
			)
			data["right_position"] = (
			list(self.xMarkerLocations[1] / self.imgWidth),
			list(self.yMarkerLocations[1] / self.imgHeight),
			)
			data["ans_matrix"] = (
			(np.array(self.answerMatrix) > self.settings["answer_threshold"]) * 1
			).tolist()
			if data["sid"] is None and data["page_no"] == 0:
			data["sid"] = self.get_enhanced_sid()
			return data