havoc/sizif-ocr.git

Samo Penic

2018-11-21 d88ce4da04499fd9f48d7a21a7ecded8535e9ab2

commit \| author \| age
e555c0	1	from pyzbar.pyzbar import decode
0d97e9	2	from .sid_process import getSID
e555c0	3	import cv2
SP	4	import numpy as np
69abed	5	import os
0d97e9	6	import pkg_resources
SP	7
# Template image of the square alignment marker, shipped next to this module.
# pkg_resources resource names are "/"-separated paths *relative* to the
# package; a leading slash makes the name absolute, which newer setuptools
# releases deprecate. The relative name resolves to the same file.
markerfile = 'template-sq.png'
markerfilename = pkg_resources.resource_filename(__name__, markerfile)
SP	10
e555c0	11
SP	12
class Paper:
    """A single scanned sheet and the data extracted from it.

    Constructing a ``Paper`` with a ``filename`` immediately loads the image
    and runs the whole pipeline (code detection, thresholding, marker
    location, answer matrix). Problems are collected in ``errors`` and
    ``warnings`` instead of being raised where the code can detect them.

    Attributes set by the pipeline:
        img, imgHeight, imgWidth: the (blurred, possibly rotated) image and
            its dimensions as reported by ``img.shape``.
        bwimg: binarised copy of ``img``.
        QRDecode, QRData: result list of ``pyzbar.decode`` and the raw bytes
            of the selected code.
        xMarkerLocations, yMarkerLocations: ``[y_array, x_array]`` pairs for
            the markers found along the top and the right edge.
        answerMatrix: per-marker-row lists with the fraction of black pixels
            in every answer cell.
    """

    def __init__(self, filename=None, sid_classifier=None, settings=None, output_path="/tmp"):
        """Store the configuration and, if ``filename`` is given, process it.

        Args:
            filename: path of the scanned image, or None to create an empty
                object without running any processing.
            sid_classifier: object handed to ``getSID``; when None,
                ``get_enhanced_sid`` returns the placeholder ``"x"``.
            settings: dict of options; the keys read in this class are
                ``"answer_threshold"`` and ``"sid_mask"``. Defaults to
                ``{"answer_threshold": 0.25}``.
            output_path: directory the processed image is written to by
                ``get_paper_ocr_data``.
        """
        self.filename = filename
        self.output_path = output_path
        self.invalid = None
        self.QRData = None
        self.QRDecode = None
        self.settings = {"answer_threshold": 0.25} if settings is None else settings
        self.errors = []
        self.warnings = []
        self.sid = None
        self.sid_classifier = sid_classifier
        # Pipeline results; they stay at these defaults when nothing is loaded.
        self.img = None
        self.bwimg = None
        self.xMarkerLocations = None
        self.yMarkerLocations = None
        self.answerMatrix = []
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()

    def loadImage(self, filename, rgbchannel=0):
        """Read ``filename`` with ``cv2.imread`` using ``rgbchannel`` as flag.

        On failure an error is recorded and the paper is marked invalid.
        """
        self.img = cv2.imread(filename, rgbchannel)
        if self.img is None:
            self.errors.append("File could not be loaded!")
            self.invalid = True
            return
        # shape is (rows, columns, ...), i.e. height first.
        self.imgHeight, self.imgWidth = self.img.shape[0:2]

    def saveImage(self, filename="/tmp/debug_image.png"):
        """Write the current working image to ``filename``."""
        cv2.imwrite(filename, self.img)

    def runOcr(self):
        """Run the processing pipeline on the loaded image.

        Does nothing when the paper is already marked invalid.
        """
        if self.invalid:
            return
        self.decodeQRandRotate()
        self.imgTreshold()
        cv2.imwrite('/tmp/debug_threshold.png', self.bwimg)
        # NOTE: deskewing (getSkewAngle() followed by rotateAngle()) is
        # currently disabled; the image is used as scanned.
        self.generateAnswerMatrix()
        self.saveImage()

    def decodeQRandRotate(self):
        """Find the QR/EAN code and rotate the image by 180 degrees if needed.

        The image is replaced by a 3x3-blurred copy. When no code is found an
        error is recorded and the paper is marked invalid.
        """
        if self.invalid:
            return
        blur = cv2.blur(self.img, (3, 3))
        d = decode(blur)
        self.img = blur
        if not d:
            self.errors.append("QR code could not be found!")
            self.data = None
            self.invalid = True
            return
        if len(d) > 1:
            # Several codes on the page: use the first EAN or QR code.
            # pyzbar reports QR codes as "QRCODE"; "QR" is kept for
            # backward compatibility.
            for dd in d:
                if dd.type in ("EAN13", "QR", "QRCODE"):
                    d[0] = dd
                    break
        self.QRDecode = d
        self.QRData = d[0].data
        xpos = d[0].rect.left
        ypos = d[0].rect.top
        # A code in the bottom-right quadrant means the sheet is upside down.
        # x is compared against the width and y against the height.
        if xpos > self.imgWidth / 2.0 and ypos > self.imgHeight / 2.0:
            self.rotateAngle(180)

    def rotateAngle(self, angle=0):
        """Rotate the working image by ``angle`` degrees around its centre.

        The canvas size is kept; uncovered areas are filled with white.
        """
        centre = (self.imgWidth / 2, self.imgHeight / 2)  # (x, y) order
        rot_mat = cv2.getRotationMatrix2D(centre, angle, 1.0)
        result = cv2.warpAffine(
            self.img,
            rot_mat,
            (self.imgWidth, self.imgHeight),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),
        )

        self.img = result
        self.imgHeight, self.imgWidth = self.img.shape[0:2]

    # TODO: make better thresholding
    def imgTreshold(self):
        """Binarise the working image with Otsu's method into ``bwimg``."""
        (self.thresh, self.bwimg) = cv2.threshold(
            self.img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )

    def getSkewAngle(self):
        """Estimate the skew of the page in degrees from its Hough lines.

        Returns the mean angle of all detected line segments that are
        neither exactly horizontal nor steeper than 10 degrees, or 0 when no
        such segment exists. Writes two debug images to /tmp.
        """
        neg = 255 - self.bwimg  # negative image: lines become white
        cv2.imwrite("/tmp/debug_1.png", neg)

        cimg = cv2.cvtColor(self.img, cv2.COLOR_GRAY2BGR)
        max_angle = np.deg2rad(10)  # arctan2 yields radians
        angles = []

        lines = cv2.HoughLinesP(neg, 1, np.pi / 180, 325)
        # HoughLinesP returns None (not an empty array) when nothing is found.
        for line in ([] if lines is None else lines):
            x1, y1, x2, y2 = line[0]
            cv2.line(cimg, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
            this_angle = np.arctan2(y2 - y1, x2 - x1)
            # Skip exactly-zero angles and outliers.
            if this_angle and abs(this_angle) <= max_angle:
                angles.append(this_angle)

        skew = np.rad2deg(sum(angles) / len(angles)) if angles else 0

        cv2.imwrite("/tmp/debug_2.png", cimg)
        return skew

    def locateUpMarkers(self, threshold=0.85, height=200):
        """Locate the marker squares in the top ``height`` rows of ``bwimg``.

        Args:
            threshold: minimal normalised correlation for a template match.
            height: number of rows, counted from the top, that are searched.

        Returns:
            ``[y_array, x_array]`` of the matches, ordered by x, which is also
            stored in ``xMarkerLocations``. When nothing matches, an error is
            recorded and two empty arrays are returned.
        """
        template = cv2.imread(markerfilename, 0)
        w, h = template.shape[::-1]
        crop_img = self.bwimg[0:height, :]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        ys, xs = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)

        if len(ys) == 0:
            self.errors.append("Top markers could not be found!")
            self.xMarkerLocations = [np.array([], dtype=int), np.array([], dtype=int)]
            return self.xMarkerLocations

        # Remove false matches on the squares of the QR code: keep only the
        # matches within 20 px of the topmost one.
        keep = ys < ys.min() + 20
        xs, ys = xs[keep], ys[keep]
        # Order by x coordinate (y breaks ties).
        order = np.lexsort((ys, xs))
        xs, ys = xs[order], ys[order]
        # Remove duplicates: of every run of matches less than 40 px apart
        # only the last one is kept.
        last_of_run = np.append(np.diff(xs) > 40, True)
        loc = [ys[last_of_run], xs[last_of_run]]
        for x, y in zip(loc[1], loc[0]):
            cv2.rectangle(
                cimg, (int(x), int(y)), (int(x) + w, int(y) + h), (0, 255, 255), 2
            )

        cv2.imwrite("/tmp/debug_3.png", cimg)

        self.xMarkerLocations = loc
        return loc

    def locateRightMarkers(self, threshold=0.85, width=200):
        """Locate the marker squares in the right ``width`` columns of ``bwimg``.

        Args:
            threshold: minimal normalised correlation for a template match.
            width: number of columns, counted from the right, that are searched.

        Returns:
            ``[y_array, x_array]`` of the matches in full-image coordinates,
            ordered by y, which is also stored in ``yMarkerLocations``. When
            nothing matches, the placeholder
            ``[array([1, 1]), array([1, 2])]`` is stored and returned.
        """
        template = cv2.imread(markerfilename, 0)
        w, h = template.shape[::-1]
        crop_img = self.bwimg[:, -width:]
        cv2.imwrite('/tmp/debug_right.png', crop_img)
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
        ys, xs = np.where(res >= threshold)
        cimg = cv2.cvtColor(crop_img, cv2.COLOR_GRAY2BGR)

        if len(xs) == 0:
            # Best-effort fallback kept from the original implementation.
            self.yMarkerLocations = [np.array([1, 1]), np.array([1, 2])]
            return self.yMarkerLocations

        # Remove false matches on the squares of the QR code: keep only the
        # matches within 20 px of the right-most one (x, not y, is compared).
        keep = xs > xs.max() - 20
        xs, ys = xs[keep], ys[keep]
        # Order by y coordinate (x breaks ties).
        order = np.lexsort((xs, ys))
        xs, ys = xs[order], ys[order]
        # Remove duplicates: of every run of matches less than 40 px apart
        # only the last one is kept.
        last_of_run = np.append(np.diff(ys) > 40, True)
        loc = [ys[last_of_run], xs[last_of_run]]
        for x, y in zip(loc[1], loc[0]):
            cv2.rectangle(
                cimg, (int(x), int(y)), (int(x) + w, int(y) + h), (0, 255, 255), 2
            )

        cv2.imwrite("/tmp/debug_4.png", cimg)

        # Translate x from crop coordinates back to full-image coordinates.
        self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
        return self.yMarkerLocations

    def generateAnswerMatrix(self):
        """Fill ``answerMatrix`` with the black-pixel ratio of every cell.

        A cell is the 50x50 px region around the intersection of a right
        marker row and a top marker column, shifted by the fixed offsets
        below. Regions are clipped to the image.
        """
        self.locateUpMarkers()
        self.locateRightMarkers()

        roixoff = 10
        roiyoff = 5
        roiwidth = 50
        roiheight = roiwidth

        self.answerMatrix = []
        for y in self.yMarkerLocations[0]:
            # Clamp at 0: a negative start would wrap around to the far edge.
            top = max(int(y) - roiyoff, 0)
            bottom = int(y) + roiheight - roiyoff
            oneline = []
            for x in self.xMarkerLocations[1]:
                left = max(int(x) - roixoff, 0)
                right = int(x) + roiwidth - roixoff
                roi = self.bwimg[top:bottom, left:right]
                if roi.size == 0:
                    oneline.append(0.0)
                    continue
                # Ratio is taken over the pixels actually inside the image.
                black = roi.size - cv2.countNonZero(roi)
                oneline.append(black / roi.size)
            self.answerMatrix.append(oneline)

    def get_enhanced_sid(self):
        """Read the student id from the top-right region of the image.

        Returns ``"x"`` when no classifier was supplied. Otherwise the region
        is passed to ``getSID`` together with the classifier and the optional
        ``"sid_mask"`` setting; the errors and warnings it reports are
        appended to this paper's lists and its first result is returned.
        """
        if self.sid_classifier is None:
            return "x"
        sid_mask = None
        if self.settings is not None:
            sid_mask = self.settings.get("sid_mask", None)
        sid_region = self.img[
            int(0.04 * self.imgHeight) : int(0.095 * self.imgHeight),
            int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
        ]
        es, err, warn = getSID(sid_region, self.sid_classifier, sid_mask)
        self.errors.extend(err)
        self.warnings.extend(warn)
        return es

    def get_code_data(self):
        """Parse the detected code into the exam identification fields.

        EAN13 payloads are read positionally (7 digits exam id, 1 digit page
        number, 4 digits paper id before the check digit); any other code is
        read as ``faculty_id,exam_id,paper_id,page_no[,sid]``. The page
        number is returned one higher than the encoded value.

        Returns:
            dict with the keys ``exam_id``, ``page_no``, ``paper_id``,
            ``faculty_id`` and ``sid``. All values are None, and an error is
            recorded, when no code was read or its payload cannot be parsed.
        """
        empty = {
            "exam_id": None,
            "page_no": None,
            "paper_id": None,
            "faculty_id": None,
            "sid": None,
        }
        if self.QRData is None:
            self.errors.append("Could not read QR or EAN code! Not an exam?")
            return empty
        try:
            qrdata = bytes.decode(self.QRData, "utf8")
            if self.QRDecode[0].type == "EAN13":
                return {
                    "exam_id": int(qrdata[0:7]),
                    "page_no": int(qrdata[7]) + 1,
                    "paper_id": int(qrdata[-5:-1]),
                    "faculty_id": None,
                    "sid": None,
                }
            data = qrdata.split(",")
            retval = {
                "exam_id": int(data[1]),
                "page_no": int(data[3]) + 1,
                "paper_id": int(data[2]),
                "faculty_id": int(data[0]),
                "sid": None,
            }
        except (ValueError, IndexError):
            # A readable code that does not carry exam data (e.g. a URL).
            self.errors.append("Could not parse QR or EAN code data! Not an exam?")
            return empty
        if len(data) > 4:
            retval["sid"] = data[4]

        return retval

    def get_paper_ocr_data(self):
        """Collect all results of this paper into one dict.

        Besides the fields of ``get_code_data`` the dict holds the raw code
        text (``qr``), ``errors``, ``warnings``, the normalised marker
        positions, the thresholded answer matrix (``ans_matrix``, 0/1) and
        ``output_filename``, the path the processed image was written to.

        For a paper without a usable image the positions and the matrix are
        empty, ``output_filename`` is None and nothing is written.
        """
        data = self.get_code_data()
        data["qr"] = (
            None if self.QRData is None else self.QRData.decode("utf8", errors="replace")
        )
        data["errors"] = self.errors
        data["warnings"] = self.warnings

        if self.img is None or self.xMarkerLocations is None or self.yMarkerLocations is None:
            data["up_position"] = ([], [])
            data["right_position"] = ([], [])
            data["ans_matrix"] = []
            data["output_filename"] = None
            return data

        # NOTE(review): both entries are built from the same expression
        # (x of the top markers, second array of the right markers); this is
        # kept as in the original - confirm the schema consumers expect.
        data["up_position"] = (
            list(self.xMarkerLocations[1] / self.imgWidth),
            list(self.yMarkerLocations[1] / self.imgHeight),
        )
        data["right_position"] = (
            list(self.xMarkerLocations[1] / self.imgWidth),
            list(self.yMarkerLocations[1] / self.imgHeight),
        )
        threshold = (self.settings or {}).get("answer_threshold", 0.25)
        data["ans_matrix"] = ((np.array(self.answerMatrix) > threshold) * 1).tolist()
        if data["sid"] is None and data["page_no"] == 2:
            data["sid"] = self.get_enhanced_sid()
        # Same base name as the input, always with a .png extension.
        stem = os.path.splitext(os.path.basename(self.filename))[0]
        output_filename = os.path.join(self.output_path, stem + ".png")
        cv2.imwrite(output_filename, self.img)
        data['output_filename'] = output_filename
        return data