havoc/sizif-ocr.git

Fixes in QR code, in the third SID algorithm, and in finding answer matrix locations...

Samo Penic

2018-11-21 9c222b2a0b151e7219e30f0145aa92872890d838

commit \| author \| age
9efc18	1	import cv2
SP	2	import numpy as np
762a5e	3	from skimage import morphology, img_as_ubyte
02e0f7	4
0d97e9	5	import pkg_resources
SP	6
# Resource name of the "8" template image shipped with this package.
# pkg_resources resource names always use "/" as the separator, on every platform.
templatefile = '/template-8.png' # always use slash
# Filesystem path of the template; read by segment_by_7segments for template matching.
template8 = pkg_resources.resource_filename(__name__, templatefile)
	9
9efc18	10
def kernel(x, y):
    """
    Create a rectangular all-ones kernel for morphological operations.

    :param x: number of rows of the kernel
    :param y: number of columns of the kernel
    :return: ``uint8`` array of shape ``(x, y)`` filled with ones
    """
    return np.full((x, y), 1, dtype=np.uint8)
SP	16
6fde5f	17
def find_biggest_blob(image, original_image, sid_mask):
    """
    Crop ``image`` to the biggest block of pixels found in ``original_image``.

    ``original_image`` is opened to remove noise and then dilated with a wide
    kernel so that neighbouring strokes merge. The bounding box of the contour
    with the largest area, trimmed by fixed margins, is used to crop ``image``.
    The intermediate binary image is written to ``/tmp/sidblock1.png``.

    :param image: image to crop
    :param original_image: image in which the biggest blob is searched
    :param sid_mask: student id mask; its first character ("1", "x" or anything
        else) selects how far the left crop edge is moved to the left
    :return: the cropped part of ``image``
    :raises IndexError: if no contour is found in ``original_image``
    """
    # An empty mask is treated like an unknown first character (no shift).
    first = sid_mask[0] if sid_mask else ""
    if first == "1":
        move_left = 35
    elif first == "x":
        move_left = 40
    else:
        move_left = 0

    # Remove noise
    denoised = cv2.morphologyEx(
        original_image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3
    )
    # Merge neighbouring strokes into one block of pixels
    merged = cv2.morphologyEx(
        denoised, cv2.MORPH_DILATE, kernel(5, 25), iterations=4
    )
    merged = img_as_ubyte(merged > 50)
    cv2.imwrite("/tmp/sidblock1.png", merged)

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x/4.x; the contours are always second to last.
    contours = cv2.findContours(
        merged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(contours) == 0:
        raise IndexError("no blob found in the student id image")

    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
    # Clamp the left edge: a negative start index would wrap around to the
    # right side of the image instead of starting at column 0.
    left = max(x + 25 - move_left, 0)
    return image[y : y + h, left : x + w - 40]  # +25,-25
	42
def sid_compare(sid_no, sid_mask):
    """
    Check whether a recognised student id number is valid according to the mask.

    Every mask character other than "x" must be equal to the character at the
    same position of ``sid_no``; "x" accepts any character. The number must
    also be exactly as long as the mask, otherwise an empty or truncated
    number would be accepted by any mask.

    :param sid_no: recognised student id number
    :param sid_mask: student id mask
    :return: True if they match, else False
    """
    if len(sid_no) != len(sid_mask):
        return False
    return all(
        expected == "x" or expected == actual
        for expected, actual in zip(sid_mask, sid_no)
    )
	54
	55
def segment_by_contours(image, original_image, classifier, sid_mask):
    """
    First algorithm. It segments numerals with contours, so it works with
    numbers whose individual numerals do not touch.

    The image is cropped with ``find_biggest_blob`` and its external contours
    are processed from left to right. A contour narrower than half its height
    is taken to be the numeral 1 without asking the classifier; every other
    contour is cropped, resized to 32x32 and passed to ``classifier.predict``.
    Debug images are written to ``/tmp``.

    :param image: enhanced image that is cropped and segmented
    :param original_image: image used by ``find_biggest_blob`` to locate the blob
    :param classifier: object whose ``predict`` receives one flattened crop
        scaled by 1/255
    :param sid_mask: student id mask, forwarded to ``find_biggest_blob``
    :return: student id as a string
    """
    image = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sid_contour1.png", image)

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x/4.x; the contours are always second to last.
    contours = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    # Order the contours from left to right
    left_to_right = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

    numerals = []
    for i, ctr in enumerate(left_to_right):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        if w < h / 2:
            # Too narrow for any numeral but 1
            numerals.append("1")
            continue
        # Getting ROI
        roi = image[y : y + h, x : x + w]
        roi = img_as_ubyte(roi < 128)
        roi = cv2.resize(roi, (32, 32))

        cv2.imwrite("/tmp/sid_no_{}.png".format(i), roi)
        numerals.append(str(classifier.predict(roi.reshape(1, -1) / 255.0)[0]))
    return "".join(numerals)
	88
	89
def segment_by_sid_len(image, original_image, sid_mask, classifier):
    """
    Third algorithm. It takes the biggest "blob" of the image and cuts it by
    force into as many equally wide slices as the mask has characters.

    Every slice is thresholded, resized to 32x32 and passed to
    ``classifier.predict``. The cut positions are not adjusted to the actual
    numerals, so some tweaking may be needed. Debug images are written to
    ``/tmp``.

    :param image: enhanced image that is cropped and sliced
    :param original_image: image used by ``find_biggest_blob`` to locate the blob
    :param sid_mask: student id mask; its length is the number of slices
    :param classifier: object whose ``predict`` receives one flattened slice
        scaled by 1/255
    :return: student id as a string
    """
    digit_count = len(sid_mask)
    blob = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sidblock2.png", blob)

    _, blob_width = blob.shape[0:2]
    slot_width = int(blob_width / digit_count)

    recognised = []
    for idx in range(digit_count):
        left = idx * slot_width
        cell = blob[:, left : left + slot_width]
        cell = img_as_ubyte(cell < 128)
        cell = cv2.resize(cell, (32, 32))
        cv2.imwrite("/tmp/sid_no_{}.png".format(idx), cell)
        features = cell.reshape(1, -1) / 255.0
        recognised.append(str(classifier.predict(features)[0]))
    return "".join(recognised)
	114
6fde5f	115
def segment_by_7segments(image, original_image, sid_mask, classifier):
    """
    Second attempt. It closes the original image so that all 7 segments become
    visible as 8888888 and then matches the pattern image of an 8 against it.
    It works if the scanned gray level is high enough.

    Every position whose match score is at least 0.75 is shifted by 10 pixels
    up and to the left. Of the positions sorted by x, one is kept for each run
    whose successive x values differ by at most half the template width. The
    template-sized crops of ``image`` at the kept positions are resized to
    32x32 and passed to ``classifier.predict``. Debug images are written to
    ``/tmp``.

    :param image: enhanced image from which the numerals are cropped
    :param original_image: image in which the template is searched
    :param sid_mask: student id mask (not used by this algorithm)
    :param classifier: object whose ``predict`` receives one flattened crop
        scaled by 1/255
    :return: student id number as a string; "" if nothing matches the template
        or a crop cannot be resized
    """
    block_image = cv2.morphologyEx(
        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
    )
    block_image = img_as_ubyte(block_image < 50)
    cv2.imwrite("/tmp/sid_3rd1.png", block_image)

    template = cv2.imread(template8, 0)
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
    match_rows, match_cols = np.where(res >= 0.75)
    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    # Shifted (x, y) corners of all matches, sorted by x and then by y
    shifted = sorted(
        (int(col) - 10, int(row) - 10)
        for row, col in zip(match_rows, match_cols)
    )
    if not shifted:
        return ""

    # Filter points: keep the last point of every run of close x positions
    xs = np.array([pt[0] for pt in shifted])
    ys = np.array([pt[1] for pt in shifted])
    keep = np.append(np.diff(xs) > int(w / 2), True)
    # Plain Python ints for drawing and slicing
    corners = [(int(x), int(y)) for x, y in zip(xs[keep], ys[keep])]

    for x, y in corners:
        cv2.rectangle(cimg, (x, y), (x + w, y + h), (0, 255, 255), 2)
    cv2.imwrite("/tmp/sid_3rd2.png", cimg)

    numerals = []
    for i, (x, y) in enumerate(corners):
        num = image[y : y + h, x : x + w]
        num = img_as_ubyte(num < 128)
        try:
            num = cv2.resize(num, (32, 32))
        except cv2.error:
            # The crop is empty (corner outside of the image): give up and
            # let the caller fall back to another algorithm.
            return ""
        cv2.imwrite("/tmp/sid_3no_{}.png".format(i), num)
        numerals.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))

    return "".join(numerals)
ac766e	170
6fde5f	171
def getSID(image, classifier, sid_mask):
    """
    Tries different approaches on image to get the student id number.

    The image is inverted and thresholded, cleaned with morphological closing
    and opening, thinned, thickened, skeletonized and thickened again to get
    a normalized image. That image, together with the inverted (not
    thresholded) copy, is sent to the segmentation and recognition functions:
    first ``segment_by_contours``, then ``segment_by_7segments`` and finally
    ``segment_by_sid_len``. Debug images are written to ``/tmp``.

    Tweak both MORPH_OPEN lines....

    :param image: image of the student id field
        (presumably 8-bit grayscale, as it is inverted with ``255 - image`` - TODO confirm)
    :param classifier: classifier passed on to the segmentation functions
    :param sid_mask: student id mask the recognised number is checked against
    :return: (student_id, error, warning) student id as a string, list of errors and list of warnings during the recognition

    """
    sid_warn = []
    sid_err = []
    # Invert the image; the inverted copy is kept for the segmentation functions
    image = 255 - image
    image_original = image.copy()
    # Binarize
    image = img_as_ubyte(image > 70)
    cv2.imwrite("/tmp/enSID0.png", image)

    # Remove noise (disabled)
    #image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3)

    # Closing. Connect non connected parts
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 1), iterations=4)

    # Again noise removal after closing (the stronger 8x8 variant is disabled)
    # image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
    # don't do too much noise removal.
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)

    # Thinning
    image = img_as_ubyte(morphology.thin(image > 128))
    cv2.imwrite("/tmp/enSID1.png", image)

    # Stub removal (might not be necessary if thinning instead of skeletonize is used above)
    # Making lines stronger
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 5), iterations=1)
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(10, 10))

    # Thinning again, then thickening to the final line width
    image = img_as_ubyte(morphology.skeletonize(image > 0.5))
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("/tmp/enhancedSID.png", image)

    # First algorithm: segmentation by contours
    sid_no = segment_by_contours(image, image_original, classifier, sid_mask)

    # Second algorithm if the first result has a wrong length or violates the mask
    if len(sid_no) != len(sid_mask) or not sid_compare(sid_no, sid_mask):
        sid_warn.append("Trying second SID algorithm.")
        sid_no = segment_by_7segments(image, image_original, sid_mask, classifier)

    # Third algorithm only if the length is still wrong.
    # NOTE(review): a second-algorithm result of the right length that violates
    # the mask is not retried here and ends as "Wrong SID!" - confirm this is intended.
    if (len(sid_no)) != len(sid_mask):
        sid_no = segment_by_sid_len(image, image_original, sid_mask, classifier)
        sid_warn.append("Trying third SID algorithm.")

    # Final validation of whatever result was obtained
    if not sid_compare(sid_no, sid_mask):
        sid_err = ["Wrong SID!"]

    return sid_no, sid_err, sid_warn