havoc/sizif-ocr.git

Samo Penic

2018-11-17 fe2c1025b102bbf20c9afbc29eedf7a5f069410c

commit \| author \| age
9efc18	1	import cv2
SP	2	import numpy as np
762a5e	3	from skimage import morphology, img_as_ubyte
02e0f7	4
9efc18	5
def kernel(x, y):
    """
    Build an all-ones structuring element for the morphological operations.

    :param x: size of the first axis of the kernel
    :param y: size of the second axis of the kernel
    :return: ``uint8`` numpy array of shape ``(x, y)`` filled with ones
    """
    shape = (x, y)
    return np.full(shape, 1, dtype=np.uint8)
SP	11
6fde5f	12
def find_biggest_blob(image, original_image, sid_mask):
    """
    Crop ``image`` to the bounding box of the largest blob found in ``original_image``.

    ``original_image`` is opened (noise removal), dilated and thresholded; the external
    contour with the largest area is taken and its bounding rectangle, adjusted by fixed
    horizontal margins, is used to slice ``image``. The thresholded block image is also
    written to ``sidblock1.png``.

    :param image: image to crop, indexed as ``image[rows, columns]``
    :param original_image: image in which the blob is searched
    :param sid_mask: student id mask; only its first character is inspected here
    :return: the cropped part of ``image``
    :raises IndexError: if no contour is found in the processed ``original_image``
    """
    # The first mask character selects how far the crop is extended to the left.
    first_char = sid_mask[0]
    if first_char == "1":
        move_left = 45
    elif first_char == "x":
        move_left = 55
    else:
        move_left = 0

    # Remove noise
    denoised = cv2.morphologyEx(
        original_image, cv2.MORPH_OPEN, kernel(2, 2), iterations=7
    )
    # Smear the remaining pixels together so they form one big block
    block = cv2.morphologyEx(denoised, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
    block = img_as_ubyte(block > 50)
    cv2.imwrite("sidblock1.png", block)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the contours are always the second-to-last item.
    ctrs = cv2.findContours(
        block.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(ctrs) == 0:
        raise IndexError("find_biggest_blob: no contours found in original_image")

    # Biggest contour by area is last after an ascending sort.
    biggest = sorted(ctrs, key=cv2.contourArea)[-1]
    x, y, w, h = cv2.boundingRect(biggest)

    # A negative start index would wrap around to the other side of the image,
    # so the left bound is clamped to the image edge.
    left = max(x + 25 - move_left, 0)
    # NOTE(review): the +25 / -40 margins are hand-tuned constants (the original
    # comment read "+25,-25") -- confirm against the scanned form layout.
    right = x + w - 40
    return image[y : y + h, left:right]
	37
def sid_compare(sid_no, sid_mask):
    """
    Check whether a recognised student id number is valid according to the student id mask.

    A mask character ``"x"`` accepts any character; every other mask character must be
    equal to the character at the same position in ``sid_no``. A number whose length
    differs from the mask length never matches.

    :param sid_no: recognised student id number as a string
    :param sid_mask: student id mask as a string
    :return: True if they match, else False
    """
    # zip() stops at the shorter input, so without this check a truncated (or empty)
    # number would be accepted as long as its prefix fits the mask.
    if len(sid_no) != len(sid_mask):
        return False
    return all(
        mask_char == "x" or mask_char == digit
        for mask_char, digit in zip(sid_mask, sid_no)
    )
	49
	50
def segment_by_contours(image, original_image, classifier, sid_mask):
    """
    First algorithm. It segments numerals with contours. It works with numbers where
    individual numerals do not touch.

    The image is first cropped with ``find_biggest_blob``; every external contour of the
    crop, taken from left to right, is treated as one numeral. Each classified numeral is
    also written to ``sid_no_<index>.png`` and the crop to ``sid_contour1.png``.

    :param image: enhanced image that is cropped and segmented
    :param original_image: image passed to ``find_biggest_blob`` to locate the number block
    :param classifier: object whose ``predict`` receives one flattened 32x32 image scaled
        by 1/255 and returns a sequence whose first item is the recognised numeral
    :param sid_mask: student id mask, forwarded to ``find_biggest_blob``
    :return: student id as a string; empty string if the crop is empty
    """
    image = find_biggest_blob(image, original_image, sid_mask)
    if image.size == 0:
        # Nothing to segment in an empty crop; report no digits so the caller can
        # fall back to another algorithm.
        return ""
    cv2.imwrite("sid_contour1.png", image)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the contours are always the second-to-last item.
    ctrs = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    # Order contours left to right by the x coordinate of their bounding box.
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    digits = []
    for i, ctr in enumerate(sorted_ctrs):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        if w < h / 2:
            # A contour less than half as wide as it is tall is taken to be "1"
            # without asking the classifier.
            digits.append("1")
            continue
        # Getting ROI
        roi = image[y : y + h, x : x + w]
        roi = img_as_ubyte(roi < 128)
        roi = cv2.resize(roi, (32, 32))

        cv2.imwrite("sid_no_{}.png".format(i), roi)
        digits.append(str(classifier.predict(roi.reshape(1, -1) / 255.0)[0]))
    return "".join(digits)
	83
	84
def segment_by_sid_len(image, original_image, sid_mask, classifier):
    """
    Third algorithm. It takes the biggest "blob" in the image and cuts it by force into
    as many equally wide slices as the mask has characters, classifying every slice.

    The slicing is purely geometric, so numerals that are not evenly spaced may be cut
    badly; some tweaking may be needed. The cropped blob is written to ``sidblock2.png``
    and every slice to ``sid_no_<index>.png``.

    :param image: enhanced image that is cropped and sliced
    :param original_image: image passed to ``find_biggest_blob`` to locate the number block
    :param sid_mask: student id mask; its length gives the number of slices
    :param classifier: object whose ``predict`` receives one flattened 32x32 image scaled
        by 1/255 and returns a sequence whose first item is the recognised numeral
    :return: student id as a string
    """
    digit_count = len(sid_mask)
    blob = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("sidblock2.png", blob)

    _, blob_width = blob.shape[0:2]
    slice_width = int(blob_width / digit_count)

    recognised = []
    for idx in range(digit_count):
        left = idx * slice_width
        cell = blob[:, left : left + slice_width]
        cell = cv2.resize(img_as_ubyte(cell < 128), (32, 32))
        cv2.imwrite("sid_no_{}.png".format(idx), cell)
        features = cell.reshape(1, -1) / 255.0
        recognised.append(str(classifier.predict(features)[0]))
    return "".join(recognised)
	109
6fde5f	110
def segment_by_7segments(image, original_image, sid_mask, classifier):
    """
    Second algorithm. It closes and thresholds the original image so that all 7 segments
    become visible as 8888888, then it does pattern matching of 8 with the pattern image
    ``template-8.png``. It works if the scanned gray level is high enough.

    Matches scoring at least 0.75 are shifted 10 pixels up and left, sorted from left to
    right, and thinned so that only the last match of each run of horizontally close
    matches (closer than half the template width) is kept. The corresponding cells of
    ``image`` are classified. Debug images ``sid_3rd1.png``, ``sid_3rd2.png`` and
    ``sid_3no_<index>.png`` are written along the way.

    :param image: enhanced grayscale image from which the numerals are cut
    :param original_image: image used for the template matching
    :param sid_mask: student id mask (not used by this algorithm)
    :param classifier: object whose ``predict`` receives one flattened 32x32 image scaled
        by 1/255 and returns a sequence whose first item is the recognised numeral
    :return: student id number as a string; empty string if the template is not found
        anywhere or a numeral cell cannot be resized
    :raises FileNotFoundError: if ``template-8.png`` cannot be read
    """
    block_image = cv2.morphologyEx(
        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
    )
    block_image = img_as_ubyte(block_image < 50)
    cv2.imwrite("sid_3rd1.png", block_image)

    template = cv2.imread("template-8.png", 0)
    if template is None:
        # imread signals an unreadable file by returning None instead of raising.
        raise FileNotFoundError("template-8.png could not be read")
    w, h = template.shape[::-1]

    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
    rows, cols = np.where(res >= 0.75)
    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if len(cols) == 0:
        return ""

    # Shift every match by 10 pixels towards the top-left corner.
    xs = cols - 10
    ys = rows - 10
    # Sort by x, then y (lexsort uses the last key as the primary one).
    order = np.lexsort((ys, xs))
    xs = xs[order]
    ys = ys[order]
    # Keep a point only when the next one is more than half a template width
    # further right; the last point is always kept.
    keep = np.append(np.diff(xs) > int(w / 2), True)
    # Plain Python ints are used so the OpenCV drawing call gets native integers.
    corners = [(int(x), int(y)) for x, y in zip(xs[keep], ys[keep])]

    for x, y in corners:
        cv2.rectangle(cimg, (x, y), (x + w, y + h), (0, 255, 255), 2)
    cv2.imwrite("sid_3rd2.png", cimg)

    digits = []
    for i, (x, y) in enumerate(corners):
        num = image[y : y + h, x : x + w]
        num = img_as_ubyte(num < 128)
        try:
            num = cv2.resize(num, (32, 32))
        except cv2.error:
            # The cell could not be resized (e.g. it fell outside the image and is
            # empty); give up so the caller can try another algorithm.
            return ""
        cv2.imwrite("sid_3no_{}.png".format(i), num)
        digits.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))

    return "".join(digits)
ac766e	165
6fde5f	166
def getSID(image, classifier, sid_mask):
    """
    Recognise the student id number in ``image`` by trying up to three algorithms.

    The image is inverted and binarised, cleaned with morphological opening/closing,
    thinned, thickened, skeletonized and thickened again to obtain a normalised image.
    That image (together with the inverted original) is handed to
    ``segment_by_contours``; if its result does not fit the mask,
    ``segment_by_7segments`` is tried, and if that result has the wrong length,
    ``segment_by_sid_len``. Intermediate images are written to ``enSID0.png``,
    ``enSID1.png`` and ``enhancedSID.png``.

    Tweak both MORPH_OPEN lines if the clean-up is too weak or too aggressive.

    :param image: scanned image of the student id field
    :param classifier: classifier forwarded to the segmentation functions
    :param sid_mask: student id mask used to validate the recognised number
    :return: (student_id, error, warning) student id as a string, list of errors and
        list of warnings during the recognition
    """
    warnings = []

    inverted = 255 - image
    inverted_copy = inverted.copy()
    work = img_as_ubyte(inverted > 100)
    cv2.imwrite("enSID0.png", work)

    # 1. remove noise, 2. connect non connected parts, 3. light noise removal after
    #    closing (an 8x8 opening was tried here and removed too much).
    cleanup_steps = (
        (cv2.MORPH_OPEN, (2, 2), 3),
        (cv2.MORPH_CLOSE, (5, 3), 4),
        (cv2.MORPH_OPEN, (3, 3), 1),
    )
    for operation, (rows, cols), repeats in cleanup_steps:
        work = cv2.morphologyEx(work, operation, kernel(rows, cols), iterations=repeats)

    # Skeletonization (by thinning)
    thinned = morphology.thin(work > 128)
    work = img_as_ubyte(thinned)
    cv2.imwrite("enSID1.png", work)

    # Making lines stronger
    work = cv2.morphologyEx(work, cv2.MORPH_DILATE, kernel(5, 5), iterations=1)
    work = cv2.morphologyEx(work, cv2.MORPH_CLOSE, kernel(10, 10))

    # Thinning again, then thickening to the final stroke width
    skeleton = morphology.skeletonize(work > 0.5)
    work = img_as_ubyte(skeleton)
    work = cv2.morphologyEx(work, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("enhancedSID.png", work)

    sid_no = segment_by_contours(work, inverted_copy, classifier, sid_mask)

    first_ok = len(sid_no) == len(sid_mask) and sid_compare(sid_no, sid_mask)
    if not first_ok:
        warnings.append("Trying second SID algorithm.")
        sid_no = segment_by_7segments(work, inverted_copy, sid_mask, classifier)

        if len(sid_no) != len(sid_mask):
            sid_no = segment_by_sid_len(work, inverted_copy, sid_mask, classifier)
            warnings.append("Trying third SID algorithm.")

    errors = [] if sid_compare(sid_no, sid_mask) else ["Wrong SID!"]

    return sid_no, errors, warnings