havoc/sizif-ocr.git

Fixes in sid locator for downstairs scanner. (part 1)

Samo Penic

2018-11-23 c9e02129953e3df0d7d36c06608412ae15b46145

commit \| author \| age
9efc18	1	import cv2
SP	2	import numpy as np
762a5e	3	from skimage import morphology, img_as_ubyte
02e0f7	4
0d97e9	5	import pkg_resources
SP	6
# Resource name of the template image used by segment_by_7segments,
# given relative to this package.
templatefile = "/template-8.png"  # always use slash
# File-system path of that resource as resolved by pkg_resources.
template8 = pkg_resources.resource_filename(__name__, templatefile)
SP	9
9efc18	10
def kernel(x, y):
    """
    Create a rectangular all-ones kernel (structuring element).

    :param x: size of the first array dimension (rows)
    :param y: size of the second array dimension (columns)
    :return: numpy array of shape (x, y) and dtype uint8 filled with ones
    """
    dimensions = (x, y)
    return np.ones(shape=dimensions, dtype=np.uint8)
SP	16
6fde5f	17
def find_biggest_blob(image, original_image, sid_mask):
    """
    Crop ``image`` to the bounding box of its biggest blob of pixels.

    The image is dilated with a 5x25 kernel so that neighbouring strokes merge,
    thresholded, and the external contour with the largest area is selected.
    Its bounding box, narrowed by fixed horizontal margins, is cut out of the
    un-dilated ``image``.

    :param image: single channel image that is searched and cropped
    :param original_image: not used by this function; kept so that existing
        callers keep working
    :param sid_mask: student id mask; its first character ("1", "x" or
        anything else) selects how far the left crop edge is moved to the left
    :return: the cropped image, or ``image`` unchanged if no contour was found
    """
    # An empty mask is treated like an unknown first character.
    first_char = sid_mask[0] if len(sid_mask) else ""
    if first_char == "1":
        move_left = 45
    elif first_char == "x":
        move_left = 50
    else:
        move_left = 0

    # find biggest block of pixels
    merged = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 25), iterations=3)
    merged = img_as_ubyte(merged > 50)
    cv2.imwrite("/tmp/sidblock1.png", merged)

    # findContours returns (contours, hierarchy) or (image, contours, hierarchy)
    # depending on the OpenCV version; the contours are always second to last.
    ctrs = cv2.findContours(
        merged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(ctrs) == 0:
        # Nothing to crop to.
        return image

    # get biggest contour
    biggest = sorted(ctrs, key=cv2.contourArea)[-1]
    x, y, w, h = cv2.boundingRect(biggest)

    # A negative start index would wrap around to the other end of the row,
    # so the left edge is clamped to the image border.
    left = max(x + 25 - move_left, 0)
    right = x + w - 30
    return image[y : y + h, left:right]
SP	42
5460bf	43
def sid_compare(sid_no, sid_mask):
    """
    Check whether the recognised student id number is valid according to the
    student id mask.

    A mask character "x" accepts any character; every other mask character
    must be equal to the character at the same position in ``sid_no``.
    A number whose length differs from the mask length never matches.

    :param sid_no: recognised student id
    :param sid_mask: student id mask, e.g. "1xxxxxxx"
    :return: True if they match, else False
    """
    # zip() stops at the shorter input, so without this check an empty or
    # truncated number would be accepted.
    if len(sid_no) != len(sid_mask):
        return False
    return all(m == "x" or m == c for m, c in zip(sid_mask, sid_no))
	55
	56
def segment_by_contours(image, original_image, classifier, sid_mask):
    """
    First algorithm. It segments numerals with contours. It works with numbers
    where individual numerals do not touch.

    The image is cropped with find_biggest_blob, its external contours are
    ordered from left to right and each one is recognised separately. A contour
    narrower than half of its height is taken to be "1" without asking the
    classifier; every other contour is inverted, resized to 32x32 and passed to
    ``classifier.predict``.

    :param image: image in which the numerals are searched
    :param original_image: passed on to find_biggest_blob
    :param classifier: object whose ``predict`` method receives one flattened
        32x32 sample scaled by 1/255
    :param sid_mask: student id mask, passed on to find_biggest_blob
    :return: student id as a string
    """
    image = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sid_contour1.png", image)

    # findContours returns (contours, hierarchy) or (image, contours, hierarchy)
    # depending on the OpenCV version; the contours are always second to last.
    ctrs = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    # Left-to-right order by the x coordinate of the bounding box.
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    digits = []
    for i, ctr in enumerate(sorted_ctrs):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        if w < h / 2:
            digits.append("1")
            continue
        # Getting ROI
        roi = image[y : y + h, x : x + w]
        roi = img_as_ubyte(roi < 128)
        roi = cv2.resize(roi, (32, 32))

        cv2.imwrite("/tmp/sid_no_{}.png".format(i), roi)
        digits.append(str(classifier.predict(roi.reshape(1, -1) / 255.0)[0]))
    return "".join(digits)
	89
	90
def segment_by_sid_len(image, original_image, sid_mask, classifier):
    """
    Third algorithm. It tries to get the biggest "blob" in the image and then
    cuts it into individual numbers by force.
    It has some problems with finding individual numbers, so some tweaking must
    be done!

    The cropped blob is divided into ``len(sid_mask)`` vertical strips of equal
    width (remaining columns on the right are ignored); each strip is inverted,
    resized to 32x32 and passed to ``classifier.predict``.

    :param image: image in which the numerals are searched
    :param original_image: passed on to find_biggest_blob
    :param sid_mask: student id mask; its length gives the number of numerals
    :param classifier: object whose ``predict`` method receives one flattened
        32x32 sample scaled by 1/255
    :return: student id as a string
    """
    sid_len = len(sid_mask)
    if sid_len == 0:
        # No numerals are expected, and the strip width would be undefined.
        return ""

    image = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sidblock2.png", image)
    img_width = image.shape[1]
    num_width = img_width // sid_len

    digits = []
    for i in range(sid_len):
        num = image[:, i * num_width : (i + 1) * num_width]
        num = img_as_ubyte(num < 128)
        num = cv2.resize(num, (32, 32))
        cv2.imwrite("/tmp/sid_no_{}.png".format(i), num)
        digits.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))
    return "".join(digits)
	115
6fde5f	116
def segment_by_7segments(image, original_image, sid_mask, classifier):
    """
    Second attempt. It closes the original image to get all 7 segments visible
    as 8888888, then it does pattern matching of 8 with the template image.
    It works if the scanned gray level is high enough.

    Template matches with a normalised score of at least 0.75 are shifted by
    10 pixels up and to the left, sorted by x and grouped: where consecutive
    matches are not more than half a template width apart, only the rightmost
    one is kept. A template-sized window at every kept position is cut out of
    ``image``, inverted, resized to 32x32 and passed to ``classifier.predict``.

    :param image: image from which the numerals are cut
    :param original_image: image in which the template is searched
    :param sid_mask: not used by this function
    :param classifier: object whose ``predict`` method receives one flattened
        32x32 sample scaled by 1/255
    :return: student id number as a string; "" if the template is not found
        or a window cannot be resized
    :raises IOError: if the template image cannot be read
    """
    block_image = cv2.morphologyEx(
        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
    )
    block_image = img_as_ubyte(block_image < 50)
    cv2.imwrite("/tmp/sid_3rd1.png", block_image)

    template = cv2.imread(template8, 0)
    if template is None:
        # imread signals an unreadable file by returning None.
        raise IOError("Cannot read template image: {}".format(template8))
    w, h = template.shape[::-1]

    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
    loc = np.where(res >= 0.75)
    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    # np.where yields (rows, columns); every match is moved 10 px up and left.
    xs = loc[1] - 10
    ys = loc[0] - 10
    if xs.size == 0:
        return ""

    # filter points: order by x (then y) and keep the last match of every run
    # whose neighbours are at most half a template width apart.
    order = np.lexsort((ys, xs))
    xs = xs[order]
    ys = ys[order]
    keep = np.append(np.diff(xs) > int(w / 2), True)
    # Plain Python ints, because some OpenCV builds reject numpy scalars as
    # point coordinates.
    points = [(int(x), int(y)) for x, y in zip(xs[keep], ys[keep])]

    for pt in points:
        cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
    cv2.imwrite("/tmp/sid_3rd2.png", cimg)

    digits = []
    for i, (x, y) in enumerate(points):
        num = image[y : y + h, x : x + w]
        num = img_as_ubyte(num < 128)
        try:
            # The window can be empty (e.g. a negative start index after the
            # 10 px shift); resize then fails and the algorithm gives up.
            num = cv2.resize(num, (32, 32))
        except cv2.error:
            return ""
        cv2.imwrite("/tmp/sid_3no_{}.png".format(i), num)
        digits.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))

    return "".join(digits)
ac766e	171
6fde5f	172
def getSID(image, classifier, sid_mask):
    """
    Try different approaches on the image to get the student id number.

    The image is inverted, binarised and cleaned of noise, then thinned and
    thickened again until a normalised image is obtained. That image is handed
    to the segmentation and recognition functions one after another.

    Tweak both MORPH_OPEN lines....

    :param image: image of the student id field
    :param classifier: classifier passed on to the segmentation functions
    :param sid_mask: student id mask used to validate the recognised number
    :return: (student_id, error, warning) student id as a string, list of
        errors and list of warnings collected during the recognition
    """
    warn_list = []
    err_list = []

    inverted = 255 - image
    untouched = inverted.copy()
    work = img_as_ubyte(inverted > 70)
    cv2.imwrite("/tmp/enSID0.png", work)

    # Remove noise
    work = cv2.morphologyEx(work, cv2.MORPH_OPEN, kernel(3, 3), iterations=3)

    # Closing. Connect non connected parts
    work = cv2.morphologyEx(work, cv2.MORPH_CLOSE, kernel(5, 1), iterations=4)

    # Noise removal again after closing; kept light on purpose
    # (don't do too much noise removal).
    work = cv2.morphologyEx(work, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)

    # Reduce strokes to thin lines (thinning is used here, not skeletonize)
    work = img_as_ubyte(morphology.thin(work > 128))
    cv2.imwrite("/tmp/enSID1.png", work)

    # Making lines stronger
    work = cv2.morphologyEx(work, cv2.MORPH_DILATE, kernel(5, 2), iterations=1)
    work = cv2.morphologyEx(work, cv2.MORPH_CLOSE, kernel(10, 10))

    # Thinning again, followed by a final thickening
    work = img_as_ubyte(morphology.skeletonize(work > 0.5))
    work = cv2.morphologyEx(work, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("/tmp/enhancedSID.png", work)

    expected_len = len(sid_mask)
    sid_no = segment_by_contours(work, untouched, classifier, sid_mask)

    first_ok = len(sid_no) == expected_len and sid_compare(sid_no, sid_mask)
    if not first_ok:
        warn_list.append("Trying second SID algorithm.")
        sid_no = segment_by_7segments(work, untouched, sid_mask, classifier)

    if len(sid_no) != expected_len:
        sid_no = segment_by_sid_len(work, untouched, sid_mask, classifier)
        warn_list.append("Trying third SID algorithm.")

    if not sid_compare(sid_no, sid_mask):
        err_list.append("Wrong SID!")

    return sid_no, err_list, warn_list