havoc/sizif-ocr.git

Samo Penic

2018-11-16 762a5e258a90387922d6c6eb3ecc9a7ca7c96144

commit \| author \| age
9efc18	1	import cv2
SP	2	import numpy as np
762a5e	3	from skimage import morphology, img_as_ubyte
02e0f7	4	from sklearn import svm
SP	5	from sklearn.externals import joblib
	6
9efc18	7
SP	8	"""
	9	(1) The text is an array of chars (in row-major order) where
	10	* each char can be one of the following:
	11	* 'x': hit
	12	* 'o': miss
	13	* ' ': don't-care
	14	* (2) When the origin falls on a hit or miss, use an upper case
	15	* char (e.g., 'X' or 'O') to indicate it. When the origin
	16	* falls on a don't-care, indicate this with a 'C'.
	17	* The string must have exactly one origin specified.
	18	* (3) The advantage of this method is that the text can be input
	19	* in a format that shows the 2D layout of the Sel; e.g.,
	20
	21
	22	:::: AND ::::
	23
	24
	25	(10) The sequence string is formatted as follows:
	26	* ~ An arbitrary number of operations, each separated
	27	* by a '+' character. White space is ignored.
	28	* ~ Each operation begins with a case-independent character
	29	* specifying the operation:
	30	* d or D (dilation)
	31	* e or E (erosion)
	32	* o or O (opening)
	33	* c or C (closing)
	34	* r or R (rank binary reduction)
	35	* x or X (replicative binary expansion)
	36	* b or B (add a border of 0 pixels of this size)
	37	* ~ The args to the morphological operations are bricks of hits,
	38	* and are formatted as a.b, where a and b are horizontal and
	39	* vertical dimensions, rsp.
	40	* ~ The args to the reduction are a sequence of up to 4 integers,
	41	* each from 1 to 4.
	42	* ~ The arg to the expansion is a power of two, in the set
	43	* {2, 4, 8, 16}.
	44	* (11) An example valid sequence is:
	45	* "b32 + o1.3 + C3.1 + r23 + e2.2 + D3.2 + X4"
	46	* In this example, the following operation sequence is carried out:
	47	* * b32: Add a 32 pixel border around the input image
	48	* * o1.3: Opening with vert sel of length 3 (e.g., 1 x 3)
	49	* * C3.1: Closing with horiz sel of length 3 (e.g., 3 x 1)
	50	* * r23: Two successive 2x2 reductions with rank 2 in the first
	51	* and rank 3 in the second. The result is a 4x reduced pix.
	52	* * e2.2: Erosion with a 2x2 sel (origin will be at x,y: 0,0)
	53	* * d3.2: Dilation with a 3x2 sel (origin will be at x,y: 1,0)
	54	* * X4: 4x replicative expansion, back to original resolution
	55
	56	"""
	57
	58
	59	def kernel(x, y):
	60	return np.ones((x, y), np.uint8)
	61
	62
def segment_by_contours(image, sorted_ctrs, classifier):
    """Classify each contour's bounding box and concatenate the results.

    Args:
        image: 2-D image array the contours were extracted from.
        sorted_ctrs: contours in the order the characters should be read.
        classifier: object exposing ``predict``; it is called with one
            flattened 32x32 patch (a ``(1, 1024)`` array) divided by 255.

    Returns:
        A string with one entry per contour. A bounding box narrower than
        half its height is recorded as "1" without asking the classifier.

    Side effects:
        Every classified patch is written to ``sid_no_<index>.png`` in the
        current working directory.
    """
    digits = []
    for index, contour in enumerate(sorted_ctrs):
        left, top, width, height = cv2.boundingRect(contour)
        if width < height / 2:
            # Tall, thin blob: taken to be the digit one.
            digits.append("1")
        else:
            patch = image[top : top + height, left : left + width]
            # Threshold at 128 (pixels below 128 are selected), then
            # normalise the patch to the fixed 32x32 size.
            patch = cv2.resize(img_as_ubyte(patch < 128), (32, 32))

            # cv2.rectangle(image,(x,y),( x + w, y + h ),(0,255,0),2)
            cv2.imwrite("sid_no_{}.png".format(index), patch)
            features = patch.reshape(1, -1) / 255.0
            digits.append(str(classifier.predict(features)[0]))
    return "".join(digits)
	80
	81
	82	def getSID(image, classifier, sid_mask):
	83	image = 255 - image
	84	image = img_as_ubyte(image > 100)
9efc18	85	cv2.imwrite("enSID0.png", image)
SP	86	# Remove noise
762a5e	87	image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=1)
9efc18	88	# Closing. Connect non connected parts
02e0f7	89	image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
9efc18	90	# Again noise removal after closing
02e0f7	91
762a5e	92	image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
9efc18	93	# Skeletonization
762a5e	94	image = img_as_ubyte(morphology.thin(image > 128))
SP	95	cv2.imwrite("enSID1.png", image)
9efc18	96	# Stub removal (might not be necessary if thinning instead of skeletonize is used above
SP	97	# Making lines stronger
	98	image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 5), iterations=1)
	99
	100	image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(10, 10))
	101	# Thining again
762a5e	102	image = img_as_ubyte(morphology.skeletonize(image > 0.5))
9efc18	103	image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
02e0f7	104
762a5e	105	im2, ctrs, hier = cv2.findContours(
SP	106	image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
	107	)
02e0f7	108	sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
SP	109
762a5e	110	sid_no = ""
SP	111	sid_len = 0
	112	if sid_mask is not None:
	113	if len(sid_mask)==len(sorted_ctrs):
	114	sid_no=segment_by_contours(image,sorted_ctrs,classifier)
	115	else:
	116	print("Ooops have to find another way")
02e0f7	117	print(sid_no)
762a5e	118	return sid_no