havoc/sizif-ocr.git

			@@ -1,8 +1,6 @@
			import cv2
			import numpy as np
			from skimage import morphology, img_as_ubyte
			from sklearn import svm
			from sklearn.externals import joblib


			"""
			@@ -59,6 +57,13 @@
			def kernel(x, y):
			    """Return an all-ones structuring element for morphological operations.

			    Args:
			        x: Number of rows of the element.
			        y: Number of columns of the element.

			    Returns:
			        A NumPy array of shape ``(x, y)`` and dtype ``uint8`` filled with ones.
			    """
			    return np.ones((x, y), np.uint8)

			def sid_compare(sid_no, sid_mask):
			    """Check whether a recognised SID is consistent with the expected mask.

			    Each mask character is either ``'x'`` (any character is accepted at that
			    position) or a literal that the recognised SID must reproduce exactly.

			    Args:
			        sid_no: Recognised SID as a string.
			        sid_mask: Expected pattern, same length as a valid SID.

			    Returns:
			        True when both strings have the same length and every non-``'x'``
			        mask position equals the corresponding SID character, else False.
			    """
			    # zip() stops at the shorter input, so without this check an empty or
			    # truncated SID would be reported as matching any mask.
			    if len(sid_no) != len(sid_mask):
			        return False
			    return all(
			        expected == "x" or expected == actual
			        for expected, actual in zip(sid_mask, sid_no)
			    )



			def segment_by_contours(image, sorted_ctrs, classifier):
			sid_no = ""
			@@ -79,23 +84,34 @@
			return sid_no


			def segment_by_sid_len(image,sid_len, classifier):
			sid_no=""
			#find biggest block of pixels

			image1=cv2.morphologyEx(image,cv2.MORPH_DILATE, kernel(5,25), iterations=3)
			cv2.imwrite("sidblock1.png",image1)
			def segment_by_sid_len(image, original_image, sid_mask, classifier):
			sid_no = ""
			sid_len = len(sid_mask)
			if sid_mask[0] == "1":
			move_left = 45
			elif sid_mask[0] == "x":
			move_left = 55
			else:
			move_left = 0
			# Remove noise
			image2 = cv2.morphologyEx(original_image, cv2.MORPH_OPEN, kernel(2, 2), iterations=7)
			# find biggest block of pixels
			image1 = cv2.morphologyEx(image2, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
			image1=img_as_ubyte(image1>50)
			cv2.imwrite("sidblock1.png", image1)
			im2, ctrs, hier = cv2.findContours(
			image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
			)
			sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.contourArea(ctr)) #get bigges contour
			sorted_ctrs = sorted(
			ctrs, key=lambda ctr: cv2.contourArea(ctr)
			) # get bigges contour
			x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])
			image=image[y:y+h,x+25:x+w-25]
			cv2.imwrite("sidblock2.png",image)
			image = image[y : y + h, x + 25 - move_left : x + w - 40] #+25,-25
			cv2.imwrite("sidblock2.png", image)
			imgHeight, imgWidth = image.shape[0:2]
			numWidth=int(imgWidth/(sid_len))
			for i in range(0,sid_len):
			num=image[:,inumWidth:(i+1)numWidth]
			numWidth = int(imgWidth / (sid_len))
			for i in range(0, sid_len):
			num = image[:, i * numWidth : (i + 1) * numWidth]
			num = img_as_ubyte(num < 128)
			num = cv2.resize(num, (32, 32))

			@@ -104,18 +120,66 @@
			sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])
			return sid_no

			def segment_by_7segments(image, original_image, sid_mask, classifier):
			    """Read the SID by locating digit cells with template matching.

			    ``original_image`` is morphologically closed and thresholded, then matched
			    against ``template-8.png``. Every match scoring at least 0.75 is shifted
			    10 px up and to the left, matches are sorted by x, and only the last match
			    of each run of close x positions (gap of at most half the template width)
			    is kept. Each kept box is cropped from ``image``, binarised, resized to
			    32x32 and passed to ``classifier``.

			    Side effects: writes the debug images ``sid_3rd1.png``, ``sid_3rd2.png``
			    and ``sid_3no_<i>.png`` using relative paths.

			    Args:
			        image: Image the digit crops are taken from; it is also converted
			            with ``COLOR_GRAY2BGR`` for the debug overlay.
			        original_image: Image used to locate the digit cells.
			        sid_mask: Not used by this function.
			        classifier: Object whose ``predict`` accepts a ``(1, N)`` array and
			            returns a sequence; its first element is appended to the result.

			    Returns:
			        The concatenated predictions as a string, or ``""`` when the template
			        is not found anywhere or a crop cannot be resized.
			    """
			    block_image = cv2.morphologyEx(
			        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
			    )
			    block_image = img_as_ubyte(block_image < 50)
			    cv2.imwrite("sid_3rd1.png", block_image)

			    # NOTE(review): cv2.imread returns None for an unreadable file, in which
			    # case the .shape access below fails - confirm the template always ships.
			    template = cv2.imread("template-8.png", 0)
			    w, h = template.shape[::-1]
			    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
			    rows, cols = np.where(res >= 0.75)
			    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

			    # (x, y) of every match, shifted by 10 px, ordered by x and then y.
			    candidates = sorted(
			        (int(col) - 10, int(row) - 10) for row, col in zip(rows, cols)
			    )
			    if not candidates:
			        return ""

			    xs = np.array([x for x, _ in candidates])
			    ys = np.array([y for _, y in candidates])
			    # Keep a match only when the next one is more than half a template width
			    # further right; the final match is always kept.
			    keep = np.append(np.diff(xs) > int(w / 2), True)
			    boxes = [(int(x), int(y)) for x, y in zip(xs[keep], ys[keep])]

			    for x, y in boxes:
			        cv2.rectangle(cimg, (x, y), (x + w, y + h), (0, 255, 255), 2)
			    cv2.imwrite("sid_3rd2.png", cimg)

			    digits = []
			    for i, (x, y) in enumerate(boxes):
			        # NOTE(review): after the -10 shift x or y can be negative, and a
			        # negative slice start counts from the far edge of the array, which
			        # usually yields an empty crop handled by the except below.
			        num = img_as_ubyte(image[y : y + h, x : x + w] < 128)
			        try:
			            num = cv2.resize(num, (32, 32))
			        except cv2.error:
			            # The crop could not be resized; give up on this algorithm.
			            return ""
			        cv2.imwrite("sid_3no_{}.png".format(i), num)
			        digits.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))

			    return "".join(digits)

			def getSID(image, classifier, sid_mask):
			sid_warn = []
			sid_err=[]
			image = 255 - image
			image_original=image.copy()
			image = img_as_ubyte(image > 100)
			cv2.imwrite("enSID0.png", image)
			# Remove noise
			image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=1)
			image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3)
			# Closing. Connect non connected parts
			image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
			# Again noise removal after closing

			#image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
			# image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
			# don't do too much noise removal.
			image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)

			# Skeletonization
			@@ -129,21 +193,27 @@
			# Thinning again
			image = img_as_ubyte(morphology.skeletonize(image > 0.5))
			image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
			cv2.imwrite("enhancedSID.png",image)
			cv2.imwrite("enhancedSID.png", image)
			im2, ctrs, hier = cv2.findContours(
			image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
			)
			sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

			sid_no = ""
			#sid_len = len(sid_mask)
			#sid_no = segment_by_sid_len(image, sid_len, classifier)
			#if sid_mask is not None:
			print(len(sid_mask),len(sorted_ctrs))
			#if len(sid_mask)==len(sorted_ctrs):
			sid_no=segment_by_contours(image,sorted_ctrs[1:],classifier)
			print(len(sid_mask), len(sorted_ctrs))
			sid_no = segment_by_contours(
			image, sorted_ctrs[1:], classifier
			) # we remove largest contour that surrounds whole image
			print(sid_no)
			if(len(sid_no)!=len(sid_mask)):
			print("Ooops have to find another way")
			sid_no=segment_by_sid_len(image,len(sid_mask),classifier)
			return sid_no
			if len(sid_no) != len(sid_mask) or not sid_compare(sid_no,sid_mask):
			sid_warn.append("Trying second SID algorithm.")
			sid_no = segment_by_7segments(image, image_original, sid_mask, classifier)
			print(sid_no)
			if(len(sid_no))!=len(sid_mask):
			sid_no = segment_by_sid_len(image, image_original, sid_mask, classifier)
			sid_warn.append("Trying third SID algorithm.")


			if not sid_compare(sid_no, sid_mask):
			sid_err=['Wrong SID!']

			return (sid_no, sid_err, sid_warn)