havoc/sizif-ocr.git

			@@ -220,7 +220,7 @@
			return "x"
			if self.settings is not None:
			sid_mask=self.settings.get("sid_mask", None)
			es = getSID(
			es,err,warn = getSID(
			self.img[
			int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
			int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
			@@ -228,6 +228,8 @@
			self.sid_classifier,
			sid_mask
			)
			[self.errors.append(e) for e in err]
			[self.warnings.append(w) for w in warn]
			return es

			@@ -79,8 +79,15 @@
			return sid_no


			def segment_by_sid_len(image,sid_len, classifier):
			def segment_by_sid_len(image, sid_mask, classifier):
			sid_no=""
			sid_len = len(sid_mask)
			if sid_mask[0] == "1":
			move_left = 45
			elif sid_mask[0] == "x":
			move_left = 55
			else:
			move_left = 0
			#find biggest block of pixels

			image1=cv2.morphologyEx(image,cv2.MORPH_DILATE, kernel(5,25), iterations=3)
			@@ -88,9 +95,11 @@
			im2, ctrs, hier = cv2.findContours(
			image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
			)
			sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.contourArea(ctr)) #get biggest contour
			sorted_ctrs = sorted(
			ctrs, key=lambda ctr: cv2.contourArea(ctr)
			) # get biggest contour
			x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])
			image=image[y:y+h,x+25:x+w-25]
			image = image[y : y + h, x + 25 - move_left : x + w - 25]
			cv2.imwrite("sidblock2.png",image)
			imgHeight, imgWidth = image.shape[0:2]
			numWidth=int(imgWidth/(sid_len))
			@@ -106,6 +115,7 @@


			def getSID(image, classifier, sid_mask):
			sid_warn = []
			image = 255 - image
			image = img_as_ubyte(image > 100)
			cv2.imwrite("enSID0.png", image)
			@@ -116,6 +126,7 @@
			# Again noise removal after closing

			#image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
			# don't do too much noise removal.
			image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)

			# Skeletonization
			@@ -136,14 +147,13 @@
			sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

			sid_no = ""
			#sid_len = len(sid_mask)
			#sid_no = segment_by_sid_len(image, sid_len, classifier)
			#if sid_mask is not None:
			print(len(sid_mask),len(sorted_ctrs))
			#if len(sid_mask)==len(sorted_ctrs):
			sid_no=segment_by_contours(image,sorted_ctrs[1:],classifier)
			sid_no = segment_by_contours(
			image, sorted_ctrs[1:], classifier
			) # skip the first contour in x-order: the large one that surrounds the whole image
			print(sid_no)
			if(len(sid_no)!=len(sid_mask)):
			print("Ooops have to find another way")
			sid_no=segment_by_sid_len(image,len(sid_mask),classifier)
			return sid_no
			if len(sid_no) != len(sid_mask):
			#print("Ooops have to find another way")
			sid_warn.append("Trying second SID algorithm.")
			sid_no = segment_by_sid_len(image, sid_mask, classifier)
			return (sid_no, [], sid_warn)

	Ocr.py	4 ●●●●● patch \| view \| raw \| blame \| history
	aoiOcr.py	2 ●●●●● patch \| view \| raw \| blame \| history
	sid_process.py	34 ●●●●● patch \| view \| raw \| blame \| history

			@@ -2,7 +2,7 @@
			from sklearn.externals import joblib


			settings = {"sid_mask": "11xx0xxx", "answer_treshold": 0.25}
			settings = {"sid_mask": "61xx0xxx", "answer_treshold": 0.25}
			classifier = joblib.load("filename.joblib")

			#p = Paper(filename="testpage300dpi_scan1.png")