| | |
| | | |
| | | |
class Paper:
    """A scanned page together with the data extracted from it.

    Only the constructor and ``get_enhanced_sid`` are shown here.
    NOTE(review): ``loadImage``, ``runOcr`` and the ``img`` / ``imgHeight`` /
    ``imgWidth`` attributes are used below but are not defined in this
    excerpt -- presumably they live elsewhere in the class; confirm.
    """

    def __init__(self, filename=None, sid_classifier=None, settings=None):
        """Create the page and, when *filename* is given, process it at once.

        Args:
            filename: Path of the scan to load. When ``None`` nothing is
                loaded and no OCR is run.
            sid_classifier: Object handed to ``getSID`` for recognising the
                hand-written student id. ``None`` disables that step.
            settings: Optional mapping of options; this excerpt only reads
                the ``"sid_mask"`` key from it.
        """
        self.filename = filename
        self.invalid = None
        self.QRData = None
        self.settings = settings
        self.errors = []
        self.warnings = []
        self.sid_classifier = sid_classifier
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()

    def get_enhanced_sid(self):
        """Return the student id read from the id box of the page.

        Returns:
            ``"x"`` when no classifier is configured, otherwise whatever
            ``getSID`` returns for the cropped id region.
        """
        if self.sid_classifier is None:
            return "x"

        # Always bind sid_mask: without settings there is simply no mask.
        # (Previously the name was left unbound and the call below raised
        # UnboundLocalError when a classifier was given without settings.)
        sid_mask = None
        if self.settings is not None:
            sid_mask = self.settings.get("sid_mask", None)

        # Crop rows 4.5%-8.5% of the height and columns 70%-99% of the width.
        top = int(0.045 * self.imgHeight)
        bottom = int(0.085 * self.imgHeight)
        left = int(0.7 * self.imgWidth)
        right = int(0.99 * self.imgWidth)
        es = getSID(self.img[top:bottom, left:right], self.sid_classifier, sid_mask)
        return es
| | |
| | | from Ocr import Paper |
| | | from sklearn.externals import joblib |
| | | |
| | | |
# Manual smoke test: process one scan with a stored classifier and print
# the QR payload that was decoded from it.
settings = {"sid_mask": "11xx0xxx"}
classifier = joblib.load("filename.joblib")

# Other sample scans used during development:
#   testpage300dpi_scan1.png
#   processed_scans/20141016095134535_0028.tif
# The page is built once only; constructing it loads the image and runs OCR,
# so an extra throw-away instance would repeat all of that work.
p = Paper(filename="sizif111.tif", sid_classifier=classifier, settings=settings)

print(p.QRData)
| | |
| | | import cv2 |
| | | import numpy as np |
| | | from skimage import morphology,img_as_ubyte |
| | | from skimage import morphology, img_as_ubyte |
| | | from sklearn import svm |
| | | from sklearn.externals import joblib |
| | | |
| | | |
| | | |
| | | """ |
| | |
| | | return np.ones((x, y), np.uint8) |
| | | |
| | | |
| | | def getSID(image, classifier): |
| | | image=255-image |
| | | image=img_as_ubyte(image>100) |
def segment_by_contours(image, sorted_ctrs, classifier):
    """Read one character per contour and return them joined as a string.

    Args:
        image: 2-D image the contours were found in (indexed ``[row, col]``).
        sorted_ctrs: Contours in reading order; an empty sequence yields
            ``""``.
        classifier: Object whose ``predict`` accepts a ``(1, 1024)`` array of
            values scaled to 0..1 and returns a sequence whose first item is
            the predicted label.

    Returns:
        The recognised characters concatenated in contour order.

    Side effects:
        Writes every classified crop to ``sid_no_<index>.png`` in the
        current working directory.
    """
    digits = []
    for i, ctr in enumerate(sorted_ctrs):
        x, y, w, h = cv2.boundingRect(ctr)
        # A box less than half as wide as it is tall is taken to be a "1"
        # and is not sent to the classifier.
        if w < h / 2:
            digits.append("1")
            continue
        roi = image[y : y + h, x : x + w]
        # Flip polarity: pixels below 128 become 255, everything else 0.
        roi = img_as_ubyte(roi < 128)
        roi = cv2.resize(roi, (32, 32))

        cv2.imwrite("sid_no_{}.png".format(i), roi)
        digits.append(str(classifier.predict(roi.reshape(1, -1) / 255.0)[0]))
    return "".join(digits)
| | | |
| | | |
def getSID(image, classifier, sid_mask=None):
    """Recognise the hand-written student id in a cropped grayscale image.

    The crop is binarised, cleaned with morphological operations and thinned
    so that each character becomes one connected stroke; the external
    contours are then read left to right by ``segment_by_contours``.

    Args:
        image: Cropped id box. NOTE(review): assumed to be a uint8 grayscale
            array (the code computes ``255 - image``) -- confirm with caller.
        classifier: Passed through to ``segment_by_contours``.
        sid_mask: Optional mask string such as ``"11xx0xxx"``. Only its
            length is used here: the number of contours must equal it.
            ``None`` skips that check.

    Returns:
        The recognised id as a string, or ``""`` when the number of contours
        does not match the mask length.

    Side effects:
        Writes the debug images ``enSID0.png`` and ``enSID1.png`` to the
        current working directory and prints the result.
    """
    # Invert and binarise.
    image = 255 - image
    image = img_as_ubyte(image > 100)
    cv2.imwrite("enSID0.png", image)
    # Remove noise
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=1)
    # Closing. Connect non connected parts
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
    # Again noise removal after closing
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
    # Skeletonization
    image = img_as_ubyte(morphology.thin(image > 128))
    cv2.imwrite("enSID1.png", image)
    # No separate stub removal is done; it might not be necessary because
    # thinning (rather than skeletonize) is used above.
    # Making lines stronger
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 5), iterations=1)
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(10, 10))
    # Thinning again
    image = img_as_ubyte(morphology.skeletonize(image > 0.5))
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))

    # findContours returns (image, contours, hierarchy) on OpenCV 3 but
    # (contours, hierarchy) on OpenCV 2 and 4; the contours are always the
    # second-to-last item.
    ctrs = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    # Reading order: left edge of each bounding box.
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    sid_no = ""
    # Without a mask there is no expected length to check against, so read
    # the contours directly, as this function did before masks existed.
    if sid_mask is None or len(sid_mask) == len(sorted_ctrs):
        sid_no = segment_by_contours(image, sorted_ctrs, classifier)
    else:
        print("Ooops have to find another way")
    print(sid_no)
    return sid_no