Initial version of SID recognition. Cutting out the individual numerals is not perfect yet.

# Ocr.py
from pyzbar.pyzbar import decode
from sid_process import getSID
import cv2
import numpy as np
import math


class Paper:
    def __init__(self, filename=None, sid_classifier=None):
        self.filename = filename
        self.invalid = None
        self.QRData = None
        self.errors = []
        self.warnings = []
        self.sid_classifier = sid_classifier
        if filename is not None:
            self.loadImage(filename)
            self.runOcr()

    # ...
        loc_filtered_x, loc_filtered_y = zip(
            *sorted(zip(loc_filtered_x, loc_filtered_y))
        )
        # loc=[loc_filtered_y,loc_filtered_x]
        # Remove duplicates: keep only the last detection of each cluster of
        # x positions that lie within 40 px of one another.
        a = np.diff(loc_filtered_x) > 40
        a = np.append(a, True)
        loc_filtered_x = np.array(loc_filtered_x)
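        # For illustration (hypothetical values, not taken from the scans): with
        # sorted positions loc_filtered_x = (100, 102, 300, 303, 500),
        # np.diff(...) > 40 gives [False, True, False, True]; appending True
        # yields the mask [False, True, False, True, True], so indexing with it
        # keeps 102, 303 and 500, i.e. one hit per cluster.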
        # ...
        self.answerMatrix.append(oneline)

    def get_enhanced_sid(self):
        # Without a classifier there is nothing to recognise the digits with.
        if self.sid_classifier is None:
            return "x"
        # The SID field sits in the top-right region of the scanned page.
        es = getSID(
            self.img[
                int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
                int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
            ],
            self.sid_classifier,
        )
        return es

# Usage example
from Ocr import Paper
from sklearn.externals import joblib  # with newer scikit-learn, import joblib directly

classifier = joblib.load('filename.joblib')

# p = Paper(filename='testpage300dpi_scan1.png')
p = Paper(filename='sizif111.tif', sid_classifier=classifier)
# p = Paper(filename='processed_scans/20141016095134535_0028.tif')

print(p.QRData)
print(p.errors)

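The usage example assumes a digit classifier saved as 'filename.joblib' that predicts on flattened 32x32 binary digit images (see the reshape(1, -1) / 255.0 call in sid_process.py). How that model was produced is not part of this commit; the following is only a minimal sketch of how such a classifier could be trained and saved, assuming an SVM (suggested by the `from sklearn import svm` import) and using synthetic placeholder arrays where real labelled digit crops would go.

import numpy as np
from sklearn import svm
from sklearn.externals import joblib  # with newer scikit-learn, import joblib directly

# Placeholder data, only to illustrate the expected shapes: in practice these
# would be real cropped 32x32 digit images (for example the sid_no_{i}.png
# debug dumps) together with their true digit labels.
rng = np.random.RandomState(0)
images = rng.randint(0, 2, size=(100, 32, 32)).astype(np.uint8) * 255
labels = rng.randint(0, 10, size=100)

# Flatten and scale to 0/1, matching classifier.predict(roi.reshape(1, -1) / 255.0).
X = images.reshape(len(images), -1) / 255.0
y = labels

clf = svm.SVC(gamma='scale')
clf.fit(X, y)
joblib.dump(clf, 'filename.joblib')

With a real training set in place of the placeholder arrays, the saved model can be loaded exactly as in the usage example above and passed to Paper(..., sid_classifier=classifier).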

# sid_process.py
import cv2
import numpy as np
from skimage import morphology, img_as_ubyte
from sklearn import svm
from sklearn.externals import joblib


| | | """ |
| | | (1) The text is an array of chars (in row-major order) where |
| | |
| | | return np.ones((x, y), np.uint8) |
| | | |
| | | |
| | | def enhanceSID(image): |
| | | def getSID(image, classifier): |
    # Invert and binarise: the digits become white strokes on a black background.
    image = 255 - image
    image = img_as_ubyte(image > 100)
    cv2.imwrite("enSID0.png", image)  # debug dump
    # Remove noise
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=1)
    # Closing: connect parts of a digit that are not connected
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 5), iterations=2)
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
    # Noise removal again, after the closing
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
    # Skeletonization
    ## For thinning, erosion could be used instead:
    ## image = cv2.erode(image, kernel(4, 4), iterations=40)
    image = img_as_ubyte(morphology.thin(image > 128))
    cv2.imwrite("enSID1.png", image)  # debug dump
    # Stub removal (might not be necessary if thinning instead of skeletonize is used above)

    # Thinning again, then thicken the strokes to a uniform width
    image = img_as_ubyte(morphology.skeletonize(image > 0.5))
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))

    # Cut the image into individual numerals: one external contour per digit,
    # sorted left to right by the x coordinate of its bounding box.
    # (OpenCV 3.x API; in OpenCV 4 findContours returns only contours and hierarchy.)
    im2, ctrs, hier = cv2.findContours(image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    # classifier = joblib.load('filename.joblib')

    sid_no = ""
    for i, ctr in enumerate(sorted_ctrs):
        # Get the bounding box of this numeral
        x, y, w, h = cv2.boundingRect(ctr)
        # Very narrow blobs are taken to be the digit 1 without classification
        if w < h / 2:
            sid_no = sid_no + "1"
            continue
        # Get the ROI, re-invert it and scale it to the classifier's 32x32 input size
        roi = image[y:y + h, x:x + w]
        roi = img_as_ubyte(roi < 128)
        roi = cv2.resize(roi, (32, 32))

        # cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imwrite('sid_no_{}.png'.format(i), roi)  # debug dump of each numeral
        sid_no = sid_no + str(classifier.predict(roi.reshape(1, -1) / 255.0)[0])
    print(sid_no)
    return sid_no
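The commit message notes that cutting the numerals is not perfect yet: RETR_EXTERNAL yields one contour per connected blob, so two digits that touch after the dilation step come out as a single, too-wide region. One possible direction, not part of this commit, is to split any box that is much wider than a typical digit into equal-width slices before classification; the helper name split_wide_box and the expected_w parameter below are assumptions for illustration only.

def split_wide_box(x, y, w, h, expected_w):
    # Split a bounding box that is much wider than a single digit into roughly
    # digit-sized slices; a normal-width box is returned unchanged.
    n = max(1, int(round(w / float(expected_w))))
    slice_w = w // n
    return [(x + i * slice_w, y, slice_w if i < n - 1 else w - i * slice_w, h)
            for i in range(n)]

Each returned (x, y, w, h) box could then go through the same ROI extraction, resize and predict steps as above.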