Development of the ocr part of AOI
Samo Penic
2018-11-16 5cb7c1dba78b025ff333a202b27f04a2230c9da9
recognition is a bit more robust....
3 files modified
62 ■■■■■ changed files
Ocr.py 4 ●●● patch | view | raw | blame | history
aoiOcr.py 2 ●●● patch | view | raw | blame | history
sid_process.py 56 ●●●●● patch | view | raw | blame | history
Ocr.py
@@ -220,7 +220,7 @@
            return "x"
        if self.settings is not None:
            sid_mask=self.settings.get("sid_mask", None)
        es = getSID(
        es,err,warn = getSID(
            self.img[
                int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
                int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
@@ -228,6 +228,8 @@
            self.sid_classifier,
            sid_mask
        )
        [self.errors.append(e) for e in err]
        [self.warnings.append(w) for w in warn]
        return es
aoiOcr.py
@@ -2,7 +2,7 @@
from sklearn.externals import joblib
settings = {"sid_mask": "11xx0xxx", "answer_treshold": 0.25}
settings = {"sid_mask": "61xx0xxx", "answer_treshold": 0.25}
classifier = joblib.load("filename.joblib")
#p = Paper(filename="testpage300dpi_scan1.png")
sid_process.py
@@ -79,23 +79,32 @@
    return sid_no
def segment_by_sid_len(image,sid_len, classifier):
    sid_no=""
    #find biggest block of pixels
def segment_by_sid_len(image, sid_mask, classifier):
    sid_no = ""
    sid_len = len(sid_mask)
    if sid_mask[0] == "1":
        move_left = 45
    elif sid_mask[0] == "x":
        move_left = 55
    else:
        move_left = 0
    # find biggest block of pixels
    image1=cv2.morphologyEx(image,cv2.MORPH_DILATE, kernel(5,25), iterations=3)
    cv2.imwrite("sidblock1.png",image1)
    image1 = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 25), iterations=3)
    cv2.imwrite("sidblock1.png", image1)
    im2, ctrs, hier = cv2.findContours(
        image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.contourArea(ctr)) #get biggest contour
    sorted_ctrs = sorted(
        ctrs, key=lambda ctr: cv2.contourArea(ctr)
    )  # get biggest contour
    x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])
    image=image[y:y+h,x+25:x+w-25]
    cv2.imwrite("sidblock2.png",image)
    image = image[y : y + h, x + 25 - move_left : x + w - 25]
    cv2.imwrite("sidblock2.png", image)
    imgHeight, imgWidth = image.shape[0:2]
    numWidth=int(imgWidth/(sid_len))
    for i in range(0,sid_len):
        num=image[:,i*numWidth:(i+1)*numWidth]
    numWidth = int(imgWidth / (sid_len))
    for i in range(0, sid_len):
        num = image[:, i * numWidth : (i + 1) * numWidth]
        num = img_as_ubyte(num < 128)
        num = cv2.resize(num, (32, 32))
@@ -106,6 +115,7 @@
def getSID(image, classifier, sid_mask):
    sid_warn = []
    image = 255 - image
    image = img_as_ubyte(image > 100)
    cv2.imwrite("enSID0.png", image)
@@ -115,7 +125,8 @@
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
    # Again noise removal after closing
    #image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
    # image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
    # don't do too much noise removal.
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)
    # Skeletonization
@@ -129,21 +140,20 @@
    # Thinning again
    image = img_as_ubyte(morphology.skeletonize(image > 0.5))
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("enhancedSID.png",image)
    cv2.imwrite("enhancedSID.png", image)
    im2, ctrs, hier = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
    sid_no = ""
    #sid_len = len(sid_mask)
    #sid_no = segment_by_sid_len(image, sid_len, classifier)
    #if sid_mask is not None:
    print(len(sid_mask),len(sorted_ctrs))
    #if len(sid_mask)==len(sorted_ctrs):
    sid_no=segment_by_contours(image,sorted_ctrs[1:],classifier)
    print(len(sid_mask), len(sorted_ctrs))
    sid_no = segment_by_contours(
        image, sorted_ctrs[1:], classifier
    )  # we remove largest contour that surrounds whole image
    print(sid_no)
    if(len(sid_no)!=len(sid_mask)):
        print("Ooops have to find another way")
        sid_no=segment_by_sid_len(image,len(sid_mask),classifier)
    return sid_no
    if len(sid_no) != len(sid_mask):
        #print("Ooops have to find another way")
        sid_warn.append("Trying second SID algorithm.")
        sid_no = segment_by_sid_len(image, sid_mask, classifier)
    return (sid_no, [], sid_warn)