Development of the OCR part of AOI
Samo Penic
2018-11-16 5cb7c1dba78b025ff333a202b27f04a2230c9da9
Recognition is a bit more robust.
3 files modified
40 ■■■■■ changed files
Ocr.py 4 ●●● patch | view | raw | blame | history
aoiOcr.py 2 ●●● patch | view | raw | blame | history
sid_process.py 34 ●●●●● patch | view | raw | blame | history
Ocr.py
@@ -220,7 +220,7 @@
            return "x"
        if self.settings is not None:
            sid_mask=self.settings.get("sid_mask", None)
        es = getSID(
        es,err,warn = getSID(
            self.img[
                int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
                int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
@@ -228,6 +228,8 @@
            self.sid_classifier,
            sid_mask
        )
        [self.errors.append(e) for e in err]
        [self.warnings.append(w) for w in warn]
        return es
aoiOcr.py
@@ -2,7 +2,7 @@
from sklearn.externals import joblib
settings = {"sid_mask": "11xx0xxx", "answer_treshold": 0.25}
settings = {"sid_mask": "61xx0xxx", "answer_treshold": 0.25}
classifier = joblib.load("filename.joblib")
#p = Paper(filename="testpage300dpi_scan1.png")
sid_process.py
@@ -79,8 +79,15 @@
    return sid_no
def segment_by_sid_len(image,sid_len, classifier):
def segment_by_sid_len(image, sid_mask, classifier):
    sid_no=""
    sid_len = len(sid_mask)
    if sid_mask[0] == "1":
        move_left = 45
    elif sid_mask[0] == "x":
        move_left = 55
    else:
        move_left = 0
    #find biggest block of pixels
    image1=cv2.morphologyEx(image,cv2.MORPH_DILATE, kernel(5,25), iterations=3)
@@ -88,9 +95,11 @@
    im2, ctrs, hier = cv2.findContours(
        image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.contourArea(ctr)) #get biggest contour
    sorted_ctrs = sorted(
        ctrs, key=lambda ctr: cv2.contourArea(ctr)
    )  # get biggest contour (ascending sort by area, so it is the last element)
    x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])
    image=image[y:y+h,x+25:x+w-25]
    image = image[y : y + h, x + 25 - move_left : x + w - 25]
    cv2.imwrite("sidblock2.png",image)
    imgHeight, imgWidth = image.shape[0:2]
    numWidth=int(imgWidth/(sid_len))
@@ -106,6 +115,7 @@
def getSID(image, classifier, sid_mask):
    sid_warn = []
    image = 255 - image
    image = img_as_ubyte(image > 100)
    cv2.imwrite("enSID0.png", image)
@@ -116,6 +126,7 @@
    # Again noise removal after closing
    #image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
    # don't do too much noise removal.
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)
    # Skeletonization
@@ -136,14 +147,13 @@
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
    sid_no = ""
    #sid_len = len(sid_mask)
    #sid_no = segment_by_sid_len(image, sid_len, classifier)
    #if sid_mask is not None:
    print(len(sid_mask),len(sorted_ctrs))
    #if len(sid_mask)==len(sorted_ctrs):
    sid_no=segment_by_contours(image,sorted_ctrs[1:],classifier)
    sid_no = segment_by_contours(
        image, sorted_ctrs[1:], classifier
    )  # drop the first (leftmost) contour, presumably the one that surrounds the whole image
    print(sid_no)
    if(len(sid_no)!=len(sid_mask)):
        print("Ooops have to find another way")
        sid_no=segment_by_sid_len(image,len(sid_mask),classifier)
    return sid_no
    if len(sid_no) != len(sid_mask):
        #print("Ooops have to find another way")
        sid_warn.append("Trying second SID algorithm.")
        sid_no = segment_by_sid_len(image, sid_mask, classifier)
    return (sid_no, [], sid_warn)