Development of the ocr part of AOI
Samo Penic
2018-11-17 0d97e9b2d738682ed0aa6349b43a9719e0ca0aa9
Created package and setup.py.
5 files renamed
3 files added
2 files modified
157 ■■■■ changed files
.idea/sonarIssues.xml 80 ●●●●● patch | view | raw | blame | history
aoiOcr.py 7 ●●●● patch | view | raw | blame | history
aoi_ocr/Ocr.py 23 ●●●●● patch | view | raw | blame | history
aoi_ocr/__init__.py patch | view | raw | blame | history
aoi_ocr/filename.joblib patch | view | raw | blame | history
aoi_ocr/sid_process.py 27 ●●●●● patch | view | raw | blame | history
aoi_ocr/template-8.png patch | view | raw | blame | history
aoi_ocr/template.png patch | view | raw | blame | history
scan2db.py 8 ●●●●● patch | view | raw | blame | history
setup.py 12 ●●●●● patch | view | raw | blame | history
.idea/sonarIssues.xml
@@ -18,6 +18,61 @@
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/aoi/settings.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/aoi/urls.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/exam/views.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/organization/models.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/organization/urls.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/requirements.txt">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/static/studentpages/ux/basic-widgets-sl.js">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/static/studentpages/ux/basic-widgets.js">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/templates/studentpages/index.html">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/urls.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/views.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/Ocr.py">
          <value>
            <set />
@@ -33,11 +88,36 @@
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/scan2db.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/setup.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/sid_process.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/sizif-ocr/Ocr.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/sizif_ocr/Ocr.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/sizif_ocr/sid_process.py">
          <value>
            <set />
          </value>
        </entry>
      </map>
    </option>
  </component>
aoiOcr.py
@@ -1,10 +1,13 @@
from Ocr import Paper
from aoi_ocr.Ocr import Paper
from sklearn.externals import joblib
import pkg_resources
path = '/filename.joblib'  # always use slash
filepath = pkg_resources.resource_filename('aoi_ocr', path)
from glob import glob
settings = {"sid_mask": "64xx0xxx", "answer_threshold": 0.25}
classifier = joblib.load("filename.joblib")
classifier = joblib.load(filepath)
#p = Paper(filename="testpage300dpi_scan1.png")
#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_threshold": 0.25})
aoi_ocr/Ocr.py
File was renamed from Ocr.py
@@ -1,8 +1,13 @@
from pyzbar.pyzbar import decode
from sid_process import getSID
from .sid_process import getSID
import cv2
import numpy as np
import math
import pkg_resources
markerfile = '/template.png'  # always use slash
markerfilename = pkg_resources.resource_filename(__name__, markerfile)
class Paper:
@@ -27,7 +32,7 @@
            return
        self.imgHeight, self.imgWidth = self.img.shape[0:2]
    def saveImage(self, filename="debug_image.png"):
    def saveImage(self, filename="/tmp/debug_image.png"):
        cv2.imwrite(filename, self.img)
    def runOcr(self):
@@ -98,7 +103,7 @@
    def getSkewAngle(self):
        neg = 255 - self.bwimg  # get negative image
        cv2.imwrite("debug_1.png", neg)
        cv2.imwrite("/tmp/debug_1.png", neg)
        angle_counter = 0  # number of angles
        angle = 0.0  # collects sum of angles
@@ -123,11 +128,11 @@
        except:
            skew = 0
        cv2.imwrite("debug_2.png", cimg)
        cv2.imwrite("/tmp/debug_2.png", cimg)
        return skew
    def locateUpMarkers(self, threshold=0.85, height=200):
        template = cv2.imread("template.png", 0)
        template = cv2.imread(markerfilename, 0)
        w, h = template.shape[::-1]
        crop_img = self.img[0:height, :]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
@@ -158,13 +163,13 @@
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
        cv2.imwrite("debug_3.png", cimg)
        cv2.imwrite("/tmp/debug_3.png", cimg)
        self.xMarkerLocations = loc
        return loc
    def locateRightMarkers(self, threshold=0.85, width=200):
        template = cv2.imread("template.png", 0)
        template = cv2.imread(markerfilename, 0)
        w, h = template.shape[::-1]
        crop_img = self.img[:, -width:]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
@@ -195,7 +200,7 @@
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
        cv2.imwrite("debug_4.png", cimg)
        cv2.imwrite("/tmp/debug_4.png", cimg)
        self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
        return self.yMarkerLocations
aoi_ocr/__init__.py
aoi_ocr/filename.joblib
Binary files differ
aoi_ocr/sid_process.py
File was renamed from sid_process.py
@@ -2,6 +2,11 @@
import numpy as np
from skimage import morphology, img_as_ubyte
import pkg_resources
templatefile = '/template-8.png'  # always use slash
template8 = pkg_resources.resource_filename(__name__, templatefile)
def kernel(x, y):
    """
@@ -24,7 +29,7 @@
    # find biggest block of pixels
    image1 = cv2.morphologyEx(image2, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
    image1 = img_as_ubyte(image1 > 50)
    cv2.imwrite("sidblock1.png", image1)
    cv2.imwrite("/tmp/sidblock1.png", image1)
    im2, ctrs, hier = cv2.findContours(
        image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
@@ -77,7 +82,7 @@
        roi = cv2.resize(roi, (32, 32))
        # cv2.rectangle(image,(x,y),( x + w, y + h ),(0,255,0),2)
        cv2.imwrite("sid_no_{}.png".format(i), roi)
        cv2.imwrite("/tmp/sid_no_{}.png".format(i), roi)
        sid_no = sid_no + str(classifier.predict(roi.reshape(1, -1) / 255.0)[0])
    return sid_no
@@ -96,14 +101,14 @@
    sid_no = ""
    sid_len = len(sid_mask)
    image=find_biggest_blob(image,original_image,sid_mask)
    cv2.imwrite("sidblock2.png", image)
    cv2.imwrite("/tmp/sidblock2.png", image)
    imgHeight, imgWidth = image.shape[0:2]
    numWidth = int(imgWidth / (sid_len))
    for i in range(0, sid_len):
        num = image[:, i * numWidth : (i + 1) * numWidth]
        num = img_as_ubyte(num < 128)
        num = cv2.resize(num, (32, 32))
        cv2.imwrite("sid_no_{}.png".format(i), num)
        cv2.imwrite("/tmp/sid_no_{}.png".format(i), num)
        sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])
    return sid_no
@@ -123,8 +128,8 @@
        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
    )
    block_image = img_as_ubyte(block_image < 50)
    cv2.imwrite("sid_3rd1.png", block_image)
    template = cv2.imread("template-8.png", 0)
    cv2.imwrite("/tmp/sid_3rd1.png", block_image)
    template = cv2.imread(template8, 0)
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
    loc = np.where(res >= 0.75)
@@ -147,7 +152,7 @@
    points = [loc_filtered_y[a], loc_filtered_x[a]]
    for pt in zip(*points[::-1]):
        cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
    cv2.imwrite("sid_3rd2.png", cimg)
    cv2.imwrite("/tmp/sid_3rd2.png", cimg)
    sid_no = ""
    for i, pt in enumerate(zip(*points[::-1])):
@@ -158,7 +163,7 @@
            num = cv2.resize(num, (32, 32))
        except:
            return ""
        cv2.imwrite("sid_3no_{}.png".format(i), num)
        cv2.imwrite("/tmp/sid_3no_{}.png".format(i), num)
        sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])
    return sid_no
@@ -182,7 +187,7 @@
    image = 255 - image
    image_original = image.copy()
    image = img_as_ubyte(image > 100)
    cv2.imwrite("enSID0.png", image)
    cv2.imwrite("/tmp/enSID0.png", image)
    # Remove noise
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3)
@@ -197,7 +202,7 @@
    # Skeletonization
    image = img_as_ubyte(morphology.thin(image > 128))
    cv2.imwrite("enSID1.png", image)
    cv2.imwrite("/tmp/enSID1.png", image)
    # Stub removal (might not be necessary if thinning instead of skeletonize is used above)
    # Making lines stronger
@@ -207,7 +212,7 @@
    # Thinning again
    image = img_as_ubyte(morphology.skeletonize(image > 0.5))
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("enhancedSID.png", image)
    cv2.imwrite("/tmp/enhancedSID.png", image)
    sid_no = segment_by_contours(image, image_original, classifier, sid_mask)
aoi_ocr/template-8.png

aoi_ocr/template.png

scan2db.py
New file
@@ -0,0 +1,8 @@
# Bootstrap for a standalone script that uses the Django project's models
# outside of manage.py.
import os
import django
# Change the working directory to the Django project.
# NOTE(review): hard-coded, machine-specific absolute path -- confirm it exists
# wherever this script is meant to run.
os.chdir('/home/samo/programiranje/django/sizif-web/aoi')
# Select the settings module; setdefault keeps any value already present in the
# environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "aoi.settings")
# Initialise Django before the model import below.
django.setup()
from exam import models
setup.py
New file
@@ -0,0 +1,12 @@
#!/usr/bin/python3
"""Packaging script for the ``aoi_ocr`` package.

Besides the Python modules, the package ships the data files that its modules
locate through ``pkg_resources.resource_filename``: the marker templates and
the serialized digit classifier. They are declared in ``package_data`` so that
an installed copy of the package can find them.
"""
try:
    from setuptools import setup
except ImportError:
    # Fall back to distutils where setuptools is unavailable; distutils itself
    # no longer exists on Python 3.12 and later.
    from distutils.core import setup

setup(
    name="aoi_ocr",
    version="0.1a",
    description="OCR of aoi papers.",
    author="Samo Penic",
    author_email="samo.penic@gmail.com",
    url="",
    packages=["aoi_ocr"],
    # Non-Python files loaded by the package through pkg_resources.
    package_data={
        "aoi_ocr": [
            "filename.joblib",
            "template.png",
            "template-8.png",
        ],
    },
)