havoc/sizif-ocr.git

parent: fe2c1025 | patch | commit | ignore whitespace

Samo Penic

2018-11-17 0d97e9b2d738682ed0aa6349b43a9719e0ca0aa9

Created package and setup.py.

2 files modified

5 files renamed

3 files added

	.idea/sonarIssues.xml	80 ●●●●● patch \| view \| raw \| blame \| history
	aoiOcr.py	7 ●●●●● patch \| view \| raw \| blame \| history
	aoi_ocr/Ocr.py	23 ●●●●● patch \| view \| raw \| blame \| history
	aoi_ocr/__init__.py	patch \| view \| raw \| blame \| history
	aoi_ocr/filename.joblib	patch \| view \| raw \| blame \| history
	aoi_ocr/sid_process.py	27 ●●●●● patch \| view \| raw \| blame \| history
	aoi_ocr/template-8.png	patch \| view \| raw \| blame \| history
	aoi_ocr/template.png	patch \| view \| raw \| blame \| history
	scan2db.py	8 ●●●●● patch \| view \| raw \| blame \| history
	setup.py	12 ●●●●● patch \| view \| raw \| blame \| history

 .idea/sonarIssues.xml

@@ -18,6 +18,61 @@
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/aoi/settings.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/aoi/urls.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/exam/views.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/organization/models.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/organization/urls.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/requirements.txt">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/static/studentpages/ux/basic-widgets-sl.js">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/static/studentpages/ux/basic-widgets.js">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/templates/studentpages/index.html">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/urls.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/views.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/Ocr.py">
          <value>
            <set />
@@ -33,11 +88,36 @@
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/scan2db.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/setup.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/sid_process.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/sizif-ocr/Ocr.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/sizif_ocr/Ocr.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/sizif_ocr/sid_process.py">
          <value>
            <set />
          </value>
        </entry>
      </map>
    </option>
  </component>

 aoiOcr.py

@@ -1,10 +1,13 @@
from Ocr import Paper
from aoi_ocr.Ocr import Paper
from sklearn.externals import joblib
import pkg_resources
path = '/filename.joblib'  # always use slash
filepath = pkg_resources.resource_filename('aoi_ocr', path)

from glob import glob

settings = {"sid_mask": "64xx0xxx", "answer_threshold": 0.25}
classifier = joblib.load("filename.joblib")
classifier = joblib.load(filepath)

#p = Paper(filename="testpage300dpi_scan1.png")
#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_threshold": 0.25})

 aoi_ocr/Ocr.py

File was renamed from Ocr.py
@@ -1,8 +1,13 @@
from pyzbar.pyzbar import decode
from sid_process import getSID
from .sid_process import getSID
import cv2
import numpy as np
import math

import pkg_resources

markerfile = '/template.png'  # always use slash
markerfilename = pkg_resources.resource_filename(__name__, markerfile)



class Paper:
@@ -27,7 +32,7 @@
            return
        self.imgHeight, self.imgWidth = self.img.shape[0:2]

    def saveImage(self, filename="debug_image.png"):
    def saveImage(self, filename="/tmp/debug_image.png"):
        cv2.imwrite(filename, self.img)

    def runOcr(self):
@@ -98,7 +103,7 @@

    def getSkewAngle(self):
        neg = 255 - self.bwimg  # get negative image
        cv2.imwrite("debug_1.png", neg)
        cv2.imwrite("/tmp/debug_1.png", neg)

        angle_counter = 0  # number of angles
        angle = 0.0  # collects sum of angles
@@ -123,11 +128,11 @@
        except:
            skew = 0

        cv2.imwrite("debug_2.png", cimg)
        cv2.imwrite("/tmp/debug_2.png", cimg)
        return skew

    def locateUpMarkers(self, threshold=0.85, height=200):
        template = cv2.imread("template.png", 0)
        template = cv2.imread(markerfilename, 0)
        w, h = template.shape[::-1]
        crop_img = self.img[0:height, :]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
@@ -158,13 +163,13 @@
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)

        cv2.imwrite("debug_3.png", cimg)
        cv2.imwrite("/tmp/debug_3.png", cimg)

        self.xMarkerLocations = loc
        return loc

    def locateRightMarkers(self, threshold=0.85, width=200):
        template = cv2.imread("template.png", 0)
        template = cv2.imread(markerfilename, 0)
        w, h = template.shape[::-1]
        crop_img = self.img[:, -width:]
        res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
@@ -195,7 +200,7 @@
            for pt in zip(*loc[::-1]):
                cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)

        cv2.imwrite("debug_4.png", cimg)
        cv2.imwrite("/tmp/debug_4.png", cimg)

        self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
        return self.yMarkerLocations

 aoi_ocr/__init__.py


 aoi_ocr/filename.joblib

Binary files differ

 aoi_ocr/sid_process.py

File was renamed from sid_process.py
@@ -2,6 +2,11 @@
import numpy as np
from skimage import morphology, img_as_ubyte

import pkg_resources

templatefile = '/template-8.png'  # always use slash
template8 = pkg_resources.resource_filename(__name__, templatefile)


def kernel(x, y):
    """
@@ -24,7 +29,7 @@
    # find biggest block of pixels
    image1 = cv2.morphologyEx(image2, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
    image1 = img_as_ubyte(image1 > 50)
    cv2.imwrite("sidblock1.png", image1)
    cv2.imwrite("/tmp/sidblock1.png", image1)
    im2, ctrs, hier = cv2.findContours(
        image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
@@ -77,7 +82,7 @@
        roi = cv2.resize(roi, (32, 32))

        # cv2.rectangle(image,(x,y),( x + w, y + h ),(0,255,0),2)
        cv2.imwrite("sid_no_{}.png".format(i), roi)
        cv2.imwrite("/tmp/sid_no_{}.png".format(i), roi)
        sid_no = sid_no + str(classifier.predict(roi.reshape(1, -1) / 255.0)[0])
    return sid_no

@@ -96,14 +101,14 @@
    sid_no = ""
    sid_len = len(sid_mask)
    image=find_biggest_blob(image,original_image,sid_mask)
    cv2.imwrite("sidblock2.png", image)
    cv2.imwrite("/tmp/sidblock2.png", image)
    imgHeight, imgWidth = image.shape[0:2]
    numWidth = int(imgWidth / (sid_len))
    for i in range(0, sid_len):
        num = image[:, i * numWidth : (i + 1) * numWidth]
        num = img_as_ubyte(num < 128)
        num = cv2.resize(num, (32, 32))
        cv2.imwrite("sid_no_{}.png".format(i), num)
        cv2.imwrite("/tmp/sid_no_{}.png".format(i), num)
        sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])
    return sid_no

@@ -123,8 +128,8 @@
        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
    )
    block_image = img_as_ubyte(block_image < 50)
    cv2.imwrite("sid_3rd1.png", block_image)
    template = cv2.imread("template-8.png", 0)
    cv2.imwrite("/tmp/sid_3rd1.png", block_image)
    template = cv2.imread(template8, 0)
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
    loc = np.where(res >= 0.75)
@@ -147,7 +152,7 @@
    points = [loc_filtered_y[a], loc_filtered_x[a]]
    for pt in zip(*points[::-1]):
        cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
    cv2.imwrite("sid_3rd2.png", cimg)
    cv2.imwrite("/tmp/sid_3rd2.png", cimg)

    sid_no = ""
    for i, pt in enumerate(zip(*points[::-1])):
@@ -158,7 +163,7 @@
            num = cv2.resize(num, (32, 32))
        except:
            return ""
        cv2.imwrite("sid_3no_{}.png".format(i), num)
        cv2.imwrite("/tmp/sid_3no_{}.png".format(i), num)
        sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])

    return sid_no
@@ -182,7 +187,7 @@
    image = 255 - image
    image_original = image.copy()
    image = img_as_ubyte(image > 100)
    cv2.imwrite("enSID0.png", image)
    cv2.imwrite("/tmp/enSID0.png", image)

    # Remove noise
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3)
@@ -197,7 +202,7 @@

    # Skeletonization
    image = img_as_ubyte(morphology.thin(image > 128))
    cv2.imwrite("enSID1.png", image)
    cv2.imwrite("/tmp/enSID1.png", image)

    # Stub removal (might not be necessary if thinning instead of skeletonize is used above)
    # Making lines stronger
@@ -207,7 +212,7 @@
    # Thinning again
    image = img_as_ubyte(morphology.skeletonize(image > 0.5))
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("enhancedSID.png", image)
    cv2.imwrite("/tmp/enhancedSID.png", image)

    sid_no = segment_by_contours(image, image_original, classifier, sid_mask)


 aoi_ocr/template-8.png



 aoi_ocr/template.png



 scan2db.py

New file
@@ -0,0 +1,8 @@
"""Bootstrap the Django project so its ORM models can be used from this script."""
import os
import sys

import django

# Root of the Django project that provides the ``aoi.settings`` module and the
# ``exam`` app imported below.
# NOTE(review): this is a developer-specific absolute path; confirm whether it
# should come from configuration instead.
PROJECT_DIR = '/home/samo/programiranje/django/sizif-web/aoi'

os.chdir(PROJECT_DIR)
# Changing the working directory does not affect imports when this file is run
# as a script (sys.path[0] is then the script's own directory), so the project
# root is added to the import path explicitly.
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

# setdefault() keeps a DJANGO_SETTINGS_MODULE that the caller already exported.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "aoi.settings")
django.setup()

# Must stay below django.setup(): model modules can only be imported once the
# app registry has been populated.
from exam import models



 setup.py

New file
@@ -0,0 +1,12 @@
#!/usr/bin/python3
"""Installation script for the ``aoi_ocr`` package."""
# NOTE(review): distutils is deprecated (PEP 632) and no longer shipped with
# Python 3.12+; consider migrating to setuptools, which already provides the
# pkg_resources module the package imports.
from distutils.core import setup

setup(
    name="aoi_ocr",
    version="0.1a",
    description="OCR of aoi papers.",
    author="Samo Penic",
    author_email="samo.penic@gmail.com",
    url="",
    packages=["aoi_ocr"],
    # The package locates template.png, template-8.png and filename.joblib
    # through pkg_resources.resource_filename(), so these non-Python files
    # have to be installed next to the modules; ``packages`` alone only
    # copies the .py sources.
    package_data={"aoi_ocr": ["*.png", "*.joblib"]},
)

			@@ -18,6 +18,61 @@
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/aoi/settings.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/aoi/urls.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/exam/views.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/organization/models.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/organization/urls.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/requirements.txt">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/static/studentpages/ux/basic-widgets-sl.js">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/static/studentpages/ux/basic-widgets.js">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/templates/studentpages/index.html">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/urls.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/views.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/Ocr.py">
			<value>
			<set />
			@@ -33,11 +88,36 @@
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/scan2db.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/setup.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/sid_process.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/sizif-ocr/Ocr.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/sizif_ocr/Ocr.py">
			<value>
			<set />
			</value>
			</entry>
			<entry key="$PROJECT_DIR$/sizif_ocr/sid_process.py">
			<value>
			<set />
			</value>
			</entry>
			</map>
			</option>
			</component>

			@@ -1,10 +1,13 @@
			from Ocr import Paper
			from aoi_ocr.Ocr import Paper
			from sklearn.externals import joblib
			import pkg_resources
			path = '/filename.joblib' # always use slash
			filepath = pkg_resources.resource_filename('aoi_ocr', path)

			from glob import glob

			settings = {"sid_mask": "64xx0xxx", "answer_threshold": 0.25}
			classifier = joblib.load("filename.joblib")
			classifier = joblib.load(filepath)

			#p = Paper(filename="testpage300dpi_scan1.png")
			#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_threshold": 0.25})

File was renamed from Ocr.py
			@@ -1,8 +1,13 @@
			from pyzbar.pyzbar import decode
			from sid_process import getSID
			from .sid_process import getSID
			import cv2
			import numpy as np
			import math

			import pkg_resources

			markerfile = '/template.png' # always use slash
			markerfilename = pkg_resources.resource_filename(__name__, markerfile)



			class Paper:
			@@ -27,7 +32,7 @@
			return
			self.imgHeight, self.imgWidth = self.img.shape[0:2]

			def saveImage(self, filename="debug_image.png"):
			def saveImage(self, filename="/tmp/debug_image.png"):
			cv2.imwrite(filename, self.img)

			def runOcr(self):
			@@ -98,7 +103,7 @@

			def getSkewAngle(self):
			neg = 255 - self.bwimg # get negative image
			cv2.imwrite("debug_1.png", neg)
			cv2.imwrite("/tmp/debug_1.png", neg)

			angle_counter = 0 # number of angles
			angle = 0.0 # collects sum of angles
			@@ -123,11 +128,11 @@
			except:
			skew = 0

			cv2.imwrite("debug_2.png", cimg)
			cv2.imwrite("/tmp/debug_2.png", cimg)
			return skew

			def locateUpMarkers(self, threshold=0.85, height=200):
			template = cv2.imread("template.png", 0)
			template = cv2.imread(markerfilename, 0)
			w, h = template.shape[::-1]
			crop_img = self.img[0:height, :]
			res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
			@@ -158,13 +163,13 @@
			for pt in zip(*loc[::-1]):
			cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)

			cv2.imwrite("debug_3.png", cimg)
			cv2.imwrite("/tmp/debug_3.png", cimg)

			self.xMarkerLocations = loc
			return loc

			def locateRightMarkers(self, threshold=0.85, width=200):
			template = cv2.imread("template.png", 0)
			template = cv2.imread(markerfilename, 0)
			w, h = template.shape[::-1]
			crop_img = self.img[:, -width:]
			res = cv2.matchTemplate(crop_img, template, cv2.TM_CCOEFF_NORMED)
			@@ -195,7 +200,7 @@
			for pt in zip(*loc[::-1]):
			cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)

			cv2.imwrite("debug_4.png", cimg)
			cv2.imwrite("/tmp/debug_4.png", cimg)

			self.yMarkerLocations = [loc[0], loc[1] + self.imgWidth - width]
			return self.yMarkerLocations

File was renamed from sid_process.py
			@@ -2,6 +2,11 @@
			import numpy as np
			from skimage import morphology, img_as_ubyte

			import pkg_resources

			templatefile = '/template-8.png' # always use slash
			template8 = pkg_resources.resource_filename(__name__, templatefile)


			def kernel(x, y):
			"""
			@@ -24,7 +29,7 @@
			# find biggest block of pixels
			image1 = cv2.morphologyEx(image2, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
			image1 = img_as_ubyte(image1 > 50)
			cv2.imwrite("sidblock1.png", image1)
			cv2.imwrite("/tmp/sidblock1.png", image1)
			im2, ctrs, hier = cv2.findContours(
			image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
			)
			@@ -77,7 +82,7 @@
			roi = cv2.resize(roi, (32, 32))

			# cv2.rectangle(image,(x,y),( x + w, y + h ),(0,255,0),2)
			cv2.imwrite("sid_no_{}.png".format(i), roi)
			cv2.imwrite("/tmp/sid_no_{}.png".format(i), roi)
			sid_no = sid_no + str(classifier.predict(roi.reshape(1, -1) / 255.0)[0])
			return sid_no

			@@ -96,14 +101,14 @@
			sid_no = ""
			sid_len = len(sid_mask)
			image=find_biggest_blob(image,original_image,sid_mask)
			cv2.imwrite("sidblock2.png", image)
			cv2.imwrite("/tmp/sidblock2.png", image)
			imgHeight, imgWidth = image.shape[0:2]
			numWidth = int(imgWidth / (sid_len))
			for i in range(0, sid_len):
			num = image[:, i * numWidth : (i + 1) * numWidth]
			num = img_as_ubyte(num < 128)
			num = cv2.resize(num, (32, 32))
			cv2.imwrite("sid_no_{}.png".format(i), num)
			cv2.imwrite("/tmp/sid_no_{}.png".format(i), num)
			sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])
			return sid_no

			@@ -123,8 +128,8 @@
			original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
			)
			block_image = img_as_ubyte(block_image < 50)
			cv2.imwrite("sid_3rd1.png", block_image)
			template = cv2.imread("template-8.png", 0)
			cv2.imwrite("/tmp/sid_3rd1.png", block_image)
			template = cv2.imread(template8, 0)
			w, h = template.shape[::-1]
			res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
			loc = np.where(res >= 0.75)
			@@ -147,7 +152,7 @@
			points = [loc_filtered_y[a], loc_filtered_x[a]]
			for pt in zip(*points[::-1]):
			cv2.rectangle(cimg, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
			cv2.imwrite("sid_3rd2.png", cimg)
			cv2.imwrite("/tmp/sid_3rd2.png", cimg)

			sid_no = ""
			for i, pt in enumerate(zip(*points[::-1])):
			@@ -158,7 +163,7 @@
			num = cv2.resize(num, (32, 32))
			except:
			return ""
			cv2.imwrite("sid_3no_{}.png".format(i), num)
			cv2.imwrite("/tmp/sid_3no_{}.png".format(i), num)
			sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])

			return sid_no
			@@ -182,7 +187,7 @@
			image = 255 - image
			image_original = image.copy()
			image = img_as_ubyte(image > 100)
			cv2.imwrite("enSID0.png", image)
			cv2.imwrite("/tmp/enSID0.png", image)

			# Remove noise
			image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3)
			@@ -197,7 +202,7 @@

			# Skeletonization
			image = img_as_ubyte(morphology.thin(image > 128))
			cv2.imwrite("enSID1.png", image)
			cv2.imwrite("/tmp/enSID1.png", image)

			# Stub removal (might not be necessary if thinning instead of skeletonize is used above)
			# Making lines stronger
			@@ -207,7 +212,7 @@
			# Thinning again
			image = img_as_ubyte(morphology.skeletonize(image > 0.5))
			image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
			cv2.imwrite("enhancedSID.png", image)
			cv2.imwrite("/tmp/enhancedSID.png", image)

			sid_no = segment_by_contours(image, image_original, classifier, sid_mask)

New file
			@@ -0,0 +1,8 @@
			import os
			import django
			os.chdir('/home/samo/programiranje/django/sizif-web/aoi')
			os.environ.setdefault("DJANGO_SETTINGS_MODULE", "aoi.settings")
			django.setup()
			from exam import models

New file
			@@ -0,0 +1,12 @@
			#!/usr/bin/python3
			from distutils.core import setup

			setup(
			name="aoi_ocr",
			version="0.1a",
			description="OCR of aoi papers.",
			author="Samo Penic",
			author_email="samo.penic@gmail.com",
			url="",
			packages=["aoi_ocr"],
			)