Development of the OCR part of AOI
1b70cab35ec6399d047bb717257c1d2bc579d592..bcebb8f17360fab0f4d22178bef2d33851fef312
2018-11-30 Samo Penic
Moved the answer ROIs by a few pixels. The recommended threshold is around 0.3.
bcebb8 diff | tree
2018-11-28 Samo Penic
Trying to make better predictions.
7621b3 diff | tree
4 files modified
69 ■■■■ changed files
.idea/sonarIssues.xml 50 ●●●●● patch | view | raw | blame | history
aoiOcr.py 2 ●●● patch | view | raw | blame | history
aoi_ocr/Ocr.py 11 ●●●●● patch | view | raw | blame | history
aoi_ocr/sid_process.py 6 ●●●● patch | view | raw | blame | history
.idea/sonarIssues.xml
@@ -18,12 +18,32 @@
            <set />
          </value>
        </entry>
        <entry key="$USER_HOME$/.local/share/virtualenvs/aoi-4cpS9I5x/lib/python3.6/site-packages/aoi_ocr/Ocr.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$USER_HOME$/PycharmProjects/berki-parse/aoi_gen/BerkiParse.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$USER_HOME$/PycharmProjects/berki-parse/aoi_gen/Generators.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$USER_HOME$/PycharmProjects/berki-parse/aoi_gen/Problem.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$USER_HOME$/PycharmProjects/berki-parse/aoi_gen/Variable.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$USER_HOME$/PycharmProjects/berki-parse/aoi_gen/tests/formatter_test.py">
          <value>
            <set />
          </value>
@@ -88,7 +108,22 @@
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/exam/templates/exam/index.html">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/exam/templates/exam/postprocess.html">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/exam/templates/exam/statistics.html">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/exam/urls.py">
          <value>
            <set />
          </value>
@@ -153,6 +188,11 @@
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/forms.py">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/static/studentpages/ux/basic-widgets-sl.js">
          <value>
            <set />
@@ -163,6 +203,16 @@
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/templates/studentpages/complete_data.html">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/templates/studentpages/complete_thanks.html">
          <value>
            <set />
          </value>
        </entry>
        <entry key="$PROJECT_DIR$/../../django/sizif-web/aoi/studentpages/templates/studentpages/index.html">
          <value>
            <set />
aoiOcr.py
@@ -28,7 +28,7 @@
    "processed_scans/20160510075445995_0026.tif",
]
# p=Paper(filename=pa[9], sid_classifier=classifier, settings=settings)
p = Paper(filename="sizif000.tif", sid_classifier=classifier, settings=settings)
p = Paper(filename="sizif-test000.tif", sid_classifier=classifier, settings=settings)
# print(p.QRData)
# print(p.errors)
aoi_ocr/Ocr.py
@@ -216,12 +216,12 @@
        self.locateUpMarkers()
        self.locateRightMarkers()
        roixoff = 10
        roiyoff = 5
        roiwidth = 50
        roixoff = 4
        roiyoff = 0
        roiwidth = 55
        roiheight = roiwidth
        totpx = roiwidth * roiheight
        cimg = cv2.cvtColor(self.img, cv2.COLOR_GRAY2BGR)
        self.answerMatrix = []
        for y in self.yMarkerLocations[0]:
            oneline = []
@@ -233,6 +233,8 @@
                # cv2.imwrite('ans_x'+str(x)+'_y_'+str(y)+'.png',roi)
                black = totpx - cv2.countNonZero(roi)
                oneline.append(black / totpx)
                cv2.rectangle(cimg, (x - roixoff,y - roiyoff), (x + int(roiwidth - roixoff),y + int(roiheight - roiyoff)), (0, 255, 255), 2)
            cv2.imwrite('/tmp/debug_answers.png',cimg)
            self.answerMatrix.append(oneline)
    def get_enhanced_sid(self):
@@ -307,4 +309,5 @@
        output_filename=os.path.join(self.output_path, '.'.join(self.filename.split('/')[-1].split('.')[:-1])+".png")
        cv2.imwrite(output_filename, self.img)
        data['output_filename']=output_filename
        print(np.array(self.answerMatrix))
        return data
aoi_ocr/sid_process.py
@@ -75,7 +75,7 @@
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        # Getting ROI
        if w < h / 2:
        if w < h / 3:
            sid_no = sid_no + "1"
            continue
        roi = image[y : y + h, x : x + w]
@@ -191,7 +191,7 @@
    cv2.imwrite("/tmp/enSID0.png", image)
    # Remove noise
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=3)
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=4)
    # Closing. Connect non connected parts
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 1), iterations=4)
@@ -199,7 +199,7 @@
    # Again noise removal after closing
    #image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
    # don't do too much noise removal.
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=2)
    # Skeletonization
    #image = img_as_ubyte(morphology.skeletonize(image > 128))