commit | author | age
|
9efc18
|
1 |
import cv2 |
SP |
2 |
import numpy as np |
762a5e
|
3 |
from skimage import morphology, img_as_ubyte |
02e0f7
|
4 |
|
0d97e9
|
5 |
import pkg_resources |
SP |
6 |
|
5460bf
|
7 |
# Resource name of the template image that segment_by_7segments() loads for
# template matching. pkg_resources resource names use "/" as the separator on
# every platform, never os.sep.
templatefile = "/template-8.png"  # always use slash
# Filesystem path of that resource, resolved relative to this module's package.
template8 = pkg_resources.resource_filename(__name__, templatefile)
SP |
9 |
|
9efc18
|
10 |
|
SP |
11 |
def kernel(x, y):
    """
    Build a rectangular structuring element filled with ones.

    :param x: number of rows of the kernel
    :param y: number of columns of the kernel
    :return: ``uint8`` array of shape ``(x, y)`` whose elements are all 1
    """
    shape = (x, y)
    return np.full(shape, 1, dtype=np.uint8)
SP |
16 |
|
6fde5f
|
17 |
|
5460bf
|
18 |
def find_biggest_blob(image, original_image, sid_mask):
    """
    Crop ``image`` to the bounding box of its largest blob.

    The image is dilated with a 5x25 kernel so that neighbouring strokes merge
    into one block, thresholded at 50, and the external contour with the
    largest area is taken. The returned crop uses the full height of that
    contour's bounding box; horizontally it starts 25 px inside the left edge
    (moved further left by an amount chosen from the first mask character)
    and ends 30 px inside the right edge.

    Side effect: writes the dilated block image to ``/tmp/sidblock1.png``.

    :param image: image in which the blob is searched and which is cropped
    :param original_image: not used; kept so existing callers keep working
    :param sid_mask: student id mask; only its first character is inspected
    :return: cropped view of ``image``
    :raises IndexError: if ``sid_mask`` is empty or no contour is found
    """
    # How far the left crop edge is moved outwards depends on the first
    # character of the mask.
    if sid_mask[0] == "1":
        move_left = 45
    elif sid_mask[0] == "x":
        move_left = 50
    else:
        move_left = 0

    # find biggest block of pixels
    merged = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 25), iterations=3)
    merged = img_as_ubyte(merged > 50)
    cv2.imwrite("/tmp/sidblock1.png", merged)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); index -2 is the contour list in every version.
    contours = cv2.findContours(
        merged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    biggest = sorted(contours, key=cv2.contourArea)[-1]  # largest area is last
    x, y, w, h = cv2.boundingRect(biggest)

    # Clamp at 0: a negative start would be read by numpy as an offset from
    # the right edge and silently produce a wrong (usually empty) crop.
    left = max(x + 25 - move_left, 0)
    return image[y : y + h, left : x + w - 30]  # +25,-25
SP |
42 |
|
5460bf
|
43 |
|
d5c694
|
44 |
def sid_compare(sid_no, sid_mask):
    """
    Check whether a recognised student id is valid according to the mask.

    A mask position holding "x" accepts any character; every other mask
    character must equal the character at the same position of ``sid_no``.
    The two strings must also have the same length: without that check the
    pairwise comparison stops at the shorter one, so an empty or truncated
    id would be reported as valid.

    :param sid_no: recognised student id as a string
    :param sid_mask: mask made of literal characters and "x" wildcards
    :return: True if they match, else False
    """
    if len(sid_no) != len(sid_mask):
        return False
    return all(
        mask_ch == "x" or mask_ch == digit
        for mask_ch, digit in zip(sid_mask, sid_no)
    )
|
55 |
|
|
56 |
|
5460bf
|
57 |
def segment_by_contours(image, original_image, classifier, sid_mask):
    """
    First algorithm. It segments numerals with contours, so it only works for
    numbers whose individual numerals do not touch.

    The image is cropped with ``find_biggest_blob``, its external contours are
    visited from left to right, and each one is turned into one character:
    a bounding box narrower than half its height is taken to be "1" without
    asking the classifier; any other box is cut out, thresholded with
    ``< 128``, resized to 32x32 and passed to ``classifier.predict``.

    Side effects: writes ``/tmp/sid_contour1.png`` and one
    ``/tmp/sid_no_<i>.png`` per classified contour.

    :param image: enhanced image containing the student id
    :param original_image: forwarded to ``find_biggest_blob``
    :param classifier: object whose ``predict`` takes a ``(1, 1024)`` array
        scaled to 0..1 and returns a sequence; its first item is used
    :param sid_mask: student id mask, forwarded to ``find_biggest_blob``
    :return: student id as a string
    """
    image = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sid_contour1.png", image)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); index -2 is the contour list in every version.
    contours = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    # Read the numerals left to right: order by the x of the bounding box.
    sorted_ctrs = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

    digits = []
    for i, ctr in enumerate(sorted_ctrs):
        x, y, w, h = cv2.boundingRect(ctr)
        if w < h / 2:
            # Tall and narrow: treated as the numeral 1.
            digits.append("1")
            continue
        roi = image[y : y + h, x : x + w]
        roi = img_as_ubyte(roi < 128)  # pixels below 128 become 255, others 0
        roi = cv2.resize(roi, (32, 32))
        cv2.imwrite("/tmp/sid_no_{}.png".format(i), roi)
        digits.append(str(classifier.predict(roi.reshape(1, -1) / 255.0)[0]))
    return "".join(digits)
|
89 |
|
|
90 |
|
d5c694
|
91 |
def segment_by_sid_len(image, original_image, sid_mask, classifier):
    """
    Third algorithm. It takes the biggest "blob" in the image and cuts it by
    force into as many equally wide slices as the mask has characters.
    Slice borders do not necessarily fall between numerals, so some tweaking
    may still be needed.

    Side effects: writes ``/tmp/sidblock2.png`` and one
    ``/tmp/sid_no_<i>.png`` per slice.

    :param image: enhanced image containing the student id
    :param original_image: forwarded to ``find_biggest_blob``
    :param sid_mask: student id mask; its length is the number of slices
    :param classifier: object whose ``predict`` takes a ``(1, 1024)`` array
        scaled to 0..1 and returns a sequence; its first item is used
    :return: student id as a string
    """
    digit_count = len(sid_mask)
    blob = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sidblock2.png", blob)

    _, blob_width = blob.shape[0:2]
    slice_width = int(blob_width / digit_count)

    recognised = []
    for idx in range(digit_count):
        left = idx * slice_width
        cell = blob[:, left : left + slice_width]
        cell = cv2.resize(img_as_ubyte(cell < 128), (32, 32))
        cv2.imwrite("/tmp/sid_no_{}.png".format(idx), cell)
        prediction = classifier.predict(cell.reshape(1, -1) / 255.0)
        recognised.append(str(prediction[0]))
    return "".join(recognised)
|
115 |
|
6fde5f
|
116 |
|
SP |
117 |
def segment_by_7segments(image, original_image, sid_mask, classifier):
    """
    Second algorithm. It closes the original image so that all seven segments
    of every numeral become visible (as in 8888888), then locates the numerals
    by matching a template of an "8". It works if the scanned gray level is
    high enough.

    Template hits scoring at least 0.75 are shifted 10 px up and left and
    sorted by x; of hits that lie within half a template width of the next
    one, only the last is kept. Each kept position is cut out of ``image``,
    thresholded with ``< 128``, resized to 32x32 and classified.

    Side effects: writes ``/tmp/sid_3rd1.png``, ``/tmp/sid_3rd2.png`` and one
    ``/tmp/sid_3no_<i>.png`` per numeral.

    :param image: enhanced single-channel image the numerals are cut from
    :param original_image: image on which the template matching is done
    :param sid_mask: not used by this algorithm
    :param classifier: object whose ``predict`` takes a ``(1, 1024)`` array
        scaled to 0..1 and returns a sequence; its first item is used
    :return: student id number as a string; "" if the template is not found
        or a cut-out cannot be resized
    """
    block_image = cv2.morphologyEx(
        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
    )
    block_image = img_as_ubyte(block_image < 50)
    cv2.imwrite("/tmp/sid_3rd1.png", block_image)

    template = cv2.imread(template8, 0)
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
    match_rows, match_cols = np.where(res >= 0.75)
    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if match_cols.size == 0:
        return ""

    # Shift every hit by 10 px and order the (x, y) pairs by x, then y.
    hits = sorted(zip(match_cols - 10, match_rows - 10))
    xs = np.array([hit_x for hit_x, _ in hits])
    ys = np.array([hit_y for _, hit_y in hits])

    # Neighbouring hits closer than half a template width belong to the same
    # numeral; keep only the last hit of each such run.
    keep = np.append(np.diff(xs) > int(w / 2), True)
    # Plain ints: not every OpenCV build accepts numpy scalars as points.
    corners = [(int(px), int(py)) for px, py in zip(xs[keep], ys[keep])]

    for px, py in corners:
        cv2.rectangle(cimg, (px, py), (px + w, py + h), (0, 255, 255), 2)
    cv2.imwrite("/tmp/sid_3rd2.png", cimg)

    digits = []
    for i, (px, py) in enumerate(corners):
        num = image[py : py + h, px : px + w]
        num = img_as_ubyte(num < 128)
        try:
            num = cv2.resize(num, (32, 32))
        except cv2.error:
            # An empty cut-out (position outside the image) cannot be
            # resized; give up so the caller can try another algorithm.
            return ""
        cv2.imwrite("/tmp/sid_3no_{}.png".format(i), num)
        digits.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))

    return "".join(digits)
ac766e
|
171 |
|
6fde5f
|
172 |
|
762a5e
|
173 |
def getSID(image, classifier, sid_mask):
    """
    Try several approaches on the image to read the student id number.

    The image is inverted, binarised and cleaned of noise, thinned, thickened
    again and skeletonized once more to obtain a normalized image, which is
    then handed to the segmentation and recognition functions:

    1. ``segment_by_contours``
    2. ``segment_by_7segments`` if the first result has the wrong length or
       does not fit the mask
    3. ``segment_by_sid_len`` if the result still has the wrong length

    The two MORPH_OPEN steps are the ones to tweak.

    Side effects: writes ``/tmp/enSID0.png``, ``/tmp/enSID1.png`` and
    ``/tmp/enhancedSID.png`` in addition to the files written by the
    segmentation functions.

    :param image: image of the student id field (presumably 8-bit grayscale,
        since it is inverted as ``255 - image``)
    :param classifier: classifier passed on to the segmentation functions
    :param sid_mask: student id mask the result is validated against
    :return: (student_id, error, warning) student id as a string, list of
        errors and list of warnings raised during the recognition
    """
    warnings = []

    inverted = 255 - image
    grey_copy = inverted.copy()
    work = img_as_ubyte(inverted > 70)
    cv2.imwrite("/tmp/enSID0.png", work)

    cleanup_steps = (
        (cv2.MORPH_OPEN, (3, 3), 3),   # remove noise
        (cv2.MORPH_CLOSE, (5, 1), 4),  # connect non connected parts
        (cv2.MORPH_OPEN, (3, 3), 1),   # light noise removal after closing
    )
    for operation, shape, repeats in cleanup_steps:
        work = cv2.morphologyEx(work, operation, kernel(*shape), iterations=repeats)

    # Thin the strokes
    work = img_as_ubyte(morphology.thin(work > 128))
    cv2.imwrite("/tmp/enSID1.png", work)

    # Make the lines stronger and close remaining gaps
    work = cv2.morphologyEx(work, cv2.MORPH_DILATE, kernel(5, 2), iterations=1)
    work = cv2.morphologyEx(work, cv2.MORPH_CLOSE, kernel(10, 10))

    # Reduce to a skeleton again and thicken it to a uniform stroke
    work = img_as_ubyte(morphology.skeletonize(work > 0.5))
    work = cv2.morphologyEx(work, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("/tmp/enhancedSID.png", work)

    sid_no = segment_by_contours(work, grey_copy, classifier, sid_mask)

    first_ok = len(sid_no) == len(sid_mask) and sid_compare(sid_no, sid_mask)
    if not first_ok:
        warnings.append("Trying second SID algorithm.")
        sid_no = segment_by_7segments(work, grey_copy, sid_mask, classifier)

    if len(sid_no) != len(sid_mask):
        sid_no = segment_by_sid_len(work, grey_copy, sid_mask, classifier)
        warnings.append("Trying third SID algorithm.")

    errors = [] if sid_compare(sid_no, sid_mask) else ["Wrong SID!"]

    return sid_no, errors, warnings