commit | author | age
|
0d97e9
|
1 |
from aoi_ocr.Ocr import Paper |
02e0f7
|
2 |
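from sklearn.externals import joblib  # NOTE(review): sklearn.externals.joblib was removed in scikit-learn 0.23; newer installs need `import joblib` |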
0d97e9
|
3 |
import pkg_resources |
5460bf
|
4 |
|
SP |
5 |
path = "/filename.joblib" # resource names always use "/" as the separator, regardless of OS |
|
6 |
filepath = pkg_resources.resource_filename("aoi_ocr", path) |
e555c0
|
7 |
|
e2fa6a
|
8 |
from glob import glob |
762a5e
|
9 |
|
9c222b
|
10 |
settings = {"sid_mask": "11x0xxxx", "answer_threshold": 0.25} |
0d97e9
|
11 |
classifier = joblib.load(filepath) |
e555c0
|
12 |
|
5460bf
|
13 |
# p = Paper(filename="testpage300dpi_scan1.png") |
SP |
14 |
# p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_threshold": 0.25}) |
|
15 |
# p=Paper(filename='processed_scans/20141016095134535_0006.tif', sid_classifier=classifier, settings=settings) |
|
16 |
# p = Paper(filename="processed_scans/20151111080408825_0001.tif",sid_classifier=classifier,settings=settings,) |
|
17 |
# p=Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings) |
d5c694
|
18 |
pa = [ |
SP |
19 |
"processed_scans/20141016095134535_0006.tif", |
|
20 |
"processed_scans/20141016095134535_0028.tif", |
|
21 |
"processed_scans/20141016095134535_0028.tif", |
|
22 |
"processed_scans/20141016095134535_0037.tif", |
|
23 |
"processed_scans/20141021095744144_0005.tif", |
|
24 |
"processed_scans/20141021095744144_0009.tif", |
|
25 |
"processed_scans/20141028095553745_0018.tif", |
cf921b
|
26 |
"processed_scans/20151013180545275_0011.tif", |
e0996e
|
27 |
"processed_scans/20160408140801098_0004.tif", |
5460bf
|
28 |
"processed_scans/20160510075445995_0026.tif", |
d5c694
|
29 |
] |
5460bf
|
30 |
# p=Paper(filename=pa[9], sid_classifier=classifier, settings=settings) |
93d924
|
31 |
p = Paper(filename="test3011/sizif000.tif", sid_classifier=classifier, settings=settings) |
02e0f7
|
32 |
|
0436f6
|
33 |
# print(p.QRData) |
SP |
34 |
# print(p.errors) |
e555c0
|
35 |
|
0436f6
|
36 |
# print(p.getSkewAngle()) |
5460bf
|
37 |
# print(p.locateUpMarkers()) |
0436f6
|
38 |
# print(p.locateRightMarkers()) |
SP |
39 |
# print(p.answerMatrix) |
|
40 |
# p.get_enhanced_sid() |
|
41 |
|
|
42 |
|
|
43 |
print(p.get_paper_ocr_data()) |
e0996e
|
44 |
|
93d924
|
45 |
#exit(0) |
SP |
46 |
filelist = glob("test3011/*.tif") |
5460bf
|
47 |
wrong_sid = 0 |
SP |
48 |
total = 0 |
d5c694
|
49 |
for f in sorted(filelist): |
SP |
50 |
print("processing: {}".format(f)) |
5460bf
|
51 |
p = Paper( |
SP |
52 |
filename=f, sid_classifier=classifier, settings=settings |
|
53 |
).get_paper_ocr_data() |
|
54 |
print(f, p) |
|
55 |
if p["page_no"] == 0: |
|
56 |
total += 1 |
|
57 |
if len(p["errors"]) != 0: |
|
58 |
wrong_sid += 1 |
|
59 |
if total % 10 == 0: |
|
60 |
print("Total:{}, wrong SID: {}".format(total, wrong_sid)) |
e0996e
|
61 |
|
5460bf
|
62 |
print("Total:{}, wrong SID: {}".format(total, wrong_sid)) |