commit | author | age
|
0d97e9
|
1 |
# Script setup: imports, the classifier file shipped inside the aoi_ocr
# package, and the settings dict handed to Paper.
from aoi_ocr.Ocr import Paper

try:
    # Original import location. scikit-learn deprecated this vendored copy in
    # 0.21 and removed it in 0.23, so it only exists on older installs.
    from sklearn.externals import joblib
except ImportError:
    # Newer scikit-learn: joblib is available only as the standalone package.
    import joblib

import pkg_resources
from glob import glob

# Resource names are '/'-separated and relative to the package. An absolute
# name (leading '/') is disallowed by the pkg_resources documentation and
# triggers its resource-path deprecation warning, so no leading slash here.
path = 'filename.joblib'  # always use '/' as the separator
filepath = pkg_resources.resource_filename('aoi_ocr', path)

settings = {"sid_mask": "11x0xxxx", "answer_threshold": 0.25}

# joblib.load unpickles the file; the path comes from the installed aoi_ocr
# package, not from user input.
classifier = joblib.load(filepath)
e555c0
|
11 |
|
6fde5f
|
12 |
# Alternative invocations, currently disabled. Uncomment one (and comment out
# the active call at the bottom of this section) to run a different input.
# p = Paper(filename="testpage300dpi_scan1.png")
# p = Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_threshold": 0.25})
# p = Paper(filename='processed_scans/20141016095134535_0006.tif', sid_classifier=classifier, settings=settings)
# p = Paper(filename="processed_scans/20151111080408825_0001.tif", sid_classifier=classifier, settings=settings)
# p = Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings)

# Hand-picked scans, addressed by index from the disabled pa[9] call below.
# NOTE(review): the "..._0028.tif" entry appears twice; possibly a copy/paste
# slip. Left untouched because removing it would shift the later indices.
pa = [
    "processed_scans/20141016095134535_0006.tif",
    "processed_scans/20141016095134535_0028.tif",
    "processed_scans/20141016095134535_0028.tif",
    "processed_scans/20141016095134535_0037.tif",
    "processed_scans/20141021095744144_0005.tif",
    "processed_scans/20141021095744144_0009.tif",
    "processed_scans/20141028095553745_0018.tif",
    "processed_scans/20151013180545275_0011.tif",
    "processed_scans/20160408140801098_0004.tif",
    "processed_scans/20160510075445995_0026.tif",
]
# p = Paper(filename=pa[9], sid_classifier=classifier, settings=settings)

# Active input: a single sheet processed with the module-level classifier and
# settings.
p = Paper(
    filename='test3.tif',
    sid_classifier=classifier,
    settings=settings,
)
02e0f7
|
31 |
|
0436f6
|
32 |
# Optional diagnostics on the Paper instance `p`; all disabled.
# print(p.QRData)
# print(p.errors)

# print(p.getSkewAngle())
# print(p.locateUpMarkers())
# print(p.locateRightMarkers())
# print(p.answerMatrix)
# p.get_enhanced_sid()

# Print the OCR result for the sheet held in `p`.
print(p.get_paper_ocr_data())

# Stop with status 0; nothing after this statement runs.
# The bare `exit` name is installed by the `site` module for interactive use
# and is missing when site is disabled (e.g. `python -S`), so raise SystemExit
# directly instead of calling it.
raise SystemExit(0)
d5c694
|
45 |
# Batch pass over every .tif under processed_scans/: count the sheets whose
# OCR data reports page_no == 0 and, among those, the ones with a non-empty
# 'errors' entry.
# As the file stands, the script exits before reaching this section.
# NOTE(review): the listing this code was recovered from had lost its
# indentation. The nesting of the error check and the progress print under
# the page_no test is reconstructed; confirm against the original file.
filelist = glob("processed_scans/*.tif")
wrong_sid = 0
total = 0
for scan_path in sorted(filelist):
    print("processing: {}".format(scan_path))
    paper = Paper(
        filename=scan_path,
        sid_classifier=classifier,
        settings=settings,
    )
    ocr_data = paper.get_paper_ocr_data()
    print(scan_path, ocr_data)

    # Only sheets reporting page_no == 0 take part in the statistics.
    if ocr_data['page_no'] != 0:
        continue

    total += 1
    if len(ocr_data['errors']) != 0:
        wrong_sid += 1
    # Progress line after every tenth counted sheet.
    if total % 10 == 0:
        print("Total:{}, wrong SID: {}".format(total, wrong_sid))

# Final summary after all scans have been processed.
print("Total:{}, wrong SID: {}".format(total, wrong_sid))