Multiple SID robustness..
| | |
| | | self.filename = filename |
| | | self.invalid = None |
| | | self.QRData = None |
| | | self.settings={'answer_treshold':0.25,} if settings is None else settings |
| | | self.settings = {"answer_threshold": 0.25} if settings is None else settings |
| | | self.errors = [] |
| | | self.warnings = [] |
| | | self.sid=None |
| | |
| | | self.data = None |
| | | self.invalid = True |
| | | return |
| | | if(len(d)>1): #if there are multiple codes, get first ean or qr code available. |
| | | for dd in d: |
| | | if(dd.type=="EAN13" or dd.type=="QR"): |
| | | d[0]=dd |
| | | break |
| | | self.QRDecode = d |
| | | self.QRData = d[0].data |
| | | xpos = d[0].rect.left |
| | |
| | | int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth), |
| | | ], |
| | | self.sid_classifier, |
| | | sid_mask |
| | | sid_mask, |
| | | ) |
| | | [self.errors.append(e) for e in err] |
| | | [self.warnings.append(w) for w in warn] |
| | | return es |
| | | |
| | | |
def get_code_data(self):
    """Decode the scanned barcode payload into the paper's identifying fields.

    Reads ``self.QRData`` (raw payload bytes) and, when a payload exists,
    ``self.QRDecode[0].type`` to pick the payload layout.

    Returns:
        dict: always contains the keys ``exam_id``, ``page_no``,
        ``paper_id``, ``faculty_id`` and ``sid``.

        * No payload: every value is ``None`` and an error message is
          appended to ``self.errors``.
        * ``EAN13`` payload: digits 0-6 are the exam id, digit 7 the page
          number, and the four digits before the final one the paper id;
          ``faculty_id`` and ``sid`` are ``None``.
        * Any other code type: a comma separated payload read as
          ``faculty_id,exam_id,paper_id,page_no[,sid]``.

    Raises:
        ValueError, IndexError: if the payload does not follow the layout
        expected for its code type.
    """
    if self.QRData is None:
        self.errors.append("Could not read QR or EAN code! Not an exam?")
        return {
            "exam_id": None,
            "page_no": None,
            "paper_id": None,
            "faculty_id": None,
            "sid": None,
        }

    qrdata = bytes.decode(self.QRData, "utf8")

    if self.QRDecode[0].type == "EAN13":
        return {
            "exam_id": int(qrdata[0:7]),
            "page_no": int(qrdata[7]),
            "paper_id": int(qrdata[-5:-1]),
            "faculty_id": None,
            "sid": None,
        }

    data = qrdata.split(",")
    return {
        "exam_id": int(data[1]),
        "page_no": int(data[3]),
        "paper_id": int(data[2]),
        "faculty_id": int(data[0]),
        # The fifth field is optional.  The key is emitted regardless so
        # that all three branches return the same shape and callers can
        # index ``["sid"]`` without a KeyError.
        "sid": data[4] if len(data) > 4 else None,
    }
| | | |
def get_paper_ocr_data(self):
    """Assemble the complete OCR result for this paper as a plain dict.

    Starts from ``self.get_code_data()`` and adds:

    * ``qr``: the raw payload stored in ``self.QRData``;
    * ``errors`` / ``warnings``: the lists collected on this instance
      (the same list objects, not copies);
    * ``up_position`` / ``right_position``: marker coordinates divided by
      the image width and height;
    * ``ans_matrix``: ``self.answerMatrix`` binarised to 0/1 using the
      ``answer_threshold`` setting;
    * ``sid``: taken from the code payload when present, otherwise read
      with ``self.get_enhanced_sid()`` — but only for page number 0.

    Returns:
        dict: the code data extended with the keys listed above.
    """
    data = self.get_code_data()
    data["qr"] = self.QRData
    data["errors"] = self.errors
    data["warnings"] = self.warnings

    # NOTE(review): both positions are computed from marker index 1, so
    # the two entries are always identical.  This looks like a copy-paste
    # slip, but the intended index for one of them cannot be determined
    # from this method alone — confirm against the marker detection code.
    data["up_position"] = (
        list(self.xMarkerLocations[1] / self.imgWidth),
        list(self.yMarkerLocations[1] / self.imgHeight),
    )
    data["right_position"] = (
        list(self.xMarkerLocations[1] / self.imgWidth),
        list(self.yMarkerLocations[1] / self.imgHeight),
    )

    # Accept the current key, then the legacy misspelling, then fall back
    # to the class default, so a settings dict that only carries e.g.
    # ``sid_mask`` no longer raises KeyError here.
    settings = self.settings or {}
    threshold = settings.get(
        "answer_threshold", settings.get("answer_treshold", 0.25)
    )
    data["ans_matrix"] = ((np.array(self.answerMatrix) > threshold) * 1).tolist()

    # The code payload may not carry a student id at all; treat a missing
    # key the same as an explicit None instead of failing on the lookup.
    sid = data.get("sid")
    if sid is None and data["page_no"] == 0:
        sid = self.get_enhanced_sid()
    data["sid"] = sid
    return data
| | |
| | | |
| | | from glob import glob |
| | | |
| | | settings = {"sid_mask": "64xx0xxx", "answer_treshold": 0.25} |
| | | settings = {"sid_mask": "64xx0xxx", "answer_threshold": 0.25} |
| | | classifier = joblib.load("filename.joblib") |
| | | |
| | | #p = Paper(filename="testpage300dpi_scan1.png") |
| | | #p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_treshold": 0.25}) |
| | | #p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_threshold": 0.25}) |
| | | #p=Paper(filename='processed_scans/20141016095134535_0006.tif', sid_classifier=classifier, settings=settings) |
| | | #p = Paper(filename="processed_scans/20151111080408825_0001.tif",sid_classifier=classifier,settings=settings,) |
| | | #p=Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings) |
| | |
| | | "processed_scans/20141021095744144_0009.tif", |
| | | "processed_scans/20141028095553745_0018.tif", |
| | | "processed_scans/20151013180545275_0011.tif", |
| | | "processed_scans/20160408140801098_0004.tif" |
| | | "processed_scans/20160408140801098_0004.tif", |
| | | "processed_scans/20160510075445995_0026.tif" |
| | | ] |
| | | p=Paper(filename=pa[8], sid_classifier=classifier, settings=settings) |
| | | p=Paper(filename=pa[9], sid_classifier=classifier, settings=settings) |
| | | |
| | | # print(p.QRData) |
| | | # print(p.errors) |
| | |
| | | |
| | | |
| | | print(p.get_paper_ocr_data()) |
| | | exit(0) |
| | | |
| | | |
| | | filelist = glob("processed_scans/*.tif") |
| | | wrong_sid=0; |
| | | total=0 |
| | | for f in sorted(filelist): |
| | | print("processing: {}".format(f)) |
| | | print( |
| | | f, |
| | | Paper( |
| | | filename=f, sid_classifier=classifier, settings=settings |
| | | ).get_paper_ocr_data(), |
| | | ) |
| | | p=Paper(filename=f, sid_classifier=classifier, settings=settings).get_paper_ocr_data() |
| | | print(f,p) |
| | | if(p['page_no']==0): |
| | | total+=1 |
| | | if(len(p['errors'])!=0): |
| | | wrong_sid+=1 |
| | | if total%10 == 0: |
| | | print("Total:{}, wrong SID: {}".format(total,wrong_sid)) |
| | | |
| | | print("Total:{}, wrong SID: {}".format(total,wrong_sid)) |
| | |
| | | cv2.imwrite("enhancedSID.png", image) |
| | | |
| | | sid_no = segment_by_contours(image, image_original, classifier, sid_mask) |
| | | print(sid_no) |
| | | |
| | | if len(sid_no) != len(sid_mask) or not sid_compare(sid_no, sid_mask): |
| | | sid_warn.append("Trying second SID algorithm.") |
| | | sid_no = segment_by_7segments(image, image_original, sid_mask, classifier) |
| | | print(sid_no) |
| | | |
| | | if (len(sid_no)) != len(sid_mask): |
| | | sid_no = segment_by_sid_len(image, image_original, sid_mask, classifier) |
| | | sid_warn.append("Trying third SID algorithm.") |