@@ -66,20 +66,26 @@ def __call__(self, document: Document):
6666
6767 block_id = BlockId (page_id = page .page_id , block_id = block .block_id , block_type = block .block_type )
6868 confidence_thresh , relabel_block_type = self .block_relabel_map [block .block_type ]
69- confidence = block .top_k .get (block .block_type )
70- if confidence > confidence_thresh :
71- logger .debug (f"Skipping relabel for { block_id } ; Confidence: { confidence } > Confidence Threshold { confidence_thresh } for re-labelling" )
72- continue
73-
74- new_block_cls = get_block_class (relabel_block_type )
75- new_block = new_block_cls (
76- polygon = deepcopy (block .polygon ),
77- page_id = block .page_id ,
78- structure = deepcopy (block .structure ),
79- text_extraction_method = block .text_extraction_method ,
80- source = "heuristics" ,
81- top_k = block .top_k ,
82- metadata = block .metadata
83- )
84- page .replace_block (block , new_block )
85- logger .debug (f"Relabelled { block_id } to { relabel_block_type } " )
69+
70+ confidence = 0.0
71+ if hasattr (block , 'top_k' ) and block .top_k is not None :
72+ confidence = block .top_k .get (block .block_type , 0.0 )
73+
74+ if confidence_thresh >= 1.0 or confidence <= confidence_thresh :
75+ try :
76+ new_block_cls = get_block_class (relabel_block_type )
77+ new_block = new_block_cls (
78+ polygon = deepcopy (block .polygon ),
79+ page_id = block .page_id ,
80+ structure = deepcopy (block .structure ),
81+ text_extraction_method = block .text_extraction_method ,
82+ source = "heuristics" ,
83+ top_k = block .top_k ,
84+ metadata = block .metadata
85+ )
86+ page .replace_block (block , new_block )
87+ logger .debug (f"Relabelled { block_id } to { relabel_block_type } " )
88+ except Exception as e :
89+ logger .warning (f"Failed to relabel block { block_id } : { e } " )
90+ else :
91+ logger .debug (f"Skipping relabel for { block_id } ; Confidence: { confidence } > Confidence Threshold { confidence_thresh } " )
0 commit comments