Skip to content

Commit 39dafaa

Browse files
authored
feat(medcat): CU-869azdc7x: Dynamic imports for legacy conversion (#198)
* CU-869azdc7x: Import DeID stuff in a dynamic way for conversion
* CU-869azdc7x: Import RelCAT stuff in a dynamic way for conversion
* CU-869azdc7x: Fix some whitespace stuff
* CU-869azdc7x: Import MetaCAT stuff in a dynamic way for conversion
* CU-869azdc7x: Use fewer protected fields
1 parent 45c6fed commit 39dafaa

File tree

1 file changed

+32
-28
lines changed

1 file changed

+32
-28
lines changed

medcat-v2/medcat/utils/legacy/conversion_all.py

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111
from medcat.utils.legacy.convert_cdb import get_cdb_from_old
1212
from medcat.utils.legacy.convert_config import get_config_from_old
1313
from medcat.utils.legacy.convert_vocab import get_vocab_from_old
14-
from medcat.utils.legacy.convert_meta_cat import get_meta_cat_from_old
15-
from medcat.utils.legacy.convert_rel_cat import get_rel_cat_from_old
16-
from medcat.utils.legacy.convert_deid import get_trf_ner_from_old
1714
from medcat.utils.legacy.helpers import fix_subnames
1815

1916

@@ -85,40 +82,47 @@ def convert(self) -> CAT:
8582
cat = CAT(cdb, vocab, config)
8683
fix_subnames(cat)
8784
# MetaCATs
88-
meta_cats = [
89-
get_meta_cat_from_old(
90-
os.path.join(self.old_model_folder, subfolder),
91-
cat._pipeline.tokenizer)
85+
meta_cat_folders = [
86+
os.path.join(self.old_model_folder, subfolder)
9287
for subfolder in os.listdir(self.old_model_folder)
9388
if subfolder.startswith("meta_")
9489
]
95-
for mc in meta_cats:
96-
cat.add_addon(mc)
90+
if meta_cat_folders:
91+
from medcat.utils.legacy.convert_meta_cat import (
92+
get_meta_cat_from_old)
93+
for subfolder in meta_cat_folders:
94+
mc = get_meta_cat_from_old(subfolder, cat.pipe.tokenizer)
95+
cat.add_addon(mc)
9796

9897
# RelCATs
99-
rel_cats = [
100-
get_rel_cat_from_old(
101-
cdb,
102-
os.path.join(self.old_model_folder, subfolder),
103-
cat._pipeline.tokenizer)
98+
rel_cats_folders = [
99+
os.path.join(self.old_model_folder, subfolder)
104100
for subfolder in os.listdir(self.old_model_folder)
105101
if subfolder.startswith("rel_")
106102
]
107-
for rc in rel_cats:
108-
cat.add_addon(rc)
103+
if rel_cats_folders:
104+
from medcat.utils.legacy.convert_rel_cat import (
105+
get_rel_cat_from_old)
106+
for subfolder in rel_cats_folders:
107+
rel_cat = get_rel_cat_from_old(
108+
cdb, subfolder, cat.pipe.tokenizer)
109+
cat.add_addon(rel_cat)
109110

110111
# DeID / TransformersNER
111-
trf_ners = [
112-
get_trf_ner_from_old(
113-
os.path.join(self.old_model_folder, subfolder),
114-
cat._pipeline.tokenizer)
112+
trf_folders = [
113+
os.path.join(self.old_model_folder, subfolder)
115114
for subfolder in os.listdir(self.old_model_folder)
116115
if subfolder.startswith("trf_")
117116
]
118-
if len(trf_ners) > 1:
119-
raise ValueError("Cannot use more than 1 tranformers NER. "
120-
f"Got {len(trf_ners)}")
121-
if trf_ners:
117+
if trf_folders:
118+
from medcat.utils.legacy.convert_deid import get_trf_ner_from_old
119+
trf_ners = [
120+
get_trf_ner_from_old(subfolder, cat.pipe.tokenizer)
121+
for subfolder in os.listdir(self.old_model_folder)
122+
]
123+
if len(trf_ners) > 1:
124+
raise ValueError("Cannot use more than 1 tranformers NER. "
125+
f"Got {len(trf_ners)}")
122126
logger.info("Found a Transformers based NER component "
123127
"- probably for DeID")
124128
trf_ner = trf_ners[0]
@@ -132,20 +136,20 @@ def convert(self) -> CAT:
132136
# replace component in pipeline
133137
# get the index of component in list
134138
index = next((c_num for c_num, comp in
135-
enumerate(cat._pipeline._components)
139+
enumerate(cat.pipe._components)
136140
if comp.get_type() is CoreComponentType.ner))
137141
# set / change / replace the NER component
138142
logger.info(f"Changing the NER component in the pipe to {trf_ner}")
139-
cat._pipeline._components[index] = trf_ner
143+
cat.pipe._components[index] = trf_ner
140144
# replace linker to no-action linker
141145
config.components.linking.comp_name = 'no_action'
142146
index_link = next(
143-
(c_num for c_num, comp in enumerate(cat._pipeline._components)
147+
(c_num for c_num, comp in enumerate(cat.pipe._components)
144148
if comp.get_type() is CoreComponentType.linking))
145149
# set / change / replace Linker to no-action linker
146150
logger.info("Changing the linking component in the pipe to a "
147151
"no-action linker")
148-
cat._pipeline._components[index_link] = NoActionLinker()
152+
cat.pipe._components[index_link] = NoActionLinker()
149153

150154
if self.new_model_folder:
151155
logger.info("Saving converted model to '%s'",

0 commit comments

Comments
 (0)