|
156 | 156 | "source": [ |
157 | 157 | "batch_char_size = 50000 # Batch size (BS) in number of characters\n", |
158 | 158 | "for text_id, text in data_iterator(df, doc_id_column, doc_text_column):\n", |
159 | | - " cat.get_entities(text,\n", |
| 159 | + " # NOTE: get_entities_multi_texts returns an iterator\n", |
| 160 | + " # so no work gets done until the iterator is materialised\n", |
| 161 | + " output = cat.get_entities_multi_texts(text,\n", |
160 | 162 | " only_cui=False,\n", |
161 | 163 | " # nproc=8, # Number of processors\n", |
162 | 164 | " # out_split_size_chars=20*batch_char_size,\n", |
163 | 165 | " # save_dir_path=ann_folder_path,\n", |
164 | 166 | " # min_free_memory=0.1,\n", |
165 | 167 | " )\n", |
| 168 | + " # so if we're doing a small amount of data and/or not saving it on disk\n", |
| 169 | + " # we probably want to just convert it to a list\n", |
| 170 | + " output = list(output)\n", |
166 | 171 | "\n", |
167 | 172 | "medcat_logger.warning(f'Annotation process complete!')\n" |
168 | 173 | ] |
|
321 | 326 | ], |
322 | 327 | "metadata": { |
323 | 328 | "kernelspec": { |
324 | | - "display_name": "Python 3", |
| 329 | + "display_name": "venv_v2", |
325 | 330 | "language": "python", |
326 | 331 | "name": "python3" |
327 | 332 | }, |
|
335 | 340 | "name": "python", |
336 | 341 | "nbconvert_exporter": "python", |
337 | 342 | "pygments_lexer": "ipython3", |
338 | | - "version": "3.10.8" |
339 | | - }, |
340 | | - "vscode": { |
341 | | - "interpreter": { |
342 | | - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" |
343 | | - } |
| 343 | + "version": "3.10.13" |
344 | 344 | } |
345 | 345 | }, |
346 | 346 | "nbformat": 4, |
|
0 commit comments