CU-8699049kf: Add a few more comments regarding get_entities_multi_texts output

mart-r · mart-r · commit 298ede03bdc2 · 2025-07-17T15:54:10.000+01:00
diff --git a/medcat/3_run_model/run_model.ipynb b/medcat/3_run_model/run_model.ipynb
@@ -156,13 +156,18 @@
    "source": [
     "batch_char_size = 50000  # Batch size (BS) in number of characters\n",
     "for text_id, text in data_iterator(df, doc_id_column, doc_text_column):\n",
-    "    cat.get_entities(text,\n",
+    "    # NOTE: get_entities_multi_texts returns an iterator\n",
+    "    #       so no work gets done until the iterator is materialised\n",
+    "    output = cat.get_entities_multi_texts(text,\n",
     "                     only_cui=False,\n",
     "                    #  nproc=8, # Number of processors\n",
     "                    #  out_split_size_chars=20*batch_char_size,\n",
     "                    #  save_dir_path=ann_folder_path,\n",
     "                    #  min_free_memory=0.1,\n",
     "                     )\n",
+    "    # so if we're doing a small amount of data and/or not saving it on disk\n",
+    "    # we probably want to just convert it to a list\n",
+    "    output = list(output)\n",
     "\n",
     "medcat_logger.warning(f'Annotation process complete!')\n"
    ]
@@ -321,7 +326,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "venv_v2",
    "language": "python",
    "name": "python3"
   },
@@ -335,12 +340,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.8"
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-   }
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,