From 4860d4b6df60730709b026c0546679e9f7d5f559 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 27 Oct 2025 17:01:24 +0000 Subject: [PATCH 1/7] CU-869az43jq: Remove gitmodules --- .gitmodules | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .gitmodules diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 85a4d34..0000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ - -[submodule "notebooks/demo_working_with_cogstack"] - path = notebooks/demo_working_with_cogstack - url = https://github.com/CogStack/working_with_cogstack.git - branch = main - ignore = all From 3c3a394f9681931dc9f0c0174f26b10091958c55 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 27 Oct 2025 17:01:56 +0000 Subject: [PATCH 2/7] CU-869az43jq: Add explicit medcat requirement --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 2f30cad..6630df6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ wheel==0.45.1 +medcat==2.2.0 +# TODO: cogstack-es virtualenv==20.31.2 ipywidgets==8.1.7 jupyter==1.1.1 From 0c20f0b9dce222f888f2c8272e3a1c123aabddf6 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 27 Oct 2025 17:07:19 +0000 Subject: [PATCH 3/7] CU-869az43jq: Initial changes for new cogstack-es and medcat-scripts --- Dockerfile_singleuser | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Dockerfile_singleuser b/Dockerfile_singleuser index b934732..7976c36 100644 --- a/Dockerfile_singleuser +++ b/Dockerfile_singleuser @@ -212,16 +212,18 @@ RUN uv pip install --upgrade --system pip setuptools wheel # install the rest of the packages including medcat COPY ./requirements.txt /srv/jupyterhub/ -# install requirements for working with cogstack scripts -COPY notebooks/demo_working_with_cogstack/requirements.txt /srv/jupyterhub/working_with_cogstack_requirements.txt RUN if [ "$GPU_BUILD" = "true" ] && [ "$CPU_ARCHITECTURE" = "amd64" ]; then \ uv pip install --system --no-cache-dir 
-r /srv/jupyterhub/requirements.txt && \ - uv pip install --system --no-cache-dir -r /srv/jupyterhub/working_with_cogstack_requirements.txt ; \ + # TODO: Figure out exact syntax + uv run python -m medcat download-scripts /srv/jupyterhub/medcat-scripts && \ + uv pip install --system --no-cache-dir -r /srv/jupyterhub/medcat-scripts/requirements.txt ; \ else \ uv pip install --system --no-cache-dir -r /srv/jupyterhub/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ && \ - uv pip install --system --no-cache-dir -r /srv/jupyterhub/working_with_cogstack_requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ ; \ - fi + uv run python -m medcat download-scripts /srv/jupyterhub/medcat-scripts && \ + uv pip install --system --no-cache-dir -r /srv/jupyterhub/medcat-scripts/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ ; \ + fi && \ + mv /srv/jupyterhub/medcat-scripts/notebooks/* /home/jovyan/work/. # move notebooks ####################################################################################################### From cfc10fa14379de550491ee9bd210751dd3f7cd64 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 29 Oct 2025 11:16:17 +0000 Subject: [PATCH 4/7] CU-869az43jq: Remove whitespace --- Dockerfile_singleuser | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile_singleuser b/Dockerfile_singleuser index 7976c36..399a727 100644 --- a/Dockerfile_singleuser +++ b/Dockerfile_singleuser @@ -222,7 +222,7 @@ RUN if [ "$GPU_BUILD" = "true" ] && [ "$CPU_ARCHITECTURE" = "amd64" ]; then \ uv pip install --system --no-cache-dir -r /srv/jupyterhub/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ && \ uv run python -m medcat download-scripts /srv/jupyterhub/medcat-scripts && \ uv pip install --system --no-cache-dir -r /srv/jupyterhub/medcat-scripts/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ ; \ - fi && \ + fi && \ mv 
/srv/jupyterhub/medcat-scripts/notebooks/* /home/jovyan/work/. # move notebooks ####################################################################################################### From 43e45823810d1b7ba1a8a092830f93ae6da10f3d Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 29 Oct 2025 13:34:39 +0000 Subject: [PATCH 5/7] CU-869az43jq: Update download script usage for scripts download --- Dockerfile_singleuser | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile_singleuser b/Dockerfile_singleuser index 399a727..6afd871 100644 --- a/Dockerfile_singleuser +++ b/Dockerfile_singleuser @@ -215,12 +215,12 @@ COPY ./requirements.txt /srv/jupyterhub/ RUN if [ "$GPU_BUILD" = "true" ] && [ "$CPU_ARCHITECTURE" = "amd64" ]; then \ uv pip install --system --no-cache-dir -r /srv/jupyterhub/requirements.txt && \ - # TODO: Figure out exact syntax - uv run python -m medcat download-scripts /srv/jupyterhub/medcat-scripts && \ + # NOTE: it'll create the medcat-scripts folder within + uv run python -m medcat download-scripts /srv/jupyterhub/ && \ uv pip install --system --no-cache-dir -r /srv/jupyterhub/medcat-scripts/requirements.txt ; \ else \ uv pip install --system --no-cache-dir -r /srv/jupyterhub/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ && \ - uv run python -m medcat download-scripts /srv/jupyterhub/medcat-scripts && \ + uv run python -m medcat download-scripts /srv/jupyterhub/ && \ uv pip install --system --no-cache-dir -r /srv/jupyterhub/medcat-scripts/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ ; \ fi && \ mv /srv/jupyterhub/medcat-scripts/notebooks/* /home/jovyan/work/. 
# move notebooks From 1b266e8e9dbf95548933c12eb9010f0cf18ea3a2 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 29 Oct 2025 13:40:45 +0000 Subject: [PATCH 6/7] CU-869az43jq: Add cogstack-es installation --- Dockerfile_singleuser | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Dockerfile_singleuser b/Dockerfile_singleuser index 6afd871..43c6a14 100644 --- a/Dockerfile_singleuser +++ b/Dockerfile_singleuser @@ -7,6 +7,9 @@ ARG http_proxy ARG https_proxy ARG no_proxy +# NOTE: set to ES8 for Elasticsearch 8 or OS for OpenSearch +ARG COGSTACK_BACKEND=ES9 + # set to "true" for the GPU build ARG GPU_BUILD=false @@ -223,6 +226,8 @@ RUN if [ "$GPU_BUILD" = "true" ] && [ "$CPU_ARCHITECTURE" = "amd64" ]; then \ uv run python -m medcat download-scripts /srv/jupyterhub/ && \ uv pip install --system --no-cache-dir -r /srv/jupyterhub/medcat-scripts/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ ; \ fi && \ + # install cogstack-es (Cogstack class) + uv pip install "cogstack-es[$COGSTACK_BACKEND]" && \ mv /srv/jupyterhub/medcat-scripts/notebooks/* /home/jovyan/work/. 
# move notebooks ####################################################################################################### From 133fb7b8f64f2ba778b4484a9c91fe559043ca42 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 29 Oct 2025 13:49:45 +0000 Subject: [PATCH 7/7] CU-869az43jq: Remove submodule sync workflow --- .github/workflows/submodule_sync.yml | 54 ---------------------------- 1 file changed, 54 deletions(-) delete mode 100755 .github/workflows/submodule_sync.yml diff --git a/.github/workflows/submodule_sync.yml b/.github/workflows/submodule_sync.yml deleted file mode 100755 index f14dcb8..0000000 --- a/.github/workflows/submodule_sync.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: submodules-sync - -on: - push: - branches: ['**'] - tags: ['v*.*.*'] - pull_request: - branches: ['**'] - release: - types: [published] - workflow_dispatch: - -jobs: - sync: - name: 'Submodules Sync' - runs-on: ubuntu-22.04 - if: github.actor != 'github-actions[bot]' - - defaults: - run: - shell: bash - - steps: - - name: Checkout repository with submodules - uses: actions/checkout@v5 - with: - token: ${{ secrets.GITHUB_TOKEN }} - submodules: recursive - persist-credentials: false - - - name: Sync submodule URLs - run: | - git submodule sync --recursive - - - name: Update submodules - run: | - git -c protocol.version=2 submodule update --init --remote --recursive --jobs 8 - - - name: Commit and push submodule updates - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - set -euo pipefail - git config user.name 'github-actions[bot]' - git config user.email '41898282+github-actions[bot]@users.noreply.github.com' - git remote set-url origin https://x-access-token:${GH_TOKEN}@github.com/${{ github.repository }} - # Stage only submodule pointer updates and .gitmodules - git add .gitmodules $(git config -f .gitmodules --get-regexp path | awk '{print $2}') || true - if git diff --cached --quiet; then - echo "No submodule pointer changes to commit" - exit 0 - fi - git commit -m 
"chore(submodules): auto-update pointers [skip ci]" - git push