diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml
index 5f8ba9285ac..5f731a31595 100644
--- a/.github/workflows/cibuildwheel.yml
+++ b/.github/workflows/cibuildwheel.yml
@@ -7,9 +7,12 @@ on:
       # ** matches 'zero or more of any character'
       - 'release-v[0-9]+.[0-9]+.[0-9]+**'
       - 'prerelease-v[0-9]+.[0-9]+.[0-9]+**'
+
+permissions: {}
+
 jobs:
   build_wheels:
-    uses: explosion/gha-cibuildwheel/.github/workflows/cibuildwheel.yml@main
+    uses: explosion/gha-cibuildwheel/.github/workflows/cibuildwheel.yml@2c98f757f13d112cf73fcf4b627249f1fffb5aae  # main
     permissions:
       contents: write
       actions: read
diff --git a/.github/workflows/explosionbot.yml b/.github/workflows/explosionbot.yml
index 78a27cfa3ba..979385ccb90 100644
--- a/.github/workflows/explosionbot.yml
+++ b/.github/workflows/explosionbot.yml
@@ -6,6 +6,8 @@ on:
       - created
       - edited
 
+permissions: {}
+
 jobs:
   explosion-bot:
     if: github.repository_owner == 'explosion'
@@ -15,13 +17,15 @@ jobs:
         env:
           GITHUB_CONTEXT: ${{ toJson(github) }}
         run: echo "$GITHUB_CONTEXT"
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
       - name: Install and run explosion-bot
         run: |
-          pip install git+https://${{ secrets.EXPLOSIONBOT_TOKEN }}@github.com/explosion/explosion-bot
+          git config --global url."https://x-access-token:${EXPLOSIONBOT_TOKEN}@github.com/".insteadOf "https://github.com/"
+          pip install git+https://github.com/explosion/explosion-bot
           python -m explosionbot
         env:
+          EXPLOSIONBOT_TOKEN: ${{ secrets.EXPLOSIONBOT_TOKEN }}
           INPUT_TOKEN: ${{ secrets.EXPLOSIONBOT_TOKEN }}
           INPUT_BK_TOKEN: ${{ secrets.BUILDKITE_SECRET }}
           ENABLED_COMMANDS: "test_gpu,test_slow,test_slow_gpu"
diff --git a/.github/workflows/issue-manager.yml b/.github/workflows/issue-manager.yml
index 6c7d7d5a6f8..264707485e7 100644
--- a/.github/workflows/issue-manager.yml
+++ b/.github/workflows/issue-manager.yml
@@ -11,12 +11,16 @@ on:
     types:
       - labeled
 
+permissions: {}
+
 jobs:
   issue-manager:
+    permissions:
+      issues: write
     if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
     steps:
-      - uses: tiangolo/issue-manager@0.4.0
+      - uses: tiangolo/issue-manager@4d1b7e05935a404dc8337d30bd23be46be8bb8e5  # 0.4.0
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
           config: >
diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml
index 2bbdd64c771..8fcf3028476 100644
--- a/.github/workflows/lock.yml
+++ b/.github/workflows/lock.yml
@@ -16,7 +16,7 @@ jobs:
     if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
     steps:
-      - uses: dessant/lock-threads@v5
+      - uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771  # v5
         with:
           process-only: 'issues'
           issue-inactive-days: '30'
diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
index 9f432874cc2..fcc6f2a9999 100644
--- a/.github/workflows/publish_pypi.yml
+++ b/.github/workflows/publish_pypi.yml
@@ -8,6 +8,8 @@ on:
     types:
       - published
 
+permissions: {}
+
 jobs:
   upload_pypi:
     runs-on: ubuntu-latest
@@ -21,7 +23,7 @@ jobs:
     # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this)
     # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
     steps:
-      - uses: robinraju/release-downloader@v1
+      - uses: robinraju/release-downloader@daf26c55d821e836577a15f77d86ddc078948b05  # v1
         with:
           tag: ${{ github.event.release.tag_name }}
           fileName: '*'
diff --git a/.github/workflows/spacy_universe_alert.yml b/.github/workflows/spacy_universe_alert.yml
index 01731ffe0d7..ec0230699be 100644
--- a/.github/workflows/spacy_universe_alert.yml
+++ b/.github/workflows/spacy_universe_alert.yml
@@ -5,21 +5,16 @@ on:
     paths:
       - "website/meta/universe.json"
 
+permissions: {}
+
 jobs:
   build:
     if: github.repository_owner == 'explosion'
     runs-on: ubuntu-latest
 
     steps:
-      - name: Dump GitHub context
-        env:
-          GITHUB_CONTEXT: ${{ toJson(github) }}
-          PR_NUMBER: ${{github.event.number}}
-        run: |
-          echo "$GITHUB_CONTEXT"
-
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
         with:
           python-version: '3.10'
       - name: Install Bernadette app dependency and send an alert
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index bb4eb278131..b20dba12f04 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -19,6 +19,8 @@ on:
       - "*.mdx"
       - "website/**"
 
+permissions: {}
+
 jobs:
   validate:
     name: Validate
@@ -26,49 +28,38 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
 
       - name: Configure Python version
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
         with:
           python-version: "3.10"
 
-      - name: black
-        run: |
-          python -m pip install black -c requirements.txt
-          python -m black spacy --check
-      - name: isort
+      - name: ruff format
         run: |
-          python -m pip install isort -c requirements.txt
-          python -m isort spacy --check
-      - name: flake8
+          python -m pip install ruff -c requirements.txt
+          python -m ruff format spacy --check
+      - name: ruff isort
         run: |
-          python -m pip install flake8==5.0.4
-          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
-          # Unfortunately cython-lint isn't working after the shift to Cython 3.
-          #- name: cython-lint
-          #  run: |
-          #    python -m pip install cython-lint -c requirements.txt
-          #    # E501: line too log, W291: trailing whitespace, E266: too many leading '#' for block comment
-          #    cython-lint spacy --ignore E501,W291,E266
+          python -m ruff check spacy --select I
 
   tests:
     name: Test
     needs: Validate
     strategy:
-      fail-fast: true
+      fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python_version: ["3.10", "3.11", "3.12", "3.13"]
+        python_version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
 
     runs-on: ${{ matrix.os }}
 
     steps:
       - name: Check out repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
 
       - name: Configure Python version
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
         with:
           python-version: ${{ matrix.python_version }}
 
@@ -104,7 +95,7 @@ jobs:
         shell: bash
 
       - name: Test import
-        run: python -W error -c "import spacy"
+        run: python -W error -W 'ignore:Core Pydantic V1:UserWarning:pydantic' -c "import spacy"
 
       - name: "Test download CLI"
         run: |
@@ -165,7 +156,7 @@ jobs:
 
       - name: "Run CPU tests"
         run: |
-          python -m pytest --pyargs spacy -W error
+          python -m pytest --pyargs spacy -W error -W 'ignore:Core Pydantic V1:UserWarning:pydantic'
         if: "!(startsWith(matrix.os, 'macos') && matrix.python_version == '3.11')"
 
       - name: "Run CPU tests with thinc-apple-ops"
diff --git a/.github/workflows/universe_validation.yml b/.github/workflows/universe_validation.yml
index ce7df49dbae..e97850cd4b0 100644
--- a/.github/workflows/universe_validation.yml
+++ b/.github/workflows/universe_validation.yml
@@ -13,6 +13,8 @@ on:
     paths:
       - "website/meta/universe.json"
 
+permissions: {}
+
 jobs:
   validate:
     name: Validate
@@ -20,10 +22,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
 
       - name: Configure Python version
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6
         with:
           python-version: "3.7"
 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2c5e98fd97..7d57c3a0c56 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,13 +1,7 @@
 repos:
--   repo: https://github.com/ambv/black
-    rev: 22.3.0
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.0
     hooks:
-    - id: black
-      language_version: python3.7
-      additional_dependencies: ['click==8.0.4']
--   repo: https://github.com/pycqa/flake8
-    rev: 5.0.4
-    hooks:
-    - id: flake8
-      args:
-        - "--config=setup.cfg"
+    - id: ruff
+      args: ['--fix']
+    - id: ruff-format
diff --git a/MANIFEST.in b/MANIFEST.in
index 1caf758464f..36465ea94a0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 recursive-include spacy *.pyi *.pyx *.pxd *.txt *.cfg *.jinja *.toml *.hh
+recursive-include spacy_cli *.json
 include LICENSE
 include README.md
 include pyproject.toml
diff --git a/lint.sh b/lint.sh
new file mode 100755
index 00000000000..0ec0bda3f6b
--- /dev/null
+++ b/lint.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Local lint script: the CI Validate job's ruff checks plus mypy type checks.
+# Run from the repository root. Fixes formatting, then import sorting, in-place,
+# then re-verifies both in check mode and runs mypy.
+set -euo pipefail  # NB: a failing auto-fix step aborts the script right away
+
+err=0  # set to 1 by any failed verify/mypy step so the later checks still run
+
+echo "==> ruff format (auto-fixing)"
+python -m ruff format spacy
+
+echo "==> ruff isort (auto-fixing)"
+python -m ruff check spacy --select I --fix
+
+echo "==> ruff format (verify)"
+if ! python -m ruff format spacy --check; then
+    echo "FAIL: isort fix broke formatting"
+    err=1
+fi
+
+echo "==> ruff isort (verify)"
+if ! python -m ruff check spacy --select I; then
+    echo "FAIL: import sorting still fails after the isort fix"
+    err=1
+fi
+
+echo "==> mypy"
+if ! python -m mypy spacy; then
+    err=1
+fi
+
+if [ "$err" -ne 0 ]; then
+    echo "FAIL: see errors above"
+    exit 1
+fi
+
+echo "OK: all checks passed"
diff --git a/pyproject.toml b/pyproject.toml
index 64b71429e6e..395c2f7a108 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.3.4,<8.4.0",
+    "thinc>=8.3.12,<8.4.0",
     "numpy>=2.0.0,<3.0.0"
 ]
 build-backend = "setuptools.build_meta"
@@ -62,5 +62,13 @@ repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest
 [tool.cibuildwheel.pyodide]
 
 
-[tool.isort]
-profile = "black"
+[tool.ruff]
+line-length = 88
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "C", "B", "B9"]
+ignore = ["E203", "E266", "E501", "E731", "E741", "F541"]
+
+[tool.ruff.lint.isort]
+combine-as-imports = true
+split-on-trailing-comma = true
diff --git a/requirements.txt b/requirements.txt
index 6e79ed526bd..50c6382bea3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,19 +3,19 @@ spacy-legacy>=3.0.11,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.3.4,<8.4.0
-ml_datasets>=0.2.0,<0.3.0
+thinc>=8.3.12,<8.4.0
+ml_datasets>=0.2.1,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
-srsly>=2.4.3,<3.0.0
+srsly>=2.5.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
-typer-slim>=0.3.0,<1.0.0
-weasel>=0.4.2,<0.5.0
+typer>=0.3.0,<1.0.0
+weasel>=1.0.0,<2.0.0
 # Third party dependencies
 numpy>=2.0.0,<3.0.0
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
+pydantic>=2.0.0,<3.0.0
 jinja2
 # Official Python utilities
 setuptools
@@ -26,13 +26,12 @@ cython>=3.0,<4.0
 pytest>=5.2.0,!=7.1.0
 pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
-flake8>=3.8.0,<6.0.0
 hypothesis>=3.27.0,<7.0.0
 mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8"
 types-mock>=0.1.1
 types-setuptools>=57.0.0
 types-requests
 types-setuptools>=57.0.0
-black>=25.0.0
+ruff>=0.9.0
 cython-lint>=0.15.0
-isort>=5.0,<6.0
+confection>=1.1.0,<2.0.0
diff --git a/setup.cfg b/setup.cfg
index c4928af9224..83147ad0d48 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -22,6 +22,7 @@ classifiers =
     Programming Language :: Python :: 3.11
     Programming Language :: Python :: 3.12
     Programming Language :: Python :: 3.13
+    Programming Language :: Python :: 3.14
     Topic :: Scientific/Engineering
 project_urls =
     Release notes = https://github.com/explosion/spaCy/releases
@@ -41,7 +42,7 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.3.4,<8.4.0
+    thinc>=8.3.12,<8.4.0
 install_requires =
     # Our libraries
     spacy-legacy>=3.0.11,<3.1.0
@@ -49,18 +50,19 @@ install_requires =
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.3.4,<8.4.0
+    thinc>=8.3.12,<8.4.0
     wasabi>=0.9.1,<1.2.0
-    srsly>=2.4.3,<3.0.0
+    srsly>=2.5.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
-    weasel>=0.4.2,<0.5.0
+    weasel>=1.0.0,<2.0.0
+    confection>=1.1.0,<2.0.0
     # Third-party dependencies
-    typer-slim>=0.3.0,<1.0.0
+    typer>=0.3.0,<1.0.0
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0; python_version < "3.9"
     numpy>=1.19.0; python_version >= "3.9"
     requests>=2.13.0,<3.0.0
-    pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
+    pydantic>=2.0.0,<3.0.0
     jinja2
     # Official Python utilities
     setuptools
@@ -68,7 +70,7 @@ install_requires =
 
 [options.entry_points]
 console_scripts =
-    spacy = spacy.cli:setup_cli
+    spacy = spacy_cli.main:main
 
 [options.extras_require]
 lookups =
@@ -130,20 +132,13 @@ universal = false
 [sdist]
 formats = gztar
 
-[flake8]
-ignore = E203, E266, E501, E731, W503, E741, F541
-max-line-length = 80
-select = B,C,E,F,W,T4,B9
-exclude =
-    .env,
-    .git,
-    __pycache__,
-    _tokenizer_exceptions_list.py,
-
 [tool:pytest]
 markers =
     slow: mark a test as slow
     issue: reference specific issue
+filterwarnings =
+    error
+    ignore:Core Pydantic V1:UserWarning:pydantic
 
 [mypy]
 ignore_missing_imports = True
diff --git a/setup.py b/setup.py
index 33178662df4..e18e98b9249 100755
--- a/setup.py
+++ b/setup.py
@@ -82,9 +82,9 @@
 }
 # Files to copy into the package that are otherwise not included
 COPY_FILES = {
-    ROOT / "setup.cfg": PACKAGE_ROOT / "tests" / "package",
-    ROOT / "pyproject.toml": PACKAGE_ROOT / "tests" / "package",
-    ROOT / "requirements.txt": PACKAGE_ROOT / "tests" / "package",
+    ROOT / "setup.cfg": PACKAGE_ROOT / "tests" / "package" / "test.cfg",
+    ROOT / "pyproject.toml": PACKAGE_ROOT / "tests" / "package" / "test.toml",
+    ROOT / "requirements.txt": PACKAGE_ROOT / "tests" / "package" / "test.txt",
 }
 
 
@@ -158,10 +158,10 @@ def _minimal_ext_cmd(cmd):
 
 
 def clean(path):
-    for path in path.glob("**/*"):
-        if path.is_file() and path.suffix in (".so", ".cpp", ".html"):
-            print(f"Deleting {path.name}")
-            path.unlink()
+    for child in path.glob("**/*"):  # "**/*" walks the whole tree under path
+        if child.is_file() and child.suffix in (".so", ".cpp", ".html"):
+            print(f"Deleting {child.name}")
+            child.unlink()
 
 
 def setup_package():
@@ -173,10 +173,10 @@ def setup_package():
         about = {}
         exec(f.read(), about)
 
-    for copy_file, target_dir in COPY_FILES.items():
+    for copy_file, target_file in COPY_FILES.items():
         if copy_file.exists():
-            shutil.copy(str(copy_file), str(target_dir))
-            print(f"Copied {copy_file} -> {target_dir}")
+            shutil.copyfile(str(copy_file), str(target_file))
+            print(f"Copied {copy_file} -> {target_file}")
 
     include_dirs = [
         numpy.get_include(),
@@ -213,7 +213,7 @@ def setup_package():
         version=about["__version__"],
         ext_modules=ext_modules,
         cmdclass={"build_ext": build_ext_subclass},
-        package_data={"": ["*.pyx", "*.pxd", "*.pxi"]},
+        package_data={"": ["*.pyx", "*.pxd", "*.pxi"], "spacy_cli": ["*.json"]},
     )
 
 
diff --git a/spacy/__init__.py b/spacy/__init__.py
index 8bb8b49498e..5b3ff25c872 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -10,17 +10,39 @@
 # These are imported as part of the API
 from thinc.api import Config, prefer_gpu, require_cpu, require_gpu  # noqa: F401
 
-from . import pipeline  # noqa: F401
-from . import util
+from . import (
+    pipeline,  # noqa: F401
+    util,
+)
 from .about import __version__  # noqa: F401
 from .cli.info import info  # noqa: F401
 from .errors import Errors
 from .glossary import explain  # noqa: F401
 from .language import Language
 from .registrations import REGISTRY_POPULATED, populate_registry
+
+# Rebuild pydantic v2 schemas that use forward references to Language/Vocab
+from .schemas import (  # noqa: F401
+    ConfigSchema,
+    ConfigSchemaInit,
+    ConfigSchemaNlp,
+    ConfigSchemaPretrain,
+    ConfigSchemaTraining,
+)
+from .training import Example  # noqa: F401
 from .util import logger, registry  # noqa: F401
 from .vocab import Vocab
 
+_rebuild_ns = {"Language": Language, "Vocab": Vocab, "Example": Example}
+for _schema in (
+    ConfigSchemaTraining,
+    ConfigSchemaNlp,
+    ConfigSchemaPretrain,
+    ConfigSchemaInit,
+    ConfigSchema,
+):
+    _schema.model_rebuild(_types_namespace=_rebuild_ns)  # type: ignore[attr-defined]
+
 if sys.maxunicode == 65535:
     raise SystemError(Errors.E130)
 
diff --git a/spacy/about.py b/spacy/about.py
index a93d91532b6..df33ff96bfe 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,5 +1,5 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.8.11"
+__version__ = "3.8.12"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 3095778fe22..f176a2eabad 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,40 +1,96 @@
+import sys
+import types
+from importlib import import_module
+from typing import Iterable
+
+from typer.main import get_command
 from wasabi import msg
 
-# Needed for testing
-from . import download as download_module  # noqa: F401
-from ._util import app, setup_cli  # noqa: F401
-from .apply import apply  # noqa: F401
-from .assemble import assemble_cli  # noqa: F401
-
-# These are the actual functions, NOT the wrapped CLI commands. The CLI commands
-# are registered automatically and won't have to be imported here.
-from .benchmark_speed import benchmark_speed_cli  # noqa: F401
-from .convert import convert  # noqa: F401
-from .debug_config import debug_config  # noqa: F401
-from .debug_data import debug_data  # noqa: F401
-from .debug_diff import debug_diff  # noqa: F401
-from .debug_model import debug_model  # noqa: F401
-from .download import download  # noqa: F401
-from .evaluate import evaluate  # noqa: F401
-from .find_function import find_function  # noqa: F401
-from .find_threshold import find_threshold  # noqa: F401
-from .info import info  # noqa: F401
-from .init_config import fill_config, init_config  # noqa: F401
-from .init_pipeline import init_pipeline_cli  # noqa: F401
-from .package import package  # noqa: F401
-from .pretrain import pretrain  # noqa: F401
-from .profile import profile  # noqa: F401
-from .project.assets import project_assets  # type: ignore[attr-defined]  # noqa: F401
-from .project.clone import project_clone  # type: ignore[attr-defined]  # noqa: F401
-from .project.document import (  # type: ignore[attr-defined]  # noqa: F401
-    project_document,
+from ..util import registry
+from ._dispatch import (
+    GROUP_MODULES,
+    PUBLIC_ATTRS,
+    SUBCOMMAND_MODULES,
+    TOP_LEVEL_MODULES,
+    iter_builtin_modules,
 )
-from .project.dvc import project_update_dvc  # type: ignore[attr-defined]  # noqa: F401
-from .project.pull import project_pull  # type: ignore[attr-defined]  # noqa: F401
-from .project.push import project_push  # type: ignore[attr-defined]  # noqa: F401
-from .project.run import project_run  # type: ignore[attr-defined]  # noqa: F401
-from .train import train_cli  # type: ignore[attr-defined]  # noqa: F401
-from .validate import validate  # type: ignore[attr-defined]  # noqa: F401
+from ._util import COMMAND, add_project_cli, app
+
+HELP_OPTIONS = {"--help", "-h"}
+ROOT_OPTIONS = HELP_OPTIONS | {"--install-completion", "--show-completion"}
+
+__all__ = [
+    # "app" is not listed by hand: it is a PUBLIC_ATTRS key, so the splat adds it.
+    "load_all_commands",
+    "load_for_argv",
+    "setup_cli",
+    *sorted(PUBLIC_ATTRS),
+]
+
+
+def _import_modules(module_names: Iterable[str]) -> None:
+    for dotted_name in module_names:  # imported only for their import-time effects
+        import_module(dotted_name)
+
+
+def load_all_commands() -> None:
+    _import_modules(iter_builtin_modules())  # every built-in command module
+    add_project_cli()  # plus the lazily-attached "project" group
+
+
+def load_for_argv(argv: Iterable[str]) -> None:
+    """Import only the command modules that the given CLI arguments call for."""
+    tokens = list(argv)
+    first = tokens[0] if tokens else "-"  # "-" stands in for "no arguments"
+    if first in ROOT_OPTIONS or first.startswith("-"):
+        load_all_commands()
+    elif first == "project":
+        add_project_cli()
+    elif first in GROUP_MODULES:
+        # Use the narrower per-subcommand table when the subcommand is known.
+        second = tokens[1] if len(tokens) > 1 else "-"
+        key = (first, second)
+        if not second.startswith("-") and key in SUBCOMMAND_MODULES:
+            _import_modules(SUBCOMMAND_MODULES[key])
+        else:
+            _import_modules(GROUP_MODULES[first])
+    elif first in TOP_LEVEL_MODULES:
+        _import_modules(TOP_LEVEL_MODULES[first])
+
+
+def setup_cli() -> None:
+    """Load the commands that sys.argv needs, then run the Typer app."""
+    # Make sure entry-point CLI integrations are imported before command dispatch.
+    registry.cli.get_all()
+    load_for_argv(sys.argv[1:])
+    get_command(app)(prog_name=COMMAND)
+
+
+def __getattr__(name: str):
+    """Resolve a PUBLIC_ATTRS name on first access and cache it in globals()."""
+    if name in PUBLIC_ATTRS:
+        module_name, attr_name = PUBLIC_ATTRS[name]
+        mod = import_module(module_name)
+        globals()[name] = mod if attr_name is None else getattr(mod, attr_name)
+        return globals()[name]
+    raise AttributeError(f"module 'spacy.cli' has no attribute {name!r}")
+
+
+def __dir__():
+    return sorted({*globals(), *PUBLIC_ATTRS})  # loaded names + lazy public names
+
+
+class _CLIModule(types.ModuleType):  # presumably stops submodules shadowing attrs
+    def __setattr__(self, name, value):
+        if isinstance(value, types.ModuleType) and name in PUBLIC_ATTRS:
+            _, attr_name = PUBLIC_ATTRS[name]  # (module path, attribute or None)
+            if attr_name is not None:  # None entries expose the module itself
+                super().__setattr__(name, getattr(value, attr_name))  # not the module
+                return
+        super().__setattr__(name, value)  # everything else is stored unchanged
+
+
+sys.modules[__name__].__class__ = _CLIModule
 
 
 @app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
diff --git a/spacy/cli/_dispatch.py b/spacy/cli/_dispatch.py
new file mode 100644
index 00000000000..5ee4f654d39
--- /dev/null
+++ b/spacy/cli/_dispatch.py
@@ -0,0 +1,104 @@
+from typing import Dict, Iterable, Optional, Tuple
+
+CommandPath = Tuple[str, ...]
+
+
+TOP_LEVEL_MODULES: Dict[str, Tuple[str, ...]] = {  # command name -> modules to import
+    "apply": ("spacy.cli.apply",),
+    "assemble": ("spacy.cli.assemble",),
+    "convert": ("spacy.cli.convert",),
+    "debug-data": ("spacy.cli.debug_data",),
+    "download": ("spacy.cli.download",),
+    "evaluate": ("spacy.cli.evaluate",),
+    "find-function": ("spacy.cli.find_function",),
+    "find-threshold": ("spacy.cli.find_threshold",),
+    "info": ("spacy.cli.info",),
+    "package": ("spacy.cli.package",),
+    "pretrain": ("spacy.cli.pretrain",),
+    "profile": ("spacy.cli.profile",),
+    "train": ("spacy.cli.train",),
+    "validate": ("spacy.cli.validate",),
+}
+
+
+GROUP_MODULES: Dict[str, Tuple[str, ...]] = {  # group name -> modules to import
+    "benchmark": (
+        "spacy.cli.benchmark_speed",
+        "spacy.cli.evaluate",
+    ),
+    "debug": (
+        "spacy.cli.debug_config",
+        "spacy.cli.debug_data",
+        "spacy.cli.debug_diff",
+        "spacy.cli.debug_model",
+        "spacy.cli.profile",
+    ),
+    "init": (
+        "spacy.cli.init_config",
+        "spacy.cli.init_pipeline",
+    ),
+}
+
+
+SUBCOMMAND_MODULES: Dict[CommandPath, Tuple[str, ...]] = {  # (group, sub) -> modules
+    ("benchmark", "accuracy"): ("spacy.cli.evaluate",),
+    ("benchmark", "speed"): ("spacy.cli.benchmark_speed",),
+    ("debug", "config"): ("spacy.cli.debug_config",),
+    ("debug", "data"): ("spacy.cli.debug_data",),
+    ("debug", "diff-config"): ("spacy.cli.debug_diff",),
+    ("debug", "model"): ("spacy.cli.debug_model",),
+    ("debug", "profile"): ("spacy.cli.profile",),
+    ("init", "config"): ("spacy.cli.init_config",),
+    ("init", "fill-config"): ("spacy.cli.init_config",),
+    ("init", "labels"): ("spacy.cli.init_pipeline",),
+    ("init", "nlp"): ("spacy.cli.init_pipeline",),
+    ("init", "vectors"): ("spacy.cli.init_pipeline",),
+}
+
+
+PUBLIC_ATTRS: Dict[str, Tuple[str, Optional[str]]] = {  # name -> (module, attribute)
+    "app": ("spacy.cli._util", "app"),
+    "apply": ("spacy.cli.apply", "apply"),
+    "assemble_cli": ("spacy.cli.assemble", "assemble_cli"),
+    "benchmark_speed_cli": ("spacy.cli.benchmark_speed", "benchmark_speed_cli"),
+    "convert": ("spacy.cli.convert", "convert"),
+    "debug_config": ("spacy.cli.debug_config", "debug_config"),
+    "debug_data": ("spacy.cli.debug_data", "debug_data"),
+    "debug_diff": ("spacy.cli.debug_diff", "debug_diff"),
+    "debug_model": ("spacy.cli.debug_model", "debug_model"),
+    "download": ("spacy.cli.download", "download"),
+    "download_module": ("spacy.cli.download", None),  # None -> the module itself
+    "evaluate": ("spacy.cli.evaluate", "evaluate"),
+    "fill_config": ("spacy.cli.init_config", "fill_config"),
+    "find_function": ("spacy.cli.find_function", "find_function"),
+    "find_threshold": ("spacy.cli.find_threshold", "find_threshold"),
+    "info": ("spacy.cli.info", "info"),
+    "init_config": ("spacy.cli.init_config", "init_config"),
+    "init_pipeline_cli": ("spacy.cli.init_pipeline", "init_pipeline_cli"),
+    "package": ("spacy.cli.package", "package"),
+    "pretrain": ("spacy.cli.pretrain", "pretrain"),
+    "profile": ("spacy.cli.profile", "profile"),
+    "project_assets": ("spacy.cli.project.assets", "project_assets"),
+    "project_clone": ("spacy.cli.project.clone", "project_clone"),
+    "project_document": ("spacy.cli.project.document", "project_document"),
+    "project_pull": ("spacy.cli.project.pull", "project_pull"),
+    "project_push": ("spacy.cli.project.push", "project_push"),
+    "project_run": ("spacy.cli.project.run", "project_run"),
+    "project_update_dvc": ("spacy.cli.project.dvc", "project_update_dvc"),
+    "train_cli": ("spacy.cli.train", "train_cli"),
+    "validate": ("spacy.cli.validate", "validate"),
+}
+
+
+def iter_builtin_modules() -> Iterable[str]:
+    """Yield every built-in CLI module name exactly once.
+
+    Top-level command modules are yielded first, then command-group modules.
+    """
+    emitted = set()
+    for table in (TOP_LEVEL_MODULES, GROUP_MODULES):
+        for module_names in table.values():
+            for module_name in module_names:
+                if module_name not in emitted:
+                    emitted.add(module_name)
+                    yield module_name
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index 309b6b1e79a..35f899b2cf8 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -1,15 +1,11 @@
-import hashlib
 import os
-import shutil
 import sys
 from configparser import InterpolationError
 from contextlib import contextmanager
 from pathlib import Path
 from typing import (
-    TYPE_CHECKING,
     Any,
     Dict,
-    Iterable,
     List,
     Optional,
     Tuple,
@@ -21,23 +17,15 @@
 import typer
 from click import NoSuchOption
 from click.shell_completion import split_arg_string
-from thinc.api import Config, ConfigValidationError, require_gpu
+from thinc.api import ConfigValidationError, require_gpu
 from thinc.util import gpu_is_available
-from typer.main import get_command
 from wasabi import Printer, msg
-from weasel import app as project_cli
 
-from .. import about
 from ..compat import Literal
-from ..schemas import validate
 from ..util import (
     ENV_VARS,
-    SimpleFrozenDict,
     import_file,
-    is_compatible_version,
     logger,
-    make_tempdir,
-    registry,
     run_command,
 )
 
@@ -68,23 +56,25 @@
 Arg = typer.Argument
 Opt = typer.Option
 
-app = typer.Typer(name=NAME, help=HELP)
+app = typer.Typer(name=NAME, help=HELP, rich_markup_mode=None)
 benchmark_cli = typer.Typer(name="benchmark", help=BENCHMARK_HELP, no_args_is_help=True)
 debug_cli = typer.Typer(name="debug", help=DEBUG_HELP, no_args_is_help=True)
 init_cli = typer.Typer(name="init", help=INIT_HELP, no_args_is_help=True)
+_PROJECT_CLI_ADDED = False
 
-app.add_typer(project_cli, name="project", help=PROJECT_HELP, no_args_is_help=True)
 app.add_typer(debug_cli)
 app.add_typer(benchmark_cli)
 app.add_typer(init_cli)
 
 
-def setup_cli() -> None:
-    # Make sure the entry-point for CLI runs, so that they get imported.
-    registry.cli.get_all()
-    # Ensure that the help messages always display the correct prompt
-    command = get_command(app)
-    command(prog_name=COMMAND)
+def add_project_cli() -> None:
+    global _PROJECT_CLI_ADDED
+    if _PROJECT_CLI_ADDED:  # already attached; never add the group twice
+        return
+    from weasel import app as project_cli  # deferred until the first call
+
+    app.add_typer(project_cli, name="project", help=PROJECT_HELP, no_args_is_help=True)
+    _PROJECT_CLI_ADDED = True
 
 
 def parse_config_overrides(
@@ -215,8 +205,8 @@ def get_git_version(
     """
     try:
         ret = run_command("git --version", capture=True)
-    except:
-        raise RuntimeError(error)
+    except Exception as err:
+        raise RuntimeError(error) from err
     stdout = ret.stdout.strip()
     if not stdout or not stdout.startswith("git version"):
         return 0, 0
diff --git a/spacy/cli/apply.py b/spacy/cli/apply.py
index ffd8105060a..7671026f488 100644
--- a/spacy/cli/apply.py
+++ b/spacy/cli/apply.py
@@ -22,7 +22,7 @@
 
 out_help = "Path to save the resulting .spacy file"
 code_help = (
-    "Path to Python file with additional " "code (registered functions) to be imported"
+    "Path to Python file with additional code (registered functions) to be imported"
 )
 gold_help = "Use gold preprocessing provided in the .spacy files"
 force_msg = (
@@ -72,11 +72,15 @@ def apply_cli(
     data_path: Path = Arg(..., help=path_help, exists=True),
     output_file: Path = Arg(..., help=out_help, dir_okay=False),
     code_path: Optional[Path] = Opt(None, "--code", "-c", help=code_help),
-    text_key: str = Opt("text", "--text-key", "-tk", help="Key containing text string for JSONL"),
-    force_overwrite: bool = Opt(False, "--force", "-F", help="Force overwriting the output file"),
+    text_key: str = Opt(
+        "text", "--text-key", "-tk", help="Key containing text string for JSONL"
+    ),
+    force_overwrite: bool = Opt(
+        False, "--force", "-F", help="Force overwriting the output file"
+    ),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU."),
     batch_size: int = Opt(1, "--batch-size", "-b", help="Batch size."),
-    n_process: int = Opt(1, "--n-process", "-n", help="number of processors to use.")
+    n_process: int = Opt(1, "--n-process", "-n", help="number of processors to use."),
 ):
     """
     Apply a trained pipeline to documents to get predictions.
@@ -114,8 +118,7 @@ def apply(
     if len(paths) == 0:
         docbin.to_disk(output_file)
         msg.warn(
-            "Did not find data to process,"
-            f" {data_path} seems to be an empty directory."
+            f"Did not find data to process, {data_path} seems to be an empty directory."
         )
         return
     nlp = load_model(model)
diff --git a/spacy/cli/assemble.py b/spacy/cli/assemble.py
index f74bbacb555..bc97a9d594f 100644
--- a/spacy/cli/assemble.py
+++ b/spacy/cli/assemble.py
@@ -24,10 +24,25 @@
 def assemble_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    output_path: Path = Arg(
+        ..., help="Output directory to store assembled pipeline in"
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/benchmark_speed.py b/spacy/cli/benchmark_speed.py
index 4dd10049cda..052e7d43416 100644
--- a/spacy/cli/benchmark_speed.py
+++ b/spacy/cli/benchmark_speed.py
@@ -24,13 +24,29 @@ def benchmark_speed_cli(
     # fmt: off
     ctx: typer.Context,
     model: str = Arg(..., help="Model name or path"),
-    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
-    batch_size: Optional[int] = Opt(None, "--batch-size", "-b", min=1, help="Override the pipeline batch size"),
+    data_path: Path = Arg(
+        ..., help="Location of binary evaluation data in .spacy format", exists=True
+    ),
+    batch_size: Optional[int] = Opt(
+        None, "--batch-size", "-b", min=1, help="Override the pipeline batch size"
+    ),
     no_shuffle: bool = Opt(False, "--no-shuffle", help="Do not shuffle benchmark data"),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
-    n_batches: int = Opt(50, "--batches", help="Minimum number of batches to benchmark", min=30,),
-    warmup_epochs: int = Opt(3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    n_batches: int = Opt(
+        50,
+        "--batches",
+        help="Minimum number of batches to benchmark",
+        min=30,
+    ),
+    warmup_epochs: int = Opt(
+        3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
     # fmt: on
 ):
     """
@@ -151,7 +167,7 @@ def print_mean_with_ci(sample: numpy.ndarray):
     low = bootstrap_means[int(len(bootstrap_means) * 0.025)]
     high = bootstrap_means[int(len(bootstrap_means) * 0.975)]
 
-    print(f"Mean: {mean:.1f} words/s (95% CI: {low-mean:.1f} +{high-mean:.1f})")
+    print(f"Mean: {mean:.1f} words/s (95% CI: {low - mean:.1f} +{high - mean:.1f})")
 
 
 def print_outliers(sample: numpy.ndarray):
diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index a66a68133b3..140999207f3 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -48,17 +48,47 @@ class FileTypes(str, Enum):
 def convert_cli(
     # fmt: off
     input_path: str = Arg(..., help="Input file or directory", exists=True),
-    output_dir: Path = Arg("-", help="Output directory. '-' for stdout.", allow_dash=True, exists=True),
-    file_type: FileTypes = Opt("spacy", "--file-type", "-t", help="Type of data to produce"),
-    n_sents: int = Opt(1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"),
-    seg_sents: bool = Opt(False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"),
-    model: Optional[str] = Opt(None, "--model", "--base", "-b", help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)"),
-    morphology: bool = Opt(False, "--morphology", "-m", help="Enable appending morphology to tags"),
-    merge_subtokens: bool = Opt(False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"),
-    converter: str = Opt(AUTO, "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"),
-    ner_map: Optional[Path] = Opt(None, "--ner-map", "-nm", help="NER tag mapping (as JSON-encoded dict of entity types)", exists=True),
-    lang: Optional[str] = Opt(None, "--lang", "-l", help="Language (if tokenizer required)"),
-    concatenate: bool = Opt(None, "--concatenate", "-C", help="Concatenate output to a single file"),
+    output_dir: Path = Arg(
+        "-", help="Output directory. '-' for stdout.", allow_dash=True, exists=True
+    ),
+    file_type: FileTypes = Opt(
+        "spacy", "--file-type", "-t", help="Type of data to produce"
+    ),
+    n_sents: int = Opt(
+        1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"
+    ),
+    seg_sents: bool = Opt(
+        False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"
+    ),
+    model: Optional[str] = Opt(
+        None,
+        "--model",
+        "--base",
+        "-b",
+        help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)",
+    ),
+    morphology: bool = Opt(
+        False, "--morphology", "-m", help="Enable appending morphology to tags"
+    ),
+    merge_subtokens: bool = Opt(
+        False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"
+    ),
+    converter: str = Opt(
+        AUTO, "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"
+    ),
+    ner_map: Optional[Path] = Opt(
+        None,
+        "--ner-map",
+        "-nm",
+        help="NER tag mapping (as JSON-encoded dict of entity types)",
+        exists=True,
+    ),
+    lang: Optional[str] = Opt(
+        None, "--lang", "-l", help="Language (if tokenizer required)"
+    ),
+    concatenate: bool = Opt(
+        None, "--concatenate", "-C", help="Concatenate output to a single file"
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py
index 0e5382cd956..4876b6ff9e1 100644
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@@ -26,10 +26,28 @@
 def debug_config_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
-    show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code-path",
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    show_funcs: bool = Opt(
+        False,
+        "--show-functions",
+        "-F",
+        help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)",
+    ),
+    show_vars: bool = Opt(
+        False,
+        "--show-variables",
+        "-V",
+        help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.",
+    ),
     # fmt: on
 ):
     """Debug a config file and show validation errors. The command will
@@ -64,10 +82,10 @@ def debug_config(
         config = nlp.config.interpolate()
     msg.divider("Config validation for [initialize]")
     with show_validation_error(config_path):
-        T = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
+        T = registry.resolve(config["initialize"], schema=ConfigSchemaInit)  # type: ignore[arg-type]
     msg.divider("Config validation for [training]")
     with show_validation_error(config_path):
-        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
         dot_names = [T["train_corpus"], T["dev_corpus"]]
         util.resolve_dot_names(config, dot_names)
     msg.good("Config is valid")
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 1c9c0e0ea3a..6ba18e7f224 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -71,11 +71,28 @@
 def debug_data_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"),
-    verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"),
-    no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code-path",
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    ignore_warnings: bool = Opt(
+        False,
+        "--ignore-warnings",
+        "-IW",
+        help="Ignore warnings, only show stats and errors",
+    ),
+    verbose: bool = Opt(
+        False, "--verbose", "-V", help="Print additional information and explanations"
+    ),
+    no_format: bool = Opt(
+        False, "--no-format", "-NF", help="Don't pretty-print the results"
+    ),
     # fmt: on
 ):
     """
@@ -120,7 +137,7 @@ def debug_data(
         cfg = util.load_config(config_path, overrides=config_overrides)
         nlp = util.load_model_from_config(cfg)
         config = nlp.config.interpolate()
-        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
     # Use original config here, not resolved version
     sourced_components = get_sourced_components(cfg)
     frozen_components = T["frozen_components"]
@@ -708,7 +725,7 @@ def debug_data(
         if len(dev_not_train) != 0:
             pct = len(dev_not_train) / len(trees_dev)
             msg.info(
-                f"{len(dev_not_train)} lemmatizer trees ({pct*100:.1f}% of dev trees)"
+                f"{len(dev_not_train)} lemmatizer trees ({pct * 100:.1f}% of dev trees)"
                 " were found exclusively in the dev data."
             )
         else:
diff --git a/spacy/cli/debug_diff.py b/spacy/cli/debug_diff.py
index c53b0acab50..71d8826bc5e 100644
--- a/spacy/cli/debug_diff.py
+++ b/spacy/cli/debug_diff.py
@@ -2,11 +2,10 @@
 from typing import Optional
 
 import typer
-from thinc.api import Config
 from wasabi import MarkdownRenderer, Printer, diff_strings
 
 from ..util import load_config
-from ._util import Arg, Opt, debug_cli, parse_config_overrides, show_validation_error
+from ._util import Arg, Opt, debug_cli, show_validation_error
 from .init_config import Optimizations, init_config
 
 
@@ -17,12 +16,36 @@
 def debug_diff_cli(
     # fmt: off
     ctx: typer.Context,
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    compare_to: Optional[Path] = Opt(None, help="Path to a config file to diff against, or `None` to compare against default settings", exists=True, allow_dash=True),
-    optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether the user config was optimized for efficiency or accuracy. Only relevant when comparing against the default config."),
-    gpu: bool = Opt(False, "--gpu", "-G", help="Whether the original config can run on a GPU. Only relevant when comparing against the default config."),
-    pretraining: bool = Opt(False, "--pretraining", "--pt", help="Whether to compare on a config with pretraining involved. Only relevant when comparing against the default config."),
-    markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues")
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    compare_to: Optional[Path] = Opt(
+        None,
+        help="Path to a config file to diff against, or `None` to compare against default settings",
+        exists=True,
+        allow_dash=True,
+    ),
+    optimize: Optimizations = Opt(
+        Optimizations.efficiency.value,
+        "--optimize",
+        "-o",
+        help="Whether the user config was optimized for efficiency or accuracy. Only relevant when comparing against the default config.",
+    ),
+    gpu: bool = Opt(
+        False,
+        "--gpu",
+        "-G",
+        help="Whether the original config can run on a GPU. Only relevant when comparing against the default config.",
+    ),
+    pretraining: bool = Opt(
+        False,
+        "--pretraining",
+        "--pt",
+        help="Whether to compare on a config with pretraining involved. Only relevant when comparing against the default config.",
+    ),
+    markdown: bool = Opt(
+        False, "--markdown", "-md", help="Generate Markdown for GitHub issues"
+    ),
     # fmt: on
 ):
     """Show a diff of a config file with respect to spaCy's defaults or another config file. If
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 3c667e42a2b..dc0de3e1489 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -36,18 +36,26 @@
 def debug_model_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    component: str = Arg(..., help="Name of the pipeline component of which the model should be analysed"),
-    layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    component: str = Arg(
+        ..., help="Name of the pipeline component of which the model should be analysed"
+    ),
+    layers: str = Opt(
+        "", "--layers", "-l", help="Comma-separated names of layer IDs to print"
+    ),
     dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"),
     parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"),
     gradients: bool = Opt(False, "--gradients", "-GRAD", help="Show gradients"),
     attributes: bool = Opt(False, "--attributes", "-ATTR", help="Show attributes"),
     P0: bool = Opt(False, "--print-step0", "-P0", help="Print model before training"),
-    P1: bool = Opt(False, "--print-step1", "-P1", help="Print model after initialization"),
+    P1: bool = Opt(
+        False, "--print-step1", "-P1", help="Print model after initialization"
+    ),
     P2: bool = Opt(False, "--print-step2", "-P2", help="Print model after training"),
     P3: bool = Opt(False, "--print-step3", "-P3", help="Print final predictions"),
-    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     # fmt: on
 ):
     """
@@ -81,7 +89,7 @@ def debug_model_cli(
     with show_validation_error(config_path):
         nlp = util.load_model_from_config(raw_config)
         config = nlp.config.interpolate()
-        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+        T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
     seed = T["seed"]
     if seed is not None:
         msg.info(f"Fixing random seed: {seed}")
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index 8104fd2d285..8a1110dcef3 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -1,3 +1,4 @@
+import shutil
 import sys
 from typing import Optional, Sequence
 from urllib.parse import urljoin
@@ -27,9 +28,16 @@ def download_cli(
     # fmt: off
     ctx: typer.Context,
     model: str = Arg(..., help="Name of pipeline package to download"),
-    direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
-    sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
-    url: str = Opt(None, "--url", "-U", help="Download from given url")
+    direct: bool = Opt(
+        False, "--direct", "-d", "-D", help="Force direct download of name + version"
+    ),
+    sdist: bool = Opt(
+        False,
+        "--sdist",
+        "-S",
+        help="Download sdist (.tar.gz) archive instead of pre-built binary wheel",
+    ),
+    url: str = Opt(None, "--url", "-U", help="Download from given url"),
     # fmt: on
 ):
     """
@@ -176,5 +184,33 @@ def download_model(
     if not download_url.startswith(about.__download_url__):
         raise ValueError(f"Download from {filename} rejected. Was it a relative path?")
     pip_args = list(user_pip_args) if user_pip_args is not None else []
-    cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
+    cmd = _get_pip_install_cmd() + pip_args + [download_url]
     run_command(cmd)
+
+
+def _get_pip_install_cmd() -> list:
+    """Build the argv prefix for installing a package into this interpreter.
+
+    Uses the running interpreter's pip module when it is importable and
+    otherwise falls back to a `uv` executable found on PATH. If neither is
+    available, reports the problem via `msg.fail(..., exits=1)`.
+    """
+    # Function-scope import: only this helper needs it.
+    from importlib.util import find_spec
+
+    # The command below is `sys.executable -m pip`, so what matters is that
+    # the pip module is importable by this interpreter. A `pip` script on
+    # PATH can belong to a different Python and does not prove that.
+    if find_spec("pip") is not None:
+        return [sys.executable, "-m", "pip", "install"]
+    if shutil.which("uv"):
+        # Point uv at the running interpreter so the package is installed
+        # where this process can import it, rather than into whichever
+        # environment uv would discover on its own.
+        return ["uv", "pip", "install", "--python", sys.executable]
+    msg.fail(
+        "No package installer found",
+        "spaCy requires either pip or uv to download models. "
+        "Please install one of them and try again.",
+        exits=1,
+    )
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 2276ca6b0d4..9704ea44413 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -1,13 +1,12 @@
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional
 
 import srsly
 from thinc.api import fix_random_seed
 from wasabi import Printer
 
 from .. import displacy, util
-from ..scorer import Scorer
 from ..tokens import Doc
 from ..training import Corpus
 from ._util import Arg, Opt, app, benchmark_cli, import_code, setup_gpu
@@ -20,15 +19,42 @@
 def evaluate_cli(
     # fmt: off
     model: str = Arg(..., help="Model name or path"),
-    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
-    output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    data_path: Path = Arg(
+        ..., help="Location of binary evaluation data in .spacy format", exists=True
+    ),
+    output: Optional[Path] = Opt(
+        None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
-    gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
-    displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
-    displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"),
-    per_component: bool = Opt(False, "--per-component", "-P", help="Return scores per component, only applicable when an output JSON file is specified."),
-    spans_key: str = Opt("sc", "--spans-key", "-sk", help="Spans key to use when evaluating Doc.spans"),
+    gold_preproc: bool = Opt(
+        False, "--gold-preproc", "-G", help="Use gold preprocessing"
+    ),
+    displacy_path: Optional[Path] = Opt(
+        None,
+        "--displacy-path",
+        "-dp",
+        help="Directory to output rendered parses as HTML",
+        exists=True,
+        file_okay=False,
+    ),
+    displacy_limit: int = Opt(
+        25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"
+    ),
+    per_component: bool = Opt(
+        False,
+        "--per-component",
+        "-P",
+        help="Return scores per component, only applicable when an output JSON file is specified.",
+    ),
+    spans_key: str = Opt(
+        "sc", "--spans-key", "-sk", help="Spans key to use when evaluating Doc.spans"
+    ),
     # fmt: on
 ):
     """
@@ -123,7 +149,7 @@ def evaluate(
                     if key == "speed":
                         results[metric] = f"{scores[key]:.0f}"
                     else:
-                        results[metric] = f"{scores[key]*100:.2f}"
+                        results[metric] = f"{scores[key] * 100:.2f}"
                 else:
                     results[metric] = "-"
                 data[re.sub(r"[\s/]", "_", key.lower())] = scores[key]
diff --git a/spacy/cli/find_function.py b/spacy/cli/find_function.py
index f99ce2adc9f..3b3b333337b 100644
--- a/spacy/cli/find_function.py
+++ b/spacy/cli/find_function.py
@@ -11,7 +11,9 @@
 def find_function_cli(
     # fmt: off
     func_name: str = Arg(..., help="Name of the registered function."),
-    registry_name: Optional[str] = Opt(None, "--registry", "-r", help="Name of the catalogue registry."),
+    registry_name: Optional[str] = Opt(
+        None, "--registry", "-r", help="Name of the catalogue registry."
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index ff7af32e6f6..1873f476fcd 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -27,15 +27,39 @@
 def find_threshold_cli(
     # fmt: off
     model: str = Arg(..., help="Model name or path"),
-    data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
+    data_path: Path = Arg(
+        ..., help="Location of binary evaluation data in .spacy format", exists=True
+    ),
     pipe_name: str = Arg(..., help="Name of pipe to examine thresholds for"),
-    threshold_key: str = Arg(..., help="Key of threshold attribute in component's configuration"),
+    threshold_key: str = Arg(
+        ..., help="Key of threshold attribute in component's configuration"
+    ),
     scores_key: str = Arg(..., help="Metric to optimize"),
-    n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
-    gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
+    n_trials: int = Opt(
+        _DEFAULTS["n_trials"],
+        "--n_trials",
+        "-n",
+        help="Number of trials to determine optimal thresholds",
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    use_gpu: int = Opt(
+        _DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"
+    ),
+    gold_preproc: bool = Opt(
+        _DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
     # fmt: on
 ):
     """
@@ -183,10 +207,10 @@ def filter_config(
             ),
         )
         if hasattr(pipe, "cfg"):
-            setattr(
-                nlp.get_pipe(pipe_name),
-                "cfg",
-                set_nested_item(getattr(pipe, "cfg"), config_keys, threshold),
+            nlp.get_pipe(pipe_name).cfg = set_nested_item(  # type: ignore[attr-defined]
+                pipe.cfg,
+                config_keys,
+                threshold,  # type: ignore[attr-defined]
             )
 
         eval_scores = nlp.evaluate(dev_dataset)
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index 8bfc6b54f15..ed2394c564e 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -16,10 +16,24 @@
 def info_cli(
     # fmt: off
     model: Optional[str] = Arg(None, help="Optional loadable spaCy pipeline"),
-    markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
-    silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
-    exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
-    url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
+    markdown: bool = Opt(
+        False, "--markdown", "-md", help="Generate Markdown for GitHub issues"
+    ),
+    silent: bool = Opt(
+        False, "--silent", "-s", "-S", help="Don't print anything (just return)"
+    ),
+    exclude: str = Opt(
+        "labels",
+        "--exclude",
+        "-e",
+        help="Comma-separated keys to exclude from the print-out",
+    ),
+    url: bool = Opt(
+        False,
+        "--url",
+        "-u",
+        help="Print the URL to download the most recent compatible version of the pipeline",
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index a7fb2b5b81f..c7081040280 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -49,13 +49,44 @@ class InitValues:
 @init_cli.command("config")
 def init_config_cli(
     # fmt: off
-    output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
-    lang: str = Opt(InitValues.lang, "--lang", "-l", help="Two-letter code of the language to use"),
-    pipeline: str = Opt(",".join(InitValues.pipeline), "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
-    optimize: Optimizations = Opt(InitValues.optimize, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
-    gpu: bool = Opt(InitValues.gpu, "--gpu", "-G", help="Whether the model can run on GPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
-    pretraining: bool = Opt(InitValues.pretraining, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
-    force_overwrite: bool = Opt(InitValues.force_overwrite, "--force", "-F", help="Force overwriting the output file"),
+    output_file: Path = Arg(
+        ...,
+        help="File to save the config to or - for stdout (will only output config and no additional logging info)",
+        allow_dash=True,
+    ),
+    lang: str = Opt(
+        InitValues.lang, "--lang", "-l", help="Two-letter code of the language to use"
+    ),
+    pipeline: str = Opt(
+        ",".join(InitValues.pipeline),
+        "--pipeline",
+        "-p",
+        help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')",
+    ),
+    optimize: Optimizations = Opt(
+        InitValues.optimize,
+        "--optimize",
+        "-o",
+        help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters.",
+    ),
+    gpu: bool = Opt(
+        InitValues.gpu,
+        "--gpu",
+        "-G",
+        help="Whether the model can run on GPU. This will impact the choice of architecture, pretrained weights and related hyperparameters.",
+    ),
+    pretraining: bool = Opt(
+        InitValues.pretraining,
+        "--pretraining",
+        "-pt",
+        help="Include config for pretraining (with 'spacy pretrain')",
+    ),
+    force_overwrite: bool = Opt(
+        InitValues.force_overwrite,
+        "--force",
+        "-F",
+        help="Force overwriting the output file",
+    ),
     # fmt: on
 ):
     """
@@ -88,11 +119,28 @@ def init_config_cli(
 @init_cli.command("fill-config")
 def init_fill_config_cli(
     # fmt: off
-    base_path: Path = Arg(..., help="Path to base config to fill", exists=True, dir_okay=False),
-    output_file: Path = Arg("-", help="Path to output .cfg file (or - for stdout)", allow_dash=True),
-    pretraining: bool = Opt(False, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
-    diff: bool = Opt(False, "--diff", "-D", help="Print a visual diff highlighting the changes"),
-    code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+    base_path: Path = Arg(
+        ..., help="Path to base config to fill", exists=True, dir_okay=False
+    ),
+    output_file: Path = Arg(
+        "-", help="Path to output .cfg file (or - for stdout)", allow_dash=True
+    ),
+    pretraining: bool = Opt(
+        False,
+        "--pretraining",
+        "-pt",
+        help="Include config for pretraining (with 'spacy pretrain')",
+    ),
+    diff: bool = Opt(
+        False, "--diff", "-D", help="Print a visual diff highlighting the changes"
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code-path",
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
     # fmt: on
 ):
     """
@@ -168,7 +216,7 @@ def init_config(
     # Filter out duplicates since tok2vec and transformer are added by template
     pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
     defaults = RECOMMENDATIONS["__default__"]
-    reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).dict()
+    reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).model_dump()
     variables = {
         "lang": lang,
         "components": pipeline,
diff --git a/spacy/cli/init_pipeline.py b/spacy/cli/init_pipeline.py
index 21eea8edf2f..1c0ff526235 100644
--- a/spacy/cli/init_pipeline.py
+++ b/spacy/cli/init_pipeline.py
@@ -26,13 +26,42 @@ def init_vectors_cli(
     lang: str = Arg(..., help="The language of the nlp object to create"),
     vectors_loc: Path = Arg(..., help="Vectors file in Word2Vec format", exists=True),
     output_dir: Path = Arg(..., help="Pipeline output directory"),
-    prune: int = Opt(-1, "--prune", "-p", help="Optional number of vectors to prune to"),
-    truncate: int = Opt(0, "--truncate", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
+    prune: int = Opt(
+        -1, "--prune", "-p", help="Optional number of vectors to prune to"
+    ),
+    truncate: int = Opt(
+        0,
+        "--truncate",
+        "-t",
+        help="Optional number of vectors to truncate to when reading in vectors file",
+    ),
     mode: str = Opt("default", "--mode", "-m", help="Vectors mode: default or floret"),
-    name: Optional[str] = Opt(None, "--name", "-n", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
-    jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
-    attr: str = Opt("ORTH", "--attr", "-a", help="Optional token attribute to use for vectors, e.g. LOWER or NORM"),
+    name: Optional[str] = Opt(
+        None,
+        "--name",
+        "-n",
+        help="Optional name for the word vectors, e.g. en_core_web_lg.vectors",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
+    jsonl_loc: Optional[Path] = Opt(
+        None,
+        "--lexemes-jsonl",
+        "-j",
+        help="Location of JSONL-formatted attributes file",
+        hidden=True,
+    ),
+    attr: str = Opt(
+        "ORTH",
+        "--attr",
+        "-a",
+        help="Optional token attribute to use for vectors, e.g. LOWER or NORM",
+    ),
     # fmt: on
 ):
     """Convert word vectors for use with spaCy. Will export an nlp object that
@@ -81,11 +110,24 @@ def update_lexemes(nlp: Language, jsonl_loc: Path) -> None:
 def init_pipeline_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
     output_path: Path = Arg(..., help="Output directory for the prepared data"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
-    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     # fmt: on
 ):
     if verbose:
@@ -108,11 +150,24 @@ def init_pipeline_cli(
 def init_labels_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
     output_path: Path = Arg(..., help="Output directory for the labels"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
-    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     # fmt: on
 ):
     """Generate JSON files for the labels in the data. This helps speed up the
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 67b1d318651..9291aae2827 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -21,16 +21,56 @@
 @app.command("package")
 def package_cli(
     # fmt: off
-    input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False),
-    output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
-    code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package"),
-    meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
-    create_meta: bool = Opt(False, "--create-meta", "-C", help="Create meta.json, even if one exists"),
-    name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"),
-    version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"),
-    build: str = Opt("sdist", "--build", "-b", help="Comma-separated formats to build: sdist and/or wheel, or none."),
-    force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"),
-    require_parent: bool = Opt(True, "--require-parent/--no-require-parent", "-R", "-R", help="Include the parent package (e.g. spacy) in the requirements"),
+    input_dir: Path = Arg(
+        ..., help="Directory with pipeline data", exists=True, file_okay=False
+    ),
+    output_dir: Path = Arg(
+        ..., help="Output parent directory", exists=True, file_okay=False
+    ),
+    code_paths: str = Opt(
+        "",
+        "--code",
+        "-c",
+        help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package",
+    ),
+    meta_path: Optional[Path] = Opt(
+        None,
+        "--meta-path",
+        "--meta",
+        "-m",
+        help="Path to meta.json",
+        exists=True,
+        dir_okay=False,
+    ),
+    create_meta: bool = Opt(
+        False, "--create-meta", "-C", help="Create meta.json, even if one exists"
+    ),
+    name: Optional[str] = Opt(
+        None, "--name", "-n", help="Package name to override meta"
+    ),
+    version: Optional[str] = Opt(
+        None, "--version", "-v", help="Package version to override meta"
+    ),
+    build: str = Opt(
+        "sdist",
+        "--build",
+        "-b",
+        help="Comma-separated formats to build: sdist and/or wheel, or none.",
+    ),
+    force: bool = Opt(
+        False,
+        "--force",
+        "-f",
+        "-F",
+        help="Force overwriting existing data in output directory",
+    ),
+    require_parent: bool = Opt(
+        True,
+        "--require-parent/--no-require-parent",
+        "-R",
+        "-R",
+        help="Include the parent package (e.g. spacy) in the requirements",
+    ),
     # fmt: on
 ):
     """
@@ -410,7 +450,7 @@ def generate_readme(meta: Dict[str, Any]) -> str:
     pipeline = ", ".join([md.code(p) for p in meta.get("pipeline", [])])
     components = ", ".join([md.code(p) for p in meta.get("components", [])])
     vecs = meta.get("vectors", {})
-    vectors = f"{vecs.get('keys', 0)} keys, {vecs.get('vectors', 0)} unique vectors ({ vecs.get('width', 0)} dimensions)"
+    vectors = f"{vecs.get('keys', 0)} keys, {vecs.get('vectors', 0)} unique vectors ({vecs.get('width', 0)} dimensions)"
     author = meta.get("author") or "n/a"
     notes = meta.get("notes", "")
     license_name = meta.get("license")
@@ -469,7 +509,7 @@ def _format_accuracy(data: Dict[str, Any], exclude: List[str] = ["speed"]) -> st
     md = MarkdownRenderer()
     scalars = [(k, v) for k, v in data.items() if isinstance(v, (int, float))]
     scores = [
-        (md.code(acc.upper()), f"{score*100:.2f}")
+        (md.code(acc.upper()), f"{score * 100:.2f}")
         for acc, score in scalars
         if acc not in exclude
     ]
@@ -488,9 +528,7 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
         if not labels:
             continue
         col1 = md.bold(md.code(pipe))
-        col2 = ", ".join(
-            [md.code(str(label).replace("|", "\\|")) for label in labels]
-        )  # noqa: W605
+        col2 = ", ".join([md.code(str(label).replace("|", "\\|")) for label in labels])  # noqa: W605
         label_data.append((col1, col2))
         n_labels += len(labels)
         n_pipes += 1
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 446c40510df..daea861a952 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -25,13 +25,32 @@
 def pretrain_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True),
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True
+    ),
     output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
-    epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    resume_path: Optional[Path] = Opt(
+        None,
+        "--resume-path",
+        "-r",
+        help="Path to pretrained weights from which to resume pretraining",
+    ),
+    epoch_resume: Optional[int] = Opt(
+        None,
+        "--epoch-resume",
+        "-er",
+        help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files.",
+    ),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
-    skip_last: bool = Opt(False, "--skip-last", "-L", help="Skip saving model-last.bin"),
+    skip_last: bool = Opt(
+        False, "--skip-last", "-L", help="Skip saving model-last.bin"
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index e5b8f11939f..03f7127149e 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -21,8 +21,15 @@ def profile_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read current calling context
     model: str = Arg(..., help="Trained pipeline to load"),
-    inputs: Optional[Path] = Arg(None, help="Location of input file. '-' for stdin.", exists=True, allow_dash=True),
-    n_texts: int = Opt(10000, "--n-texts", "-n", help="Maximum number of texts to use if available"),
+    inputs: Optional[Path] = Arg(
+        None,
+        help="Location of input file. '-' for stdin.",
+        exists=True,
+        allow_dash=True,
+    ),
+    n_texts: int = Opt(
+        10000, "--n-texts", "-n", help="Maximum number of texts to use if available"
+    ),
     # fmt: on
 ):
     """
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index c72e13b2681..379268286ee 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -26,11 +26,30 @@
 def train_cli(
     # fmt: off
     ctx: typer.Context,  # This is only used to read additional arguments
-    config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
-    output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
-    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
-    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
-    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    config_path: Path = Arg(
+        ..., help="Path to config file", exists=True, allow_dash=True
+    ),
+    output_path: Optional[Path] = Opt(
+        None,
+        "--output",
+        "--output-path",
+        "-o",
+        help="Output directory to store trained pipeline in",
+    ),
+    code_path: Optional[Path] = Opt(
+        None,
+        "--code",
+        "-c",
+        help="Path to Python file with additional code (registered functions) to be imported",
+    ),
+    verbose: bool = Opt(
+        False,
+        "--verbose",
+        "-V",
+        "-VV",
+        help="Display more information for debugging purposes",
+    ),
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     # fmt: on
 ):
     """
diff --git a/spacy/compat.py b/spacy/compat.py
index a9e7d5a20b9..828ed1ba62e 100644
--- a/spacy/compat.py
+++ b/spacy/compat.py
@@ -35,7 +35,9 @@
 try:  # Python 3.8+
     import importlib.metadata as importlib_metadata
 except ImportError:
-    from catalogue import _importlib_metadata as importlib_metadata  # type: ignore[no-redef]    # noqa: F401
+    from catalogue import (  # type: ignore[no-redef]
+        _importlib_metadata as importlib_metadata,  # noqa: F401
+    )
 
 from thinc.api import Optimizer  # noqa: F401
 
diff --git a/spacy/lang/af/stop_words.py b/spacy/lang/af/stop_words.py
index 337afb57f8c..4b5a04a5eca 100644
--- a/spacy/lang/af/stop_words.py
+++ b/spacy/lang/af/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/stopwords-iso/stopwords-af
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 'n
 aan
 af
@@ -52,4 +53,5 @@
 was
 wat
 ŉ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/am/lex_attrs.py b/spacy/lang/am/lex_attrs.py
index 9e111b8d5eb..c7b2aab35bf 100644
--- a/spacy/lang/am/lex_attrs.py
+++ b/spacy/lang/am/lex_attrs.py
@@ -60,7 +60,8 @@
     "አስራ ስምንተኛ",
     "አስራ ዘጠነኛ",
     "ሃያኛ",
-    "ሰላሳኛ" "አርባኛ",
+    "ሰላሳኛ",
+    "አርባኛ",
     "አምሳኛ",
     "ስድሳኛ",
     "ሰባኛ",
diff --git a/spacy/lang/am/stop_words.py b/spacy/lang/am/stop_words.py
index 8a04c555f74..5487ada5aeb 100644
--- a/spacy/lang/am/stop_words.py
+++ b/spacy/lang/am/stop_words.py
@@ -1,7 +1,8 @@
 # Stop words by Teshome Kassie http://etd.aau.edu.et/bitstream/handle/123456789/3315/Teshome%20Kassie.pdf?sequence=1&isAllowed=y
 # Stop words by Tihitina Petros http://etd.aau.edu.et/bitstream/handle/123456789/3384/Tihitina%20Petros.pdf?sequence=1&isAllowed=y
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ግን አንቺ አንተ እናንተ ያንተ ያንቺ የናንተ ራስህን ራስሽን ራሳችሁን
 ሁሉ ኋላ በሰሞኑ አሉ በኋላ ሁኔታ በኩል አስታውቀዋል ሆነ በውስጥ
 አስታውሰዋል ሆኑ ባጣም እስካሁን ሆኖም በተለይ አሳሰበ ሁል በተመለከተ
@@ -28,4 +29,5 @@
 በዚህም መሆን ምንጊዜም እነዚህም በዚህና ያለ ስም
 ሲኖር ከዚህም መሆኑን በሁኔታው የማያንስ እነዚህኑ ማንም ከነዚሁ
 ያላቸውን እጅግ ሲሆኑ ለሆኑ ሊሆን  ለማናቸውም
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ar/lex_attrs.py b/spacy/lang/ar/lex_attrs.py
index 6e943d064ee..54ad7a8c363 100644
--- a/spacy/lang/ar/lex_attrs.py
+++ b/spacy/lang/ar/lex_attrs.py
@@ -1,6 +1,7 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 صفر
 واحد
 إثنان
@@ -50,9 +51,11 @@
 مليون
 مليار
 مليارات
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 اول
 أول
 حاد
@@ -67,7 +70,8 @@
 ثامن
 تاسع
 عاشر
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/ar/stop_words.py b/spacy/lang/ar/stop_words.py
index 65c8992cbd6..f4da54dda29 100644
--- a/spacy/lang/ar/stop_words.py
+++ b/spacy/lang/ar/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 من
 نحو
 لعل
@@ -385,4 +386,5 @@
 وإن
 ولو
 يا
-""".split())
+""".split()
+)
diff --git a/spacy/lang/az/stop_words.py b/spacy/lang/az/stop_words.py
index 8beffa998da..2114939ba11 100644
--- a/spacy/lang/az/stop_words.py
+++ b/spacy/lang/az/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/eliasdabbas/advertools/blob/master/advertools/stopwords.py
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 amma
 arasında
 artıq
@@ -140,4 +141,5 @@
 əlbəttə
 ən
 əslində
-""".split())
+""".split()
+)
diff --git a/spacy/lang/bg/stop_words.py b/spacy/lang/bg/stop_words.py
index 7d3e756054d..061850da594 100644
--- a/spacy/lang/bg/stop_words.py
+++ b/spacy/lang/bg/stop_words.py
@@ -4,7 +4,8 @@
     https://postvai.com/books/stop-dumi.pdf - Additions to the original list in order to improve it.
 """
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 а автентичен аз ако ала
 
 бе без беше би бивш бивша бившо бивши бил била били било благодаря близо бъдат
@@ -75,4 +76,5 @@
 юмрук
 
 я як
-""".split())
+""".split()
+)
diff --git a/spacy/lang/bn/stop_words.py b/spacy/lang/bn/stop_words.py
index 5aec18b7f5b..bf38e32545e 100644
--- a/spacy/lang/bn/stop_words.py
+++ b/spacy/lang/bn/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 অতএব অথচ অথবা অনুযায়ী অনেক অনেকে অনেকেই অন্তত  অবধি অবশ্য অর্থাৎ অন্য অনুযায়ী অর্ধভাগে
 আগামী আগে আগেই আছে আজ আদ্যভাগে আপনার আপনি আবার আমরা আমাকে আমাদের আমার  আমি আর আরও
 ইত্যাদি ইহা
@@ -37,4 +38,5 @@
 সাধারণ সামনে সঙ্গে সঙ্গেও সব সবার সমস্ত সম্প্রতি সময় সহ সহিত সাথে সুতরাং সে  সেই সেখান সেখানে  সেটা সেটাই সেটাও সেটি স্পষ্ট স্বয়ং
 হইতে হইবে হইয়া হওয়া হওয়ায় হওয়ার হচ্ছে হত হতে হতেই হন হবে হবেন হয় হয়তো হয়নি হয়ে হয়েই হয়েছিল হয়েছে হাজার
 হয়েছেন হল হলে হলেই হলেও হলো হিসাবে হিসেবে হৈলে হোক হয় হয়ে হয়েছে হৈতে হইয়া  হয়েছিল হয়েছেন হয়নি হয়েই হয়তো হওয়া হওয়ার হওয়ায়
-""".split())
+""".split()
+)
diff --git a/spacy/lang/bo/stop_words.py b/spacy/lang/bo/stop_words.py
index 158e148b00b..407242c849b 100644
--- a/spacy/lang/bo/stop_words.py
+++ b/spacy/lang/bo/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://zenodo.org/records/10148636
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 འི་
 །
 དུ་
@@ -193,4 +194,5 @@
 གིང་
 ཚ་
 ཀྱང
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ca/stop_words.py b/spacy/lang/ca/stop_words.py
index 90cce5de885..1a87b2f9dbe 100644
--- a/spacy/lang/ca/stop_words.py
+++ b/spacy/lang/ca/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abans ací ah així això al aleshores algun alguna algunes alguns alhora allà allí allò
 als altra altre altres amb ambdues ambdós anar ans apa aquell aquella aquelles aquells
 aquest aquesta aquestes aquests aquí
@@ -47,4 +48,5 @@
 
 va vaig vam van vas veu vosaltres vostra vostre vostres
 
-""".split())
+""".split()
+)
diff --git a/spacy/lang/cs/stop_words.py b/spacy/lang/cs/stop_words.py
index 35db9fedc86..f61f424f6f4 100644
--- a/spacy/lang/cs/stop_words.py
+++ b/spacy/lang/cs/stop_words.py
@@ -1,7 +1,8 @@
 # Source: https://github.com/Alir3z4/stop-words
 # Source: https://github.com/stopwords-iso/stopwords-cs/blob/master/stopwords-cs.txt
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 aby
 ahoj
@@ -360,4 +361,5 @@
 zatímco
 ze
 že
-""".split())
+""".split()
+)
diff --git a/spacy/lang/da/stop_words.py b/spacy/lang/da/stop_words.py
index 0e71dfde739..05b2084dde3 100644
--- a/spacy/lang/da/stop_words.py
+++ b/spacy/lang/da/stop_words.py
@@ -1,6 +1,7 @@
 # Source: Handpicked by Jens Dahl Møllerhøj.
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 af aldrig alene alle allerede alligevel alt altid anden andet andre at
 
 bag begge blandt blev blive bliver burde bør
@@ -40,4 +41,5 @@
 var ved vi via vil ville vore vores vær være været
 
 øvrigt
-""".split())
+""".split()
+)
diff --git a/spacy/lang/de/stop_words.py b/spacy/lang/de/stop_words.py
index 5fbd7428757..f52687eb9b3 100644
--- a/spacy/lang/de/stop_words.py
+++ b/spacy/lang/de/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 á a ab aber ach acht achte achten achter achtes ag alle allein allem allen
 aller allerdings alles allgemeinen als also am an andere anderen anderem andern
 anders auch auf aus ausser außer ausserdem außerdem
@@ -73,4 +74,5 @@
 
 zehn zehnte zehnten zehnter zehntes zeit zu zuerst zugleich zum zunächst zur
 zurück zusammen zwanzig zwar zwei zweite zweiten zweiter zweites zwischen
-""".split())
+""".split()
+)
diff --git a/spacy/lang/dsb/stop_words.py b/spacy/lang/dsb/stop_words.py
index 90735a6236a..376e04aa6e5 100644
--- a/spacy/lang/dsb/stop_words.py
+++ b/spacy/lang/dsb/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abo aby ako ale až
 
 daniž dokulaž
@@ -10,4 +11,5 @@
 pak pótom
 
 teke togodla
-""".split())
+""".split()
+)
diff --git a/spacy/lang/el/stop_words.py b/spacy/lang/el/stop_words.py
index b5c1c36c41f..7c436219fa9 100644
--- a/spacy/lang/el/stop_words.py
+++ b/spacy/lang/el/stop_words.py
@@ -1,6 +1,7 @@
 # Stop words
 # Link to greek stop words: https://www.translatum.gr/forum/index.php?topic=3550.0?topic=3550.0
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 αδιάκοπα αι ακόμα ακόμη ακριβώς άλλα αλλά αλλαχού άλλες άλλη άλλην
 άλλης αλλιώς αλλιώτικα άλλο άλλοι αλλοιώς αλλοιώτικα άλλον άλλος άλλοτε αλλού
 άλλους άλλων άμα άμεσα αμέσως αν ανά ανάμεσα αναμεταξύ άνευ αντί αντίπερα αντίς
@@ -82,4 +83,5 @@
 χωρίς χωριστά
 
 ω ως ωσάν ωσότου ώσπου ώστε ωστόσο ωχ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/el/tokenizer_exceptions.py b/spacy/lang/el/tokenizer_exceptions.py
index 41317ba9770..d88d4837e2a 100644
--- a/spacy/lang/el/tokenizer_exceptions.py
+++ b/spacy/lang/el/tokenizer_exceptions.py
@@ -128,7 +128,6 @@
 _exc.update(_other_exc)
 
 for h in range(1, 12 + 1):
-
     for period in ["π.μ.", "πμ"]:
         _exc[f"{h}{period}"] = [
             {ORTH: f"{h}"},
diff --git a/spacy/lang/en/stop_words.py b/spacy/lang/en/stop_words.py
index cbce281b491..1ca5cbc1670 100644
--- a/spacy/lang/en/stop_words.py
+++ b/spacy/lang/en/stop_words.py
@@ -1,5 +1,6 @@
 # Stop words
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a about above across after afterwards again against all almost alone along
 already also although always am among amongst amount an and another any anyhow
 anyone anything anyway anywhere are around as at
@@ -61,7 +62,8 @@
 whither who whoever whole whom whose why will with within without would
 
 yet you your yours yourself yourselves
-""".split())
+""".split()
+)
 
 contractions = ["n't", "'d", "'ll", "'m", "'re", "'s", "'ve"]
 STOP_WORDS.update(contractions)
diff --git a/spacy/lang/es/lemmatizer.py b/spacy/lang/es/lemmatizer.py
index ee5d38e8466..3102f3b9bc4 100644
--- a/spacy/lang/es/lemmatizer.py
+++ b/spacy/lang/es/lemmatizer.py
@@ -415,7 +415,10 @@ def lemmatize_verb_pron(
         else:
             rule = self.select_rule("verb", features)
             verb_lemma = self.lemmatize_verb(
-                verb, features - {"PronType=Prs"}, rule, index  # type: ignore[operator]
+                verb,
+                features - {"PronType=Prs"},  # type: ignore[operator]
+                rule,
+                index,
             )[0]
         pron_lemmas = []
         for pron in prons:
diff --git a/spacy/lang/es/stop_words.py b/spacy/lang/es/stop_words.py
index 5099359e843..6d28854810a 100644
--- a/spacy/lang/es/stop_words.py
+++ b/spacy/lang/es/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a acuerdo adelante ademas además afirmó agregó ahi ahora ahí al algo alguna
 algunas alguno algunos algún alli allí alrededor ambos ante anterior antes
 apenas aproximadamente aquel aquella aquellas aquello aquellos aqui aquél
@@ -75,4 +76,5 @@
 vosotras vosotros voy vuestra vuestras vuestro vuestros
 
 y ya yo
-""".split())
+""".split()
+)
diff --git a/spacy/lang/et/stop_words.py b/spacy/lang/et/stop_words.py
index 248bcb61f08..e1da1f14d5e 100644
--- a/spacy/lang/et/stop_words.py
+++ b/spacy/lang/et/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/stopwords-iso/stopwords-et
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aga
 ei
 et
@@ -36,4 +37,5 @@
 ta
 te
 ära
-""".split())
+""".split()
+)
diff --git a/spacy/lang/eu/stop_words.py b/spacy/lang/eu/stop_words.py
index 4a6661e7d20..d213b5b81a5 100644
--- a/spacy/lang/eu/stop_words.py
+++ b/spacy/lang/eu/stop_words.py
@@ -1,7 +1,8 @@
 # Source: https://github.com/stopwords-iso/stopwords-eu
 # https://www.ranks.nl/stopwords/basque
 # https://www.mustgo.com/worldlanguages/basque/
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 al
 anitz
 arabera
@@ -100,4 +101,5 @@
 zuek
 zuen
 zuten
-""".split())
+""".split()
+)
diff --git a/spacy/lang/fa/generate_verbs_exc.py b/spacy/lang/fa/generate_verbs_exc.py
index a6d79a386df..7ef82c3e8a9 100644
--- a/spacy/lang/fa/generate_verbs_exc.py
+++ b/spacy/lang/fa/generate_verbs_exc.py
@@ -611,8 +611,8 @@
 present_ends = ["م", "ی", "د", "یم", "ید", "ند"]
 
 # special case of '#هست':
-VERBS_EXC.update({conj: "هست" for conj in ["هست" + end for end in simple_ends]})
-VERBS_EXC.update({conj: "هست" for conj in ["نیست" + end for end in simple_ends]})
+VERBS_EXC.update(dict.fromkeys(["هست" + end for end in simple_ends], "هست"))
+VERBS_EXC.update(dict.fromkeys(["نیست" + end for end in simple_ends], "هست"))
 
 for verb_root in verb_roots:
     conjugations = []
@@ -648,4 +648,4 @@
             )
         )
 
-    VERBS_EXC.update({conj: (past,) if past else present for conj in conjugations})
+    VERBS_EXC.update(dict.fromkeys(conjugations, (past,) if past else present))
diff --git a/spacy/lang/fa/lex_attrs.py b/spacy/lang/fa/lex_attrs.py
index 9b0ff546e0d..065e81bd6af 100644
--- a/spacy/lang/fa/lex_attrs.py
+++ b/spacy/lang/fa/lex_attrs.py
@@ -5,7 +5,8 @@
 YE_NUN = "ین"
 
 
-_num_words = set("""
+_num_words = set(
+    """
 صفر
 یک
 دو
@@ -62,12 +63,15 @@
 کوادریلیون
 کادریلیارد
 کوینتیلیون
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 اول
 سوم
-سی‌ام""".split())
+سی‌ام""".split()
+)
 
 _ordinal_words.update({num + MIM for num in _num_words})
 _ordinal_words.update({num + ZWNJ_O_MIM for num in _num_words})
diff --git a/spacy/lang/fa/stop_words.py b/spacy/lang/fa/stop_words.py
index 93738c89263..f462f2e7a5d 100644
--- a/spacy/lang/fa/stop_words.py
+++ b/spacy/lang/fa/stop_words.py
@@ -1,5 +1,6 @@
 # Stop words from HAZM package
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 و
 در
 به
@@ -388,4 +389,5 @@
 لذا
 زاده
 گردد
-اینجا""".split())
+اینجا""".split()
+)
diff --git a/spacy/lang/fi/stop_words.py b/spacy/lang/fi/stop_words.py
index 742cacc2689..8e8dcfa565d 100644
--- a/spacy/lang/fi/stop_words.py
+++ b/spacy/lang/fi/stop_words.py
@@ -1,6 +1,7 @@
 # Source https://github.com/stopwords-iso/stopwords-fi/blob/master/stopwords-fi.txt
 # Reformatted with some minor corrections
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aiemmin aika aikaa aikaan aikaisemmin aikaisin aikana aikoina aikoo aikovat
 aina ainakaan ainakin ainoa ainoat aiomme aion aiotte aivan ajan alas alemmas
 alkuisin alkuun alla alle aloitamme aloitan aloitat aloitatte aloitattivat
@@ -105,4 +106,5 @@
 ympäri
 
 älköön älä
-""".split())
+""".split()
+)
diff --git a/spacy/lang/fr/lex_attrs.py b/spacy/lang/fr/lex_attrs.py
index 8a9dfb82a8b..9cf508a07b9 100644
--- a/spacy/lang/fr/lex_attrs.py
+++ b/spacy/lang/fr/lex_attrs.py
@@ -1,20 +1,24 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 zero un une deux trois quatre cinq six sept huit neuf dix
 onze douze treize quatorze quinze seize dix-sept dix-huit dix-neuf
 vingt trente quarante cinquante soixante soixante-dix septante quatre-vingt huitante quatre-vingt-dix nonante
 cent mille mil million milliard billion quadrillion quintillion
 sextillion septillion octillion nonillion decillion
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 premier première deuxième second seconde troisième quatrième cinquième sixième septième huitième neuvième dixième
 onzième douzième treizième quatorzième quinzième seizième dix-septième dix-huitième dix-neuvième
 vingtième trentième quarantième cinquantième soixantième soixante-dixième septantième quatre-vingtième huitantième quatre-vingt-dixième nonantième
 centième millième millionnième milliardième billionnième quadrillionnième quintillionnième
 sextillionnième septillionnième octillionnième nonillionnième decillionnième
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/fr/stop_words.py b/spacy/lang/fr/stop_words.py
index 85ffe47baef..b32ee3d7173 100644
--- a/spacy/lang/fr/stop_words.py
+++ b/spacy/lang/fr/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a à â abord afin ah ai aie ainsi ait allaient allons
 alors anterieur anterieure anterieures antérieur antérieure antérieures
 apres après as assez attendu au
@@ -79,4 +80,5 @@
 
 y
 
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ga/lemmatizer.py b/spacy/lang/ga/lemmatizer.py
index c9fbfbc193a..cffcf1d3c49 100644
--- a/spacy/lang/ga/lemmatizer.py
+++ b/spacy/lang/ga/lemmatizer.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Tuple
+from typing import List, Tuple
 
 from ...pipeline import Lemmatizer
 from ...tokens import Token
diff --git a/spacy/lang/ga/stop_words.py b/spacy/lang/ga/stop_words.py
index e32ad6431f6..4ef052ca58a 100644
--- a/spacy/lang/ga/stop_words.py
+++ b/spacy/lang/ga/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a ach ag agus an aon ar arna as
 
 ba beirt bhúr
@@ -38,4 +39,5 @@
 í
 
 ó ón óna ónár
-""".split())
+""".split()
+)
diff --git a/spacy/lang/gd/stop_words.py b/spacy/lang/gd/stop_words.py
index 6f2c2856bec..d5132c35e31 100644
--- a/spacy/lang/gd/stop_words.py
+++ b/spacy/lang/gd/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 'ad
 'ar
 'd # iad
@@ -381,4 +382,5 @@
 ì
 ò
 ó
-""".split("\n"))
+""".split("\n")
+)
diff --git a/spacy/lang/grc/stop_words.py b/spacy/lang/grc/stop_words.py
index 51f5e9d9dac..cbb766a8ce1 100644
--- a/spacy/lang/grc/stop_words.py
+++ b/spacy/lang/grc/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 αὐτῷ αὐτοῦ αὐτῆς αὐτόν αὐτὸν αὐτῶν αὐτὸς αὐτὸ αὐτό αὐτός αὐτὴν αὐτοῖς αὐτοὺς αὔτ' αὐτὰ αὐτῇ αὐτὴ
 αὐτὼ αὑταὶ καὐτὸς αὐτά αὑτός αὐτοῖσι αὐτοῖσιν αὑτὸς αὐτήν αὐτοῖσί αὐτοί αὐτοὶ αὐτοῖο αὐτάων αὐτὰς
 αὐτέων αὐτώ αὐτάς αὐτούς αὐτή αὐταί αὐταὶ αὐτῇσιν τὠυτῷ τὠυτὸ ταὐτὰ ταύτῃ αὐτῇσι αὐτῇς αὐταῖς αὐτᾶς αὐτὰν ταὐτὸν
@@ -56,4 +57,5 @@
 
  ὣς ὡς ὥς ὧς ὥστ' ὥστε ὥσθ' ὤ ὢ 
  
- """.split())
+ """.split()
+)
diff --git a/spacy/lang/gu/stop_words.py b/spacy/lang/gu/stop_words.py
index 1d11a3ebd96..2c859681b05 100644
--- a/spacy/lang/gu/stop_words.py
+++ b/spacy/lang/gu/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 એમ
 આ
 એ
@@ -83,4 +84,5 @@
 દર
 એટલો
 પરંતુ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/he/stop_words.py b/spacy/lang/he/stop_words.py
index ea486722475..23bb5176de9 100644
--- a/spacy/lang/he/stop_words.py
+++ b/spacy/lang/he/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 אני
 את
 אתה
@@ -217,4 +218,5 @@
 אחרות
 אשר
 או
-""".split())
+""".split()
+)
diff --git a/spacy/lang/hi/stop_words.py b/spacy/lang/hi/stop_words.py
index 9bc57bd3136..475b07da152 100644
--- a/spacy/lang/hi/stop_words.py
+++ b/spacy/lang/hi/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/taranjeet/hindi-tokenizer/blob/master/stopwords.txt, https://data.mendeley.com/datasets/bsr3frvvjc/1#file-a21d5092-99d7-45d8-b044-3ae9edd391c6
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 अंदर
 अत
 अदि
@@ -234,4 +235,5 @@
 होते
 होना
 होने
-""".split())
+""".split()
+)
diff --git a/spacy/lang/hr/stop_words.py b/spacy/lang/hr/stop_words.py
index 769ebe4db53..dd10f792d01 100644
--- a/spacy/lang/hr/stop_words.py
+++ b/spacy/lang/hr/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords-iso/stopwords-hr
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 ah
 aha
@@ -339,4 +340,5 @@
 željeo
 zimus
 zum
-""".split())
+""".split()
+)
diff --git a/spacy/lang/hsb/stop_words.py b/spacy/lang/hsb/stop_words.py
index 86021f555c1..e6fedaf4c92 100644
--- a/spacy/lang/hsb/stop_words.py
+++ b/spacy/lang/hsb/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abo ale ani
 
 dokelž
@@ -14,4 +15,5 @@
 tež tohodla
 
 zo zoby
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ht/lemmatizer.py b/spacy/lang/ht/lemmatizer.py
index 52bf23d2390..7687865c300 100644
--- a/spacy/lang/ht/lemmatizer.py
+++ b/spacy/lang/ht/lemmatizer.py
@@ -1,6 +1,5 @@
 from typing import List, Tuple
 
-from ...lookups import Lookups
 from ...pipeline import Lemmatizer
 from ...tokens import Token
 
diff --git a/spacy/lang/ht/lex_attrs.py b/spacy/lang/ht/lex_attrs.py
index 27a535dd746..ab1a39a8234 100644
--- a/spacy/lang/ht/lex_attrs.py
+++ b/spacy/lang/ht/lex_attrs.py
@@ -1,20 +1,24 @@
 from ...attrs import LIKE_NUM, NORM
 
 # Cardinal numbers in Creole
-_num_words = set("""
+_num_words = set(
+    """
 zewo youn en de twa kat senk sis sèt uit nèf dis
 onz douz trèz katoz kenz sèz disèt dizwit diznèf
 vent trant karant sinkant swasant swasann-dis
 san mil milyon milya
-""".split())
+""".split()
+)
 
 # Ordinal numbers in Creole (some are French-influenced, some simplified)
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 premye dezyèm twazyèm katryèm senkyèm sizyèm sètvyèm uitvyèm nèvyèm dizyèm
 onzèm douzyèm trèzyèm katozyèm kenzèm sèzyèm disetyèm dizwityèm diznèvyèm
 ventyèm trantyèm karantyèm sinkantyèm swasantyèm
 swasann-disyèm santyèm milyèm milyonnyèm milyadyèm
-""".split())
+""".split()
+)
 
 NORM_MAP = {
     "'m": "mwen",
diff --git a/spacy/lang/ht/stop_words.py b/spacy/lang/ht/stop_words.py
index fd85c2a197f..50998e0e5ff 100644
--- a/spacy/lang/ht/stop_words.py
+++ b/spacy/lang/ht/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a ak an ankò ant apre ap atò avan avanlè
 byen bò byenke
 
@@ -38,7 +39,8 @@
 
 men mèsi oswa osinon
 
-""".split())
+""".split()
+)
 
 # Add common contractions, with and without apostrophe variants
 contractions = ["m'", "n'", "w'", "y'", "l'", "t'", "k'"]
diff --git a/spacy/lang/hu/stop_words.py b/spacy/lang/hu/stop_words.py
index 1841557073a..e39a26d35ae 100644
--- a/spacy/lang/hu/stop_words.py
+++ b/spacy/lang/hu/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abban ahhoz ahogy ahol aki akik akkor akár alatt amely amelyek amelyekben
 amelyeket amelyet amelynek ami amikor amit amolyan amíg annak arra arról az
 azok azon azonban azt aztán azután azzal azért
@@ -57,4 +58,5 @@
 úgy új újabb újra
 
 ő őket
-""".split())
+""".split()
+)
diff --git a/spacy/lang/hy/stop_words.py b/spacy/lang/hy/stop_words.py
index 1bfd09a4b29..46d0f6b511c 100644
--- a/spacy/lang/hy/stop_words.py
+++ b/spacy/lang/hy/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 նա
 ողջը
 այստեղ
@@ -102,4 +103,5 @@
 այս
 մեջ
 թ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/id/_tokenizer_exceptions_list.py b/spacy/lang/id/_tokenizer_exceptions_list.py
index 11220a61e5b..a0b35fa1a2b 100644
--- a/spacy/lang/id/_tokenizer_exceptions_list.py
+++ b/spacy/lang/id/_tokenizer_exceptions_list.py
@@ -1,4 +1,5 @@
-ID_BASE_EXCEPTIONS = set("""
+ID_BASE_EXCEPTIONS = set(
+    """
 aba-aba
 abah-abah
 abal-abal
@@ -3897,4 +3898,5 @@
 yo-yo
 zam-zam
 zig-zag
-""".split())
+""".split()
+)
diff --git a/spacy/lang/id/stop_words.py b/spacy/lang/id/stop_words.py
index fc85f83679a..b1bfaea796e 100644
--- a/spacy/lang/id/stop_words.py
+++ b/spacy/lang/id/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ada adalah adanya adapun agak agaknya agar akan akankah akhir akhiri akhirnya
 aku akulah amat amatlah anda andalah antar antara antaranya apa apaan apabila
 apakah apalagi apatah artinya asal asalkan atas atau ataukah ataupun awal
@@ -113,4 +114,5 @@
 waduh wah wahai waktu waktunya walau walaupun wong
 
 yaitu yakin yakni yang
-""".split())
+""".split()
+)
diff --git a/spacy/lang/id/tokenizer_exceptions.py b/spacy/lang/id/tokenizer_exceptions.py
index 8dea4e97fd1..8e206262c10 100644
--- a/spacy/lang/id/tokenizer_exceptions.py
+++ b/spacy/lang/id/tokenizer_exceptions.py
@@ -156,7 +156,8 @@
     "S.T.",
     "S.T.Han",
     "S.Th.",
-    "S.Th.I" "S.TI.",
+    "S.Th.I",
+    "S.TI.",
     "S.T.P.",
     "S.TrK",
     "S.Tekp.",
@@ -210,7 +211,8 @@
     "hlm.",
     "i/o",
     "n.b.",
-    "p.p." "pjs.",
+    "p.p.",
+    "pjs.",
     "s.d.",
     "tel.",
     "u.p.",
diff --git a/spacy/lang/is/stop_words.py b/spacy/lang/is/stop_words.py
index 79f84ee6000..917fb6df444 100644
--- a/spacy/lang/is/stop_words.py
+++ b/spacy/lang/is/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/Xangis/extra-stopwords
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 afhverju
 aftan
 aftur
@@ -153,4 +154,5 @@
 því
 þær
 ætti
-""".split())
+""".split()
+)
diff --git a/spacy/lang/it/stop_words.py b/spacy/lang/it/stop_words.py
index 2a37236a9b9..42adc7904c8 100644
--- a/spacy/lang/it/stop_words.py
+++ b/spacy/lang/it/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a abbastanza abbia abbiamo abbiano abbiate accidenti ad adesso affinche agl
 agli ahime ahimè ai al alcuna alcuni alcuno all alla alle allo allora altri
 altrimenti altro altrove altrui anche ancora anni anno ansa anticipo assai
@@ -78,4 +79,5 @@
 
 v' va vale vari varia varie vario verso vi via vicino visto vita voi volta volte
 vostra vostre vostri vostro
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ja/stop_words.py b/spacy/lang/ja/stop_words.py
index 661b5183594..98560d7e28b 100644
--- a/spacy/lang/ja/stop_words.py
+++ b/spacy/lang/ja/stop_words.py
@@ -2,7 +2,8 @@
 # filtering out everything that wasn't hiragana. ー (one) was also added.
 # Considered keeping some non-hiragana words but too many place names were
 # present.
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 あ あっ あまり あり ある あるいは あれ
 い いい いう いく いずれ いっ いつ いる いわ
 うち
@@ -43,4 +44,5 @@
 を
 ん
 一
-""".split())
+""".split()
+)
diff --git a/spacy/lang/kmr/stop_words.py b/spacy/lang/kmr/stop_words.py
index 93e6ea27f0c..aee33c2b748 100644
--- a/spacy/lang/kmr/stop_words.py
+++ b/spacy/lang/kmr/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 û
 li
 bi
@@ -39,4 +40,5 @@
 hemû
 kes
 tişt
-""".split())
+""".split()
+)
diff --git a/spacy/lang/kn/stop_words.py b/spacy/lang/kn/stop_words.py
index 528e5e3a8a8..dba9740af91 100644
--- a/spacy/lang/kn/stop_words.py
+++ b/spacy/lang/kn/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ಹಲವು
 ಮೂಲಕ
 ಹಾಗೂ
@@ -81,4 +82,5 @@
 ಎಂದು
 ನನ್ನ
 ಮೇಲೆ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ko/stop_words.py b/spacy/lang/ko/stop_words.py
index d4cdbc7a112..3eba9fc8299 100644
--- a/spacy/lang/ko/stop_words.py
+++ b/spacy/lang/ko/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 이
 있
 하
@@ -62,4 +63,5 @@
 원
 잘
 놓
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ky/stop_words.py b/spacy/lang/ky/stop_words.py
index fb8e2c84b95..ea40bdfa222 100644
--- a/spacy/lang/ky/stop_words.py
+++ b/spacy/lang/ky/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ага адам айтты айтымында айтып ал алар
 алардын алган алуу алып анда андан аны
 анын ар
@@ -37,4 +38,5 @@
 үч үчүн
 
 өз
-""".split())
+""".split()
+)
diff --git a/spacy/lang/la/stop_words.py b/spacy/lang/la/stop_words.py
index 47abf7384f4..8b590bb67b3 100644
--- a/spacy/lang/la/stop_words.py
+++ b/spacy/lang/la/stop_words.py
@@ -1,6 +1,7 @@
 # Corrected Perseus list, cf. https://wiki.digitalclassicist.org/Stopwords_for_Greek_and_Latin
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ab ac ad adhuc aliqui aliquis an ante apud at atque aut autem 
 
 cum cur 
@@ -32,4 +33,5 @@
 ubi uel uero
 
 vel vero
-""".split())
+""".split()
+)
diff --git a/spacy/lang/lb/lex_attrs.py b/spacy/lang/lb/lex_attrs.py
index bbef72b9bb3..11923137418 100644
--- a/spacy/lang/lb/lex_attrs.py
+++ b/spacy/lang/lb/lex_attrs.py
@@ -1,18 +1,22 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 null eent zwee dräi véier fënnef sechs ziwen aacht néng zéng eelef zwielef dräizéng
 véierzéng foffzéng siechzéng siwwenzéng uechtzeng uechzeng nonnzéng nongzéng zwanzeg drësseg véierzeg foffzeg sechzeg siechzeg siwenzeg achtzeg achzeg uechtzeg uechzeg nonnzeg
 honnert dausend millioun milliard billioun billiard trillioun triliard
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 éischten zweeten drëtten véierten fënneften sechsten siwenten aachten néngten zéngten eeleften
 zwieleften dräizéngten véierzéngten foffzéngten siechzéngten uechtzéngen uechzéngten nonnzéngten nongzéngten zwanzegsten
 drëssegsten véierzegsten foffzegsten siechzegsten siwenzegsten uechzegsten nonnzegsten
 honnertsten dausendsten milliounsten
 milliardsten billiounsten billiardsten trilliounsten trilliardsten
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/lb/stop_words.py b/spacy/lang/lb/stop_words.py
index 386ce1222af..8f22ea6e694 100644
--- a/spacy/lang/lb/stop_words.py
+++ b/spacy/lang/lb/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 à
 äis
@@ -206,4 +207,5 @@
 zu
 zum
 zwar
-""".split())
+""".split()
+)
diff --git a/spacy/lang/lg/stop_words.py b/spacy/lang/lg/stop_words.py
index a9f99cbf40f..7bad59344fb 100644
--- a/spacy/lang/lg/stop_words.py
+++ b/spacy/lang/lg/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 abadde abalala abamu abangi abava ajja ali alina ani anti ateekeddwa atewamu
 atya awamu aweebwa ayinza ba baali babadde babalina bajja
 bajjanewankubade bali balina bandi bangi bano bateekeddwa baweebwa bayina bebombi beera bibye
@@ -14,4 +15,5 @@
 tetuteekeddwa tewali teyalina teyayina tolina tu tuyina tulina tuyina twafuna twetaaga wa wabula
 wabweru wadde waggulunnina wakati waliwobangi waliyo wandi wange wano wansi weebwa yabadde yaffe
 ye yenna yennyini yina yonna ziba zijja zonna
-""".split())
+""".split()
+)
diff --git a/spacy/lang/lij/stop_words.py b/spacy/lang/lij/stop_words.py
index 37eb163ffe7..1d6f09d27ca 100644
--- a/spacy/lang/lij/stop_words.py
+++ b/spacy/lang/lij/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a à â a-a a-e a-i a-o aiva aloa an ancheu ancon apreuvo ascì atra atre atri atro avanti avei
 
 bella belle belli bello ben
@@ -34,4 +35,5 @@
 un uña unn' unna
 
 za zu
-""".split())
+""".split()
+)
diff --git a/spacy/lang/lv/stop_words.py b/spacy/lang/lv/stop_words.py
index 4ed61996ac1..2685c243083 100644
--- a/spacy/lang/lv/stop_words.py
+++ b/spacy/lang/lv/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/stopwords-iso/stopwords-lv
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aiz
 ap
 apakš
@@ -162,4 +163,5 @@
 zem
 ārpus
 šaipus
-""".split())
+""".split()
+)
diff --git a/spacy/lang/mk/stop_words.py b/spacy/lang/mk/stop_words.py
index 90a27179852..312a456c5db 100644
--- a/spacy/lang/mk/stop_words.py
+++ b/spacy/lang/mk/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 а
 абре
 aв
@@ -810,4 +811,5 @@
 џагара-магара
 џанам
 џив-џив
-    """.split())
+    """.split()
+)
diff --git a/spacy/lang/ml/stop_words.py b/spacy/lang/ml/stop_words.py
index 64b9acc1025..441e9358699 100644
--- a/spacy/lang/ml/stop_words.py
+++ b/spacy/lang/ml/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 അത്
 ഇത്
 ആയിരുന്നു
@@ -8,4 +9,5 @@
 അന്ന്
 ഇന്ന്
 ആണ്
-""".split())
+""".split()
+)
diff --git a/spacy/lang/mr/stop_words.py b/spacy/lang/mr/stop_words.py
index 3c9c6208916..9b0cee951ab 100644
--- a/spacy/lang/mr/stop_words.py
+++ b/spacy/lang/mr/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords-iso/stopwords-mr/blob/master/stopwords-mr.txt, https://github.com/6/stopwords-json/edit/master/dist/mr.json
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 न
 अतरी
 तो
@@ -187,4 +188,5 @@
 होता
 होती
 होते
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ms/_tokenizer_exceptions_list.py b/spacy/lang/ms/_tokenizer_exceptions_list.py
index e579e316ae9..fba1dd70f94 100644
--- a/spacy/lang/ms/_tokenizer_exceptions_list.py
+++ b/spacy/lang/ms/_tokenizer_exceptions_list.py
@@ -1,6 +1,7 @@
 # from https://prpm.dbp.gov.my/cari1?keyword=
 # dbp https://en.wikipedia.org/wiki/Dewan_Bahasa_dan_Pustaka
-MS_BASE_EXCEPTIONS = set("""
+MS_BASE_EXCEPTIONS = set(
+    """
 aba-aba
 abah-abah
 abar-abar
@@ -1938,4 +1939,5 @@
 water-cooled
 world-class
 yang-yang
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ms/examples.py b/spacy/lang/ms/examples.py
index 1af439d4a5b..236e0c0f660 100644
--- a/spacy/lang/ms/examples.py
+++ b/spacy/lang/ms/examples.py
@@ -10,7 +10,8 @@
     "Berapa banyak pelajar yang akan menghadiri majlis perpisahan sekolah?",
     "Pengeluaran makanan berasal dari beberapa lokasi termasuk Cameron Highlands, Johor Bahru, dan Kuching.",
     "Syarikat XYZ telah menghasilkan 20,000 unit produk baharu dalam setahun terakhir",
-    "Kuala Lumpur merupakan ibu negara Malaysia." "Kau berada di mana semalam?",
+    "Kuala Lumpur merupakan ibu negara Malaysia.",
+    "Kau berada di mana semalam?",
     "Siapa yang akan memimpin projek itu?",
     "Siapa perdana menteri Malaysia sekarang?",
 ]
diff --git a/spacy/lang/ms/stop_words.py b/spacy/lang/ms/stop_words.py
index fc85f83679a..b1bfaea796e 100644
--- a/spacy/lang/ms/stop_words.py
+++ b/spacy/lang/ms/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ada adalah adanya adapun agak agaknya agar akan akankah akhir akhiri akhirnya
 aku akulah amat amatlah anda andalah antar antara antaranya apa apaan apabila
 apakah apalagi apatah artinya asal asalkan atas atau ataukah ataupun awal
@@ -113,4 +114,5 @@
 waduh wah wahai waktu waktunya walau walaupun wong
 
 yaitu yakin yakni yang
-""".split())
+""".split()
+)
diff --git a/spacy/lang/nb/stop_words.py b/spacy/lang/nb/stop_words.py
index bc1c54a4af3..d9ed414efdf 100644
--- a/spacy/lang/nb/stop_words.py
+++ b/spacy/lang/nb/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 alle allerede alt and andre annen annet at av
 
 bak bare bedre beste blant ble bli blir blitt bris by både
@@ -45,4 +46,5 @@
 å år
 
 ønsker
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ne/stop_words.py b/spacy/lang/ne/stop_words.py
index 95d7a375821..8470297b9f0 100644
--- a/spacy/lang/ne/stop_words.py
+++ b/spacy/lang/ne/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/sanjaalcorps/NepaliStopWords/blob/master/NepaliStopWords.txt
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 अक्सर
 अगाडि
 अगाडी
@@ -489,4 +490,5 @@
 होइन
 होकि
 होला
-""".split())
+""".split()
+)
diff --git a/spacy/lang/nl/lex_attrs.py b/spacy/lang/nl/lex_attrs.py
index 1b8602831ae..488224c2f20 100644
--- a/spacy/lang/nl/lex_attrs.py
+++ b/spacy/lang/nl/lex_attrs.py
@@ -1,17 +1,21 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 nul een één twee drie vier vijf zes zeven acht negen tien elf twaalf dertien
 veertien twintig dertig veertig vijftig zestig zeventig tachtig negentig honderd
 duizend miljoen miljard biljoen biljard triljoen triljard
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 eerste tweede derde vierde vijfde zesde zevende achtste negende tiende elfde
 twaalfde dertiende veertiende twintigste dertigste veertigste vijftigste
 zestigste zeventigste tachtigste negentigste honderdste duizendste miljoenste
 miljardste biljoenste biljardste triljoenste triljardste
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/nl/stop_words.py b/spacy/lang/nl/stop_words.py
index a88c2905199..cd4fdefdf58 100644
--- a/spacy/lang/nl/stop_words.py
+++ b/spacy/lang/nl/stop_words.py
@@ -13,7 +13,8 @@
 # should have a Dutch counterpart here.
 
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aan af al alle alles allebei alleen allen als altijd ander anders andere anderen aangaande aangezien achter achterna
 afgelopen aldus alhoewel anderzijds
 
@@ -67,4 +68,5 @@
 
 zal ze zei zelf zich zij zijn zo zonder zou zeer zeker zekere zelfde zelfs zichzelf zijnde zijne zo’n zoals zodra zouden
  zoveel zowat zulk zulke zulks zullen zult
-""".split())
+""".split()
+)
diff --git a/spacy/lang/pl/stop_words.py b/spacy/lang/pl/stop_words.py
index 4418deedc0b..075aec39167 100644
--- a/spacy/lang/pl/stop_words.py
+++ b/spacy/lang/pl/stop_words.py
@@ -1,6 +1,7 @@
 # sources: https://github.com/bieli/stopwords/blob/master/polish.stopwords.txt and https://github.com/stopwords-iso/stopwords-pl
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a aby ach acz aczkolwiek aj albo ale alez
 ależ ani az aż
 
@@ -73,4 +74,5 @@
 z za zaden zadna zadne zadnych zapewne zawsze zaś
 ze zeby znow znowu znów zostal został
 
-żaden żadna żadne żadnych że żeby""".split())
+żaden żadna żadne żadnych że żeby""".split()
+)
diff --git a/spacy/lang/pt/punctuation.py b/spacy/lang/pt/punctuation.py
index b2d63cb3d63..60bd50da1eb 100644
--- a/spacy/lang/pt/punctuation.py
+++ b/spacy/lang/pt/punctuation.py
@@ -1,6 +1,8 @@
-from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
-from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
-from ..punctuation import TOKENIZER_SUFFIXES as BASE_TOKENIZER_SUFFIXES
+from ..punctuation import (
+    TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES,
+    TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES,
+    TOKENIZER_SUFFIXES as BASE_TOKENIZER_SUFFIXES,
+)
 
 _prefixes = [r"\w{1,3}\$"] + BASE_TOKENIZER_PREFIXES
 
diff --git a/spacy/lang/pt/stop_words.py b/spacy/lang/pt/stop_words.py
index 722aef80236..ce3c86ff570 100644
--- a/spacy/lang/pt/stop_words.py
+++ b/spacy/lang/pt/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a à às área acerca ademais adeus agora ainda algo algumas alguns ali além ambas ambos antes
 ao aos apenas apoia apoio apontar após aquela aquelas aquele aqueles aqui aquilo
 as assim através atrás até aí
@@ -61,4 +62,5 @@
 vossas vosso vossos vários vão vêm vós
 
 zero
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ro/lex_attrs.py b/spacy/lang/ro/lex_attrs.py
index a5880fc2fac..736aa911ac6 100644
--- a/spacy/lang/ro/lex_attrs.py
+++ b/spacy/lang/ro/lex_attrs.py
@@ -1,13 +1,16 @@
 from ...attrs import LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 zero unu doi două trei patru cinci șase șapte opt nouă zece
 unsprezece doisprezece douăsprezece treisprezece patrusprezece cincisprezece șaisprezece șaptesprezece optsprezece nouăsprezece
 douăzeci treizeci patruzeci cincizeci șaizeci șaptezeci optzeci nouăzeci
 sută mie milion miliard bilion trilion cvadrilion catralion cvintilion sextilion septilion enșpemii
-""".split())
+""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 primul doilea treilea patrulea cincilea șaselea șaptelea optulea nouălea zecelea
 prima doua treia patra cincia șasea șaptea opta noua zecea
 unsprezecelea doisprezecelea treisprezecelea patrusprezecelea cincisprezecelea șaisprezecelea șaptesprezecelea optsprezecelea nouăsprezecelea
@@ -15,7 +18,8 @@
 douăzecilea treizecilea patruzecilea cincizecilea șaizecilea șaptezecilea optzecilea nouăzecilea sutălea
 douăzecea treizecea patruzecea cincizecea șaizecea șaptezecea optzecea nouăzecea suta
 miilea mielea mia milionulea milioana miliardulea miliardelea miliarda enșpemia
-""".split())
+""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/ro/stop_words.py b/spacy/lang/ro/stop_words.py
index c7c0801f171..d68a81c4569 100644
--- a/spacy/lang/ro/stop_words.py
+++ b/spacy/lang/ro/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords-iso/stopwords-ro
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 abia
 acea
@@ -494,4 +495,5 @@
 știu
 ți
 ție
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ru/lex_attrs.py b/spacy/lang/ru/lex_attrs.py
index 63b1cead810..e0b35bdc07f 100644
--- a/spacy/lang/ru/lex_attrs.py
+++ b/spacy/lang/ru/lex_attrs.py
@@ -1,6 +1,8 @@
 from ...attrs import LIKE_NUM
 
-_num_words = list(set("""
+_num_words = list(
+    set(
+        """
 ноль ноля нолю нолём ноле нулевой нулевого нулевому нулевым нулевом нулевая нулевую нулевое нулевые нулевых нулевыми 
 
 четверть четверти четвертью четвертей четвертям четвертями четвертях 
@@ -201,7 +203,9 @@
 квинтиллиону квинтиллионов квинтлн 
 
-i ii iii iv v vi vii viii ix x xi xii xiii xiv xv xvi xvii xviii xix xx xxi xxii xxiii xxiv xxv xxvi xxvii xxvii xxix
-""".split()))
+i ii iii iv v vi vii viii ix x xi xii xiii xiv xv xvi xvii xviii xix xx xxi xxii xxiii xxiv xxv xxvi xxvii xxviii xxix
+""".split()
+    )
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/ru/stop_words.py b/spacy/lang/ru/stop_words.py
index 3040adb52b1..d6ea6b42af9 100644
--- a/spacy/lang/ru/stop_words.py
+++ b/spacy/lang/ru/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 а авось ага агу аж ай али алло ау ах ая
 
 б будем будет будете будешь буду будут будучи будь будьте бы был была были было
@@ -106,4 +107,5 @@
 ю
 
 я явно явных яко якобы якоже
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sa/stop_words.py b/spacy/lang/sa/stop_words.py
index eaf0ffaa2c9..30302a14dcb 100644
--- a/spacy/lang/sa/stop_words.py
+++ b/spacy/lang/sa/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://gist.github.com/Akhilesh28/fe8b8e180f64b72e64751bc31cb6d323
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 अहम्
 आवाम्
 वयम्
@@ -510,4 +511,5 @@
 ह
 हन्त
 हि
-""".split())
+""".split()
+)
diff --git a/spacy/lang/si/stop_words.py b/spacy/lang/si/stop_words.py
index acae5763b52..7d29bc1b4d8 100644
--- a/spacy/lang/si/stop_words.py
+++ b/spacy/lang/si/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 සහ
 සමග
 සමඟ
@@ -190,4 +191,5 @@
 ලෙස
 පරිදි
 එහෙත්
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sk/stop_words.py b/spacy/lang/sk/stop_words.py
index 6ef4818c3a2..017e7beef39 100644
--- a/spacy/lang/sk/stop_words.py
+++ b/spacy/lang/sk/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/Ardevop-sk/stopwords-sk
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 aby
 aj
@@ -419,4 +420,5 @@
 ňou
 ňu
 že
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sl/lex_attrs.py b/spacy/lang/sl/lex_attrs.py
index 6d6b40b4546..3c1493050a1 100644
--- a/spacy/lang/sl/lex_attrs.py
+++ b/spacy/lang/sl/lex_attrs.py
@@ -2,7 +2,8 @@
 
 from ...attrs import IS_CURRENCY, LIKE_NUM
 
-_num_words = set("""
+_num_words = set(
+    """
 	nula ničla nič ena dva tri štiri pet šest sedem osem
 	devet deset enajst dvanajst trinajst štirinajst petnajst
 	šestnajst sedemnajst osemnajst devetnajst dvajset trideset štirideset
@@ -17,9 +18,11 @@
 	šestnajstih šestnajstim šestnajstimi petnajstih petnajstim petnajstimi
 	sedemnajstih sedemnajstim sedemnajstimi osemnajstih osemnajstim osemnajstimi
 	devetnajstih devetnajstim devetnajstimi dvajsetih dvajsetim dvajsetimi  
-	""".split())
+	""".split()
+)
 
-_ordinal_words = set("""
+_ordinal_words = set(
+    """
 	prvi drugi tretji četrti peti šesti sedmi osmi
 	deveti deseti enajsti dvanajsti trinajsti štirinajsti
 	petnajsti šestnajsti sedemnajsti osemnajsti devetnajsti
@@ -89,9 +92,11 @@
 	osemnajstimi devetnajstimi dvajsetimi tridesetimi štiridesetimi petdesetimi šestdesetimi
 	sedemdesetimi osemdesetimi devetdesetimi stotimi tisočimi milijontimi bilijontimi
 	trilijontimi kvadrilijontimi neštetimi
-	""".split())
+	""".split()
+)
 
-_currency_words = set("""
+_currency_words = set(
+    """
 	evro evra evru evrom evrov evroma evrih evrom evre evri evr eur
 	cent centa centu cenom centov centoma centih centom cente centi
 	dolar dolarja dolarji dolarju dolarjem dolarjev dolarjema dolarjih dolarje usd
@@ -104,7 +109,8 @@
 	jen jena jeni jenu jenom jenov jenoma jenih jene
 	kuna kuni kune kuno kun kunama kunah kunam kunami
 	marka marki marke markama markah markami 
-	""".split())
+	""".split()
+)
 
 
 def like_num(text):
diff --git a/spacy/lang/sl/punctuation.py b/spacy/lang/sl/punctuation.py
index dadb54d315c..3be83eba382 100644
--- a/spacy/lang/sl/punctuation.py
+++ b/spacy/lang/sl/punctuation.py
@@ -5,14 +5,12 @@
     CONCAT_QUOTES,
     CURRENCY,
     HYPHENS,
-    LIST_CURRENCY,
     LIST_ELLIPSES,
     LIST_ICONS,
     LIST_PUNCT,
     LIST_QUOTES,
     PUNCT,
     UNITS,
-    merge_chars,
 )
 from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
 
diff --git a/spacy/lang/sl/stop_words.py b/spacy/lang/sl/stop_words.py
index a81c00db269..8491efcb580 100644
--- a/spacy/lang/sl/stop_words.py
+++ b/spacy/lang/sl/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/stopwords-iso/stopwords-sl
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a ali 
 
 b bi bil bila bile bili bilo biti blizu bo bodo bojo bolj bom bomo 
@@ -79,4 +80,5 @@
 z za zadaj zadnji zakaj zaprta zaprti zaprto zdaj zelo zunaj
 
 ž že
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sq/stop_words.py b/spacy/lang/sq/stop_words.py
index bf1c7a7039c..f2b1a4f4a7b 100644
--- a/spacy/lang/sq/stop_words.py
+++ b/spacy/lang/sq/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/andrixh/index-albanian
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a
 afert
 ai
@@ -224,4 +225,5 @@
 vjen
 yne
 zakonisht
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sr/stop_words.py b/spacy/lang/sr/stop_words.py
index 758964a5853..5df5509d2c4 100644
--- a/spacy/lang/sr/stop_words.py
+++ b/spacy/lang/sr/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 а
 авај
 ако
@@ -388,4 +389,5 @@
 ћете
 ћеш
 ћу
-""".split())
+""".split()
+)
diff --git a/spacy/lang/sv/stop_words.py b/spacy/lang/sv/stop_words.py
index 08251bcff32..2422b2a9e5a 100644
--- a/spacy/lang/sv/stop_words.py
+++ b/spacy/lang/sv/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 aderton adertonde adjö aldrig alla allas allt alltid alltså än andra andras
 annan annat ännu artonde arton åtminstone att åtta åttio åttionde åttonde av
 även
@@ -61,4 +62,5 @@
 vad vänster vänstra var vår vara våra varför varifrån varit varken värre
 varsågod vart vårt vem vems verkligen vi vid vidare viktig viktigare viktigast
 viktigt vilka vilken vilket vill
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ta/stop_words.py b/spacy/lang/ta/stop_words.py
index d6ef21f3b0a..abbff949d79 100644
--- a/spacy/lang/ta/stop_words.py
+++ b/spacy/lang/ta/stop_words.py
@@ -1,6 +1,7 @@
 # Stop words
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ஒரு
 என்று
 மற்றும்
@@ -126,4 +127,5 @@
 வரையில்
 சற்று
 எனக்
-""".split())
+""".split()
+)
diff --git a/spacy/lang/te/stop_words.py b/spacy/lang/te/stop_words.py
index d2834260898..b18dab697da 100644
--- a/spacy/lang/te/stop_words.py
+++ b/spacy/lang/te/stop_words.py
@@ -1,6 +1,7 @@
 # Source: https://github.com/Xangis/extra-stopwords (MIT License)
 
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 అందరూ
 అందుబాటులో
 అడగండి
@@ -51,4 +52,5 @@
 వేరుగా
 వ్యతిరేకంగా
 సంబంధం
-""".split())
+""".split()
+)
diff --git a/spacy/lang/th/stop_words.py b/spacy/lang/th/stop_words.py
index 3dd6e56525b..2823281ce95 100644
--- a/spacy/lang/th/stop_words.py
+++ b/spacy/lang/th/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ทั้งนี้ ดัง ขอ รวม หลังจาก เป็น หลัง หรือ ๆ เกี่ยวกับ ซึ่งได้แก่ ด้วยเพราะ ด้วยว่า ด้วยเหตุเพราะ
 ด้วยเหตุว่า สุดๆ เสร็จแล้ว เช่น เข้า ถ้า ถูก ถึง ต่างๆ ใคร เปิดเผย ครา รือ ตาม ใน ได้แก่ ได้แต่
 ได้ที่ ตลอดถึง นอกจากว่า นอกนั้น จริง อย่างดี ส่วน เพียงเพื่อ เดียว จัด ทั้งที ทั้งคน ทั้งตัว ไกลๆ
@@ -70,4 +71,5 @@
 แห่งนี้ แห่งโน้น แห่งไหน แหละ ให้แก่ ใหญ่ ใหญ่โต อย่างมาก อย่างยิ่ง อย่างไรก็ อย่างไรก็ได้ อย่างไรเสีย
 อย่างละ อย่างหนึ่ง อย่างๆ อัน อันจะ อันได้แก่ อันที่ อันที่จริง อันที่จะ อันเนื่องมาจาก อันละ อันๆ อาจจะ
 อาจเป็น อาจเป็นด้วย อื่น อื่นๆ เอ็ง เอา ฯ ฯล ฯลฯ 555 กำ ขอโทษ เยี่ยม นี่คือ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/ti/stop_words.py b/spacy/lang/ti/stop_words.py
index e0aaf47d3fe..9bd7122007a 100644
--- a/spacy/lang/ti/stop_words.py
+++ b/spacy/lang/ti/stop_words.py
@@ -1,7 +1,8 @@
 # Stop words from Tigrinya Wordcount: https://github.com/fgaim/Tigrinya-WordCount/blob/main/ti_stop_words.txt
 
 # Stop words
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 'ምበር 'ሞ 'ቲ 'ታ 'ኳ 'ውን 'ዚ 'የ 'ዩ 'ያ 'ዮም 'ዮን
 ልዕሊ ሒዙ ሒዛ ሕጂ መበል መን መንጎ መጠን ማለት ምስ ምባል
 ምእንቲ ምኽንያቱ ምኽንያት ምዃኑ ምዃንና ምዃኖም
@@ -22,4 +23,5 @@
 ዝነበረ ዝነበረት ዝነበሩ ዝካየድ ዝኸውን ዝኽእል ዝኾነ ዝዀነ
 የለን ይቕረብ ይብል ይኸውን ይኹን ይኽእል ደኣ ድሕሪ ድማ
 ገለ ገሊጹ ገና ገይሩ ግና ግን ጥራይ
-""".split())
+""".split()
+)
diff --git a/spacy/lang/tl/stop_words.py b/spacy/lang/tl/stop_words.py
index a7bf541990a..2560cdaed6a 100644
--- a/spacy/lang/tl/stop_words.py
+++ b/spacy/lang/tl/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 akin
 aking
 ako
@@ -146,4 +147,5 @@
 tungkol
 una
 walang
-""".split())
+""".split()
+)
diff --git a/spacy/lang/tn/stop_words.py b/spacy/lang/tn/stop_words.py
index a63a455f754..f614771dd11 100644
--- a/spacy/lang/tn/stop_words.py
+++ b/spacy/lang/tn/stop_words.py
@@ -1,5 +1,6 @@
 # Stop words
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ke gareng ga selekanyo tlhwatlhwa yo mongwe se
 sengwe fa go le jalo gongwe ba na mo tikologong
 jaaka kwa morago nna gonne ka sa pele nako teng
@@ -15,4 +16,5 @@
 bonala e tshwanang bogolo tsenya tsweetswee karolo
 sepe tlhalosa dirwa robedi robongwe lesomenngwe gaisa
 tlhano lesometlhano botlalo lekgolo
-""".split())
+""".split()
+)
diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index e80423e5150..b7d91d86f0d 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -34,11 +34,11 @@
     # host & domain names
     # mods: match is case-sensitive, so include [A-Z]
     r"(?:"  # noqa: E131
-      r"(?:"  # noqa: E131
-        r"[A-Za-z0-9\u00a1-\uffff]"  # noqa: E131
-        r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
-      r")?"
-      r"[A-Za-z0-9\u00a1-\uffff]\."
+    r"(?:"  # noqa: E131
+    r"[A-Za-z0-9\u00a1-\uffff]"  # noqa: E131
+    r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
+    r")?"
+    r"[A-Za-z0-9\u00a1-\uffff]\."
     r")+"
     # TLD identifier
     # mods: use ALPHA_LOWER instead of a wider range so that this doesn't match
@@ -111,7 +111,8 @@
     BASE_EXCEPTIONS[orth] = [{ORTH: orth}]
 
 
-emoticons = set(r"""
+emoticons = set(
+    r"""
 :)
 :-)
 :))
@@ -242,7 +243,8 @@
 ¯\(ツ)/¯
 (╯°□°）╯︵┻━┻
 ><(((*>
-""".split())
+""".split()
+)
 
 
 for orth in emoticons:
diff --git a/spacy/lang/tr/stop_words.py b/spacy/lang/tr/stop_words.py
index 5323cf32d9c..85dcff6a53a 100644
--- a/spacy/lang/tr/stop_words.py
+++ b/spacy/lang/tr/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords-iso/stopwords-tr
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 acaba
 acep
 adamakıllı
@@ -552,4 +553,5 @@
 zaten
 zati
 zira
-""".split())
+""".split()
+)
diff --git a/spacy/lang/tt/stop_words.py b/spacy/lang/tt/stop_words.py
index 8f146d9150a..44169b757e5 100644
--- a/spacy/lang/tt/stop_words.py
+++ b/spacy/lang/tt/stop_words.py
@@ -1,6 +1,7 @@
 # Tatar stopwords are from https://github.com/aliiae/stopwords-tt
 
-STOP_WORDS = set("""алай алайса алар аларга аларда алардан аларны аларның аларча
+STOP_WORDS = set(
+    """алай алайса алар аларга аларда алардан аларны аларның аларча
 алары аларын аларынга аларында аларыннан аларының алтмыш алтмышынчы алтмышынчыга
 алтмышынчыда алтмышынчыдан алтмышынчылар алтмышынчыларга алтмышынчыларда
 алтмышынчылардан алтмышынчыларны алтмышынчыларның алтмышынчыны алтмышынчының
@@ -168,4 +169,5 @@
 
 өстәп өч өчен өченче өченчегә өченчедә өченчедән өченчеләр өченчеләргә
 өченчеләрдә өченчеләрдән өченчеләрне өченчеләрнең өченчене өченченең өчләп
-өчәрләп""".split())
+өчәрләп""".split()
+)
diff --git a/spacy/lang/uk/stop_words.py b/spacy/lang/uk/stop_words.py
index 517c300070a..b11d7a044a3 100644
--- a/spacy/lang/uk/stop_words.py
+++ b/spacy/lang/uk/stop_words.py
@@ -1,4 +1,5 @@
-STOP_WORDS = set("""а
+STOP_WORDS = set(
+    """а
 або
 адже
 аж
@@ -464,4 +465,5 @@
 якій
 якого
 якої
-якщо""".split())
+якщо""".split()
+)
diff --git a/spacy/lang/ur/lex_attrs.py b/spacy/lang/ur/lex_attrs.py
index 916a47bfd19..e590ed3e303 100644
--- a/spacy/lang/ur/lex_attrs.py
+++ b/spacy/lang/ur/lex_attrs.py
@@ -5,8 +5,7 @@
 # https://en.wikibooks.org/wiki/Urdu/Vocabulary/Numbers
 # https://www.urdu-english.com/lessons/beginner/numbers
 
-_num_words = (
-    """ایک دو تین چار پانچ چھ سات آٹھ نو دس گیارہ بارہ تیرہ چودہ پندرہ سولہ سترہ
+_num_words = """ایک دو تین چار پانچ چھ سات آٹھ نو دس گیارہ بارہ تیرہ چودہ پندرہ سولہ سترہ
  اٹهارا انیس بیس اکیس بائیس تئیس چوبیس پچیس چھببیس
 ستایس اٹھائس انتيس تیس اکتیس بتیس تینتیس چونتیس پینتیس
  چھتیس سینتیس ارتیس انتالیس چالیس اکتالیس بیالیس تیتالیس
@@ -18,7 +17,6 @@
  سٹیاسی اٹھیاسی نواسی نوے اکانوے بانوے ترانوے
 چورانوے پچانوے چھیانوے ستانوے اٹھانوے ننانوے سو
 """.split()
-)
 
 # source https://www.google.com/intl/ur/inputtools/try/
 
diff --git a/spacy/lang/ur/stop_words.py b/spacy/lang/ur/stop_words.py
index 00f0dd2d6b4..abfa3649713 100644
--- a/spacy/lang/ur/stop_words.py
+++ b/spacy/lang/ur/stop_words.py
@@ -1,5 +1,6 @@
 # Source: collected from different resource on internet
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 ثھی
 خو
 گی
@@ -508,4 +509,5 @@
 ہورہی
 ثبعث
 ضت
-""".split())
+""".split()
+)
diff --git a/spacy/lang/vi/stop_words.py b/spacy/lang/vi/stop_words.py
index 9163e10938e..3481701d5ea 100644
--- a/spacy/lang/vi/stop_words.py
+++ b/spacy/lang/vi/stop_words.py
@@ -1,5 +1,6 @@
 # Source: https://github.com/stopwords/vietnamese-stopwords
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 a_lô
 a_ha
 ai
@@ -1942,4 +1943,5 @@
 ừ_ào
 ừ_ừ
 ử
-""".split("\n"))
+""".split("\n")
+)
diff --git a/spacy/lang/zh/stop_words.py b/spacy/lang/zh/stop_words.py
index d54fe689504..42ae4a1de04 100644
--- a/spacy/lang/zh/stop_words.py
+++ b/spacy/lang/zh/stop_words.py
@@ -1,6 +1,7 @@
 # stop words as whitespace-separated list
 # Chinese stop words,maybe not enough
-STOP_WORDS = set("""
+STOP_WORDS = set(
+    """
 !
 "
 #
@@ -1894,4 +1895,5 @@
 ～±
 ～＋
 ￥
-""".split())
+""".split()
+)
diff --git a/spacy/language.py b/spacy/language.py
index dcf436c65fe..8e91018254e 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1323,7 +1323,7 @@ def get_examples():
         # Make sure the config is interpolated so we can resolve subsections
         config = self.config.interpolate()
         # These are the settings provided in the [initialize] block in the config
-        I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
+        I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)  # type: ignore[arg-type]
         before_init = I["before_init"]
         if before_init is not None:
             before_init(self)
@@ -1353,7 +1353,7 @@ def get_examples():
                 proc.initialize(get_examples, nlp=self, **p_settings)
         pretrain_cfg = config.get("pretraining")
         if pretrain_cfg:
-            P = registry.resolve(pretrain_cfg, schema=ConfigSchemaPretrain)
+            P = registry.resolve(pretrain_cfg, schema=ConfigSchemaPretrain)  # type: ignore[arg-type]
             init_tok2vec(self, P, I)
         self._link_components()
         self._optimizer = sgd
@@ -1589,9 +1589,7 @@ def pipe(  # noqa: F811
         if batch_size is None:
             batch_size = self.batch_size
 
-        pipes = (
-            []
-        )  # contains functools.partial objects to easily create multiprocess worker.
+        pipes = []  # functools.partial objects used to create multiprocess workers
         for name, proc in self.pipeline:
             if name in disable:
                 continue
@@ -1626,7 +1624,11 @@ def _has_gpu_model(self, disable: Iterable[str]):
             if name in disable or not is_trainable:
                 continue
 
-            if hasattr(proc, "model") and hasattr(proc.model, "ops") and isinstance(proc.model.ops, CupyOps):  # type: ignore
+            # getattr() with a default avoids touching attributes the
+            # component may not define, so no type-ignore pragma is needed.
+            proc_model = getattr(proc, "model", None)
+            proc_ops = getattr(proc_model, "ops", None)
+            if isinstance(proc_ops, CupyOps):
                 return True
 
         return False
@@ -1821,7 +1823,7 @@ def from_config(
         orig_pretraining = config.pop("pretraining", None)
         config["components"] = {}
         if auto_fill:
-            filled = registry.fill(config, validate=validate, schema=ConfigSchema)
+            filled = registry.fill(config, validate=validate, schema=ConfigSchema)  # type: ignore[arg-type]
         else:
             filled = config
         filled["components"] = orig_pipeline
@@ -1830,7 +1832,9 @@ def from_config(
             filled["pretraining"] = orig_pretraining
             config["pretraining"] = orig_pretraining
         resolved_nlp = registry.resolve(
-            filled["nlp"], validate=validate, schema=ConfigSchemaNlp
+            filled["nlp"],
+            validate=validate,
+            schema=ConfigSchemaNlp,  # type: ignore[arg-type]
         )
         create_tokenizer = resolved_nlp["tokenizer"]
         create_vectors = resolved_nlp["vectors"]
diff --git a/spacy/matcher/dependencymatcher.pyi b/spacy/matcher/dependencymatcher.pyi
index d84a30a58b0..3d744dfce4b 100644
--- a/spacy/matcher/dependencymatcher.pyi
+++ b/spacy/matcher/dependencymatcher.pyi
@@ -48,10 +48,12 @@ class DependencyMatcher:
         *,
         on_match: Optional[
             Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any]
-        ] = ...
+        ] = ...,
     ) -> None: ...
     def has_key(self, key: Union[str, int]) -> bool: ...
-    def get(self, key: Union[str, int], default: Optional[Any] = ...) -> Tuple[
+    def get(
+        self, key: Union[str, int], default: Optional[Any] = ...
+    ) -> Tuple[
         Optional[
             Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any]
         ],
diff --git a/spacy/matcher/matcher.pyi b/spacy/matcher/matcher.pyi
index c33b534cbd2..e474d250d22 100644
--- a/spacy/matcher/matcher.pyi
+++ b/spacy/matcher/matcher.pyi
@@ -33,7 +33,7 @@ class Matcher:
         on_match: Optional[
             Callable[[Matcher, Doc, int, List[Tuple[Any, ...]]], Any]
         ] = ...,
-        greedy: Optional[str] = ...
+        greedy: Optional[str] = ...,
     ) -> None: ...
     def remove(self, key: str) -> None: ...
     def has_key(self, key: Union[str, int]) -> bool: ...
@@ -56,7 +56,7 @@ class Matcher:
         *,
         as_spans: Literal[False] = ...,
         allow_missing: bool = ...,
-        with_alignments: bool = ...
+        with_alignments: bool = ...,
     ) -> List[Tuple[int, int, int]]: ...
     @overload
     def __call__(
@@ -65,6 +65,6 @@ class Matcher:
         *,
         as_spans: Literal[True],
         allow_missing: bool = ...,
-        with_alignments: bool = ...
+        with_alignments: bool = ...,
     ) -> List[Span]: ...
     def _normalize_key(self, key: Any) -> Any: ...
diff --git a/spacy/matcher/phrasematcher.pyi b/spacy/matcher/phrasematcher.pyi
index 27f6ba373fc..0f56699d63f 100644
--- a/spacy/matcher/phrasematcher.pyi
+++ b/spacy/matcher/phrasematcher.pyi
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union, overload
+from typing import Any, Callable, List, Optional, Tuple, Union, overload
 
 from ..compat import Literal
 from ..tokens import Doc, Span
diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx
index ccc830e35c1..a71f85f6e63 100644
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@@ -57,7 +57,7 @@ cdef class PhraseMatcher:
                 attr = "ORTH"
             if attr == "IS_SENT_START":
                 attr = "SENT_START"
-            if attr.lower() not in TokenPattern().dict():
+            if attr.lower() not in TokenPattern().model_dump():
                 raise ValueError(Errors.E152.format(attr=attr))
             self.attr = IDS.get(attr)
 
diff --git a/spacy/ml/_character_embed.py b/spacy/ml/_character_embed.py
index fde73f35b5b..8cc4d25743e 100644
--- a/spacy/ml/_character_embed.py
+++ b/spacy/ml/_character_embed.py
@@ -4,7 +4,6 @@
 from thinc.types import Floats2d
 
 from ..tokens import Doc
-from ..util import registry
 
 
 def CharacterEmbed(nM: int, nC: int) -> Model[List[Doc], List[Floats2d]]:
diff --git a/spacy/ml/_precomputable_affine.py b/spacy/ml/_precomputable_affine.py
index cdcac0c3812..464c32594dc 100644
--- a/spacy/ml/_precomputable_affine.py
+++ b/spacy/ml/_precomputable_affine.py
@@ -1,7 +1,5 @@
 from thinc.api import Model, normal_init
 
-from ..util import registry
-
 
 def PrecomputableAffine(nO, nI, nF, nP, dropout=0.1):
     model = Model(
diff --git a/spacy/ml/callbacks.py b/spacy/ml/callbacks.py
index fefb170ba21..d9976cea80a 100644
--- a/spacy/ml/callbacks.py
+++ b/spacy/ml/callbacks.py
@@ -2,14 +2,12 @@
 import inspect
 import types
 import warnings
-from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Set, Type
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Set
 
 from thinc.layers import with_nvtx_range
-from thinc.model import Model, wrap_model_recursive
 from thinc.util import use_nvtx_range
 
 from ..errors import Warnings
-from ..util import registry
 
 if TYPE_CHECKING:
     # This lets us add type hints for mypy etc. without causing circular imports
diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py
index d571973122e..9f54b48899e 100644
--- a/spacy/ml/extract_ngrams.py
+++ b/spacy/ml/extract_ngrams.py
@@ -1,7 +1,6 @@
 from thinc.api import Model
 
 from ..attrs import LOWER
-from ..util import registry
 
 
 def extract_ngrams(ngram_size: int, attr: int = LOWER) -> Model:
diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py
index d3456b705a6..925bfd45c31 100644
--- a/spacy/ml/extract_spans.py
+++ b/spacy/ml/extract_spans.py
@@ -3,8 +3,6 @@
 from thinc.api import Model, to_numpy
 from thinc.types import Ints1d, Ragged
 
-from ..util import registry
-
 
 def extract_spans() -> Model[Tuple[Ragged, Ragged], Ragged]:
     """Extract spans from a sequence of source arrays, as specified by an array
diff --git a/spacy/ml/featureextractor.py b/spacy/ml/featureextractor.py
index fb4e3c39aea..ad376e15f25 100644
--- a/spacy/ml/featureextractor.py
+++ b/spacy/ml/featureextractor.py
@@ -1,6 +1,6 @@
 from typing import Callable, List, Tuple, Union
 
-from thinc.api import Model, registry
+from thinc.api import Model
 from thinc.types import Ints2d
 
 from ..tokens import Doc
diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 8b12720db20..05ad9a27287 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -23,7 +23,6 @@
     get_candidates_batch,
 )
 from ...tokens import Doc, Span
-from ...util import registry
 from ...vocab import Vocab
 from ..extract_spans import extract_spans
 
@@ -122,7 +121,7 @@ def create_candidates() -> Callable[[KnowledgeBase, Span], Iterable[Candidate]]:
     return get_candidates
 
 
-def create_candidates_batch() -> (
-    Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]]
-):
+def create_candidates_batch() -> Callable[
+    [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]
+]:
     return get_candidates_batch
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index 7c68fe48126..9beecf878ad 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -1,5 +1,5 @@
 from functools import partial
-from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Callable, Iterable, List, Optional, Tuple, cast
 
 import numpy
 from thinc.api import (
@@ -21,7 +21,7 @@
 
 from ...attrs import ID, ORTH
 from ...errors import Errors
-from ...util import OOV_RANK, registry
+from ...util import OOV_RANK
 from ...vectors import Mode as VectorsMode
 
 if TYPE_CHECKING:
@@ -199,7 +199,7 @@ def mlm_initialize(model: Model, X=None, Y=None):
         layers=[wrapped_model],
         init=mlm_initialize,
         refs={"wrapped": wrapped_model},
-        dims={dim: None for dim in wrapped_model.dim_names},
+        dims=dict.fromkeys(wrapped_model.dim_names),
     )
     mlm_model.set_ref("wrapped", wrapped_model)
     return mlm_model
diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py
index 9ff0ac8ba3c..20b8f6d6e80 100644
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, cast
+from typing import List, Optional
 
 from thinc.api import Linear, Model, chain, list2array, use_ops, zero_init
 from thinc.types import Floats2d
@@ -6,7 +6,6 @@
 from ...compat import Literal
 from ...errors import Errors
 from ...tokens import Doc
-from ...util import registry
 from .._precomputable_affine import PrecomputableAffine
 from ..tb_framework import TransitionModel
 
diff --git a/spacy/ml/models/span_finder.py b/spacy/ml/models/span_finder.py
index 8081ed92b70..226b736c7eb 100644
--- a/spacy/ml/models/span_finder.py
+++ b/spacy/ml/models/span_finder.py
@@ -4,7 +4,6 @@
 from thinc.types import Floats1d, Floats2d
 
 from ...tokens import Doc
-from ...util import registry
 
 InT = List[Doc]
 OutT = Floats2d
diff --git a/spacy/ml/models/spancat.py b/spacy/ml/models/spancat.py
index 91dfb41ed7f..697d1df4d35 100644
--- a/spacy/ml/models/spancat.py
+++ b/spacy/ml/models/spancat.py
@@ -18,7 +18,6 @@
 from thinc.types import Floats2d, Ragged
 
 from ...tokens import Doc
-from ...util import registry
 from ..extract_spans import extract_spans
 
 
diff --git a/spacy/ml/models/tagger.py b/spacy/ml/models/tagger.py
index aec4276dbd8..d3b090de005 100644
--- a/spacy/ml/models/tagger.py
+++ b/spacy/ml/models/tagger.py
@@ -4,7 +4,6 @@
 from thinc.types import Floats2d
 
 from ...tokens import Doc
-from ...util import registry
 
 
 def build_tagger_model(
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index 49c0dd7077c..8194ab3101e 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -36,7 +36,6 @@
 from ...attrs import ORTH
 from ...errors import Errors
 from ...tokens import Doc
-from ...util import registry
 from ..extract_ngrams import extract_ngrams
 from ..staticvectors import StaticVectors
 from .tok2vec import get_tok2vec_width
diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index b2b803b6ed0..ade84274475 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -17,14 +17,13 @@
     with_array,
     with_padded,
 )
-from thinc.types import Floats2d, Ints1d, Ints2d, Ragged
+from thinc.types import Floats2d, Ints2d, Ragged
 
 from ...attrs import intify_attr
 from ...errors import Errors
 from ...ml import _character_embed
 from ...pipeline.tok2vec import Tok2VecListener
 from ...tokens import Doc
-from ...util import registry
 from ..featureextractor import FeatureExtractor
 from ..staticvectors import StaticVectors
 
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index 122ef379544..d90acdaf008 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -1,7 +1,7 @@
 import warnings
-from typing import Callable, List, Optional, Sequence, Tuple, cast
+from typing import Callable, List, Optional, Tuple, cast
 
-from thinc.api import Model, Ops, registry
+from thinc.api import Model, Ops
 from thinc.initializers import glorot_uniform_init
 from thinc.types import Floats1d, Floats2d, Ints1d, Ragged
 from thinc.util import partial
@@ -19,7 +19,7 @@ def StaticVectors(
     *,
     dropout: Optional[float] = None,
     init_W: Callable = glorot_uniform_init,
-    key_attr: str = "ORTH"
+    key_attr: str = "ORTH",
 ) -> Model[List[Doc], Ragged]:
     """Embed Doc objects with their vocab's vectors table, applying a learned
     linear projection to control the dimensionality. If a dropout rate is
diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index 16c894f6c5c..e538b9e88c0 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -1,6 +1,5 @@
 from thinc.api import Model, noop
 
-from ..util import registry
 from .parser_model import ParserStepModel
 
 
diff --git a/spacy/pipe_analysis.py b/spacy/pipe_analysis.py
index d26884487d3..b564b466e50 100644
--- a/spacy/pipe_analysis.py
+++ b/spacy/pipe_analysis.py
@@ -23,7 +23,7 @@ def validate_attrs(values: Iterable[str]) -> Iterable[str]:
     values (Iterable[str]): The string attributes to check, e.g. `["token.pos"]`.
     RETURNS (Iterable[str]): The checked attributes.
     """
-    data = dot_to_dict({value: True for value in values})
+    data = dot_to_dict(dict.fromkeys(values, True))
     objs = {"doc": Doc, "token": Token, "span": Span}
     for obj_key, attrs in data.items():
         if obj_key == "span":
@@ -100,7 +100,7 @@ def analyze_pipes(
         all_attrs.update(meta.requires)
         result["summary"][name] = {key: getattr(meta, key, None) for key in keys}
         prev_pipes = nlp.pipeline[:i]
-        requires = {annot: False for annot in meta.requires}
+        requires = dict.fromkeys(meta.requires, False)
         if requires:
             for prev_name, prev_pipe in prev_pipes:
                 prev_meta = nlp.get_pipe_meta(prev_name)
diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index 89f2861ceac..ef7a076b6cd 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -1,12 +1,16 @@
 from collections import defaultdict
 from typing import Any, Dict, List, Union
 
-try:
-    from pydantic.v1 import BaseModel, Field, ValidationError
-    from pydantic.v1.types import StrictBool, StrictInt, StrictStr
-except ImportError:
-    from pydantic import BaseModel, Field, ValidationError  # type: ignore
-    from pydantic.types import StrictBool, StrictInt, StrictStr  # type: ignore
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    RootModel,
+    StrictBool,
+    StrictInt,
+    StrictStr,
+    ValidationError,
+)
 
 
 class MatchNodeSchema(BaseModel):
@@ -15,20 +19,18 @@ class MatchNodeSchema(BaseModel):
     prefix_tree: StrictInt = Field(..., title="Prefix tree")
     suffix_tree: StrictInt = Field(..., title="Suffix tree")
 
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
 class SubstNodeSchema(BaseModel):
     orig: Union[int, StrictStr] = Field(..., title="Original substring")
     subst: Union[int, StrictStr] = Field(..., title="Replacement substring")
 
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
-class EditTreeSchema(BaseModel):
-    __root__: Union[MatchNodeSchema, SubstNodeSchema]
+class EditTreeSchema(RootModel[Union[MatchNodeSchema, SubstNodeSchema]]):
+    pass
 
 
 def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
@@ -38,7 +40,7 @@ def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
     RETURNS (List[str]): A list of error messages, if available.
     """
     try:
-        EditTreeSchema.parse_obj(obj)
+        EditTreeSchema.model_validate(obj)
         return []
     except ValidationError as e:
         errors = e.errors()
diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
index cc1e2e37a64..d4f96ec014b 100644
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
@@ -14,12 +13,14 @@
 from ..tokens import Doc, Span
 from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
 from ..training import Example
-from ..util import SimpleFrozenList, registry
+from ..util import SimpleFrozenList
 from ..vocab import Vocab
 from .pipe import Pipe
 
 MatcherPatternType = List[Dict[Union[int, str], Any]]
-AttributeRulerPatternType = Dict[str, Union[MatcherPatternType, Dict, int]]
+AttributeRulerPatternType = Dict[
+    str, Union[List[MatcherPatternType], MatcherPatternType, Dict, int]
+]
 TagMapType = Dict[str, Dict[Union[int, str], Union[int, str]]]
 MorphRulesType = Dict[str, Dict[str, Dict[Union[int, str], Union[int, str]]]]
 
@@ -137,7 +138,8 @@ def match(self, doc: Doc):
         matches = self.matcher(doc, allow_missing=True, as_spans=False)
         # Sort by the attribute ID, so that later rules have precedence
         matches = [
-            (int(self.vocab.strings[m_id]), m_id, s, e) for m_id, s, e in matches  # type: ignore
+            (int(self.vocab.strings[m_id]), m_id, s, e)
+            for m_id, s, e in matches  # type: ignore
         ]
         matches.sort()
         return matches
diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index 0941b43c1ce..77f033b1c48 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from collections import Counter
 from itertools import islice
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 6a1ed11dfc5..4b23fee6249 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -1,6 +1,5 @@
 import importlib
 import random
-import sys
 from itertools import islice
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Union
@@ -16,9 +15,8 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Span
 from ..training import Example, validate_examples, validate_get_examples
-from ..util import SimpleFrozenList, registry
+from ..util import SimpleFrozenList
 from ..vocab import Vocab
-from .legacy.entity_linker import EntityLinker_v1
 from .pipe import deserialize_config
 from .trainable_pipe import TrainablePipe
 
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 2b8c9830720..0728c3f0006 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 import warnings
 from collections import defaultdict
 from pathlib import Path
@@ -14,7 +13,7 @@
 from ..scorer import get_ner_prf
 from ..tokens import Doc, Span
 from ..training import Example
-from ..util import SimpleFrozenList, ensure_path, from_disk, registry, to_disk
+from ..util import SimpleFrozenList, ensure_path, from_disk, to_disk
 from .pipe import Pipe
 
 DEFAULT_ENT_ID_SEP = "||"
diff --git a/spacy/pipeline/factories.py b/spacy/pipeline/factories.py
index f796f2dc8a5..8c71067b32e 100644
--- a/spacy/pipeline/factories.py
+++ b/spacy/pipeline/factories.py
@@ -14,9 +14,10 @@
 )
 
 # Import factory default configurations
-from ..pipeline.entity_linker import DEFAULT_NEL_MODEL, EntityLinker, EntityLinker_v1
+from ..pipeline.entity_linker import DEFAULT_NEL_MODEL, EntityLinker
 from ..pipeline.entityruler import DEFAULT_ENT_ID_SEP, EntityRuler
 from ..pipeline.functions import DocCleaner, TokenSplitter
+from ..pipeline.legacy import EntityLinker_v1
 from ..pipeline.lemmatizer import Lemmatizer
 from ..pipeline.morphologizer import DEFAULT_MORPH_MODEL, Morphologizer
 from ..pipeline.multitask import DEFAULT_MT_MODEL, MultitaskObjective
@@ -24,8 +25,8 @@
 from ..pipeline.sentencizer import Sentencizer
 from ..pipeline.senter import DEFAULT_SENTER_MODEL, SentenceRecognizer
 from ..pipeline.span_finder import DEFAULT_SPAN_FINDER_MODEL, SpanFinder
-from ..pipeline.span_ruler import DEFAULT_SPANS_KEY as SPAN_RULER_DEFAULT_SPANS_KEY
 from ..pipeline.span_ruler import (
+    DEFAULT_SPANS_KEY as SPAN_RULER_DEFAULT_SPANS_KEY,
     SpanRuler,
     prioritize_existing_ents_filter,
     prioritize_new_ents_filter,
diff --git a/spacy/pipeline/functions.py b/spacy/pipeline/functions.py
index e4a3d6d1d5b..b2aa8b708c8 100644
--- a/spacy/pipeline/functions.py
+++ b/spacy/pipeline/functions.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 import warnings
 from typing import Any, Dict
 
diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py
index e8d467ef8db..f518e1072ac 100644
--- a/spacy/pipeline/lemmatizer.py
+++ b/spacy/pipeline/lemmatizer.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 import warnings
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
@@ -13,7 +12,7 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Token
 from ..training import Example
-from ..util import SimpleFrozenList, logger, registry
+from ..util import SimpleFrozenList, logger
 from ..vocab import Vocab
 from .pipe import Pipe
 
diff --git a/spacy/pipeline/pipe.pyi b/spacy/pipeline/pipe.pyi
index 9a1c11cefea..55cfd1fec95 100644
--- a/spacy/pipeline/pipe.pyi
+++ b/spacy/pipeline/pipe.pyi
@@ -7,7 +7,6 @@ from typing import (
     Iterator,
     List,
     NoReturn,
-    Optional,
     Tuple,
     Union,
 )
diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py
index 26c9efb6a9d..7ee19de04b0 100644
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 
 from thinc.api import Config, Model, Optimizer, set_dropout_rate
@@ -10,7 +9,6 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Span
 from ..training import Example
-from ..util import registry
 from .spancat import DEFAULT_SPANS_KEY
 from .trainable_pipe import TrainablePipe
 
diff --git a/spacy/pipeline/span_ruler.py b/spacy/pipeline/span_ruler.py
index 98287ba1d22..703eda61561 100644
--- a/spacy/pipeline/span_ruler.py
+++ b/spacy/pipeline/span_ruler.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 import warnings
 from functools import partial
 from pathlib import Path
@@ -27,7 +26,7 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Span
 from ..training import Example
-from ..util import SimpleFrozenList, ensure_path, registry
+from ..util import SimpleFrozenList, ensure_path
 from .pipe import Pipe
 
 PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 805a0538f01..9b945df35b5 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from dataclasses import dataclass
 from functools import partial
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
@@ -14,7 +13,6 @@
 from ..scorer import Scorer
 from ..tokens import Doc, Span, SpanGroup
 from ..training import Example, validate_examples
-from ..util import registry
 from ..vocab import Vocab
 from .trainable_pipe import TrainablePipe
 
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 36b569edc63..7b03c7e81d4 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -1,18 +1,15 @@
 import importlib
-import sys
 from itertools import islice
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 
 import numpy
-from thinc.api import Config, Model, Optimizer, get_array_module, set_dropout_rate
-from thinc.types import Floats2d
+from thinc.api import Config, Model, Optimizer, set_dropout_rate
 
 from ..errors import Errors
 from ..language import Language
 from ..scorer import Scorer
 from ..tokens import Doc
 from ..training import Example, validate_examples, validate_get_examples
-from ..util import registry
 from ..vocab import Vocab
 from .trainable_pipe import TrainablePipe
 
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 32845490d4e..cc094bf6197 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -1,17 +1,13 @@
 import importlib
-import sys
 from itertools import islice
-from typing import Any, Callable, Dict, Iterable, List, Optional
+from typing import Any, Callable, Dict, Iterable, Optional
 
 from thinc.api import Config, Model
-from thinc.types import Floats2d
 
 from ..errors import Errors
 from ..language import Language
 from ..scorer import Scorer
-from ..tokens import Doc
 from ..training import Example, validate_get_examples
-from ..util import registry
 from ..vocab import Vocab
 from .textcat import TextCategorizer
 
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index ce0296bf5f3..4e2e5af846f 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -1,5 +1,4 @@
 import importlib
-import sys
 from itertools import islice
 from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence
 
diff --git a/spacy/schemas.py b/spacy/schemas.py
index fa987b90f19..359c3fd0f83 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -1,5 +1,4 @@
 import inspect
-import re
 from collections import defaultdict
 from enum import Enum
 from typing import (
@@ -16,34 +15,19 @@
     Union,
 )
 
-try:
-    from pydantic.v1 import (
-        BaseModel,
-        ConstrainedStr,
-        Field,
-        StrictBool,
-        StrictFloat,
-        StrictInt,
-        StrictStr,
-        ValidationError,
-        create_model,
-        validator,
-    )
-    from pydantic.v1.main import ModelMetaclass
-except ImportError:
-    from pydantic import (  # type: ignore
-        BaseModel,
-        ConstrainedStr,
-        Field,
-        StrictBool,
-        StrictFloat,
-        StrictInt,
-        StrictStr,
-        ValidationError,
-        create_model,
-        validator,
-    )
-    from pydantic.main import ModelMetaclass  # type: ignore
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    StrictBool,
+    StrictFloat,
+    StrictInt,
+    StrictStr,
+    ValidationError,
+    constr,
+    create_model,
+    field_validator,
+)
 from thinc.api import ConfigValidationError, Model, Optimizer
 from thinc.config import Promise
 
@@ -89,14 +73,9 @@ def validate(schema: Type[BaseModel], obj: Dict[str, Any]) -> List[str]:
 # Initialization
 
 
-class ArgSchemaConfig:
-    extra = "forbid"
-    arbitrary_types_allowed = True
+ArgSchemaConfig = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
-
-class ArgSchemaConfigExtra:
-    extra = "forbid"
-    arbitrary_types_allowed = True
+ArgSchemaConfigExtra = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 def get_arg_model(
@@ -105,7 +84,7 @@ def get_arg_model(
     exclude: Iterable[str] = tuple(),
     name: str = "ArgModel",
     strict: bool = True,
-) -> ModelMetaclass:
+) -> type[BaseModel]:
     """Generate a pydantic model for function arguments.
 
     func (Callable): The function to generate the schema for.
@@ -113,7 +92,7 @@ def get_arg_model(
     name (str): Name of created model class.
     strict (bool): Don't allow extra arguments if no variable keyword arguments
         are allowed on the function.
-    RETURNS (ModelMetaclass): A pydantic model.
+    RETURNS (type[BaseModel]): A pydantic model.
     """
     sig_args = {}
     try:
@@ -167,7 +146,7 @@ def validate_init_settings(
     """
     schema = get_arg_model(func, exclude=exclude, name="InitArgModel")
     try:
-        return schema(**settings).dict()
+        return schema.model_validate(settings).model_dump()
     except ValidationError as e:
         block = "initialize" if not section else f"initialize.{section}"
         title = f"Error validating initialization settings in [{block}]"
@@ -228,11 +207,10 @@ class TokenPatternString(BaseModel):
         None, alias="fuzzy9"
     )
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True  # allow alias and field name
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 
-    @validator("*", pre=True, each_item=True, allow_reuse=True)
+    @field_validator("*", mode="before")
+    @classmethod
     def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
@@ -253,11 +231,10 @@ class TokenPatternNumber(BaseModel):
     GT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">")
     LT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<")
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True  # allow alias and field name
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 
-    @validator("*", pre=True, each_item=True, allow_reuse=True)
+    @field_validator("*", mode="before")
+    @classmethod
     def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
@@ -271,11 +248,10 @@ class TokenPatternOperatorSimple(str, Enum):
     exclamation: StrictStr = StrictStr("!")
 
 
-class TokenPatternOperatorMinMax(ConstrainedStr):
-    regex = re.compile(r"^({\d+}|{\d+,\d*}|{\d*,\d+})$")
+TokenPatternOperatorMinMax = constr(pattern=r"^(\{\d+\}|\{\d+,\d*\}|\{\d*,\d+\})$")
 
 
-TokenPatternOperator = Union[TokenPatternOperatorSimple, TokenPatternOperatorMinMax]
+TokenPatternOperator = Union[TokenPatternOperatorSimple, TokenPatternOperatorMinMax]  # type: ignore[valid-type]
 StringValue = Union[TokenPatternString, StrictStr]
 NumberValue = Union[TokenPatternNumber, StrictInt, StrictFloat]
 UnderscoreValue = Union[
@@ -323,12 +299,14 @@ class TokenPattern(BaseModel):
     op: Optional[TokenPatternOperator] = None
     underscore: Optional[Dict[StrictStr, UnderscoreValue]] = Field(None, alias="_")
 
-    class Config:
-        extra = "forbid"
-        allow_population_by_field_name = True
-        alias_generator = lambda value: value.upper()
+    model_config = ConfigDict(
+        extra="forbid",
+        populate_by_name=True,
+        alias_generator=lambda value: value.upper(),
+    )
 
-    @validator("*", pre=True, allow_reuse=True)
+    @field_validator("*", mode="before")
+    @classmethod
     def raise_for_none(cls, v):
         if v is None:
             raise ValueError("None / null is not allowed")
@@ -336,10 +314,9 @@ def raise_for_none(cls, v):
 
 
 class TokenPatternSchema(BaseModel):
-    pattern: List[TokenPattern] = Field(..., min_items=1)
+    pattern: List[TokenPattern] = Field(..., min_length=1)
 
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
 # Model meta
@@ -397,9 +374,7 @@ class ConfigSchemaTraining(BaseModel):
     before_update: Optional[Callable[["Language", Dict[str, Any]], None]] = Field(..., title="Optional callback that is invoked at the start of each training step")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 class ConfigSchemaNlp(BaseModel):
@@ -415,14 +390,11 @@ class ConfigSchemaNlp(BaseModel):
     vectors: Callable = Field(..., title="Vectors implementation")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 class ConfigSchemaPretrainEmpty(BaseModel):
-    class Config:
-        extra = "forbid"
+    model_config = ConfigDict(extra="forbid")
 
 
 class ConfigSchemaPretrain(BaseModel):
@@ -439,9 +411,7 @@ class ConfigSchemaPretrain(BaseModel):
     objective: Callable[["Vocab", Model], Model] = Field(..., title="A function that creates the pretraining objective.")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 class ConfigSchemaInit(BaseModel):
@@ -450,15 +420,13 @@ class ConfigSchemaInit(BaseModel):
     lookups: Optional[Lookups] = Field(..., title="Vocabulary lookups, e.g. lexeme normalization")
     vectors: Optional[StrictStr] = Field(..., title="Path to vectors")
     init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
-    tokenizer: Dict[StrictStr, Any] = Field(..., help="Arguments to be passed into Tokenizer.initialize")
-    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., help="Arguments for TrainablePipe.initialize methods of pipeline components, keyed by component")
+    tokenizer: Dict[StrictStr, Any] = Field(..., title="Arguments to be passed into Tokenizer.initialize")
+    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., title="Arguments for TrainablePipe.initialize methods of pipeline components, keyed by component")
     before_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object before initialization")
     after_init: Optional[Callable[["Language"], "Language"]] = Field(..., title="Optional callback to modify nlp object after initialization")
     # fmt: on
 
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
 
 
 class ConfigSchema(BaseModel):
@@ -469,9 +437,7 @@ class ConfigSchema(BaseModel):
     corpora: Dict[str, Reader]
     initialize: ConfigSchemaInit
 
-    class Config:
-        extra = "allow"
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
 
 
 CONFIG_SCHEMAS = {
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index 65e851cae4e..a65cdb6fc62 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -205,11 +205,13 @@ cdef class StringStore:
         if mem is None:
             mem = Pool()
         self.mem = mem
-        yield mem
-        for key in self._transient_keys:
-            map_clear(self._map.c_map, key)
-        self._transient_keys.clear()
-        self.mem = self._non_temp_mem
+        try:
+            yield mem
+        finally:
+            for key in self._transient_keys:
+                map_clear(self._map.c_map, key)
+            self._transient_keys.clear()
+            self.mem = self._non_temp_mem
 
     def add(self, string: str, allow_transient: Optional[bool] = None) -> int:
         """Add a string to the StringStore.
diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index d72c916efb0..ef098ec1a9f 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -60,12 +60,12 @@ def test_issue1757():
     """Test comparison against None doesn't cause segfault."""
     doc = Doc(Vocab(), words=["a", "b", "c"])
     assert not doc[0] < None
-    assert not doc[0] is None
+    assert doc[0] is not None
     assert doc[0] >= None
     assert not doc[:2] < None
-    assert not doc[:2] is None
+    assert doc[:2] is not None
     assert doc[:2] >= None
-    assert not doc.vocab["a"] is None
+    assert doc.vocab["a"] is not None
     assert not doc.vocab["a"] < None
 
 
diff --git a/spacy/tests/lang/bg/test_tokenizer.py b/spacy/tests/lang/bg/test_tokenizer.py
index 2e2c45001ef..b16ef12d880 100644
--- a/spacy/tests/lang/bg/test_tokenizer.py
+++ b/spacy/tests/lang/bg/test_tokenizer.py
@@ -1,6 +1,3 @@
-import pytest
-
-
 def test_bg_tokenizer_handles_final_diacritics(bg_tokenizer):
     text = "Ня̀маше яйца̀. Ня̀маше яйца̀."
     tokens = bg_tokenizer(text)
diff --git a/spacy/tests/lang/es/test_noun_chunks.py b/spacy/tests/lang/es/test_noun_chunks.py
index 8e5fe83540c..50d49fcc28e 100644
--- a/spacy/tests/lang/es/test_noun_chunks.py
+++ b/spacy/tests/lang/es/test_noun_chunks.py
@@ -48,13 +48,13 @@
             [(0,4)]
         ),
         # Tengo un gato y un perro -> un gato, un perro
-        ( 
+        (
             ["Tengo", "un", "gato", "y", "un", "perro"],
             [0, 2, 0, 5, 5, 0],
             ["ROOT", "det", "obj", "cc", "det", "conj"],
             ["VERB", "DET", "NOUN", "CCONJ", "DET", "NOUN"],
             [(1,3), (4,6)]
-         
+
         ),
         # Dom Pedro II -> Dom Pedro II
         (
@@ -101,11 +101,11 @@
             [1, 1, 3, 1, 5, 1],
             ['det', 'ROOT', 'case', 'nmod', 'case', 'nmod'],
             ['DET', 'NOUN', 'ADP', 'PROPN', 'ADP', 'NOUN'],
-            [(0,2), (3,4), (5,6)]  
-       
+            [(0,2), (3,4), (5,6)]
+
         ),
         # El gato regordete de Susana y su amigo -> el gato regordete, Susana, su amigo
-        (  
+        (
             ['El', 'gato', 'regordete', 'de', 'Susana', 'y', 'su', 'amigo'],
             [1, 1, 1, 4, 1, 7, 7, 1],
             ['det', 'ROOT', 'amod', 'case', 'nmod', 'cc', 'det', 'conj'],
diff --git a/spacy/tests/lang/et/test_tokenizer.py b/spacy/tests/lang/et/test_tokenizer.py
index f0f8079cae8..8bee2288033 100644
--- a/spacy/tests/lang/et/test_tokenizer.py
+++ b/spacy/tests/lang/et/test_tokenizer.py
@@ -2,8 +2,7 @@
 
 ET_BASIC_TOKENIZATION_TESTS = [
     (
-        "Kedagi ei või piinata ega ebainimlikult või alandavalt kohelda "
-        "ega karistada.",
+        "Kedagi ei või piinata ega ebainimlikult või alandavalt kohelda ega karistada.",
         [
             "Kedagi",
             "ei",
diff --git a/spacy/tests/lang/fr/test_noun_chunks.py b/spacy/tests/lang/fr/test_noun_chunks.py
index 436e07b29d0..d413f1f2211 100644
--- a/spacy/tests/lang/fr/test_noun_chunks.py
+++ b/spacy/tests/lang/fr/test_noun_chunks.py
@@ -35,7 +35,7 @@
             [(0, 2)],
         ),
         # det + adj + noun
-        # Le vieux Londres  -> Le vieux Londres 
+        # Le vieux Londres  -> Le vieux Londres
         (
             ['Les', 'vieux', 'Londres'],
             [2, 2, 2],
@@ -144,13 +144,13 @@
         ),
         # Two NPs conjuncted
         # Il a un chien et un chat -> Il, un chien, un chat
-        ( 
+        (
             ['Il', 'a', 'un', 'chien', 'et', 'un', 'chat'],
             [1, 1, 3, 1, 6, 6, 3],
             ['nsubj', 'ROOT', 'det', 'obj', 'cc', 'det', 'conj'],
             ['PRON', 'VERB', 'DET', 'NOUN', 'CCONJ', 'DET', 'NOUN'],
             [(0,1), (2,4), (5,7)]
-         
+
         ),
         # Two NPs together
         # l'écrivain brésilien Aníbal Machado -> l'écrivain brésilien, Aníbal Machado
@@ -195,12 +195,12 @@
             [0, 2, 0, 4, 2],
             ['ROOT', 'case', 'nmod', 'case', 'nmod'],
             ['NOUN', 'ADP', 'NOUN', 'ADP', 'PROPN'],
-            [(0,1), (2,3), (4,5)]  
-       
+            [(0,1), (2,3), (4,5)]
+
         ),
         # Several NPs
         # Le gros chat de Susana et son amie -> Le gros chat, Susana, son amie
-        (  
+        (
             ['Le', 'gros', 'chat', 'de', 'Susana', 'et', 'son', 'amie'],
             [2, 2, 2, 4, 2, 7, 7, 2],
             ['det', 'amod', 'ROOT', 'case', 'nmod', 'cc', 'det', 'conj'],
diff --git a/spacy/tests/lang/it/test_noun_chunks.py b/spacy/tests/lang/it/test_noun_chunks.py
index 7f6659ee7bd..5fd39ab01b9 100644
--- a/spacy/tests/lang/it/test_noun_chunks.py
+++ b/spacy/tests/lang/it/test_noun_chunks.py
@@ -62,7 +62,7 @@
             [(0,3)],
         ),
         # noun + adj plural
-        # mucche bianche 
+        # mucche bianche
         (
             ["mucche", "bianche"],
             [0, 0],
@@ -117,13 +117,13 @@
         ),
         # Two NPs conjuncted
         # Ho un cane e un gatto -> un cane, un gatto
-        ( 
+        (
             ['Ho', 'un', 'cane', 'e', 'un', 'gatto'],
             [0, 2, 0, 5, 5, 0],
             ['ROOT', 'det', 'obj', 'cc', 'det', 'conj'],
             ['VERB', 'DET', 'NOUN', 'CCONJ', 'DET', 'NOUN'],
             [(1,3), (4,6)]
-         
+
         ),
         # Two NPs together
         # lo scrittore brasiliano Aníbal Machado -> lo scrittore brasiliano, Aníbal Machado
@@ -177,12 +177,12 @@
             [0, 2, 0, 4, 2],
             ['ROOT', 'case', 'nmod', 'case', 'nmod'],
             ['NOUN', 'ADP', 'NOUN', 'ADP', 'PROPN'],
-            [(0,1), (2,3), (4,5)]  
-       
+            [(0,1), (2,3), (4,5)]
+
         ),
         # Several NPs
         # Il gatto grasso di Susana e la sua amica -> Il gatto grasso, Susana, sua amica
-        (  
+        (
             ['Il', 'gatto', 'grasso', 'di', 'Susana', 'e', 'la', 'sua', 'amica'],
             [1, 1, 1, 4, 1, 8, 8, 8, 1],
             ['det', 'ROOT', 'amod', 'case', 'nmod', 'cc', 'det', 'det:poss', 'conj'],
diff --git a/spacy/tests/lang/la/test_exception.py b/spacy/tests/lang/la/test_exception.py
index 966ae22cfec..9a6e6a422c5 100644
--- a/spacy/tests/lang/la/test_exception.py
+++ b/spacy/tests/lang/la/test_exception.py
@@ -1,6 +1,3 @@
-import pytest
-
-
 def test_la_tokenizer_handles_exc_in_text(la_tokenizer):
     text = "scio te omnia facturum, ut nobiscum quam primum sis"
     tokens = la_tokenizer(text)
diff --git a/spacy/tests/lang/pt/test_noun_chunks.py b/spacy/tests/lang/pt/test_noun_chunks.py
index eee96d593b1..5dd7bfd3b82 100644
--- a/spacy/tests/lang/pt/test_noun_chunks.py
+++ b/spacy/tests/lang/pt/test_noun_chunks.py
@@ -126,13 +126,13 @@
         ),
         # Two NPs conjuncted
         # Eu tenho um cachorro e um gato -> Eu, um cacharo, um gato
-        ( 
+        (
             ["Eu", "tenho", "um", "cachorro", "e", "um", "gato"],
             [1, 1, 3, 1, 6, 6, 3],
             ['nsubj', 'ROOT', 'det', 'obj', 'cc', 'det', 'conj'],
             ['PRON', 'VERB', 'DET', 'NOUN', 'CCONJ', 'DET', 'NOUN'],
             [(0,1), (2,4), (5,7)]
-         
+
         ),
         # Two NPs together
         # o escritor brasileiro Aníbal Machado -> o escritor brasileiro, Aníbal Machado
@@ -186,12 +186,12 @@
             [0, 2, 0, 4, 2],
             ['ROOT', 'case', 'nmod', 'case', 'nmod'],
             ['NOUN', 'ADP', 'NOUN', 'ADP', 'PROPN'],
-            [(0,1), (2,3), (4,5)]  
-       
+            [(0,1), (2,3), (4,5)]
+
         ),
         # Several NPs
         # O gato gordo da Susana e seu amigo -> O gato gordo, Susana, seu amigo
-        (  
+        (
             ['O', 'gato', 'gordo', 'da', 'Susana', 'e', 'seu', 'amigo'],
             [1, 1, 1, 4, 1, 7, 7, 1],
             ['det', 'ROOT', 'amod', 'case', 'nmod', 'cc', 'det', 'conj'],
diff --git a/spacy/tests/lang/sl/test_text.py b/spacy/tests/lang/sl/test_text.py
index a2a93207729..4781bebcdcb 100644
--- a/spacy/tests/lang/sl/test_text.py
+++ b/spacy/tests/lang/sl/test_text.py
@@ -1,6 +1,3 @@
-import pytest
-
-
 def test_long_text(sl_tokenizer):
     # Excerpt: European Convention on Human Rights
     text = """
diff --git a/spacy/tests/lang/sq/test_text.py b/spacy/tests/lang/sq/test_text.py
index 44eedaa5487..24d60afdf20 100644
--- a/spacy/tests/lang/sq/test_text.py
+++ b/spacy/tests/lang/sq/test_text.py
@@ -1,6 +1,3 @@
-import pytest
-
-
 def test_long_text(sq_tokenizer):
     # Excerpt: European Convention on Human Rights
     text = """
diff --git a/spacy/tests/lang/xx/test_text.py b/spacy/tests/lang/xx/test_text.py
index 477f0ebe271..a4eafdcb98e 100644
--- a/spacy/tests/lang/xx/test_text.py
+++ b/spacy/tests/lang/xx/test_text.py
@@ -1,6 +1,3 @@
-import pytest
-
-
 def test_long_text(xx_tokenizer):
     # Excerpt: Text in Skolt Sami taken from https://www.samediggi.fi
     text = """
diff --git a/spacy/tests/lang/zh/test_tokenizer.py b/spacy/tests/lang/zh/test_tokenizer.py
index cdba5e39709..cb9b4ec539a 100644
--- a/spacy/tests/lang/zh/test_tokenizer.py
+++ b/spacy/tests/lang/zh/test_tokenizer.py
@@ -1,5 +1,5 @@
 import pytest
-from thinc.api import ConfigValidationError
+from confection import ConfigValidationError
 
 from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
 
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index 45f9f4ee718..e0dc7d5a1dd 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -10,30 +10,46 @@
     # Bad patterns flagged in all cases
     ([{"XX": "foo"}], 1, 1),
     ([{"IS_ALPHA": {"==": True}}, {"LIKE_NUM": None}], 2, 1),
-    ([{"IS_PUNCT": True, "OP": "$"}], 1, 1),
+    (
+        [{"IS_PUNCT": True, "OP": "$"}],
+        2,
+        1,
+    ),  # v2: union reports 2 errors (enum + pattern)
     ([{"_": "foo"}], 1, 1),
     ('[{"TEXT": "foo"}, {"LOWER": "bar"}]', 1, 1),
     ([{"ENT_IOB": "foo"}], 1, 1),
     ([1, 2, 3], 3, 1),
-    ([{"TEXT": "foo", "OP": "{,}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{,4}4"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{a,3}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{a}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{,a}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{1,2,3}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{1, 3}"}], 1, 1),
-    ([{"TEXT": "foo", "OP": "{-2}"}], 1, 1),
+    ([{"TEXT": "foo", "OP": "{,}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{,4}4"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{a,3}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{a}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{,a}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{1,2,3}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{1, 3}"}], 2, 1),  # v2: union reports 2 errors
+    ([{"TEXT": "foo", "OP": "{-2}"}], 2, 1),  # v2: union reports 2 errors
     # Bad patterns flagged outside of Matcher
-    ([{"_": {"foo": "bar", "baz": {"IN": "foo"}}}], 2, 0),  # prev: (1, 0)
+    (
+        [{"_": {"foo": "bar", "baz": {"IN": "foo"}}}],
+        7,
+        0,
+    ),  # v2: more detailed union errors
     # Bad patterns not flagged with minimal checks
-    ([{"LENGTH": "2", "TEXT": 2}, {"LOWER": "test"}], 2, 0),
-    ([{"LENGTH": {"IN": [1, 2, "3"]}}, {"POS": {"IN": "VERB"}}], 4, 0),  # prev: (2, 0)
-    ([{"LENGTH": {"VALUE": 5}}], 2, 0),  # prev: (1, 0)
-    ([{"TEXT": {"VALUE": "foo"}}], 2, 0),  # prev: (1, 0)
+    (
+        [{"LENGTH": "2", "TEXT": 2}, {"LOWER": "test"}],
+        5,
+        0,
+    ),  # v2: more detailed union errors
+    (
+        [{"LENGTH": {"IN": [1, 2, "3"]}}, {"POS": {"IN": "VERB"}}],
+        5,
+        0,
+    ),  # v2: more detailed union errors
+    ([{"LENGTH": {"VALUE": 5}}], 3, 0),  # v2: more detailed union errors
+    ([{"TEXT": {"VALUE": "foo"}}], 2, 0),
     ([{"IS_DIGIT": -1}], 1, 0),
-    ([{"ORTH": -1}], 1, 0),
-    ([{"ENT_ID": -1}], 1, 0),
-    ([{"ENT_KB_ID": -1}], 1, 0),
+    ([{"ORTH": -1}], 2, 0),  # v2: union reports 2 errors
+    ([{"ENT_ID": -1}], 2, 0),  # v2: union reports 2 errors
+    ([{"ENT_KB_ID": -1}], 2, 0),  # v2: union reports 2 errors
     # Good patterns
     ([{"TEXT": "foo"}, {"LOWER": "bar"}], 0, 0),
     ([{"LEMMA": {"IN": ["love", "like"]}}, {"POS": "DET", "OP": "?"}], 0, 0),
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index ff07c5b454a..f4c6f056aa2 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -15,6 +15,7 @@ def test_build_dependencies():
         "cython-lint",
         "black",
         "isort",
+        "ruff",
         "mypy",
         "types-dataclasses",
         "types-mock",
@@ -37,7 +38,7 @@ def test_build_dependencies():
     req_dict = {}
 
     root_dir = Path(__file__).parent
-    req_file = root_dir / "requirements.txt"
+    req_file = root_dir / "test.txt"
     with req_file.open() as f:
         lines = f.readlines()
         for line in lines:
@@ -48,7 +49,7 @@ def test_build_dependencies():
                     req_dict[lib] = v
     # check setup.cfg and compare to requirements.txt
     # also fails when there are missing or additional libs
-    setup_file = root_dir / "setup.cfg"
+    setup_file = root_dir / "test.cfg"
     with setup_file.open() as f:
         lines = f.readlines()
 
@@ -59,9 +60,9 @@ def test_build_dependencies():
             lib, v = _parse_req(line)
             if lib and not lib.startswith("cupy") and lib not in libs_ignore_setup:
                 req_v = req_dict.get(lib, None)
-                assert (
-                    req_v is not None
-                ), "{} in setup.cfg but not in requirements.txt".format(lib)
+                assert req_v is not None, (
+                    "{} in setup.cfg but not in requirements.txt".format(lib)
+                )
                 assert (lib + v) == (lib + req_v), (
                     "{} has different version in setup.cfg and in requirements.txt: "
                     "{} and {} respectively".format(lib, v, req_v)
@@ -73,7 +74,7 @@ def test_build_dependencies():
 
     # check pyproject.toml and compare the versions of the libs to requirements.txt
     # does not fail when there are missing or additional libs
-    toml_file = root_dir / "pyproject.toml"
+    toml_file = root_dir / "test.toml"
     with toml_file.open() as f:
         lines = f.readlines()
     for line in lines:
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 1b6f49f4cde..74dd026e716 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -1,9 +1,9 @@
-from typing import Any, Callable, Dict, Iterable, Tuple
+from typing import Any, Callable, Dict, Iterable
 
 import pytest
 from numpy.testing import assert_equal
 
-from spacy import Language, registry, util
+from spacy import registry, util
 from spacy.attrs import ENT_KB_ID
 from spacy.compat import pickle
 from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase, get_candidates
@@ -496,15 +496,15 @@ def get_lowercased_candidates_batch(kb, spans):
         return [get_lowercased_candidates(kb, span) for span in spans]
 
     @registry.misc("spacy.LowercaseCandidateGenerator.v1")
-    def create_candidates() -> (
-        Callable[[InMemoryLookupKB, "Span"], Iterable[Candidate]]
-    ):
+    def create_candidates() -> Callable[
+        [InMemoryLookupKB, "Span"], Iterable[Candidate]
+    ]:
         return get_lowercased_candidates
 
     @registry.misc("spacy.LowercaseCandidateBatchGenerator.v1")
-    def create_candidates_batch() -> (
-        Callable[[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]]
-    ):
+    def create_candidates_batch() -> Callable[
+        [InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]
+    ]:
         return get_lowercased_candidates_batch
 
     # replace the pipe with a new one with with a different candidate generator
diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index 9854b391e60..71b12227f2c 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -1,10 +1,5 @@
 import pytest
-
-try:
-    from pydantic.v1 import StrictBool
-except ImportError:
-    from pydantic import StrictBool  # type: ignore
-
+from pydantic import StrictBool
 from thinc.api import ConfigValidationError
 
 from spacy.lang.en import English
@@ -51,7 +46,7 @@ def initialize(
     errors = e.value.errors
     assert len(errors) == 1
     assert errors[0]["loc"] == ("custom1",)
-    assert errors[0]["type"] == "value_error.missing"
+    assert errors[0]["type"] == "missing"
     init_cfg = {
         "tokenizer": {"custom": 1},
         "components": {name: {"custom1": "x", "custom2": 1}},
@@ -63,7 +58,7 @@ def initialize(
     errors = e.value.errors
     assert len(errors) == 1
     assert errors[0]["loc"] == ("custom2",)
-    assert errors[0]["type"] == "value_error.strictbool"
+    assert errors[0]["type"] == "bool_type"
     init_cfg = {
         "tokenizer": {"custom": 1},
         "components": {name: {"custom1": "x"}},
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index b355379bfd0..a8a6c7d136a 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -1,10 +1,5 @@
 import pytest
-
-try:
-    from pydantic.v1 import StrictInt, StrictStr
-except ImportError:
-    from pydantic import StrictInt, StrictStr  # type: ignore
-
+from pydantic import StrictInt, StrictStr
 from thinc.api import ConfigValidationError, Linear, Model
 
 import spacy
diff --git a/spacy/tests/pipeline/test_sentencizer.py b/spacy/tests/pipeline/test_sentencizer.py
index 9b1ddd53012..826086fc7fe 100644
--- a/spacy/tests/pipeline/test_sentencizer.py
+++ b/spacy/tests/pipeline/test_sentencizer.py
@@ -135,14 +135,38 @@ def test_sentencizer_serialize_bytes(en_vocab):
     # fmt: off
     "lang,text",
     [
-        ('bn', 'বাংলা ভাষা (বাঙলা, বাঙ্গলা, তথা বাঙ্গালা নামগুলোতেও পরিচিত) একটি ইন্দো-আর্য ভাষা, যা দক্ষিণ এশিয়ার বাঙালি জাতির প্রধান কথ্য ও লেখ্য ভাষা। মাতৃভাষীর সংখ্যায় বাংলা ইন্দো-ইউরোপীয় ভাষা পরিবারের চতুর্থ ও বিশ্বের ষষ্ঠ বৃহত্তম ভাষা।[৫] মোট ব্যবহারকারীর সংখ্যা অনুসারে বাংলা বিশ্বের সপ্তম বৃহত্তম ভাষা। বাংলা সার্বভৌম ভাষাভিত্তিক জাতিরাষ্ট্র বাংলাদেশের একমাত্র রাষ্ট্রভাষা তথা সরকারি ভাষা[৬] এবং ভারতের পশ্চিমবঙ্গ, ত্রিপুরা, আসামের বরাক উপত্যকার সরকারি ভাষা। বঙ্গোপসাগরে অবস্থিত আন্দামান দ্বীপপুঞ্জের প্রধান কথ্য ভাষা বাংলা। এছাড়া ভারতের ঝাড়খণ্ড, বিহার, মেঘালয়, মিজোরাম, উড়িষ্যা রাজ্যগুলোতে উল্লেখযোগ্য পরিমাণে বাংলাভাষী জনগণ রয়েছে। ভারতে হিন্দির পরেই সর্বাধিক প্রচলিত ভাষা বাংলা।[৭][৮] এছাড়াও মধ্য প্রাচ্য, আমেরিকা ও ইউরোপে উল্লেখযোগ্য পরিমাণে বাংলাভাষী অভিবাসী রয়েছে।[৯] সারা বিশ্বে সব মিলিয়ে ২৬ কোটির অধিক লোক দৈনন্দিন জীবনে বাংলা ব্যবহার করে।[২] বাংলাদেশের জাতীয় সঙ্গীত এবং ভারতের জাতীয় সঙ্গীত ও স্তোত্র বাংলাতে রচিত।'),
-        ('de', 'Die deutsche Sprache bzw. Deutsch ([dɔʏ̯t͡ʃ]; abgekürzt dt. oder dtsch.) ist eine westgermanische Sprache. Ihr Sprachraum umfasst Deutschland, Österreich, die Deutschschweiz, Liechtenstein, Luxemburg, Ostbelgien, Südtirol, das Elsass und Lothringen sowie Nordschleswig. Außerdem ist sie eine Minderheitensprache in einigen europäischen und außereuropäischen Ländern, z. B. in Rumänien und Südafrika, sowie Nationalsprache im afrikanischen Namibia.'),
-        ('hi', 'हिन्दी विश्व की एक प्रमुख भाषा है एवं भारत की राजभाषा है। केन्द्रीय स्तर पर भारत में दूसरी आधिकारिक भाषा अंग्रेजी है। यह हिंदुस्तानी भाषा की एक मानकीकृत रूप है जिसमें संस्कृत के तत्सम तथा तद्भव शब्दों का प्रयोग अधिक है और अरबी-फ़ारसी शब्द कम हैं। हिंदी संवैधानिक रूप से भारत की राजभाषा और भारत की सबसे अधिक बोली और समझी जाने वाली भाषा है। हालाँकि, हिन्दी भारत की राष्ट्रभाषा नहीं है,[3] क्योंकि भारत के संविधान में कोई भी भाषा को ऐसा दर्जा नहीं दिया गया था।[4][5] चीनी के बाद यह विश्व में सबसे अधिक बोली जाने वाली भाषा भी है। विश्व आर्थिक मंच की गणना के अनुसार यह विश्व की दस शक्तिशाली भाषाओं में से एक है।[6]'),
-        ('kn', 'ದ್ರಾವಿಡ ಭಾಷೆಗಳಲ್ಲಿ ಪ್ರಾಮುಖ್ಯವುಳ್ಳ ಭಾಷೆಯೂ ಭಾರತದ ಪುರಾತನವಾದ ಭಾಷೆಗಳಲ್ಲಿ ಒಂದೂ ಆಗಿರುವ ಕನ್ನಡ ಭಾಷೆಯನ್ನು ಅದರ ವಿವಿಧ ರೂಪಗಳಲ್ಲಿ ಸುಮಾರು ೪೫ ದಶಲಕ್ಷ ಜನರು ಆಡು ನುಡಿಯಾಗಿ ಬಳಸುತ್ತಲಿದ್ದಾರೆ. ಕನ್ನಡ ಕರ್ನಾಟಕ ರಾಜ್ಯದ ಆಡಳಿತ ಭಾಷೆ.[೧೧] ಜಗತ್ತಿನಲ್ಲಿ ಅತ್ಯಂತ ಹೆಚ್ಚು ಮಂದಿ ಮಾತನಾಡುವ ಭಾಷೆಯೆಂಬ ನೆಲೆಯಲ್ಲಿ ಇಪ್ಪತೊಂಬತ್ತನೆಯ ಸ್ಥಾನ ಕನ್ನಡಕ್ಕಿದೆ. ೨೦೧೧ರ ಜನಗಣತಿಯ ಪ್ರಕಾರ ಜಗತ್ತಿನಲ್ಲಿ ೬.೪ ಕೋಟಿ ಜನಗಳು ಕನ್ನಡ ಮಾತನಾಡುತ್ತಾರೆ ಎಂದು ತಿಳಿದುಬಂದಿದೆ. ಇವರಲ್ಲಿ ೫.೫ ಕೋಟಿ ಜನಗಳ ಮಾತೃಭಾಷೆ ಕನ್ನಡವಾಗಿದೆ. ಬ್ರಾಹ್ಮಿ ಲಿಪಿಯಿಂದ ರೂಪುಗೊಂಡ ಕನ್ನಡ ಲಿಪಿಯನ್ನು ಉಪಯೋಗಿಸಿ ಕನ್ನಡ ಭಾಷೆಯನ್ನು ಬರೆಯಲಾಗುತ್ತದೆ. ಕನ್ನಡ ಬರಹದ ಮಾದರಿಗಳಿಗೆ ಸಾವಿರದ ಐನೂರು ವರುಷಗಳ ಚರಿತ್ರೆಯಿದೆ. ಕ್ರಿ.ಶ. ಆರನೆಯ ಶತಮಾನದ ಪಶ್ಚಿಮ ಗಂಗ ಸಾಮ್ರಾಜ್ಯದ ಕಾಲದಲ್ಲಿ [೧೨] ಮತ್ತು ಒಂಬತ್ತನೆಯ ಶತಮಾನದ ರಾಷ್ಟ್ರಕೂಟ ಸಾಮ್ರಾಜ್ಯದ ಕಾಲದಲ್ಲಿ ಹಳಗನ್ನಡ ಸಾಹಿತ್ಯ ಅತ್ಯಂತ ಹೆಚ್ಚಿನ ರಾಜಾಶ್ರಯ ಪಡೆಯಿತು.[೧೩][೧೪] ಅದಲ್ಲದೆ ಸಾವಿರ ವರುಷಗಳ ಸಾಹಿತ್ಯ ಪರಂಪರೆ ಕನ್ನಡಕ್ಕಿದೆ.[೧೫]ವಿನೋಬಾ ಭಾವೆ ಕನ್ನಡ ಲಿಪಿಯನ್ನು ಲಿಪಿಗಳ ರಾಣಿಯೆಂದು ಹೊಗಳಿದ್ದಾರೆ.[ಸೂಕ್ತ ಉಲ್ಲೇಖನ ಬೇಕು]'),
-        ('si', 'ශ්‍රී ලංකාවේ ප්‍රධාන ජාතිය වන සිංහල ජනයාගේ මව් බස සිංහල වෙයි. අද වන විට මිලියන 20 කට අධික සිංහල සහ මිලියන 3කට අධික සිංහල නොවන ජනගහනයක් සිංහල භාෂාව භාවිත කරති. සිංහල‍ ඉන්දු-යුරෝපීය භාෂාවල උප ගණයක් වන ඉන්දු-ආර්ය භාෂා ගණයට අයිති වන අතර මාල දිවයින භාවිත කරන දිවෙහි භාෂාව සිංහලයෙන් පැවත එන්නකි. සිංහල ශ්‍රී ලංකාවේ නිල භාෂාවයි .'),
-        ('ta', 'தமிழ் மொழி (Tamil language) தமிழர்களினதும், தமிழ் பேசும் பலரதும் தாய்மொழி ஆகும். தமிழ் திராவிட மொழிக் குடும்பத்தின் முதன்மையான மொழிகளில் ஒன்றும் செம்மொழியும் ஆகும். இந்தியா, இலங்கை, மலேசியா, சிங்கப்பூர் ஆகிய நாடுகளில் அதிக அளவிலும், ஐக்கிய அரபு அமீரகம், தென்னாப்பிரிக்கா, மொரிசியசு, பிஜி, ரீயூனியன், டிரினிடாட் போன்ற நாடுகளில் சிறிய அளவிலும் தமிழ் பேசப்படுகிறது. 1997ஆம் ஆண்டுப் புள்ளி விவரப்படி உலகம் முழுவதிலும் 8 கோடி (80 மில்லியன்) மக்களால் பேசப்படும் தமிழ்[13], ஒரு மொழியைத் தாய்மொழியாகக் கொண்டு பேசும் மக்களின் எண்ணிக்கை அடிப்படையில் பதினெட்டாவது இடத்தில் உள்ளது.[14] இணையத்தில் அதிகம் பயன்படுத்தப்படும் இந்திய மொழிகளில் தமிழ் முதன்மையாக உள்ளதாக 2017 ஆவது ஆண்டில் நடைபெற்ற கூகுள் கணக்கெடுப்பில் தெரிய வந்தது.[15]'),
-        ('te', 'ఆంధ్ర ప్రదేశ్, తెలంగాణ రాష్ట్రాల అధికార భాష తెలుగు. భారత దేశంలో తెలుగు మాతృభాషగా మాట్లాడే 8.7 కోట్ల (2001) జనాభాతో [1] ప్రాంతీయ భాషలలో మొదటి స్థానంలో ఉంది. ప్రపంచంలోని ప్రజలు అత్యధికముగా మాట్లాడే భాషలలో 15 స్థానములోనూ, భారత దేశములో హిందీ, తర్వాత స్థానములోనూ నిలుస్తుంది. పాతవైన ప్రపంచ భాష గణాంకాల (ఎథ్నోలాగ్) ప్రకారం ప్రపంచవ్యాప్తంగా 7.4 కోట్లు మందికి మాతృభాషగా ఉంది.[2] మొదటి భాషగా మాట్లాడతారు. అతి ప్రాచీన దేశ భాషలలో సంస్కృతము తమిళముతో బాటు తెలుగు భాషను 2008 అక్టోబరు 31న భారత ప్రభుత్వము గుర్తించింది.'),
-        ('ur', 'اُردُو لشکری زبان[8] (یا جدید معیاری اردو) برصغیر کی معیاری زبانوں میں سے ایک ہے۔ یہ پاکستان کی قومی اور رابطہ عامہ کی زبان ہے، جبکہ بھارت کی چھے ریاستوں کی دفتری زبان کا درجہ رکھتی ہے۔ آئین ہند کے مطابق اسے 22 دفتری شناخت زبانوں میں شامل کیا جاچکا ہے۔ 2001ء کی مردم شماری کے مطابق اردو کو بطور مادری زبان بھارت میں 5.01% فیصد لوگ بولتے ہیں اور اس لحاظ سے یہ بھارت کی چھٹی بڑی زبان ہے جبکہ پاکستان میں اسے بطور مادری زبان 7.59% فیصد لوگ استعمال کرتے ہیں، یہ پاکستان کی پانچویں بڑی زبان ہے۔ اردو تاریخی طور پر ہندوستان کی مسلم آبادی سے جڑی ہے۔[حوالہ درکار] بعض ذخیرہ الفاظ کے علاوہ یہ زبان معیاری ہندی سے قابل فہم ہے جو اس خطے کی ہندوؤں سے منسوب ہے۔[حوالہ درکار] زبانِ اردو کو پہچان و ترقی اس وقت ملی جب برطانوی دور میں انگریز حکمرانوں نے اسے فارسی کی بجائے انگریزی کے ساتھ شمالی ہندوستان کے علاقوں اور جموں و کشمیر میں اسے سنہ 1846ء اور پنجاب میں سنہ 1849ء میں بطور دفتری زبان نافذ کیا۔ اس کے علاوہ خلیجی، یورپی، ایشیائی اور امریکی علاقوں میں اردو بولنے والوں کی ایک بڑی تعداد آباد ہے جو بنیادی طور پر جنوبی ایشیاء سے کوچ کرنے والے اہلِ اردو ہیں۔ 1999ء کے اعداد وشمار کے مطابق اردو زبان کے مجموعی متکلمین کی تعداد دس کروڑ ساٹھ لاکھ کے لگ بھگ تھی۔ اس لحاظ سے یہ دنیا کی نویں بڑی زبان ہے۔'),
+        (
+            "bn",
+            "বাংলা ভাষা (বাঙলা, বাঙ্গলা, তথা বাঙ্গালা নামগুলোতেও পরিচিত) একটি ইন্দো-আর্য ভাষা, যা দক্ষিণ এশিয়ার বাঙালি জাতির প্রধান কথ্য ও লেখ্য ভাষা। মাতৃভাষীর সংখ্যায় বাংলা ইন্দো-ইউরোপীয় ভাষা পরিবারের চতুর্থ ও বিশ্বের ষষ্ঠ বৃহত্তম ভাষা।[৫] মোট ব্যবহারকারীর সংখ্যা অনুসারে বাংলা বিশ্বের সপ্তম বৃহত্তম ভাষা। বাংলা সার্বভৌম ভাষাভিত্তিক জাতিরাষ্ট্র বাংলাদেশের একমাত্র রাষ্ট্রভাষা তথা সরকারি ভাষা[৬] এবং ভারতের পশ্চিমবঙ্গ, ত্রিপুরা, আসামের বরাক উপত্যকার সরকারি ভাষা। বঙ্গোপসাগরে অবস্থিত আন্দামান দ্বীপপুঞ্জের প্রধান কথ্য ভাষা বাংলা। এছাড়া ভারতের ঝাড়খণ্ড, বিহার, মেঘালয়, মিজোরাম, উড়িষ্যা রাজ্যগুলোতে উল্লেখযোগ্য পরিমাণে বাংলাভাষী জনগণ রয়েছে। ভারতে হিন্দির পরেই সর্বাধিক প্রচলিত ভাষা বাংলা।[৭][৮] এছাড়াও মধ্য প্রাচ্য, আমেরিকা ও ইউরোপে উল্লেখযোগ্য পরিমাণে বাংলাভাষী অভিবাসী রয়েছে।[৯] সারা বিশ্বে সব মিলিয়ে ২৬ কোটির অধিক লোক দৈনন্দিন জীবনে বাংলা ব্যবহার করে।[২] বাংলাদেশের জাতীয় সঙ্গীত এবং ভারতের জাতীয় সঙ্গীত ও স্তোত্র বাংলাতে রচিত।",
+        ),
+        (
+            "de",
+            "Die deutsche Sprache bzw. Deutsch ([dɔʏ̯t͡ʃ]; abgekürzt dt. oder dtsch.) ist eine westgermanische Sprache. Ihr Sprachraum umfasst Deutschland, Österreich, die Deutschschweiz, Liechtenstein, Luxemburg, Ostbelgien, Südtirol, das Elsass und Lothringen sowie Nordschleswig. Außerdem ist sie eine Minderheitensprache in einigen europäischen und außereuropäischen Ländern, z. B. in Rumänien und Südafrika, sowie Nationalsprache im afrikanischen Namibia.",
+        ),
+        (
+            "hi",
+            "हिन्दी विश्व की एक प्रमुख भाषा है एवं भारत की राजभाषा है। केन्द्रीय स्तर पर भारत में दूसरी आधिकारिक भाषा अंग्रेजी है। यह हिंदुस्तानी भाषा की एक मानकीकृत रूप है जिसमें संस्कृत के तत्सम तथा तद्भव शब्दों का प्रयोग अधिक है और अरबी-फ़ारसी शब्द कम हैं। हिंदी संवैधानिक रूप से भारत की राजभाषा और भारत की सबसे अधिक बोली और समझी जाने वाली भाषा है। हालाँकि, हिन्दी भारत की राष्ट्रभाषा नहीं है,[3] क्योंकि भारत के संविधान में कोई भी भाषा को ऐसा दर्जा नहीं दिया गया था।[4][5] चीनी के बाद यह विश्व में सबसे अधिक बोली जाने वाली भाषा भी है। विश्व आर्थिक मंच की गणना के अनुसार यह विश्व की दस शक्तिशाली भाषाओं में से एक है।[6]",
+        ),
+        (
+            "kn",
+            "ದ್ರಾವಿಡ ಭಾಷೆಗಳಲ್ಲಿ ಪ್ರಾಮುಖ್ಯವುಳ್ಳ ಭಾಷೆಯೂ ಭಾರತದ ಪುರಾತನವಾದ ಭಾಷೆಗಳಲ್ಲಿ ಒಂದೂ ಆಗಿರುವ ಕನ್ನಡ ಭಾಷೆಯನ್ನು ಅದರ ವಿವಿಧ ರೂಪಗಳಲ್ಲಿ ಸುಮಾರು ೪೫ ದಶಲಕ್ಷ ಜನರು ಆಡು ನುಡಿಯಾಗಿ ಬಳಸುತ್ತಲಿದ್ದಾರೆ. ಕನ್ನಡ ಕರ್ನಾಟಕ ರಾಜ್ಯದ ಆಡಳಿತ ಭಾಷೆ.[೧೧] ಜಗತ್ತಿನಲ್ಲಿ ಅತ್ಯಂತ ಹೆಚ್ಚು ಮಂದಿ ಮಾತನಾಡುವ ಭಾಷೆಯೆಂಬ ನೆಲೆಯಲ್ಲಿ ಇಪ್ಪತೊಂಬತ್ತನೆಯ ಸ್ಥಾನ ಕನ್ನಡಕ್ಕಿದೆ. ೨೦೧೧ರ ಜನಗಣತಿಯ ಪ್ರಕಾರ ಜಗತ್ತಿನಲ್ಲಿ ೬.೪ ಕೋಟಿ ಜನಗಳು ಕನ್ನಡ ಮಾತನಾಡುತ್ತಾರೆ ಎಂದು ತಿಳಿದುಬಂದಿದೆ. ಇವರಲ್ಲಿ ೫.೫ ಕೋಟಿ ಜನಗಳ ಮಾತೃಭಾಷೆ ಕನ್ನಡವಾಗಿದೆ. ಬ್ರಾಹ್ಮಿ ಲಿಪಿಯಿಂದ ರೂಪುಗೊಂಡ ಕನ್ನಡ ಲಿಪಿಯನ್ನು ಉಪಯೋಗಿಸಿ ಕನ್ನಡ ಭಾಷೆಯನ್ನು ಬರೆಯಲಾಗುತ್ತದೆ. ಕನ್ನಡ ಬರಹದ ಮಾದರಿಗಳಿಗೆ ಸಾವಿರದ ಐನೂರು ವರುಷಗಳ ಚರಿತ್ರೆಯಿದೆ. ಕ್ರಿ.ಶ. ಆರನೆಯ ಶತಮಾನದ ಪಶ್ಚಿಮ ಗಂಗ ಸಾಮ್ರಾಜ್ಯದ ಕಾಲದಲ್ಲಿ [೧೨] ಮತ್ತು ಒಂಬತ್ತನೆಯ ಶತಮಾನದ ರಾಷ್ಟ್ರಕೂಟ ಸಾಮ್ರಾಜ್ಯದ ಕಾಲದಲ್ಲಿ ಹಳಗನ್ನಡ ಸಾಹಿತ್ಯ ಅತ್ಯಂತ ಹೆಚ್ಚಿನ ರಾಜಾಶ್ರಯ ಪಡೆಯಿತು.[೧೩][೧೪] ಅದಲ್ಲದೆ ಸಾವಿರ ವರುಷಗಳ ಸಾಹಿತ್ಯ ಪರಂಪರೆ ಕನ್ನಡಕ್ಕಿದೆ.[೧೫]ವಿನೋಬಾ ಭಾವೆ ಕನ್ನಡ ಲಿಪಿಯನ್ನು ಲಿಪಿಗಳ ರಾಣಿಯೆಂದು ಹೊಗಳಿದ್ದಾರೆ.[ಸೂಕ್ತ ಉಲ್ಲೇಖನ ಬೇಕು]",
+        ),
+        (
+            "si",
+            "ශ්‍රී ලංකාවේ ප්‍රධාන ජාතිය වන සිංහල ජනයාගේ මව් බස සිංහල වෙයි. අද වන විට මිලියන 20 කට අධික සිංහල සහ මිලියන 3කට අධික සිංහල නොවන ජනගහනයක් සිංහල භාෂාව භාවිත කරති. සිංහල‍ ඉන්දු-යුරෝපීය භාෂාවල උප ගණයක් වන ඉන්දු-ආර්ය භාෂා ගණයට අයිති වන අතර මාල දිවයින භාවිත කරන දිවෙහි භාෂාව සිංහලයෙන් පැවත එන්නකි. සිංහල ශ්‍රී ලංකාවේ නිල භාෂාවයි .",
+        ),
+        (
+            "ta",
+            "தமிழ் மொழி (Tamil language) தமிழர்களினதும், தமிழ் பேசும் பலரதும் தாய்மொழி ஆகும். தமிழ் திராவிட மொழிக் குடும்பத்தின் முதன்மையான மொழிகளில் ஒன்றும் செம்மொழியும் ஆகும். இந்தியா, இலங்கை, மலேசியா, சிங்கப்பூர் ஆகிய நாடுகளில் அதிக அளவிலும், ஐக்கிய அரபு அமீரகம், தென்னாப்பிரிக்கா, மொரிசியசு, பிஜி, ரீயூனியன், டிரினிடாட் போன்ற நாடுகளில் சிறிய அளவிலும் தமிழ் பேசப்படுகிறது. 1997ஆம் ஆண்டுப் புள்ளி விவரப்படி உலகம் முழுவதிலும் 8 கோடி (80 மில்லியன்) மக்களால் பேசப்படும் தமிழ்[13], ஒரு மொழியைத் தாய்மொழியாகக் கொண்டு பேசும் மக்களின் எண்ணிக்கை அடிப்படையில் பதினெட்டாவது இடத்தில் உள்ளது.[14] இணையத்தில் அதிகம் பயன்படுத்தப்படும் இந்திய மொழிகளில் தமிழ் முதன்மையாக உள்ளதாக 2017 ஆவது ஆண்டில் நடைபெற்ற கூகுள் கணக்கெடுப்பில் தெரிய வந்தது.[15]",
+        ),
+        (
+            "te",
+            "ఆంధ్ర ప్రదేశ్, తెలంగాణ రాష్ట్రాల అధికార భాష తెలుగు. భారత దేశంలో తెలుగు మాతృభాషగా మాట్లాడే 8.7 కోట్ల (2001) జనాభాతో [1] ప్రాంతీయ భాషలలో మొదటి స్థానంలో ఉంది. ప్రపంచంలోని ప్రజలు అత్యధికముగా మాట్లాడే భాషలలో 15 స్థానములోనూ, భారత దేశములో హిందీ, తర్వాత స్థానములోనూ నిలుస్తుంది. పాతవైన ప్రపంచ భాష గణాంకాల (ఎథ్నోలాగ్) ప్రకారం ప్రపంచవ్యాప్తంగా 7.4 కోట్లు మందికి మాతృభాషగా ఉంది.[2] మొదటి భాషగా మాట్లాడతారు. అతి ప్రాచీన దేశ భాషలలో సంస్కృతము తమిళముతో బాటు తెలుగు భాషను 2008 అక్టోబరు 31న భారత ప్రభుత్వము గుర్తించింది.",
+        ),
+        (
+            "ur",
+            "اُردُو لشکری زبان[8] (یا جدید معیاری اردو) برصغیر کی معیاری زبانوں میں سے ایک ہے۔ یہ پاکستان کی قومی اور رابطہ عامہ کی زبان ہے، جبکہ بھارت کی چھے ریاستوں کی دفتری زبان کا درجہ رکھتی ہے۔ آئین ہند کے مطابق اسے 22 دفتری شناخت زبانوں میں شامل کیا جاچکا ہے۔ 2001ء کی مردم شماری کے مطابق اردو کو بطور مادری زبان بھارت میں 5.01% فیصد لوگ بولتے ہیں اور اس لحاظ سے یہ بھارت کی چھٹی بڑی زبان ہے جبکہ پاکستان میں اسے بطور مادری زبان 7.59% فیصد لوگ استعمال کرتے ہیں، یہ پاکستان کی پانچویں بڑی زبان ہے۔ اردو تاریخی طور پر ہندوستان کی مسلم آبادی سے جڑی ہے۔[حوالہ درکار] بعض ذخیرہ الفاظ کے علاوہ یہ زبان معیاری ہندی سے قابل فہم ہے جو اس خطے کی ہندوؤں سے منسوب ہے۔[حوالہ درکار] زبانِ اردو کو پہچان و ترقی اس وقت ملی جب برطانوی دور میں انگریز حکمرانوں نے اسے فارسی کی بجائے انگریزی کے ساتھ شمالی ہندوستان کے علاقوں اور جموں و کشمیر میں اسے سنہ 1846ء اور پنجاب میں سنہ 1849ء میں بطور دفتری زبان نافذ کیا۔ اس کے علاوہ خلیجی، یورپی، ایشیائی اور امریکی علاقوں میں اردو بولنے والوں کی ایک بڑی تعداد آباد ہے جو بنیادی طور پر جنوبی ایشیاء سے کوچ کرنے والے اہلِ اردو ہیں۔ 1999ء کے اعداد وشمار کے مطابق اردو زبان کے مجموعی متکلمین کی تعداد دس کروڑ ساٹھ لاکھ کے لگ بھگ تھی۔ اس لحاظ سے یہ دنیا کی نویں بڑی زبان ہے۔",
+        ),
     ],
     # fmt: on
 )
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 4310e41ab47..e7499404f63 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -24,12 +24,12 @@
 )
 from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
 from spacy.scorer import Scorer
+from spacy.tests.tok2vec import build_lazy_init_tok2vec as _  # noqa: F401
 from spacy.tokens import Doc, DocBin
 from spacy.training import Example
 from spacy.training.initialize import init_nlp
 
 # Ensure that the architecture gets added to the registry.
-from ..tok2vec import build_lazy_init_tok2vec as _
 from ..util import make_tempdir
 
 TRAIN_DATA_SINGLE_LABEL = [
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index 998f0472c7e..ddd9a990c65 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -65,10 +65,30 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
     "embed_arch,embed_config",
     # fmt: off
     [
-        ("spacy.MultiHashEmbed.v1", {"rows": [100, 100], "attrs": ["SHAPE", "LOWER"], "include_static_vectors": False}),
-        ("spacy.MultiHashEmbed.v1", {"rows": [100, 20], "attrs": ["ORTH", "PREFIX"], "include_static_vectors": False}),
-        ("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False}),
-        ("spacy.CharacterEmbed.v1", {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False}),
+        (
+            "spacy.MultiHashEmbed.v1",
+            {
+                "rows": [100, 100],
+                "attrs": ["SHAPE", "LOWER"],
+                "include_static_vectors": False,
+            },
+        ),
+        (
+            "spacy.MultiHashEmbed.v1",
+            {
+                "rows": [100, 20],
+                "attrs": ["ORTH", "PREFIX"],
+                "include_static_vectors": False,
+            },
+        ),
+        (
+            "spacy.CharacterEmbed.v1",
+            {"rows": 100, "nM": 64, "nC": 8, "include_static_vectors": False},
+        ),
+        (
+            "spacy.CharacterEmbed.v1",
+            {"rows": 100, "nM": 16, "nC": 2, "include_static_vectors": False},
+        ),
     ],
     # fmt: on
 )
@@ -76,10 +96,26 @@ def test_tok2vec_batch_sizes(batch_size, width, embed_size):
     "tok2vec_arch,encode_arch,encode_config",
     # fmt: off
     [
-        ("spacy.Tok2Vec.v1", "spacy.MaxoutWindowEncoder.v1", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
-        ("spacy.Tok2Vec.v2", "spacy.MaxoutWindowEncoder.v2", {"window_size": 1, "maxout_pieces": 3, "depth": 2}),
-        ("spacy.Tok2Vec.v1", "spacy.MishWindowEncoder.v1", {"window_size": 1, "depth": 6}),
-        ("spacy.Tok2Vec.v2", "spacy.MishWindowEncoder.v2", {"window_size": 1, "depth": 6}),
+        (
+            "spacy.Tok2Vec.v1",
+            "spacy.MaxoutWindowEncoder.v1",
+            {"window_size": 1, "maxout_pieces": 3, "depth": 2},
+        ),
+        (
+            "spacy.Tok2Vec.v2",
+            "spacy.MaxoutWindowEncoder.v2",
+            {"window_size": 1, "maxout_pieces": 3, "depth": 2},
+        ),
+        (
+            "spacy.Tok2Vec.v1",
+            "spacy.MishWindowEncoder.v1",
+            {"window_size": 1, "depth": 6},
+        ),
+        (
+            "spacy.Tok2Vec.v2",
+            "spacy.MishWindowEncoder.v2",
+            {"window_size": 1, "depth": 6},
+        ),
     ],
     # fmt: on
 )
@@ -164,9 +200,9 @@ def test_init_tok2vec():
 @pytest.mark.parametrize("with_vectors", (False, True))
 def test_tok2vec_listener(with_vectors):
     orig_config = Config().from_str(cfg_string)
-    orig_config["components"]["tok2vec"]["model"]["embed"][
-        "include_static_vectors"
-    ] = with_vectors
+    orig_config["components"]["tok2vec"]["model"]["embed"]["include_static_vectors"] = (
+        with_vectors
+    )
     nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
 
     if with_vectors:
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 43d5f62837a..4bac40f0b89 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -153,21 +153,171 @@ def test_issue12566(factory: str, output_file: str):
             "Briana McNaira - Cultural Chaos .",
             "tokens": [
                 # fmt: off
-                {"id": 0, "start": 0, "end": 8, "tag": "ADV", "pos": "ADV", "morph": "Degree=Pos", "lemma": "niedawno", "dep": "advmod", "head": 1, },
-                {"id": 1, "start": 9, "end": 15, "tag": "PRAET", "pos": "VERB", "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act", "lemma": "czytać", "dep": "ROOT", "head": 1, },
-                {"id": 2, "start": 16, "end": 18, "tag": "AGLT", "pos": "NOUN", "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing", "lemma": "em", "dep": "iobj", "head": 1, },
-                {"id": 3, "start": 19, "end": 23, "tag": "ADJ", "pos": "ADJ", "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Sing", "lemma": "nowy", "dep": "amod", "head": 4, },
-                {"id": 4, "start": 24, "end": 31, "tag": "SUBST", "pos": "NOUN", "morph": "Case=Acc|Gender=Fem|Number=Sing", "lemma": "książka", "dep": "obj", "head": 1, },
-                {"id": 5, "start": 32, "end": 43, "tag": "ADJ", "pos": "ADJ", "morph": "Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing", "lemma": "znakomit", "dep": "acl", "head": 4, },
-                {"id": 6, "start": 44, "end": 54, "tag": "ADJ", "pos": "ADJ", "morph": "Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing", "lemma": "szkockiy", "dep": "amod", "head": 7, },
-                {"id": 7, "start": 55, "end": 66, "tag": "SUBST", "pos": "NOUN", "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing", "lemma": "medioznawca", "dep": "iobj", "head": 5, },
-                {"id": 8, "start": 67, "end": 68, "tag": "INTERP", "pos": "PUNCT", "morph": "PunctType=Comm", "lemma": ",", "dep": "punct", "head": 9, },
-                {"id": 9, "start": 69, "end": 75, "tag": "SUBST", "pos": "PROPN", "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing", "lemma": "Brian", "dep": "nmod", "head": 4, },
-                {"id": 10, "start": 76, "end": 83, "tag": "SUBST", "pos": "PROPN", "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing", "lemma": "McNair", "dep": "flat", "head": 9, },
-                {"id": 11, "start": 84, "end": 85, "tag": "INTERP", "pos": "PUNCT", "morph": "PunctType=Dash", "lemma": "-", "dep": "punct", "head": 12, },
-                {"id": 12, "start": 86, "end": 94, "tag": "SUBST", "pos": "PROPN", "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing", "lemma": "Cultural", "dep": "conj", "head": 4, },
-                {"id": 13, "start": 95, "end": 100, "tag": "SUBST", "pos": "NOUN", "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing", "lemma": "Chaos", "dep": "flat", "head": 12, },
-                {"id": 14, "start": 101, "end": 102, "tag": "INTERP", "pos": "PUNCT", "morph": "PunctType=Peri", "lemma": ".", "dep": "punct", "head": 1, },
+                {
+                    "id": 0,
+                    "start": 0,
+                    "end": 8,
+                    "tag": "ADV",
+                    "pos": "ADV",
+                    "morph": "Degree=Pos",
+                    "lemma": "niedawno",
+                    "dep": "advmod",
+                    "head": 1,
+                },
+                {
+                    "id": 1,
+                    "start": 9,
+                    "end": 15,
+                    "tag": "PRAET",
+                    "pos": "VERB",
+                    "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act",
+                    "lemma": "czytać",
+                    "dep": "ROOT",
+                    "head": 1,
+                },
+                {
+                    "id": 2,
+                    "start": 16,
+                    "end": 18,
+                    "tag": "AGLT",
+                    "pos": "NOUN",
+                    "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing",
+                    "lemma": "em",
+                    "dep": "iobj",
+                    "head": 1,
+                },
+                {
+                    "id": 3,
+                    "start": 19,
+                    "end": 23,
+                    "tag": "ADJ",
+                    "pos": "ADJ",
+                    "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Sing",
+                    "lemma": "nowy",
+                    "dep": "amod",
+                    "head": 4,
+                },
+                {
+                    "id": 4,
+                    "start": 24,
+                    "end": 31,
+                    "tag": "SUBST",
+                    "pos": "NOUN",
+                    "morph": "Case=Acc|Gender=Fem|Number=Sing",
+                    "lemma": "książka",
+                    "dep": "obj",
+                    "head": 1,
+                },
+                {
+                    "id": 5,
+                    "start": 32,
+                    "end": 43,
+                    "tag": "ADJ",
+                    "pos": "ADJ",
+                    "morph": "Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing",
+                    "lemma": "znakomit",
+                    "dep": "acl",
+                    "head": 4,
+                },
+                {
+                    "id": 6,
+                    "start": 44,
+                    "end": 54,
+                    "tag": "ADJ",
+                    "pos": "ADJ",
+                    "morph": "Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing",
+                    "lemma": "szkockiy",
+                    "dep": "amod",
+                    "head": 7,
+                },
+                {
+                    "id": 7,
+                    "start": 55,
+                    "end": 66,
+                    "tag": "SUBST",
+                    "pos": "NOUN",
+                    "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing",
+                    "lemma": "medioznawca",
+                    "dep": "iobj",
+                    "head": 5,
+                },
+                {
+                    "id": 8,
+                    "start": 67,
+                    "end": 68,
+                    "tag": "INTERP",
+                    "pos": "PUNCT",
+                    "morph": "PunctType=Comm",
+                    "lemma": ",",
+                    "dep": "punct",
+                    "head": 9,
+                },
+                {
+                    "id": 9,
+                    "start": 69,
+                    "end": 75,
+                    "tag": "SUBST",
+                    "pos": "PROPN",
+                    "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing",
+                    "lemma": "Brian",
+                    "dep": "nmod",
+                    "head": 4,
+                },
+                {
+                    "id": 10,
+                    "start": 76,
+                    "end": 83,
+                    "tag": "SUBST",
+                    "pos": "PROPN",
+                    "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing",
+                    "lemma": "McNair",
+                    "dep": "flat",
+                    "head": 9,
+                },
+                {
+                    "id": 11,
+                    "start": 84,
+                    "end": 85,
+                    "tag": "INTERP",
+                    "pos": "PUNCT",
+                    "morph": "PunctType=Dash",
+                    "lemma": "-",
+                    "dep": "punct",
+                    "head": 12,
+                },
+                {
+                    "id": 12,
+                    "start": 86,
+                    "end": 94,
+                    "tag": "SUBST",
+                    "pos": "PROPN",
+                    "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing",
+                    "lemma": "Cultural",
+                    "dep": "conj",
+                    "head": 4,
+                },
+                {
+                    "id": 13,
+                    "start": 95,
+                    "end": 100,
+                    "tag": "SUBST",
+                    "pos": "NOUN",
+                    "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing",
+                    "lemma": "Chaos",
+                    "dep": "flat",
+                    "head": 12,
+                },
+                {
+                    "id": 14,
+                    "start": 101,
+                    "end": 102,
+                    "tag": "INTERP",
+                    "pos": "PUNCT",
+                    "morph": "PunctType=Peri",
+                    "lemma": ".",
+                    "dep": "punct",
+                    "head": 1,
+                },
                 # fmt: on
             ],
         }
@@ -420,8 +570,14 @@ def test_cli_converters_conll_ner_to_docs():
         (["--x.foo=bar"], {"x.foo": "bar"}),
         (["--x.foo", "--x.bar", "baz"], {"x.foo": True, "x.bar": "baz"}),
         (["--x.foo", "--x.bar=baz"], {"x.foo": True, "x.bar": "baz"}),
-        (["--x.foo", "10.1", "--x.bar", "--x.baz", "false"], {"x.foo": 10.1, "x.bar": True, "x.baz": False}),
-        (["--x.foo", "10.1", "--x.bar", "--x.baz=false"], {"x.foo": 10.1, "x.bar": True, "x.baz": False})
+        (
+            ["--x.foo", "10.1", "--x.bar", "--x.baz", "false"],
+            {"x.foo": 10.1, "x.bar": True, "x.baz": False},
+        ),
+        (
+            ["--x.foo", "10.1", "--x.bar", "--x.baz=false"],
+            {"x.foo": 10.1, "x.bar": True, "x.baz": False},
+        ),
         # fmt: on
     ],
 )
@@ -499,11 +655,11 @@ def test_model_recommendations():
         # fmt: off
         "parser,textcat,tagger",
         " parser, textcat ,tagger ",
-        'parser,textcat,tagger',
-        ' parser, textcat ,tagger ',
+        "parser,textcat,tagger",
+        " parser, textcat ,tagger ",
         ' "parser"," textcat " ,"tagger "',
         " 'parser',' textcat ' ,'tagger '",
-        '[parser,textcat,tagger]',
+        "[parser,textcat,tagger]",
         '["parser","textcat","tagger"]',
         '[" parser" ,"textcat ", " tagger " ]',
         "[parser,textcat,tagger]",
@@ -522,7 +678,7 @@ def test_string_to_list(value):
     [
         # fmt: off
         "1,2,3",
-        '[1,2,3]',
+        "[1,2,3]",
         '["1","2","3"]',
         '[" 1" ,"2 ", " 3 " ]',
         "[' 1' , '2', ' 3 ' ]",
@@ -1073,6 +1229,9 @@ def test_download_rejects_relative_urls(monkeypatch):
     relative path in the filename"""
 
     monkeypatch.setattr(download_module, "run_command", lambda cmd: None)
+    monkeypatch.setattr(
+        download_module, "_get_pip_install_cmd", lambda: ["pip", "install"]
+    )
 
     # Check that normal download works
     download_module.download("en_core_web_sm-3.7.1", direct=True)
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 1789d60ea4c..c72e26c3444 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -1,16 +1,18 @@
 import os
-import sys
 from pathlib import Path
 
 import pytest
 import srsly
 from typer.testing import CliRunner
 
+from spacy.cli import load_all_commands
 from spacy.cli._util import app, get_git_version
 from spacy.tokens import Doc, DocBin, Span
 
 from .util import make_tempdir, normalize_whitespace
 
+load_all_commands()
+
 
 def has_git():
     try:
@@ -285,30 +287,30 @@ def test_find_function_invalid():
 example_ents = ["O", "O", "I-ANIMAL"]
 example_spans = [(2, 3, "ANIMAL")]
 
-TRAIN_EXAMPLE_1 = dict(
-    words=example_words_1,
-    lemmas=example_lemmas_1,
-    tags=example_tags,
-    morphs=example_morphs,
-    deps=example_deps,
-    heads=[1, 1, 1],
-    pos=example_pos,
-    ents=example_ents,
-    spans=example_spans,
-    cats={"CAT": 1.0, "DOG": 0.0},
-)
-TRAIN_EXAMPLE_2 = dict(
-    words=example_words_2,
-    lemmas=example_lemmas_2,
-    tags=example_tags,
-    morphs=example_morphs,
-    deps=example_deps,
-    heads=[1, 1, 1],
-    pos=example_pos,
-    ents=example_ents,
-    spans=example_spans,
-    cats={"CAT": 0.0, "DOG": 1.0},
-)
+TRAIN_EXAMPLE_1 = {
+    "words": example_words_1,
+    "lemmas": example_lemmas_1,
+    "tags": example_tags,
+    "morphs": example_morphs,
+    "deps": example_deps,
+    "heads": [1, 1, 1],
+    "pos": example_pos,
+    "ents": example_ents,
+    "spans": example_spans,
+    "cats": {"CAT": 1.0, "DOG": 0.0},
+}
+TRAIN_EXAMPLE_2 = {
+    "words": example_words_2,
+    "lemmas": example_lemmas_2,
+    "tags": example_tags,
+    "morphs": example_morphs,
+    "deps": example_deps,
+    "heads": [1, 1, 1],
+    "pos": example_pos,
+    "ents": example_ents,
+    "spans": example_spans,
+    "cats": {"CAT": 0.0, "DOG": 1.0},
+}
 
 
 @pytest.mark.slow
diff --git a/spacy/tests/test_cli_launcher.py b/spacy/tests/test_cli_launcher.py
new file mode 100644
index 00000000000..c9af62a509d
--- /dev/null
+++ b/spacy/tests/test_cli_launcher.py
@@ -0,0 +1,123 @@
+import importlib
+import subprocess
+import sys
+
+import pytest
+
+from spacy_cli.static import load_manifest
+
+launcher_module = importlib.import_module("spacy_cli.main")
+
+
+def _run_python(code: str) -> str:
+    """Run ``code`` via ``python -c`` in a child process; return stripped stdout.
+
+    ``check=True`` turns a non-zero exit status into ``CalledProcessError``.
+    """
+    argv = [sys.executable, "-c", code]
+    completed = subprocess.run(argv, check=True, capture_output=True, text=True)
+    return completed.stdout.strip()
+
+
+def test_cli_package_import_is_lazy():
+    """``import spacy.cli`` alone must not load ``spacy.cli.train`` or ``weasel``."""
+    probe = "import sys; import spacy.cli; "
+    probe += "print('spacy.cli.train' in sys.modules); print('weasel' in sys.modules)"
+    flags = _run_python(probe).splitlines()
+    assert flags == ["False", "False"]
+
+
+def test_load_for_argv_imports_only_requested_command():
+    """``load_for_argv`` for ``train`` loads ``spacy.cli.train`` but not ``weasel``."""
+    probe = "import sys; from spacy.cli import load_for_argv; "
+    probe += "load_for_argv(['train', '--help']); "
+    probe += "print('spacy.cli.train' in sys.modules); print('weasel' in sys.modules)"
+    flags = _run_python(probe).splitlines()
+    assert flags == ["True", "False"]
+
+
+def test_load_for_argv_imports_project_on_demand():
+    """``load_for_argv`` for the ``project`` command ends up importing ``weasel``."""
+    probe = "import sys; from spacy.cli import load_for_argv; "
+    probe += "load_for_argv(['project', '--help']); print('weasel' in sys.modules)"
+    weasel_loaded = _run_python(probe)
+    assert weasel_loaded == "True"
+
+
+def test_manifest_is_current():
+    """A freshly built manifest must equal the stored one (per-key diff on stderr)."""
+    # Runs in a subprocess: other test modules import CLI submodules, which
+    # registers commands as an import side effect and can change their order.
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-c",
+            "import json; "
+            "from spacy_cli.build_manifest import build_manifest; "
+            "from spacy_cli.static import load_manifest; "
+            "b, l, diffs = build_manifest(), load_manifest(), {}; "
+            "nested = lambda k: all(isinstance(m.get(k), dict) for m in (b, l)); "
+            "[diffs.update({f'{k}.{sk}': (repr(b[k].get(sk))[:120], "
+            "repr(l[k].get(sk))[:120])}) for k in {**l, **b} if nested(k) "
+            "for sk in {**l[k], **b[k]} if b[k].get(sk) != l[k].get(sk)]; "
+            "[diffs.update({k: (repr(b.get(k))[:120], repr(l.get(k))[:120])}) "
+            "for k in {**l, **b} if not nested(k) and b.get(k) != l.get(k)]; "
+            "assert b == l, json.dumps(diffs, indent=2)",
+        ],
+        capture_output=True,
+        text=True,
+    )
+    assert result.returncode == 0, result.stderr
+
+
+def test_launcher_root_help_uses_static(capsys, monkeypatch):
+    monkeypatch.setattr(  # stub raises AssertionError if _run_live is ever called
+        launcher_module, "_run_live", lambda: (_ for _ in ()).throw(AssertionError)
+    )
+    with pytest.raises(SystemExit) as exc:  # main() is expected to exit itself
+        launcher_module.main(["--help"])
+    assert exc.value.code == 0
+    assert capsys.readouterr().out == load_manifest()["root_help"]  # exact match
+
+
+def test_launcher_command_help_uses_static(capsys, monkeypatch):
+    monkeypatch.setattr(  # stub raises AssertionError if _run_live is ever called
+        launcher_module, "_run_live", lambda: (_ for _ in ()).throw(AssertionError)
+    )
+    with pytest.raises(SystemExit) as exc:  # main() is expected to exit itself
+        launcher_module.main(["train", "--help"])
+    assert exc.value.code == 0  # stdout must equal the manifest's "train" help
+    assert capsys.readouterr().out == load_manifest()["command_help"]["train"]
+
+
+def test_launcher_unknown_command_uses_static_error(capsys, monkeypatch):
+    monkeypatch.setattr(  # stub raises AssertionError if _run_live is ever called
+        launcher_module, "_run_live", lambda: (_ for _ in ()).throw(AssertionError)
+    )
+    with pytest.raises(SystemExit) as exc:  # error text is asserted on stdout below
+        launcher_module.main(["definitely-not-a-command"])
+    assert exc.value.code == 2  # presumably Click's usage-error status; confirm
+    assert "No such command 'definitely-not-a-command'" in capsys.readouterr().out
+
+
+def test_launcher_non_help_command_falls_back_to_live(monkeypatch):
+    """A non-help invocation (``train config.cfg``) calls ``_run_live`` once.
+
+    ``_run_live`` is replaced by a stub that only records that it was called.
+    """
+    live_calls = []
+    monkeypatch.setattr(launcher_module, "_run_live", lambda: live_calls.append(True))
+    launcher_module.main(["train", "config.cfg"])
+    assert live_calls == [True]
+
+
+def test_launcher_root_help_falls_back_with_plugins(monkeypatch):
+    """With a plugin command reported, root ``--help`` calls ``_run_live`` once.
+
+    ``get_plugin_command_names`` is stubbed to return ``{"custom"}``.
+    """
+    live_calls = []
+    monkeypatch.setattr(launcher_module, "_run_live", lambda: live_calls.append(True))
+    monkeypatch.setattr(launcher_module, "get_plugin_command_names", lambda: {"custom"})
+    launcher_module.main(["--help"])
+    assert live_calls == [True]
diff --git a/spacy/tests/test_factory_imports.py b/spacy/tests/test_factory_imports.py
index a975af0bbd2..7a1b4a769a8 100644
--- a/spacy/tests/test_factory_imports.py
+++ b/spacy/tests/test_factory_imports.py
@@ -67,16 +67,16 @@ def test_factory_import_compatibility(factory_name, original_module, compat_modu
     # Import from the original module (registrations.py)
     original_module_obj = importlib.import_module(original_module)
     original_factory = getattr(original_module_obj, factory_name)
-    assert (
-        original_factory is not None
-    ), f"Could not import {factory_name} from {original_module}"
+    assert original_factory is not None, (
+        f"Could not import {factory_name} from {original_module}"
+    )
 
     # Import from the compatibility module (component file)
     compat_module_obj = importlib.import_module(compat_module)
     compat_factory = getattr(compat_module_obj, factory_name)
-    assert (
-        compat_factory is not None
-    ), f"Could not import {factory_name} from {compat_module}"
+    assert compat_factory is not None, (
+        f"Could not import {factory_name} from {compat_module}"
+    )
 
     # Test that they're the same function (identity)
     assert original_factory is compat_factory, (
diff --git a/spacy/tests/test_factory_registrations.py b/spacy/tests/test_factory_registrations.py
index 8e93f54f0b0..eb69265e3f3 100644
--- a/spacy/tests/test_factory_registrations.py
+++ b/spacy/tests/test_factory_registrations.py
@@ -1,17 +1,14 @@
-import inspect
 import json
 from pathlib import Path
 
 import pytest
 
-from spacy.language import Language
 from spacy.util import registry
 
 # Path to the reference factory registrations, relative to this file
 REFERENCE_FILE = Path(__file__).parent / "factory_registrations.json"
 
 # Monkey patch the util.is_same_func to handle Cython functions
-import inspect
 
 from spacy import util
 
@@ -82,9 +79,9 @@ def test_factory_registrations_preserved(reference_factory_registrations):
     missing_registrations = set(reference_factory_registrations.keys()) - set(
         current_registrations.keys()
     )
-    assert (
-        not missing_registrations
-    ), f"Missing factory registrations: {', '.join(sorted(missing_registrations))}"
+    assert not missing_registrations, (
+        f"Missing factory registrations: {', '.join(sorted(missing_registrations))}"
+    )
 
     # Check for new registrations (not an error, but informative)
     new_registrations = set(current_registrations.keys()) - set(
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index d2a41ff0fed..309c57b0926 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -3,12 +3,7 @@
 from pathlib import Path
 
 import pytest
-
-try:
-    from pydantic.v1 import ValidationError
-except ImportError:
-    from pydantic import ValidationError  # type: ignore
-
+from pydantic import ValidationError
 from thinc.api import (
     Config,
     ConfigValidationError,
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index 5228b4544fd..706203ffd63 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -95,7 +95,7 @@ def test_multi_hash_embed():
     hash_embeds = [node for node in embed.walk() if node.name == "hashembed"]
     assert len(hash_embeds) == 3
     # Check they look at different columns.
-    assert list(sorted(he.attrs["column"] for he in hash_embeds)) == [0, 1, 2]
+    assert sorted(he.attrs["column"] for he in hash_embeds) == [0, 1, 2]
     # Check they use different seeds
     assert len(set(he.attrs["seed"] for he in hash_embeds)) == 3
     # Check they all have the same number of rows
diff --git a/spacy/tests/test_registry_population.py b/spacy/tests/test_registry_population.py
index 592e74dd20a..e72f3d9f8e8 100644
--- a/spacy/tests/test_registry_population.py
+++ b/spacy/tests/test_registry_population.py
@@ -1,5 +1,4 @@
 import json
-import os
 from pathlib import Path
 
 import pytest
@@ -50,6 +49,6 @@ def test_registry_entries(reference_registry):
         # Check for missing entries - these would indicate our new registry population
         # mechanism is missing something
         missing_entries = expected_set - current_set
-        assert (
-            not missing_entries
-        ), f"Registry '{registry_name}' missing entries: {', '.join(missing_entries)}"
+        assert not missing_entries, (
+            f"Registry '{registry_name}' missing entries: {', '.join(missing_entries)}"
+        )
diff --git a/spacy/tests/training/test_corpus.py b/spacy/tests/training/test_corpus.py
index e7cae989384..ded6a53833c 100644
--- a/spacy/tests/training/test_corpus.py
+++ b/spacy/tests/training/test_corpus.py
@@ -1,7 +1,6 @@
-import tempfile
 from contextlib import contextmanager
 from pathlib import Path
-from typing import IO, Generator, Iterable, List, TextIO, Tuple
+from typing import Generator, Iterable, List, Tuple
 
 import pytest
 
diff --git a/spacy/tests/vocab_vectors/test_lexeme.py b/spacy/tests/vocab_vectors/test_lexeme.py
index 156e3391aa2..3c01055b552 100644
--- a/spacy/tests/vocab_vectors/test_lexeme.py
+++ b/spacy/tests/vocab_vectors/test_lexeme.py
@@ -2,7 +2,6 @@
 import pytest
 
 from spacy.attrs import IS_ALPHA, IS_DIGIT
-from spacy.lookups import Lookups
 from spacy.tokens import Doc
 from spacy.util import OOV_RANK
 from spacy.vocab import Vocab
diff --git a/spacy/tests/vocab_vectors/test_memory_zone.py b/spacy/tests/vocab_vectors/test_memory_zone.py
index 910d2664eb4..f718afa2f6e 100644
--- a/spacy/tests/vocab_vectors/test_memory_zone.py
+++ b/spacy/tests/vocab_vectors/test_memory_zone.py
@@ -34,3 +34,26 @@ def test_memory_zone_redundant_insertion():
         _ = vocab["dog"]
     assert "dog" in vocab
     assert "horse" not in vocab
+
+
+def test_memory_zone_exception_cleanup():
+    """An error raised inside `Vocab.memory_zone()` must still close the zone,
+    drop the zone's transient entries and leave the vocab usable."""
+    vocab = Vocab()
+    _ = vocab["dog"]
+    assert "dog" in vocab
+    try:
+        with vocab.memory_zone():
+            _ = vocab["horse"]
+            raise ValueError("simulated error")
+    except ValueError:
+        pass  # expected: only the state left behind by the zone is under test
+    # The zone flag must be reset even though the body raised
+    assert not vocab.in_memory_zone
+    # "dog" was added before the zone, so it has to survive
+    assert "dog" in vocab
+    # "horse" only existed inside the failed zone and must be gone
+    assert "horse" not in vocab
+    # New lookups after the failure must still work
+    cat_lexeme = vocab["cat"]
+    assert cat_lexeme.text == "cat"
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 873d85835f0..51f5740c25c 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -13,8 +13,7 @@
 from ..util import SimpleFrozenList, ensure_path
 from ..vocab import Vocab
 from ._dict_proxies import SpanGroups
-from .doc import DOCBIN_ALL_ATTRS as ALL_ATTRS
-from .doc import Doc
+from .doc import DOCBIN_ALL_ATTRS as ALL_ATTRS, Doc
 
 
 class DocBin:
@@ -207,7 +206,7 @@ def to_bytes(self) -> bytes:
             "tokens": tokens.tobytes("C"),
             "spaces": spaces.tobytes("C"),
             "lengths": numpy.asarray(lengths, dtype="int32").tobytes("C"),
-            "strings": list(sorted(self.strings)),
+            "strings": sorted(self.strings),
             "cats": self.cats,
             "flags": self.flags,
             "span_groups": self.span_groups,
diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi
index d92f04d0564..b8b26ce8b0d 100644
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@@ -57,7 +57,9 @@ class Doc:
         force: bool = ...,
     ) -> None: ...
     @classmethod
-    def get_extension(cls, name: str) -> Tuple[
+    def get_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[DocMethod],
         Optional[Callable[[Doc], Any]],
@@ -66,7 +68,9 @@ class Doc:
     @classmethod
     def has_extension(cls, name: str) -> bool: ...
     @classmethod
-    def remove_extension(cls, name: str) -> Tuple[
+    def remove_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[DocMethod],
         Optional[Callable[[Doc], Any]],
@@ -144,7 +148,7 @@ class Doc:
         blocked: Optional[List[Span]] = ...,
         missing: Optional[List[Span]] = ...,
         outside: Optional[List[Span]] = ...,
-        default: str = ...
+        default: str = ...,
     ) -> None: ...
     @property
     def noun_chunks(self) -> Iterator[Span]: ...
diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi
index 070aaffb3a8..b982eb810b8 100644
--- a/spacy/tokens/span.pyi
+++ b/spacy/tokens/span.pyi
@@ -23,7 +23,9 @@ class Span:
         force: bool = ...,
     ) -> None: ...
     @classmethod
-    def get_extension(cls, name: str) -> Tuple[
+    def get_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[SpanMethod],
         Optional[Callable[[Span], Any]],
@@ -32,7 +34,9 @@ class Span:
     @classmethod
     def has_extension(cls, name: str) -> bool: ...
     @classmethod
-    def remove_extension(cls, name: str) -> Tuple[
+    def remove_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[SpanMethod],
         Optional[Callable[[Span], Any]],
diff --git a/spacy/tokens/span_group.pyi b/spacy/tokens/span_group.pyi
index d063bb59533..3bd2b6788fb 100644
--- a/spacy/tokens/span_group.pyi
+++ b/spacy/tokens/span_group.pyi
@@ -12,7 +12,7 @@ class SpanGroup:
         *,
         name: str = ...,
         attrs: Dict[str, Any] = ...,
-        spans: Iterable[Span] = ...
+        spans: Iterable[Span] = ...,
     ) -> None: ...
     def __repr__(self) -> str: ...
     @property
diff --git a/spacy/tokens/token.pyi b/spacy/tokens/token.pyi
index 7e56ae3bccd..435ace52707 100644
--- a/spacy/tokens/token.pyi
+++ b/spacy/tokens/token.pyi
@@ -27,7 +27,9 @@ class Token:
         force: bool = ...,
     ) -> None: ...
     @classmethod
-    def get_extension(cls, name: str) -> Tuple[
+    def get_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[TokenMethod],
         Optional[Callable[[Token], Any]],
@@ -36,7 +38,9 @@ class Token:
     @classmethod
     def has_extension(cls, name: str) -> bool: ...
     @classmethod
-    def remove_extension(cls, name: str) -> Tuple[
+    def remove_extension(
+        cls, name: str
+    ) -> Tuple[
         Optional[Any],
         Optional[TokenMethod],
         Optional[Callable[[Token], Any]],
diff --git a/spacy/training/augment.py b/spacy/training/augment.py
index da5ae3d087a..ba4368acef1 100644
--- a/spacy/training/augment.py
+++ b/spacy/training/augment.py
@@ -3,7 +3,6 @@
 from functools import partial
 from typing import TYPE_CHECKING, Callable, Dict, Iterator, List, Optional, Tuple
 
-from ..util import registry
 from .example import Example
 from .iob_utils import _doc_to_biluo_tags_with_partial, split_bilu_label
 
diff --git a/spacy/training/batchers.py b/spacy/training/batchers.py
index 4a1dfa94515..40e437dcc8c 100644
--- a/spacy/training/batchers.py
+++ b/spacy/training/batchers.py
@@ -12,7 +12,7 @@
     Union,
 )
 
-from ..util import minibatch, registry
+from ..util import minibatch
 
 Sizing = Union[Sequence[int], int]
 ItemT = TypeVar("ItemT")
@@ -24,7 +24,7 @@ def configure_minibatch_by_padded_size(
     size: Sizing,
     buffer: int,
     discard_oversize: bool,
-    get_length: Optional[Callable[[ItemT], int]] = None
+    get_length: Optional[Callable[[ItemT], int]] = None,
 ) -> BatcherT:
     """Create a batcher that uses the `batch_by_padded_size` strategy.
 
@@ -49,7 +49,7 @@ def configure_minibatch_by_padded_size(
         size=size,
         buffer=buffer,
         discard_oversize=discard_oversize,
-        **optionals
+        **optionals,
     )
 
 
@@ -58,7 +58,7 @@ def configure_minibatch_by_words(
     size: Sizing,
     tolerance: float,
     discard_oversize: bool,
-    get_length: Optional[Callable[[ItemT], int]] = None
+    get_length: Optional[Callable[[ItemT], int]] = None,
 ) -> BatcherT:
     """Create a batcher that uses the "minibatch by words" strategy.
 
@@ -76,7 +76,7 @@ def configure_minibatch_by_words(
         size=size,
         tolerance=tolerance,
         discard_oversize=discard_oversize,
-        **optionals
+        **optionals,
     )
 
 
@@ -232,6 +232,6 @@ def _batch_by_length(
         batches.append(batch)
     # Check lengths match
     assert sum(len(b) for b in batches) == len(seqs)
-    batches = [list(sorted(batch)) for batch in batches]
+    batches = [sorted(batch) for batch in batches]
     batches.reverse()
     return batches
diff --git a/spacy/training/callbacks.py b/spacy/training/callbacks.py
index 714deea6dcd..19382757a95 100644
--- a/spacy/training/callbacks.py
+++ b/spacy/training/callbacks.py
@@ -1,7 +1,7 @@
 from typing import TYPE_CHECKING, Callable, Optional
 
 from ..errors import Errors
-from ..util import load_model, logger, registry
+from ..util import load_model, logger
 
 if TYPE_CHECKING:
     from ..language import Language
diff --git a/spacy/training/converters/conll_ner_to_docs.py b/spacy/training/converters/conll_ner_to_docs.py
index b19d1791b27..e66a8a8dfed 100644
--- a/spacy/training/converters/conll_ner_to_docs.py
+++ b/spacy/training/converters/conll_ner_to_docs.py
@@ -74,8 +74,7 @@ def conll_ner_to_docs(
     # provide warnings for problematic data
     if "\n\n" not in input_data:
         msg.warn(
-            "No sentence boundaries found. Use `-s` to automatically segment "
-            "sentences."
+            "No sentence boundaries found. Use `-s` to automatically segment sentences."
         )
     if doc_delimiter not in input_data:
         msg.warn(
diff --git a/spacy/training/converters/conllu_to_docs.py b/spacy/training/converters/conllu_to_docs.py
index bda5c88c3d4..3a60c4e024b 100644
--- a/spacy/training/converters/conllu_to_docs.py
+++ b/spacy/training/converters/conllu_to_docs.py
@@ -15,7 +15,7 @@ def conllu_to_docs(
     ner_map=None,
     merge_subtokens=False,
     no_print=False,
-    **_
+    **_,
 ):
     """
     Convert conllu files into JSON format for use with train cli.
diff --git a/spacy/training/corpus.py b/spacy/training/corpus.py
index 5cc2733a540..30e32911e6b 100644
--- a/spacy/training/corpus.py
+++ b/spacy/training/corpus.py
@@ -50,9 +50,10 @@ def create_jsonl_reader(
 
 
 @util.registry.readers("spacy.read_labels.v1")
-def read_labels(path: Path, *, require: bool = False):
+def read_labels(path: Union[str, Path], *, require: bool = False):
     # I decided not to give this a generic name, because I don't want people to
     # use it for arbitrary stuff, as I want this require arg with default False.
+    path = Path(path)
     if not require and not path.exists():
         return None
     return srsly.read_json(path)
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 0621702214c..164a0867494 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -25,8 +25,7 @@
     registry,
     resolve_dot_names,
 )
-from ..vectors import Mode as VectorsMode
-from ..vectors import Vectors
+from ..vectors import Mode as VectorsMode, Vectors
 from .pretrain import get_tok2vec_ref
 
 if TYPE_CHECKING:
@@ -51,7 +50,7 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
     logger.info("Set up nlp object from config")
     config = nlp.config.interpolate()
     # Resolve all training-relevant sections using the filled nlp config
-    T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+    T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
     dot_names = [T["train_corpus"], T["dev_corpus"]]
     if not isinstance(T["train_corpus"], str):
         raise ConfigValidationError(
diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py
index 488ca4a7136..7f200545ca0 100644
--- a/spacy/training/loggers.py
+++ b/spacy/training/loggers.py
@@ -8,7 +8,6 @@
 
 from .. import util
 from ..errors import Errors
-from ..util import registry
 
 if TYPE_CHECKING:
     from ..language import Language  # noqa: F401
@@ -176,7 +175,7 @@ def log_step(info: Optional[Dict[str, Any]]) -> None:
                         initial = info["step"]
                     else:
                         total = eval_frequency
-                        desc = f"Epoch {info['epoch']+1}"
+                        desc = f"Epoch {info['epoch'] + 1}"
                         initial = 0
                     # Set disable=None, so that it disables on non-TTY
                     progress = tqdm.tqdm(
diff --git a/spacy/training/loop.py b/spacy/training/loop.py
index 6f5099858f1..d6f1ad7d608 100644
--- a/spacy/training/loop.py
+++ b/spacy/training/loop.py
@@ -62,7 +62,7 @@ def train(
     allocator = config["training"]["gpu_allocator"]
     if use_gpu >= 0 and allocator:
         set_gpu_allocator(allocator)
-    T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
+    T = registry.resolve(config["training"], schema=ConfigSchemaTraining)  # type: ignore[arg-type]
     dot_names = [T["train_corpus"], T["dev_corpus"]]
     train_corpus, dev_corpus = resolve_dot_names(config, dot_names)
     optimizer = T["optimizer"]
diff --git a/spacy/training/pretrain.py b/spacy/training/pretrain.py
index 14a813a0993..32eada4d749 100644
--- a/spacy/training/pretrain.py
+++ b/spacy/training/pretrain.py
@@ -42,7 +42,7 @@ def pretrain(
     config["initialize"]["init_tok2vec"] = None
     nlp = load_model_from_config(config)
     _config = nlp.config.interpolate()
-    P = registry.resolve(_config["pretraining"], schema=ConfigSchemaPretrain)
+    P = registry.resolve(_config["pretraining"], schema=ConfigSchemaPretrain)  # type: ignore[arg-type]
     corpus = dot_to_object(_config, P["corpus"])
     corpus = registry.resolve({"corpus": corpus})["corpus"]
     batcher = P["batcher"]
diff --git a/spacy/ty.py b/spacy/ty.py
index b37f2e18a1f..c18ce284dc0 100644
--- a/spacy/ty.py
+++ b/spacy/ty.py
@@ -29,7 +29,7 @@ def update(
         *,
         drop: float = 0.0,
         sgd: Optional[Optimizer] = None,
-        losses: Optional[Dict[str, float]] = None
+        losses: Optional[Dict[str, float]] = None,
     ) -> Dict[str, float]: ...
 
     def finish_update(self, sgd: Optimizer) -> None: ...
@@ -41,7 +41,7 @@ def initialize(
         self,
         get_examples: Callable[[], Iterable["Example"]],
         nlp: "Language",
-        **kwargs: Any
+        **kwargs: Any,
     ): ...
 
 
diff --git a/spacy/util.py b/spacy/util.py
index ad5a7e0bada..14d7b539994 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -557,7 +557,9 @@ def load_model_from_package(
     RETURNS (Language): The loaded nlp object.
     """
     cls = importlib.import_module(name)
-    return cls.load(vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config)  # type: ignore[attr-defined]
+    return cls.load(
+        vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config
+    )  # type: ignore[attr-defined]
 
 
 def load_model_from_path(
diff --git a/spacy/vocab.pyi b/spacy/vocab.pyi
index ee7636f02c8..906a4c0d978 100644
--- a/spacy/vocab.pyi
+++ b/spacy/vocab.pyi
@@ -5,7 +5,6 @@ from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Unio
 from cymem.cymem import Pool
 from thinc.types import Floats1d, FloatsXd
 
-from . import Language
 from .lexeme import Lexeme
 from .lookups import Lookups
 from .morphology import Morphology
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 11043c17ae7..4bf80c85d8e 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -150,9 +150,11 @@ cdef class Vocab:
             if hasattr(self._vectors, "memory_zone"):
                 contexts.append(stack.enter_context(self._vectors.memory_zone(mem)))
             self.mem = mem
-            yield mem
-        self._clear_transient_orths()
-        self.mem = self._non_temp_mem
+            try:
+                yield mem
+            finally:
+                self._clear_transient_orths()
+                self.mem = self._non_temp_mem
 
     def add_flag(self, flag_getter, int flag_id=-1):
         """Set a new boolean flag to words in the vocabulary.
diff --git a/spacy_cli/__init__.py b/spacy_cli/__init__.py
new file mode 100644
index 00000000000..a2cb1f66b78
--- /dev/null
+++ b/spacy_cli/__init__.py
@@ -0,0 +1 @@
+"""Lightweight launcher package for the spaCy console script."""
diff --git a/spacy_cli/build_manifest.py b/spacy_cli/build_manifest.py
new file mode 100644
index 00000000000..71982d82d77
--- /dev/null
+++ b/spacy_cli/build_manifest.py
@@ -0,0 +1,99 @@
+import json
+from pathlib import Path
+from typing import Dict, Iterable, List
+
+from typer.main import get_command
+from typer.testing import CliRunner
+
+from spacy.cli import load_all_commands
+from spacy.cli._util import COMMAND, app
+
+from .static import MANIFEST_FILE, UNKNOWN_COMMAND_TOKEN, UNKNOWN_SUBCOMMAND_TOKEN
+
+DEFAULT_ENV = {"COLUMNS": "100", "LINES": "40", "TERM": "xterm-256color"}
+
+
+def _invoke(runner: CliRunner, cli, args: Iterable[str]):  # pins prog_name and env
+    return runner.invoke(cli, list(args), prog_name=COMMAND, env=DEFAULT_ENV)
+
+
+def _get_help(runner: CliRunner, cli, args: Iterable[str]) -> str:
+    """Return the `--help` text for `args`; raise RuntimeError if it cannot render."""
+    argv = list(args)  # materialise once: `args` may be a one-shot iterator
+    help_text = _maybe_get_help(runner, cli, argv)
+    if help_text is None:
+        raise RuntimeError(f"Could not render help for: {' '.join(argv) or '<root>'}")
+    return help_text
+
+
+def _maybe_get_help(runner: CliRunner, cli, args: Iterable[str]):
+    """Return the `--help` text for `args`, or None when the exit code is non-zero."""
+    result = _invoke(runner, cli, [*args, "--help"])
+    return result.stdout if result.exit_code == 0 else None
+
+
+def build_manifest() -> Dict[str, object]:
+    """Load all CLI commands and capture their `--help` and error output via
+    `CliRunner`, returning those texts plus the command-name lists as a dict."""
+    load_all_commands()
+    root = get_command(app)
+    runner = CliRunner()
+    known_top_level: List[str] = sorted(root.commands)
+    groups: Dict[str, List[str]] = {}
+    hidden_top: List[str] = []
+    hidden_in_group: Dict[str, List[str]] = {}
+    group_help: Dict[str, str] = {}
+    command_help: Dict[str, str] = {}
+    unknown_sub: Dict[str, str] = {}
+
+    for name, command in root.commands.items():
+        if getattr(command, "hidden", False):
+            hidden_top.append(name)
+        if not hasattr(command, "commands"):  # plain command rather than a group
+            command_help[name] = _get_help(runner, app, [name])
+            continue
+        # Command group: record its members, its own help and its error output.
+        members = command.commands
+        groups[name] = sorted(members)
+        hidden_in_group[name] = [
+            sub for sub in groups[name] if getattr(members[sub], "hidden", False)
+        ]
+        group_help[name] = _get_help(runner, app, [name])
+        bad_call = _invoke(runner, app, [name, UNKNOWN_SUBCOMMAND_TOKEN])
+        unknown_sub[name] = bad_call.output
+        for sub in groups[name]:
+            sub_help = _maybe_get_help(runner, app, [name, sub])
+            if sub_help is not None:  # subcommands whose help fails are left out
+                command_help[f"{name} {sub}"] = sub_help
+
+    return {
+        "command": COMMAND,
+        "known_top_level": known_top_level,
+        "known_groups": groups,
+        "hidden_top_level": hidden_top,
+        "hidden_group_commands": hidden_in_group,
+        "root_help": _get_help(runner, app, []),
+        "group_help": group_help,
+        "command_help": command_help,
+        "errors": {
+            "missing_command": _invoke(runner, app, []).output,
+            "unknown_command": _invoke(runner, app, [UNKNOWN_COMMAND_TOKEN]).output,
+            "unknown_subcommand": unknown_sub,
+        },
+    }
+
+
+def write_manifest(path: Path) -> Path:
+    """Build the CLI manifest, write it to `path` as UTF-8 JSON and return `path`."""
+    text = json.dumps(build_manifest(), indent=2, ensure_ascii=False, sort_keys=True)
+    # ensure_ascii=False can emit non-ASCII text, so pin UTF-8 instead of the locale.
+    path.write_text(text + "\n", encoding="utf-8")
+    return path
+
+
+def main() -> None:
+    write_manifest(Path(__file__).with_name(MANIFEST_FILE))  # file beside this module
+
+
+if __name__ == "__main__":  # run via `python -m spacy_cli.build_manifest`
+    main()
diff --git a/spacy_cli/cli_manifest.json b/spacy_cli/cli_manifest.json
new file mode 100644
index 00000000000..e756c058eb3
--- /dev/null
+++ b/spacy_cli/cli_manifest.json
@@ -0,0 +1,118 @@
+{
+  "command": "python -m spacy",
+  "command_help": {
+    "apply": "Usage: python -m spacy apply [OPTIONS] MODEL DATA_PATH OUTPUT_FILE\n\n  Apply a trained pipeline to documents to get predictions. Expects a loadable\n  spaCy pipeline and path to the data, which can be a directory or a file. The\n  data files can be provided in multiple formats:     1. .spacy files     2.\n  .jsonl files with a specified \"field\" to read the text from.     3. Files with\n  any other extension are assumed to be containing        a single document.\n  DOCS: https://spacy.io/api/cli#apply\n\nArguments:\n  MODEL        Model name or path  [required]\n  DATA_PATH    Location of the documents to predict on. Can be a single file in\n               .spacy format or a .jsonl file. Files with other extensions are\n               treated as single plain text documents. If a directory is\n               provided it is traversed recursively to grab all files to be\n               processed. The files can be a mixture of .spacy, .jsonl and text\n               files. If .jsonl is provided the specified field is going to be\n               grabbed (\"text\" by default).  [required]\n  OUTPUT_FILE  Path to save the resulting .spacy file  [required]\n\nOptions:\n  -c, --code PATH           Path to Python file with additional code (registered\n                            functions) to be imported\n  -tk, --text-key TEXT      Key containing text string for JSONL  [default:\n                            text]\n  -F, --force               Force overwriting the output file\n  -g, --gpu-id INTEGER      GPU ID or -1 for CPU.  [default: -1]\n  -b, --batch-size INTEGER  Batch size.  [default: 1]\n  -n, --n-process INTEGER   number of processors to use.  [default: 1]\n  --help                    Show this message and exit.\n",
+    "assemble": "Usage: python -m spacy assemble [OPTIONS] CONFIG_PATH OUTPUT_PATH\n\n  Assemble a spaCy pipeline from a config file. The config file includes all\n  settings for initializing the pipeline. To override settings in the config,\n  e.g. settings that point to local paths or that you want to experiment with,\n  you can override them as command line options. The --code argument lets you\n  pass in a Python file that can be used to register custom functions that are\n  referenced in the config.\n\n  DOCS: https://spacy.io/api/cli#assemble\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  OUTPUT_PATH  Output directory to store assembled pipeline in  [required]\n\nOptions:\n  -c, --code PATH     Path to Python file with additional code (registered\n                      functions) to be imported\n  -V, -VV, --verbose  Display more information for debugging purposes\n  --help              Show this message and exit.\n",
+    "benchmark accuracy": "Usage: python -m spacy benchmark accuracy [OPTIONS] MODEL DATA_PATH\n\n  Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation\n  data in the binary .spacy format. The --gold-preproc option sets up the\n  evaluation examples with gold-standard sentences and tokens for the\n  predictions. Gold preprocessing helps the annotations align to the\n  tokenization, and may result in sequences of more consistent length. However,\n  it may reduce runtime accuracy due to train/test skew. To render a sample of\n  dependency parses in a HTML file, set as output directory as the displacy_path\n  argument.\n\n  DOCS: https://spacy.io/api/cli#benchmark-accuracy\n\nArguments:\n  MODEL      Model name or path  [required]\n  DATA_PATH  Location of binary evaluation data in .spacy format  [required]\n\nOptions:\n  -o, --output FILE               Output JSON file for metrics\n  -c, --code PATH                 Path to Python file with additional code\n                                  (registered functions) to be imported\n  -g, --gpu-id INTEGER            GPU ID or -1 for CPU  [default: -1]\n  -G, --gold-preproc              Use gold preprocessing\n  -dp, --displacy-path DIRECTORY  Directory to output rendered parses as HTML\n  -dl, --displacy-limit INTEGER   Limit of parses to render as HTML  [default:\n                                  25]\n  -P, --per-component             Return scores per component, only applicable\n                                  when an output JSON file is specified.\n  -sk, --spans-key TEXT           Spans key to use when evaluating Doc.spans\n                                  [default: sc]\n  --help                          Show this message and exit.\n",
+    "benchmark speed": "Usage: python -m spacy benchmark speed [OPTIONS] MODEL DATA_PATH\n\n  Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark data in\n  the binary .spacy format.\n\nArguments:\n  MODEL      Model name or path  [required]\n  DATA_PATH  Location of binary evaluation data in .spacy format  [required]\n\nOptions:\n  -b, --batch-size INTEGER RANGE  Override the pipeline batch size  [x>=1]\n  --no-shuffle                    Do not shuffle benchmark data\n  -g, --gpu-id INTEGER            GPU ID or -1 for CPU  [default: -1]\n  --batches INTEGER RANGE         Minimum number of batches to benchmark\n                                  [default: 50; x>=30]\n  -w, --warmup INTEGER RANGE      Number of iterations over the data for warmup\n                                  [default: 3; x>=0]\n  -c, --code PATH                 Path to Python file with additional code\n                                  (registered functions) to be imported\n  --help                          Show this message and exit.\n",
+    "convert": "Usage: python -m spacy convert [OPTIONS] INPUT_PATH [OUTPUT_DIR]\n\n  Convert files into json or DocBin format for training. The resulting .spacy\n  file can be used with the train command and other experiment management\n  functions.\n\n  If no output_dir is specified and the output format is JSON, the data is\n  written to stdout, so you can pipe them forward to a JSON file: $ spacy\n  convert some_file.conllu --file-type json > some_file.json\n\n  DOCS: https://spacy.io/api/cli#convert\n\nArguments:\n  INPUT_PATH    Input file or directory  [required]\n  [OUTPUT_DIR]  Output directory. '-' for stdout.  [default: -]\n\nOptions:\n  -t, --file-type [json|spacy]  Type of data to produce  [default: spacy]\n  -n, --n-sents INTEGER         Number of sentences per doc (0 to disable)\n                                [default: 1]\n  -s, --seg-sents               Segment sentences (for -c ner)\n  -b, --model, --base TEXT      Trained spaCy pipeline for sentence segmentation\n                                to use as base (for --seg-sents)\n  -m, --morphology              Enable appending morphology to tags\n  -T, --merge-subtokens         Merge CoNLL-U subtokens\n  -c, --converter TEXT          Converter: ('conllubio', 'conllu', 'conll',\n                                'ner', 'iob', 'json')  [default: auto]\n  -nm, --ner-map PATH           NER tag mapping (as JSON-encoded dict of entity\n                                types)\n  -l, --lang TEXT               Language (if tokenizer required)\n  -C, --concatenate             Concatenate output to a single file\n  --help                        Show this message and exit.\n",
+    "debug config": "Usage: python -m spacy debug config [OPTIONS] CONFIG_PATH\n\n  Debug a config file and show validation errors. The command will create all\n  objects in the tree and validate them. Note that some config validation errors\n  are blocking and will prevent the rest of the config from being resolved. This\n  means that you may not see all validation errors at once and some issues are\n  only shown once previous errors have been fixed. Similar as with the 'train'\n  command, you can override settings from the config as command line options.\n  For instance, --training.batch_size 128 overrides the value of \"batch_size\" in\n  the block \"[training]\".\n\n  DOCS: https://spacy.io/api/cli#debug-config\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  -c, --code-path, --code PATH  Path to Python file with additional code\n                                (registered functions) to be imported\n  -F, --show-functions          Show an overview of all registered functions\n                                used in the config and where they come from\n                                (modules, files etc.)\n  -V, --show-variables          Show an overview of all variables referenced in\n                                the config and their values. This will also\n                                reflect variables overwritten on the CLI.\n  --help                        Show this message and exit.\n",
+    "debug data": "Usage: python -m spacy debug data [OPTIONS] CONFIG_PATH\n\n  Analyze, debug and validate your training and development data. Outputs useful\n  stats, and can help you find problems like invalid entity annotations, cyclic\n  dependencies, low data labels and more.\n\n  DOCS: https://spacy.io/api/cli#debug-data\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  -c, --code-path, --code PATH  Path to Python file with additional code\n                                (registered functions) to be imported\n  -IW, --ignore-warnings        Ignore warnings, only show stats and errors\n  -V, --verbose                 Print additional information and explanations\n  -NF, --no-format              Don't pretty-print the results\n  --help                        Show this message and exit.\n",
+    "debug diff-config": "Usage: python -m spacy debug diff-config [OPTIONS] CONFIG_PATH\n\n  Show a diff of a config file with respect to spaCy's defaults or another\n  config file. If additional settings were used in the creation of the config\n  file, then you must supply these as extra parameters to the command when\n  comparing to the default settings. The generated diff can also be used when\n  posting to the discussion forum to provide more information for the\n  maintainers.\n\n  The `optimize`, `gpu`, and `pretraining` options are only relevant when\n  comparing against the default configuration (or specifically when `compare_to`\n  is None).\n\n  DOCS: https://spacy.io/api/cli#debug-diff\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  --compare-to PATH               Path to a config file to diff against, or\n                                  `None` to compare against default settings\n  -o, --optimize [efficiency|accuracy]\n                                  Whether the user config was optimized for\n                                  efficiency or accuracy. Only relevant when\n                                  comparing against the default config.\n                                  [default: efficiency]\n  -G, --gpu                       Whether the original config can run on a GPU.\n                                  Only relevant when comparing against the\n                                  default config.\n  --pretraining, --pt             Whether to compare on a config with\n                                  pretraining involved. Only relevant when\n                                  comparing against the default config.\n  -md, --markdown                 Generate Markdown for GitHub issues\n  --help                          Show this message and exit.\n",
+    "debug model": "Usage: python -m spacy debug model [OPTIONS] CONFIG_PATH COMPONENT\n\n  Analyze a Thinc model implementation. Includes checks for internal structure\n  and activations during training.\n\n  DOCS: https://spacy.io/api/cli#debug-model\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  COMPONENT    Name of the pipeline component of which the model should be\n               analysed  [required]\n\nOptions:\n  -l, --layers TEXT     Comma-separated names of layer IDs to print\n  -DIM, --dimensions    Show dimensions\n  -PAR, --parameters    Show parameters\n  -GRAD, --gradients    Show gradients\n  -ATTR, --attributes   Show attributes\n  -P0, --print-step0    Print model before training\n  -P1, --print-step1    Print model after initialization\n  -P2, --print-step2    Print model after training\n  -P3, --print-step3    Print final predictions\n  -g, --gpu-id INTEGER  GPU ID or -1 for CPU  [default: -1]\n  --help                Show this message and exit.\n",
+    "debug profile": "Usage: python -m spacy debug profile [OPTIONS] MODEL [INPUTS]\n\n  Profile which functions take the most time in a spaCy pipeline. Input should\n  be formatted as one JSON object per line with a key \"text\". It can either be\n  provided as a JSONL file, or be read from sys.sytdin. If no input file is\n  specified, the IMDB dataset is loaded via Thinc.\n\n  DOCS: https://spacy.io/api/cli#debug-profile\n\nArguments:\n  MODEL     Trained pipeline to load  [required]\n  [INPUTS]  Location of input file. '-' for stdin.\n\nOptions:\n  -n, --n-texts INTEGER  Maximum number of texts to use if available  [default:\n                         10000]\n  --help                 Show this message and exit.\n",
+    "debug-data": "Usage: python -m spacy debug-data [OPTIONS] CONFIG_PATH\n\n  Analyze, debug and validate your training and development data. Outputs useful\n  stats, and can help you find problems like invalid entity annotations, cyclic\n  dependencies, low data labels and more.\n\n  DOCS: https://spacy.io/api/cli#debug-data\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  -c, --code-path, --code PATH  Path to Python file with additional code\n                                (registered functions) to be imported\n  -IW, --ignore-warnings        Ignore warnings, only show stats and errors\n  -V, --verbose                 Print additional information and explanations\n  -NF, --no-format              Don't pretty-print the results\n  --help                        Show this message and exit.\n",
+    "download": "Usage: python -m spacy download [OPTIONS] MODEL\n\n  Download compatible trained pipeline from the default download path using pip.\n  If --direct flag is set, the command expects the full package name with\n  version. For direct downloads, the compatibility check will be skipped. All\n  additional arguments provided to this command will be passed to `pip install`\n  on package installation.\n\n  DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES:\n  https://spacy.io/models\n\nArguments:\n  MODEL  Name of pipeline package to download  [required]\n\nOptions:\n  -d, -D, --direct  Force direct download of name + version\n  -S, --sdist       Download sdist (.tar.gz) archive instead of pre-built binary\n                    wheel\n  -U, --url TEXT    Download from given url\n  --help            Show this message and exit.\n",
+    "evaluate": "Usage: python -m spacy evaluate [OPTIONS] MODEL DATA_PATH\n\n  Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation\n  data in the binary .spacy format. The --gold-preproc option sets up the\n  evaluation examples with gold-standard sentences and tokens for the\n  predictions. Gold preprocessing helps the annotations align to the\n  tokenization, and may result in sequences of more consistent length. However,\n  it may reduce runtime accuracy due to train/test skew. To render a sample of\n  dependency parses in a HTML file, set as output directory as the displacy_path\n  argument.\n\n  DOCS: https://spacy.io/api/cli#benchmark-accuracy\n\nArguments:\n  MODEL      Model name or path  [required]\n  DATA_PATH  Location of binary evaluation data in .spacy format  [required]\n\nOptions:\n  -o, --output FILE               Output JSON file for metrics\n  -c, --code PATH                 Path to Python file with additional code\n                                  (registered functions) to be imported\n  -g, --gpu-id INTEGER            GPU ID or -1 for CPU  [default: -1]\n  -G, --gold-preproc              Use gold preprocessing\n  -dp, --displacy-path DIRECTORY  Directory to output rendered parses as HTML\n  -dl, --displacy-limit INTEGER   Limit of parses to render as HTML  [default:\n                                  25]\n  -P, --per-component             Return scores per component, only applicable\n                                  when an output JSON file is specified.\n  -sk, --spans-key TEXT           Spans key to use when evaluating Doc.spans\n                                  [default: sc]\n  --help                          Show this message and exit.\n",
+    "find-function": "Usage: python -m spacy find-function [OPTIONS] FUNC_NAME\n\n  Find the module, path and line number to the file the registered function is\n  defined in, if available.\n\n  func_name (str): Name of the registered function. registry_name\n  (Optional[str]): Name of the catalogue registry.\n\n  DOCS: https://spacy.io/api/cli#find-function\n\nArguments:\n  FUNC_NAME  Name of the registered function.  [required]\n\nOptions:\n  -r, --registry TEXT  Name of the catalogue registry.\n  --help               Show this message and exit.\n",
+    "find-threshold": "Usage: python -m spacy find-threshold [OPTIONS] MODEL DATA_PATH PIPE_NAME\n                                      THRESHOLD_KEY SCORES_KEY\n\n  Runs prediction trials for a trained model with varying thresholds to maximize\n  the specified metric. The search space for the threshold is traversed linearly\n  from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`\n  (the corresponding API call to `spacy.cli.find_threshold.find_threshold()`\n  returns all results).\n\n  This is applicable only for components whose predictions are influenced by\n  thresholds - e.g. `textcat_multilabel` and `spancat`, but not `textcat`. Note\n  that the full path to the corresponding threshold attribute in the config has\n  to be provided.\n\n  DOCS: https://spacy.io/api/cli#find-threshold\n\nArguments:\n  MODEL          Model name or path  [required]\n  DATA_PATH      Location of binary evaluation data in .spacy format  [required]\n  PIPE_NAME      Name of pipe to examine thresholds for  [required]\n  THRESHOLD_KEY  Key of threshold attribute in component's configuration\n                 [required]\n  SCORES_KEY     Metric to optimize  [required]\n\nOptions:\n  -n, --n_trials INTEGER  Number of trials to determine optimal thresholds\n                          [default: 11]\n  -c, --code PATH         Path to Python file with additional code (registered\n                          functions) to be imported\n  -g, --gpu-id INTEGER    GPU ID or -1 for CPU  [default: -1]\n  -G, --gold-preproc      Use gold preprocessing\n  -V, -VV, --verbose      Display more information for debugging purposes\n  --help                  Show this message and exit.\n",
+    "info": "Usage: python -m spacy info [OPTIONS] [MODEL]\n\n  Print info about spaCy installation. If a pipeline is specified as an\n  argument, print its meta information. Flag --markdown prints details in\n  Markdown for easy copy-pasting to GitHub issues.\n\n  Flag --url prints only the download URL of the most recent compatible version\n  of the pipeline.\n\n  DOCS: https://spacy.io/api/cli#info\n\nArguments:\n  [MODEL]  Optional loadable spaCy pipeline\n\nOptions:\n  -md, --markdown     Generate Markdown for GitHub issues\n  -s, -S, --silent    Don't print anything (just return)\n  -e, --exclude TEXT  Comma-separated keys to exclude from the print-out\n                      [default: labels]\n  -u, --url           Print the URL to download the most recent compatible\n                      version of the pipeline\n  --help              Show this message and exit.\n",
+    "init config": "Usage: python -m spacy init config [OPTIONS] OUTPUT_FILE\n\n  Generate a starter config file for training. Based on your requirements\n  specified via the CLI arguments, this command generates a config with the\n  optimal settings for your use case. This includes the choice of architecture,\n  pretrained weights and related hyperparameters.\n\n  DOCS: https://spacy.io/api/cli#init-config\n\nArguments:\n  OUTPUT_FILE  File to save the config to or - for stdout (will only output\n               config and no additional logging info)  [required]\n\nOptions:\n  -l, --lang TEXT                 Two-letter code of the language to use\n                                  [default: en]\n  -p, --pipeline TEXT             Comma-separated names of trainable pipeline\n                                  components to include (without 'tok2vec' or\n                                  'transformer')  [default: tagger,parser,ner]\n  -o, --optimize [efficiency|accuracy]\n                                  Whether to optimize for efficiency (faster\n                                  inference, smaller model, lower memory\n                                  consumption) or higher accuracy (potentially\n                                  larger and slower model). This will impact the\n                                  choice of architecture, pretrained weights and\n                                  related hyperparameters.  [default:\n                                  efficiency]\n  -G, --gpu                       Whether the model can run on GPU. This will\n                                  impact the choice of architecture, pretrained\n                                  weights and related hyperparameters.\n  -pt, --pretraining              Include config for pretraining (with 'spacy\n                                  pretrain')\n  -F, --force                     Force overwriting the output file\n  --help                          Show this message and exit.\n",
+    "init fill-config": "Usage: python -m spacy init fill-config [OPTIONS] BASE_PATH [OUTPUT_FILE]\n\n  Fill partial config file with default values. Will add all missing settings\n  from the default config and will create all objects, check the registered\n  functions for their default values and update the base config. This command\n  can be used with a config generated via the training quickstart widget:\n  https://spacy.io/usage/training#quickstart\n\n  DOCS: https://spacy.io/api/cli#init-fill-config\n\nArguments:\n  BASE_PATH      Path to base config to fill  [required]\n  [OUTPUT_FILE]  Path to output .cfg file (or - for stdout)  [default: -]\n\nOptions:\n  -pt, --pretraining            Include config for pretraining (with 'spacy\n                                pretrain')\n  -D, --diff                    Print a visual diff highlighting the changes\n  -c, --code-path, --code PATH  Path to Python file with additional code\n                                (registered functions) to be imported\n  --help                        Show this message and exit.\n",
+    "init labels": "Usage: python -m spacy init labels [OPTIONS] CONFIG_PATH OUTPUT_PATH\n\n  Generate JSON files for the labels in the data. This helps speed up the\n  training process, since spaCy won't have to preprocess the data to extract the\n  labels.\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  OUTPUT_PATH  Output directory for the labels  [required]\n\nOptions:\n  -c, --code PATH       Path to Python file with additional code (registered\n                        functions) to be imported\n  -V, -VV, --verbose    Display more information for debugging purposes\n  -g, --gpu-id INTEGER  GPU ID or -1 for CPU  [default: -1]\n  --help                Show this message and exit.\n",
+    "init nlp": "Usage: python -m spacy init nlp [OPTIONS] CONFIG_PATH OUTPUT_PATH\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  OUTPUT_PATH  Output directory for the prepared data  [required]\n\nOptions:\n  -c, --code PATH       Path to Python file with additional code (registered\n                        functions) to be imported\n  -V, -VV, --verbose    Display more information for debugging purposes\n  -g, --gpu-id INTEGER  GPU ID or -1 for CPU  [default: -1]\n  --help                Show this message and exit.\n",
+    "init vectors": "Usage: python -m spacy init vectors [OPTIONS] LANG VECTORS_LOC OUTPUT_DIR\n\n  Convert word vectors for use with spaCy. Will export an nlp object that you\n  can use in the [initialize] block of your config to initialize a model with\n  vectors.\n\nArguments:\n  LANG         The language of the nlp object to create  [required]\n  VECTORS_LOC  Vectors file in Word2Vec format  [required]\n  OUTPUT_DIR   Pipeline output directory  [required]\n\nOptions:\n  -p, --prune INTEGER     Optional number of vectors to prune to  [default: -1]\n  -t, --truncate INTEGER  Optional number of vectors to truncate to when reading\n                          in vectors file  [default: 0]\n  -m, --mode TEXT         Vectors mode: default or floret  [default: default]\n  -n, --name TEXT         Optional name for the word vectors, e.g.\n                          en_core_web_lg.vectors\n  -V, -VV, --verbose      Display more information for debugging purposes\n  -a, --attr TEXT         Optional token attribute to use for vectors, e.g.\n                          LOWER or NORM  [default: ORTH]\n  --help                  Show this message and exit.\n",
+    "link": "Usage: python -m spacy link [OPTIONS] ARGS KWARGS\n\n  As of spaCy v3.0, symlinks like \"en\" are not supported anymore. You can load\n  trained pipeline packages using their full names or from a directory path.\n  (DEPRECATED)\n\nArguments:\n  ARGS    [required]\n  KWARGS  [required]\n\nOptions:\n  --help  Show this message and exit.\n",
+    "package": "Usage: python -m spacy package [OPTIONS] INPUT_DIR OUTPUT_DIR\n\n  Generate an installable Python package for a pipeline. Includes binary data,\n  meta and required installation files. A new directory will be created in the\n  specified output directory, and the data will be copied over. If --create-meta\n  is set and a meta.json already exists in the output directory, the existing\n  values will be used as the defaults in the command-line prompt. After\n  packaging, \"python -m build --sdist\" is run in the package directory, which\n  will create a .tar.gz archive that can be installed via \"pip install\".\n\n  If additional code files are provided (e.g. Python files containing custom\n  registered functions like pipeline components), they are copied into the\n  package and imported in the __init__.py.\n\n  DOCS: https://spacy.io/api/cli#package\n\nArguments:\n  INPUT_DIR   Directory with pipeline data  [required]\n  OUTPUT_DIR  Output parent directory  [required]\n\nOptions:\n  -c, --code TEXT                 Comma-separated paths to Python file with\n                                  additional code (registered functions) to be\n                                  included in the package\n  -m, --meta-path, --meta FILE    Path to meta.json\n  -C, --create-meta               Create meta.json, even if one exists\n  -n, --name TEXT                 Package name to override meta\n  -v, --version TEXT              Package version to override meta\n  -b, --build TEXT                Comma-separated formats to build: sdist and/or\n                                  wheel, or none.  [default: sdist]\n  -f, -F, --force                 Force overwriting existing data in output\n                                  directory\n  -R, -R, --require-parent / --no-require-parent\n                                  Include the parent package (e.g. 
spacy) in the\n                                  requirements  [default: require-parent]\n  --help                          Show this message and exit.\n",
+    "pretrain": "Usage: python -m spacy pretrain [OPTIONS] CONFIG_PATH OUTPUT_DIR\n\n  Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components, using\n  an approximate language-modelling objective. Two objective types are\n  available, vector-based and character-based.\n\n  In the vector-based objective, we load word vectors that have been trained\n  using a word2vec-style distributional similarity algorithm, and train a\n  component like a CNN, BiLSTM, etc to predict vectors which match the\n  pretrained ones. The weights are saved to a directory after each epoch. You\n  can then pass a path to one of these pretrained weights files to the 'spacy\n  train' command.\n\n  This technique may be especially helpful if you have little labelled data.\n  However, it's still quite experimental, so your mileage may vary.\n\n  To load the weights back in during 'spacy train', you need to ensure all\n  settings are the same between pretraining and training. Ideally, this is done\n  by using the same config file for both commands.\n\n  DOCS: https://spacy.io/api/cli#pretrain\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n  OUTPUT_DIR   Directory to write weights to on each epoch  [required]\n\nOptions:\n  -c, --code PATH              Path to Python file with additional code\n                               (registered functions) to be imported\n  -r, --resume-path PATH       Path to pretrained weights from which to resume\n                               pretraining\n  -er, --epoch-resume INTEGER  The epoch to resume counting from when using\n                               --resume-path. Prevents unintended overwriting of\n                               existing weight files.\n  -g, --gpu-id INTEGER         GPU ID or -1 for CPU  [default: -1]\n  -L, --skip-last              Skip saving model-last.bin\n  --help                       Show this message and exit.\n",
+    "profile": "Usage: python -m spacy profile [OPTIONS] MODEL [INPUTS]\n\n  Profile which functions take the most time in a spaCy pipeline. Input should\n  be formatted as one JSON object per line with a key \"text\". It can either be\n  provided as a JSONL file, or be read from sys.sytdin. If no input file is\n  specified, the IMDB dataset is loaded via Thinc.\n\n  DOCS: https://spacy.io/api/cli#debug-profile\n\nArguments:\n  MODEL     Trained pipeline to load  [required]\n  [INPUTS]  Location of input file. '-' for stdin.\n\nOptions:\n  -n, --n-texts INTEGER  Maximum number of texts to use if available  [default:\n                         10000]\n  --help                 Show this message and exit.\n",
+    "project assets": "Usage: python -m spacy project assets [OPTIONS] [PROJECT_DIR]\n\n  Fetch project assets like datasets and pretrained weights. Assets are defined\n  in the \"assets\" section of the project.yml. If a checksum is provided in the\n  project.yml, the file is only downloaded if no local file with the same\n  checksum exists.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/tutorial/directory-\n  and-assets.md\n\nArguments:\n  [PROJECT_DIR]  Path to cloned project. Defaults to current working directory.\n                 [default: /Users/matt/repos/spacy-monorepo/spacy]\n\nOptions:\n  -S, --sparse  Use sparse checkout for assets provided via Git, to only check\n                out and clone the files needed. Requires Git v22.2+.\n  -e, --extra   Download all assets, including those marked as 'extra'.\n  --help        Show this message and exit.\n",
+    "project clone": "Usage: python -m spacy project clone [OPTIONS] NAME [DEST]\n\n  Clone a project template from a repository. Calls into \"git\" and will only\n  download the files from the given subdirectory. The GitHub repo defaults to\n  the official Weasel template repo, but can be customized (including using a\n  private repo).\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#clipboard-\n  clone\n\nArguments:\n  NAME    The name of the template to clone  [required]\n  [DEST]  Where to clone the project. Defaults to current working directory\n\nOptions:\n  -r, --repo TEXT    The repository to clone from  [default:\n                     https://github.com/explosion/projects]\n  -b, --branch TEXT  The branch to clone from. If not provided, will attempt\n                     main, master\n  -S, --sparse       Use sparse Git checkout to only check out and clone the\n                     files needed. Requires Git v22.2+.\n  --help             Show this message and exit.\n",
+    "project document": "Usage: python -m spacy project document [OPTIONS] [PROJECT_DIR]\n\n  Auto-generate a README.md for a project. If the content is saved to a file,\n  hidden markers are added so you can add custom content before or after the\n  auto-generated section and only the auto-generated docs will be replaced when\n  you re-run the command.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#closed_book-\n  document\n\nArguments:\n  [PROJECT_DIR]  Path to cloned project. Defaults to current working directory.\n                 [default: /Users/matt/repos/spacy-monorepo/spacy]\n\nOptions:\n  -o, --output PATH  Path to output Markdown file for output. Defaults to - for\n                     standard output  [default: -]\n  -NE, --no-emoji    Don't use emoji\n  --help             Show this message and exit.\n",
+    "project dvc": "Usage: python -m spacy project dvc [OPTIONS] [PROJECT_DIR] [WORKFLOW]\n\n  Auto-generate Data Version Control (DVC) config. A DVC project can only define\n  one pipeline, so you need to specify one workflow defined in the project.yml.\n  If no workflow is specified, the first defined workflow is used. The DVC\n  config will only be updated if the project.yml changed.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#repeat-dvc\n\nArguments:\n  [PROJECT_DIR]  Location of project directory. Defaults to current working\n                 directory.  [default: /Users/matt/repos/spacy-monorepo/spacy]\n  [WORKFLOW]     Name of workflow defined in project.yml. Defaults to first\n                 workflow if not set.\n\nOptions:\n  -V, --verbose  Print more info\n  -q, --quiet    Print less info\n  -F, --force    Force update DVC config\n  --help         Show this message and exit.\n",
+    "project pull": "Usage: python -m spacy project pull [OPTIONS] [REMOTE] [PROJECT_DIR]\n\n  Retrieve available precomputed outputs from a remote storage. You can alias\n  remotes in your project.yml by mapping them to storage paths. A storage can be\n  anything that the smart_open library can upload to, e.g. AWS, Google Cloud\n  Storage, SSH, local directories etc.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_down-\n  push\n\nArguments:\n  [REMOTE]       Name or path of remote storage  [default: default]\n  [PROJECT_DIR]  Location of project directory. Defaults to current working\n                 directory.  [default: /Users/matt/repos/spacy-monorepo/spacy]\n\nOptions:\n  --help  Show this message and exit.\n",
+    "project push": "Usage: python -m spacy project push [OPTIONS] [REMOTE] [PROJECT_DIR]\n\n  Persist outputs to a remote storage. You can alias remotes in your project.yml\n  by mapping them to storage paths. A storage can be anything that the\n  smart_open library can upload to, e.g. AWS, Google Cloud Storage, SSH, local\n  directories etc.\n\n  DOCS: https://github.com/explosion/weasel/tree/main/docs/cli.md#arrow_up-push\n\nArguments:\n  [REMOTE]       Name or path of remote storage  [default: default]\n  [PROJECT_DIR]  Location of project directory. Defaults to current working\n                 directory.  [default: /Users/matt/repos/spacy-monorepo/spacy]\n\nOptions:\n  --help  Show this message and exit.\n",
+    "train": "Usage: python -m spacy train [OPTIONS] CONFIG_PATH\n\n  Train or update a spaCy pipeline. Requires data in spaCy's binary format. To\n  convert data from other formats, use the `spacy convert` command. The config\n  file includes all settings and hyperparameters used during training. To\n  override settings in the config, e.g. settings that point to local paths or\n  that you want to experiment with, you can override them as command line\n  options. For instance, --training.batch_size 128 overrides the value of\n  \"batch_size\" in the block \"[training]\". The --code argument lets you pass in a\n  Python file that's imported before training. It can be used to register custom\n  functions and architectures that can then be referenced in the config.\n\n  DOCS: https://spacy.io/api/cli#train\n\nArguments:\n  CONFIG_PATH  Path to config file  [required]\n\nOptions:\n  -o, --output, --output-path PATH\n                                  Output directory to store trained pipeline in\n  -c, --code PATH                 Path to Python file with additional code\n                                  (registered functions) to be imported\n  -V, -VV, --verbose              Display more information for debugging\n                                  purposes\n  -g, --gpu-id INTEGER            GPU ID or -1 for CPU  [default: -1]\n  --help                          Show this message and exit.\n",
+    "validate": "Usage: python -m spacy validate [OPTIONS]\n\n  Validate the currently installed pipeline packages and spaCy version. Checks\n  if the installed packages are compatible and shows upgrade instructions if\n  available. Should be run after `pip install -U spacy`.\n\n  DOCS: https://spacy.io/api/cli#validate\n\nOptions:\n  --help  Show this message and exit.\n"
+  },
+  "errors": {
+    "missing_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n\nError: Missing command.\n",
+    "unknown_command": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_COMMAND__'.\n",
+    "unknown_subcommand": {
+      "benchmark": "Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy benchmark --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.\n",
+      "debug": "Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy debug --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.\n",
+      "init": "Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy init --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.\n",
+      "project": "Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...\nTry 'python -m spacy project --help' for help.\n\nError: No such command '__SPACY_UNKNOWN_SUBCOMMAND__'.\n"
+    }
+  },
+  "group_help": {
+    "benchmark": "Usage: python -m spacy benchmark [OPTIONS] COMMAND [ARGS]...\n\n  Commands for benchmarking pipelines.\n\nOptions:\n  --help  Show this message and exit.\n\nCommands:\n  accuracy  Evaluate a trained pipeline.\n  speed     Benchmark a pipeline.\n",
+    "debug": "Usage: python -m spacy debug [OPTIONS] COMMAND [ARGS]...\n\n  Suite of helpful commands for debugging and profiling. Includes commands to\n  check and validate your config files, training and evaluation data, and custom\n  model implementations.\n\nOptions:\n  --help  Show this message and exit.\n\nCommands:\n  data         Analyze, debug and validate your training and development data.\n  profile      Profile which functions take the most time in a spaCy pipeline.\n  config       Debug a config file and show validation errors.\n  diff-config  Show a diff of a config file with respect to spaCy's...\n  model        Analyze a Thinc model implementation.\n",
+    "init": "Usage: python -m spacy init [OPTIONS] COMMAND [ARGS]...\n\n  Commands for initializing configs and pipeline packages.\n\nOptions:\n  --help  Show this message and exit.\n\nCommands:\n  config       Generate a starter config file for training.\n  fill-config  Fill partial config file with default values.\n  vectors      Convert word vectors for use with spaCy.\n  labels       Generate JSON files for the labels in the data.\n",
+    "project": "Usage: python -m spacy project [OPTIONS] COMMAND [ARGS]...\n\n  Command-line interface for spaCy projects and templates. You'd typically start\n  by cloning a project template to a local directory and fetching its assets\n  like datasets etc. See the project's project.yml for the available commands.\n\nOptions:\n  --help  Show this message and exit.\n\nCommands:\n  assets    Fetch project assets like datasets and pretrained weights.\n  clone     Clone a project template from a repository.\n  document  Auto-generate a README.md for a project.\n  dvc       Auto-generate Data Version Control (DVC) config.\n  run       Run a named command or workflow defined in the project.yml.\n  pull      Retrieve available precomputed outputs from a remote storage.\n  push      Persist outputs to a remote storage.\n"
+  },
+  "hidden_group_commands": {
+    "benchmark": [],
+    "debug": [],
+    "init": [
+      "nlp"
+    ],
+    "project": []
+  },
+  "hidden_top_level": [
+    "link",
+    "debug-data",
+    "profile"
+  ],
+  "known_groups": {
+    "benchmark": [
+      "accuracy",
+      "speed"
+    ],
+    "debug": [
+      "config",
+      "data",
+      "diff-config",
+      "model",
+      "profile"
+    ],
+    "init": [
+      "config",
+      "fill-config",
+      "labels",
+      "nlp",
+      "vectors"
+    ],
+    "project": [
+      "assets",
+      "clone",
+      "document",
+      "dvc",
+      "pull",
+      "push",
+      "run"
+    ]
+  },
+  "known_top_level": [
+    "apply",
+    "assemble",
+    "benchmark",
+    "convert",
+    "debug",
+    "debug-data",
+    "download",
+    "evaluate",
+    "find-function",
+    "find-threshold",
+    "info",
+    "init",
+    "link",
+    "package",
+    "pretrain",
+    "profile",
+    "project",
+    "train",
+    "validate"
+  ],
+  "root_help": "Usage: python -m spacy [OPTIONS] COMMAND [ARGS]...\n\n  spaCy Command-line Interface\n\n  DOCS: https://spacy.io/api/cli\n\nOptions:\n  --install-completion  Install completion for the current shell.\n  --show-completion     Show completion for the current shell, to copy it or\n                        customize the installation.\n  --help                Show this message and exit.\n\nCommands:\n  download        Download compatible trained pipeline from the default...\n  info            Print info about spaCy installation.\n  apply           Apply a trained pipeline to documents to get predictions.\n  assemble        Assemble a spaCy pipeline from a config file.\n  convert         Convert files into json or DocBin format for training.\n  evaluate        Evaluate a trained pipeline.\n  find-function   Find the module, path and line number to the file the...\n  find-threshold  Runs prediction trials for a trained model with varying...\n  package         Generate an installable Python package for a pipeline.\n  pretrain        Pre-train the 'token-to-vector' (tok2vec) layer of...\n  train           Train or update a spaCy pipeline.\n  validate        Validate the currently installed pipeline packages and...\n  debug           Suite of helpful commands for debugging and profiling.\n  benchmark       Commands for benchmarking pipelines.\n  init            Commands for initializing configs and pipeline packages.\n  project         Command-line interface for spaCy projects and templates.\n"
+}
diff --git a/spacy_cli/main.py b/spacy_cli/main.py
new file mode 100644
index 00000000000..f8e6cabe808
--- /dev/null
+++ b/spacy_cli/main.py
@@ -0,0 +1,73 @@
+import sys
+from typing import Iterable, Optional
+
+from .static import HELP_OPTIONS, UNKNOWN_COMMAND_TOKEN, UNKNOWN_SUBCOMMAND_TOKEN
+from .static import get_plugin_command_names, load_manifest
+
+
+def _write_output(text: str) -> None:
+    """Write ``text`` to stdout, appending a newline if it does not end in one."""
+    terminator = "" if text.endswith("\n") else "\n"
+    sys.stdout.write(text + terminator)
+
+
+def _run_live() -> None:
+    """Run the full spaCy CLI; ``spacy.cli`` is imported only when this is called."""
+    import spacy.cli
+    spacy.cli.setup_cli()
+
+
+def _try_static(argv: Iterable[str]):
+    """Answer ``argv`` from the static manifest where that is possible.
+
+    Returns ``(text, exit_code)``, or ``None`` when the live CLI has to run.
+    Entry points are scanned lazily, only on branches that depend on plugins.
+    """
+    args = list(argv)
+    manifest = load_manifest()
+    if not args:
+        return manifest["errors"]["missing_command"], 2
+    first = args[0]
+    if first in HELP_OPTIONS:
+        return None if get_plugin_command_names() else (manifest["root_help"], 0)
+    if first.startswith("-"):
+        return None
+    if first not in manifest["known_top_level"]:
+        if first in get_plugin_command_names():
+            return None
+        template = manifest["errors"]["unknown_command"]
+        return template.replace(UNKNOWN_COMMAND_TOKEN, first), 2
+    groups = manifest["known_groups"]
+    if first in groups:
+        return _try_static_group(args, first, manifest, groups, get_plugin_command_names())
+    wants_help = any(arg in HELP_OPTIONS for arg in args[1:])
+    return (manifest["command_help"][first], 0) if wants_help else None
+
+
+def _try_static_group(args, first, manifest, known_groups, plugin_command_names):
+    """Static answer for command group ``first``; ``None`` defers to the live CLI."""
+    if len(args) == 1 or args[1] in HELP_OPTIONS:
+        return None if plugin_command_names else (manifest["group_help"][first], 0)
+    second = args[1]
+    if second.startswith("-"):
+        return None  # an option, not a subcommand: defer, as for top-level options
+    if second not in known_groups[first]:
+        if plugin_command_names:
+            return None
+        template = manifest["errors"]["unknown_subcommand"][first]
+        return template.replace(UNKNOWN_SUBCOMMAND_TOKEN, second), 2
+    wants_help = any(arg in HELP_OPTIONS for arg in args[2:])
+    return (manifest["command_help"][f"{first} {second}"], 0) if wants_help else None
+
+
+def main(argv: Optional[Iterable[str]] = None) -> None:
+    """Print a static answer and exit with its code, or else run the live CLI."""
+    args = sys.argv[1:] if argv is None else list(argv)
+    try:
+        static_result = _try_static(args)
+    except Exception:  # go live outside the handler: no exception chaining
+        static_result = None
+    if static_result is None:
+        return _run_live()  # NOTE(review): ``argv`` is not forwarded -- confirm
+    _write_output(static_result[0])
+    raise SystemExit(static_result[1])
diff --git a/spacy_cli/static.py b/spacy_cli/static.py
new file mode 100644
index 00000000000..51594ceef9a
--- /dev/null
+++ b/spacy_cli/static.py
@@ -0,0 +1,24 @@
+import json
+from functools import lru_cache
+from importlib.metadata import entry_points
+from importlib.resources import files
+from typing import Any, Dict, Set
+
+
+HELP_OPTIONS = {"--help", "-h"}  # NOTE(review): help texts list only --help; confirm -h
+PLUGIN_ENTRY_POINT_GROUP = "spacy_cli"  # entry-point group scanned for plugins
+MANIFEST_FILE = "cli_manifest.json"  # resource read from the spacy_cli package
+UNKNOWN_COMMAND_TOKEN = "__SPACY_UNKNOWN_COMMAND__"  # placeholder in error templates
+UNKNOWN_SUBCOMMAND_TOKEN = "__SPACY_UNKNOWN_SUBCOMMAND__"  # same, for subcommands
+
+
+@lru_cache(maxsize=1)
+def load_manifest() -> Dict[str, Any]:
+    """Parse the packaged JSON manifest; cached, so callers share one dict object."""
+    return json.loads((files("spacy_cli") / MANIFEST_FILE).read_text(encoding="utf8"))
+
+
+def get_plugin_command_names() -> Set[str]:  # names in the plugin entry-point group
+    eps, group = entry_points(), PLUGIN_ENTRY_POINT_GROUP  # eps: dict before 3.10
+    found = eps.select(group=group) if hasattr(eps, "select") else eps.get(group, ())
+    return {entry_point.name for entry_point in found}