From 2af19526a7438899f4d81cdd518af328aa7b2698 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 2 Dec 2025 10:25:31 +0000 Subject: [PATCH 1/7] CU-869bbj5u4: Add explicit checks for optional deps for conversion modules --- medcat-v2/medcat/utils/legacy/convert_meta_cat.py | 9 +++++++-- medcat-v2/medcat/utils/legacy/convert_rel_cat.py | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/medcat-v2/medcat/utils/legacy/convert_meta_cat.py b/medcat-v2/medcat/utils/legacy/convert_meta_cat.py index 061d5bdab..6d1aa7036 100644 --- a/medcat-v2/medcat/utils/legacy/convert_meta_cat.py +++ b/medcat-v2/medcat/utils/legacy/convert_meta_cat.py @@ -3,8 +3,6 @@ import json import logging -import torch - from medcat.components.addons.meta_cat import MetaCAT, MetaCATAddon from medcat.components.addons.meta_cat.mctokenizers.tokenizers import ( TokenizerWrapperBase, load_tokenizer) @@ -13,6 +11,13 @@ from medcat.utils.legacy.helpers import fix_old_style_cnf +# NOTE: needs to be before torch since default doesn't include torch +from medcat.utils.import_utils import ensure_optional_extras_installed +_EXTRA_NAME = "meta-cat" +ensure_optional_extras_installed("medcat", _EXTRA_NAME) + +import torch # noqa + logger = logging.getLogger(__name__) diff --git a/medcat-v2/medcat/utils/legacy/convert_rel_cat.py b/medcat-v2/medcat/utils/legacy/convert_rel_cat.py index 4d077c6f4..9b28e59d4 100644 --- a/medcat-v2/medcat/utils/legacy/convert_rel_cat.py +++ b/medcat-v2/medcat/utils/legacy/convert_rel_cat.py @@ -2,8 +2,6 @@ import json import logging -import torch - from medcat.cdb import CDB from medcat.components.addons.relation_extraction.rel_cat import ( RelCAT, RelCATAddon) @@ -13,6 +11,13 @@ from medcat.tokenizing.tokenizers import BaseTokenizer, create_tokenizer from medcat.utils.legacy.helpers import fix_old_style_cnf +# NOTE: needs to be before torch since default doesn't include torch +from medcat.utils.import_utils import ensure_optional_extras_installed +_EXTRA_NAME = 
"rel-cat" +ensure_optional_extras_installed("medcat", _EXTRA_NAME) + +import torch # noqa + logger = logging.getLogger(__name__) From 163f0e6d5953067f0357adc63b8170a2fc06ff7d Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 2 Dec 2025 10:31:36 +0000 Subject: [PATCH 2/7] CU-869bbj5u4: Add missing core dependencies (packaging, pyyaml, requests) --- medcat-v2/pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/medcat-v2/pyproject.toml b/medcat-v2/pyproject.toml index 2488f6ade..8ed6c32fc 100644 --- a/medcat-v2/pyproject.toml +++ b/medcat-v2/pyproject.toml @@ -65,6 +65,9 @@ dependencies = [ # Optional "xxhash>=3.5.0,<4.0", "pydantic>2.0", "typing-extensions", + "packaging", + "pyyaml", + "requests", # TODO - others ] From 51a1416e68754072256d561b90c5d1e061df4306 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 2 Dec 2025 10:46:26 +0000 Subject: [PATCH 3/7] CU-869bbj5u4: Add script to be able to test that base install can import all its parts (or has explicit checks for optional extras) --- .../other/test_base_install_can_import_all.py | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 medcat-v2/tests/other/test_base_install_can_import_all.py diff --git a/medcat-v2/tests/other/test_base_install_can_import_all.py b/medcat-v2/tests/other/test_base_install_can_import_all.py new file mode 100644 index 000000000..b6ff97053 --- /dev/null +++ b/medcat-v2/tests/other/test_base_install_can_import_all.py @@ -0,0 +1,134 @@ +from typing import Iterable +import os +import subprocess +import sys +import re +from collections import Counter + +MISSING_DEP_PATTERN = re.compile( + r"The optional dependency set '([\w\-_]*)' is missing") + + +def walk_packages(path: str, + base_pkg_name: str, + base_path: str = '') -> Iterable[str]: + if not base_path: + base_path = path + pkg_path = path.removeprefix(base_path).replace( + os.path.sep, '.').strip(".") + pkg_to_here = f"{base_pkg_name}.{pkg_path}" if pkg_path else base_pkg_name + for fn in 
os.listdir(path): + cur_path = os.path.join(path, fn) + if os.path.isdir(cur_path) and ( + not fn.startswith("__") and not fn.endswith("__")): + yield from walk_packages(cur_path, base_pkg_name=base_pkg_name, + base_path=base_path) + continue + if not fn.endswith(".py"): + continue + if fn == "__init__.py": + yield pkg_to_here + continue + if fn == "__main__.py": + continue + yield f"{pkg_to_here}.{fn.removesuffix('.py')}" + + +def find_all_modules(package_name, package_path=None): + """Find all importable modules in a package.""" + if package_path is None: + # Import the package to get its path + try: + pkg = __import__(package_name) + package_path = pkg.__path__ + except ImportError: + print(f"Could not import {package_name}") + return [] + + modules = [] + for modname in walk_packages(package_path[0], + base_pkg_name=package_name): + modules.append(modname) + + return modules + + +def test_import(module_name): + """Test if a module can be imported in isolation.""" + code = f"import {module_name}" + result = subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + text=True, + timeout=30, + ) + return result.returncode == 0, result.stderr + + +def get_missing_dep_set(error: str) -> str | None: + err1 = error.strip().split('\n')[-1] + if "MissingDependenciesError" not in err1: + return None + matches = MISSING_DEP_PATTERN.findall(err1) + if len(matches) != 1: + raise ValueError(f"Unknown error:\n'{error}'\nLookin at:\n{err1}" + f"\ngot: {matches}") + return matches[0] + + +def main(): + if len(sys.argv) < 2: + print("Usage: python check_imports.py ") + sys.exit(1) + + package_name = sys.argv[1] + + print(f"Finding all modules in {package_name}...") + modules = find_all_modules(package_name) + + if not modules: + print(f"No modules found in {package_name}") + sys.exit(1) + + print(f"Found {len(modules)} modules. 
Testing imports...\n") + + successful = [] + missing_opt_dep_expl = [] + failed = [] + + for module in modules: + success, error = test_import(module) + if success: + successful.append(module) + print(f"✓ {module}") + elif (missing_dep := get_missing_dep_set(error)): + missing_opt_dep_expl.append((module, missing_dep)) + print(f"M {module}: missing {missing_dep}") + else: + failed.append((module, error)) + print(f"✗ {module}") + # Print the first line of error for quick diagnosis + first_error_line = ( + error.strip().split('\n')[-1] if error else "Unknown error") + print(f" → {first_error_line}") + + # Summary + print("\n" + "="*60) + per_opt_dep_missing = Counter() + for _, missing_dep in missing_opt_dep_expl: + per_opt_dep_missing[missing_dep] += 1 + print(f"Results: {len(successful)} successful, " + f"{len(missing_opt_dep_expl)} missing optional deps " + f"({per_opt_dep_missing}), {len(failed)} failed") + print("="*60) + + if failed: + print("\nFailed imports:") + for module, error in failed: + print(f"\n{module}:") + print(error) + sys.exit(1) + + +if __name__ == "__main__": + main() From 8dd68da35dcb381efd848bc2b17ae6510eea3304 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 2 Dec 2025 10:51:08 +0000 Subject: [PATCH 4/7] CU-869bbj5u4: Rename base install check module --- ...all_can_import_all.py => check_base_install_can_import_all.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename medcat-v2/tests/other/{test_base_install_can_import_all.py => check_base_install_can_import_all.py} (100%) diff --git a/medcat-v2/tests/other/test_base_install_can_import_all.py b/medcat-v2/tests/other/check_base_install_can_import_all.py similarity index 100% rename from medcat-v2/tests/other/test_base_install_can_import_all.py rename to medcat-v2/tests/other/check_base_install_can_import_all.py From 58825ace5a05dad4a48a4ed6d981b0b122590e47 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 2 Dec 2025 10:51:46 +0000 Subject: [PATCH 5/7] CU-869bbj5u4: Include __main__ in 
import checks --- medcat-v2/tests/other/check_base_install_can_import_all.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/medcat-v2/tests/other/check_base_install_can_import_all.py b/medcat-v2/tests/other/check_base_install_can_import_all.py index b6ff97053..0a04c61e1 100644 --- a/medcat-v2/tests/other/check_base_install_can_import_all.py +++ b/medcat-v2/tests/other/check_base_install_can_import_all.py @@ -29,8 +29,6 @@ def walk_packages(path: str, if fn == "__init__.py": yield pkg_to_here continue - if fn == "__main__.py": - continue yield f"{pkg_to_here}.{fn.removesuffix('.py')}" From 82d03f2938aaf31c121e7cafe712f392a1f67d2c Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 2 Dec 2025 10:55:30 +0000 Subject: [PATCH 6/7] CU-869bbj5u4: Update workflow to add check of base installs --- .github/workflows/medcat-v2_main.yml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2_main.yml b/.github/workflows/medcat-v2_main.yml index 6b2a510a3..cedb25972 100644 --- a/.github/workflows/medcat-v2_main.yml +++ b/.github/workflows/medcat-v2_main.yml @@ -11,7 +11,25 @@ defaults: run: working-directory: ./medcat-v2 jobs: - build: + base-install-imports: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + # NOTE: using oldest supported python version + - name: Install uv for Python 3.10 + uses: astral-sh/setup-uv@v7 + with: + python-version: 3.10 + enable-cache: true + cache-dependency-glob: "medcat-v2/uv.lock" + - name: Install the project + run: | + uv sync # NO extras + - name: Check that all packages / modules can be imported with default / no-extras install + run: | + uv run python tests/other/check_base_install_can_import_all.py medcat + + types-lints-tests-regression: runs-on: ubuntu-latest strategy: matrix: From aba75ea0b9de5a563ad622b765fd6fc51f4b9ceb Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 2 Dec 2025 11:04:59 +0000 Subject: [PATCH 7/7] CU-869bbj5u4: Fix python version --- 
.github/workflows/medcat-v2_main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2_main.yml b/.github/workflows/medcat-v2_main.yml index cedb25972..ab70ef8a3 100644 --- a/.github/workflows/medcat-v2_main.yml +++ b/.github/workflows/medcat-v2_main.yml @@ -19,7 +19,7 @@ jobs: - name: Install uv for Python 3.10 uses: astral-sh/setup-uv@v7 with: - python-version: 3.10 + python-version: "3.10" enable-cache: true cache-dependency-glob: "medcat-v2/uv.lock" - name: Install the project