diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..29de38d --- /dev/null +++ b/mypy.ini @@ -0,0 +1,11 @@ +[mypy] +python_version = 3.12 +ignore_missing_imports = True +show_error_codes = True + +# Optional features (container execution, vector stores, store CLI) rely on +# third-party libraries and loose typing; they are validated via tests. +exclude = (?x) + ^src/py_code_mode/execution/container/| + ^src/py_code_mode/workflows/vector_stores/| + ^src/py_code_mode/cli/store\.py$ diff --git a/pyproject.toml b/pyproject.toml index 7c57786..c898e0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,7 @@ dev-dependencies = [ "pytest-xdist>=3.8.0", "testcontainers[redis]>=4.13.3", "chromadb>=0.5", + "types-pyyaml>=6.0.12.20250915", ] [tool.pytest.ini_options] diff --git a/src/py_code_mode/cli/mcp_server.py b/src/py_code_mode/cli/mcp_server.py index 02c27ec..f9e77e1 100644 --- a/src/py_code_mode/cli/mcp_server.py +++ b/src/py_code_mode/cli/mcp_server.py @@ -276,7 +276,9 @@ async def create_session(args: argparse.Namespace) -> Session: """Create session based on CLI args.""" from py_code_mode import Session from py_code_mode.execution.subprocess import SubprocessConfig, SubprocessExecutor + from py_code_mode.storage import StorageBackend + storage: StorageBackend if args.redis: from py_code_mode import RedisStorage diff --git a/src/py_code_mode/execution/in_process/executor.py b/src/py_code_mode/execution/in_process/executor.py index 892a74d..ba43519 100644 --- a/src/py_code_mode/execution/in_process/executor.py +++ b/src/py_code_mode/execution/in_process/executor.py @@ -24,6 +24,7 @@ PackageInstaller, collect_configured_deps, ) +from py_code_mode.deps.store import DepsStore, MemoryDepsStore from py_code_mode.execution.in_process.config import InProcessConfig from py_code_mode.execution.in_process.workflows_namespace import WorkflowsNamespace from py_code_mode.execution.protocol import Capability, validate_storage_not_access @@ -287,15 +288,12 @@ async def start( installer = PackageInstaller() # Use a file-backed store if deps_file is configured, otherwise in-memory if self._config.deps_file: - deps_store = FileDepsStore(self._config.deps_file.parent) + deps_store: DepsStore = FileDepsStore(self._config.deps_file.parent) # Pre-populate store with config deps for dep in initial_deps: - if not deps_store.exists(dep): - deps_store.add(dep) + deps_store.add(dep) else: # In-memory store for deps when no file configured - from py_code_mode.deps.store import MemoryDepsStore - deps_store = MemoryDepsStore() for dep in initial_deps: deps_store.add(dep) diff --git a/src/py_code_mode/execution/in_process/workflows_namespace.py b/src/py_code_mode/execution/in_process/workflows_namespace.py index b8b3749..4bbf680 100644 --- a/src/py_code_mode/execution/in_process/workflows_namespace.py +++ b/src/py_code_mode/execution/in_process/workflows_namespace.py @@ -64,7 +64,7 @@ def library(self) -> WorkflowLibrary: """ return self._library - def search(self, query: str, limit: int = 10) -> list[dict[str, Any]]: + def search(self, query: str, limit: int = 10) -> builtins.list[dict[str, Any]]: """Search for workflows matching query. Returns simplified workflow info.""" workflows = self._library.search(query, limit) return [self._simplify(w) for w in workflows] @@ -73,7 +73,7 @@ def get(self, name: str) -> Any: """Get a workflow by name.""" return self._library.get(name) - def list(self) -> list[dict[str, Any]]: + def list(self) -> builtins.list[dict[str, Any]]: """List all available workflows. Returns simplified workflow info.""" workflows = self._library.list() return [self._simplify(w) for w in workflows] diff --git a/src/py_code_mode/storage/backends.py b/src/py_code_mode/storage/backends.py index dd676a0..4377b4d 100644 --- a/src/py_code_mode/storage/backends.py +++ b/src/py_code_mode/storage/backends.py @@ -243,6 +243,8 @@ def __init__( if url is None and redis is None: raise ValueError("Either 'url' or 'redis' must be provided") + self._url: str | None + if url is not None: from redis import Redis as RedisClient diff --git a/src/py_code_mode/tools/adapters/mcp.py b/src/py_code_mode/tools/adapters/mcp.py index 4a050b4..211d086 100644 --- a/src/py_code_mode/tools/adapters/mcp.py +++ b/src/py_code_mode/tools/adapters/mcp.py @@ -3,7 +3,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable +from typing import TYPE_CHECKING, Any, Protocol, cast, runtime_checkable from py_code_mode.errors import ToolCallError, ToolNotFoundError from py_code_mode.tools.types import Tool, ToolCallable, ToolParameter @@ -11,12 +11,15 @@ logger = logging.getLogger(__name__) # MCP SDK exception types (optional dependency) +MCP_ERRORS: tuple[type[BaseException], ...] try: from mcp import JSONRPCError, McpError - - MCP_ERRORS = (McpError, JSONRPCError) except ImportError: MCP_ERRORS = () +else: + # Stubs for mcp may not model exception inheritance precisely; the runtime objects + # are still safe to use in `except MCP_ERRORS`. + MCP_ERRORS = cast(tuple[type[BaseException], ...], (McpError, JSONRPCError)) if TYPE_CHECKING: from contextlib import AsyncExitStack diff --git a/src/py_code_mode/tools/namespace.py b/src/py_code_mode/tools/namespace.py index 0192e56..a3f8b6f 100644 --- a/src/py_code_mode/tools/namespace.py +++ b/src/py_code_mode/tools/namespace.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio +import builtins from typing import TYPE_CHECKING, Any from py_code_mode.tools.types import Tool, ToolCallable @@ -56,11 +57,11 @@ def __getattr__(self, tool_name: str) -> ToolProxy: return ToolProxy(self._registry, tool, self._loop) - def list(self) -> list[Tool]: + def list(self) -> builtins.list[Tool]: """List all available tools.""" return self._registry.get_all_tools() - def search(self, query: str, limit: int = 5) -> list[Tool]: + def search(self, query: str, limit: int = 5) -> builtins.list[Tool]: """Search tools by query string.""" from py_code_mode.tools.registry import substring_search diff --git a/src/py_code_mode/workflows/library.py b/src/py_code_mode/workflows/library.py index 06c42e0..6fc698e 100644 --- a/src/py_code_mode/workflows/library.py +++ b/src/py_code_mode/workflows/library.py @@ -2,6 +2,7 @@ from __future__ import annotations +import builtins from dataclasses import dataclass, field from typing import TYPE_CHECKING @@ -138,7 +139,7 @@ def add(self, workflow: PythonWorkflow) -> None: # Index locally for semantic search self._index_workflow(workflow) - def list(self) -> list[PythonWorkflow]: + def list(self) -> builtins.list[PythonWorkflow]: """List all workflows.""" return list(self._workflows.values()) @@ -173,7 +174,7 @@ def search( self, query: str, limit: int = 10, - ) -> list[PythonWorkflow]: + ) -> builtins.list[PythonWorkflow]: """Search for workflows by semantic similarity. Args: diff --git a/src/py_code_mode/workflows/store.py b/src/py_code_mode/workflows/store.py index e3c5f11..2e3b20e 100644 --- a/src/py_code_mode/workflows/store.py +++ b/src/py_code_mode/workflows/store.py @@ -265,12 +265,10 @@ def list_all(self) -> list[PythonWorkflow]: return [] workflows = [] - for name, value in all_data.items(): + for raw_name, raw_value in all_data.items(): + name = raw_name.decode() if isinstance(raw_name, bytes) else raw_name + value = raw_value.decode() if isinstance(raw_value, bytes) else raw_value try: - if isinstance(value, bytes): - value = value.decode() - if isinstance(name, bytes): - name = name.decode() data = json.loads(value) workflows.append(self._deserialize_workflow(data)) except (json.JSONDecodeError, ValueError, SyntaxError, KeyError) as e: diff --git a/src/py_code_mode/workflows/workflow.py b/src/py_code_mode/workflows/workflow.py index d8c9e20..52f04fa 100644 --- a/src/py_code_mode/workflows/workflow.py +++ b/src/py_code_mode/workflows/workflow.py @@ -1,17 +1,21 @@ -"""Workflows system - Python workflows with IDE support.""" +"""Workflows system - Python workflows. + +Security model: +- Workflow source is validated and indexed using AST only (no execution). +- Workflow invocation executes the workflow source in a fresh namespace each time + (stateless; module globals do not persist across invocations). +""" from __future__ import annotations import ast import builtins -import importlib.util import inspect import logging -from collections.abc import Callable -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import UTC, datetime from pathlib import Path -from typing import Any, get_type_hints +from typing import Any logger = logging.getLogger(__name__) @@ -48,54 +52,123 @@ class WorkflowParameter: default: Any = None -# Map Python types to our type strings -_PYTHON_TYPE_MAP: dict[type, str] = { - str: "string", - int: "integer", - float: "number", - bool: "boolean", - list: "array", - dict: "object", -} - # Special parameters that are injected, not user-provided _INJECTED_PARAMS = {"tools", "workflows", "artifacts", "deps"} -def _extract_parameters(func: Callable[..., Any], name: str) -> list[WorkflowParameter]: - """Extract WorkflowParameter list from a function's signature.""" - sig = inspect.signature(func) +def _annotation_to_type_str(annotation: ast.expr | None) -> str: + """Best-effort mapping from annotation AST to our simplified type strings.""" + + def _base_name(expr: ast.expr) -> str | None: + if isinstance(expr, ast.Name): + return expr.id + if isinstance(expr, ast.Attribute): + # e.g. typing.List, module.Type + return expr.attr + return None + + if annotation is None: + return "string" + + # Handle list[int], dict[str, int], etc. + if isinstance(annotation, ast.Subscript): + base = _base_name(annotation.value) + if base in {"list", "List", "Sequence", "Iterable"}: + return "array" + if base in {"dict", "Dict", "Mapping"}: + return "object" + + base = _base_name(annotation) + if base in {"str", "String"}: + return "string" + if base in {"int", "Integer"}: + return "integer" + if base in {"float", "number", "Number"}: + return "number" + if base in {"bool", "Boolean"}: + return "boolean" + if base in {"list", "List"}: + return "array" + if base in {"dict", "Dict"}: + return "object" + + return "string" + + +def _default_expr_to_value(expr: ast.expr) -> Any: + """Return a safe representation for a default expression. + + - If it's a Python literal (incl. containers), returns the concrete value. + - Otherwise returns a string representation of the expression. + """ try: - type_hints = get_type_hints(func) - except (NameError, AttributeError, TypeError) as e: - # NameError: unresolved forward references - # AttributeError: issues accessing type attributes - # TypeError: invalid type annotations - logger.debug(f"Type hint extraction failed for {name}: {type(e).__name__}: {e}") - type_hints = {} - - parameters = [] - for param_name, param in sig.parameters.items(): - if param_name in _INJECTED_PARAMS: - continue - - python_type = type_hints.get(param_name, str) - type_str = _PYTHON_TYPE_MAP.get(python_type, "string") - has_default = param.default is not inspect.Parameter.empty - default = param.default if has_default else None + return ast.literal_eval(expr) + except Exception: + try: + return ast.unparse(expr) + except Exception: + return None + + +def _extract_parameters_from_ast(run_func: ast.AsyncFunctionDef) -> list[WorkflowParameter]: + """Extract WorkflowParameter list from an async run() AST node. + This avoids executing workflow code at load/index time. + """ + args = run_func.args + + # Positional args are posonlyargs + args; defaults apply to the last N of these. + pos_args = list(args.posonlyargs) + list(args.args) + defaults = list(args.defaults) + pos_defaults: dict[str, ast.expr] = {} + if defaults: + for arg_node, default_node in zip(pos_args[-len(defaults) :], defaults, strict=True): + pos_defaults[arg_node.arg] = default_node + + kw_defaults: dict[str, ast.expr] = {} + for kw_arg, kw_default in zip(args.kwonlyargs, args.kw_defaults, strict=True): + if kw_default is not None: + kw_defaults[kw_arg.arg] = kw_default + + parameters: list[WorkflowParameter] = [] + + def _add_param(arg_node: ast.arg, default_node: ast.expr | None) -> None: + if arg_node.arg in _INJECTED_PARAMS: + return + + if default_node is not None: + default_val = _default_expr_to_value(default_node) + has_default = True + else: + default_val = None + has_default = False parameters.append( WorkflowParameter( - name=param_name, - type=type_str, + name=arg_node.arg, + type=_annotation_to_type_str(arg_node.annotation), description="", required=not has_default, - default=default, + default=default_val, ) ) + + for a in pos_args: + _add_param(a, pos_defaults.get(a.arg)) + + for a in args.kwonlyargs: + _add_param(a, kw_defaults.get(a.arg)) + return parameters +def _find_run_async_def(tree: ast.Module) -> ast.AsyncFunctionDef: + """Find the top-level async def run() function in a module AST.""" + for node in tree.body: + if isinstance(node, ast.AsyncFunctionDef) and node.name == "run": + return node + raise ValueError("Workflow must define an 'async def run()' function") + + @dataclass class PythonWorkflow: """A workflow defined as a Python module with run() entrypoint. @@ -108,7 +181,6 @@ class PythonWorkflow: description: str parameters: list[WorkflowParameter] source: str - _func: Callable[..., Any] = field(repr=False) metadata: WorkflowMetadata | None = None @classmethod @@ -149,27 +221,12 @@ def from_source( except SyntaxError as e: raise SyntaxError(f"Syntax error in workflow code: {e}") - has_async_run = False - has_sync_run = False - for node in ast.walk(tree): - if isinstance(node, ast.AsyncFunctionDef) and node.name == "run": - has_async_run = True - break - if isinstance(node, ast.FunctionDef) and node.name == "run": - has_sync_run = True - - if has_sync_run and not has_async_run: + has_sync_run = any( + isinstance(node, ast.FunctionDef) and node.name == "run" for node in tree.body + ) + run_node = _find_run_async_def(tree) + if has_sync_run: raise ValueError("Workflow must define 'async def run()', not 'def run()'") - if not has_async_run: - raise ValueError("Workflow must define an 'async def run()' function") - - # Compile and execute to get the function - namespace: dict[str, Any] = {} - _run_code(compile(tree, f"", "exec"), namespace) - - func = namespace.get("run") - if not callable(func): - raise ValueError("run must be a callable function") # Extract description from source if not provided if not description: @@ -180,17 +237,18 @@ def from_source( if isinstance(doc, str): description = doc.strip().split("\n")[0] # Try function docstring - if not description and func.__doc__: - description = func.__doc__.strip().split("\n")[0] + if not description: + func_doc = ast.get_docstring(run_node) + if func_doc: + description = func_doc.strip().split("\n")[0] - parameters = _extract_parameters(func, name) + parameters = _extract_parameters_from_ast(run_node) return cls( name=name, description=description, parameters=parameters, source=source, - _func=func, metadata=metadata or WorkflowMetadata.now(), ) @@ -210,42 +268,24 @@ def from_file(cls, path: Path) -> PythonWorkflow: except SyntaxError as e: raise SyntaxError(f"Syntax error in workflow {path}: {e}") - has_async_run = False - has_sync_run = False - for node in ast.walk(tree): - if isinstance(node, ast.AsyncFunctionDef) and node.name == "run": - has_async_run = True - break - if isinstance(node, ast.FunctionDef) and node.name == "run": - has_sync_run = True - - if has_sync_run and not has_async_run: + has_sync_run = any( + isinstance(node, ast.FunctionDef) and node.name == "run" for node in tree.body + ) + run_node = _find_run_async_def(tree) + if has_sync_run: raise ValueError(f"Workflow {path} must define 'async def run()', not 'def run()'") - if not has_async_run: - raise ValueError(f"Workflow {path} must define an 'async def run()' function") - - # Load module dynamically - spec = importlib.util.spec_from_file_location(path.stem, path) - if spec is None or spec.loader is None: - raise ValueError(f"Could not load module from {path}") - - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - func = module.run # Extract description from module or function docstring - description = module.__doc__ or func.__doc__ or "" - description = description.strip().split("\n")[0] # First line + description = ast.get_docstring(tree) or ast.get_docstring(run_node) or "" + description = description.strip().split("\n")[0] - parameters = _extract_parameters(func, path.stem) + parameters = _extract_parameters_from_ast(run_node) return cls( name=path.stem, description=description, parameters=parameters, source=source, - _func=func, metadata=WorkflowMetadata( created_at=datetime.now(UTC), created_by="human", @@ -254,11 +294,21 @@ def from_file(cls, path: Path) -> PythonWorkflow: ) async def invoke(self, **kwargs: Any) -> Any: - """Invoke the workflow with given parameters. + """Invoke the workflow in a fresh namespace (stateless). - Awaits the async run() function. + Note: This executes the workflow module source on each invocation, so + module-level globals do not persist across calls. """ - return await self._func(**kwargs) + namespace: dict[str, Any] = {} + tree = ast.parse(self.source) + _run_code(compile(tree, f"", "exec"), namespace) + run_func = namespace.get("run") + if not callable(run_func): + raise ValueError(f"Workflow {self.name} has no run() function") + result = run_func(**kwargs) + if inspect.iscoroutine(result): + return await result + return result @property def tags(self) -> frozenset[str]: diff --git a/tests/test_skill_store.py b/tests/test_skill_store.py index b84d6d9..798fb6a 100644 --- a/tests/test_skill_store.py +++ b/tests/test_skill_store.py @@ -367,7 +367,6 @@ def _make_workflow_with_invalid_name(name: str) -> PythonWorkflow: description=valid_workflow.description, parameters=valid_workflow.parameters, source=valid_workflow.source, - _func=valid_workflow._func, metadata=valid_workflow.metadata, ) diff --git a/uv.lock b/uv.lock index 74dcef8..490decc 100644 --- a/uv.lock +++ b/uv.lock @@ -2779,6 +2779,7 @@ dev = [ { name = "redis" }, { name = "ruff" }, { name = "testcontainers", extra = ["redis"] }, + { name = "types-pyyaml" }, { name = "uvicorn" }, ] @@ -2825,6 +2826,7 @@ dev = [ { name = "redis", specifier = ">=7.1.0" }, { name = "ruff", specifier = ">=0.8" }, { name = "testcontainers", extras = ["redis"], specifier = ">=4.13.3" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, { name = "uvicorn", specifier = ">=0.38.0" }, ] @@ -4134,6 +4136,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/64/7713ffe4b5983314e9d436a90d5bd4f63b6054e2aca783a3cfc44cb95bbf/typer-0.20.0-py3-none-any.whl", hash = "sha256:5b463df6793ec1dca6213a3cf4c0f03bc6e322ac5e16e13ddd622a889489784a", size = 47028, upload-time = "2025-10-20T17:03:47.617Z" }, ] +[[package]] +name = "types-pyyaml" +version = "6.0.12.20250915" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522, upload-time = "2025-09-15T03:01:00.728Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0"