Skip to content
Open
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
c2d49f6
feat: multiagent session interface
JackYPCOnline Sep 25, 2025
49253bc
feat: enable multiagent session persistence
JackYPCOnline Sep 29, 2025
41d34a9
fix: add write file fallback for window permission handling
JackYPCOnline Sep 29, 2025
2e854f8
fix: remove persistence_hooks, use session_manager to subscribe multi…
JackYPCOnline Sep 30, 2025
0380567
Update src/strands/multiagent/base.py
JackYPCOnline Oct 1, 2025
93909d0
Update src/strands/multiagent/base.py
JackYPCOnline Oct 1, 2025
023cfba
Update src/strands/session/session_manager.py
JackYPCOnline Oct 2, 2025
d064038
Update src/strands/session/session_manager.py
JackYPCOnline Oct 2, 2025
b00f58d
feat: add restricted type check to serialization/deserialization func…
JackYPCOnline Oct 6, 2025
d67c848
fix: remove persistence_hooks, use session_manager to subscribe multi…
JackYPCOnline Oct 6, 2025
e6b4af2
fix: remove optional from invoke_callbacks
JackYPCOnline Oct 7, 2025
1fcfdc0
fix: fix from_dict consistency
JackYPCOnline Oct 7, 2025
3fe3978
fix: fix from_dic consistency
JackYPCOnline Oct 7, 2025
683a14f
fix: fix file session creation issue
JackYPCOnline Oct 7, 2025
7411f2c
fix: remove completed_nodes, rename execution_order to node_history i…
JackYPCOnline Oct 7, 2025
e9c2d57
fix: address comments, adding more tests and integration tests in ne…
JackYPCOnline Oct 10, 2025
8233c62
feat: add more unit tests and integration tests to validate Graph/Swa…
JackYPCOnline Oct 13, 2025
1e9851a
Merge branch 'main' into multi-agent-session
JackYPCOnline Oct 13, 2025
f1aac16
fix: fix bad rebase
JackYPCOnline Oct 13, 2025
7735ed3
fix: revert single agent session_manager validator
JackYPCOnline Oct 15, 2025
7b3aabb
fix: refine code structures, address related comments
JackYPCOnline Oct 17, 2025
58424ba
Merge branch 'main' into multi-agent-session
JackYPCOnline Oct 17, 2025
8ed2e21
feat: add BeforeNodeCallEvent to swarm & graph
JackYPCOnline Oct 17, 2025
d3adef3
fix: fix bad rebase
JackYPCOnline Oct 17, 2025
80c8169
fix: address comments, move from_dict() to AgentResult, fix docstring…
JackYPCOnline Oct 20, 2025
2ff4035
fix: fix typo and pattern
JackYPCOnline Oct 20, 2025
191f5e0
fix: rename multiagent dictory
JackYPCOnline Oct 20, 2025
a1e10ed
fix: address PR comments
JackYPCOnline Oct 20, 2025
734c59b
fix: address comment
JackYPCOnline Oct 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/strands/experimental/hooks/multiagent_hooks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Multi-agent hook events for persistent execution.

This package exposes the lifecycle events fired by multi-agent orchestrators so that
hooks can provide session persistence, enabling resumable execution after
interruptions or failures.
"""

from .multiagent_events import (
AfterMultiAgentInvocationEvent,
AfterNodeCallEvent,
BeforeMultiAgentInvocationEvent,
BeforeNodeCallEvent,
MultiAgentInitializedEvent,
)

__all__ = [
"AfterMultiAgentInvocationEvent",
"AfterNodeCallEvent",
"BeforeMultiAgentInvocationEvent",
"BeforeNodeCallEvent",
"MultiAgentInitializedEvent",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Multi-agent execution lifecycle events for hook system integration.
These events are fired by orchestrators (Graph/Swarm) at key points so
hooks can persist, monitor, or debug execution. No intermediate state model
is used—hooks read from the orchestrator directly.
"""

from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

from ....hooks import BaseHookEvent

if TYPE_CHECKING:
from ....multiagent.base import MultiAgentBase


@dataclass
class MultiAgentInitializedEvent(BaseHookEvent):
    """Event triggered when a multi-agent orchestrator has been initialized.

    Attributes:
        source: The multi-agent orchestrator instance.
        invocation_state: Configuration that the user passes in.
    """

    source: "MultiAgentBase"
    invocation_state: dict[str, Any] | None = None


@dataclass
class BeforeNodeCallEvent(BaseHookEvent):
    """Event triggered before an individual node starts executing.

    This is the "before" counterpart of AfterNodeCallEvent.

    Attributes:
        source: The multi-agent orchestrator instance.
        node_id: ID of the node that is about to execute.
        invocation_state: Configuration that the user passes in.
    """

    source: "MultiAgentBase"
    node_id: str
    invocation_state: dict[str, Any] | None = None


@dataclass
class AfterNodeCallEvent(BaseHookEvent):
    """Event triggered after an individual node's execution completes.

    Attributes:
        source: The multi-agent orchestrator instance.
        node_id: ID of the node that just completed execution.
        invocation_state: Configuration that the user passes in.
    """

    source: "MultiAgentBase"
    node_id: str
    invocation_state: dict[str, Any] | None = None

    @property
    def should_reverse_callbacks(self) -> bool:
        """True to invoke callbacks in reverse order."""
        # Constant for this event type: it always reports reverse ordering.
        return True


@dataclass
class BeforeMultiAgentInvocationEvent(BaseHookEvent):
    """Event triggered before orchestrator execution starts.

    This is the "before" counterpart of AfterMultiAgentInvocationEvent.

    Attributes:
        source: The multi-agent orchestrator instance.
        invocation_state: Configuration that the user passes in.
    """

    source: "MultiAgentBase"
    invocation_state: dict[str, Any] | None = None


@dataclass
class AfterMultiAgentInvocationEvent(BaseHookEvent):
    """Event triggered after orchestrator execution completes.

    This is the "after" counterpart of BeforeMultiAgentInvocationEvent.

    Attributes:
        source: The multi-agent orchestrator instance.
        invocation_state: Configuration that the user passes in.
    """

    source: "MultiAgentBase"
    invocation_state: dict[str, Any] | None = None

    @property
    def should_reverse_callbacks(self) -> bool:
        """True to invoke callbacks in reverse order."""
        # Kept consistent with AfterNodeCallEvent: "after" events report
        # reverse ordering so they mirror their "before" counterparts.
        return True
156 changes: 153 additions & 3 deletions src/strands/multiagent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
"""

import asyncio
import logging
import warnings
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Union
from typing import Any, Union, cast

from ..agent import AgentResult
from ..types.content import ContentBlock
from ..types.event_loop import Metrics, Usage
from ..telemetry.metrics import EventLoopMetrics
from ..types.content import ContentBlock, Message
from ..types.event_loop import Metrics, StopReason, Usage

logger = logging.getLogger(__name__)


class Status(Enum):
Expand Down Expand Up @@ -59,6 +63,93 @@ def get_agent_results(self) -> list[AgentResult]:
flattened.extend(nested_node_result.get_agent_results())
return flattened

def to_dict(self) -> dict[str, Any]:
    """Convert this NodeResult to a JSON-serializable dict, ignoring the state field.

    Returns:
        A dict with the keys ``result``, ``execution_time``, ``status``,
        ``accumulated_usage``, ``accumulated_metrics`` and ``execution_count``.
        The usage and metrics mappings are shallow copies, so changing the
        snapshot does not change this NodeResult (and vice versa).

    Raises:
        TypeError: If ``self.result`` is not an Exception, an AgentResult or a
            MultiAgentResult.
    """
    result = self.result
    if isinstance(result, Exception):
        # Only the message is persisted; the concrete exception class is not.
        result_data: dict[str, Any] = {"type": "exception", "message": str(result)}
    elif isinstance(result, AgentResult):
        # Serialize AgentResult without state field
        result_data = {
            "type": "agent_result",
            "stop_reason": result.stop_reason,
            "message": result.message,
        }
    elif isinstance(result, MultiAgentResult):
        result_data = result.to_dict()
    else:
        raise TypeError(f"Unsupported NodeResult.result type for serialization: {type(result).__name__}")

    return {
        "result": result_data,
        "execution_time": self.execution_time,
        "status": self.status.value,
        # Copy (as MultiAgentResult.to_dict does) instead of handing out the
        # live mappings that later accumulation may keep mutating.
        "accumulated_usage": dict(self.accumulated_usage),
        "accumulated_metrics": dict(self.accumulated_metrics),
        "execution_count": self.execution_count,
    }

@classmethod
def from_dict(cls, data: dict[str, Any]) -> "NodeResult":
    """Rebuild a NodeResult from its persisted JSON form.

    Args:
        data: Mapping that must contain a ``result`` entry; every other
            field falls back to a default when absent.

    Returns:
        The rehydrated NodeResult.

    Raises:
        TypeError: If ``result`` is missing or its payload is not one of the
            recognised shapes (agent result, exception, nested multi-agent result).
    """
    if "result" not in data:
        raise TypeError("NodeResult.from_dict: missing 'result'")
    raw = data["result"]
    # Non-dict payloads have no discriminator and fall through to the error branch.
    raw_type = raw.get("type") if isinstance(raw, dict) else None

    result: Union[AgentResult, "MultiAgentResult", Exception]
    if raw_type == "agent_result":
        result = NodeResult.agent_result_from_persisted(raw)
    elif raw_type == "exception":
        result = Exception(str(raw.get("message", "node failed")))
    elif isinstance(raw, dict) and "results" in raw:
        # Nested multi-agent results are recognised by their "results" key.
        result = MultiAgentResult.from_dict(raw)
    else:
        raise TypeError(f"NodeResult.from_dict: unsupported result payload: {raw!r}")

    persisted_usage = data.get("accumulated_usage", {})
    restored_usage = Usage(
        inputTokens=persisted_usage.get("inputTokens", 0),
        outputTokens=persisted_usage.get("outputTokens", 0),
        totalTokens=persisted_usage.get("totalTokens", 0),
    )
    # The cache counters are optional: carry them over only when persisted.
    if "cacheReadInputTokens" in persisted_usage:
        restored_usage["cacheReadInputTokens"] = persisted_usage["cacheReadInputTokens"]
    if "cacheWriteInputTokens" in persisted_usage:
        restored_usage["cacheWriteInputTokens"] = persisted_usage["cacheWriteInputTokens"]

    persisted_metrics = data.get("accumulated_metrics", {})
    restored_metrics = Metrics(latencyMs=persisted_metrics.get("latencyMs", 0))

    execution_time = int(data.get("execution_time", 0))
    status = Status(data.get("status", "pending"))
    execution_count = int(data.get("execution_count", 0))

    return cls(
        result=result,
        execution_time=execution_time,
        status=status,
        accumulated_usage=restored_usage,
        accumulated_metrics=restored_metrics,
        execution_count=execution_count,
    )

@classmethod
def agent_result_from_persisted(cls, data: dict[str, Any]) -> AgentResult:
    """Rebuild a minimal AgentResult from its persisted JSON form.

    The expected input looks like::

        {"type": "agent_result", "message": <Message>, "stop_reason": <str|None>}

    Only ``message`` and ``stop_reason`` are restored; metrics are a fresh
    ``EventLoopMetrics()`` and state is an empty dict.

    Raises:
        TypeError: If ``data["type"]`` is not ``"agent_result"``.
    """
    if data.get("type") != "agent_result":
        raise TypeError(f"agent_result_from_persisted: unexpected type {data.get('type')!r}")

    # cast() only informs the type checker; the values are passed through as stored.
    persisted_message = cast(Message, data.get("message"))
    persisted_stop_reason = cast(StopReason, data.get("stop_reason"))

    try:
        rehydrated = AgentResult(
            message=persisted_message,
            stop_reason=persisted_stop_reason,
            metrics=EventLoopMetrics(),
            state={},
        )
    except Exception:
        # Leave a debug breadcrumb, then let the original error propagate.
        logger.debug("AgentResult constructor failed during rehydrating")
        raise
    else:
        return rehydrated


@dataclass
class MultiAgentResult:
Expand All @@ -76,6 +167,46 @@ class MultiAgentResult:
execution_count: int = 0
execution_time: int = 0

def to_dict(self) -> dict[str, Any]:
    """Convert this MultiAgentResult to a JSON-serializable dict.

    Returns:
        A dict carrying a ``type`` discriminator, the status value, each
        node result serialized through its own ``to_dict()``, shallow copies
        of the accumulated usage and metrics, and the execution counters.
    """
    serialized_nodes = {node_id: node_result.to_dict() for node_id, node_result in self.results.items()}
    return {
        # Discriminator spelling corrected from the earlier "mutiagent_result" typo.
        "type": "multiagent_result",
        "status": self.status.value,
        "results": serialized_nodes,
        "accumulated_usage": dict(self.accumulated_usage),
        "accumulated_metrics": dict(self.accumulated_metrics),
        "execution_count": self.execution_count,
        "execution_time": self.execution_time,
    }

@classmethod
def from_dict(cls, data: dict[str, Any]) -> "MultiAgentResult":
    """Rebuild a MultiAgentResult from its persisted JSON form.

    Args:
        data: Persisted mapping; every field falls back to a default when absent.

    Returns:
        The rehydrated MultiAgentResult, with each entry of ``results``
        rebuilt through ``NodeResult.from_dict``.
    """
    node_results: dict[str, NodeResult] = {}
    for node_id, persisted_node in data.get("results", {}).items():
        node_results[node_id] = NodeResult.from_dict(persisted_node)

    persisted_usage = data.get("accumulated_usage", {})
    restored_usage = Usage(
        inputTokens=persisted_usage.get("inputTokens", 0),
        outputTokens=persisted_usage.get("outputTokens", 0),
        totalTokens=persisted_usage.get("totalTokens", 0),
    )
    # The cache counters are optional: carry them over only when persisted.
    if "cacheReadInputTokens" in persisted_usage:
        restored_usage["cacheReadInputTokens"] = persisted_usage["cacheReadInputTokens"]
    if "cacheWriteInputTokens" in persisted_usage:
        restored_usage["cacheWriteInputTokens"] = persisted_usage["cacheWriteInputTokens"]

    persisted_metrics = data.get("accumulated_metrics", {})
    restored_metrics = Metrics(latencyMs=persisted_metrics.get("latencyMs", 0))

    return cls(
        status=Status(data.get("status", Status.PENDING.value)),
        results=node_results,
        accumulated_usage=restored_usage,
        accumulated_metrics=restored_metrics,
        execution_count=int(data.get("execution_count", 0)),
        execution_time=int(data.get("execution_time", 0)),
    )


class MultiAgentBase(ABC):
"""Base class for multi-agent helpers.
Expand Down Expand Up @@ -122,3 +253,22 @@ def execute() -> MultiAgentResult:
with ThreadPoolExecutor() as executor:
future = executor.submit(execute)
return future.result()

def serialize_state(self) -> dict[str, Any]:
    """Return a JSON-serializable snapshot of the orchestrator state.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()

def deserialize_state(self, payload: dict[str, Any]) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don't see "deserialize" as a mutating action. If this method is actually doing

"""Restore orchestrator state from a session dict and prepare for execution.

then why not call it restore_from_state or restore_from_session?

"""Restore orchestrator state from a session dict."""
raise NotImplementedError

def serialize_node_result_for_persist(self, raw: NodeResult) -> dict[str, Any]:
    """Produce the persistable form of a node result.

    Args:
        raw: The node result to serialize.

    Returns:
        Whatever ``raw.to_dict()`` returns, i.e. its JSON-serializable dict form.
    """
    serialized = raw.to_dict()
    return serialized
Loading
Loading