feat(agent): 实现 Agent Runtime LLM 配置与消息元数据结构化支持

2026-03-05 18:25:51 +08:00
parent c07d339a5f
commit db158de39c
26 changed files with 1215 additions and 2914 deletions
@@ -5,6 +5,10 @@ from uuid import UUID
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
 from core.agent.application.session_state_persistence import SessionStatePersistence
 from core.agent.domain.message_metadata import (
    MessageMetadataAssistantOutput,
    MessageMetadataToolResult,
 )
 from core.agent.infrastructure.persistence.message_repository import MessageRepository
 from core.agent.infrastructure.persistence.session_repository import SessionRepository
 from core.db import AsyncSessionLocal
@@ -46,14 +50,16 @@ class ResumeService:
                seq=next_seq,
                role=AgentChatMessageRole.TOOL,
                content='{"status":"ok"}',
-                metadata={"type": "tool_result", "tool_call_id": tool_call_id},
+                metadata=MessageMetadataToolResult(
                    tool_call_id=tool_call_id,
                ).model_dump(),
            )
            await message_repository.append_message(
                session_id=session_uuid,
                seq=next_seq + 1,
                role=AgentChatMessageRole.ASSISTANT,
                content="Tool result received",
-                metadata={"type": "assistant_output"},
+                metadata=MessageMetadataAssistantOutput().model_dump(),
            )
            snapshot = self._state_persistence.build_completed_snapshot()
@@ -3,10 +3,16 @@ from __future__ import annotations
 from decimal import Decimal
 from uuid import UUID, uuid4
 from pydantic import ValidationError
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
 from core.agent.application.session_state_persistence import SessionStatePersistence
 from core.agent.domain.message_metadata import (
    MessageMetadataToolCall,
    MessageMetadataUserInput,
 )
 from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.agent.infrastructure.crewai.factory import create_runtime
 from core.agent.infrastructure.persistence.message_repository import MessageRepository
 from core.agent.infrastructure.persistence.session_repository import SessionRepository
@@ -58,10 +64,16 @@ class RunService:
            if chat_session is None:
                raise ValueError("session not found")
-            model_code, provider_name = await self._load_agent_model_selection(
+            (
-                db_session
+                model_code,
                provider_name,
                llm_config,
            ) = await self._load_agent_model_selection(db_session)
            runtime = create_runtime(
                model_code=model_code,
                provider_name=provider_name,
                llm_config=llm_config,
            )
            runtime = create_runtime(model_code=model_code, provider_name=provider_name)
            runtime_result = runtime.execute(user_input=user_input)
            assistant_text = str(runtime_result.get("assistant_text", ""))
            prompt_tokens = _to_int(runtime_result.get("prompt_tokens", 0))
@@ -79,7 +91,7 @@ class RunService:
                role=AgentChatMessageRole.USER,
                content=user_input,
                model_code=model_code,
-                metadata={"type": "user_input"},
+                metadata=MessageMetadataUserInput().model_dump(),
            )
            await message_repository.append_message(
                session_id=session_uuid,
@@ -87,10 +99,9 @@ class RunService:
                role=AgentChatMessageRole.ASSISTANT,
                content=assistant_text or "Tool call pending approval",
                model_code=model_code,
-                metadata={
+                metadata=MessageMetadataToolCall(
-                    "type": "tool_call",
+                    tool_call_id=pending_tool_call_id,
-                    "tool_call_id": pending_tool_call_id,
+                ).model_dump(),
                },
                input_tokens=prompt_tokens,
                output_tokens=completion_tokens,
                cost=cost,
@@ -119,9 +130,9 @@ class RunService:
    async def _load_agent_model_selection(
        self, session: AsyncSession
-    ) -> tuple[str, str]:
+    ) -> tuple[str, str, SystemAgentLLMConfig]:
        stmt = (
-            select(Llm.model_code, LlmFactory.name)
+            select(Llm.model_code, LlmFactory.name, SystemAgents.config)
            .join(SystemAgents, SystemAgents.llm_id == Llm.id)
            .join(LlmFactory, LlmFactory.id == Llm.factory_id)
            .where(SystemAgents.status == "active")
@@ -131,4 +142,11 @@ class RunService:
        record = (await session.execute(stmt)).one_or_none()
        if record is None:
            raise ValueError("active system agent model is required")
-        return str(record[0]), str(record[1])
+
        raw_config = record[2] if isinstance(record[2], dict) else {}
        try:
            llm_config = SystemAgentLLMConfig.model_validate(raw_config)
        except ValidationError as exc:
            raise ValueError("invalid system agent config") from exc
        return str(record[0]), str(record[1]), llm_config
@@ -0,0 +1,39 @@
 from __future__ import annotations
 from typing import Literal
 from pydantic import BaseModel
 class MessageMetadataUserInput(BaseModel):
    """Structured metadata for a message that records user input.

    The only field is the ``type`` discriminator, which is pinned to the
    literal ``"user_input"`` and defaults to that value, so the model can be
    instantiated without arguments.
    """

    type: Literal["user_input"] = "user_input"
 class MessageMetadataToolCall(BaseModel):
    """Structured metadata for a message that records a tool call.

    ``type`` is pinned to the literal ``"tool_call"``; ``tool_call_id`` has no
    default and must be supplied by the caller.
    """

    type: Literal["tool_call"] = "tool_call"
    # Required identifier of the tool call this message refers to.
    tool_call_id: str
 class MessageMetadataToolResult(BaseModel):
    """Structured metadata for a message that records a tool result.

    ``type`` is pinned to the literal ``"tool_result"`` and ``tool_call_id``
    is required. Every other field is optional and defaults to ``None``.

    NOTE: a plain ``model_dump()`` emits every declared field, so optional
    fields that were never set appear in the resulting dict as ``None`` keys
    unless the caller passes ``exclude_none=True``.
    """

    type: Literal["tool_result"] = "tool_result"
    # Required identifier of the tool call this result answers.
    tool_call_id: str
    # Optional correlation / tool identification fields.
    run_id: str | None = None
    turn_id: str | None = None
    tool_name: str | None = None
    # Optional storage location fields (presumably where the payload is
    # persisted -- confirm against the code that fills them in).
    storage_bucket: str | None = None
    storage_path: str | None = None
    # Optional payload descriptor fields.
    payload_sha256: str | None = None
    payload_bytes: int | None = None
    payload_format: str | None = None
 class MessageMetadataAssistantOutput(BaseModel):
    """Structured metadata for a message that records assistant output.

    The only field is the ``type`` discriminator, pinned to the literal
    ``"assistant_output"`` and defaulting to that value.
    """

    type: Literal["assistant_output"] = "assistant_output"
 # Union of the four message-metadata models; each member carries a distinct
 # ``type`` literal, which identifies the variant in a dumped dict.
 MessageMetadata = (
    MessageMetadataUserInput
    | MessageMetadataToolCall
    | MessageMetadataToolResult
    | MessageMetadataAssistantOutput
 )
@@ -0,0 +1,8 @@
 from __future__ import annotations
 from pydantic import BaseModel, Field
 class SystemAgentLLMConfig(BaseModel):
    """Optional LLM sampling parameters with validation bounds.

    Both fields default to ``None``, so an empty mapping validates
    successfully. Values outside the declared bounds raise a pydantic
    ``ValidationError``.
    """

    # Optional; when set it must satisfy 0.0 <= temperature <= 2.0.
    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
    # Optional; when set it must be an integer >= 1.
    max_tokens: int | None = Field(default=None, ge=1)
@@ -1,5 +1,7 @@
 from __future__ import annotations
 from core.agent.domain.message_metadata import MessageMetadataToolResult
 def reconstruct_tool_call_result_event(
    *,
@@ -26,15 +28,14 @@ def build_tool_result_metadata(
    payload_bytes: int,
    payload_format: str,
 ) -> dict[str, object]:
-    return {
+    return MessageMetadataToolResult(
-        "type": "tool_result",
+        run_id=run_id,
-        "run_id": run_id,
+        turn_id=turn_id,
-        "turn_id": turn_id,
+        tool_call_id=tool_call_id,
-        "tool_call_id": tool_call_id,
+        tool_name=tool_name,
-        "tool_name": tool_name,
+        storage_bucket=storage_bucket,
-        "storage_bucket": storage_bucket,
+        storage_path=storage_path,
-        "storage_path": storage_path,
+        payload_sha256=payload_sha256,
-        "payload_sha256": payload_sha256,
+        payload_bytes=payload_bytes,
-        "payload_bytes": payload_bytes,
+        payload_format=payload_format,
-        "payload_format": payload_format,
+    ).model_dump()
    }
@@ -1,15 +1,20 @@
 from __future__ import annotations
 from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.agent.infrastructure.config.resolver import AgentConfigResolver
 from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
 # Build a CrewAIRuntime for the given model/provider selection.
 #
 # All parameters are keyword-only. A new AgentConfigResolver is constructed
 # with no arguments on every call, and ``llm_config`` (which may be None) is
 # forwarded to CrewAIRuntime unchanged.
 def create_runtime(
-    *, model_code: str | None, provider_name: str | None
+    *,
    model_code: str | None,
    provider_name: str | None,
    llm_config: SystemAgentLLMConfig | None = None,
 ) -> CrewAIRuntime:
    resolver = AgentConfigResolver()
    return CrewAIRuntime(
        resolver=resolver,
        model_code=model_code,
        provider_name=provider_name,
        llm_config=llm_config,
    )
@@ -2,6 +2,7 @@ from __future__ import annotations
 from typing import Any
 from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.agent.infrastructure.agui.bridge import to_agui_events
 from core.agent.infrastructure.config.resolver import (
    AgentConfigResolver,
@@ -47,11 +48,13 @@ class CrewAIRuntime:
        resolver: AgentConfigResolver,
        model_code: str | None,
        provider_name: str | None,
        llm_config: SystemAgentLLMConfig | None = None,
    ) -> None:
        self._config: ResolvedAgentConfig = resolver.resolve(
            model_code=model_code,
            provider_name=provider_name,
        )
        self._llm_config = llm_config or SystemAgentLLMConfig()
    def map_events(self, internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
        return to_agui_events(internal_events)
@@ -65,6 +68,8 @@ class CrewAIRuntime:
            model=litellm_model,
            api_key=self._config.provider_api_key,
            messages=[{"role": "user", "content": user_input}],
            temperature=self._llm_config.temperature,
            max_tokens=self._llm_config.max_tokens,
        )
        if not isinstance(response, dict):
            raise ValueError("llm response must be a dict")
@@ -5,13 +5,26 @@ from typing import Any
 from litellm import completion
-def run_completion(*, model: str, api_key: str, messages: list[dict[str, Any]]) -> Any:
+def run_completion(
-    response = completion(
+    *,
-        model=model,
+    model: str,
-        api_key=api_key,
+    api_key: str,
-        messages=messages,
+    messages: list[dict[str, Any]],
-        stream=False,
+    temperature: float | None = None,
-    )
+    max_tokens: int | None = None,
 ) -> Any:
    kwargs: dict[str, Any] = {
        "model": model,
        "api_key": api_key,
        "messages": messages,
        "stream": False,
    }
    if temperature is not None:
        kwargs["temperature"] = temperature
    if max_tokens is not None:
        kwargs["max_tokens"] = max_tokens
    response = completion(**kwargs)
    model_dump = getattr(response, "model_dump", None)
    if callable(model_dump):
        return model_dump()
@@ -9,6 +9,7 @@ from pydantic import BaseModel, ValidationError
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.db.session import AsyncSessionLocal
 from core.logging import get_logger
 from models.llm import Llm
@@ -38,7 +39,7 @@ class SystemAgentsSeed(BaseModel):
    agent_type: str
    llm_model_code: str
    status: str
-    config: dict[str, Any]
+    config: SystemAgentLLMConfig | None = None
 class SystemAgentsYaml(BaseModel):
@@ -184,7 +185,9 @@ async def initialize_system_agents() -> None:
                    agent_type=agent["agent_type"],
                    llm_id=llm.id,
                    status=agent["status"],
-                    config=agent["config"],
+                    config=SystemAgentLLMConfig.model_validate(
                        agent.get("config") or {}
                    ).model_dump(),
                )
    logger.info("Initialized system agents")
@@ -4,15 +4,18 @@ agents:
    status: active
    config:
      temperature: 0.7
      max_tokens: null
  - agent_type: TASK_EXECUTION
    llm_model_code: deepseek-v3.2
    status: active
    config:
      temperature: 0.7
      max_tokens: null
  - agent_type: RESULT_REPORTING
    llm_model_code: deepseek-v3.2
    status: active
    config:
      temperature: 0.7
      max_tokens: null
@@ -1,22 +1,26 @@
 from __future__ import annotations
 from types import SimpleNamespace
 from typing import cast
-from core.agent.infrastructure.config.resolver import AgentConfigResolver
+from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike
 from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
 def test_runtime_emits_text_tool_reasoning_events() -> None:
-    runtime = CrewAIRuntime(
+    settings = cast(
-        resolver=AgentConfigResolver(
+        SettingsLike,
-            settings=SimpleNamespace(
+        SimpleNamespace(
-                agent_runtime=SimpleNamespace(
+            agent_runtime=SimpleNamespace(
-                    default_model_code="",
+                default_model_code="",
-                    streaming_enabled=True,
+                streaming_enabled=True,
-                ),
+            ),
-                llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
+            llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
            )
        ),
    )
    runtime = CrewAIRuntime(
        resolver=AgentConfigResolver(settings=settings),
        model_code="gpt-4o-mini",
        provider_name="dashscope",
    )
@@ -46,11 +50,18 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
    captured: dict[str, object] = {}
    def _fake_completion(
-        *, model: str, api_key: str, messages: list[dict[str, object]]
+        *,
        model: str,
        api_key: str,
        messages: list[dict[str, object]],
        temperature: float | None = None,
        max_tokens: int | None = None,
    ):
        captured["model"] = model
        captured["api_key"] = api_key
        captured["messages"] = messages
        captured["temperature"] = temperature
        captured["max_tokens"] = max_tokens
        return {
            "choices": [
                {
@@ -75,23 +86,28 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
            cost=0.001,
        ),
    )
    settings = cast(
        SettingsLike,
        SimpleNamespace(
            agent_runtime=SimpleNamespace(
                default_model_code="",
                streaming_enabled=True,
            ),
            llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
        ),
    )
    runtime = CrewAIRuntime(
-        resolver=AgentConfigResolver(
+        resolver=AgentConfigResolver(settings=settings),
            settings=SimpleNamespace(
                agent_runtime=SimpleNamespace(
                    default_model_code="",
                    streaming_enabled=True,
                ),
                llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
            )
        ),
        model_code="qwen3.5-flash",
        provider_name="dashscope",
        llm_config=SystemAgentLLMConfig(temperature=0.3, max_tokens=256),
    )
    result = runtime.execute(user_input="hi")
    assert captured["model"] == "dashscope/qwen3.5-flash"
    assert captured["api_key"] == "env-api-key"
    assert captured["temperature"] == 0.3
    assert captured["max_tokens"] == 256
    assert result["assistant_text"] == "hello"
@@ -0,0 +1,14 @@
 from __future__ import annotations
 from core.config.initial.init_data import load_system_agents
 def test_load_system_agents_supports_nullable_max_tokens() -> None:
    """Every loaded agent exposes a config whose ``max_tokens`` is ``None``."""
    seeded_agents = load_system_agents()["agents"]
    assert len(seeded_agents) > 0

    for entry in seeded_agents:
        assert "config" in entry
        agent_config = entry["config"]
        assert "max_tokens" in agent_config
        assert agent_config["max_tokens"] is None
@@ -0,0 +1,51 @@
 from __future__ import annotations
 from core.agent.infrastructure.litellm.client import run_completion
 def test_run_completion_passes_optional_params_when_provided(monkeypatch) -> None:
    """Explicit ``temperature`` / ``max_tokens`` are forwarded to ``completion``."""
    seen_kwargs: dict[str, object] = {}

    def _record_call(**kwargs):  # type: ignore[no-untyped-def]
        # Capture whatever run_completion hands to the patched completion().
        seen_kwargs.update(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _record_call,
    )

    user_messages = [{"role": "user", "content": "hi"}]
    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=user_messages,
        temperature=0.6,
        max_tokens=120,
    )

    assert seen_kwargs["temperature"] == 0.6
    assert seen_kwargs["max_tokens"] == 120
 def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
    """``None`` values for the optional params are not passed to ``completion``."""
    seen_kwargs: dict[str, object] = {}

    def _record_call(**kwargs):  # type: ignore[no-untyped-def]
        # Capture whatever run_completion hands to the patched completion().
        seen_kwargs.update(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _record_call,
    )

    user_messages = [{"role": "user", "content": "hi"}]
    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=user_messages,
        temperature=None,
        max_tokens=None,
    )

    assert "temperature" not in seen_kwargs
    assert "max_tokens" not in seen_kwargs
@@ -4,6 +4,23 @@ import pytest
 from core.agent.application.resume_service import ResumeService
 from core.agent.application.run_service import RunService
 from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 class _FakeResult:
    """Test double for a query result that holds a single pre-set row."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        # Stored untouched; handed back verbatim by one_or_none().
        self._record = record

    def one_or_none(self) -> tuple[object, object, object] | None:
        """Return the row given at construction time, or ``None``."""
        stored_row = self._record
        return stored_row
 class _FakeSession:
    """Test double for a session whose ``execute`` always yields one row."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        # The row (or None) that every execute() call will wrap.
        self._record = record

    async def execute(self, _stmt: object) -> _FakeResult:
        """Ignore the statement and wrap the stored row in a ``_FakeResult``."""
        canned_result = _FakeResult(self._record)
        return canned_result
@pytest.mark.asyncio
@@ -20,3 +37,72 @@ async def test_resume_service_requires_pending_tool_call() -> None:
    with pytest.raises(ValueError):
        await resume_service.resume(session_id="session-1", tool_call_id="call-1")
@pytest.mark.asyncio
 async def test_load_agent_model_selection_returns_validated_llm_config() -> None:
    """A well-formed config row comes back as a ``SystemAgentLLMConfig``."""
    row = (
        "qwen3.5-flash",
        "dashscope",
        {"temperature": 0.5, "max_tokens": 512},
    )
    service = RunService()
    stub_session = _FakeSession(row)

    selection = await service._load_agent_model_selection(
        stub_session  # type: ignore[arg-type]
    )
    model_code, provider_name, llm_config = selection

    assert model_code == "qwen3.5-flash"
    assert provider_name == "dashscope"
    assert isinstance(llm_config, SystemAgentLLMConfig)
    assert llm_config.temperature == 0.5
    assert llm_config.max_tokens == 512
@pytest.mark.asyncio
 async def test_load_agent_model_selection_rejects_invalid_config() -> None:
    """A config row with ``temperature`` 3.0 is reported as a ``ValueError``."""
    invalid_row = ("qwen3.5-flash", "dashscope", {"temperature": 3.0})
    service = RunService()
    stub_session = _FakeSession(invalid_row)

    with pytest.raises(ValueError, match="invalid system agent config"):
        await service._load_agent_model_selection(stub_session)  # type: ignore[arg-type]
@pytest.mark.asyncio
 async def test_load_agent_model_selection_falls_back_when_config_not_dict() -> None:
    """A non-dict config column yields a config with both fields ``None``."""
    non_dict_row = ("qwen3.5-flash", "dashscope", "not-a-dict")
    service = RunService()
    stub_session = _FakeSession(non_dict_row)

    selection = await service._load_agent_model_selection(
        stub_session  # type: ignore[arg-type]
    )
    llm_config = selection[2]

    assert llm_config.temperature is None
    assert llm_config.max_tokens is None
@pytest.mark.asyncio
 async def test_load_agent_model_selection_raises_when_no_active_agent() -> None:
    """With no matching row the loader raises the "required" ``ValueError``."""
    service = RunService()
    empty_session = _FakeSession(None)

    with pytest.raises(ValueError, match="active system agent model is required"):
        await service._load_agent_model_selection(empty_session)  # type: ignore[arg-type]
@@ -1,116 +0,0 @@
 # 前后端 API 对比分析
 **Date:** 2026-03-04
 **Status:** Open
 **Type:** 架构分析
 ---
 ## 一、后端已有、前端缺失的 API
 ### 1. Friendships API (`/api/v1/friends`)
 | 方法 | 路径 | 功能 | 前端状态 |
 |------|------|------|----------|
 | POST | `/requests` | 发送好友请求 | **缺失** |
 | GET | `/requests/inbox` | 获取收件箱 | **缺失** |
 | GET | `/requests/outgoing` | 获取发出的请求 | **缺失** |
 | POST | `/requests/{id}/accept` | 接受好友请求 | **缺失** |
 | POST | `/requests/{id}/decline` | 拒绝好友请求 | **缺失** |
 | DELETE | `/requests/{id}` | 取消好友请求 | **缺失** |
 | GET | `` | 获取好友列表 | **缺失** |
 | DELETE | `/{id}` | 删除好友 | **缺失** |
 ### 2. Inbox Messages API (`/api/v1/inbox/messages`)
 | 方法 | 路径 | 功能 | 前端状态 |
 |------|------|------|----------|
 | GET | `` | 获取消息列表 | **缺失** |
 | POST | `/{id}/accept` | 接受邀请 | **缺失** |
 | POST | `/{id}/dismiss` | 忽略消息 | **缺失** |
 ### 3. Chat/AgUi 流式 API
 | 功能 | 前端状态 |
 |------|----------|
 | 发送消息 SSE 流式 | **仅有 Mock** |
 | 加载历史记录 | **仅有 Mock** |
 > 前端 `AgUiService` 只有本地 mock (`throw UnimplementedError`)，未实现真实 API 调用。
 ### 4. Infra API
 | 方法 | 路径 | 功能 | 前端状态 |
 |------|------|------|----------|
 | GET | `/infra/health` | 基础设施健康检查 | **未使用** |
 ---
 ## 二、前端已有、后端已实现的 API
 ### Auth API (`/api/v1/auth`)
 | 方法 | 路径 | 后端 | 前端 |
 |------|------|------|------|
 | POST | `/verifications` | ✅ | ✅ |
 | POST | `/verifications/verify` | ✅ | ✅ |
 | POST | `/verifications/resend` | ✅ | ✅ |
 | POST | `/sessions` | ✅ | ✅ |
 | POST | `/sessions/refresh` | ✅ | ✅ |
 | DELETE | `/sessions` | ✅ | ✅ |
 | POST | `/password-reset` | ✅ | ✅ |
 | POST | `/password-reset/confirm` | ✅ | ✅ |
 | GET | `/users` | ✅ | **未使用** |
 ### Users API (`/api/v1/users`)
 | 方法 | 路径 | 后端 | 前端 |
 |------|------|------|------|
 | GET | `/me` | ✅ | ✅ |
 | PATCH | `/me` | ✅ | ✅ |
 | POST | `/search` | ✅ | ✅ |
 ### Schedule Items API (`/api/v1/schedule-items`)
 | 方法 | 路径 | 后端 | 前端 |
 |------|------|------|------|
 | POST | `` | ✅ | **仅有 Mock** |
 | GET | `` (range query) | ✅ | **仅有 Mock** |
 | GET | `/{id}` | ✅ | **仅有 Mock** |
 | PATCH | `/{id}` | ✅ | **仅有 Mock** |
 | DELETE | `/{id}` | ✅ | **仅有 Mock** |
 | POST | `/{id}/share` | ✅ | **缺失** |
 ---
 ## 三、待实现功能清单
 | 优先级 | 功能 | 说明 |
 |--------|------|------|
 | **P0** | FriendsApi | 前端无 Friendships API 客户端 |
 | **P0** | InboxMessagesApi | 前端无 Inbox Messages API 客户端 |
 | **P0** | Chat/AgUi 后端连接 | 前端 AgUiService 未实现真实 API |
 | **P1** | CalendarService 真实 API | MockCalendarService → 真实 API 调用 |
 | **P1** | Schedule Share 接口 | 前端未调用 `POST /{id}/share` |
 | **P2** | Infra Health 集成 | 可用于前端健康检查 |
 ---
 ## 四、相关文件位置
 ### 前端 API 客户端
 - `apps/lib/features/auth/data/auth_api.dart` - Auth API
 - `apps/lib/features/users/data/users_api.dart` - Users API
 - `apps/lib/features/calendar/data/services/mock_calendar_service.dart` - Calendar Mock
 - `apps/lib/features/chat/data/services/ag_ui_service.dart` - Chat/AgUi Mock
 - `apps/lib/features/chat/data/services/mock_history_service.dart` - History Mock
 ### 后端 Router
 - `backend/src/v1/auth/router.py` - Auth 路由
 - `backend/src/v1/users/router.py` - Users 路由
 - `backend/src/v1/friendships/router.py` - Friendships 路由
 - `backend/src/v1/inbox_messages/router.py` - Inbox Messages 路由
 - `backend/src/v1/schedule_items/router.py` - Schedule Items 路由
 - `backend/src/v1/infra/router.py` - Infra 路由
@@ -1,145 +0,0 @@
 # 前后端测试分析报告
 **Date:** 2026-03-04
 **Status:** Completed
 ---
 ## 测试统计
 ### 后端测试
 | 类型 | 数量 | 状态 |
 |------|------|------|
 | Unit Tests | ~100+ | 可运行 |
 | Integration Tests | ~70+ | 可运行 |
 | E2E Tests | 5 | **无法运行** (缺少 playwright 依赖) |
 ### 前端测试
 | 类型 | 数量 | 状态 |
 |------|------|------|
 | Flutter Tests | 140 | ✅ 全部通过 |
 ---
 ## 问题发现
 ### 1. 后端 E2E 测试无法运行 (HIGH)
 **问题**: 5 个 E2E 测试文件需要 `playwright` 模块，但依赖未安装。
 **影响文件**:
 - `tests/e2e/test_auth_flow.py`
 - `tests/e2e/test_infra_health_e2e.py`
 - `tests/e2e/test_logging_e2e.py`
 - `tests/e2e/test_mobile_health_e2e.py`
 - `tests/e2e/test_profile_flow.py`
 **错误**:
 ```
 ModuleNotFoundError: No module named 'playwright'
 ```
 **建议**: 
 - 安装 playwright: `uv add playwright && uv run playwright install`
 - 或者移除这些无法运行的 E2E 测试文件
 ---
 ### 2. 测试文件命名冲突导致收集警告 (LOW)
 **问题**: 存在多个同名 `test_schemas.py` 文件在不同目录，导致 pytest 收集时显示警告。
 **影响文件**:
 - `tests/unit/v1/schedule_items/test_schemas.py`
 - `tests/unit/v1/profile/test_schemas.py`
 - `tests/unit/v1/inbox_messages/test_schemas.py`
 - `tests/unit/v1/friendships/test_schemas.py`
 **状态**: 测试实际可以正常运行，只是有警告提示。
 **建议**: 可保持现状（这是合理的代码组织方式），或将各文件重命名为带模块前缀的唯一文件名（如 `test_profile_schemas.py`、`test_friendships_schemas.py`）以消除警告。
 ---
 ### 3. 遗留测试验证旧字段 (INFO)
 **文件**: `tests/unit/v1/profile/test_schemas.py`
 **测试**: `test_profile_update_rejects_display_name_field`
 **说明**: 此测试验证旧的 `display_name` 字段被正确拒绝。字段已在之前的重构中删除。
 **状态**: **有效** - 这是一个回归测试，确保旧字段不被使用。
 ---
 ## 未发现问题的方面
 ### 冗余测试
 经过检查，未发现明显冗余的测试：
 - 每个模块的测试覆盖不同的功能
 - Unit tests、Integration tests、E2E tests 有清晰的职责划分
 ### 死代码
 未发现测试文件中有未使用的:
 - imports
 - mock 类
 - helper 函数
 ### 缺失测试
 未发现对应已实现功能但缺少测试的情况。
 ---
 ## 测试覆盖模块
 ### 后端
 | 模块 | Unit | Integration | E2E |
 |------|------|-------------|-----|
 | Auth | ✅ | ✅ | ❌ |
 | Users | - | ✅ | - |
 | Profile | ✅ | - | ❌ |
 | Friendships | ✅ | ✅ | - |
 | Inbox Messages | ✅ | ✅ | - |
 | Schedule Items | ✅ | ✅ | - |
 | Logging | ✅ | ✅ | ✅ |
 | Settings | ✅ | - | - |
 ### 前端
 | 模块 | 测试数 |
 |------|--------|
 | Auth | ~20 |
 | Chat | ~70 |
 | Home | ~15 |
 | Calendar | ~5 |
 | Core (API, Storage) | ~30 |
 ---
 ## 建议
 1. **立即**: 解决 E2E 测试依赖问题或移除无法运行的测试文件
 2. **可选**: 清理 test_schemas.py 重名警告（低优先级）
 3. **保持**: 现有的测试结构良好，无需重大重构
 ---
 ## 附: 测试代码质量问题
 ### 测试类未完全实现 Protocol (LSP 警告)
 **文件**: `tests/unit/v1/auth/test_auth_service.py`
 **问题**: `FakeGateway` 和 `LogoutAssertingGateway` 类没有实现 `AuthServiceGateway` Protocol 的全部方法：
 - `request_password_reset`
 - `confirm_password_reset`
 **影响**: LSP 类型检查器报告错误，但运行时不受影响（因为这些方法在测试中不会被调用）。
 **建议**: 可选择补充缺失的方法实现（例如直接抛出 `NotImplementedError` 的桩方法）。注意 `@pytest.mark.skip` 只能标记测试函数，不能用于标记不需要的协议方法。
 ---
 *报告生成时间: 2026-03-04*
@@ -1,201 +0,0 @@
 # Agent 后端硬切重构设计
 ## 目标
 - 一次性移除现有 Agent 运行时代码、测试和旧文档契约，避免新旧方案并存。
 - 仅从后端重新设计 Agent 体系，不依赖前端实现细节。
 - 新方案必须满足以下六项要求：
  1. 配置层可通过 `.env` 驱动 LLM API Key。
  2. 对话与 resume 通过 Celery 队列处理，不阻塞 Web 主线程。
  3. `v1/agent` 仅负责路由组织与服务调用，核心逻辑在 `core/agent`。
  4. 按 CrewAI 官方模型组织 Agent/Task/Crew/Flow/Tools。
  5. 按 AG-UI 协议输出事件，优先使用 `ag-ui-crewai` 适配库。
  6. 使用 LiteLLM 统计每次 LLM 调用的 token 和 cost。
 ## 设计原则
 - 单一职责：HTTP 层只做协议和鉴权，编排与执行下沉到核心层。
 - 异步优先：长耗时推理、工具调用、恢复流程全部异步化。
 - 协议优先：AG-UI 作为唯一事件契约，不维护自定义事件方言。
 - 可观测性优先：每次 run、每次 stage、每次 LLM 调用可追踪。
 - 配置单一来源：所有密钥和模型配置只走 `core.config.settings`。
 ## 目标架构
 ### 1) 分层
 - `backend/src/v1/agent/`
  - `router.py`: 暴露 HTTP/SSE 接口。
  - `schemas.py`: 请求/响应 DTO 和输入校验。
  - `dependencies.py`: DI 装配。
  - `service.py`: 薄服务，仅调用 `core/agent` 应用服务。
 - `backend/src/core/agent/`
  - `application/`: run/resume 应用服务。
  - `domain/`: run 状态机、resume 幂等语义、错误模型。
  - `infrastructure/crewai/`: CrewAI Agent/Task/Crew/Flow 装配与执行。
  - `infrastructure/agui/`: AG-UI 事件映射与 SSE 序列化。
  - `infrastructure/litellm/`: LiteLLM 客户端与 usage/cost 拦截器。
  - `infrastructure/queue/`: Celery task producer/consumer。
 ### 1.1) 配置来源与合并策略
 - Agent 运行配置由两部分组成：
  - 数据库存量配置：`system_agents`（每种 agent_type 对应 llm 与 llm_config）。
  - 静态模板配置：`backend/src/core/config/static/crewai/*.yaml`（角色描述、任务模板、workflow、tools）。
 - 合并策略：
  - `llm` 与 `llm_config` 以 `system_agents` 为准。
  - prompt 模板、task 描述、flow stage、tool 白名单以 static/crewai 为准。
  - 若任一 agent_type 在 `system_agents` 缺失，运行前失败并返回受控错误。
 ### 2) 核心运行链路
 1. `POST /api/v1/agent/runs` 只负责参数校验和鉴权。
 2. 路由调用 `AgentRunAppService.enqueue_run()`，写入 run 记录并投递 Celery。
 3. Worker 执行 `run_agent_task`：
   - 读取 run 上下文。
   - 构建 CrewAI `Agent/Task/Crew/Flow`。
   - 通过 `ag-ui-crewai` 将执行事件转为 AG-UI 标准事件。
   - 每次 LLM 调用由 LiteLLM 中间层记录 token/cost。
 4. 事件落库并发布到事件通道（Redis Stream/Channel）。
 5. SSE 接口从事件通道读取并持续推送，直到 `RUN_FINISHED` 或 `RUN_ERROR`。
 ### 3) Resume 链路
 1. `POST /api/v1/agent/runs/{run_id}/resume` 校验 `interrupt_id` 与决策 payload。
 2. 调用 `enqueue_resume()` 投递 `resume_agent_task`。
 3. Worker 在事务内做并发控制：
   - `run_id + interrupt_id` 幂等锁。
   - 过期校验与状态迁移。
 4. 恢复后继续 CrewAI Flow，事件按 AG-UI 继续输出。
 ### 4) Session 状态持久化
 - 使用 `sessions.state_snapshot` 作为运行态单一快照来源。
 - 快照至少包含：
  - run 上下文（thread_id、run_id、stage）
  - pending_tool_calls（tool_call_id、tool_name、args、status、expires_at）
  - correlation 索引（tool_call_id -> message_id / step_id）
 - 所有中断/恢复均以 `state_snapshot` 事务更新为准，避免内存态漂移。
 ### 5) 会话与消息落库模型
 - 会话主表：`sessions`
  - 新建 run 时写入：`id/user_id/session_type/status=running/last_activity_at`。
  - 运行中持续更新：`status`、`last_activity_at`、`message_count`、`total_tokens`、`total_cost`、`state_snapshot`。
  - 运行结束更新：
    - 成功：`status=completed`
    - 失败：`status=failed`
 - 消息表：`messages`
  - 用户输入落库为 `role=user`（每次 run 开始时先写入）。
  - 模型输出落库为 `role=assistant`（按最终聚合文本落库，保留 metadata 记录增量信息）。
  - 工具调用结果落库为 `role=tool`，并写入 `tool_name` 与 `metadata.tool_call_id`。
  - `seq` 由每个 `session_id` 内单调递增分配，满足 `uq_messages_session_seq`。
 - 计量落库：每次 LLM 调用的 usage/cost 先写消息级，再聚合更新到 session 级。
 ## 六项要求落地映射
 ### 要求 1: `.env` 驱动 LLM API Key
 - 新增 `LLMSettings` 到 `core.config.settings.Settings`，统一定义：
  - `SOCIAL_LLM__PROVIDER_KEYS__DASHSCOPE`
  - `SOCIAL_LLM__PROVIDER_KEYS__MINIMAX`
  - `SOCIAL_LLM__PROVIDER_KEYS__MOONSHOT`
  - `SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK`
  - `SOCIAL_LLM__PROVIDER_KEYS__ARK`
  - `SOCIAL_LLM__PROVIDER_KEYS__ZAI`
 - 禁止 `os.environ` 直接读取密钥。
 ### 要求 2: 对话和 resume 走 Celery
 - Web 层不直接执行编排。
 - `run`/`resume` 一律入队，Worker 处理，Web 仅做事件流转发。
 - 加入任务级超时、重试、死信策略。
 ### 要求 3: v1 仅路由与调用
 - `v1/agent/service.py` 仅保留应用服务调用和错误映射。
 - 任何编排、状态机、工具执行逻辑禁止进入 `v1`。
 ### 要求 4: CrewAI 官方流程
 - 采用 CrewAI 原生对象：`Agent`、`Task`、`Crew`、`Flow`。
 - tools 通过 CrewAI Tool 机制注册，不做平行实现。
 - 任务模板与 agent 配置集中化（静态模板 + 运行时拼装）。
 - 配置拼装明确依赖 `system_agents + static/crewai`，不再使用双套来源。
 ### 要求 5: AG-UI + ag-ui-crewai
 - 事件集遵循 AG-UI 协议，生命周期闭环：
  - `RUN_STARTED`
  - 流式消息和工具事件
  - 终态 `RUN_FINISHED` 或 `RUN_ERROR`
 - 优先引入 `ag-ui-crewai` 做 CrewAI 到 AG-UI 的桥接，避免重复造轮子。
 ### 要求 6: LiteLLM token/cost 统计
 - 所有 LLM 调用通过 LiteLLM 统一出入口。
 - 按调用粒度记录：`input_tokens`、`output_tokens`、`total_tokens`、`cost`、`currency`。
 - 按 run 粒度聚合并落库，支持后续计费和审计。
 ## 数据与可观测性
 - 保留现有 Agent 相关表结构，不在本次硬切做数据库破坏性变更。
 - 新增事件日志与调用指标落点（如已有字段不足，后续增量迁移）。
 - 日志使用结构化字段：`run_id`、`task_id`、`stage`、`tool_name`、`llm_model`、`latency_ms`。
 - 持久化原则：run/resume 的关键状态变更必须可重放，禁止仅保存在内存。
 ## 事务边界
 - `run` 入口事务：创建或加载 `session` + 写入用户消息。
 - `worker` 执行事务（可分阶段短事务）：
  - 阶段开始：更新 `session.status/state_snapshot`。
  - LLM 返回：写 assistant/tool 消息 + 更新 token/cost 聚合。
  - 中断：写 `pending_tool_calls` 到 `state_snapshot` 并提交。
  - 完成：更新终态 `session.status` 并提交。
 - `resume` 事务：校验 `interrupt_id` 与 ownership，CAS 更新 `state_snapshot`，然后进入后续执行事务。
 ## 错误处理与安全
 - API Key 缺失启动即失败，不进入运行态。
 - 外部工具入参统一白名单和 schema 校验。
 - resume 决策必须鉴权与会话所有权校验。
 - 错误响应遵循 RFC 7807，避免泄漏敏感上下文。
 ## 工具调用与恢复语义
 - 工具分三类：
  - 前端工具：由 `RunAgentInput.tools` 提供能力声明，触发 interrupt，由客户端执行并回传 result。
  - 后端工具（需审批）：先 interrupt 给前端审批；审批通过后由后端执行，不由前端执行。
  - 后端工具（直执）：后端直接执行。
 - 一致性约束：
  - 每个 tool_result 必须携带 `tool_call_id`。
  - 后端仅接受当前 `state_snapshot.pending_tool_calls` 中存在且状态合法的 `tool_call_id`。
  - 若收到未知/已消费/过期 `tool_call_id`，立即产出 `RUN_ERROR` 并记录审计日志。
 ## 测试策略
 - 单元测试：
  - 配置解析与 key 解析
  - run/resume 状态机与幂等
  - LiteLLM usage 聚合
 - 集成测试：
  - API 入队
  - Worker 消费
  - SSE 事件顺序与终态
 - E2E：
  - run 成功链路
  - interrupt + resume 链路
  - tool 调用链路
 ## 迁移策略
 - 阶段 0（本次）：硬切删除旧代码、旧测试、旧文档契约。
 - 阶段 1：搭建新架构骨架和最小可运行 run 流程。
 - 阶段 2：接入 CrewAI + ag-ui-crewai + LiteLLM 完整链路。
 - 阶段 3：补齐可观测性、压测与稳定性治理。
 ## 验收标准
 - 后端仓库不存在旧 `v1/agent` 和 `core/agent` 旧实现。
 - 所有 Agent 相关旧测试与旧文档契约已移除。
 - 新方案设计文档明确覆盖六项要求并可进入实现阶段。
@@ -1,574 +0,0 @@
 # Agent 后端重建 Implementation Plan
 > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
 **Goal:** 在后端重建 Agent 运行时，满足队列异步、CrewAI 配置打通、AG-UI 工具中断恢复、LiteLLM 计量、以及 `sessions.state_snapshot` 持久化要求。
 **Architecture:** `v1/agent` 仅做 API/鉴权/参数校验与 SSE 输出，`core/agent` 负责编排与执行。Agent 创建配置由 `system_agents`（数据库）+ `core/config/static/crewai/*.yaml`（静态模板）合并生成。run/resume 全链路通过 Celery Worker 执行，状态写入 `sessions.state_snapshot`。
 **Tech Stack:** FastAPI, Celery, Redis, CrewAI, ag-ui-crewai, LiteLLM, SQLAlchemy, Alembic, pytest
 ---
 ### Task 1: 建立配置聚合器（system_agents + static/crewai）
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/config/resolver.py`
 - Modify: `backend/src/core/config/static/crewai/agents.yaml`
 - Modify: `backend/src/core/config/static/crewai/tasks.yaml`
 - Create: `backend/src/core/config/static/crewai/workflow.yaml`
 - Create: `backend/src/core/config/static/crewai/tools.yaml`
 - Test: `backend/tests/unit/core/agent/test_config_resolver.py`
 **Step 1: Write the failing test**
 ```python
 def test_resolver_merges_system_agents_and_static_templates():
    resolved = resolve_agent_runtime_config(...)
    assert resolved.intent.llm.model_code == "deepseek-v3.2"
    assert "intent" in resolved.workflow_stages
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
 Expected: FAIL with `NameError` or `ImportError` (the resolver module does not exist yet)
 **Step 3: Write minimal implementation**
 ```python
 def resolve_agent_runtime_config(system_agents: list[dict], static_cfg: dict) -> RuntimeConfig:
    by_type = {item["agent_type"]: item for item in system_agents}
    return RuntimeConfig.from_sources(by_type=by_type, static_cfg=static_cfg)
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure/config/resolver.py backend/src/core/config/static/crewai backend/tests/unit/core/agent/test_config_resolver.py
 git commit -m "feat: add system_agents and static crewai config resolver"
 ```
 ### Task 2: 统一 LLM Key 与模型配置入口
 **Files:**
 - Modify: `backend/src/core/config/settings.py`
 - Modify: `.env.example`
 - Create: `backend/tests/unit/core/config/test_llm_settings.py`
 **Step 1: Write the failing test**
 ```python
 def test_llm_keys_read_from_settings(monkeypatch):
    monkeypatch.setenv("SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK", "k1")
    s = Settings()
    assert s.llm.provider_keys.deepseek == "k1"
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
 Expected: FAIL with missing `llm` field
 **Step 3: Write minimal implementation**
 ```python
 class LLMProviderKeys(BaseModel):
    deepseek: str | None = None
 class LLMSettings(BaseModel):
    provider_keys: LLMProviderKeys = LLMProviderKeys()
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/config/settings.py .env.example backend/tests/unit/core/config/test_llm_settings.py
 git commit -m "feat: centralize llm provider keys in settings"
 ```
 ### Task 3: sessions 表状态快照契约落地
 **Files:**
 - Create: `backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py`
 - Modify: `backend/src/models/agent_chat_session.py`
 - Create: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
 **Step 1: Write the failing test**
 ```python
 def test_sessions_has_state_snapshot_column(db_inspector):
    columns = db_inspector.get_columns("sessions")
    assert "state_snapshot" in [c["name"] for c in columns]
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
 Expected: FAIL when migration not applied
 **Step 3: Write minimal implementation**
 ```python
 def upgrade() -> None:
    op.add_column("sessions", sa.Column("state_snapshot", postgresql.JSONB, nullable=True))
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py backend/src/models/agent_chat_session.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
 git commit -m "feat(db): enforce sessions state_snapshot contract"
 ```
 ### Task 3.1: 会话与消息持久化仓储
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
 - Create: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
 - Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
 **Step 1: Write the failing test**
 ```python
 def test_run_persists_user_and_assistant_messages(db_session):
    run = execute_run(...)
    rows = list_messages(session_id=run.session_id)
    assert rows[0].role == "user"
    assert rows[1].role == "assistant"
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
 async def append_message(...):
    session.add(AgentChatMessage(...))
 async def update_session_aggregate(...):
    session_obj.message_count = message_count
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure/persistence backend/tests/integration/core/agent/test_session_message_persistence.py
 git commit -m "feat: persist session lifecycle and messages for agent runs"
 ```
 ### Task 4: 定义 state_snapshot 结构与并发语义
 **Files:**
 - Create: `backend/src/core/agent/domain/state_snapshot.py`
 - Create: `backend/tests/unit/core/agent/test_state_snapshot.py`
 **Step 1: Write the failing test**
 ```python
 def test_pending_tool_call_snapshot_contains_correlation_fields():
    snap = StateSnapshot.new(...)
    pending = snap.pending_tool_calls[0]
    assert pending.tool_call_id
    assert pending.status == "PENDING_APPROVAL"
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
 class PendingToolCall(BaseModel):
    tool_call_id: str
    tool_name: str
    status: Literal["PENDING_APPROVAL", "APPROVED", "EXECUTED", "REJECTED", "EXPIRED"]
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/domain/state_snapshot.py backend/tests/unit/core/agent/test_state_snapshot.py
 git commit -m "feat: define sessions state_snapshot schema for run and tool state"
 ```
 ### Task 5: 工具路由策略（前端/后端/审批）
 **Files:**
 - Create: `backend/src/core/agent/domain/tool_policy.py`
 - Create: `backend/tests/unit/core/agent/test_tool_policy.py`
 **Step 1: Write the failing test**
 ```python
 def test_frontend_tool_requires_interrupt_and_client_execution():
    decision = classify_tool_call(name="ui.navigate_to", source="request.tools")
    assert decision.mode == "FRONTEND_EXECUTE"
 def test_backend_approval_tool_returns_interrupt_but_executes_on_backend_after_approve():
    decision = classify_tool_call(name="srv.transfer_funds", requires_approval=True)
    assert decision.mode == "BACKEND_APPROVAL_INTERRUPT"
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
 if name.startswith("ui."):
    return ToolDecision(mode="FRONTEND_EXECUTE")
 if requires_approval:
    return ToolDecision(mode="BACKEND_APPROVAL_INTERRUPT")
 return ToolDecision(mode="BACKEND_DIRECT_EXECUTE")
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/domain/tool_policy.py backend/tests/unit/core/agent/test_tool_policy.py
 git commit -m "feat: add frontend/backend tool policy and approval routing"
 ```
 ### Task 6: tool_call 与 tool_result 对账机制
 **Files:**
 - Create: `backend/src/core/agent/domain/tool_correlation.py`
 - Create: `backend/tests/unit/core/agent/test_tool_correlation.py`
 **Step 1: Write the failing test**
 ```python
 def test_rejects_tool_result_when_tool_call_id_not_pending():
    store = PendingToolStore([])
    with pytest.raises(ToolCorrelationError):
        store.apply_result(tool_call_id="unknown", result={"ok": True})
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
 def apply_result(self, *, tool_call_id: str, result: dict) -> None:
    pending = self._pending.get(tool_call_id)
    if pending is None:
        raise ToolCorrelationError("tool_call_id not pending")
    pending.result = result
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/domain/tool_correlation.py backend/tests/unit/core/agent/test_tool_correlation.py
 git commit -m "feat: add tool call/result correlation guard"
 ```
 ### Task 7: Celery run/resume 异步任务
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
 - Create: `backend/src/core/agent/application/run_service.py`
 - Create: `backend/src/core/agent/application/resume_service.py`
 - Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
 **Step 1: Write the failing test**
 ```python
 def test_run_api_enqueues_celery_task(client):
    resp = client.post("/api/v1/agent/runs", json={...})
    assert resp.status_code == 202
 def test_resume_updates_session_status_and_snapshot(client):
    resp = client.post("/api/v1/agent/runs/r1/resume", json={...})
    assert resp.status_code == 202
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
 def enqueue_run(cmd: RunCommand) -> str:
    task = run_agent_task.apply_async(args=[cmd.model_dump()])
    return task.id
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/application backend/src/core/agent/infrastructure/queue backend/tests/integration/core/agent/test_queue_run_resume.py
 git commit -m "feat: add celery-based run and resume tasks"
 ```
 ### Task 8: CrewAI 运行时加载与创建
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
 - Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
 - Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
 **Step 1: Write the failing test**
 ```python
 def test_runtime_creates_agents_tasks_from_resolved_config():
    runtime = CrewAIRuntime(...)
    crew = runtime.build_crew(message="hello")
    assert len(crew.agents) >= 1
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
 def build_crew(self, *, message: str) -> Crew:
    agents = self._factory.build_agents(self._config)
    tasks = self._factory.build_tasks(self._config, message=message)
    return Crew(agents=agents, tasks=tasks)
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure/crewai backend/tests/unit/core/agent/test_crewai_runtime.py
 git commit -m "feat: create crewai runtime from resolved config"
 ```
 ### Task 9: AG-UI 与 ag-ui-crewai 事件桥
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
 - Create: `backend/src/core/agent/infrastructure/agui/stream.py`
 - Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
 **Step 1: Write the failing test**
 ```python
 def test_agui_stream_emits_required_lifecycle():
    events = to_agui_events(internal_events=[...])
    assert events[0]["type"] == "RUN_STARTED"
    assert events[-1]["type"] in {"RUN_FINISHED", "RUN_ERROR"}
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
 def to_agui_events(internal_events: list[dict]) -> list[dict]:
    return [map_event(e) for e in internal_events]
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure/agui backend/tests/unit/core/agent/test_agui_bridge.py
 git commit -m "feat: add ag-ui and ag-ui-crewai event bridge"
 ```
 ### Task 10: LiteLLM 调用统计与会话聚合
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/litellm/client.py`
 - Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
 - Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
 **Step 1: Write the failing test**
 ```python
 def test_tracker_aggregates_per_call_usage_and_cost():
    t = UsageTracker()
    t.add({"input_tokens": 10, "output_tokens": 5, "cost": "0.1"})
    assert t.snapshot()["total_tokens"] == 15
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
 def add(self, usage: dict[str, object]) -> None:
    self.input_tokens += int(usage.get("input_tokens", 0))
    self.output_tokens += int(usage.get("output_tokens", 0))
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure/litellm backend/tests/unit/core/agent/test_litellm_usage.py
 git commit -m "feat: add litellm usage and cost tracking"
 ```
 ### Task 11: v1/agent 薄层 API + SSE 出口
 **Files:**
 - Create: `backend/src/v1/agent/router.py`
 - Create: `backend/src/v1/agent/schemas.py`
 - Create: `backend/src/v1/agent/dependencies.py`
 - Create: `backend/src/v1/agent/service.py`
 - Modify: `backend/src/v1/router.py`
 - Test: `backend/tests/integration/v1/agent/test_routes.py`
 **Step 1: Write the failing test**
 ```python
 def test_run_endpoint_returns_sse_and_not_blocking(client):
    resp = client.post("/api/v1/agent/runs", json={...})
    assert resp.status_code == 202
 ```
 **Step 2: Run test to verify it fails**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
 Expected: FAIL
 **Step 3: Write minimal implementation**
 ```python
@router.post("/runs", status_code=202)
 async def create_run(...):
    task_id = service.enqueue_run(input_data)
    return {"task_id": task_id}
 ```
 **Step 4: Run test to verify it passes**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
 Expected: PASS
 **Step 5: Commit**
 ```bash
 git add backend/src/v1/agent backend/src/v1/router.py backend/tests/integration/v1/agent/test_routes.py
 git commit -m "feat: add thin v1 agent api and sse endpoints"
 ```
 ### Task 12: 端到端验证与文档回填
 **Files:**
 - Modify: `docs/runtime/runtime-route.md`
 - Modify: `docs/runtime/runtime-runbook.md`
 **Step 1: Run unit tests**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent backend/tests/unit/core/config backend/tests/unit/database -q`
 Expected: PASS
 **Step 2: Run integration tests**
 Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
 Expected: PASS
 **Step 3: Run lint and typecheck**
 Run: `PYTHONPATH=backend/src uv run ruff check backend/src backend/tests`
 Expected: PASS
 Run: `PYTHONPATH=backend/src uv run basedpyright backend/src`
 Expected: PASS
 **Step 4: Document protocol contracts**
 在运行手册中补充以下固定规则：
 - `system_agents` + `static/crewai` 配置合并优先级。
 - `sessions.state_snapshot` 字段结构与版本号。
 - `messages` 入库顺序与 `sessions` 聚合字段更新规则。
 - 工具调用审批与恢复时序图。
 - tool_call/result 不匹配时的错误语义（`RUN_ERROR` + 可审计日志）。
 **Step 5: Commit**
 ```bash
 git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md
 git commit -m "docs: add new agent runtime contracts and operational guide"
 ```
 ## Success Criteria
 - [ ] Agent 创建配置由 `system_agents` 与 `core/config/static/crewai` 合并生成。
 - [ ] run/resume 仅通过 Celery Worker 执行，Web 不执行编排。
 - [ ] `v1/agent` 无业务编排代码。
 - [ ] `sessions.state_snapshot` 承担运行态和工具审批恢复状态。
 - [ ] 每次 run/resume 的会话状态变更均落库到 `sessions`。
 - [ ] 用户/助手/工具消息按 `messages` 约束落库，`seq` 单调递增。
 - [ ] 前端工具与后端工具（审批/非审批）策略完整可测。
 - [ ] tool_call 与 tool_result 具备强关联校验并可恢复/报错。
 - [ ] LiteLLM 逐次计量与 run 聚合可落库。
@@ -1,199 +0,0 @@
 # Agent Architecture Simplification Design
 **Date:** 2026-03-04
 **Status:** Approved
 **Author:** AI Assistant
 ## Overview
 Simplify the agent configuration architecture by removing the redundant `user_agents` table and renaming `user_agent_catalog` to `system_agents`.
 ## Problem Statement
 The current architecture stores redundant data:
 - `user_agent_catalog`: System-level agent configurations (3 agent types for all users)
 - `user_agents`: Per-user agent instances (copies catalog data for each user)
 Since every user has the same 3 agents with identical configurations (copied from the catalog), maintaining the `user_agents` table creates unnecessary complexity and data duplication.
 ## Goals
 1. Remove `user_agents` table and related code
 2. Rename `user_agent_catalog` to `system_agents` for clarity
 3. Preserve ability for future user-level prompt customization via `profiles.settings`
 4. Maintain backward compatibility in the deployment process
 ## Non-Goals
 - User-level agent configuration (LLM selection, temperature, etc.)
 - User-level prompt customization implementation (deferred to future iteration)
 ## Architecture Changes
 ### Current Architecture
 ```
 user_agent_catalog (system config)
    ↓ (trigger copies for each new user)
 user_agents (per-user instances)
 ```
 ### New Architecture
 ```
 system_agents (shared by all users)
 profiles.settings.agent_prompts (future: user-level prompts)
 ```
 ### Data Flow
 1. System startup: Load `system_agents` from YAML
 2. User creation: No longer creates `user_agents` records
 3. Runtime (future): Read from `system_agents` + merge with `profiles.settings.agent_prompts`
 ## Database Migration
 ### Changes
 1. **Delete `memories.agent_id` column**
   - Remove foreign key `fk_memories_agent_id`
   - Remove check constraint `chk_memory_type_agent_id`
   - Remove index `ix_memories_agent_type_status`
   - Drop column `agent_id`
 2. **Delete `user_agents` table**
   - Remove all RLS policies
   - Remove indexes: `ix_user_agents_agent_type`, `ix_user_agents_status`
   - Remove foreign keys: `fk_user_agents_user_id`, `fk_user_agents_llm_id`, etc.
   - Remove check constraint `chk_agent_type`
   - Remove unique constraint `uq_user_agents_user_id_agent_type`
   - Drop table
 3. **Rename `user_agent_catalog` → `system_agents`**
   - Remove old RLS policies
   - Rename table
   - Rename constraints: `fk_user_agent_catalog_llm_id` → `fk_system_agents_llm_id`
   - Rename check constraint: `chk_user_agent_catalog_status` → `chk_system_agents_status`
   - Re-create RLS policies with new table name
 4. **Update trigger `create_profile_for_new_user()`**
   - Remove logic that inserts into `user_agents`
   - Initialize `profiles.settings.agent_prompts` with empty object
 5. **Update existing `profiles.settings`**
   - Add `agent_prompts: {}` to all existing profiles
 ### Downgrade Path
 - Re-create `user_agents` table with all constraints and indexes
 - Restore `memories.agent_id` column and constraints
 - Rename `system_agents` → `user_agent_catalog`
 - Restore original trigger
 ## Code Changes
 ### Model Layer
 **Delete:**
 - `backend/src/models/user_agents.py`
 **Rename:**
 - `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
 - Class `UserAgentCatalog` → `SystemAgents`
 **Update:**
 - `backend/src/models/__init__.py` - Update imports and exports
 ### Configuration Layer
 **Rename:**
 - `backend/src/core/config/static/database/user_agent_catalog.yaml`
  → `backend/src/core/config/static/database/system_agents.yaml`
 **Update:**
 - `backend/src/core/config/initial/init_data.py`
  - `UserAgentCatalogSeed` → `SystemAgentsSeed`
  - `UserAgentCatalogYaml` → `SystemAgentsYaml`
  - Import from `models.system_agents`
  - Path: `system_agents.yaml`
  - Function: `initialize_user_agent_catalog()` → `initialize_system_agents()`
 ### Future: Profile Settings Structure (Deferred)
 ```json
 {
  "agent_prompts": {
    "INTENT_RECOGNITION": "custom prompt...",
    "TASK_EXECUTION": "custom prompt...",
    "RESULT_REPORTING": "custom prompt..."
  }
 }
 ```
 ## Testing Strategy
 ### Migration Tests
 - Verify `user_agents` table is deleted
 - Verify `system_agents` table exists with correct structure
 - Verify trigger no longer creates `user_agents` records
 - Verify `profiles.settings.agent_prompts` is initialized
 - Verify downgrade path works correctly
 ### Model Tests
 - Verify `SystemAgents` model CRUD operations
 - Verify `Profile.settings` JSONB storage
 ### Integration Tests
 - Verify `initialize_system_agents()` loads from YAML
 - Verify data is correctly inserted into `system_agents` table
 ## Deployment Considerations
 ### Pre-deployment
 - Back up the database (especially `user_agents`, if it contains any data)
 - Confirm production `user_agents` table has no critical data
 ### Deployment
 1. Run migration: `alembic upgrade head`
 2. Verify migration success
 3. Restart application services
 4. Verify new user registration works without `user_agents`
 ### Post-deployment
 - Monitor application logs for any references to deleted `user_agents`
 - Verify agent-related functionality still works
 ## Risks and Mitigations
 | Risk | Mitigation |
 |------|-----------|
 | Existing `user_agents` data loss | Back up before migration; data is redundant anyway |
 | Code still references `user_agents` | Comprehensive code search and testing |
 | Trigger fails on new user creation | Test migration thoroughly; include rollback plan |
 | Future need for user-level config | Can add `agent_overrides` to `profiles.settings` |
 ## Success Criteria
 - [ ] All tests pass
 - [ ] Migration runs successfully (upgrade and downgrade)
 - [ ] New user registration creates profile without `user_agents` records
 - [ ] System agents are loaded from YAML correctly
 - [ ] No references to `user_agents` remain in codebase
 ## Timeline
 - Design: 2026-03-04 (Completed)
 - Implementation: TBD
 - Testing: TBD
 - Deployment: TBD
 ## References
 - Migration file: `backend/alembic/versions/YYYYMMDD_simplify_agent_architecture.py`
 - Original catalog migration: `backend/alembic/versions/50ae013ce530_add_user_agent_catalog.py`
@@ -1,844 +0,0 @@
 # Agent Architecture Simplification Implementation Plan
 > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
 **Goal:** Simplify agent configuration by removing the redundant `user_agents` table and renaming `user_agent_catalog` to `system_agents`
 **Architecture:** Delete user_agents table (including memories.agent_id dependency), rename user_agent_catalog to system_agents, update all references in code
 **Tech Stack:** Python 3.11+, SQLAlchemy, Alembic, PostgreSQL
 ---
 ## Prerequisites
 - [ ] Current branch: dev
 - [ ] No uncommitted changes
 - [ ] Docker services running (Supabase local)
 ## Task 1: Create Database Migration
 **Files:**
 - Create: `backend/alembic/versions/20260304_simplify_agent_architecture.py`
 **Step 1: Create migration file**
 Run: `cd backend && uv run alembic revision -m "simplify_agent_architecture"`
 Expected: New migration file created with revision ID
 **Step 2: Write migration upgrade logic**
 Edit the generated migration file with this complete upgrade function:
 ```python
 def upgrade() -> None:
    """Apply the agent-architecture simplification.

    Steps, in order:
      1. Drop ``memories.agent_id`` together with its foreign key, check
         constraint and index.
      2. Drop the ``user_agents`` table (RLS via ``_drop_rls``, then
         constraints, indexes, and finally the table itself).
      3. Rename ``user_agent_catalog`` to ``system_agents``, rename its
         constraints to match, and re-enable RLS via ``_enable_rls``.
      4. Recreate ``create_profile_for_new_user()`` and its trigger so that a
         new user only gets a profile row, seeded with an empty
         ``agent_prompts`` object (no ``user_agents`` rows any more).
      5. Backfill ``agent_prompts: {}`` into every existing profile that
         does not have the key yet, including rows whose ``settings`` is NULL.
    """
    # 1. Delete memories.agent_id dependencies
    # Constraints and the index go first so the column drop cannot be blocked.
    op.drop_constraint("fk_memories_agent_id", "memories", type_="foreignkey")
    op.drop_constraint("chk_memory_type_agent_id", "memories", type_="check")
    op.execute("DROP INDEX IF EXISTS ix_memories_agent_type_status")
    op.drop_column("memories", "agent_id")
    # 2. Delete user_agents table
    _drop_rls("user_agents")
    op.drop_constraint("fk_user_agents_updated_by", "user_agents", type_="foreignkey")
    op.drop_constraint("fk_user_agents_created_by", "user_agents", type_="foreignkey")
    op.drop_constraint("fk_user_agents_llm_id", "user_agents", type_="foreignkey")
    op.drop_constraint("fk_user_agents_user_id", "user_agents", type_="foreignkey")
    op.drop_constraint("chk_agent_type", "user_agents", type_="check")
    op.drop_constraint("uq_user_agents_user_id_agent_type", "user_agents", type_="unique")
    op.execute("DROP INDEX IF EXISTS ix_user_agents_status")
    op.execute("DROP INDEX IF EXISTS ix_user_agents_agent_type")
    op.drop_table("user_agents")
    # 3. Rename user_agent_catalog to system_agents
    # RLS is dropped under the old name and re-enabled under the new one.
    _drop_rls("user_agent_catalog")
    op.rename_table("user_agent_catalog", "system_agents")
    op.execute(
        "ALTER TABLE system_agents RENAME CONSTRAINT fk_user_agent_catalog_llm_id "
        "TO fk_system_agents_llm_id"
    )
    op.execute(
        "ALTER TABLE system_agents RENAME CONSTRAINT chk_user_agent_catalog_status "
        "TO chk_system_agents_status"
    )
    _enable_rls("system_agents")
    # 4. Update trigger
    # The trigger is dropped before the function it depends on.
    op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
    op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
    op.execute("""
        CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
        RETURNS trigger
        LANGUAGE plpgsql
        SECURITY DEFINER
        SET search_path = public
        AS $$
        BEGIN
            INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
            VALUES (
                NEW.id,
                COALESCE(
                    NEW.raw_user_meta_data ->> 'username',
                    split_part(NEW.email, '@', 1),
                    'user_' || substring(NEW.id::text, 1, 8)
                ),
                NULL,
                NULL,
                '{"agent_prompts": {}}'::jsonb,
                now(),
                now()
            )
            ON CONFLICT (id) DO NOTHING;
            RETURN NEW;
        END;
        $$
    """)
    op.execute("""
        CREATE TRIGGER on_auth_user_created
            AFTER INSERT ON auth.users
            FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
    """)
    # 5. Update existing profiles.settings
    # `NOT settings ? 'agent_prompts'` evaluates to NULL (not TRUE) when
    # settings is NULL, so NULL rows must be matched explicitly; otherwise the
    # COALESCE below would never get a chance to initialise them.
    op.execute("""
        UPDATE profiles
        SET settings = jsonb_set(
            COALESCE(settings, '{}'::jsonb),
            '{agent_prompts}',
            '{}'::jsonb
        )
        WHERE settings IS NULL OR NOT (settings ? 'agent_prompts')
    """)
 ```
 **Step 3: Write migration downgrade logic**
 Add this complete downgrade function:
 ```python
 def downgrade() -> None:
    # 1. Revert trigger
    op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
    op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
    op.execute("""
        CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
        RETURNS trigger
        LANGUAGE plpgsql
        SECURITY DEFINER
        SET search_path = public
        AS $$
        BEGIN
            INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
            VALUES (
                NEW.id,
                COALESCE(
                    NEW.raw_user_meta_data ->> 'username',
                    split_part(NEW.email, '@', 1),
                    'user_' || substring(NEW.id::text, 1, 8)
                ),
                NULL,
                NULL,
                '{}'::jsonb,
                now(),
                now()
            )
            ON CONFLICT (id) DO NOTHING;
            INSERT INTO public.user_agents (id, user_id, llm_id, agent_type, config, status, created_by, updated_by)
            SELECT 
                gen_random_uuid(),
                NEW.id,
                uac.llm_id,
                uac.agent_type,
                uac.config,
                uac.status,
                NEW.id,
                NEW.id
            FROM public.user_agent_catalog uac;
            RETURN NEW;
        END;
        $$
    """)
    op.execute("""
        CREATE TRIGGER on_auth_user_created
            AFTER INSERT ON auth.users
            FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
    """)
    # 2. Revert rename: system_agents -> user_agent_catalog
    _drop_rls("system_agents")
    op.rename_table("system_agents", "user_agent_catalog")
    op.execute(
        "ALTER TABLE user_agent_catalog RENAME CONSTRAINT fk_system_agents_llm_id "
        "TO fk_user_agent_catalog_llm_id"
    )
    op.execute(
        "ALTER TABLE user_agent_catalog RENAME CONSTRAINT chk_system_agents_status "
        "TO chk_user_agent_catalog_status"
    )
    _enable_rls("user_agent_catalog")
    # 3. Recreate user_agents table
    op.create_table(
        "user_agents",
        sa.Column("id", sa.UUID(), nullable=False),
        sa.Column("user_id", sa.UUID(), nullable=False),
        sa.Column("llm_id", sa.UUID(), nullable=False),
        sa.Column("agent_type", sa.String(length=20), nullable=False),
        sa.Column(
            "config",
            postgresql.JSONB(astext_type=sa.Text()),
            server_default="{}",
            nullable=False,
        ),
        sa.Column("status", sa.String(length=20), nullable=False),
        sa.Column("created_by", sa.UUID(), nullable=True),
        sa.Column("updated_by", sa.UUID(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_unique_constraint(
        "uq_user_agents_user_id_agent_type", 
        "user_agents", 
        ["user_id", "agent_type"]
    )
    op.execute(
        "CREATE INDEX ix_user_agents_agent_type ON user_agents (agent_type)"
    )
    op.execute(
        "CREATE INDEX ix_user_agents_status ON user_agents (status)"
    )
    op.execute(
        "ALTER TABLE user_agents ADD CONSTRAINT chk_agent_type "
        "CHECK (agent_type IN ('INTENT_RECOGNITION', 'TASK_EXECUTION', 'RESULT_REPORTING'))"
    )
    op.create_foreign_key(
        "fk_user_agents_user_id",
        "user_agents",
        "users",
        ["user_id"],
        ["id"],
        referent_schema="auth",
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "fk_user_agents_llm_id",
        "user_agents",
        "llms",
        ["llm_id"],
        ["id"],
        ondelete="RESTRICT",
    )
    op.create_foreign_key(
        "fk_user_agents_created_by",
        "user_agents",
        "users",
        ["created_by"],
        ["id"],
        referent_schema="auth",
        ondelete="SET NULL",
    )
    op.create_foreign_key(
        "fk_user_agents_updated_by",
        "user_agents",
        "users",
        ["updated_by"],
        ["id"],
        referent_schema="auth",
        ondelete="SET NULL",
    )
    _enable_rls("user_agents")
    # 4. Recreate memories.agent_id
    op.add_column(
        "memories",
        sa.Column("agent_id", sa.UUID(), nullable=True)
    )
    op.create_foreign_key(
        "fk_memories_agent_id",
        "memories",
        "user_agents",
        ["agent_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.execute(
        "CREATE INDEX ix_memories_agent_type_status ON memories (agent_id, memory_type, status)"
    )
    op.execute(
        "ALTER TABLE memories ADD CONSTRAINT chk_memory_type_agent_id "
        "CHECK ((memory_type = 'work' AND agent_id IS NOT NULL) OR "
        "(memory_type = 'user' AND agent_id IS NULL))"
    )
 ```
 **Step 4: Add helper functions**
 Add these helper functions at the end of the migration file:
 ```python
 def _enable_rls(table_name: str) -> None:
    """Reset the RLS policies on ``table_name`` and enable row level security.

    For the ``anon`` and ``authenticated`` roles, any existing
    ``{role}_{action}_{table_name}`` policies are dropped, row level security
    is switched on, and one policy per action (select, insert, update, delete)
    is created whose condition is the constant ``false``.

    Args:
        table_name: Table to operate on. It is interpolated directly into the
            SQL text, so it must be a trusted identifier.
    """
    roles = ("anon", "authenticated")
    # (action, policy clause) pairs, listed in the order the statements run.
    policy_clauses = (
        ("select", "USING (false)"),
        ("insert", "WITH CHECK (false)"),
        ("update", "USING (false) WITH CHECK (false)"),
        ("delete", "USING (false)"),
    )
    # Drop first so the CREATE POLICY statements below cannot hit a name clash.
    for role in roles:
        for action, _clause in policy_clauses:
            op.execute(
                f"DROP POLICY IF EXISTS {role}_{action}_{table_name} ON {table_name}"
            )
    op.execute(f"ALTER TABLE {table_name} ENABLE ROW LEVEL SECURITY")
    for role in roles:
        for action, clause in policy_clauses:
            op.execute(
                f"CREATE POLICY {role}_{action}_{table_name} ON {table_name} "
                f"FOR {action.upper()} TO {role} {clause}"
            )
 def _drop_rls(table_name: str) -> None:
    """Drop the per-role RLS policies on ``table_name`` and disable RLS.

    Args:
        table_name: Table to operate on. It is interpolated directly into the
            SQL text, so it must be a trusted identifier.
    """
    for role in ("anon", "authenticated"):
        # Per role, policies go in the order delete, update, insert, select.
        for action in ("delete", "update", "insert", "select"):
            op.execute(
                f"DROP POLICY IF EXISTS {role}_{action}_{table_name} ON {table_name}"
            )
    op.execute(f"ALTER TABLE {table_name} DISABLE ROW LEVEL SECURITY")
 ```
 **Step 5: Verify migration file**
 Check that all imports are correct:
 ```python
 from typing import Sequence, Union
 from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 ```
 **Step 6: Commit migration**
 ```bash
 git add backend/alembic/versions/20260304_simplify_agent_architecture.py
 git commit -m "feat(db): add migration to simplify agent architecture"
 ```
 ---
 ## Task 2: Delete UserAgents Model
 **Files:**
 - Delete: `backend/src/models/user_agents.py`
 - Modify: `backend/src/models/__init__.py`
 **Step 1: Remove import from models/__init__.py**
 Edit `backend/src/models/__init__.py`:
 Remove this line:
 ```python
 from models.user_agents import UserAgent
 ```
 And remove `"UserAgent"` from the `__all__` list.
 **Step 2: Delete user_agents.py file**
 ```bash
 rm backend/src/models/user_agents.py
 ```
 **Step 3: Verify no other imports**
 Run: `cd backend && grep -r "from models.user_agents" src/`
 Expected: No results. (`__init__.py` was already cleaned up in Step 1, so it should not match either.)
 **Step 4: Commit**
 ```bash
 git add backend/src/models/user_agents.py backend/src/models/__init__.py
 git commit -m "refactor(models): remove UserAgents model"
 ```
 ---
 ## Task 3: Rename UserAgentCatalog to SystemAgents
 **Files:**
 - Rename: `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
 - Modify: `backend/src/models/__init__.py`
 **Step 1: Rename model file**
 ```bash
 mv backend/src/models/user_agent_catalog.py backend/src/models/system_agents.py
 ```
 **Step 2: Update class name in system_agents.py**
 Edit `backend/src/models/system_agents.py`:
 Change:
 ```python
 class UserAgentCatalog(TimestampMixin, Base):
    __tablename__: str = "user_agent_catalog"
 ```
 To:
 ```python
 class SystemAgents(TimestampMixin, Base):
    __tablename__: str = "system_agents"
 ```
 **Step 3: Update imports in models/__init__.py**
 Edit `backend/src/models/__init__.py`:
 Change:
 ```python
 from models.user_agent_catalog import UserAgentCatalog
 ```
 To:
 ```python
 from models.system_agents import SystemAgents
 ```
 And change `"UserAgentCatalog"` to `"SystemAgents"` in the `__all__` list.
 **Step 4: Commit**
 ```bash
 git add backend/src/models/
 git commit -m "refactor(models): rename UserAgentCatalog to SystemAgents"
 ```
 ---
 ## Task 4: Update Configuration Files
 **Files:**
 - Rename: `backend/src/core/config/static/database/user_agent_catalog.yaml`
  → `backend/src/core/config/static/database/system_agents.yaml`
 - Modify: `backend/src/core/config/initial/init_data.py`
 **Step 1: Rename YAML file**
 ```bash
 mv backend/src/core/config/static/database/user_agent_catalog.yaml \
   backend/src/core/config/static/database/system_agents.yaml
 ```
 **Step 2: Update init_data.py imports**
 Edit `backend/src/core/config/initial/init_data.py`:
 Change:
 ```python
 from models.user_agent_catalog import UserAgentCatalog
 ```
 To:
 ```python
 from models.system_agents import SystemAgents
 ```
 **Step 3: Update Pydantic models**
 Change:
 ```python
 class UserAgentCatalogSeed(BaseModel):
    agent_type: str
    llm_model_code: str
    status: str
    config: dict[str, Any]
 class UserAgentCatalogYaml(BaseModel):
    agents: list[UserAgentCatalogSeed]
 ```
 To:
 ```python
 class SystemAgentsSeed(BaseModel):
    """One entry of the ``agents`` list in the system agents YAML file."""
    # Lookup key used when upserting ``SystemAgents`` rows.
    agent_type: str
    # Matched against ``Llm.model_code`` to resolve the row's ``llm_id``.
    llm_model_code: str
    status: str
    config: dict[str, Any]
 class SystemAgentsYaml(BaseModel):
    """Validated shape of the system agents YAML: a single ``agents`` list."""
    agents: list[SystemAgentsSeed]
 ```
 **Step 4: Update path function**
 Change:
 ```python
 def _default_user_agent_catalog_path() -> Path:
    return (
        Path(__file__).resolve().parents[1]
        / "static"
        / "database"
        / "user_agent_catalog.yaml"
    )
 ```
 To:
 ```python
 def _default_system_agents_path() -> Path:
    """Return the default path of the system agents seed file.

    The path is ``static/database/system_agents.yaml`` under the parent of the
    directory that contains this module (``parents[1]`` of the resolved
    module path).
    """
    return (
        Path(__file__).resolve().parents[1]
        / "static"
        / "database"
        / "system_agents.yaml"
    )
 ```
 **Step 5: Update load function**
 Change:
 ```python
 def load_user_agent_catalog(catalog_path: Path | None = None) -> dict[str, Any]:
    path = catalog_path or _default_user_agent_catalog_path()
    with path.open("r", encoding="utf-8") as file:
        loaded = yaml.safe_load(file) or {}
    if not isinstance(loaded, dict):
        raise ValueError(f"Invalid user agent catalog format: {path}")
    raw_agents = loaded.get("agents", [])
    if not isinstance(raw_agents, list):
        raise ValueError(f"Invalid user agent catalog agents section: {path}")
    try:
        parsed = UserAgentCatalogYaml.model_validate({"agents": list(raw_agents)})
    except ValidationError as exc:
        raise ValueError(f"Invalid user agent catalog data: {path}") from exc
    return parsed.model_dump()
 ```
 To:
 ```python
 def load_system_agents(catalog_path: Path | None = None) -> dict[str, Any]:
    """Load and validate the system agents seed data from a YAML file.

    Args:
        catalog_path: File to read. When omitted (or otherwise falsy), the
            path from ``_default_system_agents_path()`` is used.

    Returns:
        The validated data as a plain dict (``SystemAgentsYaml.model_dump()``),
        i.e. a mapping with an ``agents`` list.

    Raises:
        ValueError: If the top-level YAML value is not a mapping, if its
            ``agents`` value is not a list, or if the entries fail
            ``SystemAgentsYaml`` validation.
    """
    path = catalog_path or _default_system_agents_path()
    with path.open("r", encoding="utf-8") as file:
        # A falsy parse result (e.g. an empty file) is treated as an empty mapping.
        loaded = yaml.safe_load(file) or {}
    if not isinstance(loaded, dict):
        raise ValueError(f"Invalid system agents format: {path}")
    # A missing ``agents`` key is equivalent to an empty list of agents.
    raw_agents = loaded.get("agents", [])
    if not isinstance(raw_agents, list):
        raise ValueError(f"Invalid system agents agents section: {path}")
    try:
        parsed = SystemAgentsYaml.model_validate({"agents": list(raw_agents)})
    except ValidationError as exc:
        raise ValueError(f"Invalid system agents data: {path}") from exc
    return parsed.model_dump()
 ```
 **Step 6: Update upsert function**
 Change:
 ```python
 async def _upsert_user_agent_catalog(
    session: AsyncSession,
    *,
    agent_type: str,
    llm_id: uuid.UUID,
    status: str,
    config: dict[str, Any],
 ) -> None:
    result = await session.execute(
        select(UserAgentCatalog).where(UserAgentCatalog.agent_type == agent_type)
    )
    catalog_entry = result.scalar_one_or_none()
    if catalog_entry is None:
        session.add(
            UserAgentCatalog(
                agent_type=agent_type,
                llm_id=llm_id,
                status=status,
                config=config,
            )
        )
    else:
        catalog_entry.llm_id = llm_id
        catalog_entry.status = status
        catalog_entry.config = config
 ```
 To:
 ```python
 async def _upsert_system_agents(
    session: AsyncSession,
    *,
    agent_type: str,
    llm_id: uuid.UUID,
    status: str,
    config: dict[str, Any],
 ) -> None:
    """Insert or update the ``SystemAgents`` row for ``agent_type``.

    The row is looked up by ``agent_type``. If none exists, a new one is added
    to the session; otherwise its ``llm_id``, ``status`` and ``config`` are
    overwritten. Nothing is flushed or committed here; that is left to the
    caller's transaction.

    Args:
        session: Session used for the lookup and the pending changes.
        agent_type: Value identifying the row to insert or update.
        llm_id: Value to store in the row's ``llm_id``.
        status: Value to store in the row's ``status``.
        config: Value to store in the row's ``config``.
    """
    result = await session.execute(
        select(SystemAgents).where(SystemAgents.agent_type == agent_type)
    )
    # NOTE(review): presumably at most one row exists per agent_type; confirm
    # a unique constraint backs this lookup.
    catalog_entry = result.scalar_one_or_none()
    if catalog_entry is None:
        session.add(
            SystemAgents(
                agent_type=agent_type,
                llm_id=llm_id,
                status=status,
                config=config,
            )
        )
    else:
        catalog_entry.llm_id = llm_id
        catalog_entry.status = status
        catalog_entry.config = config
 ```
 **Step 7: Update initialize function**
 Change:
 ```python
 async def initialize_user_agent_catalog() -> None:
    """Initialize user agent catalog from YAML."""
    catalog = load_user_agent_catalog()
    async with AsyncSessionLocal() as session:
        async with session.begin():
            for agent in catalog["agents"]:
                result = await session.execute(
                    select(Llm).where(Llm.model_code == agent["llm_model_code"])
                )
                llm = result.scalar_one_or_none()
                if llm is None:
                    raise RuntimeError(
                        f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
                    )
                await _upsert_user_agent_catalog(
                    session,
                    agent_type=agent["agent_type"],
                    llm_id=llm.id,
                    status=agent["status"],
                    config=agent["config"],
                )
    logger.info("Initialized user agent catalog")
 ```
 To:
 ```python
 async def initialize_system_agents() -> None:
    """Seed the system agents from the default YAML file.

    Loads the entries with ``load_system_agents()`` and, inside a single
    ``session.begin()`` block, resolves each entry's ``llm_model_code`` to an
    ``Llm`` row and upserts the matching system agent.

    Raises:
        RuntimeError: If no ``Llm`` row has the ``model_code`` an entry
            refers to.
    """
    catalog = load_system_agents()
    async with AsyncSessionLocal() as session:
        # All entries are processed inside one ``session.begin()`` block.
        async with session.begin():
            for agent in catalog["agents"]:
                result = await session.execute(
                    select(Llm).where(Llm.model_code == agent["llm_model_code"])
                )
                llm = result.scalar_one_or_none()
                if llm is None:
                    raise RuntimeError(
                        f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
                    )
                await _upsert_system_agents(
                    session,
                    agent_type=agent["agent_type"],
                    llm_id=llm.id,
                    status=agent["status"],
                    config=agent["config"],
                )
    logger.info("Initialized system agents")
 ```
 **Step 8: Update initialize_data function**
 Change:
 ```python
 async def initialize_data() -> bool:
    """Initialize bootstrap data."""
    await initialize_llm_catalog()
    await initialize_user_agent_catalog()
    return True
 ```
 To:
 ```python
 async def initialize_data() -> bool:
    """Initialize bootstrap data: the LLM catalog, then the system agents.

    Returns:
        Always ``True`` once both initializers have completed.
    """
    # The LLM catalog goes first: system agent seeding looks up ``Llm`` rows.
    await initialize_llm_catalog()
    await initialize_system_agents()
    return True
 ```
 **Step 9: Commit**
 ```bash
 git add backend/src/core/config/
 git commit -m "refactor(config): rename user_agent_catalog to system_agents"
 ```
 ---
 ## Task 5: Run Migration
 **Step 1: Run migration**
 ```bash
 cd backend && uv run alembic upgrade head
 ```
 Expected: Migration runs successfully
 **Step 2: Verify tables**
 Connect to database and check:
 - `user_agents` table should NOT exist
 - `system_agents` table should exist
 - `memories.agent_id` column should NOT exist
 **Step 3: Test downgrade (optional but recommended)**
 ```bash
 cd backend && uv run alembic downgrade -1
 ```
 Expected: Schema is back at the previous revision (`user_agents`, `user_agent_catalog` and `memories.agent_id` exist again)
 **Step 4: Re-run upgrade**
 ```bash
 cd backend && uv run alembic upgrade head
 ```
 Expected: Migration runs successfully again
 ---
 ## Task 6: Run Tests and Linting
 **Step 1: Run type checking**
 ```bash
 cd backend && uv run basedpyright src/
 ```
 Expected: No errors
 **Step 2: Run linting**
 ```bash
 cd backend && uv run ruff check src/
 ```
 Expected: No errors
 **Step 3: Run tests**
 ```bash
 cd backend && uv run pytest tests/
 ```
 Expected: All tests pass
 **Step 4: Fix any failures**
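 If any tests fail due to `UserAgentCatalog` references, update them to use `SystemAgents`. Tests that depend on the removed `UserAgent` model have no replacement and should be deleted or rewritten.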
 ---
 ## Task 7: Final Verification
 **Step 1: Search for any remaining references**
 ```bash
 cd backend && grep -r "user_agents" src/ --include="*.py"
 cd backend && grep -r "UserAgent" src/ --include="*.py"
 ```
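 Expected: No results. (Both searches are scoped to `src/`, so migration files under `backend/alembic/versions/` are not scanned.) Also run `grep -r "user_agent_catalog" src/ --include="*.py"` to catch leftover snake_case names from Task 4.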
 **Step 2: Test new user registration**
 Start the backend server and register a new user. Verify:
 - Profile is created
 - No user_agents records are created
 - profiles.settings contains `agent_prompts: {}`
 **Step 3: Commit final changes**
 ```bash
 git add .
 git commit -m "feat: complete agent architecture simplification"
 ```
 ---
 ## Success Criteria
 - [ ] Migration runs successfully (upgrade and downgrade)
 - [ ] No UserAgent model references in code
 - [ ] SystemAgents model works correctly
 - [ ] All tests pass
 - [ ] Linting passes
 - [ ] Type checking passes
 - [ ] New user registration works without user_agents
 ## Notes
 - Keep the design document updated if any changes are made during implementation
 - Test migration thoroughly before deploying to production
 - Backup database before running migration in production
@@ -1,81 +0,0 @@
 # Agent Runtime Closed Loop E2E Design
 ## 背景
 当前 `test_agent_sse_flow.py` 不能稳定证明真实闭环：
 - `session_id` 由随机 UUID 生成，导致 `POST /api/v1/agent/runs` 经常 404。
 - 测试脚本存在不可达重复代码，诊断信息不完整。
 - 未覆盖首聊自动建会话语义，和真实聊天入口不匹配。
 目标是验证真实环境下业务闭环是否可用：
 1. 用户请求 `agent` 路由
 2. 请求进入异步任务
 3. runtime 读取 `system_agents` 和 `llm` 配置并构建执行流程
 4. 真实 LLM 请求发出并返回
 5. `sessions`/`messages` 正确落库
 6. 成本和 token 统计正确
 7. 事件按 AG-UI 规范发布并可由 `stream_events` 订阅
 ## 设计原则
 - 真实优先：不使用 mock，不替换 queue/redis/db/llm。
 - 双轨验证：
  - 诊断脚本用于本地排障（快速观察全链路状态）。
  - pytest E2E 用例用于可重复回归。
 - 明确前置条件：必须先使用 `infra/scripts/app.sh start` 启动 tmux 服务。
 - 本地真实 LLM 基线：DashScope Qwen。
 ## API 契约调整
 ### `POST /api/v1/agent/runs`
 - 现状：`session_id` 必填且必须存在。
 - 新契约：`session_id` 可选。
  - 有值：复用现有会话，校验 owner。
  - 无值：在服务层先创建会话，再入队 run。
 - 响应扩展：返回 `created` 标识是否为首聊自动建会话。
 该契约与聊天产品行为一致：用户首条消息即可开始，不需要前置调用创建会话接口。
 ## 数据关系与删除语义
 - `messages.session_id -> sessions.id` 为外键，且硬删除级联（`ondelete=CASCADE`）。
 - 软删除需要补齐级联：
  - 软删 `sessions` 时，同事务更新对应 `messages.deleted_at`。
  - E2E 增加验证，确保软删后默认查询不可见。
 ## 测试架构
 ### A. 诊断脚本（根目录）
 重构 `test_agent_sse_flow.py`：
 - 增加环境健康检查（web/redis/db）。
 - 支持两种模式：
  - `--new-session`：不传 `session_id`，验证首聊自动创建。
  - `--reuse-session <id>`：验证复聊路径。
 - 输出结构化阶段日志：HTTP、task_id、SSE 事件、数据库断言、失败根因。
 ### B. pytest E2E（`backend/tests/e2e`）
 新增 `test_agent_closed_loop_live.py`：
 - 标记为 `live`，默认不在 CI 执行。
 - 用真实 JWT、真实 HTTP 请求、真实 SSE 订阅。
 - 断言最小闭环标准：
  - run 返回 202
  - SSE 至少收到 `RUN_STARTED` 与终态（`RUN_FINISHED` 或 `RUN_ERROR`）
  - `sessions` 状态和计数更新
  - `messages` 有新增记录
  - token/cost 字段非负且会话聚合一致
 ## 验收标准
 - `uv run python test_agent_sse_flow.py --new-session` 通过。
 - `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -v -m live` 通过。
 - 首聊场景不需要外部先建 `session_id`。
 - 软删除会话后，消息软删除行为与约束一致。
 ## 风险与回退
 - 真实 LLM 网络抖动会造成不稳定：通过重试和超时策略降低误报。
 - 生产契约变更风险：保持字段向后兼容（原 `session_id` 仍可传）。
 - 如果新契约引入问题，可临时退回“必传 session_id”路径并保留测试脚本诊断能力。
@@ -1,230 +0,0 @@
 # Agent Runtime Closed Loop E2E Implementation Plan
 > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
 **Goal:** 让 agent 闭环在真实本地环境中可验证：`runs` 支持首聊自动建会话，并通过真实异步任务、真实 LLM、真实落库与真实 SSE 证明端到端可用。
 **Architecture:** 在 `v1/agent` 服务层引入“可选 session_id + 自动建会话”语义；保持已有 owner 鉴权路径。重构诊断脚本并新增 live E2E 用例，统一验证 run 入队、事件流、数据库状态、成本统计与删除语义。通过最小侵入改造现有 run/resume 流程，确保兼容已存在调用。
 **Tech Stack:** FastAPI, SQLAlchemy async, Celery, Redis Stream, LiteLLM, PyJWT, pytest, httpx
 ---
 ### Task 1: 扩展 API 契约（session_id 可选）
 **Files:**
 - Modify: `backend/src/v1/agent/schemas.py`
 - Modify: `backend/src/v1/agent/router.py`
 - Test: `backend/tests/integration/v1/agent/test_routes.py`
 **Step 1: Write the failing test**
 在 `test_routes.py` 新增用例：请求体不传 `session_id` 仍返回 202，且响应含 `session_id`。
 **Step 2: Run test to verify it fails**
 Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -k "runs and session" -v`
 Expected: FAIL，提示 `session_id` 缺失导致 422 或 mock 接口签名不匹配。
 **Step 3: Write minimal implementation**
 - `RunRequest.session_id` 改为可选。
 - `enqueue_run` 调用 service 时传可选值。
 - `TaskAcceptedResponse` 增加 `created: bool` 字段。
 **Step 4: Run test to verify it passes**
 Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
 Expected: PASS。
 **Step 5: Commit**
 ```bash
 git add backend/src/v1/agent/schemas.py backend/src/v1/agent/router.py backend/tests/integration/v1/agent/test_routes.py
 git commit -m "feat: allow agent runs without pre-created session"
 ```
 ### Task 2: 服务层支持自动建会话并保持鉴权
 **Files:**
 - Modify: `backend/src/v1/agent/service.py`
 - Modify: `backend/src/v1/agent/repository.py`
 - Modify: `backend/src/v1/agent/dependencies.py`
 - Test: `backend/tests/unit/v1/agent/test_service.py` (new)
 **Step 1: Write the failing test**
 新增单测覆盖：
 - `session_id is None` 时调用 `create_session_for_user` 并返回 `created=True`
 - `session_id` 有值时复用并校验 owner
 **Step 2: Run test to verify it fails**
 Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
 Expected: FAIL，当前 service 无自动建会话能力。
 **Step 3: Write minimal implementation**
 - repository 增加 `create_session_for_user(user_id)`。
 - service `enqueue_run` 处理两条路径：
  - 无 `session_id`：先创建 session。
  - 有 `session_id`：校验 owner。
 - 返回 `TaskAccepted(task_id, session_id, created)`。
 **Step 4: Run test to verify it passes**
 Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
 Expected: PASS。
 **Step 5: Commit**
 ```bash
 git add backend/src/v1/agent/service.py backend/src/v1/agent/repository.py backend/src/v1/agent/dependencies.py backend/tests/unit/v1/agent/test_service.py
 git commit -m "feat: auto-create chat session on first agent run"
 ```
 ### Task 3: 对齐 runtime 闭环数据断言（messages/sessions/cost）
 **Files:**
 - Modify: `backend/src/core/agent/application/run_service.py`
 - Modify: `backend/src/core/agent/application/resume_service.py`
 - Modify: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
 - Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
 - Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
 **Step 1: Write the failing test**
 在集成测试增加断言：
 - `sessions.total_tokens`、`sessions.total_cost` 有更新
 - `messages` 的 token/cost 字段与 session 聚合一致
 **Step 2: Run test to verify it fails**
 Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
 Expected: FAIL，当前默认 token/cost 为 0，未做聚合更新。
 **Step 3: Write minimal implementation**
 - run/resume 流程接入 usage/cost 结果（来自 litellm 返回或 fallback 规则）。
 - message 写入时填充 input/output tokens 与 cost。
 - session 更新时累加 total_tokens/total_cost。
 **Step 4: Run test to verify it passes**
 Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
 Expected: PASS。
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/application/run_service.py backend/src/core/agent/application/resume_service.py backend/src/core/agent/infrastructure/persistence/message_repository.py backend/src/core/agent/infrastructure/persistence/session_repository.py backend/tests/integration/core/agent/test_queue_run_resume.py
 git commit -m "feat: persist runtime token and cost aggregates"
 ```
 ### Task 4: 补齐软删除级联（session -> messages）
 **Files:**
 - Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
 - Modify: `backend/src/v1/agent/service.py`
 - Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
 **Step 1: Write the failing test**
 新增用例：软删 session 后，同会话 messages 的 `deleted_at` 同步写入。
 **Step 2: Run test to verify it fails**
 Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
 Expected: FAIL，当前无软删级联。
 **Step 3: Write minimal implementation**
 - repository 增加 `soft_delete_session_with_messages(session_id)`。
 - service 调用时使用同事务批量更新 messages。
 **Step 4: Run test to verify it passes**
 Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
 Expected: PASS。
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure/persistence/session_repository.py backend/src/v1/agent/service.py backend/tests/integration/core/agent/test_queue_run_resume.py
 git commit -m "fix: cascade soft delete from sessions to messages"
 ```
 ### Task 5: 重构诊断脚本并新增 live E2E
 **Files:**
 - Modify: `test_agent_sse_flow.py`
 - Create: `backend/tests/e2e/test_agent_closed_loop_live.py`
 - Modify: `docs/bugs/2026-03-05-agent-runtime-bugs.md`
 **Step 1: Write the failing test**
 新增 live E2E 用例（`@pytest.mark.live`）：
 - 首聊不传 `session_id` 返回 202
 - 订阅 SSE 收到关键事件
 - DB 断言 session/messages/tokens/cost
 **Step 2: Run test to verify it fails**
 Run: `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
 Expected: FAIL，当前契约或脚本未对齐。
 **Step 3: Write minimal implementation**
 - 清理脚本重复/不可达逻辑。
 - 增加健康检查、阶段化日志、超时和错误根因输出。
 - E2E 用例复用脚本中的 helper（JWT、SSE 解析、DB 断言）。
 **Step 4: Run test to verify it passes**
 Run:
 - `uv run python test_agent_sse_flow.py --new-session`
 - `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
 Expected: PASS。
 **Step 5: Commit**
 ```bash
 git add test_agent_sse_flow.py backend/tests/e2e/test_agent_closed_loop_live.py docs/bugs/2026-03-05-agent-runtime-bugs.md
 git commit -m "test: add live closed-loop agent e2e verification"
 ```
 ### Task 6: 全量验证与文档同步
 **Files:**
 - Modify: `docs/runtime/runtime-runbook.md`
 - Modify: `docs/runtime/runtime-route.md`
 **Step 1: Run targeted checks**
 Run:
 - `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
 - `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
 - `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
 - `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
 Expected: PASS。
 **Step 2: Run quality gates**
 Run:
 - `uv run ruff check backend/src backend/tests`
 - `uv run basedpyright`
 Expected: PASS。
 **Step 3: Update docs**
 记录本地启动流程、真实 LLM 前置配置、live E2E 执行方式和故障排查。
 **Step 4: Commit**
 ```bash
 git add docs/runtime/runtime-runbook.md docs/runtime/runtime-route.md
 git commit -m "docs: document live agent closed-loop e2e workflow"
 ```
@@ -1,469 +0,0 @@
 # Agent Runtime Closed Loop Implementation Plan
 > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
 **Goal:** Build a production-grade closed-loop agent runtime where `frontend -> FastAPI -> Celery -> run/resume service -> CrewAI -> AG-UI events -> Redis Stream -> SSE` is fully connected and verifiable.
 **Architecture:** Keep HTTP API as control-plane and worker as data-plane. The API validates auth/ownership and enqueues commands, the Celery worker executes run/resume business logic using DB-driven agent config, runtime emits normalized AG-UI events and usage/cost telemetry, all events are persisted to Redis Stream, and SSE endpoint streams from Redis with resume support (`Last-Event-ID`).
 **Tech Stack:** FastAPI, SQLAlchemy AsyncSession, Celery, Redis Streams, CrewAI, LiteLLM, Pydantic, pytest (unit/integration).
 **Confirmed Constraints (locked):**
 - Persist semantics use existing `messages.role` only (`assistant|user|system|tool`), no new `message_kind` column.
 - `tool_result` must be semantically complete (especially UI schema); do not store summary-only payload.
 - Store full `tool_result` payload in Supabase Storage (private bucket) and persist durable object reference in DB metadata; do not rely on expiring signed URL as primary reference.
 - `metadata` must be fixed and typed via Pydantic model (no free-form drift).
 - Do not introduce additional business tables for this scope; keep schema minimal.
 - CrewAI runtime must default to streaming mode.
 - Full traceability target is final semantic reconstruction of `user/assistant/tool_result`; chunk-level replay is not required.
 **Metadata Contract (fixed, Pydantic-enforced):**
 - Global required keys for all message metadata: `type`, `run_id`, `turn_id`.
 - Global optional keys for all message metadata: `event_id`, `parent_message_id`, `error`.
 - `type=user_input`:
  - Required: `type`, `run_id`, `turn_id`.
  - Optional: `input_source`, `client_ts`.
 - `type=assistant_output`:
  - Required: `type`, `run_id`, `turn_id`.
  - Optional: `finish_reason`, `model_provider`, `cost_source`.
 - `type=tool_call` (`role=assistant`):
  - Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `tool_args`.
  - Optional: `tool_schema_version`, `timeout_ms`.
 - `type=tool_result` (`role=tool`):
  - Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `storage_bucket`, `storage_path`, `payload_sha256`, `payload_bytes`, `payload_format`.
  - Optional: `ui_schema_version`, `compression`, `storage_etag`, `render_hints`.
 - Validation rules:
  - `messages.role=tool` must use `metadata.type=tool_result`.
  - `messages.role=assistant` + tool event must use `metadata.type=tool_call` or `assistant_output`.
  - `tool_result` payload in DB must be reconstructable to AG-UI `TOOL_CALL_RESULT` using Storage object + metadata checksum.
 ---
 ### Task 1: Add Agent Module Skeleton and Contracts
 **Files:**
 - Create: `backend/src/core/agent/__init__.py`
 - Create: `backend/src/core/agent/application/__init__.py`
 - Create: `backend/src/core/agent/domain/__init__.py`
 - Create: `backend/src/core/agent/infrastructure/events/__init__.py`
 - Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
 - Create: `backend/src/core/agent/infrastructure/agui/stream.py`
 - Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
 **Step 1: Write failing tests for event normalization and SSE formatting**
 ```python
 def test_bridge_normalizes_event_type_to_upper_snake() -> None:
    events = [{"type": "runStarted", "data": {"ok": True}}]
    out = to_agui_events(events)
    assert out[0]["type"] == "RUN_STARTED"
 def test_sse_format_includes_id_event_data() -> None:
    payload = to_sse_event(stream_id="1-0", event={"type": "RUN_STARTED", "data": {"a": 1}})
    assert payload.startswith("id: 1-0\nevent: RUN_STARTED\ndata: {")
 ```
 **Step 2: Run tests and confirm RED**
 Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`  
 Expected: FAIL with missing module/function errors.
 **Step 3: Implement minimal bridge + stream utilities**
 ```python
 def to_agui_events(internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
    ...
 def to_sse_event(stream_id: str, event: dict[str, Any]) -> str:
    ...
 ```
 **Step 4: Run tests and confirm GREEN**
 Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`  
 Expected: PASS.
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent backend/tests/unit/core/agent/test_agui_bridge.py
 git commit -m "feat(agent): add ag-ui bridge and sse serializer utilities"
 ```
 ### Task 2: Implement Redis Stream Event Store and Reader
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/events/redis_stream.py`
 - Modify: `backend/src/core/config/settings.py`
 - Test: `backend/tests/unit/core/agent/test_redis_stream.py`
 **Step 1: Write failing tests for append/read semantics**
 ```python
 def test_append_event_writes_json_payload() -> None:
    ...
 def test_read_events_respects_last_event_id() -> None:
    ...
 ```
 **Step 2: Run RED**
 Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`  
 Expected: FAIL.
 **Step 3: Implement Redis stream adapter**
 ```python
 def append_event_sync(*, session_id: UUID, event: dict[str, Any]) -> str:
    ...
 async def read_events(...):
    ...
 ```
 **Step 4: Run GREEN**
 Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`  
 Expected: PASS.
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure/events/redis_stream.py backend/src/core/config/settings.py backend/tests/unit/core/agent/test_redis_stream.py
 git commit -m "feat(agent): add redis stream event transport for run events"
 ```
 ### Task 3: Build CrewAI Runtime + AG-UI Event Mapping + Usage Tracking
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
 - Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
 - Create: `backend/src/core/agent/infrastructure/litellm/client.py`
 - Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
 - Create: `backend/src/core/agent/infrastructure/config/resolver.py`
 - Modify: `backend/src/core/config/settings.py`
 - Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
 - Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
 - Test: `backend/tests/unit/core/agent/test_config_resolver.py`
 **Step 1: Write failing runtime tests (events + cost + strict errors)**
 ```python
 def test_runtime_emits_text_tool_reasoning_events() -> None:
    ...
 def test_runtime_raises_if_model_or_api_key_missing() -> None:
    ...
 def test_usage_tracker_extracts_tokens_and_cost() -> None:
    ...
 ```
 **Step 2: Run RED**
 Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`  
 Expected: FAIL.
 **Step 3: Implement runtime and tracker**
 - Register CrewAI event handlers (`Task/LLM/Tool/Reasoning`) and map to AG-UI canonical event types.
 - Default runtime to streaming mode for CrewAI execution.
 - Enforce strict config behavior: no `llm_model_code` or provider key -> raise.
 - Use LiteLLM cost calculator for actual cost; if cost cannot be computed, fail closed (raise), do not silently record zero.
 **Step 4: Run GREEN**
 Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`  
 Expected: PASS.
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py backend/src/core/config/settings.py
 git commit -m "feat(agent): implement crewai runtime events and litellm usage-cost auditing"
 ```
 ### Task 4: Implement Run/Resume Application Services (DB Config + Persistence)
 **Files:**
 - Create: `backend/src/core/agent/application/run_service.py`
 - Create: `backend/src/core/agent/application/resume_service.py`
 - Create: `backend/src/core/agent/application/session_state_persistence.py`
 - Create: `backend/src/core/agent/domain/state_snapshot.py`
 - Create: `backend/src/core/agent/domain/tool_correlation.py`
 - Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
 - Test: `backend/tests/unit/core/agent/test_state_snapshot.py`
 - Test: `backend/tests/unit/core/agent/test_tool_correlation.py`
 **Step 1: Write failing tests for DB-driven runtime and aggregate updates**
 ```python
 async def test_run_service_loads_agent_config_from_db_and_persists_messages() -> None:
    ...
 async def test_resume_service_requires_pending_tool_call() -> None:
    ...
 ```
 **Step 2: Run RED**
 Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`  
 Expected: FAIL.
 **Step 3: Implement services**
 - `run_service`: read session + system agent config from DB, execute runtime, persist user/assistant messages, update session aggregates.
 - `resume_service`: validate pending tool call status, enforce idempotency semantics, resume runtime, persist audit fields.
 - Persist metadata audit (`tokens`, `cost`, `cost_source`, correlation ids) for every assistant message.
 - Persist tool lifecycle with role-only model:
  - tool call message uses `role=assistant` with fixed metadata (`type=tool_call`, `tool_call_id`, `tool_name`, arguments reference).
  - tool result message uses `role=tool` with fixed metadata (`type=tool_result`, `tool_call_id`, `tool_name`, storage bucket/path, checksum, bytes, schema version).
 - `tool_result` full payload (UI schema) is uploaded to Supabase Storage private bucket; DB stores durable reference and verification fields.
 - Ensure DB->AG-UI `TOOL_CALL_RESULT` reconstruction is equivalent to SSE-streamed final tool result semantics.
 - Enforce metadata contract by Pydantic model at write path and read path (reject malformed metadata early).
 **Step 4: Run GREEN**
 Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`  
 Expected: PASS.
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/application backend/src/core/agent/domain backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py
 git commit -m "feat(agent): add run-resume app services with db config and audit persistence"
 ```
 ### Task 5: Wire Celery Worker Task to Run/Resume and Publish Runtime Events
 **Files:**
 - Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
 - Modify: `backend/src/core/celery/app.py`
 - Test: `backend/tests/unit/core/agent/test_queue_tasks.py`
 - Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
 **Step 1: Write failing queue tests**
 ```python
 def test_run_agent_task_emits_started_runtime_and_finished_events() -> None:
    ...
 def test_run_agent_task_emits_error_event_on_exception() -> None:
    ...
 ```
 **Step 2: Run RED**
 Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`  
 Expected: FAIL.
 **Step 3: Implement worker task flow**
 - Decode command type (`run`/`resume`).
 - Emit lifecycle events (`RUN_STARTED/RUN_RESUMED/RUN_FINISHED/RUN_ERROR`).
 - Forward runtime callback events to Redis stream immediately.
 - Persist session status/snapshot after completion.
 **Step 4: Run GREEN**
 Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`  
 Expected: PASS.
 **Step 5: Commit**
 ```bash
 git add backend/src/core/agent/infrastructure/queue/tasks.py backend/src/core/celery/app.py backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py
 git commit -m "feat(agent): wire celery run-resume execution and redis event publishing"
 ```
 ### Task 6: Implement API Contracts (Run/Resume/SSE) + Auth/Ownership/Idempotency
 **Files:**
 - Create: `backend/src/v1/agent/schemas.py`
 - Create: `backend/src/v1/agent/repository.py`
 - Create: `backend/src/v1/agent/service.py`
 - Create: `backend/src/v1/agent/router.py`
 - Create: `backend/src/v1/agent/dependencies.py`
 - Modify: `backend/src/v1/router.py`
 - Test: `backend/tests/unit/v1/agent/test_service.py`
 - Test: `backend/tests/unit/v1/agent/test_owner_guard.py`
 - Test: `backend/tests/integration/v1/agent/test_routes.py`
 **Step 1: Write failing API tests**
 ```python
 async def test_run_requires_auth_and_returns_202_task_id() -> None:
    ...
 async def test_stream_reads_from_last_event_id() -> None:
    ...
 def test_resume_idempotency_uses_redis_lock_and_task_key() -> None:
    ...
 ```
 **Step 2: Run RED**
 Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`  
 Expected: FAIL.
 **Step 3: Implement API service/router**
 - `POST /api/v1/agent/runs` enqueue run command.
 - `POST /api/v1/agent/runs/{session_id}/resume` enqueue resume command with async redis lock + dedup task key.
 - `GET /api/v1/agent/runs/{session_id}/events` SSE stream from Redis with `Last-Event-ID`.
 - Enforce auth and session ownership checks on all endpoints.
 - Validate `tool_call_id` and message length/pattern boundaries.
 **Step 4: Run GREEN**
 Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`  
 Expected: PASS.
 **Step 5: Commit**
 ```bash
 git add backend/src/v1/agent backend/src/v1/router.py backend/tests/unit/v1/agent backend/tests/integration/v1/agent/test_routes.py
 git commit -m "feat(agent): add authenticated run-resume-sse api with redis-backed idempotency"
 ```
 ### Task 7: Add Schema/Migration Contract for Session Snapshot + Audit Fields
 **Files:**
 - Create: `backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py`
 - Modify: `backend/src/models/agent_chat_session.py`
 - Modify: `backend/src/models/agent_chat_message.py`
 - Test: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
 **Migration scope note:**
 - Fix current schema drift: model has `sessions.state_snapshot` but migration chain does not reliably provide this column in current DB state.
 - Keep schema minimal; do not add new business tables in this migration.
 **Step 1: Write failing migration contract tests**
 ```python
 def test_session_has_state_snapshot_and_status_contract() -> None:
    ...
 def test_message_has_token_cost_and_metadata_contract() -> None:
    ...
 ```
 **Step 2: Run RED**
 Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`  
 Expected: FAIL.
 **Step 3: Implement migration and model alignment**
 - Ensure `state_snapshot`, `status`, token/cost/metadata fields are present and nullable constraints are explicit.
 - Add/verify indexes needed for role-based semantic reconstruction (`session_id, seq`, and targeted metadata lookups if required).
 - Ensure `metadata` structure is validated by fixed Pydantic schema at application boundary.
 - Add DB-level guardrails where feasible (check constraints) for role/metadata consistency without introducing new tables.
 - Keep reversible downgrade path.
 **Step 4: Run GREEN**
 Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`  
 Expected: PASS.
 **Step 5: Commit**
 ```bash
 git add backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py backend/src/models/agent_chat_session.py backend/src/models/agent_chat_message.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
 git commit -m "feat(agent): add db contract for session snapshot and usage audit fields"
 ```
 ### Task 8: End-to-End Closure Verification and Docs Update
 **Files:**
 - Modify: `docs/runtime/runtime-route.md`
 - Modify: `docs/runtime/runtime-runbook.md`
 - Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
 **Step 1: Write integration test for full closure path**
 ```python
 async def test_closed_loop_run_flow_frontend_to_sse() -> None:
    # run request -> queue command -> runtime events -> redis stream -> sse read
    ...
 ```
 Also verify:
 - `tool_result` full UI schema is written to Supabase Storage private bucket.
 - `messages.role=tool` row contains stable storage reference and checksum metadata.
 - Reading from DB can reconstruct final AG-UI `TOOL_CALL_RESULT` event payload semantics.
 **Step 2: Run RED**
 Run: `uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py -q`  
 Expected: FAIL.
 **Step 3: Implement minimal missing glue and docs**
 - Fill any missing wiring revealed by the test.
 - Document endpoint contracts, event taxonomy, and operational runbook for redis/celery troubleshooting.
 **Step 4: Run GREEN + full gate verification**
 Run:
 - `PYTHONPATH=backend/src uv run python backend/src/core/runtime/cli.py migrate`
 - `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
 - `uv run ruff check backend/src backend/tests`
 - `uv run basedpyright backend/src`
 Expected:
 - All relevant tests PASS.
 - Ruff PASS.
 - basedpyright 0 errors (notes/warnings can be documented if pre-existing).
 **Step 5: Commit**
 ```bash
 git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md backend/tests/integration/core/agent/test_session_message_persistence.py
 git commit -m "docs(agent): document closed-loop runtime and verify end-to-end chain"
 ```
 ### Task 9: L2 Mandatory Review Gates
 **Files:**
 - No direct code changes required; apply fixes if findings appear.
 **Step 1: Run required agents**
 - `tdd-guide` (already enforced by plan sequence)
 - `refactor-cleaner`
 - `code-reviewer`
 - `security-reviewer`
 **Step 2: Fix all CRITICAL/HIGH findings**
 Run targeted tests after each fix.
 **Step 3: Final verification rerun**
 Run:
 - `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
 - `uv run ruff check backend/src backend/tests`
 - `uv run basedpyright backend/src`
 Expected: no failing tests; no lint errors; no type errors.
 **Step 4: Final commit (if review fixes were needed)**
 ```bash
 git add backend/src backend/tests docs/runtime
 git commit -m "fix(agent): resolve L2 review findings for closed-loop runtime"
 ```
@@ -0,0 +1,746 @@
 # UserAgentContext & ProfileSettings v1 设计
 **Date:** 2026-03-05
 **Status:** Approved
 ---
 ## 目标
 为 Agent Runtime 提供完整的用户画像上下文，通过 Pydantic 约束 profiles.settings 结构，确保：
 1. 运行时入口读取 profile（username/bio/settings）
 2. settings 结构类型安全、版本可演进
 3. 关键配置（语言/时区/国家）符合标准格式
 ---
 ## 架构
 ```
 Profile (DB JSONB)
    ↓
 ProfileSettings (Pydantic)
    ↓
 UserAgentContext (DataClass)
    ↓
 build_global_system_prompt(ctx)
 ```
 **设计原则：**
 - 唯一入口：`get_user_agent_context(user_id)` 读取并构造上下文
 - 不可变：UserAgentContext 使用 frozen dataclass
 - 向后兼容：version 字段预留未来演进
 ---
 ## ProfileSettings v1 结构
 ```json
 {
  "version": 1,
  "preferences": {
    "interface_language": "zh-CN",
    "ai_language": "zh-CN",
    "timezone": "Asia/Shanghai",
    "country": "CN"
  },
  "privacy": {},
  "notification": {}
 }
 ```
 ### 字段说明
 | 字段 | 类型 | 默认值 | 约束 |
 |------|------|--------|------|
 | `version` | int | 1 | 必须为 1（v1 锁定） |
 | `preferences.interface_language` | str | "zh-CN" | BCP-47 格式 |
 | `preferences.ai_language` | str | "zh-CN" | BCP-47 格式 |
 | `preferences.timezone` | str | "Asia/Shanghai" | IANA 时区 |
 | `preferences.country` | str | "CN" | ISO 3166-1 alpha-2 |
 | `privacy` | dict | {} | 空对象（预留） |
 | `notification` | dict | {} | 空对象（预留） |
 ### 约束规则
 **1. BCP-47 语言格式**
 正则：`^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$`
 示例：
 - ✅ zh-CN, en-US, zh-TW, ja-JP
 - ❌ zh_CN, EN, zh-cn
 **2. IANA 时区**
 使用 `zoneinfo.ZoneInfo` 校验。
 示例：
 - ✅ Asia/Shanghai, America/New_York, UTC
 - ❌ CST, GMT+8
 **3. ISO 3166-1 alpha-2 国家代码**
 使用 `pycountry.countries.get(alpha_2=...)` 校验。
 示例：
 - ✅ CN, US, JP, GB
 - ❌ CHN, USA, zz
 ---
 ## UserAgentContext 结构
 ```python
@dataclass(frozen=True)
 class UserAgentContext:
    user_id: UUID
    username: str
    bio: str | None
    settings: ProfileSettings
 ```
 **设计要点：**
 - 不可变（frozen=True）：防止运行时修改
 - 完整画像：包含身份（username/bio）和配置（settings）
 - 唯一构造入口：`get_user_agent_context(user_id)`
 ---
 ## Pydantic 模型实现
 ```python
 from pydantic import BaseModel, Field, field_validator
 from dataclasses import dataclass
 from uuid import UUID
 import re
 class PreferenceSettings(BaseModel):
    interface_language: str = "zh-CN"
    ai_language: str = "zh-CN"
    timezone: str = "Asia/Shanghai"
    country: str = "CN"
    @field_validator("interface_language", "ai_language")
    @classmethod
    def validate_bcp47(cls, v: str) -> str:
        pattern = r"^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$"
        if not re.match(pattern, v):
            raise ValueError(f"Invalid BCP-47 language tag: {v}")
        return v
    @field_validator("timezone")
    @classmethod
    def validate_iana_timezone(cls, v: str) -> str:
        import zoneinfo
        try:
            zoneinfo.ZoneInfo(v)
        except Exception:
            raise ValueError(f"Invalid IANA timezone: {v}")
        return v
    @field_validator("country")
    @classmethod
    def validate_iso_country(cls, v: str) -> str:
        import pycountry
        if not pycountry.countries.get(alpha_2=v.upper()):
            raise ValueError(f"Invalid ISO 3166-1 alpha-2 country code: {v}")
        return v.upper()
 class ProfileSettings(BaseModel):
    version: int = Field(default=1, ge=1, le=1)
    preferences: PreferenceSettings = Field(default_factory=PreferenceSettings)
    privacy: dict = Field(default_factory=dict)
    notification: dict = Field(default_factory=dict)
@dataclass(frozen=True)
 class UserAgentContext:
    user_id: UUID
    username: str
    bio: str | None
    settings: ProfileSettings
 ```
 ---
 ## 依赖项
 需要添加到 `backend/pyproject.toml`：
 ```toml
 [project.dependencies]
 pycountry = ">=23.0.0"
 ```
 ---
 ## 迁移策略
 **数据库层：**
 - profiles.settings 保持 JSONB，不做 schema 变更
 - 现有数据默认值：`{"version": 1, "preferences": {"country": "CN"}}`
 **应用层：**
 - 读取时：`ProfileSettings.model_validate(profile.settings or {})`
 - 写入时：`profile.settings = settings.model_dump()`
 ---
 ## 未来演进
 **版本迁移：**
 - Pydantic 支持多版本共存
 - 数据库不做破坏性变更
 ---
 ---
 ## AG-UI 事件转发与落库策略
 ### 核心原则
 **1. 事件转发时机：**
 - 只有在 organization 阶段完成后才转发 AG-UI 事件
 - AG-UI bridge 已实现底层机制，编排层控制转发时机
 **2. 落库时机：**
 - 意图识别和任务执行阶段：落库但 seq 取负数（用于审计）
 - 结果反馈阶段：seq 取最新 seq 的绝对值 +1（用于展示）
 ### Seq 设计细节
 **意图识别和任务执行阶段（审计用）：**
 - seq 取负数（如 -1, -2）
 - role: "assistant"（标记为 agent 输出）
 - content: 阶段的完整输出（用于审计/调试）
 - 重建会话时通过 `WHERE seq > 0` 过滤，不展示给用户
 **结果反馈阶段（展示用）：**
 - seq 取正数（取最新负数的绝对值 +1）
 - role: "assistant"
 - content: OrganizationResult.assistant_text
 - 重建会话时通过 `WHERE seq > 0` 展示给用户
 **示例：**
 ```
 | seq  | role     | content                    | 展示 |
 |------|----------|----------------------------|------|
 | -2   | assistant| ExecutionResult (完整)     | 否   |
 | -1   | assistant| IntentResult (完整)        | 否   |
 | 1    | user     | 用户输入                   | 是   |
 | 2    | assistant| OrganizationResult         | 是   |
 ```
 ### 编排层职责
 ```python
@listen(intent_stage)
 async def persist_intent(self, state: FlowState) -> FlowState:
    # seq 取负数
    seq = await message_repo.get_next_negative_seq(state.session_id)
    await message_repo.create(
        session_id=state.session_id,
        seq=seq,  # 负数
        role="assistant",
        content=state.intent_result.model_dump_json(),
        ...
    )
    return state
@listen(execution_stage)
 async def persist_execution(self, state: FlowState) -> FlowState:
    # seq 取负数
    seq = await message_repo.get_next_negative_seq(state.session_id)
    await message_repo.create(
        session_id=state.session_id,
        seq=seq,  # 负数
        role="assistant",
        content=state.execution_result.model_dump_json(),
        ...
    )
    return state
@listen(organization_stage)
 async def finalize_flow(self, state: FlowState) -> FlowState:
    result = state.organization_result
    # seq 取正数（最新负数绝对值+1）
    seq = await message_repo.get_next_positive_seq(state.session_id)
    await message_repo.create(
        session_id=state.session_id,
        seq=seq,  # 正数
        role="assistant",
        content=result.assistant_text,
        ...
    )
    # 触发 AG-UI 事件（由 bridge 处理）
    return state
 ```
 ### Token 和 Cost 累加
 **策略：在内存中累加所有阶段的 token 和 cost，organization 完成后统一落库。**
 ```python
@dataclass
 class FlowState:
    # ...
    tokens: dict[str, dict] = field(default_factory=dict)
    cost: Decimal = Decimal("0")
    currency: str = "CNY"
 ```
 ---
 ## CrewAI Flow 三阶段设计
 ### 架构概览
 ```
 User Input + UserAgentContext
    ↓
@start() begin()
    ↓
@listen() intent_stage() → 判断 direct_answer
    ↓ (router)
    ├─ DIRECT_RESPONSE → 直接返回
    └─ NEEDS_EXECUTION
           ↓
       @listen() execution_stage() → 任务执行/工具调用
           ↓
       @listen() organization_stage() → 结果组织与表达
           ↓
       返回给用户
 ```
 ### 三阶段职责
 **1. Intent Recognition（意图识别）**
 - Agent Type: `INTENT_RECOGNITION`
 - 输出结构（最小化设计）：
  ```python
  class IntentResult(BaseModel):
      direct_answer: bool  # 是否可以直接回答
      intent_analysis: str  # 意图分析文本（用于调试/审计）
      execution_prompt: str  # 给 execution 阶段的提示词（direct_answer=false时使用）
      direct_response: str  # 直接回复文本（direct_answer=true时使用）
  ```
 - 短路逻辑：
  - `direct_answer=true` → 完全跳过 execution 和 organization，直接返回 direct_response
  - `direct_answer=false` → 进入 execution 阶段
 - 输出约束：使用 `output_pydantic=IntentResult`
 - **落库策略**：落库到 messages 表，但重建会话时不展示
 **2. Task Execution（任务执行）**
 - Agent Type: `TASK_EXECUTION`
 - 输入：IntentResult.execution_prompt + IntentResult.intent_analysis
 - 职责：
  - 执行复杂任务（查询数据库、调用工具、多步骤推理）
  - 返回结构化执行结果
 - 输出结构（最小化设计）：
  ```python
  class ExecutionResult(BaseModel):
      execution_summary: str  # 任务执行摘要（用于调试/审计）
      organization_prompt: str  # 给 organization 阶段的提示词
      execution_data: dict = {}  # 执行结果的结构化数据
  ```
 - 输出约束：使用 `output_pydantic=ExecutionResult`
 - **落库策略**：落库到 messages 表，但重建会话时不展示
 **3. Result Reporting（结果报告，对应 organization_stage）**
 - Agent Type: `RESULT_REPORTING`
 - 输入：
  - IntentResult（意图识别结果）
  - ExecutionResult（任务执行情况）
 - 职责：
  - 结合意图分析和执行结果，格式化为用户友好的响应
  - 应用个性化模板（基于 UserAgentContext）
 - 输出结构（最小化设计）：
  ```python
  class OrganizationResult(BaseModel):
      assistant_text: str  # 最终回复文本
      response_metadata: dict = {}  # 响应元数据（可选）
  ```
 - 输出约束：使用 `output_pydantic=OrganizationResult`
 - **唯一展示阶段**：重建会话时只展示此阶段的 message
 - **唯一转发阶段**：只有此阶段的输出需要通过 AG-UI 事件转发
 ### Flow 状态管理
 ```python
@dataclass
 class FlowState:
    user_input: str
    context: UserAgentContext
    stage_trace: list[str] = field(default_factory=list)
    intent_result: IntentResult | None = None
    execution_result: ExecutionResult | None = None
    organization_result: OrganizationResult | None = None
    assistant_text: str = ""
    tokens: dict = field(default_factory=dict)
    cost: Decimal = Decimal("0")
 ```
 ### 数据流向
 ```
 User Input + UserAgentContext
    ↓
@start() begin()
    ↓
@listen() intent_stage()
    ├─ IntentResult.direct_answer=true
    │    ↓
    │  跳过 execution，直接 organization
    │    ↓
    │  organization_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
    │    ↓
    │  OrganizationResult → AG-UI 事件 + 落库
    │
    └─ IntentResult.direct_answer=false
         ↓
       execution_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
         ↓
       ExecutionResult
         ↓
       organization_stage(ExecutionResult.next_stage_prompt, ExecutionResult.metadata)
         ↓
       OrganizationResult → AG-UI 事件 + 落库
 ```
 ### 三阶段输出约束
 **所有阶段使用 `output_pydantic` 约束输出：**
 ```python
 from pydantic import BaseModel
 class IntentResult(BaseModel):
    direct_answer: bool
    next_stage_prompt: str
    metadata: dict = {}
 class ExecutionResult(BaseModel):
    next_stage_prompt: str
    metadata: dict = {}
 class OrganizationResult(BaseModel):
    assistant_text: str
    metadata: dict = {}
 # Task 定义
 intent_task = Task(
    description="Analyze user intent",
    expected_output="Intent analysis",
    agent=intent_agent,
    output_pydantic=IntentResult,
 )
 execution_task = Task(
    description="Execute tasks",
    expected_output="Execution result",
    agent=execution_agent,
    output_pydantic=ExecutionResult,
 )
 organization_task = Task(
    description="Format response",
    expected_output="User-friendly response",
    agent=organization_agent,
    output_pydantic=OrganizationResult,
 )
 ```
 ---
 ## 系统选模逻辑设计
 ### 问题背景
 旧逻辑：`order_by(...).limit(1)` 只取排序后的第一个系统 agent，不区分阶段。
 新逻辑：按 `agent_type` 显式映射到三阶段。
 ### 选模规则
 **必需的 Agent Types：**
 - `INTENT_RECOGNITION` → 用于 intent_stage
 - `TASK_EXECUTION` → 用于 execution_stage
 - `RESULT_REPORTING` → 用于 organization_stage
 **查询逻辑：**
 ```python
 REQUIRED_TYPES = {"INTENT_RECOGNITION", "TASK_EXECUTION", "RESULT_REPORTING"}
@dataclass(frozen=True)
 class StageModels:
    intent: SystemAgentCatalog
    execution: SystemAgentCatalog
    organization: SystemAgentCatalog
 def resolve_stage_models(rows: list[SystemAgentCatalog]) -> StageModels:
    by_type = {row.agent_type: row for row in rows}
    missing = REQUIRED_TYPES - set(by_type.keys())
    if missing:
        raise ValueError(f"Missing required agent types: {missing}")
    return StageModels(
        intent=by_type["INTENT_RECOGNITION"],
        execution=by_type["TASK_EXECUTION"],
        organization=by_type["RESULT_REPORTING"],
    )
 ```
 **初始化数据约束：**
 - `system_agents` 表必须包含三种类型的记录
 - 运行时启动时验证完整性
 ---
 ## 人民币结算策略设计
 ### 设计原则
 1. **保留 LiteLLM 语义**：`completion_cost()` 始终返回 USD
 2. **业务层映射**：根据用户国家（`profiles.settings.preferences.country`）决定落库货币
 3. **默认人民币**：中国用户或无国家信息默认 CNY
 4. **汇率配置**：USD/CNY 汇率通过环境变量配置
 ### 货币来源
 ```
 UserAgentContext.settings.preferences.country
    ↓
 resolve_billing_currency(country)
    ↓
 CN 或无国家信息 → CNY
 US → USD
 其他 → USD
 ```
 ### 结算流程
 ```
 LiteLLM completion_cost()
    ↓ (USD)
 resolve_billing_cost(usd_cost, country)
    ↓
    ├─ country="CN" or None → CNY (乘以汇率)
    └─ 其他国家（如 "US"）→ USD (保持原值)
    ↓
 messages.cost + messages.currency
 sessions.total_cost (同一货币)
 ```
 ### 汇率配置
 ```python
 # 环境变量
 BILLING_USD_CNY_RATE=7.2
 # 默认值
 DEFAULT_USD_CNY_RATE = Decimal("7.2")
 ```
 ### 结算模型
 ```python
@dataclass(frozen=True)
 class BillingCost:
    currency: str  # "CNY" or "USD"
    cost: Decimal  # 6位小数精度
 def resolve_billing_cost(
    usd_cost: Decimal,
    country: str | None,
    usd_cny_rate: Decimal = DEFAULT_USD_CNY_RATE,
 ) -> BillingCost:
    currency = "CNY" if (country or "CN").upper() == "CN" else "USD"
    if currency == "CNY":
        cost = usd_cost * usd_cny_rate
    else:
        cost = usd_cost
    return BillingCost(
        currency=currency,
        cost=cost.quantize(Decimal("0.000001"))
    )
 ```
 ### 数据库落库
 **messages 表：**
 - `cost`: NUMERIC(12,6) - 业务货币金额
 - `currency`: VARCHAR(3) - "CNY" or "USD"
 **sessions 表：**
 - `total_cost`: NUMERIC(12,6) - 同一货币累计
 **约束：**
 - 同一 session 内所有 messages 的 currency 必须一致
 - sessions.total_cost 累加时保持货币一致
 ---
 ## Session 状态一致性设计
 ### 问题背景
 旧逻辑：
 - `sessions.status` 与 `state_snapshot.status` 不同步
 - 失败时状态不一致
 - title 未自动赋值
 ### 状态机
 ```
 pending (创建)
    ↓
 running (开始执行)
    ↓
    ├─ completed (成功)
    └─ failed (异常)
 ```
 ### 状态同步规则
 **创建时：**
 ```python
 session = AgentChatSession(
    user_id=user_uuid,
    status=AgentChatSessionStatus.PENDING,
    state_snapshot={
        "status": "pending",
        "pending_tool_call_id": None,
    },
 )
 ```
 **运行时：**
 ```python
 # 开始执行
 session.status = AgentChatSessionStatus.RUNNING
 session.state_snapshot["status"] = "running"
 # 成功完成
 session.status = AgentChatSessionStatus.COMPLETED
 session.state_snapshot["status"] = "completed"
 # 失败
 session.status = AgentChatSessionStatus.FAILED
 session.state_snapshot["status"] = "failed"
 session.state_snapshot["error_id"] = error_id
 ```
 ### 自动 Title 赋值
 **规则：**
 - 首次运行时，如果 `session.title` 为 `NULL`（尚未设置），使用 `user_input[:255]` 赋值
 - 只在第一次运行时赋值，后续不覆盖
 **实现：**
 ```python
 async def _set_title_if_empty(self, session_id: UUID, title: str) -> None:
    stmt = (
        update(AgentChatSession)
        .where(AgentChatSession.id == session_id)
        .where(AgentChatSession.title.is_(None))
        .values(title=title[:255])
    )
    await self.db.execute(stmt)
 ```
 ### Repository 方法
 ```python
 class SessionRepository:
    async def mark_running(self, session_id: UUID) -> None: ...
    async def mark_completed(self, session_id: UUID) -> None: ...
    async def mark_failed(self, session_id: UUID, error_id: str) -> None: ...
 ```
 ---
 ## 全局 Prompt 构建设计
 ### 分层结构
 ```
 全局系统 Prompt
 ├─ 身份段（username/bio）
 ├─ 偏好段（language/timezone/country）
 └─ 阶段段（动态注入）
    ├─ intent stage prompt
    ├─ execution stage prompt
    └─ organization stage prompt
 ```
 ### 构建函数
 ```python
 def build_global_system_prompt(ctx: UserAgentContext) -> str:
    lines = [
        "# User Identity",
        f"username: {ctx.username}",
        f"bio: {ctx.bio or 'N/A'}",
        "",
        "# User Preferences",
        f"interface_language: {ctx.settings.preferences.interface_language}",
        f"ai_language: {ctx.settings.preferences.ai_language}",
        f"timezone: {ctx.settings.preferences.timezone}",
        f"country: {ctx.settings.preferences.country}",
        "",
        "# Instructions",
        "Use the user's preferences to personalize responses.",
        "Respond in the user's preferred AI language.",
        "Consider the user's timezone for time-related queries.",
    ]
    return "\n".join(lines)
 ```
 ### 阶段注入
 每个阶段运行时，在全局 prompt 基础上追加阶段特定的指令：
 ```python
 def build_stage_prompt(
    base_prompt: str,
    stage: str,  # "intent" | "execution" | "organization"
    ctx: UserAgentContext,
 ) -> str:
    stage_prompts = {
        "intent": "Analyze the user's intent and decide if direct response is possible.",
        "execution": "Execute the required tasks and tools to fulfill the user's request.",
        "organization": "Format the execution results into a user-friendly response.",
    }
    return f"{base_prompt}\n\n# Stage: {stage}\n{stage_prompts[stage]}"
 ```
 ---
 ## 依赖关系图
 ```
 UserAgentContext (核心上下文)
    ↓
    ├─ ProfileSettings (用户配置)
    │   └─ preferences.country → 人民币结算
    │
    ├─ build_global_system_prompt() (全局 Prompt)
    │   └─ 三阶段 Flow 使用
    │
    └─ resolve_stage_models() (选模逻辑)
        └─ 三阶段 Agent 配置
 ```
 ---
 ## 相关文档
 - [Runtime Database Schema](../runtime/runtime-database.md)
 - [AG-UI Protocol](.opencode/skills/ag-ui/SKILL.md)
 - [CrewAI Framework](.opencode/skills/crewai/SKILL.md)
@@ -0,0 +1,144 @@
 # Agent LLM Config Implementation Plan
 > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
 **Goal:** 将 `system_agents.config` 中的 `temperature` / `max_tokens` 以受约束方式加载到运行时，并在调用 LiteLLM 时按需透传。
 **Architecture:** 在应用层 `RunService` 读取模型选择时同步读取并校验 `SystemAgents.config`；将校验后的 `SystemAgentLLMConfig` 传入 `CrewAIRuntime`；由 runtime 将配置转交给 LiteLLM client，client 仅在值非 `None` 时向 `completion()` 传参，避免不必要的 provider 兼容风险。
 **Tech Stack:** FastAPI, SQLAlchemy (async), Pydantic v2, LiteLLM, pytest
 ---
 ## 背景与修正点
 - 当前真实调用链为：`RunService._load_agent_model_selection()` -> `create_runtime()` -> `CrewAIRuntime.execute()` -> `run_completion()`，并非 `load_stage_models()`。
 - `SystemAgentLLMConfig` 已存在：`backend/src/core/agent/domain/system_agent_config.py`。
 - `system_agents.config` 目前在初始化 YAML 侧有约束，但运行时 DB 读取仍需二次校验，防止脏数据绕过。
 ## 规则约束
 - 严格 TDD：先写失败测试，再做实现。
 - Python 命令统一使用 `uv run ...`。
 - 仅做增量改动，不回滚或覆盖与本任务无关的已有变更。
 ## 字段映射与透传策略
 | 配置字段 | LiteLLM 参数 | 规则 |
 |---|---|---|
 | `temperature` | `temperature` | `None` 不透传；非 `None`（包括 `0`）直接透传 |
 | `max_tokens` | `max_tokens` | `None` 不透传；非 `None` 直接透传 |
 ---
 ### Task 1: 应用层加载并校验 Agent LLM Config
 **Files:**
 - Modify: `backend/src/core/agent/application/run_service.py`
 - Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
 **Step 1: 写失败测试（RED）**
 新增单测覆盖以下行为：
 1. `_load_agent_model_selection()` 返回三元组：`(model_code, provider_name, llm_config)`。
 2. 当 DB `config` 为 `{}` 时，`llm_config.temperature/max_tokens` 为 `None`。
 3. 当 DB `config` 含非法值（如 `temperature=3`）时抛 `ValueError`。
 **Step 2: 运行测试确认失败**
 Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
 Expected: 新增断言失败（返回值结构/异常行为不匹配）。
 **Step 3: 最小实现（GREEN）**
 在 `run_service.py`：
 1. 查询 `SystemAgents.config`。
 2. 用 `SystemAgentLLMConfig.model_validate(config or {})` 校验。
 3. 将 `_load_agent_model_selection()` 改为返回三元组。
 4. 在 `run()` 中把 `llm_config` 传递到 `create_runtime(...)`。
 **Step 4: 运行测试确认通过**
 Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
 Expected: PASS。
 ---
 ### Task 2: Runtime 与 LiteLLM Client 支持可选参数透传
 **Files:**
 - Modify: `backend/src/core/agent/infrastructure/crewai/factory.py`
 - Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py`
 - Modify: `backend/src/core/agent/infrastructure/litellm/client.py`
 - Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
 **Step 1: 写失败测试（RED）**
 在 `test_crewai_runtime.py` 增加用例：
 1. 传入 `temperature/max_tokens` 时，`run_completion` 收到对应参数。
 2. 参数为 `None` 时，不应被透传到 LiteLLM。
 必要时新增 `backend/tests/unit/core/agent/test_litellm_client.py`，单测 `run_completion` 的 kwargs 组装逻辑。
 **Step 2: 运行测试确认失败**
 Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
 Expected: 新增断言失败（参数未透传或未过滤 `None`）。
 **Step 3: 最小实现（GREEN）**
 1. `create_runtime()` 增加 `llm_config` 参数并传给 `CrewAIRuntime`。
 2. `CrewAIRuntime` 保存 `llm_config`，执行时调用：
   - `run_completion(..., temperature=llm_config.temperature, max_tokens=llm_config.max_tokens)`
 3. `run_completion()` 改为支持可选 `temperature/max_tokens`，内部仅在非 `None` 时加入 kwargs 再调用 `completion()`。
 **Step 4: 运行测试确认通过**
 Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
 Expected: PASS。
 ---
 ### Task 3: 初始化数据补齐与回归验证
 **Files:**
 - Modify: `backend/src/core/config/static/database/system_agents.yaml`
 - Modify: `backend/src/core/config/initial/init_data.py`（如需补充类型兜底）
 - Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
 **Step 1: 写失败测试（RED）**
 补充断言：YAML 读取后 `config` 可为空或包含 `max_tokens: null`，初始化逻辑不会报错，且生成结构符合 `SystemAgentLLMConfig`。
 **Step 2: 运行测试确认失败**
 Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
 Expected: 新增断言失败。
 **Step 3: 最小实现（GREEN）**
 1. 在 `system_agents.yaml` 为各 agent 配置显式补充 `max_tokens: null`。
 2. `init_data.py` 保持 `config: SystemAgentLLMConfig | None = None`，写库时统一序列化为 dict。
 **Step 4: 运行测试确认通过**
 Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
 Expected: PASS。
 ---
 ## 最终验证
 1. `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_crewai_runtime.py -q`
 2. `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -q`
 3. `uv run ruff check backend/src backend/tests`
 4. `uv run basedpyright`
 预期：全部通过；若集成测试依赖本地 DB 状态导致跳过/失败，需记录原因并给出手工验证步骤。
 ## 完成标准
 - `RunService` 从 DB 读取并校验 `config`。
 - runtime 到 LiteLLM 链路支持 `temperature/max_tokens` 可选透传。
 - `None` 不透传。
 - 单测与相关集成测试通过，并给出命令级证据。
@@ -0,0 +1,2 @@
 1. 短期 memory 的加载：memory 的生命周期由 TTL、对话条目、session_id 共同决定，使用 CrewAI 实现。
 2.
		`@@ -0,0 +1,2 @@`
							`1. memory短期的加载。memory的生命周期为ttl+对话条目+session_id。用crewai`
							`2.`