feat(agent): 实现 Agent Runtime LLM 配置与消息元数据结构化支持

2026-03-05 18:25:51 +08:00
parent c07d339a5f
commit db158de39c
26 changed files with 1215 additions and 2914 deletions
@@ -5,6 +5,10 @@ from uuid import UUID
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

 from core.agent.application.session_state_persistence import SessionStatePersistence
+from core.agent.domain.message_metadata import (
+    MessageMetadataAssistantOutput,
+    MessageMetadataToolResult,
+)
 from core.agent.infrastructure.persistence.message_repository import MessageRepository
 from core.agent.infrastructure.persistence.session_repository import SessionRepository
 from core.db import AsyncSessionLocal
@@ -46,14 +50,16 @@ class ResumeService:
                seq=next_seq,
                role=AgentChatMessageRole.TOOL,
                content='{"status":"ok"}',
-                metadata={"type": "tool_result", "tool_call_id": tool_call_id},
+                metadata=MessageMetadataToolResult(
+                    tool_call_id=tool_call_id,
+                ).model_dump(),
            )
            await message_repository.append_message(
                session_id=session_uuid,
                seq=next_seq + 1,
                role=AgentChatMessageRole.ASSISTANT,
                content="Tool result received",
-                metadata={"type": "assistant_output"},
+                metadata=MessageMetadataAssistantOutput().model_dump(),
            )

            snapshot = self._state_persistence.build_completed_snapshot()
@@ -3,10 +3,16 @@ from __future__ import annotations
 from decimal import Decimal
 from uuid import UUID, uuid4

+from pydantic import ValidationError
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

 from core.agent.application.session_state_persistence import SessionStatePersistence
+from core.agent.domain.message_metadata import (
+    MessageMetadataToolCall,
+    MessageMetadataUserInput,
+)
+from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.agent.infrastructure.crewai.factory import create_runtime
 from core.agent.infrastructure.persistence.message_repository import MessageRepository
 from core.agent.infrastructure.persistence.session_repository import SessionRepository
@@ -58,10 +64,16 @@ class RunService:
            if chat_session is None:
                raise ValueError("session not found")

-            model_code, provider_name = await self._load_agent_model_selection(
-                db_session
+            (
+                model_code,
+                provider_name,
+                llm_config,
+            ) = await self._load_agent_model_selection(db_session)
+            runtime = create_runtime(
+                model_code=model_code,
+                provider_name=provider_name,
+                llm_config=llm_config,
            )
-            runtime = create_runtime(model_code=model_code, provider_name=provider_name)
            runtime_result = runtime.execute(user_input=user_input)
            assistant_text = str(runtime_result.get("assistant_text", ""))
            prompt_tokens = _to_int(runtime_result.get("prompt_tokens", 0))
@@ -79,7 +91,7 @@ class RunService:
                role=AgentChatMessageRole.USER,
                content=user_input,
                model_code=model_code,
-                metadata={"type": "user_input"},
+                metadata=MessageMetadataUserInput().model_dump(),
            )
            await message_repository.append_message(
                session_id=session_uuid,
@@ -87,10 +99,9 @@ class RunService:
                role=AgentChatMessageRole.ASSISTANT,
                content=assistant_text or "Tool call pending approval",
                model_code=model_code,
-                metadata={
-                    "type": "tool_call",
-                    "tool_call_id": pending_tool_call_id,
-                },
+                metadata=MessageMetadataToolCall(
+                    tool_call_id=pending_tool_call_id,
+                ).model_dump(),
                input_tokens=prompt_tokens,
                output_tokens=completion_tokens,
                cost=cost,
@@ -119,9 +130,9 @@ class RunService:

    async def _load_agent_model_selection(
        self, session: AsyncSession
-    ) -> tuple[str, str]:
+    ) -> tuple[str, str, SystemAgentLLMConfig]:
        stmt = (
-            select(Llm.model_code, LlmFactory.name)
+            select(Llm.model_code, LlmFactory.name, SystemAgents.config)
            .join(SystemAgents, SystemAgents.llm_id == Llm.id)
            .join(LlmFactory, LlmFactory.id == Llm.factory_id)
            .where(SystemAgents.status == "active")
@@ -131,4 +142,11 @@ class RunService:
        record = (await session.execute(stmt)).one_or_none()
        if record is None:
            raise ValueError("active system agent model is required")
-        return str(record[0]), str(record[1])
+
+        raw_config = record[2] if isinstance(record[2], dict) else {}
+        try:
+            llm_config = SystemAgentLLMConfig.model_validate(raw_config)
+        except ValidationError as exc:
+            raise ValueError("invalid system agent config") from exc
+
+        return str(record[0]), str(record[1]), llm_config
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel
+
+
+class MessageMetadataUserInput(BaseModel):
+    type: Literal["user_input"] = "user_input"
+
+
+class MessageMetadataToolCall(BaseModel):
+    type: Literal["tool_call"] = "tool_call"
+    tool_call_id: str
+
+
+class MessageMetadataToolResult(BaseModel):
+    type: Literal["tool_result"] = "tool_result"
+    tool_call_id: str
+    run_id: str | None = None
+    turn_id: str | None = None
+    tool_name: str | None = None
+    storage_bucket: str | None = None
+    storage_path: str | None = None
+    payload_sha256: str | None = None
+    payload_bytes: int | None = None
+    payload_format: str | None = None
+
+
+class MessageMetadataAssistantOutput(BaseModel):
+    type: Literal["assistant_output"] = "assistant_output"
+
+
+MessageMetadata = (
+    MessageMetadataUserInput
+    | MessageMetadataToolCall
+    | MessageMetadataToolResult
+    | MessageMetadataAssistantOutput
+)
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+
+class SystemAgentLLMConfig(BaseModel):
+    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
+    max_tokens: int | None = Field(default=None, ge=1)
@@ -1,5 +1,7 @@
 from __future__ import annotations

+from core.agent.domain.message_metadata import MessageMetadataToolResult
+

 def reconstruct_tool_call_result_event(
    *,
@@ -26,15 +28,14 @@ def build_tool_result_metadata(
    payload_bytes: int,
    payload_format: str,
 ) -> dict[str, object]:
-    return {
-        "type": "tool_result",
-        "run_id": run_id,
-        "turn_id": turn_id,
-        "tool_call_id": tool_call_id,
-        "tool_name": tool_name,
-        "storage_bucket": storage_bucket,
-        "storage_path": storage_path,
-        "payload_sha256": payload_sha256,
-        "payload_bytes": payload_bytes,
-        "payload_format": payload_format,
-    }
+    return MessageMetadataToolResult(
+        run_id=run_id,
+        turn_id=turn_id,
+        tool_call_id=tool_call_id,
+        tool_name=tool_name,
+        storage_bucket=storage_bucket,
+        storage_path=storage_path,
+        payload_sha256=payload_sha256,
+        payload_bytes=payload_bytes,
+        payload_format=payload_format,
+    ).model_dump()
@@ -1,15 +1,20 @@
 from __future__ import annotations

+from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.agent.infrastructure.config.resolver import AgentConfigResolver
 from core.agent.infrastructure.crewai.runtime import CrewAIRuntime


 def create_runtime(
-    *, model_code: str | None, provider_name: str | None
+    *,
+    model_code: str | None,
+    provider_name: str | None,
+    llm_config: SystemAgentLLMConfig | None = None,
 ) -> CrewAIRuntime:
    resolver = AgentConfigResolver()
    return CrewAIRuntime(
        resolver=resolver,
        model_code=model_code,
        provider_name=provider_name,
+        llm_config=llm_config,
    )
@@ -2,6 +2,7 @@ from __future__ import annotations

 from typing import Any

+from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.agent.infrastructure.agui.bridge import to_agui_events
 from core.agent.infrastructure.config.resolver import (
    AgentConfigResolver,
@@ -47,11 +48,13 @@ class CrewAIRuntime:
        resolver: AgentConfigResolver,
        model_code: str | None,
        provider_name: str | None,
+        llm_config: SystemAgentLLMConfig | None = None,
    ) -> None:
        self._config: ResolvedAgentConfig = resolver.resolve(
            model_code=model_code,
            provider_name=provider_name,
        )
+        self._llm_config = llm_config or SystemAgentLLMConfig()

    def map_events(self, internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
        return to_agui_events(internal_events)
@@ -65,6 +68,8 @@ class CrewAIRuntime:
            model=litellm_model,
            api_key=self._config.provider_api_key,
            messages=[{"role": "user", "content": user_input}],
+            temperature=self._llm_config.temperature,
+            max_tokens=self._llm_config.max_tokens,
        )
        if not isinstance(response, dict):
            raise ValueError("llm response must be a dict")
@@ -5,13 +5,26 @@ from typing import Any
 from litellm import completion


-def run_completion(*, model: str, api_key: str, messages: list[dict[str, Any]]) -> Any:
-    response = completion(
-        model=model,
-        api_key=api_key,
-        messages=messages,
-        stream=False,
-    )
+def run_completion(
+    *,
+    model: str,
+    api_key: str,
+    messages: list[dict[str, Any]],
+    temperature: float | None = None,
+    max_tokens: int | None = None,
+) -> Any:
+    kwargs: dict[str, Any] = {
+        "model": model,
+        "api_key": api_key,
+        "messages": messages,
+        "stream": False,
+    }
+    if temperature is not None:
+        kwargs["temperature"] = temperature
+    if max_tokens is not None:
+        kwargs["max_tokens"] = max_tokens
+
+    response = completion(**kwargs)
    model_dump = getattr(response, "model_dump", None)
    if callable(model_dump):
        return model_dump()
@@ -9,6 +9,7 @@ from pydantic import BaseModel, ValidationError
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession

+from core.agent.domain.system_agent_config import SystemAgentLLMConfig
 from core.db.session import AsyncSessionLocal
 from core.logging import get_logger
 from models.llm import Llm
@@ -38,7 +39,7 @@ class SystemAgentsSeed(BaseModel):
    agent_type: str
    llm_model_code: str
    status: str
-    config: dict[str, Any]
+    config: SystemAgentLLMConfig | None = None


 class SystemAgentsYaml(BaseModel):
@@ -184,7 +185,9 @@ async def initialize_system_agents() -> None:
                    agent_type=agent["agent_type"],
                    llm_id=llm.id,
                    status=agent["status"],
-                    config=agent["config"],
+                    config=SystemAgentLLMConfig.model_validate(
+                        agent.get("config") or {}
+                    ).model_dump(),
                )

    logger.info("Initialized system agents")
@@ -4,15 +4,18 @@ agents:
    status: active
    config:
      temperature: 0.7
+      max_tokens: null
    
  - agent_type: TASK_EXECUTION
    llm_model_code: deepseek-v3.2
    status: active
    config:
      temperature: 0.7
+      max_tokens: null
    
  - agent_type: RESULT_REPORTING
    llm_model_code: deepseek-v3.2
    status: active
    config:
      temperature: 0.7
+      max_tokens: null
@@ -1,22 +1,26 @@
 from __future__ import annotations

 from types import SimpleNamespace
+from typing import cast

-from core.agent.infrastructure.config.resolver import AgentConfigResolver
+from core.agent.domain.system_agent_config import SystemAgentLLMConfig
+from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike
 from core.agent.infrastructure.crewai.runtime import CrewAIRuntime


 def test_runtime_emits_text_tool_reasoning_events() -> None:
-    runtime = CrewAIRuntime(
-        resolver=AgentConfigResolver(
-            settings=SimpleNamespace(
-                agent_runtime=SimpleNamespace(
-                    default_model_code="",
-                    streaming_enabled=True,
-                ),
-                llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
-            )
+    settings = cast(
+        SettingsLike,
+        SimpleNamespace(
+            agent_runtime=SimpleNamespace(
+                default_model_code="",
+                streaming_enabled=True,
+            ),
+            llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
        ),
+    )
+    runtime = CrewAIRuntime(
+        resolver=AgentConfigResolver(settings=settings),
        model_code="gpt-4o-mini",
        provider_name="dashscope",
    )
@@ -46,11 +50,18 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
    captured: dict[str, object] = {}

    def _fake_completion(
-        *, model: str, api_key: str, messages: list[dict[str, object]]
+        *,
+        model: str,
+        api_key: str,
+        messages: list[dict[str, object]],
+        temperature: float | None = None,
+        max_tokens: int | None = None,
    ):
        captured["model"] = model
        captured["api_key"] = api_key
        captured["messages"] = messages
+        captured["temperature"] = temperature
+        captured["max_tokens"] = max_tokens
        return {
            "choices": [
                {
@@ -75,23 +86,28 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
            cost=0.001,
        ),
    )
+    settings = cast(
+        SettingsLike,
+        SimpleNamespace(
+            agent_runtime=SimpleNamespace(
+                default_model_code="",
+                streaming_enabled=True,
+            ),
+            llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
+        ),
+    )

    runtime = CrewAIRuntime(
-        resolver=AgentConfigResolver(
-            settings=SimpleNamespace(
-                agent_runtime=SimpleNamespace(
-                    default_model_code="",
-                    streaming_enabled=True,
-                ),
-                llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
-            )
-        ),
+        resolver=AgentConfigResolver(settings=settings),
        model_code="qwen3.5-flash",
        provider_name="dashscope",
+        llm_config=SystemAgentLLMConfig(temperature=0.3, max_tokens=256),
    )

    result = runtime.execute(user_input="hi")

    assert captured["model"] == "dashscope/qwen3.5-flash"
    assert captured["api_key"] == "env-api-key"
+    assert captured["temperature"] == 0.3
+    assert captured["max_tokens"] == 256
    assert result["assistant_text"] == "hello"
@@ -0,0 +1,14 @@
+from __future__ import annotations
+
+from core.config.initial.init_data import load_system_agents
+
+
+def test_load_system_agents_supports_nullable_max_tokens() -> None:
+    loaded = load_system_agents()
+
+    agents = loaded["agents"]
+    assert len(agents) > 0
+    for agent in agents:
+        assert "config" in agent
+        assert "max_tokens" in agent["config"]
+        assert agent["config"]["max_tokens"] is None
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from core.agent.infrastructure.litellm.client import run_completion
+
+
+def test_run_completion_passes_optional_params_when_provided(monkeypatch) -> None:
+    captured: dict[str, object] = {}
+
+    def _fake_completion(**kwargs):  # type: ignore[no-untyped-def]
+        captured.update(kwargs)
+        return {"ok": True}
+
+    monkeypatch.setattr(
+        "core.agent.infrastructure.litellm.client.completion",
+        _fake_completion,
+    )
+
+    run_completion(
+        model="dashscope/qwen3.5-flash",
+        api_key="key",
+        messages=[{"role": "user", "content": "hi"}],
+        temperature=0.6,
+        max_tokens=120,
+    )
+
+    assert captured["temperature"] == 0.6
+    assert captured["max_tokens"] == 120
+
+
+def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
+    captured: dict[str, object] = {}
+
+    def _fake_completion(**kwargs):  # type: ignore[no-untyped-def]
+        captured.update(kwargs)
+        return {"ok": True}
+
+    monkeypatch.setattr(
+        "core.agent.infrastructure.litellm.client.completion",
+        _fake_completion,
+    )
+
+    run_completion(
+        model="dashscope/qwen3.5-flash",
+        api_key="key",
+        messages=[{"role": "user", "content": "hi"}],
+        temperature=None,
+        max_tokens=None,
+    )
+
+    assert "temperature" not in captured
+    assert "max_tokens" not in captured
@@ -4,6 +4,23 @@ import pytest

 from core.agent.application.resume_service import ResumeService
 from core.agent.application.run_service import RunService
+from core.agent.domain.system_agent_config import SystemAgentLLMConfig
+
+
+class _FakeResult:
+    def __init__(self, record: tuple[object, object, object] | None) -> None:
+        self._record = record
+
+    def one_or_none(self) -> tuple[object, object, object] | None:
+        return self._record
+
+
+class _FakeSession:
+    def __init__(self, record: tuple[object, object, object] | None) -> None:
+        self._record = record
+
+    async def execute(self, _stmt: object) -> _FakeResult:
+        return _FakeResult(self._record)


@pytest.mark.asyncio
@@ -20,3 +37,72 @@ async def test_resume_service_requires_pending_tool_call() -> None:

    with pytest.raises(ValueError):
        await resume_service.resume(session_id="session-1", tool_call_id="call-1")
+
+
+@pytest.mark.asyncio
+async def test_load_agent_model_selection_returns_validated_llm_config() -> None:
+    run_service = RunService()
+    fake_session = _FakeSession(
+        (
+            "qwen3.5-flash",
+            "dashscope",
+            {"temperature": 0.5, "max_tokens": 512},
+        )
+    )
+
+    (
+        model_code,
+        provider_name,
+        llm_config,
+    ) = await run_service._load_agent_model_selection(
+        fake_session  # type: ignore[arg-type]
+    )
+
+    assert model_code == "qwen3.5-flash"
+    assert provider_name == "dashscope"
+    assert isinstance(llm_config, SystemAgentLLMConfig)
+    assert llm_config.temperature == 0.5
+    assert llm_config.max_tokens == 512
+
+
+@pytest.mark.asyncio
+async def test_load_agent_model_selection_rejects_invalid_config() -> None:
+    run_service = RunService()
+    fake_session = _FakeSession(
+        (
+            "qwen3.5-flash",
+            "dashscope",
+            {"temperature": 3.0},
+        )
+    )
+
+    with pytest.raises(ValueError, match="invalid system agent config"):
+        await run_service._load_agent_model_selection(fake_session)  # type: ignore[arg-type]
+
+
+@pytest.mark.asyncio
+async def test_load_agent_model_selection_falls_back_when_config_not_dict() -> None:
+    run_service = RunService()
+    fake_session = _FakeSession(
+        (
+            "qwen3.5-flash",
+            "dashscope",
+            "not-a-dict",
+        )
+    )
+
+    _, _, llm_config = await run_service._load_agent_model_selection(
+        fake_session  # type: ignore[arg-type]
+    )
+
+    assert llm_config.temperature is None
+    assert llm_config.max_tokens is None
+
+
+@pytest.mark.asyncio
+async def test_load_agent_model_selection_raises_when_no_active_agent() -> None:
+    run_service = RunService()
+    fake_session = _FakeSession(None)
+
+    with pytest.raises(ValueError, match="active system agent model is required"):
+        await run_service._load_agent_model_selection(fake_session)  # type: ignore[arg-type]
@@ -1,116 +0,0 @@
-# 前后端 API 对比分析
-
-**Date:** 2026-03-04
-**Status:** Open
-**Type:** 架构分析
-
---
-
-## 一、后端已有、前端缺失的 API
-
-### 1. Friendships API (`/api/v1/friends`)
-
-| 方法 | 路径 | 功能 | 前端状态 |
-|------|------|------|----------|
-| POST | `/requests` | 发送好友请求 | **缺失** |
-| GET | `/requests/inbox` | 获取收件箱 | **缺失** |
-| GET | `/requests/outgoing` | 获取发出的请求 | **缺失** |
-| POST | `/requests/{id}/accept` | 接受好友请求 | **缺失** |
-| POST | `/requests/{id}/decline` | 拒绝好友请求 | **缺失** |
-| DELETE | `/requests/{id}` | 取消好友请求 | **缺失** |
-| GET | `` | 获取好友列表 | **缺失** |
-| DELETE | `/{id}` | 删除好友 | **缺失** |
-
-### 2. Inbox Messages API (`/api/v1/inbox/messages`)
-
-| 方法 | 路径 | 功能 | 前端状态 |
-|------|------|------|----------|
-| GET | `` | 获取消息列表 | **缺失** |
-| POST | `/{id}/accept` | 接受邀请 | **缺失** |
-| POST | `/{id}/dismiss` | 忽略消息 | **缺失** |
-
-### 3. Chat/AgUi 流式 API
-
-| 功能 | 前端状态 |
-|------|----------|
-| 发送消息 SSE 流式 | **仅有 Mock** |
-| 加载历史记录 | **仅有 Mock** |
-
-> 前端 `AgUiService` 只有本地 mock (`throw UnimplementedError`)，未实现真实 API 调用。
-
-### 4. Infra API
-
-| 方法 | 路径 | 功能 | 前端状态 |
-|------|------|------|----------|
-| GET | `/infra/health` | 基础设施健康检查 | **未使用** |
-
---
-
-## 二、后端已实现、前端已对接或部分对接的 API
-
-### Auth API (`/api/v1/auth`)
-
-| 方法 | 路径 | 后端 | 前端 |
-|------|------|------|------|
-| POST | `/verifications` | ✅ | ✅ |
-| POST | `/verifications/verify` | ✅ | ✅ |
-| POST | `/verifications/resend` | ✅ | ✅ |
-| POST | `/sessions` | ✅ | ✅ |
-| POST | `/sessions/refresh` | ✅ | ✅ |
-| DELETE | `/sessions` | ✅ | ✅ |
-| POST | `/password-reset` | ✅ | ✅ |
-| POST | `/password-reset/confirm` | ✅ | ✅ |
-| GET | `/users` | ✅ | **未使用** |
-
-### Users API (`/api/v1/users`)
-
-| 方法 | 路径 | 后端 | 前端 |
-|------|------|------|------|
-| GET | `/me` | ✅ | ✅ |
-| PATCH | `/me` | ✅ | ✅ |
-| POST | `/search` | ✅ | ✅ |
-
-### Schedule Items API (`/api/v1/schedule-items`)
-
-| 方法 | 路径 | 后端 | 前端 |
-|------|------|------|------|
-| POST | `` | ✅ | **仅有 Mock** |
-| GET | `` (range query) | ✅ | **仅有 Mock** |
-| GET | `/{id}` | ✅ | **仅有 Mock** |
-| PATCH | `/{id}` | ✅ | **仅有 Mock** |
-| DELETE | `/{id}` | ✅ | **仅有 Mock** |
-| POST | `/{id}/share` | ✅ | **缺失** |
-
---
-
-## 三、待实现功能清单
-
-| 优先级 | 功能 | 说明 |
-|--------|------|------|
-| **P0** | FriendsApi | 前端无 Friendships API 客户端 |
-| **P0** | InboxMessagesApi | 前端无 Inbox Messages API 客户端 |
-| **P0** | Chat/AgUi 后端连接 | 前端 AgUiService 未实现真实 API |
-| **P1** | CalendarService 真实 API | MockCalendarService → 真实 API 调用 |
-| **P1** | Schedule Share 接口 | 前端未调用 `POST /{id}/share` |
-| **P2** | Infra Health 集成 | 可用于前端健康检查 |
-
---
-
-## 四、相关文件位置
-
-### 前端 API 客户端
-
- `apps/lib/features/auth/data/auth_api.dart` - Auth API
- `apps/lib/features/users/data/users_api.dart` - Users API
- `apps/lib/features/calendar/data/services/mock_calendar_service.dart` - Calendar Mock
- `apps/lib/features/chat/data/services/ag_ui_service.dart` - Chat/AgUi Mock
- `apps/lib/features/chat/data/services/mock_history_service.dart` - History Mock
-
-### 后端 Router
-
- `backend/src/v1/auth/router.py` - Auth 路由
- `backend/src/v1/users/router.py` - Users 路由
- `backend/src/v1/friendships/router.py` - Friendships 路由
- `backend/src/v1/inbox_messages/router.py` - Inbox Messages 路由
- `backend/src/v1/schedule_items/router.py` - Schedule Items 路由
- `backend/src/v1/infra/router.py` - Infra 路由
@@ -1,145 +0,0 @@
-# 前后端测试分析报告
-
-**Date:** 2026-03-04
-**Status:** Completed
-
---
-
-## 测试统计
-
-### 后端测试
-
-| 类型 | 数量 | 状态 |
-|------|------|------|
-| Unit Tests | ~100+ | 可运行 |
-| Integration Tests | ~70+ | 可运行 |
-| E2E Tests | 5 | **无法运行** (缺少 playwright 依赖) |
-
-### 前端测试
-
-| 类型 | 数量 | 状态 |
-|------|------|------|
-| Flutter Tests | 140 | ✅ 全部通过 |
-
---
-
-## 问题发现
-
-### 1. 后端 E2E 测试无法运行 (HIGH)
-
-**问题**: 5 个 E2E 测试文件需要 `playwright` 模块，但依赖未安装。
-
-**影响文件**:
- `tests/e2e/test_auth_flow.py`
- `tests/e2e/test_infra_health_e2e.py`
- `tests/e2e/test_logging_e2e.py`
- `tests/e2e/test_mobile_health_e2e.py`
- `tests/e2e/test_profile_flow.py`
-
-**错误**:
-```
-ModuleNotFoundError: No module named 'playwright'
-```
-
-**建议**: 
- 安装 playwright: `uv add playwright && uv run playwright install`
- 或者移除这些无法运行的 E2E 测试文件
-
---
-
-### 2. 测试文件命名冲突导致收集警告 (LOW)
-
-**问题**: 存在多个同名 `test_schemas.py` 文件在不同目录，导致 pytest 收集时显示警告。
-
-**影响文件**:
- `tests/unit/v1/schedule_items/test_schemas.py`
- `tests/unit/v1/profile/test_schemas.py`
- `tests/unit/v1/inbox_messages/test_schemas.py`
- `tests/unit/v1/friendships/test_schemas.py`
-
-**状态**: 测试实际可以正常运行，只是有警告提示。
-
-**建议**: 可保持现状（这是合理的代码组织方式），或将各文件重命名为互不重复的名称（如 `test_profile_schemas.py`、`test_friendships_schemas.py`）以消除警告。
-
---
-
-### 3. 遗留测试验证旧字段 (INFO)
-
-**文件**: `tests/unit/v1/profile/test_schemas.py`
-
-**测试**: `test_profile_update_rejects_display_name_field`
-
-**说明**: 此测试验证旧的 `display_name` 字段被正确拒绝。字段已在之前的重构中删除。
-
-**状态**: **有效** - 这是一个回归测试，确保旧字段不被使用。
-
---
-
-## 未发现问题的检查项
-
-### 冗余测试
-经过检查，未发现明显冗余的测试：
- 每个模块的测试覆盖不同的功能
- Unit tests、Integration tests、E2E tests 有清晰的职责划分
-
-### 死代码
-未发现测试文件中有未使用的:
- imports
- mock 类
- helper 函数
-
-### 缺失测试
-未发现对应已实现功能但缺少测试的情况。
-
---
-
-## 测试覆盖模块
-
-### 后端
-| 模块 | Unit | Integration | E2E |
-|------|------|-------------|-----|
-| Auth | ✅ | ✅ | ❌ |
-| Users | - | ✅ | - |
-| Profile | ✅ | - | ❌ |
-| Friendships | ✅ | ✅ | - |
-| Inbox Messages | ✅ | ✅ | - |
-| Schedule Items | ✅ | ✅ | - |
-| Logging | ✅ | ✅ | ✅ |
-| Settings | ✅ | - | - |
-
-### 前端
-| 模块 | 测试数 |
-|------|--------|
-| Auth | ~20 |
-| Chat | ~70 |
-| Home | ~15 |
-| Calendar | ~5 |
-| Core (API, Storage) | ~30 |
-
---
-
-## 建议
-
-1. **立即**: 解决 E2E 测试依赖问题或移除无法运行的测试文件
-2. **可选**: 清理 test_schemas.py 重名警告（低优先级）
-3. **保持**: 现有的测试结构良好，无需重大重构
-
---
-
-## 附: 测试代码质量问题
-
-### 测试类未完全实现 Protocol (LSP 警告)
-
-**文件**: `tests/unit/v1/auth/test_auth_service.py`
-
-**问题**: `FakeGateway` 和 `LogoutAssertingGateway` 类没有实现 `AuthServiceGateway` Protocol 的全部方法：
- `request_password_reset`
- `confirm_password_reset`
-
-**影响**: LSP 类型检查器报告错误，但运行时不受影响（因为这些方法在测试中不会被调用）。
-
-**建议**: 可选择为测试替身补充缺失的方法实现（例如直接抛出 `NotImplementedError` 的桩方法）；注意 `@pytest.mark.skip` 只能跳过测试用例，无法消除 Protocol 未完整实现导致的类型检查错误。
-
---
-
-*报告生成时间: 2026-03-04*
@@ -1,201 +0,0 @@
-# Agent 后端硬切重构设计
-
-## 目标
-
- 一次性移除现有 Agent 运行时代码、测试和旧文档契约，避免新旧方案并存。
- 仅从后端重新设计 Agent 体系，不依赖前端实现细节。
- 新方案必须满足以下六项要求：
-  1. 配置层可通过 `.env` 驱动 LLM API Key。
-  2. 对话与 resume 通过 Celery 队列处理，不阻塞 Web 主线程。
-  3. `v1/agent` 仅负责路由组织与服务调用，核心逻辑在 `core/agent`。
-  4. 按 CrewAI 官方模型组织 Agent/Task/Crew/Flow/Tools。
-  5. 按 AG-UI 协议输出事件，优先使用 `ag-ui-crewai` 适配库。
-  6. 使用 LiteLLM 统计每次 LLM 调用的 token 和 cost。
-
-## 设计原则
-
- 单一职责：HTTP 层只做协议和鉴权，编排与执行下沉到核心层。
- 异步优先：长耗时推理、工具调用、恢复流程全部异步化。
- 协议优先：AG-UI 作为唯一事件契约，不维护自定义事件方言。
- 可观测性优先：每次 run、每次 stage、每次 LLM 调用可追踪。
- 配置单一来源：所有密钥和模型配置只走 `core.config.settings`。
-
-## 目标架构
-
-### 1) 分层
-
- `backend/src/v1/agent/`
-  - `router.py`: 暴露 HTTP/SSE 接口。
-  - `schemas.py`: 请求/响应 DTO 和输入校验。
-  - `dependencies.py`: DI 装配。
-  - `service.py`: 薄服务，仅调用 `core/agent` 应用服务。
- `backend/src/core/agent/`
-  - `application/`: run/resume 应用服务。
-  - `domain/`: run 状态机、resume 幂等语义、错误模型。
-  - `infrastructure/crewai/`: CrewAI Agent/Task/Crew/Flow 装配与执行。
-  - `infrastructure/agui/`: AG-UI 事件映射与 SSE 序列化。
-  - `infrastructure/litellm/`: LiteLLM 客户端与 usage/cost 拦截器。
-  - `infrastructure/queue/`: Celery task producer/consumer。
-
-### 1.1) 配置来源与合并策略
-
- Agent 运行配置由两部分组成：
-  - 数据库存量配置：`system_agents`（每种 agent_type 对应 llm 与 llm_config）。
-  - 静态模板配置：`backend/src/core/config/static/crewai/*.yaml`（角色描述、任务模板、workflow、tools）。
- 合并策略：
-  - `llm` 与 `llm_config` 以 `system_agents` 为准。
-  - prompt 模板、task 描述、flow stage、tool 白名单以 static/crewai 为准。
-  - 若任一 agent_type 在 `system_agents` 缺失，运行前失败并返回受控错误。
-
-### 2) 核心运行链路
-
-1. `POST /api/v1/agent/runs` 只负责参数校验和鉴权。
-2. 路由调用 `AgentRunAppService.enqueue_run()`，写入 run 记录并投递 Celery。
-3. Worker 执行 `run_agent_task`：
-   - 读取 run 上下文。
-   - 构建 CrewAI `Agent/Task/Crew/Flow`。
-   - 通过 `ag-ui-crewai` 将执行事件转为 AG-UI 标准事件。
-   - 每次 LLM 调用由 LiteLLM 中间层记录 token/cost。
-4. 事件落库并发布到事件通道（Redis Stream/Channel）。
-5. SSE 接口从事件通道读取并持续推送，直到 `RUN_FINISHED` 或 `RUN_ERROR`。
-
-### 3) Resume 链路
-
-1. `POST /api/v1/agent/runs/{run_id}/resume` 校验 `interrupt_id` 与决策 payload。
-2. 调用 `enqueue_resume()` 投递 `resume_agent_task`。
-3. Worker 在事务内做并发控制：
-   - `run_id + interrupt_id` 幂等锁。
-   - 过期校验与状态迁移。
-4. 恢复后继续 CrewAI Flow，事件按 AG-UI 继续输出。
-
-### 4) Session 状态持久化
-
- 使用 `sessions.state_snapshot` 作为运行态单一快照来源。
- 快照至少包含：
-  - run 上下文（thread_id、run_id、stage）
-  - pending_tool_calls（tool_call_id、tool_name、args、status、expires_at）
-  - correlation 索引（tool_call_id -> message_id / step_id）
- 所有中断/恢复均以 `state_snapshot` 事务更新为准，避免内存态漂移。
-
-### 5) 会话与消息落库模型
-
- 会话主表：`sessions`
-  - 新建 run 时写入：`id/user_id/session_type/status=running/last_activity_at`。
-  - 运行中持续更新：`status`、`last_activity_at`、`message_count`、`total_tokens`、`total_cost`、`state_snapshot`。
-  - 运行结束更新：
-    - 成功：`status=completed`
-    - 失败：`status=failed`
- 消息表：`messages`
-  - 用户输入落库为 `role=user`（每次 run 开始时先写入）。
-  - 模型输出落库为 `role=assistant`（按最终聚合文本落库，保留 metadata 记录增量信息）。
-  - 工具调用结果落库为 `role=tool`，并写入 `tool_name` 与 `metadata.tool_call_id`。
-  - `seq` 由每个 `session_id` 内单调递增分配，满足 `uq_messages_session_seq`。
- 计量落库：每次 LLM 调用的 usage/cost 先写消息级，再聚合更新到 session 级。
-
-## 六项要求落地映射
-
-### 要求 1: `.env` 驱动 LLM API Key
-
- 新增 `LLMSettings` 到 `core.config.settings.Settings`，统一定义：
-  - `SOCIAL_LLM__PROVIDER_KEYS__DASHSCOPE`
-  - `SOCIAL_LLM__PROVIDER_KEYS__MINIMAX`
-  - `SOCIAL_LLM__PROVIDER_KEYS__MOONSHOT`
-  - `SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK`
-  - `SOCIAL_LLM__PROVIDER_KEYS__ARK`
-  - `SOCIAL_LLM__PROVIDER_KEYS__ZAI`
- 禁止 `os.environ` 直接读取密钥。
-
-### 要求 2: 对话和 resume 走 Celery
-
- Web 层不直接执行编排。
- `run`/`resume` 一律入队，Worker 处理，Web 仅做事件流转发。
- 加入任务级超时、重试、死信策略。
-
-### 要求 3: v1 仅路由与调用
-
- `v1/agent/service.py` 仅保留应用服务调用和错误映射。
- 任何编排、状态机、工具执行逻辑禁止进入 `v1`。
-
-### 要求 4: CrewAI 官方流程
-
- 采用 CrewAI 原生对象：`Agent`、`Task`、`Crew`、`Flow`。
- tools 通过 CrewAI Tool 机制注册，不做平行实现。
- 任务模板与 agent 配置集中化（静态模板 + 运行时拼装）。
- 配置拼装明确依赖 `system_agents + static/crewai`，不再使用双套来源。
-
-### 要求 5: AG-UI + ag-ui-crewai
-
- 事件集遵循 AG-UI 协议，生命周期闭环：
-  - `RUN_STARTED`
-  - 流式消息和工具事件
-  - 终态 `RUN_FINISHED` 或 `RUN_ERROR`
- 优先引入 `ag-ui-crewai` 做 CrewAI 到 AG-UI 的桥接，避免重复造轮子。
-
-### 要求 6: LiteLLM token/cost 统计
-
- 所有 LLM 调用通过 LiteLLM 统一出入口。
- 按调用粒度记录：`input_tokens`、`output_tokens`、`total_tokens`、`cost`、`currency`。
- 按 run 粒度聚合并落库，支持后续计费和审计。
-
-## 数据与可观测性
-
- 保留现有 Agent 相关表结构，不在本次硬切做数据库破坏性变更。
- 新增事件日志与调用指标落点（如已有字段不足，后续增量迁移）。
- 日志使用结构化字段：`run_id`、`task_id`、`stage`、`tool_name`、`llm_model`、`latency_ms`。
- 持久化原则：run/resume 的关键状态变更必须可重放，禁止仅保存在内存。
-
-## 事务边界
-
- `run` 入口事务：创建或加载 `session` + 写入用户消息。
- `worker` 执行事务（可分阶段短事务）：
-  - 阶段开始：更新 `session.status/state_snapshot`。
-  - LLM 返回：写 assistant/tool 消息 + 更新 token/cost 聚合。
-  - 中断：写 `pending_tool_calls` 到 `state_snapshot` 并提交。
-  - 完成：更新终态 `session.status` 并提交。
- `resume` 事务：校验 `interrupt_id` 与 ownership，CAS 更新 `state_snapshot`，然后进入后续执行事务。
-
-## 错误处理与安全
-
- API Key 缺失启动即失败，不进入运行态。
- 外部工具入参统一白名单和 schema 校验。
- resume 决策必须鉴权与会话所有权校验。
- 错误响应遵循 RFC 7807，避免泄漏敏感上下文。
-
-## 工具调用与恢复语义
-
- 工具分三类：
-  - 前端工具：由 `RunAgentInput.tools` 提供能力声明，触发 interrupt，由客户端执行并回传 result。
-  - 后端工具（需审批）：先 interrupt 给前端审批；审批通过后由后端执行，不由前端执行。
-  - 后端工具（直执）：后端直接执行。
- 一致性约束：
-  - 每个 tool_result 必须携带 `tool_call_id`。
-  - 后端仅接受当前 `state_snapshot.pending_tool_calls` 中存在且状态合法的 `tool_call_id`。
-  - 若收到未知/已消费/过期 `tool_call_id`，立即产出 `RUN_ERROR` 并记录审计日志。
-
-## 测试策略
-
- 单元测试：
-  - 配置解析与 key 解析
-  - run/resume 状态机与幂等
-  - LiteLLM usage 聚合
- 集成测试：
-  - API 入队
-  - Worker 消费
-  - SSE 事件顺序与终态
- E2E：
-  - run 成功链路
-  - interrupt + resume 链路
-  - tool 调用链路
-
-## 迁移策略
-
- 阶段 0（本次）：硬切删除旧代码、旧测试、旧文档契约。
- 阶段 1：搭建新架构骨架和最小可运行 run 流程。
- 阶段 2：接入 CrewAI + ag-ui-crewai + LiteLLM 完整链路。
- 阶段 3：补齐可观测性、压测与稳定性治理。
-
-## 验收标准
-
- 后端仓库不存在旧 `v1/agent` 和 `core/agent` 旧实现。
- 所有 Agent 相关旧测试与旧文档契约已移除。
- 新方案设计文档明确覆盖六项要求并可进入实现阶段。
@@ -1,574 +0,0 @@
-# Agent 后端重建 Implementation Plan
-
-> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
-
-**Goal:** 在后端重建 Agent 运行时，满足队列异步、CrewAI 配置打通、AG-UI 工具中断恢复、LiteLLM 计量、以及 `sessions.state_snapshot` 持久化要求。
-
-**Architecture:** `v1/agent` 仅做 API/鉴权/参数校验与 SSE 输出，`core/agent` 负责编排与执行。Agent 创建配置由 `system_agents`（数据库）+ `core/config/static/crewai/*.yaml`（静态模板）合并生成。run/resume 全链路通过 Celery Worker 执行，状态写入 `sessions.state_snapshot`。
-
-**Tech Stack:** FastAPI, Celery, Redis, CrewAI, ag-ui-crewai, LiteLLM, SQLAlchemy, Alembic, pytest
-
---
-
-### Task 1: 建立配置聚合器（system_agents + static/crewai）
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/config/resolver.py`
- Modify: `backend/src/core/config/static/crewai/agents.yaml`
- Modify: `backend/src/core/config/static/crewai/tasks.yaml`
- Create: `backend/src/core/config/static/crewai/workflow.yaml`
- Create: `backend/src/core/config/static/crewai/tools.yaml`
- Test: `backend/tests/unit/core/agent/test_config_resolver.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_resolver_merges_system_agents_and_static_templates():
-    resolved = resolve_agent_runtime_config(...)
-    assert resolved.intent.llm.model_code == "deepseek-v3.2"
-    assert "intent" in resolved.workflow_stages
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
-Expected: FAIL with `NameError` or `ImportError`, because `resolve_agent_runtime_config` does not exist yet
-
-**Step 3: Write minimal implementation**
-
-```python
-def resolve_agent_runtime_config(system_agents: list[dict], static_cfg: dict) -> RuntimeConfig:
-    by_type = {item["agent_type"]: item for item in system_agents}
-    return RuntimeConfig.from_sources(by_type=by_type, static_cfg=static_cfg)
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure/config/resolver.py backend/src/core/config/static/crewai backend/tests/unit/core/agent/test_config_resolver.py
-git commit -m "feat: add system_agents and static crewai config resolver"
-```
-
-### Task 2: 统一 LLM Key 与模型配置入口
-
-**Files:**
- Modify: `backend/src/core/config/settings.py`
- Modify: `.env.example`
- Create: `backend/tests/unit/core/config/test_llm_settings.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_llm_keys_read_from_settings(monkeypatch):
-    monkeypatch.setenv("SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK", "k1")
-    s = Settings()
-    assert s.llm.provider_keys.deepseek == "k1"
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
-Expected: FAIL with missing `llm` field
-
-**Step 3: Write minimal implementation**
-
-```python
-class LLMProviderKeys(BaseModel):
-    deepseek: str | None = None
-
-class LLMSettings(BaseModel):
-    provider_keys: LLMProviderKeys = LLMProviderKeys()
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/config/settings.py .env.example backend/tests/unit/core/config/test_llm_settings.py
-git commit -m "feat: centralize llm provider keys in settings"
-```
-
-### Task 3: sessions 表状态快照契约落地
-
-**Files:**
- Create: `backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py`
- Modify: `backend/src/models/agent_chat_session.py`
- Create: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_sessions_has_state_snapshot_column(db_inspector):
-    columns = db_inspector.get_columns("sessions")
-    assert "state_snapshot" in [c["name"] for c in columns]
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
-Expected: FAIL, because the migration has not been applied yet
-
-**Step 3: Write minimal implementation**
-
-```python
-def upgrade() -> None:
-    op.add_column("sessions", sa.Column("state_snapshot", postgresql.JSONB, nullable=True))
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py backend/src/models/agent_chat_session.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
-git commit -m "feat(db): enforce sessions state_snapshot contract"
-```
-
-### Task 3.1: 会话与消息持久化仓储
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Create: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_run_persists_user_and_assistant_messages(db_session):
-    run = execute_run(...)
-    rows = list_messages(session_id=run.session_id)
-    assert rows[0].role == "user"
-    assert rows[1].role == "assistant"
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-async def append_message(...):
-    session.add(AgentChatMessage(...))
-
-async def update_session_aggregate(...):
-    session_obj.message_count = message_count
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure/persistence backend/tests/integration/core/agent/test_session_message_persistence.py
-git commit -m "feat: persist session lifecycle and messages for agent runs"
-```
-
-### Task 4: 定义 state_snapshot 结构与并发语义
-
-**Files:**
- Create: `backend/src/core/agent/domain/state_snapshot.py`
- Create: `backend/tests/unit/core/agent/test_state_snapshot.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_pending_tool_call_snapshot_contains_correlation_fields():
-    snap = StateSnapshot.new(...)
-    pending = snap.pending_tool_calls[0]
-    assert pending.tool_call_id
-    assert pending.status == "PENDING_APPROVAL"
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-class PendingToolCall(BaseModel):
-    tool_call_id: str
-    tool_name: str
-    status: Literal["PENDING_APPROVAL", "APPROVED", "EXECUTED", "REJECTED", "EXPIRED"]
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/domain/state_snapshot.py backend/tests/unit/core/agent/test_state_snapshot.py
-git commit -m "feat: define sessions state_snapshot schema for run and tool state"
-```
-
-### Task 5: 工具路由策略（前端/后端/审批）
-
-**Files:**
- Create: `backend/src/core/agent/domain/tool_policy.py`
- Create: `backend/tests/unit/core/agent/test_tool_policy.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_frontend_tool_requires_interrupt_and_client_execution():
-    decision = classify_tool_call(name="ui.navigate_to", source="request.tools")
-    assert decision.mode == "FRONTEND_EXECUTE"
-
-def test_backend_approval_tool_returns_interrupt_but_executes_on_backend_after_approve():
-    decision = classify_tool_call(name="srv.transfer_funds", requires_approval=True)
-    assert decision.mode == "BACKEND_APPROVAL_INTERRUPT"
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-if tool_name.startswith("ui."):
-    return ToolDecision(mode="FRONTEND_EXECUTE")
-if requires_approval:
-    return ToolDecision(mode="BACKEND_APPROVAL_INTERRUPT")
-return ToolDecision(mode="BACKEND_DIRECT_EXECUTE")
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/domain/tool_policy.py backend/tests/unit/core/agent/test_tool_policy.py
-git commit -m "feat: add frontend/backend tool policy and approval routing"
-```
-
-### Task 6: tool_call 与 tool_result 对账机制
-
-**Files:**
- Create: `backend/src/core/agent/domain/tool_correlation.py`
- Create: `backend/tests/unit/core/agent/test_tool_correlation.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_rejects_tool_result_when_tool_call_id_not_pending():
-    store = PendingToolStore([])
-    with pytest.raises(ToolCorrelationError):
-        store.apply_result(tool_call_id="unknown", result={"ok": True})
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-def apply_result(self, *, tool_call_id: str, result: dict) -> None:
-    pending = self._pending.get(tool_call_id)
-    if pending is None:
-        raise ToolCorrelationError("tool_call_id not pending")
-    pending.result = result
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/domain/tool_correlation.py backend/tests/unit/core/agent/test_tool_correlation.py
-git commit -m "feat: add tool call/result correlation guard"
-```
-
-### Task 7: Celery run/resume 异步任务
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
- Create: `backend/src/core/agent/application/run_service.py`
- Create: `backend/src/core/agent/application/resume_service.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_run_api_enqueues_celery_task(client):
-    resp = client.post("/api/v1/agent/runs", json={...})
-    assert resp.status_code == 202
-
-def test_resume_updates_session_status_and_snapshot(client):
-    resp = client.post("/api/v1/agent/runs/r1/resume", json={...})
-    assert resp.status_code == 202
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-def enqueue_run(cmd: RunCommand) -> str:
-    task = run_agent_task.apply_async(args=[cmd.model_dump()])
-    return task.id
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/application backend/src/core/agent/infrastructure/queue backend/tests/integration/core/agent/test_queue_run_resume.py
-git commit -m "feat: add celery-based run and resume tasks"
-```
-
-### Task 8: CrewAI 运行时加载与创建
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
- Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_runtime_creates_agents_tasks_from_resolved_config():
-    runtime = CrewAIRuntime(...)
-    crew = runtime.build_crew(message="hello")
-    assert len(crew.agents) >= 1
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-def build_crew(self, *, message: str) -> Crew:
-    agents = self._factory.build_agents(self._config)
-    tasks = self._factory.build_tasks(self._config, message=message)
-    return Crew(agents=agents, tasks=tasks)
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure/crewai backend/tests/unit/core/agent/test_crewai_runtime.py
-git commit -m "feat: create crewai runtime from resolved config"
-```
-
-### Task 9: AG-UI 与 ag-ui-crewai 事件桥
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
- Create: `backend/src/core/agent/infrastructure/agui/stream.py`
- Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_agui_stream_emits_required_lifecycle():
-    events = to_agui_events(internal_events=[...])
-    assert events[0]["type"] == "RUN_STARTED"
-    assert events[-1]["type"] in {"RUN_FINISHED", "RUN_ERROR"}
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-def to_agui_events(internal_events: list[dict]) -> list[dict]:
-    return [map_event(e) for e in internal_events]
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure/agui backend/tests/unit/core/agent/test_agui_bridge.py
-git commit -m "feat: add ag-ui and ag-ui-crewai event bridge"
-```
-
-### Task 10: LiteLLM 调用统计与会话聚合
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/litellm/client.py`
- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
- Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_tracker_aggregates_per_call_usage_and_cost():
-    t = UsageTracker()
-    t.add({"input_tokens": 10, "output_tokens": 5, "cost": "0.1"})
-    assert t.snapshot()["total_tokens"] == 15
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-def add(self, usage: dict[str, object]) -> None:
-    self.input_tokens += int(usage.get("input_tokens", 0))
-    self.output_tokens += int(usage.get("output_tokens", 0))
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure/litellm backend/tests/unit/core/agent/test_litellm_usage.py
-git commit -m "feat: add litellm usage and cost tracking"
-```
-
-### Task 11: v1/agent 薄层 API + SSE 出口
-
-**Files:**
- Create: `backend/src/v1/agent/router.py`
- Create: `backend/src/v1/agent/schemas.py`
- Create: `backend/src/v1/agent/dependencies.py`
- Create: `backend/src/v1/agent/service.py`
- Modify: `backend/src/v1/router.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
-
-**Step 1: Write the failing test**
-
-```python
-def test_run_endpoint_returns_sse_and_not_blocking(client):
-    resp = client.post("/api/v1/agent/runs", json={...})
-    assert resp.status_code == 202
-```
-
-**Step 2: Run test to verify it fails**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
-Expected: FAIL
-
-**Step 3: Write minimal implementation**
-
-```python
-@router.post("/runs", status_code=202)
-async def create_run(...):
-    task_id = service.enqueue_run(input_data)
-    return {"task_id": task_id}
-```
-
-**Step 4: Run test to verify it passes**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
-Expected: PASS
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/v1/agent backend/src/v1/router.py backend/tests/integration/v1/agent/test_routes.py
-git commit -m "feat: add thin v1 agent api and sse endpoints"
-```
-
-### Task 12: 端到端验证与文档回填
-
-**Files:**
- Modify: `docs/runtime/runtime-route.md`
- Modify: `docs/runtime/runtime-runbook.md`
-
-**Step 1: Run unit tests**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent backend/tests/unit/core/config backend/tests/unit/database -q`
-Expected: PASS
-
-**Step 2: Run integration tests**
-
-Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
-Expected: PASS
-
-**Step 3: Run lint and typecheck**
-
-Run: `PYTHONPATH=backend/src uv run ruff check backend/src backend/tests`
-Expected: PASS
-
-Run: `PYTHONPATH=backend/src uv run basedpyright backend/src`
-Expected: PASS
-
-**Step 4: Document protocol contracts**
-
-在运行手册中补充以下固定规则：
- `system_agents` + `static/crewai` 配置合并优先级。
- `sessions.state_snapshot` 字段结构与版本号。
- `messages` 入库顺序与 `sessions` 聚合字段更新规则。
- 工具调用审批与恢复时序图。
- tool_call/result 不匹配时的错误语义（`RUN_ERROR` + 可审计日志）。
-
-**Step 5: Commit**
-
-```bash
-git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md
-git commit -m "docs: add new agent runtime contracts and operational guide"
-```
-
-## Success Criteria
-
- [ ] Agent 创建配置由 `system_agents` 与 `core/config/static/crewai` 合并生成。
- [ ] run/resume 仅通过 Celery Worker 执行，Web 不执行编排。
- [ ] `v1/agent` 无业务编排代码。
- [ ] `sessions.state_snapshot` 承担运行态和工具审批恢复状态。
- [ ] 每次 run/resume 的会话状态变更均落库到 `sessions`。
- [ ] 用户/助手/工具消息按 `messages` 约束落库，`seq` 单调递增。
- [ ] 前端工具与后端工具（审批/非审批）策略完整可测。
- [ ] tool_call 与 tool_result 具备强关联校验并可恢复/报错。
- [ ] LiteLLM 逐次计量与 run 聚合可落库。
@@ -1,199 +0,0 @@
-# Agent Architecture Simplification Design
-
-**Date:** 2026-03-04
-**Status:** Approved
-**Author:** AI Assistant
-
-## Overview
-
-Simplify the agent configuration architecture by removing the redundant `user_agents` table and renaming `user_agent_catalog` to `system_agents`.
-
-## Problem Statement
-
-Current architecture has redundant data:
- `user_agent_catalog`: System-level agent configurations (3 agent types for all users)
- `user_agents`: Per-user agent instances (copies catalog data for each user)
-
-Since every user has the same 3 agents with identical configurations (from the catalog), maintaining the `user_agents` table creates unnecessary complexity and data duplication.
-
-## Goals
-
-1. Remove `user_agents` table and related code
-2. Rename `user_agent_catalog` to `system_agents` for clarity
-3. Preserve ability for future user-level prompt customization via `profiles.settings`
-4. Maintain backward compatibility during the deployment process
-
-## Non-Goals
-
- User-level agent configuration (LLM selection, temperature, etc.)
- User-level prompt customization implementation (deferred to future iteration)
-
-## Architecture Changes
-
-### Current Architecture
-
-```
-user_agent_catalog (system config)
-    ↓ (trigger copies for each new user)
-user_agents (per-user instances)
-```
-
-### New Architecture
-
-```
-system_agents (shared by all users)
-profiles.settings.agent_prompts (future: user-level prompts)
-```
-
-### Data Flow
-
-1. System startup: Load `system_agents` from YAML
-2. User creation: No longer creates `user_agents` records
-3. Runtime (future): Read from `system_agents` + merge with `profiles.settings.agent_prompts`
-
-## Database Migration
-
-### Changes
-
-1. **Delete `memories.agent_id` column**
-   - Remove foreign key `fk_memories_agent_id`
-   - Remove check constraint `chk_memory_type_agent_id`
-   - Remove index `ix_memories_agent_type_status`
-   - Drop column `agent_id`
-
-2. **Delete `user_agents` table**
-   - Remove all RLS policies
-   - Remove indexes: `ix_user_agents_agent_type`, `ix_user_agents_status`
-   - Remove foreign keys: `fk_user_agents_user_id`, `fk_user_agents_llm_id`, etc.
-   - Remove check constraint `chk_agent_type`
-   - Remove unique constraint `uq_user_agents_user_id_agent_type`
-   - Drop table
-
-3. **Rename `user_agent_catalog` → `system_agents`**
-   - Remove old RLS policies
-   - Rename table
-   - Rename constraints: `fk_user_agent_catalog_llm_id` → `fk_system_agents_llm_id`
-   - Rename check constraint: `chk_user_agent_catalog_status` → `chk_system_agents_status`
-   - Re-create RLS policies with new table name
-
-4. **Update trigger `create_profile_for_new_user()`**
-   - Remove logic that inserts into `user_agents`
-   - Initialize `profiles.settings.agent_prompts` with empty object
-
-5. **Update existing `profiles.settings`**
-   - Add `agent_prompts: {}` to all existing profiles
-
-### Downgrade Path
-
- Re-create `user_agents` table with all constraints and indexes
- Restore `memories.agent_id` column and constraints
- Rename `system_agents` → `user_agent_catalog`
- Restore original trigger
-
-## Code Changes
-
-### Model Layer
-
-**Delete:**
- `backend/src/models/user_agents.py`
-
-**Rename:**
- `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
- Class `UserAgentCatalog` → `SystemAgents`
-
-**Update:**
- `backend/src/models/__init__.py` - Update imports and exports
-
-### Configuration Layer
-
-**Rename:**
- `backend/src/core/config/static/database/user_agent_catalog.yaml`
-  → `backend/src/core/config/static/database/system_agents.yaml`
-
-**Update:**
- `backend/src/core/config/initial/init_data.py`
-  - `UserAgentCatalogSeed` → `SystemAgentsSeed`
-  - `UserAgentCatalogYaml` → `SystemAgentsYaml`
-  - Import from `models.system_agents`
-  - Path: `system_agents.yaml`
-  - Function: `initialize_user_agent_catalog()` → `initialize_system_agents()`
-
-### Future: Profile Settings Structure (Deferred)
-
-```json
-{
-  "agent_prompts": {
-    "INTENT_RECOGNITION": "custom prompt...",
-    "TASK_EXECUTION": "custom prompt...",
-    "RESULT_REPORTING": "custom prompt..."
-  }
-}
-```
-
-## Testing Strategy
-
-### Migration Tests
-
- Verify `user_agents` table is deleted
- Verify `system_agents` table exists with correct structure
- Verify trigger no longer creates `user_agents` records
- Verify `profiles.settings.agent_prompts` is initialized
- Verify downgrade path works correctly
-
-### Model Tests
-
- Verify `SystemAgents` model CRUD operations
- Verify `Profile.settings` JSONB storage
-
-### Integration Tests
-
- Verify `initialize_system_agents()` loads from YAML
- Verify data is correctly inserted into `system_agents` table
-
-## Deployment Considerations
-
-### Pre-deployment
-
- Back up the database (especially `user_agents`, if any data exists)
- Confirm production `user_agents` table has no critical data
-
-### Deployment
-
-1. Run migration: `alembic upgrade head`
-2. Verify migration success
-3. Restart application services
-4. Verify new user registration works without `user_agents`
-
-### Post-deployment
-
- Monitor application logs for any references to deleted `user_agents`
- Verify agent-related functionality still works
-
-## Risks and Mitigations
-
-| Risk | Mitigation |
-|------|-----------|
-| Existing `user_agents` data loss | Back up before migration; the data is redundant anyway |
-| Code still references `user_agents` | Comprehensive code search and testing |
-| Trigger fails on new user creation | Test migration thoroughly; include rollback plan |
-| Future need for user-level config | Can add `agent_overrides` to `profiles.settings` |
-
-## Success Criteria
-
- [ ] All tests pass
- [ ] Migration runs successfully (upgrade and downgrade)
- [ ] New user registration creates profile without `user_agents` records
- [ ] System agents are loaded from YAML correctly
- [ ] No references to `user_agents` remain in codebase
-
-## Timeline
-
- Design: 2026-03-04 (Completed)
- Implementation: TBD
- Testing: TBD
- Deployment: TBD
-
-## References
-
- Migration file: `backend/alembic/versions/YYYYMMDD_simplify_agent_architecture.py`
- Original catalog migration: `backend/alembic/versions/50ae013ce530_add_user_agent_catalog.py`
@@ -1,844 +0,0 @@
-# Agent Architecture Simplification Implementation Plan
-
-> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
-
-**Goal:** Simplify agent configuration by removing redundant user_agents table and renaming user_agent_catalog to system_agents
-
-**Architecture:** Delete user_agents table (including memories.agent_id dependency), rename user_agent_catalog to system_agents, update all references in code
-
-**Tech Stack:** Python 3.11+, SQLAlchemy, Alembic, PostgreSQL
-
---
-
-## Prerequisites
-
- [ ] Current branch: dev
- [ ] No uncommitted changes
- [ ] Docker services running (Supabase local)
-
-## Task 1: Create Database Migration
-
-**Files:**
- Create: `backend/alembic/versions/20260304_simplify_agent_architecture.py`
-
-**Step 1: Create migration file**
-
-Run: `cd backend && uv run alembic revision -m "simplify_agent_architecture"`
-
-Expected: New migration file created with revision ID
-
-**Step 2: Write migration upgrade logic**
-
-Edit the generated migration file with this complete upgrade function:
-
-```python
-def upgrade() -> None:
-    # 1. Delete memories.agent_id dependencies
-    op.drop_constraint("fk_memories_agent_id", "memories", type_="foreignkey")
-    op.drop_constraint("chk_memory_type_agent_id", "memories", type_="check")
-    op.execute("DROP INDEX IF EXISTS ix_memories_agent_type_status")
-    op.drop_column("memories", "agent_id")
-
-    # 2. Delete user_agents table
-    _drop_rls("user_agents")
-    
-    op.drop_constraint("fk_user_agents_updated_by", "user_agents", type_="foreignkey")
-    op.drop_constraint("fk_user_agents_created_by", "user_agents", type_="foreignkey")
-    op.drop_constraint("fk_user_agents_llm_id", "user_agents", type_="foreignkey")
-    op.drop_constraint("fk_user_agents_user_id", "user_agents", type_="foreignkey")
-    op.drop_constraint("chk_agent_type", "user_agents", type_="check")
-    op.drop_constraint("uq_user_agents_user_id_agent_type", "user_agents", type_="unique")
-    
-    op.execute("DROP INDEX IF EXISTS ix_user_agents_status")
-    op.execute("DROP INDEX IF EXISTS ix_user_agents_agent_type")
-    
-    op.drop_table("user_agents")
-
-    # 3. Rename user_agent_catalog to system_agents
-    _drop_rls("user_agent_catalog")
-    
-    op.rename_table("user_agent_catalog", "system_agents")
-    
-    op.execute(
-        "ALTER TABLE system_agents RENAME CONSTRAINT fk_user_agent_catalog_llm_id "
-        "TO fk_system_agents_llm_id"
-    )
-    op.execute(
-        "ALTER TABLE system_agents RENAME CONSTRAINT chk_user_agent_catalog_status "
-        "TO chk_system_agents_status"
-    )
-    
-    _enable_rls("system_agents")
-
-    # 4. Update trigger
-    op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
-    op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
-    
-    op.execute("""
-        CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
-        RETURNS trigger
-        LANGUAGE plpgsql
-        SECURITY DEFINER
-        SET search_path = public
-        AS $$
-        BEGIN
-            INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
-            VALUES (
-                NEW.id,
-                COALESCE(
-                    NEW.raw_user_meta_data ->> 'username',
-                    split_part(NEW.email, '@', 1),
-                    'user_' || substring(NEW.id::text, 1, 8)
-                ),
-                NULL,
-                NULL,
-                '{"agent_prompts": {}}'::jsonb,
-                now(),
-                now()
-            )
-            ON CONFLICT (id) DO NOTHING;
-            
-            RETURN NEW;
-        END;
-        $$
-    """)
-    
-    op.execute("""
-        CREATE TRIGGER on_auth_user_created
-            AFTER INSERT ON auth.users
-            FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
-    """)
-
-    # 5. Update existing profiles.settings
-    op.execute("""
-        UPDATE profiles 
-        SET settings = jsonb_set(
-            COALESCE(settings, '{}'::jsonb),
-            '{agent_prompts}',
-            '{}'::jsonb
-        )
-        WHERE settings IS NULL OR NOT settings ? 'agent_prompts'
-    """)
-```
-
-**Step 3: Write migration downgrade logic**
-
-Add this complete downgrade function:
-
-```python
-def downgrade() -> None:
-    # 1. Revert trigger
-    op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
-    op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
-    
-    op.execute("""
-        CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
-        RETURNS trigger
-        LANGUAGE plpgsql
-        SECURITY DEFINER
-        SET search_path = public
-        AS $$
-        BEGIN
-            INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
-            VALUES (
-                NEW.id,
-                COALESCE(
-                    NEW.raw_user_meta_data ->> 'username',
-                    split_part(NEW.email, '@', 1),
-                    'user_' || substring(NEW.id::text, 1, 8)
-                ),
-                NULL,
-                NULL,
-                '{}'::jsonb,
-                now(),
-                now()
-            )
-            ON CONFLICT (id) DO NOTHING;
-
-            INSERT INTO public.user_agents (id, user_id, llm_id, agent_type, config, status, created_by, updated_by)
-            SELECT 
-                gen_random_uuid(),
-                NEW.id,
-                uac.llm_id,
-                uac.agent_type,
-                uac.config,
-                uac.status,
-                NEW.id,
-                NEW.id
-            FROM public.user_agent_catalog uac;
-            
-            RETURN NEW;
-        END;
-        $$
-    """)
-    
-    op.execute("""
-        CREATE TRIGGER on_auth_user_created
-            AFTER INSERT ON auth.users
-            FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
-    """)
-
-    # 2. Revert rename: system_agents -> user_agent_catalog
-    _drop_rls("system_agents")
-    
-    op.rename_table("system_agents", "user_agent_catalog")
-    
-    op.execute(
-        "ALTER TABLE user_agent_catalog RENAME CONSTRAINT fk_system_agents_llm_id "
-        "TO fk_user_agent_catalog_llm_id"
-    )
-    op.execute(
-        "ALTER TABLE user_agent_catalog RENAME CONSTRAINT chk_system_agents_status "
-        "TO chk_user_agent_catalog_status"
-    )
-    
-    _enable_rls("user_agent_catalog")
-
-    # 3. Recreate user_agents table
-    op.create_table(
-        "user_agents",
-        sa.Column("id", sa.UUID(), nullable=False),
-        sa.Column("user_id", sa.UUID(), nullable=False),
-        sa.Column("llm_id", sa.UUID(), nullable=False),
-        sa.Column("agent_type", sa.String(length=20), nullable=False),
-        sa.Column(
-            "config",
-            postgresql.JSONB(astext_type=sa.Text()),
-            server_default="{}",
-            nullable=False,
-        ),
-        sa.Column("status", sa.String(length=20), nullable=False),
-        sa.Column("created_by", sa.UUID(), nullable=True),
-        sa.Column("updated_by", sa.UUID(), nullable=True),
-        sa.Column(
-            "created_at",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.Column(
-            "updated_at",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
-        sa.PrimaryKeyConstraint("id"),
-    )
-    
-    op.create_unique_constraint(
-        "uq_user_agents_user_id_agent_type", 
-        "user_agents", 
-        ["user_id", "agent_type"]
-    )
-    
-    op.execute(
-        "CREATE INDEX ix_user_agents_agent_type ON user_agents (agent_type)"
-    )
-    op.execute(
-        "CREATE INDEX ix_user_agents_status ON user_agents (status)"
-    )
-    
-    op.execute(
-        "ALTER TABLE user_agents ADD CONSTRAINT chk_agent_type "
-        "CHECK (agent_type IN ('INTENT_RECOGNITION', 'TASK_EXECUTION', 'RESULT_REPORTING'))"
-    )
-    
-    op.create_foreign_key(
-        "fk_user_agents_user_id",
-        "user_agents",
-        "users",
-        ["user_id"],
-        ["id"],
-        referent_schema="auth",
-        ondelete="CASCADE",
-    )
-    op.create_foreign_key(
-        "fk_user_agents_llm_id",
-        "user_agents",
-        "llms",
-        ["llm_id"],
-        ["id"],
-        ondelete="RESTRICT",
-    )
-    op.create_foreign_key(
-        "fk_user_agents_created_by",
-        "user_agents",
-        "users",
-        ["created_by"],
-        ["id"],
-        referent_schema="auth",
-        ondelete="SET NULL",
-    )
-    op.create_foreign_key(
-        "fk_user_agents_updated_by",
-        "user_agents",
-        "users",
-        ["updated_by"],
-        ["id"],
-        referent_schema="auth",
-        ondelete="SET NULL",
-    )
-    
-    _enable_rls("user_agents")
-
-    # 4. Recreate memories.agent_id
-    op.add_column(
-        "memories",
-        sa.Column("agent_id", sa.UUID(), nullable=True)
-    )
-    
-    op.create_foreign_key(
-        "fk_memories_agent_id",
-        "memories",
-        "user_agents",
-        ["agent_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-    
-    op.execute(
-        "CREATE INDEX ix_memories_agent_type_status ON memories (agent_id, memory_type, status)"
-    )
-    
-    op.execute(
-        "ALTER TABLE memories ADD CONSTRAINT chk_memory_type_agent_id "
-        "CHECK ((memory_type = 'work' AND agent_id IS NOT NULL) OR "
-        "(memory_type = 'user' AND agent_id IS NULL))"
-    )
-```
-
-**Step 4: Add helper functions**
-
-Add these helper functions at the end of the migration file:
-
-```python
-def _enable_rls(table_name: str) -> None:
-    for role in ["anon", "authenticated"]:
-        for action in ["select", "insert", "update", "delete"]:
-            op.execute(
-                f"DROP POLICY IF EXISTS {role}_{action}_{table_name} ON {table_name}"
-            )
-    op.execute(f"ALTER TABLE {table_name} ENABLE ROW LEVEL SECURITY")
-    for role in ["anon", "authenticated"]:
-        op.execute(
-            f"CREATE POLICY {role}_select_{table_name} ON {table_name} "
-            f"FOR SELECT TO {role} USING (false)"
-        )
-        op.execute(
-            f"CREATE POLICY {role}_insert_{table_name} ON {table_name} "
-            f"FOR INSERT TO {role} WITH CHECK (false)"
-        )
-        op.execute(
-            f"CREATE POLICY {role}_update_{table_name} ON {table_name} "
-            f"FOR UPDATE TO {role} USING (false) WITH CHECK (false)"
-        )
-        op.execute(
-            f"CREATE POLICY {role}_delete_{table_name} ON {table_name} "
-            f"FOR DELETE TO {role} USING (false)"
-        )
-
-
-def _drop_rls(table_name: str) -> None:
-    for role in ["anon", "authenticated"]:
-        op.execute(f"DROP POLICY IF EXISTS {role}_delete_{table_name} ON {table_name}")
-        op.execute(f"DROP POLICY IF EXISTS {role}_update_{table_name} ON {table_name}")
-        op.execute(f"DROP POLICY IF EXISTS {role}_insert_{table_name} ON {table_name}")
-        op.execute(f"DROP POLICY IF EXISTS {role}_select_{table_name} ON {table_name}")
-    op.execute(f"ALTER TABLE {table_name} DISABLE ROW LEVEL SECURITY")
-```
-
-**Step 5: Verify migration file**
-
-Check that all imports are correct:
-
-```python
-from typing import Sequence, Union
-
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-```
-
-**Step 6: Commit migration**
-
-```bash
-git add backend/alembic/versions/20260304_simplify_agent_architecture.py
-git commit -m "feat(db): add migration to simplify agent architecture"
-```
-
---
-
-## Task 2: Delete UserAgents Model
-
-**Files:**
- Delete: `backend/src/models/user_agents.py`
- Modify: `backend/src/models/__init__.py`
-
-**Step 1: Remove import from models/__init__.py**
-
-Edit `backend/src/models/__init__.py`:
-
-Remove this line:
-```python
-from models.user_agents import UserAgent
-```
-
-And remove `"UserAgent"` from `__all__` list.
-
-**Step 2: Delete user_agents.py file**
-
-```bash
-rm backend/src/models/user_agents.py
-```
-
-**Step 3: Verify no other imports**
-
-Run: `cd backend && grep -r "from models.user_agents" src/`
-
-Expected: No results (the import in `__init__.py` was already removed in Step 1)
-
-**Step 4: Commit**
-
-```bash
-git add backend/src/models/user_agents.py backend/src/models/__init__.py
-git commit -m "refactor(models): remove UserAgents model"
-```
-
---
-
-## Task 3: Rename UserAgentCatalog to SystemAgents
-
-**Files:**
- Rename: `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
- Modify: `backend/src/models/__init__.py`
-
-**Step 1: Rename model file**
-
-```bash
-mv backend/src/models/user_agent_catalog.py backend/src/models/system_agents.py
-```
-
-**Step 2: Update class name in system_agents.py**
-
-Edit `backend/src/models/system_agents.py`:
-
-Change:
-```python
-class UserAgentCatalog(TimestampMixin, Base):
-    __tablename__: str = "user_agent_catalog"
-```
-
-To:
-```python
-class SystemAgents(TimestampMixin, Base):
-    __tablename__: str = "system_agents"
-```
-
-**Step 3: Update imports in models/__init__.py**
-
-Edit `backend/src/models/__init__.py`:
-
-Change:
-```python
-from models.user_agent_catalog import UserAgentCatalog
-```
-
-To:
-```python
-from models.system_agents import SystemAgents
-```
-
-And change `"UserAgentCatalog"` to `"SystemAgents"` in `__all__` list.
-
-**Step 4: Commit**
-
-```bash
-git add backend/src/models/
-git commit -m "refactor(models): rename UserAgentCatalog to SystemAgents"
-```
-
---
-
-## Task 4: Update Configuration Files
-
-**Files:**
- Rename: `backend/src/core/config/static/database/user_agent_catalog.yaml`
-  → `backend/src/core/config/static/database/system_agents.yaml`
- Modify: `backend/src/core/config/initial/init_data.py`
-
-**Step 1: Rename YAML file**
-
-```bash
-mv backend/src/core/config/static/database/user_agent_catalog.yaml \
-   backend/src/core/config/static/database/system_agents.yaml
-```
-
-**Step 2: Update init_data.py imports**
-
-Edit `backend/src/core/config/initial/init_data.py`:
-
-Change:
-```python
-from models.user_agent_catalog import UserAgentCatalog
-```
-
-To:
-```python
-from models.system_agents import SystemAgents
-```
-
-**Step 3: Update Pydantic models**
-
-Change:
-```python
-class UserAgentCatalogSeed(BaseModel):
-    agent_type: str
-    llm_model_code: str
-    status: str
-    config: dict[str, Any]
-
-
-class UserAgentCatalogYaml(BaseModel):
-    agents: list[UserAgentCatalogSeed]
-```
-
-To:
-```python
-class SystemAgentsSeed(BaseModel):
-    agent_type: str
-    llm_model_code: str
-    status: str
-    config: dict[str, Any]
-
-
-class SystemAgentsYaml(BaseModel):
-    agents: list[SystemAgentsSeed]
-```
-
-**Step 4: Update path function**
-
-Change:
-```python
-def _default_user_agent_catalog_path() -> Path:
-    return (
-        Path(__file__).resolve().parents[1]
-        / "static"
-        / "database"
-        / "user_agent_catalog.yaml"
-    )
-```
-
-To:
-```python
-def _default_system_agents_path() -> Path:
-    return (
-        Path(__file__).resolve().parents[1]
-        / "static"
-        / "database"
-        / "system_agents.yaml"
-    )
-```
-
-**Step 5: Update load function**
-
-Change:
-```python
-def load_user_agent_catalog(catalog_path: Path | None = None) -> dict[str, Any]:
-    path = catalog_path or _default_user_agent_catalog_path()
-    with path.open("r", encoding="utf-8") as file:
-        loaded = yaml.safe_load(file) or {}
-    if not isinstance(loaded, dict):
-        raise ValueError(f"Invalid user agent catalog format: {path}")
-    raw_agents = loaded.get("agents", [])
-    if not isinstance(raw_agents, list):
-        raise ValueError(f"Invalid user agent catalog agents section: {path}")
-    try:
-        parsed = UserAgentCatalogYaml.model_validate({"agents": list(raw_agents)})
-    except ValidationError as exc:
-        raise ValueError(f"Invalid user agent catalog data: {path}") from exc
-
-    return parsed.model_dump()
-```
-
-To:
-```python
-def load_system_agents(catalog_path: Path | None = None) -> dict[str, Any]:
-    path = catalog_path or _default_system_agents_path()
-    with path.open("r", encoding="utf-8") as file:
-        loaded = yaml.safe_load(file) or {}
-    if not isinstance(loaded, dict):
-        raise ValueError(f"Invalid system agents format: {path}")
-    raw_agents = loaded.get("agents", [])
-    if not isinstance(raw_agents, list):
-        raise ValueError(f"Invalid system agents agents section: {path}")
-    try:
-        parsed = SystemAgentsYaml.model_validate({"agents": list(raw_agents)})
-    except ValidationError as exc:
-        raise ValueError(f"Invalid system agents data: {path}") from exc
-
-    return parsed.model_dump()
-```
-
-**Step 6: Update upsert function**
-
-Change:
-```python
-async def _upsert_user_agent_catalog(
-    session: AsyncSession,
-    *,
-    agent_type: str,
-    llm_id: uuid.UUID,
-    status: str,
-    config: dict[str, Any],
-) -> None:
-    result = await session.execute(
-        select(UserAgentCatalog).where(UserAgentCatalog.agent_type == agent_type)
-    )
-    catalog_entry = result.scalar_one_or_none()
-
-    if catalog_entry is None:
-        session.add(
-            UserAgentCatalog(
-                agent_type=agent_type,
-                llm_id=llm_id,
-                status=status,
-                config=config,
-            )
-        )
-    else:
-        catalog_entry.llm_id = llm_id
-        catalog_entry.status = status
-        catalog_entry.config = config
-```
-
-To:
-```python
-async def _upsert_system_agents(
-    session: AsyncSession,
-    *,
-    agent_type: str,
-    llm_id: uuid.UUID,
-    status: str,
-    config: dict[str, Any],
-) -> None:
-    result = await session.execute(
-        select(SystemAgents).where(SystemAgents.agent_type == agent_type)
-    )
-    catalog_entry = result.scalar_one_or_none()
-
-    if catalog_entry is None:
-        session.add(
-            SystemAgents(
-                agent_type=agent_type,
-                llm_id=llm_id,
-                status=status,
-                config=config,
-            )
-        )
-    else:
-        catalog_entry.llm_id = llm_id
-        catalog_entry.status = status
-        catalog_entry.config = config
-```
-
-**Step 7: Update initialize function**
-
-Change:
-```python
-async def initialize_user_agent_catalog() -> None:
-    """Initialize user agent catalog from YAML."""
-    catalog = load_user_agent_catalog()
-
-    async with AsyncSessionLocal() as session:
-        async with session.begin():
-            for agent in catalog["agents"]:
-                result = await session.execute(
-                    select(Llm).where(Llm.model_code == agent["llm_model_code"])
-                )
-                llm = result.scalar_one_or_none()
-                if llm is None:
-                    raise RuntimeError(
-                        f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
-                    )
-
-                await _upsert_user_agent_catalog(
-                    session,
-                    agent_type=agent["agent_type"],
-                    llm_id=llm.id,
-                    status=agent["status"],
-                    config=agent["config"],
-                )
-
-    logger.info("Initialized user agent catalog")
-```
-
-To:
-```python
-async def initialize_system_agents() -> None:
-    """Initialize system agents from YAML."""
-    catalog = load_system_agents()
-
-    async with AsyncSessionLocal() as session:
-        async with session.begin():
-            for agent in catalog["agents"]:
-                result = await session.execute(
-                    select(Llm).where(Llm.model_code == agent["llm_model_code"])
-                )
-                llm = result.scalar_one_or_none()
-                if llm is None:
-                    raise RuntimeError(
-                        f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
-                    )
-
-                await _upsert_system_agents(
-                    session,
-                    agent_type=agent["agent_type"],
-                    llm_id=llm.id,
-                    status=agent["status"],
-                    config=agent["config"],
-                )
-
-    logger.info("Initialized system agents")
-```
-
-**Step 8: Update initialize_data function**
-
-Change:
-```python
-async def initialize_data() -> bool:
-    """Initialize bootstrap data."""
-    await initialize_llm_catalog()
-    await initialize_user_agent_catalog()
-
-    return True
-```
-
-To:
-```python
-async def initialize_data() -> bool:
-    """Initialize bootstrap data."""
-    await initialize_llm_catalog()
-    await initialize_system_agents()
-
-    return True
-```
-
-**Step 9: Commit**
-
-```bash
-git add backend/src/core/config/
-git commit -m "refactor(config): rename user_agent_catalog to system_agents"
-```
-
---
-
-## Task 5: Run Migration
-
-**Step 1: Run migration**
-
-```bash
-cd backend && uv run alembic upgrade head
-```
-
-Expected: Migration runs successfully
-
-**Step 2: Verify tables**
-
-Connect to database and check:
- `user_agents` table should NOT exist
- `system_agents` table should exist
- `memories.agent_id` column should NOT exist
-
-**Step 3: Test downgrade (optional but recommended)**
-
-```bash
-cd backend && uv run alembic downgrade -1
-```
-
-Expected: Previous migration restored
-
-**Step 4: Re-run upgrade**
-
-```bash
-cd backend && uv run alembic upgrade head
-```
-
-Expected: Migration runs successfully again
-
---
-
-## Task 6: Run Tests and Linting
-
-**Step 1: Run type checking**
-
-```bash
-cd backend && uv run basedpyright src/
-```
-
-Expected: No errors
-
-**Step 2: Run linting**
-
-```bash
-cd backend && uv run ruff check src/
-```
-
-Expected: No errors
-
-**Step 3: Run tests**
-
-```bash
-cd backend && uv run pytest tests/
-```
-
-Expected: All tests pass
-
-**Step 4: Fix any failures**
-
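-If any tests fail due to `UserAgentCatalog` references, update them to use `SystemAgents`. Tests that reference the deleted `UserAgent` model must be removed or rewritten, because that model has no replacement.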
-
---
-
-## Task 7: Final Verification
-
-**Step 1: Search for any remaining references**
-
-```bash
-cd backend && grep -r "user_agents" src/ --include="*.py"
-grep -r "UserAgent" src/ --include="*.py"
-```
-
-Expected: No results (except in migration files)
-
-**Step 2: Test new user registration**
-
-Start the backend server and register a new user. Verify:
- Profile is created
- No user_agents records are created
- profiles.settings contains `agent_prompts: {}`
-
-**Step 3: Commit final changes**
-
-```bash
-git add .
-git commit -m "feat: complete agent architecture simplification"
-```
-
---
-
-## Success Criteria
-
- [ ] Migration runs successfully (upgrade and downgrade)
- [ ] No UserAgent model references in code
- [ ] SystemAgents model works correctly
- [ ] All tests pass
- [ ] Linting passes
- [ ] Type checking passes
- [ ] New user registration works without user_agents
-
-## Notes
-
- Keep the design document updated if any changes are made during implementation
- Test migration thoroughly before deploying to production
- Back up the database before running the migration in production
@@ -1,81 +0,0 @@
-# Agent Runtime Closed Loop E2E Design
-
-## 背景
-
-当前 `test_agent_sse_flow.py` 不能稳定证明真实闭环：
- `session_id` 由随机 UUID 生成，导致 `POST /api/v1/agent/runs` 经常 404。
- 测试脚本存在不可达重复代码，诊断信息不完整。
- 未覆盖首聊自动建会话语义，和真实聊天入口不匹配。
-
-目标是验证真实环境下业务闭环是否可用：
-1. 用户请求 `agent` 路由
-2. 请求进入异步任务
-3. runtime 读取 `system_agents` 和 `llm` 配置并构建执行流程
-4. 真实 LLM 请求发出并返回
-5. `sessions`/`messages` 正确落库
-6. 成本和 token 统计正确
-7. 事件按 AG-UI 规范发布并可由 `stream_events` 订阅
-
-## 设计原则
-
- 真实优先：不使用 mock，不替换 queue/redis/db/llm。
- 双轨验证：
-  - 诊断脚本用于本地排障（快速观察全链路状态）。
-  - pytest E2E 用例用于可重复回归。
- 明确前置条件：必须先使用 `infra/scripts/app.sh start` 启动 tmux 服务。
- 本地真实 LLM 基线：DashScope Qwen。
-
-## API 契约调整
-
-### `POST /api/v1/agent/runs`
-
- 现状：`session_id` 必填且必须存在。
- 新契约：`session_id` 可选。
-  - 有值：复用现有会话，校验 owner。
-  - 无值：在服务层先创建会话，再入队 run。
- 响应扩展：返回 `created` 标识是否为首聊自动建会话。
-
-该契约与聊天产品行为一致：用户首条消息即可开始，不需要前置调用创建会话接口。
-
-## 数据关系与删除语义
-
- `messages.session_id -> sessions.id` 为外键，且硬删除级联（`ondelete=CASCADE`）。
- 软删除需要补齐级联：
-  - 软删 `sessions` 时，同事务更新对应 `messages.deleted_at`。
-  - E2E 增加验证，确保软删后默认查询不可见。
-
-## 测试架构
-
-### A. 诊断脚本（根目录）
-
-重构 `test_agent_sse_flow.py`：
- 增加环境健康检查（web/redis/db）。
- 支持两种模式：
-  - `--new-session`：不传 `session_id`，验证首聊自动创建。
-  - `--reuse-session <id>`：验证复聊路径。
- 输出结构化阶段日志：HTTP、task_id、SSE 事件、数据库断言、失败根因。
-
-### B. pytest E2E（`backend/tests/e2e`）
-
-新增 `test_agent_closed_loop_live.py`：
- 标记为 `live`，默认不在 CI 执行。
- 用真实 JWT、真实 HTTP 请求、真实 SSE 订阅。
- 断言最小闭环标准：
-  - run 返回 202
-  - SSE 至少收到 `RUN_STARTED` 与终态（`RUN_FINISHED` 或 `RUN_ERROR`）
-  - `sessions` 状态和计数更新
-  - `messages` 有新增记录
-  - token/cost 字段非负且会话聚合一致
-
-## 验收标准
-
- `uv run python test_agent_sse_flow.py --new-session` 通过。
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -v -m live` 通过。
- 首聊场景不需要外部先建 `session_id`。
- 软删除会话后，消息软删除行为与约束一致。
-
-## 风险与回退
-
- 真实 LLM 网络抖动会造成不稳定：通过重试和超时策略降低误报。
- 生产契约变更风险：保持字段向后兼容（原 `session_id` 仍可传）。
- 如果新契约引入问题，可临时退回“必传 session_id”路径并保留测试脚本诊断能力。
@@ -1,230 +0,0 @@
-# Agent Runtime Closed Loop E2E Implementation Plan
-
-> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
-
-**Goal:** 让 agent 闭环在真实本地环境中可验证：`runs` 支持首聊自动建会话，并通过真实异步任务、真实 LLM、真实落库与真实 SSE 证明端到端可用。
-
-**Architecture:** 在 `v1/agent` 服务层引入“可选 session_id + 自动建会话”语义；保持已有 owner 鉴权路径。重构诊断脚本并新增 live E2E 用例，统一验证 run 入队、事件流、数据库状态、成本统计与删除语义。通过最小侵入改造现有 run/resume 流程，确保兼容已存在调用。
-
-**Tech Stack:** FastAPI, SQLAlchemy async, Celery, Redis Stream, LiteLLM, PyJWT, pytest, httpx
-
---
-
-### Task 1: 扩展 API 契约（session_id 可选）
-
-**Files:**
- Modify: `backend/src/v1/agent/schemas.py`
- Modify: `backend/src/v1/agent/router.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
-
-**Step 1: Write the failing test**
-
-在 `test_routes.py` 新增用例：请求体不传 `session_id` 仍返回 202，且响应含 `session_id`。
-
-**Step 2: Run test to verify it fails**
-
-Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -k "runs and session" -v`
-Expected: FAIL，提示 `session_id` 缺失导致 422 或 mock 接口签名不匹配。
-
-**Step 3: Write minimal implementation**
-
- `RunRequest.session_id` 改为可选。
- `enqueue_run` 调用 service 时传可选值。
- `TaskAcceptedResponse` 增加 `created: bool` 字段。
-
-**Step 4: Run test to verify it passes**
-
-Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
-Expected: PASS。
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/v1/agent/schemas.py backend/src/v1/agent/router.py backend/tests/integration/v1/agent/test_routes.py
-git commit -m "feat: allow agent runs without pre-created session"
-```
-
-### Task 2: 服务层支持自动建会话并保持鉴权
-
-**Files:**
- Modify: `backend/src/v1/agent/service.py`
- Modify: `backend/src/v1/agent/repository.py`
- Modify: `backend/src/v1/agent/dependencies.py`
- Test: `backend/tests/unit/v1/agent/test_service.py` (new)
-
-**Step 1: Write the failing test**
-
-新增单测覆盖：
- `session_id is None` 时调用 `create_session_for_user` 并返回 `created=True`
- `session_id 有值` 时复用并校验 owner
-
-**Step 2: Run test to verify it fails**
-
-Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
-Expected: FAIL，当前 service 无自动建会话能力。
-
-**Step 3: Write minimal implementation**
-
- repository 增加 `create_session_for_user(user_id)`。
- service `enqueue_run` 处理两条路径：
-  - 无 `session_id`：先创建 session。
-  - 有 `session_id`：校验 owner。
- 返回 `TaskAccepted(task_id, session_id, created)`。
-
-**Step 4: Run test to verify it passes**
-
-Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
-Expected: PASS。
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/v1/agent/service.py backend/src/v1/agent/repository.py backend/src/v1/agent/dependencies.py backend/tests/unit/v1/agent/test_service.py
-git commit -m "feat: auto-create chat session on first agent run"
-```
-
-### Task 3: 对齐 runtime 闭环数据断言（messages/sessions/cost）
-
-**Files:**
- Modify: `backend/src/core/agent/application/run_service.py`
- Modify: `backend/src/core/agent/application/resume_service.py`
- Modify: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
-
-**Step 1: Write the failing test**
-
-在集成测试增加断言：
- `sessions.total_tokens`、`sessions.total_cost` 有更新
- `messages` 的 token/cost 字段与 session 聚合一致
-
-**Step 2: Run test to verify it fails**
-
-Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
-Expected: FAIL，当前默认 token/cost 为 0，未做聚合更新。
-
-**Step 3: Write minimal implementation**
-
- run/resume 流程接入 usage/cost 结果（来自 litellm 返回或 fallback 规则）。
- message 写入时填充 input/output tokens 与 cost。
- session 更新时累加 total_tokens/total_cost。
-
-**Step 4: Run test to verify it passes**
-
-Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
-Expected: PASS。
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/application/run_service.py backend/src/core/agent/application/resume_service.py backend/src/core/agent/infrastructure/persistence/message_repository.py backend/src/core/agent/infrastructure/persistence/session_repository.py backend/tests/integration/core/agent/test_queue_run_resume.py
-git commit -m "feat: persist runtime token and cost aggregates"
-```
-
-### Task 4: 补齐软删除级联（session -> messages）
-
-**Files:**
- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Modify: `backend/src/v1/agent/service.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
-
-**Step 1: Write the failing test**
-
-新增用例：软删 session 后，同会话 messages 的 `deleted_at` 同步写入。
-
-**Step 2: Run test to verify it fails**
-
-Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
-Expected: FAIL，当前无软删级联。
-
-**Step 3: Write minimal implementation**
-
- repository 增加 `soft_delete_session_with_messages(session_id)`。
- service 调用时使用同事务批量更新 messages。
-
-**Step 4: Run test to verify it passes**
-
-Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
-Expected: PASS。
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure/persistence/session_repository.py backend/src/v1/agent/service.py backend/tests/integration/core/agent/test_queue_run_resume.py
-git commit -m "fix: cascade soft delete from sessions to messages"
-```
-
-### Task 5: 重构诊断脚本并新增 live E2E
-
-**Files:**
- Modify: `test_agent_sse_flow.py`
- Create: `backend/tests/e2e/test_agent_closed_loop_live.py`
- Modify: `docs/bugs/2026-03-05-agent-runtime-bugs.md`
-
-**Step 1: Write the failing test**
-
-新增 live E2E 用例（`@pytest.mark.live`）：
- 首聊不传 `session_id` 返回 202
- 订阅 SSE 收到关键事件
- DB 断言 session/messages/tokens/cost
-
-**Step 2: Run test to verify it fails**
-
-Run: `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
-Expected: FAIL，当前契约或脚本未对齐。
-
-**Step 3: Write minimal implementation**
-
- 清理脚本重复/不可达逻辑。
- 增加健康检查、阶段化日志、超时和错误根因输出。
- E2E 用例复用脚本中的 helper（JWT、SSE 解析、DB 断言）。
-
-**Step 4: Run test to verify it passes**
-
-Run:
- `uv run python test_agent_sse_flow.py --new-session`
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
-
-Expected: PASS。
-
-**Step 5: Commit**
-
-```bash
-git add test_agent_sse_flow.py backend/tests/e2e/test_agent_closed_loop_live.py docs/bugs/2026-03-05-agent-runtime-bugs.md
-git commit -m "test: add live closed-loop agent e2e verification"
-```
-
-### Task 6: 全量验证与文档同步
-
-**Files:**
- Modify: `docs/runtime/runtime-runbook.md`
- Modify: `docs/runtime/runtime-route.md`
-
-**Step 1: Run targeted checks**
-
-Run:
- `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
- `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
- `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
-
-Expected: PASS。
-
-**Step 2: Run quality gates**
-
-Run:
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright`
-
-Expected: PASS。
-
-**Step 3: Update docs**
-
-记录本地启动流程、真实 LLM 前置配置、live E2E 执行方式和故障排查。
-
-**Step 4: Commit**
-
-```bash
-git add docs/runtime/runtime-runbook.md docs/runtime/runtime-route.md
-git commit -m "docs: document live agent closed-loop e2e workflow"
-```
@@ -1,469 +0,0 @@
-# Agent Runtime Closed Loop Implementation Plan
-
-> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
-
-**Goal:** Build a production-grade closed-loop agent runtime where `frontend -> FastAPI -> Celery -> run/resume service -> CrewAI -> AG-UI events -> Redis Stream -> SSE` is fully connected and verifiable.
-
-**Architecture:** Keep HTTP API as control-plane and worker as data-plane. The API validates auth/ownership and enqueues commands, the Celery worker executes run/resume business logic using DB-driven agent config, runtime emits normalized AG-UI events and usage/cost telemetry, all events are persisted to Redis Stream, and SSE endpoint streams from Redis with resume support (`Last-Event-ID`).
-
-**Tech Stack:** FastAPI, SQLAlchemy AsyncSession, Celery, Redis Streams, CrewAI, LiteLLM, Pydantic, pytest (unit/integration).
-
-**Confirmed Constraints (locked):**
- Persist semantics use existing `messages.role` only (`assistant|user|system|tool`), no new `message_kind` column.
- `tool_result` must be semantically complete (especially UI schema); do not store summary-only payload.
- Store full `tool_result` payload in Supabase Storage (private bucket) and persist durable object reference in DB metadata; do not rely on expiring signed URL as primary reference.
- `metadata` must be fixed and typed via Pydantic model (no free-form drift).
- Do not introduce additional business tables for this scope; keep schema minimal.
- CrewAI runtime must default to streaming mode.
- Full traceability target is final semantic reconstruction of `user/assistant/tool_result`; chunk-level replay is not required.
-
-**Metadata Contract (fixed, Pydantic-enforced):**
- Global required keys for all message metadata: `type`, `run_id`, `turn_id`.
- Global optional keys for all message metadata: `event_id`, `parent_message_id`, `error`.
- `type=user_input`:
-  - Required: `type`, `run_id`, `turn_id`.
-  - Optional: `input_source`, `client_ts`.
- `type=assistant_output`:
-  - Required: `type`, `run_id`, `turn_id`.
-  - Optional: `finish_reason`, `model_provider`, `cost_source`.
- `type=tool_call` (`role=assistant`):
-  - Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `tool_args`.
-  - Optional: `tool_schema_version`, `timeout_ms`.
- `type=tool_result` (`role=tool`):
-  - Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `storage_bucket`, `storage_path`, `payload_sha256`, `payload_bytes`, `payload_format`.
-  - Optional: `ui_schema_version`, `compression`, `storage_etag`, `render_hints`.
- Validation rules:
-  - `messages.role=tool` must use `metadata.type=tool_result`.
-  - `messages.role=assistant` + tool event must use `metadata.type=tool_call` or `assistant_output`.
-  - `tool_result` payload in DB must be reconstructable to AG-UI `TOOL_CALL_RESULT` using Storage object + metadata checksum.
-
---
-
-### Task 1: Add Agent Module Skeleton and Contracts
-
-**Files:**
- Create: `backend/src/core/agent/__init__.py`
- Create: `backend/src/core/agent/application/__init__.py`
- Create: `backend/src/core/agent/domain/__init__.py`
- Create: `backend/src/core/agent/infrastructure/events/__init__.py`
- Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
- Create: `backend/src/core/agent/infrastructure/agui/stream.py`
- Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
-
-**Step 1: Write failing tests for event normalization and SSE formatting**
-
-```python
-def test_bridge_normalizes_event_type_to_upper_snake() -> None:
-    events = [{"type": "runStarted", "data": {"ok": True}}]
-    out = to_agui_events(events)
-    assert out[0]["type"] == "RUN_STARTED"
-
-
-def test_sse_format_includes_id_event_data() -> None:
-    payload = to_sse_event(stream_id="1-0", event={"type": "RUN_STARTED", "data": {"a": 1}})
-    assert payload.startswith("id: 1-0\nevent: RUN_STARTED\ndata: {")
-```
-
-**Step 2: Run tests and confirm RED**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`  
-Expected: FAIL with missing module/function errors.
-
-**Step 3: Implement minimal bridge + stream utilities**
-
-```python
-def to_agui_events(internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    ...
-
-
-def to_sse_event(stream_id: str, event: dict[str, Any]) -> str:
-    ...
-```
-
-**Step 4: Run tests and confirm GREEN**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`  
-Expected: PASS.
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent backend/tests/unit/core/agent/test_agui_bridge.py
-git commit -m "feat(agent): add ag-ui bridge and sse serializer utilities"
-```
-
-### Task 2: Implement Redis Stream Event Store and Reader
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/events/redis_stream.py`
- Modify: `backend/src/core/config/settings.py`
- Test: `backend/tests/unit/core/agent/test_redis_stream.py`
-
-**Step 1: Write failing tests for append/read semantics**
-
-```python
-def test_append_event_writes_json_payload() -> None:
-    ...
-
-
-def test_read_events_respects_last_event_id() -> None:
-    ...
-```
-
-**Step 2: Run RED**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`  
-Expected: FAIL.
-
-**Step 3: Implement Redis stream adapter**
-
-```python
-def append_event_sync(*, session_id: UUID, event: dict[str, Any]) -> str:
-    ...
-
-
-async def read_events(...):
-    ...
-```
-
-**Step 4: Run GREEN**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`  
-Expected: PASS.
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure/events/redis_stream.py backend/src/core/config/settings.py backend/tests/unit/core/agent/test_redis_stream.py
-git commit -m "feat(agent): add redis stream event transport for run events"
-```
-
-### Task 3: Build CrewAI Runtime + AG-UI Event Mapping + Usage Tracking
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
- Create: `backend/src/core/agent/infrastructure/litellm/client.py`
- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
- Create: `backend/src/core/agent/infrastructure/config/resolver.py`
- Modify: `backend/src/core/config/settings.py`
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
- Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
- Test: `backend/tests/unit/core/agent/test_config_resolver.py`
-
-**Step 1: Write failing runtime tests (events + cost + strict errors)**
-
-```python
-def test_runtime_emits_text_tool_reasoning_events() -> None:
-    ...
-
-
-def test_runtime_raises_if_model_or_api_key_missing() -> None:
-    ...
-
-
-def test_usage_tracker_extracts_tokens_and_cost() -> None:
-    ...
-```
-
-**Step 2: Run RED**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`  
-Expected: FAIL.
-
-**Step 3: Implement runtime and tracker**
-
- Register CrewAI event handlers (`Task/LLM/Tool/Reasoning`) and map to AG-UI canonical event types.
- Default runtime to streaming mode for CrewAI execution.
- Enforce strict config behavior: no `llm_model_code` or provider key -> raise.
- Use LiteLLM cost calculator for actual cost; if cost cannot be computed, fail closed (raise), do not silently record zero.
-
-**Step 4: Run GREEN**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`  
-Expected: PASS.
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py backend/src/core/config/settings.py
-git commit -m "feat(agent): implement crewai runtime events and litellm usage-cost auditing"
-```
-
-### Task 4: Implement Run/Resume Application Services (DB Config + Persistence)
-
-**Files:**
- Create: `backend/src/core/agent/application/run_service.py`
- Create: `backend/src/core/agent/application/resume_service.py`
- Create: `backend/src/core/agent/application/session_state_persistence.py`
- Create: `backend/src/core/agent/domain/state_snapshot.py`
- Create: `backend/src/core/agent/domain/tool_correlation.py`
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
- Test: `backend/tests/unit/core/agent/test_state_snapshot.py`
- Test: `backend/tests/unit/core/agent/test_tool_correlation.py`
-
-**Step 1: Write failing tests for DB-driven runtime and aggregate updates**
-
-```python
-async def test_run_service_loads_agent_config_from_db_and_persists_messages() -> None:
-    ...
-
-
-async def test_resume_service_requires_pending_tool_call() -> None:
-    ...
-```
-
-**Step 2: Run RED**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`  
-Expected: FAIL.
-
-**Step 3: Implement services**
-
- `run_service`: read session + system agent config from DB, execute runtime, persist user/assistant messages, update session aggregates.
- `resume_service`: validate pending tool call status, enforce idempotency semantics, resume runtime, persist audit fields.
- Persist metadata audit (`tokens`, `cost`, `cost_source`, correlation ids) for every assistant message.
- Persist tool lifecycle with role-only model:
-  - tool call message uses `role=assistant` with fixed metadata (`type=tool_call`, `tool_call_id`, `tool_name`, arguments reference).
-  - tool result message uses `role=tool` with fixed metadata (`type=tool_result`, `tool_call_id`, `tool_name`, storage bucket/path, checksum, bytes, schema version).
- `tool_result` full payload (UI schema) is uploaded to Supabase Storage private bucket; DB stores durable reference and verification fields.
- Ensure DB->AG-UI `TOOL_CALL_RESULT` reconstruction is equivalent to SSE-streamed final tool result semantics.
- Enforce metadata contract by Pydantic model at write path and read path (reject malformed metadata early).
-
-**Step 4: Run GREEN**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`  
-Expected: PASS.
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/application backend/src/core/agent/domain backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py
-git commit -m "feat(agent): add run-resume app services with db config and audit persistence"
-```
-
-### Task 5: Wire Celery Worker Task to Run/Resume and Publish Runtime Events
-
-**Files:**
- Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
- Modify: `backend/src/core/celery/app.py`
- Test: `backend/tests/unit/core/agent/test_queue_tasks.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
-
-**Step 1: Write failing queue tests**
-
-```python
-def test_run_agent_task_emits_started_runtime_and_finished_events() -> None:
-    ...
-
-
-def test_run_agent_task_emits_error_event_on_exception() -> None:
-    ...
-```
-
-**Step 2: Run RED**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`  
-Expected: FAIL.
-
-**Step 3: Implement worker task flow**
-
- Decode command type (`run`/`resume`).
- Emit lifecycle events (`RUN_STARTED/RUN_RESUMED/RUN_FINISHED/RUN_ERROR`).
- Forward runtime callback events to Redis stream immediately.
- Persist session status/snapshot after completion.
-
-**Step 4: Run GREEN**
-
-Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`  
-Expected: PASS.
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/core/agent/infrastructure/queue/tasks.py backend/src/core/celery/app.py backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py
-git commit -m "feat(agent): wire celery run-resume execution and redis event publishing"
-```
-
-### Task 6: Implement API Contracts (Run/Resume/SSE) + Auth/Ownership/Idempotency
-
-**Files:**
- Create: `backend/src/v1/agent/schemas.py`
- Create: `backend/src/v1/agent/repository.py`
- Create: `backend/src/v1/agent/service.py`
- Create: `backend/src/v1/agent/router.py`
- Create: `backend/src/v1/agent/dependencies.py`
- Modify: `backend/src/v1/router.py`
- Test: `backend/tests/unit/v1/agent/test_service.py`
- Test: `backend/tests/unit/v1/agent/test_owner_guard.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
-
-**Step 1: Write failing API tests**
-
-```python
-async def test_run_requires_auth_and_returns_202_task_id() -> None:
-    ...
-
-
-async def test_stream_reads_from_last_event_id() -> None:
-    ...
-
-
-def test_resume_idempotency_uses_redis_lock_and_task_key() -> None:
-    ...
-```
-
-**Step 2: Run RED**
-
-Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`  
-Expected: FAIL.
-
-**Step 3: Implement API service/router**
-
- `POST /api/v1/agent/runs` enqueue run command.
- `POST /api/v1/agent/runs/{session_id}/resume` enqueue resume command with async redis lock + dedup task key.
- `GET /api/v1/agent/runs/{session_id}/events` SSE stream from Redis with `Last-Event-ID`.
- Enforce auth and session ownership checks on all endpoints.
- Validate `tool_call_id` and message length/pattern boundaries.
-
-**Step 4: Run GREEN**
-
-Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`  
-Expected: PASS.
-
-**Step 5: Commit**
-
-```bash
-git add backend/src/v1/agent backend/src/v1/router.py backend/tests/unit/v1/agent backend/tests/integration/v1/agent/test_routes.py
-git commit -m "feat(agent): add authenticated run-resume-sse api with redis-backed idempotency"
-```
-
-### Task 7: Add Schema/Migration Contract for Session Snapshot + Audit Fields
-
-**Files:**
- Create: `backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py`
- Modify: `backend/src/models/agent_chat_session.py`
- Modify: `backend/src/models/agent_chat_message.py`
- Test: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
-
-**Migration scope note:**
- Fix current schema drift: model has `sessions.state_snapshot` but migration chain does not reliably provide this column in current DB state.
- Keep schema minimal; do not add new business tables in this migration.
-
-**Step 1: Write failing migration contract tests**
-
-```python
-def test_session_has_state_snapshot_and_status_contract() -> None:
-    ...
-
-
-def test_message_has_token_cost_and_metadata_contract() -> None:
-    ...
-```
-
-**Step 2: Run RED**
-
-Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`  
-Expected: FAIL.
-
-**Step 3: Implement migration and model alignment**
-
- Ensure `state_snapshot`, `status`, token/cost/metadata fields are present and nullable constraints are explicit.
- Add/verify indexes needed for role-based semantic reconstruction (`session_id, seq`, and targeted metadata lookups if required).
- Ensure `metadata` structure is validated by fixed Pydantic schema at application boundary.
- Add DB-level guardrails where feasible (check constraints) for role/metadata consistency without introducing new tables.
- Keep reversible downgrade path.
-
-**Step 4: Run GREEN**
-
-Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`  
-Expected: PASS.
-
-**Step 5: Commit**
-
-```bash
-git add backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py backend/src/models/agent_chat_session.py backend/src/models/agent_chat_message.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
-git commit -m "feat(agent): add db contract for session snapshot and usage audit fields"
-```
-
-### Task 8: End-to-End Closure Verification and Docs Update
-
-**Files:**
- Modify: `docs/runtime/runtime-route.md`
- Modify: `docs/runtime/runtime-runbook.md`
- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
-
-**Step 1: Write integration test for full closure path**
-
-```python
-async def test_closed_loop_run_flow_frontend_to_sse() -> None:
-    # run request -> queue command -> runtime events -> redis stream -> sse read
-    ...
-```
-
-Also verify:
- `tool_result` full UI schema is written to Supabase Storage private bucket.
- `messages.role=tool` row contains stable storage reference and checksum metadata.
- Reading from DB can reconstruct final AG-UI `TOOL_CALL_RESULT` event payload semantics.
-
-**Step 2: Run RED**
-
-Run: `uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py -q`  
-Expected: FAIL.
-
-**Step 3: Implement minimal missing glue and docs**
-
- Fill any missing wiring revealed by the test.
- Document endpoint contracts, event taxonomy, and operational runbook for redis/celery troubleshooting.
-
-**Step 4: Run GREEN + full gate verification**
-
-Run:
- `PYTHONPATH=backend/src uv run python backend/src/core/runtime/cli.py migrate`
- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright backend/src`
-
-Expected:
- All relevant tests PASS.
- Ruff PASS.
- basedpyright 0 errors (notes/warnings can be documented if pre-existing).
-
-**Step 5: Commit**
-
-```bash
-git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md backend/tests/integration/core/agent/test_session_message_persistence.py
-git commit -m "docs(agent): document closed-loop runtime and verify end-to-end chain"
-```
-
-### Task 9: L2 Mandatory Review Gates
-
-**Files:**
- No direct code changes required; apply fixes if findings appear.
-
-**Step 1: Run required agents**
-
- `tdd-guide` (already enforced by plan sequence)
- `refactor-cleaner`
- `code-reviewer`
- `security-reviewer`
-
-**Step 2: Fix all CRITICAL/HIGH findings**
-
-Run targeted tests after each fix.
-
-**Step 3: Final verification rerun**
-
-Run:
- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright backend/src`
-
-Expected: no failing tests; no lint errors; no type errors.
-
-**Step 4: Final commit (if review fixes were needed)**
-
-```bash
-git add backend/src backend/tests docs/runtime
-git commit -m "fix(agent): resolve L2 review findings for closed-loop runtime"
-```
@@ -0,0 +1,746 @@
+# UserAgentContext & ProfileSettings v1 设计
+
+**Date:** 2026-03-05
+**Status:** Approved
+
+---
+
+## 目标
+
+为 Agent Runtime 提供完整的用户画像上下文，通过 Pydantic 约束 profiles.settings 结构，确保：
+
+1. 运行时入口读取 profile（username/bio/settings）
+2. settings 结构类型安全、版本可演进
+3. 关键配置（语言/时区/国家）符合标准格式
+
+---
+
+## 架构
+
+```
+Profile (DB JSONB)
+    ↓
+ProfileSettings (Pydantic)
+    ↓
+UserAgentContext (DataClass)
+    ↓
+build_global_system_prompt(ctx)
+```
+
+**设计原则：**
+- 唯一入口：`get_user_agent_context(user_id)` 读取并构造上下文
+- 不可变：UserAgentContext 使用 frozen dataclass
+- 向后兼容：version 字段预留未来演进
+
+---
+
+## ProfileSettings v1 结构
+
+```json
+{
+  "version": 1,
+  "preferences": {
+    "interface_language": "zh-CN",
+    "ai_language": "zh-CN",
+    "timezone": "Asia/Shanghai",
+    "country": "CN"
+  },
+  "privacy": {},
+  "notification": {}
+}
+```
+
+### 字段说明
+
+| 字段 | 类型 | 默认值 | 约束 |
+|------|------|--------|------|
+| `version` | int | 1 | 必须为 1（v1 锁定） |
+| `preferences.interface_language` | str | "zh-CN" | BCP-47 格式 |
+| `preferences.ai_language` | str | "zh-CN" | BCP-47 格式 |
+| `preferences.timezone` | str | "Asia/Shanghai" | IANA 时区 |
+| `preferences.country` | str | "CN" | ISO 3166-1 alpha-2 |
+| `privacy` | dict | {} | 空对象（预留） |
+| `notification` | dict | {} | 空对象（预留） |
+
+### 约束规则
+
+**1. BCP-47 语言格式**
+
+正则：`^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$`
+
+示例：
+- ✅ zh-CN, en-US, zh-TW, ja-JP
+- ❌ zh_CN, EN, zh-cn（注意：`chn` 这类 2–3 位小写字母串会通过上述正则，仅靠正则无法判定语言代码是否真实存在）
+
+**2. IANA 时区**
+
+使用 `zoneinfo.ZoneInfo` 校验。
+
+示例：
+- ✅ Asia/Shanghai, America/New_York, UTC
+- ❌ CST, GMT+8
+
+**3. ISO 3166-1 alpha-2 国家代码**
+
+使用 `pycountry.countries.get(alpha_2=...)` 校验。
+
+示例：
+- ✅ CN, US, JP, GB
+- ❌ CHN, USA, zz
+
+---
+
+## UserAgentContext 结构
+
+```python
+@dataclass(frozen=True)
+class UserAgentContext:
+    user_id: UUID
+    username: str
+    bio: str | None
+    settings: ProfileSettings
+```
+
+**设计要点：**
+- 不可变（frozen=True）：防止运行时修改
+- 完整画像：包含身份（username/bio）和配置（settings）
+- 唯一构造入口：`get_user_agent_context(user_id)`
+
+---
+
+## Pydantic 模型实现
+
+```python
+from pydantic import BaseModel, Field, field_validator
+from dataclasses import dataclass
+from uuid import UUID
+import re
+
+class PreferenceSettings(BaseModel):
+    interface_language: str = "zh-CN"
+    ai_language: str = "zh-CN"
+    timezone: str = "Asia/Shanghai"
+    country: str = "CN"
+    
+    @field_validator("interface_language", "ai_language")
+    @classmethod
+    def validate_bcp47(cls, v: str) -> str:
+        pattern = r"^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$"
+        if not re.match(pattern, v):
+            raise ValueError(f"Invalid BCP-47 language tag: {v}")
+        return v
+    
+    @field_validator("timezone")
+    @classmethod
+    def validate_iana_timezone(cls, v: str) -> str:
+        import zoneinfo
+        try:
+            zoneinfo.ZoneInfo(v)
+        except Exception:
+            raise ValueError(f"Invalid IANA timezone: {v}")
+        return v
+    
+    @field_validator("country")
+    @classmethod
+    def validate_iso_country(cls, v: str) -> str:
+        import pycountry
+        if not pycountry.countries.get(alpha_2=v.upper()):
+            raise ValueError(f"Invalid ISO 3166-1 alpha-2 country code: {v}")
+        return v.upper()
+
+class ProfileSettings(BaseModel):
+    version: int = Field(default=1, ge=1, le=1)
+    preferences: PreferenceSettings = Field(default_factory=PreferenceSettings)
+    privacy: dict = Field(default_factory=dict)
+    notification: dict = Field(default_factory=dict)
+
+@dataclass(frozen=True)
+class UserAgentContext:
+    user_id: UUID
+    username: str
+    bio: str | None
+    settings: ProfileSettings
+```
+
+---
+
+## 依赖项
+
+需要添加到 `backend/pyproject.toml`：
+
+```toml
+[project]
+dependencies = ["pycountry>=23.0.0"]  # 追加到现有 dependencies 列表
+```
+
+---
+
+## 迁移策略
+
+**数据库层：**
+- profiles.settings 保持 JSONB，不做 schema 变更
+- 现有数据默认值：`{"version": 1, "preferences": {"country": "CN"}}`
+
+**应用层：**
+- 读取时：`ProfileSettings.model_validate(profile.settings or {})`
+- 写入时：`profile.settings = settings.model_dump()`
+
+---
+
+## 未来演进
+
+
+**版本迁移：**
+- Pydantic 支持多版本共存
+- 数据库不做破坏性变更
+
+---
+
+---
+
+## AG-UI 事件转发与落库策略
+
+### 核心原则
+
+**1. 事件转发时机：**
+- 只有 organization 阶段完成后转发 AG-UI 事件
+- AG-UI bridge 已实现底层机制，编排层控制转发时机
+
+**2. 落库时机：**
+- 意图识别和任务执行阶段：落库但 seq 取负数（用于审计）
+- 结果反馈阶段：seq 取最新 seq 的绝对值 +1（用于展示）
+
+### Seq 设计细节
+
+**意图识别和任务执行阶段（审计用）：**
+- seq 取负数（如 -1, -2）
+- role: "assistant"（标记为 agent 输出）
+- content: 阶段的完整输出（用于审计/调试）
+- 重建会话时通过 `WHERE seq > 0` 过滤，不展示给用户
+
+**结果反馈阶段（展示用）：**
+- seq 取正数（取最新负数的绝对值 +1）
+- role: "assistant"
+- content: OrganizationResult.assistant_text
+- 重建会话时通过 `WHERE seq > 0` 展示给用户
+
+**示例：**
+```
+| seq  | role     | content                    | 展示 |
+|------|----------|----------------------------|------|
+| -2   | assistant| ExecutionResult (完整)     | 否   |
+| -1   | assistant| IntentResult (完整)        | 否   |
+| 1    | user     | 用户输入                   | 是   |
+| 2    | assistant| OrganizationResult         | 是   |
+```
+
+### 编排层职责
+
+```python
+@listen(intent_stage)
+async def persist_intent(self, state: FlowState) -> FlowState:
+    # seq 取负数
+    seq = await message_repo.get_next_negative_seq(state.session_id)
+    await message_repo.create(
+        session_id=state.session_id,
+        seq=seq,  # 负数
+        role="assistant",
+        content=state.intent_result.model_dump_json(),
+        ...
+    )
+    return state
+
+@listen(execution_stage)
+async def persist_execution(self, state: FlowState) -> FlowState:
+    # seq 取负数
+    seq = await message_repo.get_next_negative_seq(state.session_id)
+    await message_repo.create(
+        session_id=state.session_id,
+        seq=seq,  # 负数
+        role="assistant",
+        content=state.execution_result.model_dump_json(),
+        ...
+    )
+    return state
+
+@listen(organization_stage)
+async def finalize_flow(self, state: FlowState) -> FlowState:
+    result = state.organization_result
+    
+    # seq 取正数（最新负数绝对值+1）
+    seq = await message_repo.get_next_positive_seq(state.session_id)
+    await message_repo.create(
+        session_id=state.session_id,
+        seq=seq,  # 正数
+        role="assistant",
+        content=result.assistant_text,
+        ...
+    )
+    
+    # 触发 AG-UI 事件（由 bridge 处理）
+    return state
+```
+
+### Token 和 Cost 累加
+
+**策略：在内存中累加所有阶段的 token 和 cost，organization 完成后统一落库。**
+
+```python
+@dataclass
+class FlowState:
+    # ...
+    tokens: dict[str, dict] = field(default_factory=dict)
+    cost: Decimal = Decimal("0")
+    currency: str = "CNY"
+```
+
+---
+
+## CrewAI Flow 三阶段设计
+
+### 架构概览
+
+```
+User Input + UserAgentContext
+    ↓
+@start() begin()
+    ↓
+@listen() intent_stage() → 判断 can_answer_directly
+    ↓ (router)
+    ├─ DIRECT_RESPONSE → 直接返回
+    └─ NEEDS_EXECUTION
+           ↓
+       @listen() execution_stage() → 任务执行/工具调用
+           ↓
+       @listen() organization_stage() → 结果组织与表达
+           ↓
+       返回给用户
+```
+
+### 三阶段职责
+
+**1. Intent Recognition（意图识别）**
+- Agent Type: `INTENT_RECOGNITION`
+- 输出结构（最小化设计）：
+  ```python
+  class IntentResult(BaseModel):
+      direct_answer: bool  # 是否可以直接回答
+      intent_analysis: str  # 意图分析文本（用于调试/审计）
+      execution_prompt: str  # 给 execution 阶段的提示词（direct_answer=false时使用）
+      direct_response: str  # 直接回复文本（direct_answer=true时使用）
+  ```
+- 短路逻辑：
+  - `direct_answer=true` → 完全跳过 execution 和 organization，直接返回 direct_response
+  - `direct_answer=false` → 进入 execution 阶段
+- 输出约束：使用 `output_pydantic=IntentResult`
+- **落库策略**：落库到 messages 表，但重建会话时不展示
+
+**2. Task Execution（任务执行）**
+- Agent Type: `TASK_EXECUTION`
+- 输入：IntentResult.execution_prompt + IntentResult.intent_analysis
+- 职责：
+  - 执行复杂任务（查询数据库、调用工具、多步骤推理）
+  - 返回结构化执行结果
+- 输出结构（最小化设计）：
+  ```python
+  class ExecutionResult(BaseModel):
+      execution_summary: str  # 任务执行摘要（用于调试/审计）
+      organization_prompt: str  # 给 organization 阶段的提示词
+      execution_data: dict = {}  # 执行结果的结构化数据
+  ```
+- 输出约束：使用 `output_pydantic=ExecutionResult`
+- **落库策略**：落库到 messages 表，但重建会话时不展示
+
+**3. Result Reporting（结果报告）**
+- Agent Type: `RESULT_REPORTING`
+- 输入：
+  - IntentResult（意图识别结果）
+  - ExecutionResult（任务执行情况）
+- 职责：
+  - 结合意图分析和执行结果，格式化为用户友好的响应
+  - 应用个性化模板（基于 UserAgentContext）
+- 输出结构（最小化设计）：
+  ```python
+  class OrganizationResult(BaseModel):
+      assistant_text: str  # 最终回复文本
+      response_metadata: dict = {}  # 响应元数据（可选）
+  ```
+- 输出约束：使用 `output_pydantic=OrganizationResult`
+- **唯一展示阶段**：重建会话时只展示此阶段的 message
+- **唯一转发阶段**：只有此阶段的输出需要通过 AG-UI 事件转发
+
+### Flow 状态管理
+
+```python
+@dataclass
+class FlowState:
+    user_input: str
+    context: UserAgentContext
+    stage_trace: list[str] = field(default_factory=list)
+    intent_result: IntentResult | None = None
+    execution_result: ExecutionResult | None = None
+    organization_result: OrganizationResult | None = None
+    assistant_text: str = ""
+    tokens: dict = field(default_factory=dict)
+    cost: Decimal = Decimal("0")
+```
+
+### 数据流向
+
+```
+User Input + UserAgentContext
+    ↓
+@start() begin()
+    ↓
+@listen() intent_stage()
+    ├─ IntentResult.direct_answer=true
+    │    ↓
+    │  跳过 execution，直接 organization
+    │    ↓
+    │  organization_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
+    │    ↓
+    │  OrganizationResult → AG-UI 事件 + 落库
+    │
+    └─ IntentResult.direct_answer=false
+         ↓
+       execution_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
+         ↓
+       ExecutionResult
+         ↓
+       organization_stage(ExecutionResult.next_stage_prompt, ExecutionResult.metadata)
+         ↓
+       OrganizationResult → AG-UI 事件 + 落库
+```
+
+### 三阶段输出约束
+
+**所有阶段使用 `output_pydantic` 约束输出：**
+
+```python
+from pydantic import BaseModel
+
+class IntentResult(BaseModel):
+    direct_answer: bool
+    next_stage_prompt: str
+    metadata: dict = {}
+
+class ExecutionResult(BaseModel):
+    next_stage_prompt: str
+    metadata: dict = {}
+
+class OrganizationResult(BaseModel):
+    assistant_text: str
+    metadata: dict = {}
+
+# Task 定义
+intent_task = Task(
+    description="Analyze user intent",
+    expected_output="Intent analysis",
+    agent=intent_agent,
+    output_pydantic=IntentResult,
+)
+
+execution_task = Task(
+    description="Execute tasks",
+    expected_output="Execution result",
+    agent=execution_agent,
+    output_pydantic=ExecutionResult,
+)
+
+organization_task = Task(
+    description="Format response",
+    expected_output="User-friendly response",
+    agent=organization_agent,
+    output_pydantic=OrganizationResult,
+)
+```
+
+---
+
+## 系统选模逻辑设计
+
+### 问题背景
+
+旧逻辑：`order_by(...).limit(1)` 只取排序后的第一条系统 agent 记录，不区分阶段。
+
+新逻辑：按 `agent_type` 显式映射到三阶段。
+
+### 选模规则
+
+**必需的 Agent Types：**
+- `INTENT_RECOGNITION` → 用于 intent_stage
+- `TASK_EXECUTION` → 用于 execution_stage
+- `RESULT_REPORTING` → 用于 organization_stage
+
+**查询逻辑：**
+
+```python
+REQUIRED_TYPES = {"INTENT_RECOGNITION", "TASK_EXECUTION", "RESULT_REPORTING"}
+
+@dataclass(frozen=True)
+class StageModels:
+    intent: SystemAgentCatalog
+    execution: SystemAgentCatalog
+    organization: SystemAgentCatalog
+
+def resolve_stage_models(rows: list[SystemAgentCatalog]) -> StageModels:
+    by_type = {row.agent_type: row for row in rows}
+    missing = REQUIRED_TYPES - set(by_type.keys())
+    if missing:
+        raise ValueError(f"Missing required agent types: {missing}")
+    
+    return StageModels(
+        intent=by_type["INTENT_RECOGNITION"],
+        execution=by_type["TASK_EXECUTION"],
+        organization=by_type["RESULT_REPORTING"],
+    )
+```
+
+**初始化数据约束：**
+- `system_agents` 表必须包含三种类型的记录
+- 运行时启动时验证完整性
+
+---
+
+## 人民币结算策略设计
+
+### 设计原则
+
+1. **保留 LiteLLM 语义**：`completion_cost()` 始终返回 USD
+2. **业务层映射**：根据用户国家（`profiles.settings.preferences.country`）决定落库货币
+3. **默认人民币**：中国用户或无国家信息默认 CNY
+4. **汇率配置**：USD/CNY 汇率通过环境变量配置
+
+### 货币来源
+
+```
+UserAgentContext.settings.preferences.country
+    ↓
+resolve_billing_currency(country)
+    ↓
+CN → CNY
+US → USD
+其他 → USD
+```
+
+### 结算流程
+
+```
+LiteLLM completion_cost()
+    ↓ (USD)
+resolve_billing_cost(usd_cost, country)
+    ↓
+    ├─ country="CN" or None → CNY (乘以汇率)
+    └─ 其他（如 country="US"）→ USD (保持原值)
+    ↓
+messages.cost + messages.currency
+sessions.total_cost (同一货币)
+```
+
+### 汇率配置
+
+```python
+# 环境变量
+BILLING_USD_CNY_RATE=7.2
+
+# 默认值
+DEFAULT_USD_CNY_RATE = Decimal("7.2")
+```
+
+### 结算模型
+
+```python
+@dataclass(frozen=True)
+class BillingCost:
+    currency: str  # "CNY" or "USD"
+    cost: Decimal  # 6位小数精度
+
+def resolve_billing_cost(
+    usd_cost: Decimal,
+    country: str | None,
+    usd_cny_rate: Decimal = DEFAULT_USD_CNY_RATE,
+) -> BillingCost:
+    currency = "CNY" if (country or "CN").upper() == "CN" else "USD"
+    if currency == "CNY":
+        cost = usd_cost * usd_cny_rate
+    else:
+        cost = usd_cost
+    return BillingCost(
+        currency=currency,
+        cost=cost.quantize(Decimal("0.000001"))
+    )
+```
+
+### 数据库落库
+
+**messages 表：**
+- `cost`: NUMERIC(12,6) - 业务货币金额
+- `currency`: VARCHAR(3) - "CNY" or "USD"
+
+**sessions 表：**
+- `total_cost`: NUMERIC(12,6) - 同一货币累计
+
+**约束：**
+- 同一 session 内所有 messages 的 currency 必须一致
+- sessions.total_cost 累加时保持货币一致
+
+---
+
+## Session 状态一致性设计
+
+### 问题背景
+
+旧逻辑：
+- `sessions.status` 与 `state_snapshot.status` 不同步
+- 失败时状态不一致
+- title 未自动赋值
+
+### 状态机
+
+```
+pending (创建)
+    ↓
+running (开始执行)
+    ↓
+    ├─ completed (成功)
+    └─ failed (异常)
+```
+
+### 状态同步规则
+
+**创建时：**
+```python
+session = AgentChatSession(
+    user_id=user_uuid,
+    status=AgentChatSessionStatus.PENDING,
+    state_snapshot={
+        "status": "pending",
+        "pending_tool_call_id": None,
+    },
+)
+```
+
+**运行时：**
+```python
+# 开始执行
+session.status = AgentChatSessionStatus.RUNNING
+session.state_snapshot["status"] = "running"
+
+# 成功完成
+session.status = AgentChatSessionStatus.COMPLETED
+session.state_snapshot["status"] = "completed"
+
+# 失败
+session.status = AgentChatSessionStatus.FAILED
+session.state_snapshot["status"] = "failed"
+session.state_snapshot["error_id"] = error_id
+```
+
+### 自动 Title 赋值
+
+**规则：**
+- 首次运行时，如果 `session.title` 为 `NULL`（尚未设置），使用 `user_input[:255]` 赋值
+- 只在第一次运行时赋值，后续不覆盖
+
+**实现：**
+```python
+async def _set_title_if_empty(self, session_id: UUID, title: str) -> None:
+    stmt = (
+        update(AgentChatSession)
+        .where(AgentChatSession.id == session_id)
+        .where(AgentChatSession.title.is_(None))
+        .values(title=title[:255])
+    )
+    await self.db.execute(stmt)
+```
+
+### Repository 方法
+
+```python
+class SessionRepository:
+    async def mark_running(self, session_id: UUID) -> None: ...
+    async def mark_completed(self, session_id: UUID) -> None: ...
+    async def mark_failed(self, session_id: UUID, error_id: str) -> None: ...
+```
+
+---
+
+## 全局 Prompt 构建设计
+
+### 分层结构
+
+```
+全局系统 Prompt
+├─ 身份段（username/bio）
+├─ 偏好段（language/timezone/country）
+└─ 阶段段（动态注入）
+    ├─ intent stage prompt
+    ├─ execution stage prompt
+    └─ organization stage prompt
+```
+
+### 构建函数
+
+```python
+def build_global_system_prompt(ctx: UserAgentContext) -> str:
+    lines = [
+        "# User Identity",
+        f"username: {ctx.username}",
+        f"bio: {ctx.bio or 'N/A'}",
+        "",
+        "# User Preferences",
+        f"interface_language: {ctx.settings.preferences.interface_language}",
+        f"ai_language: {ctx.settings.preferences.ai_language}",
+        f"timezone: {ctx.settings.preferences.timezone}",
+        f"country: {ctx.settings.preferences.country}",
+        "",
+        "# Instructions",
+        "Use the user's preferences to personalize responses.",
+        "Respond in the user's preferred AI language.",
+        "Consider the user's timezone for time-related queries.",
+    ]
+    return "\n".join(lines)
+```
+
+### 阶段注入
+
+每个阶段运行时，在全局 prompt 基础上追加阶段特定的指令：
+
+```python
+def build_stage_prompt(
+    base_prompt: str,
+    stage: str,  # "intent" | "execution" | "organization"
+    ctx: UserAgentContext,
+) -> str:
+    stage_prompts = {
+        "intent": "Analyze the user's intent and decide if direct response is possible.",
+        "execution": "Execute the required tasks and tools to fulfill the user's request.",
+        "organization": "Format the execution results into a user-friendly response.",
+    }
+    return f"{base_prompt}\n\n# Stage: {stage}\n{stage_prompts[stage]}"
+```
+
+---
+
+## 依赖关系图
+
+```
+UserAgentContext (核心上下文)
+    ↓
+    ├─ ProfileSettings (用户配置)
+    │   └─ preferences.country → 人民币结算
+    │
+    ├─ build_global_system_prompt() (全局 Prompt)
+    │   └─ 三阶段 Flow 使用
+    │
+    └─ resolve_stage_models() (选模逻辑)
+        └─ 三阶段 Agent 配置
+```
+
+---
+
+## 相关文档
+
+- [Runtime Database Schema](../runtime/runtime-database.md)
+- [AG-UI Protocol](.opencode/skills/ag-ui/SKILL.md)
+- [CrewAI Framework](.opencode/skills/crewai/SKILL.md)
@@ -0,0 +1,144 @@
+# Agent LLM Config Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** 将 `system_agents.config` 中的 `temperature` / `max_tokens` 以受约束方式加载到运行时，并在调用 LiteLLM 时按需透传。
+
+**Architecture:** 在应用层 `RunService` 读取模型选择时同步读取并校验 `SystemAgents.config`；将校验后的 `SystemAgentLLMConfig` 传入 `CrewAIRuntime`；由 runtime 将配置转交给 LiteLLM client，client 仅在值非 `None` 时向 `completion()` 传参，避免不必要的 provider 兼容风险。
+
+**Tech Stack:** FastAPI, SQLAlchemy (async), Pydantic v2, LiteLLM, pytest
+
+---
+
+## 背景与修正点
+
+- 当前真实调用链为：`RunService._load_agent_model_selection()` -> `create_runtime()` -> `CrewAIRuntime.execute()` -> `run_completion()`，并非 `load_stage_models()`。
+- `SystemAgentLLMConfig` 已存在：`backend/src/core/agent/domain/system_agent_config.py`。
+- `system_agents.config` 目前在初始化 YAML 侧有约束，但运行时 DB 读取仍需二次校验，防止脏数据绕过。
+
+## 规则约束
+
+- 严格 TDD：先写失败测试，再做实现。
+- Python 命令统一使用 `uv run ...`。
+- 仅做增量改动，不回滚或覆盖与本任务无关的已有变更。
+
+## 字段映射与透传策略
+
+| 配置字段 | LiteLLM 参数 | 规则 |
+|---|---|---|
+| `temperature` | `temperature` | `None` 不透传；非空直接透传 |
+| `max_tokens` | `max_tokens` | `None` 不透传；非空直接透传 |
+
+---
+
+### Task 1: 应用层加载并校验 Agent LLM Config
+
+**Files:**
+- Modify: `backend/src/core/agent/application/run_service.py`
+- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
+
+**Step 1: 写失败测试（RED）**
+
+新增单测覆盖以下行为：
+1. `_load_agent_model_selection()` 返回三元组：`(model_code, provider_name, llm_config)`。
+2. 当 DB `config` 为 `{}` 时，`llm_config.temperature/max_tokens` 为 `None`。
+3. 当 DB `config` 含非法值（如 `temperature=3`）时抛 `ValueError`。
+
+**Step 2: 运行测试确认失败**
+
+Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
+Expected: 新增断言失败（返回值结构/异常行为不匹配）。
+
+**Step 3: 最小实现（GREEN）**
+
+在 `run_service.py`：
+1. 查询 `SystemAgents.config`。
+2. 用 `SystemAgentLLMConfig.model_validate(config or {})` 校验。
+3. 将 `_load_agent_model_selection()` 改为返回三元组。
+4. 在 `run()` 中把 `llm_config` 传递到 `create_runtime(...)`。
+
+**Step 4: 运行测试确认通过**
+
+Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
+Expected: PASS。
+
+---
+
+### Task 2: Runtime 与 LiteLLM Client 支持可选参数透传
+
+**Files:**
+- Modify: `backend/src/core/agent/infrastructure/crewai/factory.py`
+- Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py`
+- Modify: `backend/src/core/agent/infrastructure/litellm/client.py`
+- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
+
+**Step 1: 写失败测试（RED）**
+
+在 `test_crewai_runtime.py` 增加用例：
+1. 传入 `temperature/max_tokens` 时，`run_completion` 收到对应参数。
+2. 参数为 `None` 时，不应被透传到 LiteLLM。
+
+必要时新增 `backend/tests/unit/core/agent/test_litellm_client.py`，单测 `run_completion` 的 kwargs 组装逻辑。
+
+**Step 2: 运行测试确认失败**
+
+Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
+Expected: 新增断言失败（参数未透传或未过滤 `None`）。
+
+**Step 3: 最小实现（GREEN）**
+
+1. `create_runtime()` 增加 `llm_config` 参数并传给 `CrewAIRuntime`。
+2. `CrewAIRuntime` 保存 `llm_config`，执行时调用：
+   - `run_completion(..., temperature=llm_config.temperature, max_tokens=llm_config.max_tokens)`
+3. `run_completion()` 改为支持可选 `temperature/max_tokens`，内部仅在非 `None` 时加入 kwargs 再调用 `completion()`。
+
+**Step 4: 运行测试确认通过**
+
+Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
+Expected: PASS。
+
+---
+
+### Task 3: 初始化数据补齐与回归验证
+
+**Files:**
+- Modify: `backend/src/core/config/static/database/system_agents.yaml`
+- Modify: `backend/src/core/config/initial/init_data.py`（如需补充类型兜底）
+- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
+
+**Step 1: 写失败测试（RED）**
+
+补充断言：YAML 读取后 `config` 可为空或包含 `max_tokens: null`，初始化逻辑不会报错，且生成结构符合 `SystemAgentLLMConfig`。
+
+**Step 2: 运行测试确认失败**
+
+Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
+Expected: 新增断言失败。
+
+**Step 3: 最小实现（GREEN）**
+
+1. 在 `system_agents.yaml` 为各 agent 配置显式补充 `max_tokens: null`。
+2. `init_data.py` 保持 `config: SystemAgentLLMConfig | None = None`，写库时统一序列化为 dict。
+
+**Step 4: 运行测试确认通过**
+
+Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
+Expected: PASS。
+
+---
+
+## 最终验证
+
+1. `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_crewai_runtime.py -q`
+2. `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -q`
+3. `uv run ruff check backend/src backend/tests`
+4. `uv run basedpyright`
+
+预期：全部通过；若集成测试依赖本地 DB 状态导致跳过/失败，需记录原因并给出手工验证步骤。
+
+## 完成标准
+
+- `RunService` 从 DB 读取并校验 `config`。
+- runtime 到 LiteLLM 链路支持 `temperature/max_tokens` 可选透传。
+- `None` 不透传。
+- 单测与相关集成测试通过，并给出命令级证据。
@@ -0,0 +1,2 @@
+1. 短期 memory 的加载。memory 的生命周期由 TTL、对话条目和 session_id 共同决定，使用 CrewAI。
+2.