feat(agent): 实现 Agent Runtime LLM 配置与消息元数据结构化支持

This commit is contained in:
qzl
2026-03-05 18:25:51 +08:00
parent c07d339a5f
commit db158de39c
26 changed files with 1215 additions and 2914 deletions
@@ -5,6 +5,10 @@ from uuid import UUID
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from core.agent.application.session_state_persistence import SessionStatePersistence
from core.agent.domain.message_metadata import (
MessageMetadataAssistantOutput,
MessageMetadataToolResult,
)
from core.agent.infrastructure.persistence.message_repository import MessageRepository
from core.agent.infrastructure.persistence.session_repository import SessionRepository
from core.db import AsyncSessionLocal
@@ -46,14 +50,16 @@ class ResumeService:
seq=next_seq,
role=AgentChatMessageRole.TOOL,
content='{"status":"ok"}',
metadata={"type": "tool_result", "tool_call_id": tool_call_id},
metadata=MessageMetadataToolResult(
tool_call_id=tool_call_id,
).model_dump(),
)
await message_repository.append_message(
session_id=session_uuid,
seq=next_seq + 1,
role=AgentChatMessageRole.ASSISTANT,
content="Tool result received",
metadata={"type": "assistant_output"},
metadata=MessageMetadataAssistantOutput().model_dump(),
)
snapshot = self._state_persistence.build_completed_snapshot()
@@ -3,10 +3,16 @@ from __future__ import annotations
from decimal import Decimal
from uuid import UUID, uuid4
from pydantic import ValidationError
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from core.agent.application.session_state_persistence import SessionStatePersistence
from core.agent.domain.message_metadata import (
MessageMetadataToolCall,
MessageMetadataUserInput,
)
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.crewai.factory import create_runtime
from core.agent.infrastructure.persistence.message_repository import MessageRepository
from core.agent.infrastructure.persistence.session_repository import SessionRepository
@@ -58,10 +64,16 @@ class RunService:
if chat_session is None:
raise ValueError("session not found")
model_code, provider_name = await self._load_agent_model_selection(
db_session
(
model_code,
provider_name,
llm_config,
) = await self._load_agent_model_selection(db_session)
runtime = create_runtime(
model_code=model_code,
provider_name=provider_name,
llm_config=llm_config,
)
runtime = create_runtime(model_code=model_code, provider_name=provider_name)
runtime_result = runtime.execute(user_input=user_input)
assistant_text = str(runtime_result.get("assistant_text", ""))
prompt_tokens = _to_int(runtime_result.get("prompt_tokens", 0))
@@ -79,7 +91,7 @@ class RunService:
role=AgentChatMessageRole.USER,
content=user_input,
model_code=model_code,
metadata={"type": "user_input"},
metadata=MessageMetadataUserInput().model_dump(),
)
await message_repository.append_message(
session_id=session_uuid,
@@ -87,10 +99,9 @@ class RunService:
role=AgentChatMessageRole.ASSISTANT,
content=assistant_text or "Tool call pending approval",
model_code=model_code,
metadata={
"type": "tool_call",
"tool_call_id": pending_tool_call_id,
},
metadata=MessageMetadataToolCall(
tool_call_id=pending_tool_call_id,
).model_dump(),
input_tokens=prompt_tokens,
output_tokens=completion_tokens,
cost=cost,
@@ -119,9 +130,9 @@ class RunService:
async def _load_agent_model_selection(
self, session: AsyncSession
) -> tuple[str, str]:
) -> tuple[str, str, SystemAgentLLMConfig]:
stmt = (
select(Llm.model_code, LlmFactory.name)
select(Llm.model_code, LlmFactory.name, SystemAgents.config)
.join(SystemAgents, SystemAgents.llm_id == Llm.id)
.join(LlmFactory, LlmFactory.id == Llm.factory_id)
.where(SystemAgents.status == "active")
@@ -131,4 +142,11 @@ class RunService:
record = (await session.execute(stmt)).one_or_none()
if record is None:
raise ValueError("active system agent model is required")
return str(record[0]), str(record[1])
raw_config = record[2] if isinstance(record[2], dict) else {}
try:
llm_config = SystemAgentLLMConfig.model_validate(raw_config)
except ValidationError as exc:
raise ValueError("invalid system agent config") from exc
return str(record[0]), str(record[1]), llm_config
@@ -0,0 +1,39 @@
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel
class MessageMetadataUserInput(BaseModel):
    """Metadata attached to a message that carries user input."""

    # Tag identifying this metadata variant; the only accepted value is
    # "user_input", which is also the default.
    type: Literal["user_input"] = "user_input"
class MessageMetadataToolCall(BaseModel):
    """Metadata attached to a message that records a tool call."""

    # Tag identifying this metadata variant; the only accepted value is
    # "tool_call", which is also the default.
    type: Literal["tool_call"] = "tool_call"
    # Identifier of the tool call; required, there is no default.
    tool_call_id: str
class MessageMetadataToolResult(BaseModel):
    """Metadata attached to a message that carries a tool result.

    Only ``tool_call_id`` is required; every other field defaults to ``None``.
    """

    # Tag identifying this metadata variant; the only accepted value is
    # "tool_result", which is also the default.
    type: Literal["tool_result"] = "tool_result"
    # Identifier of the tool call this result belongs to (required).
    tool_call_id: str
    # NOTE(review): the fields below are optional. With a plain
    # ``model_dump()`` they are still emitted as keys with value ``None``
    # when unset -- confirm that consumers of the stored metadata accept that.
    run_id: str | None = None
    turn_id: str | None = None
    tool_name: str | None = None
    # Presumably describe where the result payload is stored and how to
    # verify it (bucket/path, SHA-256, size, format) -- TODO confirm with
    # the code that produces these values.
    storage_bucket: str | None = None
    storage_path: str | None = None
    payload_sha256: str | None = None
    payload_bytes: int | None = None
    payload_format: str | None = None
class MessageMetadataAssistantOutput(BaseModel):
    """Metadata attached to a message that carries assistant output."""

    # Tag identifying this metadata variant; the only accepted value is
    # "assistant_output", which is also the default.
    type: Literal["assistant_output"] = "assistant_output"
# Union of the four metadata models; each member carries a distinct literal
# ``type`` tag. Built with ``|`` at runtime, so the classes must support the
# union operator (Python 3.10+).
MessageMetadata = (
    MessageMetadataUserInput
    | MessageMetadataToolCall
    | MessageMetadataToolResult
    | MessageMetadataAssistantOutput
)
@@ -0,0 +1,8 @@
from __future__ import annotations
from pydantic import BaseModel, Field
class SystemAgentLLMConfig(BaseModel):
    """Optional LLM call parameters for a system agent.

    Both fields default to ``None``; validation rejects values outside the
    bounds declared on each field.
    """

    # When set, must lie within the inclusive range [0.0, 2.0].
    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
    # When set, must be an integer of at least 1.
    max_tokens: int | None = Field(default=None, ge=1)
@@ -1,5 +1,7 @@
from __future__ import annotations
from core.agent.domain.message_metadata import MessageMetadataToolResult
def reconstruct_tool_call_result_event(
*,
@@ -26,15 +28,14 @@ def build_tool_result_metadata(
payload_bytes: int,
payload_format: str,
) -> dict[str, object]:
return {
"type": "tool_result",
"run_id": run_id,
"turn_id": turn_id,
"tool_call_id": tool_call_id,
"tool_name": tool_name,
"storage_bucket": storage_bucket,
"storage_path": storage_path,
"payload_sha256": payload_sha256,
"payload_bytes": payload_bytes,
"payload_format": payload_format,
}
return MessageMetadataToolResult(
run_id=run_id,
turn_id=turn_id,
tool_call_id=tool_call_id,
tool_name=tool_name,
storage_bucket=storage_bucket,
storage_path=storage_path,
payload_sha256=payload_sha256,
payload_bytes=payload_bytes,
payload_format=payload_format,
).model_dump()
@@ -1,15 +1,20 @@
from __future__ import annotations
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.config.resolver import AgentConfigResolver
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
def create_runtime(
*, model_code: str | None, provider_name: str | None
*,
model_code: str | None,
provider_name: str | None,
llm_config: SystemAgentLLMConfig | None = None,
) -> CrewAIRuntime:
resolver = AgentConfigResolver()
return CrewAIRuntime(
resolver=resolver,
model_code=model_code,
provider_name=provider_name,
llm_config=llm_config,
)
@@ -2,6 +2,7 @@ from __future__ import annotations
from typing import Any
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.agui.bridge import to_agui_events
from core.agent.infrastructure.config.resolver import (
AgentConfigResolver,
@@ -47,11 +48,13 @@ class CrewAIRuntime:
resolver: AgentConfigResolver,
model_code: str | None,
provider_name: str | None,
llm_config: SystemAgentLLMConfig | None = None,
) -> None:
self._config: ResolvedAgentConfig = resolver.resolve(
model_code=model_code,
provider_name=provider_name,
)
self._llm_config = llm_config or SystemAgentLLMConfig()
def map_events(self, internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
return to_agui_events(internal_events)
@@ -65,6 +68,8 @@ class CrewAIRuntime:
model=litellm_model,
api_key=self._config.provider_api_key,
messages=[{"role": "user", "content": user_input}],
temperature=self._llm_config.temperature,
max_tokens=self._llm_config.max_tokens,
)
if not isinstance(response, dict):
raise ValueError("llm response must be a dict")
@@ -5,13 +5,26 @@ from typing import Any
from litellm import completion
def run_completion(*, model: str, api_key: str, messages: list[dict[str, Any]]) -> Any:
response = completion(
model=model,
api_key=api_key,
messages=messages,
stream=False,
)
def run_completion(
*,
model: str,
api_key: str,
messages: list[dict[str, Any]],
temperature: float | None = None,
max_tokens: int | None = None,
) -> Any:
kwargs: dict[str, Any] = {
"model": model,
"api_key": api_key,
"messages": messages,
"stream": False,
}
if temperature is not None:
kwargs["temperature"] = temperature
if max_tokens is not None:
kwargs["max_tokens"] = max_tokens
response = completion(**kwargs)
model_dump = getattr(response, "model_dump", None)
if callable(model_dump):
return model_dump()
+5 -2
View File
@@ -9,6 +9,7 @@ from pydantic import BaseModel, ValidationError
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.db.session import AsyncSessionLocal
from core.logging import get_logger
from models.llm import Llm
@@ -38,7 +39,7 @@ class SystemAgentsSeed(BaseModel):
agent_type: str
llm_model_code: str
status: str
config: dict[str, Any]
config: SystemAgentLLMConfig | None = None
class SystemAgentsYaml(BaseModel):
@@ -184,7 +185,9 @@ async def initialize_system_agents() -> None:
agent_type=agent["agent_type"],
llm_id=llm.id,
status=agent["status"],
config=agent["config"],
config=SystemAgentLLMConfig.model_validate(
agent.get("config") or {}
).model_dump(),
)
logger.info("Initialized system agents")
@@ -4,15 +4,18 @@ agents:
status: active
config:
temperature: 0.7
max_tokens: null
- agent_type: TASK_EXECUTION
llm_model_code: deepseek-v3.2
status: active
config:
temperature: 0.7
max_tokens: null
- agent_type: RESULT_REPORTING
llm_model_code: deepseek-v3.2
status: active
config:
temperature: 0.7
max_tokens: null
@@ -1,22 +1,26 @@
from __future__ import annotations
from types import SimpleNamespace
from typing import cast
from core.agent.infrastructure.config.resolver import AgentConfigResolver
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
def test_runtime_emits_text_tool_reasoning_events() -> None:
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(
settings=SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
)
settings = cast(
SettingsLike,
SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
),
)
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(settings=settings),
model_code="gpt-4o-mini",
provider_name="dashscope",
)
@@ -46,11 +50,18 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
captured: dict[str, object] = {}
def _fake_completion(
*, model: str, api_key: str, messages: list[dict[str, object]]
*,
model: str,
api_key: str,
messages: list[dict[str, object]],
temperature: float | None = None,
max_tokens: int | None = None,
):
captured["model"] = model
captured["api_key"] = api_key
captured["messages"] = messages
captured["temperature"] = temperature
captured["max_tokens"] = max_tokens
return {
"choices": [
{
@@ -75,23 +86,28 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
cost=0.001,
),
)
settings = cast(
SettingsLike,
SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
),
)
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(
settings=SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
)
),
resolver=AgentConfigResolver(settings=settings),
model_code="qwen3.5-flash",
provider_name="dashscope",
llm_config=SystemAgentLLMConfig(temperature=0.3, max_tokens=256),
)
result = runtime.execute(user_input="hi")
assert captured["model"] == "dashscope/qwen3.5-flash"
assert captured["api_key"] == "env-api-key"
assert captured["temperature"] == 0.3
assert captured["max_tokens"] == 256
assert result["assistant_text"] == "hello"
@@ -0,0 +1,14 @@
from __future__ import annotations
from core.config.initial.init_data import load_system_agents
def test_load_system_agents_supports_nullable_max_tokens() -> None:
    """Every loaded agent declares ``config.max_tokens`` and leaves it null."""
    agents = load_system_agents()["agents"]

    assert len(agents) > 0
    for entry in agents:
        assert "config" in entry
        agent_config = entry["config"]
        assert "max_tokens" in agent_config
        assert agent_config["max_tokens"] is None
@@ -0,0 +1,51 @@
from __future__ import annotations
from core.agent.infrastructure.litellm.client import run_completion
def test_run_completion_passes_optional_params_when_provided(monkeypatch) -> None:
    """Explicit ``temperature`` and ``max_tokens`` are forwarded to ``completion``."""
    seen_kwargs: dict[str, object] = {}

    def _recording_completion(**kwargs):  # type: ignore[no-untyped-def]
        # Record exactly what run_completion forwards, then return a stub.
        seen_kwargs.update(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _recording_completion,
    )

    chat_messages = [{"role": "user", "content": "hi"}]
    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=chat_messages,
        temperature=0.6,
        max_tokens=120,
    )

    assert seen_kwargs["temperature"] == 0.6
    assert seen_kwargs["max_tokens"] == 120
def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
    """``None`` for ``temperature``/``max_tokens`` means the key is not forwarded."""
    seen_kwargs: dict[str, object] = {}

    def _recording_completion(**kwargs):  # type: ignore[no-untyped-def]
        # Record exactly what run_completion forwards, then return a stub.
        seen_kwargs.update(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _recording_completion,
    )

    chat_messages = [{"role": "user", "content": "hi"}]
    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=chat_messages,
        temperature=None,
        max_tokens=None,
    )

    for optional_key in ("temperature", "max_tokens"):
        assert optional_key not in seen_kwargs
@@ -4,6 +4,23 @@ import pytest
from core.agent.application.resume_service import ResumeService
from core.agent.application.run_service import RunService
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
class _FakeResult:
def __init__(self, record: tuple[object, object, object] | None) -> None:
self._record = record
def one_or_none(self) -> tuple[object, object, object] | None:
return self._record
class _FakeSession:
    """Test double for an async session whose ``execute`` ignores its statement."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        self._canned_record = record

    async def execute(self, _stmt: object) -> _FakeResult:
        """Wrap the canned record in a ``_FakeResult``, whatever ``_stmt`` is."""
        result = _FakeResult(self._canned_record)
        return result
@pytest.mark.asyncio
@@ -20,3 +37,72 @@ async def test_resume_service_requires_pending_tool_call() -> None:
with pytest.raises(ValueError):
await resume_service.resume(session_id="session-1", tool_call_id="call-1")
@pytest.mark.asyncio
async def test_load_agent_model_selection_returns_validated_llm_config() -> None:
    """A well-formed config dict comes back as a ``SystemAgentLLMConfig``."""
    service = RunService()
    row = (
        "qwen3.5-flash",
        "dashscope",
        {"temperature": 0.5, "max_tokens": 512},
    )
    session = _FakeSession(row)

    selection = await service._load_agent_model_selection(
        session  # type: ignore[arg-type]
    )
    model_code, provider_name, llm_config = selection

    assert model_code == "qwen3.5-flash"
    assert provider_name == "dashscope"
    assert isinstance(llm_config, SystemAgentLLMConfig)
    assert llm_config.temperature == 0.5
    assert llm_config.max_tokens == 512
@pytest.mark.asyncio
async def test_load_agent_model_selection_rejects_invalid_config() -> None:
    """A config that fails validation (temperature 3.0) raises ``ValueError``."""
    service = RunService()
    invalid_row = ("qwen3.5-flash", "dashscope", {"temperature": 3.0})
    session = _FakeSession(invalid_row)

    with pytest.raises(ValueError, match="invalid system agent config"):
        await service._load_agent_model_selection(session)  # type: ignore[arg-type]
@pytest.mark.asyncio
async def test_load_agent_model_selection_falls_back_when_config_not_dict() -> None:
    """A non-dict config column yields a config with both fields ``None``."""
    service = RunService()
    row_with_bad_config = ("qwen3.5-flash", "dashscope", "not-a-dict")
    session = _FakeSession(row_with_bad_config)

    selection = await service._load_agent_model_selection(
        session  # type: ignore[arg-type]
    )
    llm_config = selection[2]

    assert llm_config.temperature is None
    assert llm_config.max_tokens is None
@pytest.mark.asyncio
async def test_load_agent_model_selection_raises_when_no_active_agent() -> None:
    """When the query returns no row, the loader raises ``ValueError``."""
    service = RunService()
    empty_session = _FakeSession(None)

    with pytest.raises(ValueError, match="active system agent model is required"):
        await service._load_agent_model_selection(empty_session)  # type: ignore[arg-type]
-116
View File
@@ -1,116 +0,0 @@
# 前后端 API 对比分析
**Date:** 2026-03-04
**Status:** Open
**Type:** 架构分析
---
## 一、后端已有、前端缺失的 API
### 1. Friendships API (`/api/v1/friends`)
| 方法 | 路径 | 功能 | 前端状态 |
|------|------|------|----------|
| POST | `/requests` | 发送好友请求 | **缺失** |
| GET | `/requests/inbox` | 获取收件箱 | **缺失** |
| GET | `/requests/outgoing` | 获取发出的请求 | **缺失** |
| POST | `/requests/{id}/accept` | 接受好友请求 | **缺失** |
| POST | `/requests/{id}/decline` | 拒绝好友请求 | **缺失** |
| DELETE | `/requests/{id}` | 取消好友请求 | **缺失** |
| GET | `` | 获取好友列表 | **缺失** |
| DELETE | `/{id}` | 删除好友 | **缺失** |
### 2. Inbox Messages API (`/api/v1/inbox/messages`)
| 方法 | 路径 | 功能 | 前端状态 |
|------|------|------|----------|
| GET | `` | 获取消息列表 | **缺失** |
| POST | `/{id}/accept` | 接受邀请 | **缺失** |
| POST | `/{id}/dismiss` | 忽略消息 | **缺失** |
### 3. Chat/AgUi 流式 API
| 功能 | 前端状态 |
|------|----------|
| 发送消息 SSE 流式 | **仅有 Mock** |
| 加载历史记录 | **仅有 Mock** |
> 前端 `AgUiService` 只有本地 mock (`throw UnimplementedError`),未实现真实 API 调用。
### 4. Infra API
| 方法 | 路径 | 功能 | 前端状态 |
|------|------|------|----------|
| GET | `/infra/health` | 基础设施健康检查 | **未使用** |
---
## 二、前端已有、后端已实现的 API
### Auth API (`/api/v1/auth`)
| 方法 | 路径 | 后端 | 前端 |
|------|------|------|------|
| POST | `/verifications` | ✅ | ✅ |
| POST | `/verifications/verify` | ✅ | ✅ |
| POST | `/verifications/resend` | ✅ | ✅ |
| POST | `/sessions` | ✅ | ✅ |
| POST | `/sessions/refresh` | ✅ | ✅ |
| DELETE | `/sessions` | ✅ | ✅ |
| POST | `/password-reset` | ✅ | ✅ |
| POST | `/password-reset/confirm` | ✅ | ✅ |
| GET | `/users` | ✅ | **未使用** |
### Users API (`/api/v1/users`)
| 方法 | 路径 | 后端 | 前端 |
|------|------|------|------|
| GET | `/me` | ✅ | ✅ |
| PATCH | `/me` | ✅ | ✅ |
| POST | `/search` | ✅ | ✅ |
### Schedule Items API (`/api/v1/schedule-items`)
| 方法 | 路径 | 后端 | 前端 |
|------|------|------|------|
| POST | `` | ✅ | **仅有 Mock** |
| GET | `` (range query) | ✅ | **仅有 Mock** |
| GET | `/{id}` | ✅ | **仅有 Mock** |
| PATCH | `/{id}` | ✅ | **仅有 Mock** |
| DELETE | `/{id}` | ✅ | **仅有 Mock** |
| POST | `/{id}/share` | ✅ | **缺失** |
---
## 三、待实现功能清单
| 优先级 | 功能 | 说明 |
|--------|------|------|
| **P0** | FriendsApi | 前端无 Friendships API 客户端 |
| **P0** | InboxMessagesApi | 前端无 Inbox Messages API 客户端 |
| **P0** | Chat/AgUi 后端连接 | 前端 AgUiService 未实现真实 API |
| **P1** | CalendarService 真实 API | MockCalendarService → 真实 API 调用 |
| **P1** | Schedule Share 接口 | 前端未调用 `POST /{id}/share` |
| **P2** | Infra Health 集成 | 可用于前端健康检查 |
---
## 四、相关文件位置
### 前端 API 客户端
- `apps/lib/features/auth/data/auth_api.dart` - Auth API
- `apps/lib/features/users/data/users_api.dart` - Users API
- `apps/lib/features/calendar/data/services/mock_calendar_service.dart` - Calendar Mock
- `apps/lib/features/chat/data/services/ag_ui_service.dart` - Chat/AgUi Mock
- `apps/lib/features/chat/data/services/mock_history_service.dart` - History Mock
### 后端 Router
- `backend/src/v1/auth/router.py` - Auth 路由
- `backend/src/v1/users/router.py` - Users 路由
- `backend/src/v1/friendships/router.py` - Friendships 路由
- `backend/src/v1/inbox_messages/router.py` - Inbox Messages 路由
- `backend/src/v1/schedule_items/router.py` - Schedule Items 路由
- `backend/src/v1/infra/router.py` - Infra 路由
-145
View File
@@ -1,145 +0,0 @@
# 前后端测试分析报告
**Date:** 2026-03-04
**Status:** Completed
---
## 测试统计
### 后端测试
| 类型 | 数量 | 状态 |
|------|------|------|
| Unit Tests | ~100+ | 可运行 |
| Integration Tests | ~70+ | 可运行 |
| E2E Tests | 5 | **无法运行** (缺少 playwright 依赖) |
### 前端测试
| 类型 | 数量 | 状态 |
|------|------|------|
| Flutter Tests | 140 | ✅ 全部通过 |
---
## 问题发现
### 1. 后端 E2E 测试无法运行 (HIGH)
**问题**: 5 个 E2E 测试文件需要 `playwright` 模块,但依赖未安装。
**影响文件**:
- `tests/e2e/test_auth_flow.py`
- `tests/e2e/test_infra_health_e2e.py`
- `tests/e2e/test_logging_e2e.py`
- `tests/e2e/test_mobile_health_e2e.py`
- `tests/e2e/test_profile_flow.py`
**错误**:
```
ModuleNotFoundError: No module named 'playwright'
```
**建议**:
- 安装 playwright: `uv add playwright && uv run playwright install`
- 或者移除这些无法运行的 E2E 测试文件
---
### 2. 测试文件命名冲突导致收集警告 (LOW)
**问题**: 存在多个同名 `test_schemas.py` 文件在不同目录,导致 pytest 收集时显示警告。
**影响文件**:
- `tests/unit/v1/schedule_items/test_schemas.py`
- `tests/unit/v1/profile/test_schemas.py`
- `tests/unit/v1/inbox_messages/test_schemas.py`
- `tests/unit/v1/friendships/test_schemas.py`
**状态**: 测试实际可以正常运行,只是有警告提示。
**建议**: 可保持现状(这是合理的代码组织方式),或将各文件重命名为互不相同的文件名(如 `test_<模块>_schemas.py`)以消除警告。
---
### 3. 遗留测试验证旧字段 (INFO)
**文件**: `tests/unit/v1/profile/test_schemas.py`
**测试**: `test_profile_update_rejects_display_name_field`
**说明**: 此测试验证旧的 `display_name` 字段被正确拒绝。字段已在之前的重构中删除。
**状态**: **有效** - 这是一个回归测试,确保旧字段不被使用。
---
## 未发现的问题
### 冗余测试
经过检查,未发现明显冗余的测试:
- 每个模块的测试覆盖不同的功能
- Unit tests、Integration tests、E2E tests 有清晰的职责划分
### 死代码
未发现测试文件中有未使用的:
- imports
- mock 类
- helper 函数
### 缺失测试
未发现对应已实现功能但缺少测试的情况。
---
## 测试覆盖模块
### 后端
| 模块 | Unit | Integration | E2E |
|------|------|-------------|-----|
| Auth | ✅ | ✅ | ❌ |
| Users | - | ✅ | - |
| Profile | ✅ | - | ❌ |
| Friendships | ✅ | ✅ | - |
| Inbox Messages | ✅ | ✅ | - |
| Schedule Items | ✅ | ✅ | - |
| Logging | ✅ | ✅ | ✅ |
| Settings | ✅ | - | - |
### 前端
| 模块 | 测试数 |
|------|--------|
| Auth | ~20 |
| Chat | ~70 |
| Home | ~15 |
| Calendar | ~5 |
| Core (API, Storage) | ~30 |
---
## 建议
1. **立即**: 解决 E2E 测试依赖问题或移除无法运行的测试文件
2. **可选**: 清理 test_schemas.py 重名警告(低优先级)
3. **保持**: 现有的测试结构良好,无需重大重构
---
## 附: 测试代码质量问题
### 测试类未完全实现 Protocol (LSP 警告)
**文件**: `tests/unit/v1/auth/test_auth_service.py`
**问题**: `FakeGateway` 和 `LogoutAssertingGateway` 类没有实现 `AuthServiceGateway` Protocol 的全部方法:
- `request_password_reset`
- `confirm_password_reset`
**影响**: LSP 类型检查器报告错误,但运行时不受影响(因为这些方法在测试中不会被调用)。
**建议**: 可选择补充缺失的方法实现,或使用 `@pytest.mark.skip` 标记不需要的协议方法。
---
*报告生成时间: 2026-03-04*
@@ -1,201 +0,0 @@
# Agent 后端硬切重构设计
## 目标
- 一次性移除现有 Agent 运行时代码、测试和旧文档契约,避免新旧方案并存。
- 仅从后端重新设计 Agent 体系,不依赖前端实现细节。
- 新方案必须满足以下六项要求:
1. 配置层可通过 `.env` 驱动 LLM API Key。
2. 对话与 resume 通过 Celery 队列处理,不阻塞 Web 主线程。
3. `v1/agent` 仅负责路由组织与服务调用,核心逻辑在 `core/agent`。
4. 按 CrewAI 官方模型组织 Agent/Task/Crew/Flow/Tools。
5. 按 AG-UI 协议输出事件,优先使用 `ag-ui-crewai` 适配库。
6. 使用 LiteLLM 统计每次 LLM 调用的 token 和 cost。
## 设计原则
- 单一职责:HTTP 层只做协议和鉴权,编排与执行下沉到核心层。
- 异步优先:长耗时推理、工具调用、恢复流程全部异步化。
- 协议优先:AG-UI 作为唯一事件契约,不维护自定义事件方言。
- 可观测性优先:每次 run、每次 stage、每次 LLM 调用可追踪。
- 配置单一来源:所有密钥和模型配置只走 `core.config.settings`
## 目标架构
### 1) 分层
- `backend/src/v1/agent/`
- `router.py`: 暴露 HTTP/SSE 接口。
- `schemas.py`: 请求/响应 DTO 和输入校验。
- `dependencies.py`: DI 装配。
- `service.py`: 薄服务,仅调用 `core/agent` 应用服务。
- `backend/src/core/agent/`
- `application/`: run/resume 应用服务。
- `domain/`: run 状态机、resume 幂等语义、错误模型。
- `infrastructure/crewai/`: CrewAI Agent/Task/Crew/Flow 装配与执行。
- `infrastructure/agui/`: AG-UI 事件映射与 SSE 序列化。
- `infrastructure/litellm/`: LiteLLM 客户端与 usage/cost 拦截器。
- `infrastructure/queue/`: Celery task producer/consumer。
### 1.1) 配置来源与合并策略
- Agent 运行配置由两部分组成:
- 数据库存量配置:`system_agents`(每种 agent_type 对应 llm 与 llm_config)。
- 静态模板配置:`backend/src/core/config/static/crewai/*.yaml`(角色描述、任务模板、workflow、tools)。
- 合并策略:
- `llm` 与 `llm_config` 以 `system_agents` 为准。
- prompt 模板、task 描述、flow stage、tool 白名单以 static/crewai 为准。
- 若任一 agent_type 在 `system_agents` 缺失,运行前失败并返回受控错误。
### 2) 核心运行链路
1. `POST /api/v1/agent/runs` 只负责参数校验和鉴权。
2. 路由调用 `AgentRunAppService.enqueue_run()`,写入 run 记录并投递 Celery。
3. Worker 执行 `run_agent_task`
- 读取 run 上下文。
- 构建 CrewAI `Agent/Task/Crew/Flow`
- 通过 `ag-ui-crewai` 将执行事件转为 AG-UI 标准事件。
- 每次 LLM 调用由 LiteLLM 中间层记录 token/cost。
4. 事件落库并发布到事件通道(Redis Stream/Channel)。
5. SSE 接口从事件通道读取并持续推送,直到 `RUN_FINISHED` 或 `RUN_ERROR`。
### 3) Resume 链路
1. `POST /api/v1/agent/runs/{run_id}/resume` 校验 `interrupt_id` 与决策 payload。
2. 调用 `enqueue_resume()` 投递 `resume_agent_task`
3. Worker 在事务内做并发控制:
- `run_id + interrupt_id` 幂等锁。
- 过期校验与状态迁移。
4. 恢复后继续 CrewAI Flow,事件按 AG-UI 继续输出。
### 4) Session 状态持久化
- 使用 `sessions.state_snapshot` 作为运行态单一快照来源。
- 快照至少包含:
- run 上下文(thread_id、run_id、stage)
- pending_tool_calls(tool_call_id、tool_name、args、status、expires_at)
- correlation 索引(tool_call_id -> message_id / step_id)
- 所有中断/恢复均以 `state_snapshot` 事务更新为准,避免内存态漂移。
### 5) 会话与消息落库模型
- 会话主表:`sessions`
- 新建 run 时写入:`id/user_id/session_type/status=running/last_activity_at`
- 运行中持续更新:`status`、`last_activity_at`、`message_count`、`total_tokens`、`total_cost`、`state_snapshot`。
- 运行结束更新:
- 成功:`status=completed`
- 失败:`status=failed`
- 消息表:`messages`
- 用户输入落库为 `role=user`(每次 run 开始时先写入)。
- 模型输出落库为 `role=assistant`(按最终聚合文本落库,保留 metadata 记录增量信息)。
- 工具调用结果落库为 `role=tool`,并写入 `tool_name` 与 `metadata.tool_call_id`。
- `seq` 由每个 `session_id` 内单调递增分配,满足 `uq_messages_session_seq`
- 计量落库:每次 LLM 调用的 usage/cost 先写消息级,再聚合更新到 session 级。
## 六项要求落地映射
### 要求 1: `.env` 驱动 LLM API Key
- 新增 `LLMSettings` 到 `core.config.settings.Settings`,统一定义:
- `SOCIAL_LLM__PROVIDER_KEYS__DASHSCOPE`
- `SOCIAL_LLM__PROVIDER_KEYS__MINIMAX`
- `SOCIAL_LLM__PROVIDER_KEYS__MOONSHOT`
- `SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK`
- `SOCIAL_LLM__PROVIDER_KEYS__ARK`
- `SOCIAL_LLM__PROVIDER_KEYS__ZAI`
- 禁止 `os.environ` 直接读取密钥。
### 要求 2: 对话和 resume 走 Celery
- Web 层不直接执行编排。
- `run`/`resume` 一律入队,Worker 处理,Web 仅做事件流转发。
- 加入任务级超时、重试、死信策略。
### 要求 3: v1 仅路由与调用
- `v1/agent/service.py` 仅保留应用服务调用和错误映射。
- 任何编排、状态机、工具执行逻辑禁止进入 `v1`
### 要求 4: CrewAI 官方流程
- 采用 CrewAI 原生对象:`Agent`、`Task`、`Crew`、`Flow`。
- tools 通过 CrewAI Tool 机制注册,不做平行实现。
- 任务模板与 agent 配置集中化(静态模板 + 运行时拼装)。
- 配置拼装明确依赖 `system_agents + static/crewai`,不再使用双套来源。
### 要求 5: AG-UI + ag-ui-crewai
- 事件集遵循 AG-UI 协议,生命周期闭环:
- `RUN_STARTED`
- 流式消息和工具事件
- 终态 `RUN_FINISHED` 或 `RUN_ERROR`
- 优先引入 `ag-ui-crewai` 做 CrewAI 到 AG-UI 的桥接,避免重复造轮子。
### 要求 6: LiteLLM token/cost 统计
- 所有 LLM 调用通过 LiteLLM 统一出入口。
- 按调用粒度记录:`input_tokens`、`output_tokens`、`total_tokens`、`cost`、`currency`。
- 按 run 粒度聚合并落库,支持后续计费和审计。
## 数据与可观测性
- 保留现有 Agent 相关表结构,不在本次硬切做数据库破坏性变更。
- 新增事件日志与调用指标落点(如已有字段不足,后续增量迁移)。
- 日志使用结构化字段:`run_id`、`task_id`、`stage`、`tool_name`、`llm_model`、`latency_ms`。
- 持久化原则:run/resume 的关键状态变更必须可重放,禁止仅保存在内存。
## 事务边界
- `run` 入口事务:创建或加载 `session` + 写入用户消息。
- `worker` 执行事务(可分阶段短事务):
- 阶段开始:更新 `session.status/state_snapshot`
- LLM 返回:写 assistant/tool 消息 + 更新 token/cost 聚合。
- 中断:写 `pending_tool_calls` 到 `state_snapshot` 并提交。
- 完成:更新终态 `session.status` 并提交。
- `resume` 事务:校验 `interrupt_id` 与 ownership,CAS 更新 `state_snapshot`,然后进入后续执行事务。
## 错误处理与安全
- API Key 缺失启动即失败,不进入运行态。
- 外部工具入参统一白名单和 schema 校验。
- resume 决策必须鉴权与会话所有权校验。
- 错误响应遵循 RFC 7807,避免泄漏敏感上下文。
## 工具调用与恢复语义
- 工具分三类:
- 前端工具:由 `RunAgentInput.tools` 提供能力声明,触发 interrupt,由客户端执行并回传 result。
- 后端工具(需审批):先 interrupt 给前端审批;审批通过后由后端执行,不由前端执行。
- 后端工具(直执):后端直接执行。
- 一致性约束:
- 每个 tool_result 必须携带 `tool_call_id`
- 后端仅接受当前 `state_snapshot.pending_tool_calls` 中存在且状态合法的 `tool_call_id`
- 若收到未知/已消费/过期 `tool_call_id`,立即产出 `RUN_ERROR` 并记录审计日志。
## 测试策略
- 单元测试:
- 配置解析与 key 解析
- run/resume 状态机与幂等
- LiteLLM usage 聚合
- 集成测试:
- API 入队
- Worker 消费
- SSE 事件顺序与终态
- E2E:
- run 成功链路
- interrupt + resume 链路
- tool 调用链路
## 迁移策略
- 阶段 0(本次):硬切删除旧代码、旧测试、旧文档契约。
- 阶段 1:搭建新架构骨架和最小可运行 run 流程。
- 阶段 2:接入 CrewAI + ag-ui-crewai + LiteLLM 完整链路。
- 阶段 3:补齐可观测性、压测与稳定性治理。
## 验收标准
- 后端仓库不存在旧 `v1/agent` 与 `core/agent` 旧实现。
- 所有 Agent 相关旧测试与旧文档契约已移除。
- 新方案设计文档明确覆盖六项要求并可进入实现阶段。
@@ -1,574 +0,0 @@
# Agent 后端重建 Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** 在后端重建 Agent 运行时,满足队列异步、CrewAI 配置打通、AG-UI 工具中断恢复、LiteLLM 计量、以及 `sessions.state_snapshot` 持久化要求。
**Architecture:** `v1/agent` 仅做 API/鉴权/参数校验与 SSE 输出,`core/agent` 负责编排与执行。Agent 创建配置由 `system_agents`(数据库)+ `core/config/static/crewai/*.yaml`(静态模板)合并生成。run/resume 全链路通过 Celery Worker 执行,状态写入 `sessions.state_snapshot`。
**Tech Stack:** FastAPI, Celery, Redis, CrewAI, ag-ui-crewai, LiteLLM, SQLAlchemy, Alembic, pytest
---
### Task 1: 建立配置聚合器(system_agents + static/crewai)
**Files:**
- Create: `backend/src/core/agent/infrastructure/config/resolver.py`
- Modify: `backend/src/core/config/static/crewai/agents.yaml`
- Modify: `backend/src/core/config/static/crewai/tasks.yaml`
- Create: `backend/src/core/config/static/crewai/workflow.yaml`
- Create: `backend/src/core/config/static/crewai/tools.yaml`
- Test: `backend/tests/unit/core/agent/test_config_resolver.py`
**Step 1: Write the failing test**
```python
def test_resolver_merges_system_agents_and_static_templates():
resolved = resolve_agent_runtime_config(...)
assert resolved.intent.llm.model_code == "deepseek-v3.2"
assert "intent" in resolved.workflow_stages
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
Expected: FAIL with `NameError` or import not found
**Step 3: Write minimal implementation**
```python
def resolve_agent_runtime_config(system_agents: list[dict], static_cfg: dict) -> RuntimeConfig:
by_type = {item["agent_type"]: item for item in system_agents}
return RuntimeConfig.from_sources(by_type=by_type, static_cfg=static_cfg)
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/config/resolver.py backend/src/core/config/static/crewai backend/tests/unit/core/agent/test_config_resolver.py
git commit -m "feat: add system_agents and static crewai config resolver"
```
### Task 2: 统一 LLM Key 与模型配置入口
**Files:**
- Modify: `backend/src/core/config/settings.py`
- Modify: `.env.example`
- Create: `backend/tests/unit/core/config/test_llm_settings.py`
**Step 1: Write the failing test**
```python
def test_llm_keys_read_from_settings(monkeypatch):
monkeypatch.setenv("SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK", "k1")
s = Settings()
assert s.llm.provider_keys.deepseek == "k1"
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
Expected: FAIL with missing `llm` field
**Step 3: Write minimal implementation**
```python
class LLMProviderKeys(BaseModel):
deepseek: str | None = None
class LLMSettings(BaseModel):
provider_keys: LLMProviderKeys = LLMProviderKeys()
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/config/settings.py .env.example backend/tests/unit/core/config/test_llm_settings.py
git commit -m "feat: centralize llm provider keys in settings"
```
### Task 3: sessions 表状态快照契约落地
**Files:**
- Create: `backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py`
- Modify: `backend/src/models/agent_chat_session.py`
- Create: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
**Step 1: Write the failing test**
```python
def test_sessions_has_state_snapshot_column(db_inspector):
columns = db_inspector.get_columns("sessions")
assert "state_snapshot" in [c["name"] for c in columns]
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
Expected: FAIL when migration not applied
**Step 3: Write minimal implementation**
```python
def upgrade() -> None:
op.add_column("sessions", sa.Column("state_snapshot", postgresql.JSONB, nullable=True))
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py backend/src/models/agent_chat_session.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
git commit -m "feat(db): enforce sessions state_snapshot contract"
```
### Task 3.1: 会话与消息持久化仓储
**Files:**
- Create: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Create: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
**Step 1: Write the failing test**
```python
def test_run_persists_user_and_assistant_messages(db_session):
run = execute_run(...)
rows = list_messages(session_id=run.session_id)
assert rows[0].role == "user"
assert rows[1].role == "assistant"
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
async def append_message(...):
session.add(AgentChatMessage(...))
async def update_session_aggregate(...):
session_obj.message_count = message_count
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/persistence backend/tests/integration/core/agent/test_session_message_persistence.py
git commit -m "feat: persist session lifecycle and messages for agent runs"
```
### Task 4: 定义 state_snapshot 结构与并发语义
**Files:**
- Create: `backend/src/core/agent/domain/state_snapshot.py`
- Create: `backend/tests/unit/core/agent/test_state_snapshot.py`
**Step 1: Write the failing test**
```python
def test_pending_tool_call_snapshot_contains_correlation_fields():
snap = StateSnapshot.new(...)
pending = snap.pending_tool_calls[0]
assert pending.tool_call_id
assert pending.status == "PENDING_APPROVAL"
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
class PendingToolCall(BaseModel):
tool_call_id: str
tool_name: str
status: Literal["PENDING_APPROVAL", "APPROVED", "EXECUTED", "REJECTED", "EXPIRED"]
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/domain/state_snapshot.py backend/tests/unit/core/agent/test_state_snapshot.py
git commit -m "feat: define sessions state_snapshot schema for run and tool state"
```
### Task 5: 工具路由策略(前端/后端/审批)
**Files:**
- Create: `backend/src/core/agent/domain/tool_policy.py`
- Create: `backend/tests/unit/core/agent/test_tool_policy.py`
**Step 1: Write the failing test**
```python
def test_frontend_tool_requires_interrupt_and_client_execution():
decision = classify_tool_call(name="ui.navigate_to", source="request.tools")
assert decision.mode == "FRONTEND_EXECUTE"
def test_backend_approval_tool_returns_interrupt_but_executes_on_backend_after_approve():
decision = classify_tool_call(name="srv.transfer_funds", requires_approval=True)
assert decision.mode == "BACKEND_APPROVAL_INTERRUPT"
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
if tool_name.startswith("ui."):
return ToolDecision(mode="FRONTEND_EXECUTE")
if requires_approval:
return ToolDecision(mode="BACKEND_APPROVAL_INTERRUPT")
return ToolDecision(mode="BACKEND_DIRECT_EXECUTE")
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/domain/tool_policy.py backend/tests/unit/core/agent/test_tool_policy.py
git commit -m "feat: add frontend/backend tool policy and approval routing"
```
### Task 6: tool_call 与 tool_result 对账机制
**Files:**
- Create: `backend/src/core/agent/domain/tool_correlation.py`
- Create: `backend/tests/unit/core/agent/test_tool_correlation.py`
**Step 1: Write the failing test**
```python
def test_rejects_tool_result_when_tool_call_id_not_pending():
store = PendingToolStore([])
with pytest.raises(ToolCorrelationError):
store.apply_result(tool_call_id="unknown", result={"ok": True})
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def apply_result(self, *, tool_call_id: str, result: dict) -> None:
pending = self._pending.get(tool_call_id)
if pending is None:
raise ToolCorrelationError("tool_call_id not pending")
pending.result = result
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/domain/tool_correlation.py backend/tests/unit/core/agent/test_tool_correlation.py
git commit -m "feat: add tool call/result correlation guard"
```
### Task 7: Celery run/resume 异步任务
**Files:**
- Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
- Create: `backend/src/core/agent/application/run_service.py`
- Create: `backend/src/core/agent/application/resume_service.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
**Step 1: Write the failing test**
```python
def test_run_api_enqueues_celery_task(client):
resp = client.post("/api/v1/agent/runs", json={...})
assert resp.status_code == 202
def test_resume_updates_session_status_and_snapshot(client):
resp = client.post("/api/v1/agent/runs/r1/resume", json={...})
assert resp.status_code == 202
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def enqueue_run(cmd: RunCommand) -> str:
task = run_agent_task.apply_async(args=[cmd.model_dump()])
return task.id
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/application backend/src/core/agent/infrastructure/queue backend/tests/integration/core/agent/test_queue_run_resume.py
git commit -m "feat: add celery-based run and resume tasks"
```
### Task 8: CrewAI 运行时加载与创建
**Files:**
- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
- Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
**Step 1: Write the failing test**
```python
def test_runtime_creates_agents_tasks_from_resolved_config():
runtime = CrewAIRuntime(...)
crew = runtime.build_crew(message="hello")
assert len(crew.agents) >= 1
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def build_crew(self, *, message: str) -> Crew:
agents = self._factory.build_agents(self._config)
tasks = self._factory.build_tasks(self._config, message=message)
return Crew(agents=agents, tasks=tasks)
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/crewai backend/tests/unit/core/agent/test_crewai_runtime.py
git commit -m "feat: create crewai runtime from resolved config"
```
### Task 9: AG-UI 与 ag-ui-crewai 事件桥
**Files:**
- Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
- Create: `backend/src/core/agent/infrastructure/agui/stream.py`
- Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
**Step 1: Write the failing test**
```python
def test_agui_stream_emits_required_lifecycle():
events = to_agui_events(internal_events=[...])
assert events[0]["type"] == "RUN_STARTED"
assert events[-1]["type"] in {"RUN_FINISHED", "RUN_ERROR"}
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def to_agui_events(internal_events: list[dict]) -> list[dict]:
return [map_event(e) for e in internal_events]
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/agui backend/tests/unit/core/agent/test_agui_bridge.py
git commit -m "feat: add ag-ui and ag-ui-crewai event bridge"
```
### Task 10: LiteLLM 调用统计与会话聚合
**Files:**
- Create: `backend/src/core/agent/infrastructure/litellm/client.py`
- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
- Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
**Step 1: Write the failing test**
```python
def test_tracker_aggregates_per_call_usage_and_cost():
t = UsageTracker()
t.add({"input_tokens": 10, "output_tokens": 5, "cost": "0.1"})
assert t.snapshot()["total_tokens"] == 15
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def add(self, usage: dict[str, object]) -> None:
self.input_tokens += int(usage.get("input_tokens", 0))
self.output_tokens += int(usage.get("output_tokens", 0))
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/litellm backend/tests/unit/core/agent/test_litellm_usage.py
git commit -m "feat: add litellm usage and cost tracking"
```
### Task 11: v1/agent 薄层 API + SSE 出口
**Files:**
- Create: `backend/src/v1/agent/router.py`
- Create: `backend/src/v1/agent/schemas.py`
- Create: `backend/src/v1/agent/dependencies.py`
- Create: `backend/src/v1/agent/service.py`
- Modify: `backend/src/v1/router.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
**Step 1: Write the failing test**
```python
def test_run_endpoint_returns_sse_and_not_blocking(client):
resp = client.post("/api/v1/agent/runs", json={...})
assert resp.status_code == 202
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
@router.post("/runs", status_code=202)
async def create_run(...):
task_id = service.enqueue_run(input_data)
return {"task_id": task_id}
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/v1/agent backend/src/v1/router.py backend/tests/integration/v1/agent/test_routes.py
git commit -m "feat: add thin v1 agent api and sse endpoints"
```
### Task 12: 端到端验证与文档回填
**Files:**
- Modify: `docs/runtime/runtime-route.md`
- Modify: `docs/runtime/runtime-runbook.md`
**Step 1: Run unit tests**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent backend/tests/unit/core/config backend/tests/unit/database -q`
Expected: PASS
**Step 2: Run integration tests**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
Expected: PASS
**Step 3: Run lint and typecheck**
Run: `PYTHONPATH=backend/src uv run ruff check backend/src backend/tests`
Expected: PASS
Run: `PYTHONPATH=backend/src uv run basedpyright backend/src`
Expected: PASS
**Step 4: Document protocol contracts**
在运行手册中补充以下固定规则:
- `system_agents` + `static/crewai` 配置合并优先级。
- `sessions.state_snapshot` 字段结构与版本号。
- `messages` 入库顺序与 `sessions` 聚合字段更新规则。
- 工具调用审批与恢复时序图。
- tool_call/result 不匹配时的错误语义(`RUN_ERROR` + 可审计日志)。
**Step 5: Commit**
```bash
git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md
git commit -m "docs: add new agent runtime contracts and operational guide"
```
## Success Criteria
- [ ] Agent 创建配置由 `system_agents` + `core/config/static/crewai` 合并生成。
- [ ] run/resume 仅通过 Celery Worker 执行,Web 不执行编排。
- [ ] `v1/agent` 无业务编排代码。
- [ ] `sessions.state_snapshot` 承担运行态和工具审批恢复状态。
- [ ] 每次 run/resume 的会话状态变更均落库到 `sessions`。
- [ ] 用户/助手/工具消息按 `messages` 约束落库,`seq` 单调递增。
- [ ] 前端工具与后端工具(审批/非审批)策略完整可测。
- [ ] tool_call 与 tool_result 具备强关联校验并可恢复/报错。
- [ ] LiteLLM 逐次计量与 run 聚合可落库。
@@ -1,199 +0,0 @@
# Agent Architecture Simplification Design
**Date:** 2026-03-04
**Status:** Approved
**Author:** AI Assistant
## Overview
Simplify the agent configuration architecture by removing the redundant `user_agents` table and renaming `user_agent_catalog` to `system_agents`.
## Problem Statement
Current architecture has redundant data:
- `user_agent_catalog`: System-level agent configurations (3 agent types for all users)
- `user_agents`: Per-user agent instances (copies catalog data for each user)
Since every user has the same 3 agents with identical configurations (from catalog), maintaining `user_agents` table creates unnecessary complexity and data duplication.
## Goals
1. Remove `user_agents` table and related code
2. Rename `user_agent_catalog` to `system_agents` for clarity
3. Preserve ability for future user-level prompt customization via `profiles.settings`
4. Maintain backward compatibility in deployment process
## Non-Goals
- User-level agent configuration (LLM selection, temperature, etc.)
- User-level prompt customization implementation (deferred to future iteration)
## Architecture Changes
### Current Architecture
```
user_agent_catalog (system config)
↓ (trigger copies for each new user)
user_agents (per-user instances)
```
### New Architecture
```
system_agents (shared by all users)
profiles.settings.agent_prompts (future: user-level prompts)
```
### Data Flow
1. System startup: Load `system_agents` from YAML
2. User creation: No longer creates `user_agents` records
3. Runtime (future): Read from `system_agents` + merge with `profiles.settings.agent_prompts`
## Database Migration
### Changes
1. **Delete `memories.agent_id` column**
- Remove foreign key `fk_memories_agent_id`
- Remove check constraint `chk_memory_type_agent_id`
- Remove index `ix_memories_agent_type_status`
- Drop column `agent_id`
2. **Delete `user_agents` table**
- Remove all RLS policies
- Remove indexes: `ix_user_agents_agent_type`, `ix_user_agents_status`
- Remove foreign keys: `fk_user_agents_user_id`, `fk_user_agents_llm_id`, etc.
- Remove check constraint `chk_agent_type`
- Remove unique constraint `uq_user_agents_user_id_agent_type`
- Drop table
3. **Rename `user_agent_catalog` → `system_agents`**
- Remove old RLS policies
- Rename table
- Rename constraints: `fk_user_agent_catalog_llm_id` → `fk_system_agents_llm_id`
- Rename check constraint: `chk_user_agent_catalog_status` → `chk_system_agents_status`
- Re-create RLS policies with new table name
4. **Update trigger `create_profile_for_new_user()`**
- Remove logic that inserts into `user_agents`
- Initialize `profiles.settings.agent_prompts` with empty object
5. **Update existing `profiles.settings`**
- Add `agent_prompts: {}` to all existing profiles
### Downgrade Path
- Re-create `user_agents` table with all constraints and indexes
- Restore `memories.agent_id` column and constraints
- Rename `system_agents` → `user_agent_catalog`
- Restore original trigger
## Code Changes
### Model Layer
**Delete:**
- `backend/src/models/user_agents.py`
**Rename:**
- `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
- Class `UserAgentCatalog` → `SystemAgents`
**Update:**
- `backend/src/models/__init__.py` - Update imports and exports
### Configuration Layer
**Rename:**
- `backend/src/core/config/static/database/user_agent_catalog.yaml`
→ `backend/src/core/config/static/database/system_agents.yaml`
**Update:**
- `backend/src/core/config/initial/init_data.py`
- `UserAgentCatalogSeed` → `SystemAgentsSeed`
- `UserAgentCatalogYaml` → `SystemAgentsYaml`
- Import from `models.system_agents`
- Path: `system_agents.yaml`
- Function: `initialize_user_agent_catalog()` → `initialize_system_agents()`
### Future: Profile Settings Structure (Deferred)
```json
{
"agent_prompts": {
"INTENT_RECOGNITION": "custom prompt...",
"TASK_EXECUTION": "custom prompt...",
"RESULT_REPORTING": "custom prompt..."
}
}
```
## Testing Strategy
### Migration Tests
- Verify `user_agents` table is deleted
- Verify `system_agents` table exists with correct structure
- Verify trigger no longer creates `user_agents` records
- Verify `profiles.settings.agent_prompts` is initialized
- Verify downgrade path works correctly
### Model Tests
- Verify `SystemAgents` model CRUD operations
- Verify `Profile.settings` JSONB storage
### Integration Tests
- Verify `initialize_system_agents()` loads from YAML
- Verify data is correctly inserted into `system_agents` table
## Deployment Considerations
### Pre-deployment
- Backup database (especially `user_agents` if any data exists)
- Confirm production `user_agents` table has no critical data
### Deployment
1. Run migration: `alembic upgrade head`
2. Verify migration success
3. Restart application services
4. Verify new user registration works without `user_agents`
### Post-deployment
- Monitor application logs for any references to deleted `user_agents`
- Verify agent-related functionality still works
## Risks and Mitigations
| Risk | Mitigation |
|------|-----------|
| Existing `user_agents` data loss | Backup before migration; data is redundant anyway |
| Code still references `user_agents` | Comprehensive code search and testing |
| Trigger fails on new user creation | Test migration thoroughly; include rollback plan |
| Future need for user-level config | Can add `agent_overrides` to `profiles.settings` |
## Success Criteria
- [ ] All tests pass
- [ ] Migration runs successfully (upgrade and downgrade)
- [ ] New user registration creates profile without `user_agents` records
- [ ] System agents are loaded from YAML correctly
- [ ] No references to `user_agents` remain in codebase
## Timeline
- Design: 2026-03-04 (Completed)
- Implementation: TBD
- Testing: TBD
- Deployment: TBD
## References
- Migration file: `backend/alembic/versions/YYYYMMDD_simplify_agent_architecture.py`
- Original catalog migration: `backend/alembic/versions/50ae013ce530_add_user_agent_catalog.py`
@@ -1,844 +0,0 @@
# Agent Architecture Simplification Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Simplify agent configuration by removing redundant user_agents table and renaming user_agent_catalog to system_agents
**Architecture:** Delete user_agents table (including memories.agent_id dependency), rename user_agent_catalog to system_agents, update all references in code
**Tech Stack:** Python 3.11+, SQLAlchemy, Alembic, PostgreSQL
---
## Prerequisites
- [ ] Current branch: dev
- [ ] No uncommitted changes
- [ ] Docker services running (Supabase local)
## Task 1: Create Database Migration
**Files:**
- Create: `backend/alembic/versions/20260304_simplify_agent_architecture.py`
**Step 1: Create migration file**
Run: `cd backend && uv run alembic revision -m "simplify_agent_architecture"`
Expected: New migration file created with revision ID
**Step 2: Write migration upgrade logic**
Edit the generated migration file with this complete upgrade function:
```python
def upgrade() -> None:
    """Apply the agent-architecture simplification.

    Steps, in order:

    1. Drop ``memories.agent_id`` together with its foreign key, check
       constraint and index.
    2. Drop the ``user_agents`` table (RLS policies, constraints and
       indexes first).
    3. Rename ``user_agent_catalog`` to ``system_agents``, rename its
       constraints, and re-create the RLS policies under the new name.
    4. Replace ``create_profile_for_new_user()`` and its trigger so that a
       new user only gets a profile row seeded with an empty
       ``agent_prompts`` object.
    5. Backfill ``agent_prompts`` on existing profiles that lack it.
    """
    # 1. Delete memories.agent_id dependencies
    op.drop_constraint("fk_memories_agent_id", "memories", type_="foreignkey")
    op.drop_constraint("chk_memory_type_agent_id", "memories", type_="check")
    op.execute("DROP INDEX IF EXISTS ix_memories_agent_type_status")
    op.drop_column("memories", "agent_id")

    # 2. Delete user_agents table
    _drop_rls("user_agents")
    op.drop_constraint("fk_user_agents_updated_by", "user_agents", type_="foreignkey")
    op.drop_constraint("fk_user_agents_created_by", "user_agents", type_="foreignkey")
    op.drop_constraint("fk_user_agents_llm_id", "user_agents", type_="foreignkey")
    op.drop_constraint("fk_user_agents_user_id", "user_agents", type_="foreignkey")
    op.drop_constraint("chk_agent_type", "user_agents", type_="check")
    op.drop_constraint("uq_user_agents_user_id_agent_type", "user_agents", type_="unique")
    op.execute("DROP INDEX IF EXISTS ix_user_agents_status")
    op.execute("DROP INDEX IF EXISTS ix_user_agents_agent_type")
    op.drop_table("user_agents")

    # 3. Rename user_agent_catalog to system_agents
    # Policies carry the table name in their identifiers, so they are dropped
    # under the old name and re-created under the new one.
    _drop_rls("user_agent_catalog")
    op.rename_table("user_agent_catalog", "system_agents")
    op.execute(
        "ALTER TABLE system_agents RENAME CONSTRAINT fk_user_agent_catalog_llm_id "
        "TO fk_system_agents_llm_id"
    )
    op.execute(
        "ALTER TABLE system_agents RENAME CONSTRAINT chk_user_agent_catalog_status "
        "TO chk_system_agents_status"
    )
    _enable_rls("system_agents")

    # 4. Update trigger
    # The trigger must be dropped before the function it depends on.
    op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
    op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
    op.execute("""
        CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
        RETURNS trigger
        LANGUAGE plpgsql
        SECURITY DEFINER
        SET search_path = public
        AS $$
        BEGIN
            INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
            VALUES (
                NEW.id,
                COALESCE(
                    NEW.raw_user_meta_data ->> 'username',
                    split_part(NEW.email, '@', 1),
                    'user_' || substring(NEW.id::text, 1, 8)
                ),
                NULL,
                NULL,
                '{"agent_prompts": {}}'::jsonb,
                now(),
                now()
            )
            ON CONFLICT (id) DO NOTHING;
            RETURN NEW;
        END;
        $$
    """)
    op.execute("""
        CREATE TRIGGER on_auth_user_created
        AFTER INSERT ON auth.users
        FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
    """)

    # 5. Update existing profiles.settings
    # `NOT settings ? 'agent_prompts'` evaluates to NULL when settings is NULL,
    # which would skip exactly the rows the COALESCE below is meant to handle;
    # match them explicitly.
    op.execute("""
        UPDATE profiles
        SET settings = jsonb_set(
            COALESCE(settings, '{}'::jsonb),
            '{agent_prompts}',
            '{}'::jsonb
        )
        WHERE settings IS NULL OR NOT settings ? 'agent_prompts'
    """)
```
**Step 3: Write migration downgrade logic**
Add this complete downgrade function:
```python
def downgrade() -> None:
    """Revert the agent-architecture simplification.

    Steps, in order:

    1. Restore the original ``create_profile_for_new_user()`` function and
       trigger, which also copies every ``user_agent_catalog`` row into
       ``user_agents`` for the new user.
    2. Rename ``system_agents`` back to ``user_agent_catalog`` (table,
       constraints, RLS policies).
    3. Re-create the ``user_agents`` table with its constraints, indexes
       and RLS policies.
    4. Re-create ``memories.agent_id`` with its foreign key, index and
       check constraint.

    Data removed by ``upgrade()`` is not restored: ``user_agents`` comes
    back empty and ``memories.agent_id`` comes back as NULL on every row.
    """
    # 1. Revert trigger
    # The function body references user_agents / user_agent_catalog, which are
    # only re-created in steps 2-3; PL/pgSQL resolves them at execution time.
    op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
    op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
    op.execute("""
        CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
        RETURNS trigger
        LANGUAGE plpgsql
        SECURITY DEFINER
        SET search_path = public
        AS $$
        BEGIN
            INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
            VALUES (
                NEW.id,
                COALESCE(
                    NEW.raw_user_meta_data ->> 'username',
                    split_part(NEW.email, '@', 1),
                    'user_' || substring(NEW.id::text, 1, 8)
                ),
                NULL,
                NULL,
                '{}'::jsonb,
                now(),
                now()
            )
            ON CONFLICT (id) DO NOTHING;
            INSERT INTO public.user_agents (id, user_id, llm_id, agent_type, config, status, created_by, updated_by)
            SELECT
                gen_random_uuid(),
                NEW.id,
                uac.llm_id,
                uac.agent_type,
                uac.config,
                uac.status,
                NEW.id,
                NEW.id
            FROM public.user_agent_catalog uac;
            RETURN NEW;
        END;
        $$
    """)
    op.execute("""
        CREATE TRIGGER on_auth_user_created
        AFTER INSERT ON auth.users
        FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
    """)

    # 2. Revert rename: system_agents -> user_agent_catalog
    _drop_rls("system_agents")
    op.rename_table("system_agents", "user_agent_catalog")
    op.execute(
        "ALTER TABLE user_agent_catalog RENAME CONSTRAINT fk_system_agents_llm_id "
        "TO fk_user_agent_catalog_llm_id"
    )
    op.execute(
        "ALTER TABLE user_agent_catalog RENAME CONSTRAINT chk_system_agents_status "
        "TO chk_user_agent_catalog_status"
    )
    _enable_rls("user_agent_catalog")

    # 3. Recreate user_agents table
    op.create_table(
        "user_agents",
        sa.Column("id", sa.UUID(), nullable=False),
        sa.Column("user_id", sa.UUID(), nullable=False),
        sa.Column("llm_id", sa.UUID(), nullable=False),
        sa.Column("agent_type", sa.String(length=20), nullable=False),
        sa.Column(
            "config",
            postgresql.JSONB(astext_type=sa.Text()),
            server_default="{}",
            nullable=False,
        ),
        sa.Column("status", sa.String(length=20), nullable=False),
        sa.Column("created_by", sa.UUID(), nullable=True),
        sa.Column("updated_by", sa.UUID(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_unique_constraint(
        "uq_user_agents_user_id_agent_type",
        "user_agents",
        ["user_id", "agent_type"]
    )
    op.execute(
        "CREATE INDEX ix_user_agents_agent_type ON user_agents (agent_type)"
    )
    op.execute(
        "CREATE INDEX ix_user_agents_status ON user_agents (status)"
    )
    op.execute(
        "ALTER TABLE user_agents ADD CONSTRAINT chk_agent_type "
        "CHECK (agent_type IN ('INTENT_RECOGNITION', 'TASK_EXECUTION', 'RESULT_REPORTING'))"
    )
    op.create_foreign_key(
        "fk_user_agents_user_id",
        "user_agents",
        "users",
        ["user_id"],
        ["id"],
        referent_schema="auth",
        ondelete="CASCADE",
    )
    op.create_foreign_key(
        "fk_user_agents_llm_id",
        "user_agents",
        "llms",
        ["llm_id"],
        ["id"],
        ondelete="RESTRICT",
    )
    op.create_foreign_key(
        "fk_user_agents_created_by",
        "user_agents",
        "users",
        ["created_by"],
        ["id"],
        referent_schema="auth",
        ondelete="SET NULL",
    )
    op.create_foreign_key(
        "fk_user_agents_updated_by",
        "user_agents",
        "users",
        ["updated_by"],
        ["id"],
        referent_schema="auth",
        ondelete="SET NULL",
    )
    _enable_rls("user_agents")

    # 4. Recreate memories.agent_id
    op.add_column(
        "memories",
        sa.Column("agent_id", sa.UUID(), nullable=True)
    )
    op.create_foreign_key(
        "fk_memories_agent_id",
        "memories",
        "user_agents",
        ["agent_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.execute(
        "CREATE INDEX ix_memories_agent_type_status ON memories (agent_id, memory_type, status)"
    )
    # agent_id was just re-created as NULL on every row, so any existing
    # memory_type = 'work' row violates this check. A plain ADD CONSTRAINT
    # validates existing rows and would abort the downgrade; NOT VALID enforces
    # the rule for new writes only. Repair the data and run
    # `ALTER TABLE memories VALIDATE CONSTRAINT chk_memory_type_agent_id`
    # afterwards if a fully validated constraint is required.
    op.execute(
        "ALTER TABLE memories ADD CONSTRAINT chk_memory_type_agent_id "
        "CHECK ((memory_type = 'work' AND agent_id IS NOT NULL) OR "
        "(memory_type = 'user' AND agent_id IS NULL)) NOT VALID"
    )
```
**Step 4: Add helper functions**
Add these helper functions at the end of the migration file:
```python
def _enable_rls(table_name: str) -> None:
    """Enable row level security on a table with deny-all policies.

    For the ``anon`` and ``authenticated`` roles: drop any existing
    select/insert/update/delete policies named
    ``{role}_{action}_{table_name}``, enable RLS on the table, then create
    one policy per role and action whose predicates are all ``false``.

    Args:
        table_name: Name of the table; interpolated directly into the SQL.
    """
    roles = ("anon", "authenticated")
    # (action, predicate clause) in the order the policies are created.
    policy_specs = (
        ("select", "USING (false)"),
        ("insert", "WITH CHECK (false)"),
        ("update", "USING (false) WITH CHECK (false)"),
        ("delete", "USING (false)"),
    )

    # Clear leftovers first so the CREATE POLICY statements cannot collide.
    for role_name in roles:
        for action_name, _ in policy_specs:
            op.execute(
                f"DROP POLICY IF EXISTS {role_name}_{action_name}_{table_name} ON {table_name}"
            )

    op.execute(f"ALTER TABLE {table_name} ENABLE ROW LEVEL SECURITY")

    for role_name in roles:
        for action_name, predicate in policy_specs:
            op.execute(
                f"CREATE POLICY {role_name}_{action_name}_{table_name} ON {table_name} "
                f"FOR {action_name.upper()} TO {role_name} {predicate}"
            )
def _drop_rls(table_name: str) -> None:
    """Remove the per-role policies from a table and disable RLS on it.

    Drops the ``{role}_{action}_{table_name}`` policies for the ``anon`` and
    ``authenticated`` roles (delete, update, insert, select, in that order),
    then disables row level security on the table.

    Args:
        table_name: Name of the table; interpolated directly into the SQL.
    """
    drop_order = ("delete", "update", "insert", "select")
    for role_name in ("anon", "authenticated"):
        for action_name in drop_order:
            op.execute(
                f"DROP POLICY IF EXISTS {role_name}_{action_name}_{table_name} ON {table_name}"
            )
    op.execute(f"ALTER TABLE {table_name} DISABLE ROW LEVEL SECURITY")
```
**Step 5: Verify migration file**
Check that all imports are correct:
```python
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
```
**Step 6: Commit migration**
```bash
git add backend/alembic/versions/20260304_simplify_agent_architecture.py
git commit -m "feat(db): add migration to simplify agent architecture"
```
---
## Task 2: Delete UserAgents Model
**Files:**
- Delete: `backend/src/models/user_agents.py`
- Modify: `backend/src/models/__init__.py`
**Step 1: Remove import from `models/__init__.py`**
Edit `backend/src/models/__init__.py`:
Remove these lines:
```python
from models.user_agents import UserAgent
```
And remove `"UserAgent"` from `__all__` list.
**Step 2: Delete user_agents.py file**
```bash
rm backend/src/models/user_agents.py
```
**Step 3: Verify no other imports**
Run: `cd backend && grep -r "from models.user_agents" src/`
Expected: No results (or only in `__init__.py`, which we already fixed)
**Step 4: Commit**
```bash
git add backend/src/models/user_agents.py backend/src/models/__init__.py
git commit -m "refactor(models): remove UserAgents model"
```
---
## Task 3: Rename UserAgentCatalog to SystemAgents
**Files:**
- Rename: `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
- Modify: `backend/src/models/__init__.py`
**Step 1: Rename model file**
```bash
mv backend/src/models/user_agent_catalog.py backend/src/models/system_agents.py
```
**Step 2: Update class name in system_agents.py**
Edit `backend/src/models/system_agents.py`:
Change:
```python
class UserAgentCatalog(TimestampMixin, Base):
__tablename__: str = "user_agent_catalog"
```
To:
```python
class SystemAgents(TimestampMixin, Base):
__tablename__: str = "system_agents"
```
**Step 3: Update imports in `models/__init__.py`**
Edit `backend/src/models/__init__.py`:
Change:
```python
from models.user_agent_catalog import UserAgentCatalog
```
To:
```python
from models.system_agents import SystemAgents
```
And change `"UserAgentCatalog"` to `"SystemAgents"` in `__all__` list.
**Step 4: Commit**
```bash
git add backend/src/models/
git commit -m "refactor(models): rename UserAgentCatalog to SystemAgents"
```
---
## Task 4: Update Configuration Files
**Files:**
- Rename: `backend/src/core/config/static/database/user_agent_catalog.yaml`
→ `backend/src/core/config/static/database/system_agents.yaml`
- Modify: `backend/src/core/config/initial/init_data.py`
**Step 1: Rename YAML file**
```bash
mv backend/src/core/config/static/database/user_agent_catalog.yaml \
backend/src/core/config/static/database/system_agents.yaml
```
**Step 2: Update init_data.py imports**
Edit `backend/src/core/config/initial/init_data.py`:
Change:
```python
from models.user_agent_catalog import UserAgentCatalog
```
To:
```python
from models.system_agents import SystemAgents
```
**Step 3: Update Pydantic models**
Change:
```python
class UserAgentCatalogSeed(BaseModel):
agent_type: str
llm_model_code: str
status: str
config: dict[str, Any]
class UserAgentCatalogYaml(BaseModel):
agents: list[UserAgentCatalogSeed]
```
To:
```python
class SystemAgentsSeed(BaseModel):
agent_type: str
llm_model_code: str
status: str
config: dict[str, Any]
class SystemAgentsYaml(BaseModel):
agents: list[SystemAgentsSeed]
```
**Step 4: Update path function**
Change:
```python
def _default_user_agent_catalog_path() -> Path:
return (
Path(__file__).resolve().parents[1]
/ "static"
/ "database"
/ "user_agent_catalog.yaml"
)
```
To:
```python
def _default_system_agents_path() -> Path:
return (
Path(__file__).resolve().parents[1]
/ "static"
/ "database"
/ "system_agents.yaml"
)
```
**Step 5: Update load function**
Change:
```python
def load_user_agent_catalog(catalog_path: Path | None = None) -> dict[str, Any]:
path = catalog_path or _default_user_agent_catalog_path()
with path.open("r", encoding="utf-8") as file:
loaded = yaml.safe_load(file) or {}
if not isinstance(loaded, dict):
raise ValueError(f"Invalid user agent catalog format: {path}")
raw_agents = loaded.get("agents", [])
if not isinstance(raw_agents, list):
raise ValueError(f"Invalid user agent catalog agents section: {path}")
try:
parsed = UserAgentCatalogYaml.model_validate({"agents": list(raw_agents)})
except ValidationError as exc:
raise ValueError(f"Invalid user agent catalog data: {path}") from exc
return parsed.model_dump()
```
To:
```python
def load_system_agents(catalog_path: Path | None = None) -> dict[str, Any]:
path = catalog_path or _default_system_agents_path()
with path.open("r", encoding="utf-8") as file:
loaded = yaml.safe_load(file) or {}
if not isinstance(loaded, dict):
raise ValueError(f"Invalid system agents format: {path}")
raw_agents = loaded.get("agents", [])
if not isinstance(raw_agents, list):
raise ValueError(f"Invalid system agents agents section: {path}")
try:
parsed = SystemAgentsYaml.model_validate({"agents": list(raw_agents)})
except ValidationError as exc:
raise ValueError(f"Invalid system agents data: {path}") from exc
return parsed.model_dump()
```
**Step 6: Update upsert function**
Change:
```python
async def _upsert_user_agent_catalog(
session: AsyncSession,
*,
agent_type: str,
llm_id: uuid.UUID,
status: str,
config: dict[str, Any],
) -> None:
result = await session.execute(
select(UserAgentCatalog).where(UserAgentCatalog.agent_type == agent_type)
)
catalog_entry = result.scalar_one_or_none()
if catalog_entry is None:
session.add(
UserAgentCatalog(
agent_type=agent_type,
llm_id=llm_id,
status=status,
config=config,
)
)
else:
catalog_entry.llm_id = llm_id
catalog_entry.status = status
catalog_entry.config = config
```
To:
```python
async def _upsert_system_agents(
session: AsyncSession,
*,
agent_type: str,
llm_id: uuid.UUID,
status: str,
config: dict[str, Any],
) -> None:
result = await session.execute(
select(SystemAgents).where(SystemAgents.agent_type == agent_type)
)
catalog_entry = result.scalar_one_or_none()
if catalog_entry is None:
session.add(
SystemAgents(
agent_type=agent_type,
llm_id=llm_id,
status=status,
config=config,
)
)
else:
catalog_entry.llm_id = llm_id
catalog_entry.status = status
catalog_entry.config = config
```
**Step 7: Update initialize function**
Change:
```python
async def initialize_user_agent_catalog() -> None:
"""Initialize user agent catalog from YAML."""
catalog = load_user_agent_catalog()
async with AsyncSessionLocal() as session:
async with session.begin():
for agent in catalog["agents"]:
result = await session.execute(
select(Llm).where(Llm.model_code == agent["llm_model_code"])
)
llm = result.scalar_one_or_none()
if llm is None:
raise RuntimeError(
f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
)
await _upsert_user_agent_catalog(
session,
agent_type=agent["agent_type"],
llm_id=llm.id,
status=agent["status"],
config=agent["config"],
)
logger.info("Initialized user agent catalog")
```
To:
```python
async def initialize_system_agents() -> None:
"""Initialize system agents from YAML."""
catalog = load_system_agents()
async with AsyncSessionLocal() as session:
async with session.begin():
for agent in catalog["agents"]:
result = await session.execute(
select(Llm).where(Llm.model_code == agent["llm_model_code"])
)
llm = result.scalar_one_or_none()
if llm is None:
raise RuntimeError(
f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
)
await _upsert_system_agents(
session,
agent_type=agent["agent_type"],
llm_id=llm.id,
status=agent["status"],
config=agent["config"],
)
logger.info("Initialized system agents")
```
**Step 8: Update initialize_data function**
Change:
```python
async def initialize_data() -> bool:
"""Initialize bootstrap data."""
await initialize_llm_catalog()
await initialize_user_agent_catalog()
return True
```
To:
```python
async def initialize_data() -> bool:
"""Initialize bootstrap data."""
await initialize_llm_catalog()
await initialize_system_agents()
return True
```
**Step 9: Commit**
```bash
git add backend/src/core/config/
git commit -m "refactor(config): rename user_agent_catalog to system_agents"
```
---
## Task 5: Run Migration
**Step 1: Run migration**
```bash
cd backend && uv run alembic upgrade head
```
Expected: Migration runs successfully
**Step 2: Verify tables**
Connect to database and check:
- `user_agents` table should NOT exist
- `system_agents` table should exist
- `memories.agent_id` column should NOT exist
**Step 3: Test downgrade (optional but recommended)**
```bash
cd backend && uv run alembic downgrade -1
```
Expected: Previous migration restored
**Step 4: Re-run upgrade**
```bash
cd backend && uv run alembic upgrade head
```
Expected: Migration runs successfully again
---
## Task 6: Run Tests and Linting
**Step 1: Run type checking**
```bash
cd backend && uv run basedpyright src/
```
Expected: No errors
**Step 2: Run linting**
```bash
cd backend && uv run ruff check src/
```
Expected: No errors
**Step 3: Run tests**
```bash
cd backend && uv run pytest tests/
```
Expected: All tests pass
**Step 4: Fix any failures**
If any tests fail due to UserAgent references, update them to use SystemAgents.
---
## Task 7: Final Verification
**Step 1: Search for any remaining references**
```bash
cd backend && grep -r "user_agents" src/ --include="*.py"
grep -r "UserAgent" src/ --include="*.py"
```
Expected: No results (except in migration files)
**Step 2: Test new user registration**
Start the backend server and register a new user. Verify:
- Profile is created
- No user_agents records are created
- profiles.settings contains `agent_prompts: {}`
**Step 3: Commit final changes**
```bash
git add .
git commit -m "feat: complete agent architecture simplification"
```
---
## Success Criteria
- [ ] Migration runs successfully (upgrade and downgrade)
- [ ] No UserAgent model references in code
- [ ] SystemAgents model works correctly
- [ ] All tests pass
- [ ] Linting passes
- [ ] Type checking passes
- [ ] New user registration works without user_agents
## Notes
- Keep the design document updated if any changes are made during implementation
- Test migration thoroughly before deploying to production
- Backup database before running migration in production
@@ -1,81 +0,0 @@
# Agent Runtime Closed Loop E2E Design
## 背景
当前 `test_agent_sse_flow.py` 不能稳定证明真实闭环:
- `session_id` 由随机 UUID 生成,导致 `POST /api/v1/agent/runs` 经常 404。
- 测试脚本存在不可达重复代码,诊断信息不完整。
- 未覆盖首聊自动建会话语义,和真实聊天入口不匹配。
目标是验证真实环境下业务闭环是否可用:
1. 用户请求 `agent` 路由
2. 请求进入异步任务
3. runtime 读取 `system_agents` 和 `llm` 配置并构建执行流程
4. 真实 LLM 请求发出并返回
5. `sessions`/`messages` 正确落库
6. 成本和 token 统计正确
7. 事件按 AG-UI 规范发布并可由 `stream_events` 订阅
## 设计原则
- 真实优先:不使用 mock,不替换 queue/redis/db/llm。
- 双轨验证:
- 诊断脚本用于本地排障(快速观察全链路状态)。
- pytest E2E 用例用于可重复回归。
- 明确前置条件:必须先使用 `infra/scripts/app.sh start` 启动 tmux 服务。
- 本地真实 LLM 基线:DashScope Qwen。
## API 契约调整
### `POST /api/v1/agent/runs`
- 现状:`session_id` 必填且必须存在。
- 新契约:`session_id` 可选。
- 有值:复用现有会话,校验 owner。
- 无值:在服务层先创建会话,再入队 run。
- 响应扩展:返回 `created` 标识是否为首聊自动建会话。
该契约与聊天产品行为一致:用户首条消息即可开始,不需要前置调用创建会话接口。
## 数据关系与删除语义
- `messages.session_id -> sessions.id` 为外键,且硬删除级联(`ondelete=CASCADE`)。
- 软删除需要补齐级联:
- 软删 `sessions` 时,同事务更新对应 `messages.deleted_at`。
- E2E 增加验证,确保软删后默认查询不可见。
## 测试架构
### A. 诊断脚本(根目录)
重构 `test_agent_sse_flow.py`:
- 增加环境健康检查(web/redis/db)。
- 支持两种模式:
- `--new-session`:不传 `session_id`,验证首聊自动创建。
- `--reuse-session <id>`:验证复聊路径。
- 输出结构化阶段日志:HTTP、task_id、SSE 事件、数据库断言、失败根因。
### B. pytest E2E(`backend/tests/e2e`)
新增 `test_agent_closed_loop_live.py`:
- 标记为 `live`,默认不在 CI 执行。
- 用真实 JWT、真实 HTTP 请求、真实 SSE 订阅。
- 断言最小闭环标准:
- run 返回 202
- SSE 至少收到 `RUN_STARTED` 与终态(`RUN_FINISHED` 或 `RUN_ERROR`)
- `sessions` 状态和计数更新
- `messages` 有新增记录
- token/cost 字段非负且会话聚合一致
## 验收标准
- `uv run python test_agent_sse_flow.py --new-session` 通过。
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -v -m live` 通过。
- 首聊场景不需要外部先建 `session_id`。
- 软删除会话后,消息软删除行为与约束一致。
## 风险与回退
- 真实 LLM 网络抖动会造成不稳定:通过重试和超时策略降低误报。
- 生产契约变更风险:保持字段向后兼容(原 `session_id` 仍可传)。
- 如果新契约引入问题,可临时退回“必传 session_id”路径并保留测试脚本诊断能力。
@@ -1,230 +0,0 @@
# Agent Runtime Closed Loop E2E Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** 让 agent 闭环在真实本地环境中可验证:`runs` 支持首聊自动建会话,并通过真实异步任务、真实 LLM、真实落库与真实 SSE 证明端到端可用。
**Architecture:** 在 `v1/agent` 服务层引入“可选 session_id + 自动建会话”语义;保持已有 owner 鉴权路径。重构诊断脚本并新增 live E2E 用例,统一验证 run 入队、事件流、数据库状态、成本统计与删除语义。通过最小侵入改造现有 run/resume 流程,确保兼容已存在调用。
**Tech Stack:** FastAPI, SQLAlchemy async, Celery, Redis Stream, LiteLLM, PyJWT, pytest, httpx
---
### Task 1: 扩展 API 契约(session_id 可选)
**Files:**
- Modify: `backend/src/v1/agent/schemas.py`
- Modify: `backend/src/v1/agent/router.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
**Step 1: Write the failing test**
在 `test_routes.py` 新增用例:请求体不传 `session_id` 仍返回 202,且响应含 `session_id`。
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -k "runs and session" -v`
Expected: FAIL,提示 `session_id` 缺失导致 422 或 mock 接口签名不匹配。
**Step 3: Write minimal implementation**
- `RunRequest.session_id` 改为可选。
- `enqueue_run` 调用 service 时传可选值。
- `TaskAcceptedResponse` 增加 `created: bool` 字段。
**Step 4: Run test to verify it passes**
Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add backend/src/v1/agent/schemas.py backend/src/v1/agent/router.py backend/tests/integration/v1/agent/test_routes.py
git commit -m "feat: allow agent runs without pre-created session"
```
### Task 2: 服务层支持自动建会话并保持鉴权
**Files:**
- Modify: `backend/src/v1/agent/service.py`
- Modify: `backend/src/v1/agent/repository.py`
- Modify: `backend/src/v1/agent/dependencies.py`
- Test: `backend/tests/unit/v1/agent/test_service.py` (new)
**Step 1: Write the failing test**
新增单测覆盖:
- `session_id is None` 时调用 `create_session_for_user` 并返回 `created=True`
- `session_id 有值` 时复用并校验 owner
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
Expected: FAIL,当前 service 无自动建会话能力。
**Step 3: Write minimal implementation**
- repository 增加 `create_session_for_user(user_id)`
- service `enqueue_run` 处理两条路径:
- 无 `session_id`:先创建 session。
- 有 `session_id`:校验 owner。
- 返回 `TaskAccepted(task_id, session_id, created)`
**Step 4: Run test to verify it passes**
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add backend/src/v1/agent/service.py backend/src/v1/agent/repository.py backend/src/v1/agent/dependencies.py backend/tests/unit/v1/agent/test_service.py
git commit -m "feat: auto-create chat session on first agent run"
```
### Task 3: 对齐 runtime 闭环数据断言(messages/sessions/cost)
**Files:**
- Modify: `backend/src/core/agent/application/run_service.py`
- Modify: `backend/src/core/agent/application/resume_service.py`
- Modify: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
**Step 1: Write the failing test**
在集成测试增加断言:
- `sessions.total_tokens`、`sessions.total_cost` 有更新
- `messages` 的 token/cost 字段与 session 聚合一致
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
Expected: FAIL,当前默认 token/cost 为 0,未做聚合更新。
**Step 3: Write minimal implementation**
- run/resume 流程接入 usage/cost 结果(来自 litellm 返回或 fallback 规则)。
- message 写入时填充 input/output tokens 与 cost。
- session 更新时累加 total_tokens/total_cost。
**Step 4: Run test to verify it passes**
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add backend/src/core/agent/application/run_service.py backend/src/core/agent/application/resume_service.py backend/src/core/agent/infrastructure/persistence/message_repository.py backend/src/core/agent/infrastructure/persistence/session_repository.py backend/tests/integration/core/agent/test_queue_run_resume.py
git commit -m "feat: persist runtime token and cost aggregates"
```
### Task 4: 补齐软删除级联(session -> messages)
**Files:**
- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Modify: `backend/src/v1/agent/service.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
**Step 1: Write the failing test**
新增用例:软删 session 后,同会话 messages 的 `deleted_at` 同步写入。
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
Expected: FAIL,当前无软删级联。
**Step 3: Write minimal implementation**
- repository 增加 `soft_delete_session_with_messages(session_id)`
- service 调用时使用同事务批量更新 messages。
**Step 4: Run test to verify it passes**
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/persistence/session_repository.py backend/src/v1/agent/service.py backend/tests/integration/core/agent/test_queue_run_resume.py
git commit -m "fix: cascade soft delete from sessions to messages"
```
### Task 5: 重构诊断脚本并新增 live E2E
**Files:**
- Modify: `test_agent_sse_flow.py`
- Create: `backend/tests/e2e/test_agent_closed_loop_live.py`
- Modify: `docs/bugs/2026-03-05-agent-runtime-bugs.md`
**Step 1: Write the failing test**
新增 live E2E 用例(`@pytest.mark.live`):
- 首聊不传 `session_id` 返回 202
- 订阅 SSE 收到关键事件
- DB 断言 session/messages/tokens/cost
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
Expected: FAIL,当前契约或脚本未对齐。
**Step 3: Write minimal implementation**
- 清理脚本重复/不可达逻辑。
- 增加健康检查、阶段化日志、超时和错误根因输出。
- E2E 用例复用脚本中的 helper(JWT、SSE 解析、DB 断言)。
**Step 4: Run test to verify it passes**
Run:
- `uv run python test_agent_sse_flow.py --new-session`
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add test_agent_sse_flow.py backend/tests/e2e/test_agent_closed_loop_live.py docs/bugs/2026-03-05-agent-runtime-bugs.md
git commit -m "test: add live closed-loop agent e2e verification"
```
### Task 6: 全量验证与文档同步
**Files:**
- Modify: `docs/runtime/runtime-runbook.md`
- Modify: `docs/runtime/runtime-route.md`
**Step 1: Run targeted checks**
Run:
- `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
- `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
- `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
Expected: PASS。
**Step 2: Run quality gates**
Run:
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright`
Expected: PASS。
**Step 3: Update docs**
记录本地启动流程、真实 LLM 前置配置、live E2E 执行方式和故障排查。
**Step 4: Commit**
```bash
git add docs/runtime/runtime-runbook.md docs/runtime/runtime-route.md
git commit -m "docs: document live agent closed-loop e2e workflow"
```
@@ -1,469 +0,0 @@
# Agent Runtime Closed Loop Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Build a production-grade closed-loop agent runtime where `frontend -> FastAPI -> Celery -> run/resume service -> CrewAI -> AG-UI events -> Redis Stream -> SSE` is fully connected and verifiable.
**Architecture:** Keep HTTP API as control-plane and worker as data-plane. The API validates auth/ownership and enqueues commands, the Celery worker executes run/resume business logic using DB-driven agent config, runtime emits normalized AG-UI events and usage/cost telemetry, all events are persisted to Redis Stream, and SSE endpoint streams from Redis with resume support (`Last-Event-ID`).
**Tech Stack:** FastAPI, SQLAlchemy AsyncSession, Celery, Redis Streams, CrewAI, LiteLLM, Pydantic, pytest (unit/integration).
**Confirmed Constraints (locked):**
- Persist semantics use existing `messages.role` only (`assistant|user|system|tool`), no new `message_kind` column.
- `tool_result` must be semantically complete (especially UI schema); do not store summary-only payload.
- Store full `tool_result` payload in Supabase Storage (private bucket) and persist durable object reference in DB metadata; do not rely on expiring signed URL as primary reference.
- `metadata` must be fixed and typed via Pydantic model (no free-form drift).
- Do not introduce additional business tables for this scope; keep schema minimal.
- CrewAI runtime must default to streaming mode.
- Full traceability target is final semantic reconstruction of `user/assistant/tool_result`; chunk-level replay is not required.
**Metadata Contract (fixed, Pydantic-enforced):**
- Global required keys for all message metadata: `type`, `run_id`, `turn_id`.
- Global optional keys for all message metadata: `event_id`, `parent_message_id`, `error`.
- `type=user_input`:
- Required: `type`, `run_id`, `turn_id`.
- Optional: `input_source`, `client_ts`.
- `type=assistant_output`:
- Required: `type`, `run_id`, `turn_id`.
- Optional: `finish_reason`, `model_provider`, `cost_source`.
- `type=tool_call` (`role=assistant`):
- Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `tool_args`.
- Optional: `tool_schema_version`, `timeout_ms`.
- `type=tool_result` (`role=tool`):
- Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `storage_bucket`, `storage_path`, `payload_sha256`, `payload_bytes`, `payload_format`.
- Optional: `ui_schema_version`, `compression`, `storage_etag`, `render_hints`.
- Validation rules:
- `messages.role=tool` must use `metadata.type=tool_result`.
- `messages.role=assistant` + tool event must use `metadata.type=tool_call` or `assistant_output`.
- `tool_result` payload in DB must be reconstructable to AG-UI `TOOL_CALL_RESULT` using Storage object + metadata checksum.
---
### Task 1: Add Agent Module Skeleton and Contracts
**Files:**
- Create: `backend/src/core/agent/__init__.py`
- Create: `backend/src/core/agent/application/__init__.py`
- Create: `backend/src/core/agent/domain/__init__.py`
- Create: `backend/src/core/agent/infrastructure/events/__init__.py`
- Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
- Create: `backend/src/core/agent/infrastructure/agui/stream.py`
- Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
**Step 1: Write failing tests for event normalization and SSE formatting**
```python
def test_bridge_normalizes_event_type_to_upper_snake() -> None:
events = [{"type": "runStarted", "data": {"ok": True}}]
out = to_agui_events(events)
assert out[0]["type"] == "RUN_STARTED"
def test_sse_format_includes_id_event_data() -> None:
payload = to_sse_event(stream_id="1-0", event={"type": "RUN_STARTED", "data": {"a": 1}})
assert payload.startswith("id: 1-0\nevent: RUN_STARTED\ndata: {")
```
**Step 2: Run tests and confirm RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`
Expected: FAIL with missing module/function errors.
**Step 3: Implement minimal bridge + stream utilities**
```python
def to_agui_events(internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
...
def to_sse_event(stream_id: str, event: dict[str, Any]) -> str:
...
```
**Step 4: Run tests and confirm GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent backend/tests/unit/core/agent/test_agui_bridge.py
git commit -m "feat(agent): add ag-ui bridge and sse serializer utilities"
```
### Task 2: Implement Redis Stream Event Store and Reader
**Files:**
- Create: `backend/src/core/agent/infrastructure/events/redis_stream.py`
- Modify: `backend/src/core/config/settings.py`
- Test: `backend/tests/unit/core/agent/test_redis_stream.py`
**Step 1: Write failing tests for append/read semantics**
```python
def test_append_event_writes_json_payload() -> None:
...
def test_read_events_respects_last_event_id() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`
Expected: FAIL.
**Step 3: Implement Redis stream adapter**
```python
def append_event_sync(*, session_id: UUID, event: dict[str, Any]) -> str:
...
async def read_events(...):
...
```
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/events/redis_stream.py backend/src/core/config/settings.py backend/tests/unit/core/agent/test_redis_stream.py
git commit -m "feat(agent): add redis stream event transport for run events"
```
### Task 3: Build CrewAI Runtime + AG-UI Event Mapping + Usage Tracking
**Files:**
- Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
- Create: `backend/src/core/agent/infrastructure/litellm/client.py`
- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
- Create: `backend/src/core/agent/infrastructure/config/resolver.py`
- Modify: `backend/src/core/config/settings.py`
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
- Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
- Test: `backend/tests/unit/core/agent/test_config_resolver.py`
**Step 1: Write failing runtime tests (events + cost + strict errors)**
```python
def test_runtime_emits_text_tool_reasoning_events() -> None:
...
def test_runtime_raises_if_model_or_api_key_missing() -> None:
...
def test_usage_tracker_extracts_tokens_and_cost() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`
Expected: FAIL.
**Step 3: Implement runtime and tracker**
- Register CrewAI event handlers (`Task/LLM/Tool/Reasoning`) and map to AG-UI canonical event types.
- Default runtime to streaming mode for CrewAI execution.
- Enforce strict config behavior: no `llm_model_code` or provider key -> raise.
- Use LiteLLM cost calculator for actual cost; if cost cannot be computed, fail closed (raise), do not silently record zero.
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py backend/src/core/config/settings.py
git commit -m "feat(agent): implement crewai runtime events and litellm usage-cost auditing"
```
### Task 4: Implement Run/Resume Application Services (DB Config + Persistence)
**Files:**
- Create: `backend/src/core/agent/application/run_service.py`
- Create: `backend/src/core/agent/application/resume_service.py`
- Create: `backend/src/core/agent/application/session_state_persistence.py`
- Create: `backend/src/core/agent/domain/state_snapshot.py`
- Create: `backend/src/core/agent/domain/tool_correlation.py`
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
- Test: `backend/tests/unit/core/agent/test_state_snapshot.py`
- Test: `backend/tests/unit/core/agent/test_tool_correlation.py`
**Step 1: Write failing tests for DB-driven runtime and aggregate updates**
```python
async def test_run_service_loads_agent_config_from_db_and_persists_messages() -> None:
...
async def test_resume_service_requires_pending_tool_call() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`
Expected: FAIL.
**Step 3: Implement services**
- `run_service`: read session + system agent config from DB, execute runtime, persist user/assistant messages, update session aggregates.
- `resume_service`: validate pending tool call status, enforce idempotency semantics, resume runtime, persist audit fields.
- Persist metadata audit (`tokens`, `cost`, `cost_source`, correlation ids) for every assistant message.
- Persist tool lifecycle with role-only model:
- tool call message uses `role=assistant` with fixed metadata (`type=tool_call`, `tool_call_id`, `tool_name`, arguments reference).
- tool result message uses `role=tool` with fixed metadata (`type=tool_result`, `tool_call_id`, `tool_name`, storage bucket/path, checksum, bytes, schema version).
- `tool_result` full payload (UI schema) is uploaded to Supabase Storage private bucket; DB stores durable reference and verification fields.
- Ensure DB->AG-UI `TOOL_CALL_RESULT` reconstruction is equivalent to SSE-streamed final tool result semantics.
- Enforce metadata contract by Pydantic model at write path and read path (reject malformed metadata early).
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent/application backend/src/core/agent/domain backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py
git commit -m "feat(agent): add run-resume app services with db config and audit persistence"
```
### Task 5: Wire Celery Worker Task to Run/Resume and Publish Runtime Events
**Files:**
- Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
- Modify: `backend/src/core/celery/app.py`
- Test: `backend/tests/unit/core/agent/test_queue_tasks.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
**Step 1: Write failing queue tests**
```python
def test_run_agent_task_emits_started_runtime_and_finished_events() -> None:
...
def test_run_agent_task_emits_error_event_on_exception() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`
Expected: FAIL.
**Step 3: Implement worker task flow**
- Decode command type (`run`/`resume`).
- Emit lifecycle events (`RUN_STARTED/RUN_RESUMED/RUN_FINISHED/RUN_ERROR`).
- Forward runtime callback events to Redis stream immediately.
- Persist session status/snapshot after completion.
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/queue/tasks.py backend/src/core/celery/app.py backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py
git commit -m "feat(agent): wire celery run-resume execution and redis event publishing"
```
### Task 6: Implement API Contracts (Run/Resume/SSE) + Auth/Ownership/Idempotency
**Files:**
- Create: `backend/src/v1/agent/schemas.py`
- Create: `backend/src/v1/agent/repository.py`
- Create: `backend/src/v1/agent/service.py`
- Create: `backend/src/v1/agent/router.py`
- Create: `backend/src/v1/agent/dependencies.py`
- Modify: `backend/src/v1/router.py`
- Test: `backend/tests/unit/v1/agent/test_service.py`
- Test: `backend/tests/unit/v1/agent/test_owner_guard.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
**Step 1: Write failing API tests**
```python
async def test_run_requires_auth_and_returns_202_task_id() -> None:
...
async def test_stream_reads_from_last_event_id() -> None:
...
def test_resume_idempotency_uses_redis_lock_and_task_key() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`
Expected: FAIL.
**Step 3: Implement API service/router**
- `POST /api/v1/agent/runs` enqueue run command.
- `POST /api/v1/agent/runs/{session_id}/resume` enqueue resume command with async redis lock + dedup task key.
- `GET /api/v1/agent/runs/{session_id}/events` SSE stream from Redis with `Last-Event-ID`.
- Enforce auth and session ownership checks on all endpoints.
- Validate `tool_call_id` and message length/pattern boundaries.
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/v1/agent backend/src/v1/router.py backend/tests/unit/v1/agent backend/tests/integration/v1/agent/test_routes.py
git commit -m "feat(agent): add authenticated run-resume-sse api with redis-backed idempotency"
```
### Task 7: Add Schema/Migration Contract for Session Snapshot + Audit Fields
**Files:**
- Create: `backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py`
- Modify: `backend/src/models/agent_chat_session.py`
- Modify: `backend/src/models/agent_chat_message.py`
- Test: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
**Migration scope note:**
- Fix current schema drift: model has `sessions.state_snapshot` but migration chain does not reliably provide this column in current DB state.
- Keep schema minimal; do not add new business tables in this migration.
**Step 1: Write failing migration contract tests**
```python
def test_session_has_state_snapshot_and_status_contract() -> None:
...
def test_message_has_token_cost_and_metadata_contract() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`
Expected: FAIL.
**Step 3: Implement migration and model alignment**
- Ensure `state_snapshot`, `status`, token/cost/metadata fields are present and nullable constraints are explicit.
- Add/verify indexes needed for role-based semantic reconstruction (`session_id, seq`, and targeted metadata lookups if required).
- Ensure `metadata` structure is validated by fixed Pydantic schema at application boundary.
- Add DB-level guardrails where feasible (check constraints) for role/metadata consistency without introducing new tables.
- Keep reversible downgrade path.
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py backend/src/models/agent_chat_session.py backend/src/models/agent_chat_message.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
git commit -m "feat(agent): add db contract for session snapshot and usage audit fields"
```
### Task 8: End-to-End Closure Verification and Docs Update
**Files:**
- Modify: `docs/runtime/runtime-route.md`
- Modify: `docs/runtime/runtime-runbook.md`
- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
**Step 1: Write integration test for full closure path**
```python
async def test_closed_loop_run_flow_frontend_to_sse() -> None:
# run request -> queue command -> runtime events -> redis stream -> sse read
...
```
Also verify:
- `tool_result` full UI schema is written to Supabase Storage private bucket.
- `messages.role=tool` row contains stable storage reference and checksum metadata.
- Reading from DB can reconstruct final AG-UI `TOOL_CALL_RESULT` event payload semantics.
**Step 2: Run RED**
Run: `uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py -q`
Expected: FAIL.
**Step 3: Implement minimal missing glue and docs**
- Fill any missing wiring revealed by the test.
- Document endpoint contracts, event taxonomy, and operational runbook for redis/celery troubleshooting.
**Step 4: Run GREEN + full gate verification**
Run:
- `PYTHONPATH=backend/src uv run python backend/src/core/runtime/cli.py migrate`
- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright backend/src`
Expected:
- All relevant tests PASS.
- Ruff PASS.
- basedpyright 0 errors (notes/warnings can be documented if pre-existing).
**Step 5: Commit**
```bash
git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md backend/tests/integration/core/agent/test_session_message_persistence.py
git commit -m "docs(agent): document closed-loop runtime and verify end-to-end chain"
```
### Task 9: L2 Mandatory Review Gates
**Files:**
- No direct code changes required; apply fixes if findings appear.
**Step 1: Run required agents**
- `tdd-guide` (already enforced by plan sequence)
- `refactor-cleaner`
- `code-reviewer`
- `security-reviewer`
**Step 2: Fix all CRITICAL/HIGH findings**
Run targeted tests after each fix.
**Step 3: Final verification rerun**
Run:
- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright backend/src`
Expected: no failing tests; no lint errors; no type errors.
**Step 4: Final commit (if review fixes were needed)**
```bash
git add backend/src backend/tests docs/runtime
git commit -m "fix(agent): resolve L2 review findings for closed-loop runtime"
```
@@ -0,0 +1,746 @@
# UserAgentContext & ProfileSettings v1 设计
**Date:** 2026-03-05
**Status:** Approved
---
## 目标
为 Agent Runtime 提供完整的用户画像上下文,通过 Pydantic 约束 profiles.settings 结构,确保:
1. 运行时入口读取 profile(username/bio/settings)
2. settings 结构类型安全、版本可演进
3. 关键配置(语言/时区/国家)符合标准格式
---
## 架构
```
Profile (DB JSONB)
    ↓
ProfileSettings (Pydantic)
    ↓
UserAgentContext (DataClass)
    ↓
build_global_system_prompt(ctx)
```
**设计原则:**
- 唯一入口:`get_user_agent_context(user_id)` 读取并构造上下文
- 不可变:UserAgentContext 使用 frozen dataclass
- 向后兼容:version 字段预留未来演进
---
## ProfileSettings v1 结构
```json
{
"version": 1,
"preferences": {
"interface_language": "zh-CN",
"ai_language": "zh-CN",
"timezone": "Asia/Shanghai",
"country": "CN"
},
"privacy": {},
"notification": {}
}
```
### 字段说明
| 字段 | 类型 | 默认值 | 约束 |
|------|------|--------|------|
| `version` | int | 1 | 必须为 1(v1 锁定) |
| `preferences.interface_language` | str | "zh-CN" | BCP-47 格式 |
| `preferences.ai_language` | str | "zh-CN" | BCP-47 格式 |
| `preferences.timezone` | str | "Asia/Shanghai" | IANA 时区 |
| `preferences.country` | str | "CN" | ISO 3166-1 alpha-2 |
| `privacy` | dict | {} | 空对象(预留) |
| `notification` | dict | {} | 空对象(预留) |
### 约束规则
**1. BCP-47 语言格式**
正则:`^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$`
示例:
- ✅ zh-CN, en-US, zh-TW, ja-JP
- ❌ zh_CN, EN, zh-cn
- ⚠️ 注意:该正则只校验格式,不校验语言代码是否已注册(例如 `chn` 能通过格式校验);如需语义校验需另行查表。
**2. IANA 时区**
使用 `zoneinfo.ZoneInfo` 校验。
示例:
- ✅ Asia/Shanghai, America/New_York, UTC
- ❌ CST, GMT+8
**3. ISO 3166-1 alpha-2 国家代码**
使用 `pycountry.countries.get(alpha_2=...)` 校验。
示例:
- ✅ CN, US, JP, GB
- ❌ CHN, USA, zz
---
## UserAgentContext 结构
```python
@dataclass(frozen=True)
class UserAgentContext:
user_id: UUID
username: str
bio: str | None
settings: ProfileSettings
```
**设计要点:**
- 不可变(frozen=True):防止运行时修改
- 完整画像:包含身份(username/bio)和配置(settings)
- 唯一构造入口:`get_user_agent_context(user_id)`
---
## Pydantic 模型实现
```python
from pydantic import BaseModel, Field, field_validator
from dataclasses import dataclass
from uuid import UUID
import re
class PreferenceSettings(BaseModel):
interface_language: str = "zh-CN"
ai_language: str = "zh-CN"
timezone: str = "Asia/Shanghai"
country: str = "CN"
@field_validator("interface_language", "ai_language")
@classmethod
def validate_bcp47(cls, v: str) -> str:
pattern = r"^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$"
if not re.match(pattern, v):
raise ValueError(f"Invalid BCP-47 language tag: {v}")
return v
@field_validator("timezone")
@classmethod
def validate_iana_timezone(cls, v: str) -> str:
import zoneinfo
try:
zoneinfo.ZoneInfo(v)
except Exception:
raise ValueError(f"Invalid IANA timezone: {v}")
return v
@field_validator("country")
@classmethod
def validate_iso_country(cls, v: str) -> str:
import pycountry
if not pycountry.countries.get(alpha_2=v.upper()):
raise ValueError(f"Invalid ISO 3166-1 alpha-2 country code: {v}")
return v.upper()
class ProfileSettings(BaseModel):
version: int = Field(default=1, ge=1, le=1)
preferences: PreferenceSettings = Field(default_factory=PreferenceSettings)
privacy: dict = Field(default_factory=dict)
notification: dict = Field(default_factory=dict)
@dataclass(frozen=True)
class UserAgentContext:
user_id: UUID
username: str
bio: str | None
settings: ProfileSettings
```
---
## 依赖项
需要添加到 `backend/pyproject.toml`:
```toml
[project]
# 追加到现有 dependencies 数组中(PEP 621 要求 dependencies 为字符串数组)
dependencies = [
    "pycountry>=23.0.0",
]
```
---
## 迁移策略
**数据库层:**
- profiles.settings 保持 JSONB,不做 schema 变更
- 现有数据默认值:`{"version": 1, "preferences": {"country": "CN"}}`
**应用层:**
- 读取时:`ProfileSettings.model_validate(profile.settings or {})`
- 写入时:`profile.settings = settings.model_dump()`
---
## 未来演进
**版本迁移:**
- Pydantic 支持多版本共存
- 数据库不做破坏性变更
---
---
## AG-UI 事件转发与落库策略
### 核心原则
**1. 事件转发时机:**
- 只有 organization 阶段完成后转发 AG-UI 事件
- AG-UI bridge 已实现底层机制,编排层控制转发时机
**2. 落库时机:**
- 意图识别和任务执行阶段:落库但 seq 取负数(用于审计)
- 结果反馈阶段:seq 取最新 seq 的绝对值 +1(用于展示)
### Seq 设计细节
**意图识别和任务执行阶段(审计用):**
- seq 取负数(如 -1, -2)
- role: "assistant"(标记为 agent 输出)
- content: 阶段的完整输出(用于审计/调试)
- 重建会话时通过 `WHERE seq > 0` 过滤,不展示给用户
**结果反馈阶段(展示用):**
- seq 取正数(取最新负数的绝对值 +1)
- role: "assistant"
- content: OrganizationResult.assistant_text
- 重建会话时通过 `WHERE seq > 0` 展示给用户
**示例:**
```
| seq | role | content | 展示 |
|------|----------|----------------------------|------|
| -2 | assistant| ExecutionResult (完整) | 否 |
| -1 | assistant| IntentResult (完整) | 否 |
| 1 | user | 用户输入 | 是 |
| 2 | assistant| OrganizationResult | 是 |
```
### 编排层职责
```python
@listen(intent_stage)
async def persist_intent(self, state: FlowState) -> FlowState:
# seq 取负数
seq = await message_repo.get_next_negative_seq(state.session_id)
await message_repo.create(
session_id=state.session_id,
seq=seq, # 负数
role="assistant",
content=state.intent_result.model_dump_json(),
...
)
return state
@listen(execution_stage)
async def persist_execution(self, state: FlowState) -> FlowState:
# seq 取负数
seq = await message_repo.get_next_negative_seq(state.session_id)
await message_repo.create(
session_id=state.session_id,
seq=seq, # 负数
role="assistant",
content=state.execution_result.model_dump_json(),
...
)
return state
@listen(organization_stage)
async def finalize_flow(self, state: FlowState) -> FlowState:
result = state.organization_result
# seq 取正数(最新负数绝对值+1)
seq = await message_repo.get_next_positive_seq(state.session_id)
await message_repo.create(
session_id=state.session_id,
seq=seq, # 正数
role="assistant",
content=result.assistant_text,
...
)
# 触发 AG-UI 事件(由 bridge 处理)
return state
```
### Token 和 Cost 累加
**策略:在内存中累加所有阶段的 token 和 cost,organization 完成后统一落库。**
```python
@dataclass
class FlowState:
# ...
tokens: dict[str, dict] = field(default_factory=dict)
cost: Decimal = Decimal("0")
currency: str = "CNY"
```
---
## CrewAI Flow 三阶段设计
### 架构概览
```
User Input + UserAgentContext
@start() begin()
@listen() intent_stage() → 判断 can_answer_directly
↓ (router)
├─ DIRECT_RESPONSE → 直接返回
└─ NEEDS_EXECUTION
@listen() execution_stage() → 任务执行/工具调用
@listen() organization_stage() → 结果组织与表达
返回给用户
```
### 三阶段职责
**1. Intent Recognition(意图识别)**
- Agent Type: `INTENT_RECOGNITION`
- 输出结构(最小化设计):
```python
class IntentResult(BaseModel):
direct_answer: bool # 是否可以直接回答
intent_analysis: str # 意图分析文本(用于调试/审计)
execution_prompt: str # 给 execution 阶段的提示词(direct_answer=false时使用)
direct_response: str # 直接回复文本(direct_answer=true时使用)
```
- 短路逻辑:
- `direct_answer=true` → 完全跳过 execution 和 organization,直接返回 direct_response
- `direct_answer=false` → 进入 execution 阶段
- 输出约束:使用 `output_pydantic=IntentResult`
- **落库策略**:落库到 messages 表,但重建会话时不展示
**2. Task Execution(任务执行)**
- Agent Type: `TASK_EXECUTION`
- 输入:IntentResult.execution_prompt + IntentResult.intent_analysis
- 职责:
- 执行复杂任务(查询数据库、调用工具、多步骤推理)
- 返回结构化执行结果
- 输出结构(最小化设计):
```python
class ExecutionResult(BaseModel):
execution_summary: str # 任务执行摘要(用于调试/审计)
organization_prompt: str # 给 organization 阶段的提示词
execution_data: dict = {} # 执行结果的结构化数据
```
- 输出约束:使用 `output_pydantic=ExecutionResult`
- **落库策略**:落库到 messages 表,但重建会话时不展示
**3. Result Reporting(结果报告)**
- Agent Type: `RESULT_REPORTING`
- 输入:
- IntentResult(意图识别结果)
- ExecutionResult(任务执行情况)
- 职责:
- 结合意图分析和执行结果,格式化为用户友好的响应
- 应用个性化模板(基于 UserAgentContext)
- 输出结构(最小化设计):
```python
class OrganizationResult(BaseModel):
assistant_text: str # 最终回复文本
response_metadata: dict = {} # 响应元数据(可选)
```
- 输出约束:使用 `output_pydantic=OrganizationResult`
- **唯一展示阶段**:重建会话时只展示此阶段的 message
- **唯一转发阶段**:只有此阶段的输出需要通过 AG-UI 事件转发
### Flow 状态管理
```python
@dataclass
class FlowState:
user_input: str
context: UserAgentContext
stage_trace: list[str] = field(default_factory=list)
intent_result: IntentResult | None = None
execution_result: ExecutionResult | None = None
organization_result: OrganizationResult | None = None
assistant_text: str = ""
tokens: dict = field(default_factory=dict)
cost: Decimal = Decimal("0")
```
### 数据流向
```
User Input + UserAgentContext
@start() begin()
@listen() intent_stage()
├─ IntentResult.direct_answer=true
│ ↓
│ 跳过 execution,直接 organization
│ ↓
│ organization_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
│ ↓
│ OrganizationResult → AG-UI 事件 + 落库
└─ IntentResult.direct_answer=false
execution_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
ExecutionResult
organization_stage(ExecutionResult.next_stage_prompt, ExecutionResult.metadata)
OrganizationResult → AG-UI 事件 + 落库
```
### 三阶段输出约束
**所有阶段使用 `output_pydantic` 约束输出:**
```python
from pydantic import BaseModel
class IntentResult(BaseModel):
direct_answer: bool
next_stage_prompt: str
metadata: dict = {}
class ExecutionResult(BaseModel):
next_stage_prompt: str
metadata: dict = {}
class OrganizationResult(BaseModel):
assistant_text: str
metadata: dict = {}
# Task 定义
intent_task = Task(
description="Analyze user intent",
expected_output="Intent analysis",
agent=intent_agent,
output_pydantic=IntentResult,
)
execution_task = Task(
description="Execute tasks",
expected_output="Execution result",
agent=execution_agent,
output_pydantic=ExecutionResult,
)
organization_task = Task(
description="Format response",
expected_output="User-friendly response",
agent=organization_agent,
output_pydantic=OrganizationResult,
)
```
---
## 系统选模逻辑设计
### 问题背景
旧逻辑:`order_by(...).limit(1)` 随机选择一个系统 agent,不区分阶段。
新逻辑:按 `agent_type` 显式映射到三阶段。
### 选模规则
**必需的 Agent Types:**
- `INTENT_RECOGNITION` → 用于 intent_stage
- `TASK_EXECUTION` → 用于 execution_stage
- `RESULT_REPORTING` → 用于 organization_stage
**查询逻辑:**
```python
REQUIRED_TYPES = {"INTENT_RECOGNITION", "TASK_EXECUTION", "RESULT_REPORTING"}
@dataclass(frozen=True)
class StageModels:
intent: SystemAgentCatalog
execution: SystemAgentCatalog
organization: SystemAgentCatalog
def resolve_stage_models(rows: list[SystemAgentCatalog]) -> StageModels:
by_type = {row.agent_type: row for row in rows}
missing = REQUIRED_TYPES - set(by_type.keys())
if missing:
raise ValueError(f"Missing required agent types: {missing}")
return StageModels(
intent=by_type["INTENT_RECOGNITION"],
execution=by_type["TASK_EXECUTION"],
organization=by_type["RESULT_REPORTING"],
)
```
**初始化数据约束:**
- `system_agents` 表必须包含三种类型的记录
- 运行时启动时验证完整性
---
## 人民币结算策略设计
### 设计原则
1. **保留 LiteLLM 语义**:`completion_cost()` 始终返回 USD
2. **业务层映射**:根据用户国家(`profiles.settings.preferences.country`)决定落库货币
3. **默认人民币**:中国用户或无国家信息默认 CNY
4. **汇率配置**:USD/CNY 汇率通过环境变量配置
### 货币来源
```
UserAgentContext.settings.preferences.country
resolve_billing_currency(country)
CN → CNY
US → USD
其他 → USD
```
### 结算流程
```
LiteLLM completion_cost()
↓ (USD)
resolve_billing_cost(usd_cost, country)
├─ country="CN" or None → CNY (乘以汇率)
└─ country="US" → USD (保持原值)
messages.cost + messages.currency
sessions.total_cost (同一货币)
```
### 汇率配置
```python
# 环境变量
BILLING_USD_CNY_RATE=7.2
# 默认值
DEFAULT_USD_CNY_RATE = Decimal("7.2")
```
### 结算模型
```python
@dataclass(frozen=True)
class BillingCost:
currency: str # "CNY" or "USD"
cost: Decimal # 6位小数精度
def resolve_billing_cost(
usd_cost: Decimal,
country: str | None,
usd_cny_rate: Decimal = DEFAULT_USD_CNY_RATE,
) -> BillingCost:
currency = "CNY" if (country or "CN").upper() == "CN" else "USD"
if currency == "CNY":
cost = usd_cost * usd_cny_rate
else:
cost = usd_cost
return BillingCost(
currency=currency,
cost=cost.quantize(Decimal("0.000001"))
)
```
### 数据库落库
**messages 表:**
- `cost`: NUMERIC(12,6) - 业务货币金额
- `currency`: VARCHAR(3) - "CNY" or "USD"
**sessions 表:**
- `total_cost`: NUMERIC(12,6) - 同一货币累计
**约束:**
- 同一 session 内所有 messages 的 currency 必须一致
- sessions.total_cost 累加时保持货币一致
---
## Session 状态一致性设计
### 问题背景
旧逻辑:
- `sessions.status` 与 `state_snapshot.status` 不同步
- 失败时状态不一致
- title 未自动赋值
### 状态机
```
pending (创建)
    ↓
running (开始执行)
    ├─ completed (成功)
    └─ failed (异常)
```
### 状态同步规则
**创建时:**
```python
session = AgentChatSession(
user_id=user_uuid,
status=AgentChatSessionStatus.PENDING,
state_snapshot={
"status": "pending",
"pending_tool_call_id": None,
},
)
```
**运行时:**
```python
# 开始执行
session.status = AgentChatSessionStatus.RUNNING
session.state_snapshot["status"] = "running"
# 成功完成
session.status = AgentChatSessionStatus.COMPLETED
session.state_snapshot["status"] = "completed"
# 失败
session.status = AgentChatSessionStatus.FAILED
session.state_snapshot["status"] = "failed"
session.state_snapshot["error_id"] = error_id
```
### 自动 Title 赋值
**规则:**
- 首次运行时,如果 `session.title` 为空,使用 `user_input[:255]` 赋值
- 只在第一次运行时赋值,后续不覆盖
**实现:**
```python
async def _set_title_if_empty(self, session_id: UUID, title: str) -> None:
stmt = (
update(AgentChatSession)
.where(AgentChatSession.id == session_id)
.where(AgentChatSession.title.is_(None))
.values(title=title[:255])
)
await self.db.execute(stmt)
```
### Repository 方法
```python
class SessionRepository:
async def mark_running(self, session_id: UUID) -> None: ...
async def mark_completed(self, session_id: UUID) -> None: ...
async def mark_failed(self, session_id: UUID, error_id: str) -> None: ...
```
---
## 全局 Prompt 构建设计
### 分层结构
```
全局系统 Prompt
├─ 身份段(username/bio)
├─ 偏好段(language/timezone/country)
└─ 阶段段(动态注入)
├─ intent stage prompt
├─ execution stage prompt
└─ organization stage prompt
```
### 构建函数
```python
def build_global_system_prompt(ctx: UserAgentContext) -> str:
lines = [
"# User Identity",
f"username: {ctx.username}",
f"bio: {ctx.bio or 'N/A'}",
"",
"# User Preferences",
f"interface_language: {ctx.settings.preferences.interface_language}",
f"ai_language: {ctx.settings.preferences.ai_language}",
f"timezone: {ctx.settings.preferences.timezone}",
f"country: {ctx.settings.preferences.country}",
"",
"# Instructions",
"Use the user's preferences to personalize responses.",
"Respond in the user's preferred AI language.",
"Consider the user's timezone for time-related queries.",
]
return "\n".join(lines)
```
### 阶段注入
每个阶段运行时,在全局 prompt 基础上追加阶段特定的指令:
```python
def build_stage_prompt(
base_prompt: str,
stage: str, # "intent" | "execution" | "organization"
ctx: UserAgentContext,
) -> str:
stage_prompts = {
"intent": "Analyze the user's intent and decide if direct response is possible.",
"execution": "Execute the required tasks and tools to fulfill the user's request.",
"organization": "Format the execution results into a user-friendly response.",
}
return f"{base_prompt}\n\n# Stage: {stage}\n{stage_prompts[stage]}"
```
---
## 依赖关系图
```
UserAgentContext (核心上下文)
├─ ProfileSettings (用户配置)
│ └─ preferences.country → 人民币结算
├─ build_global_system_prompt() (全局 Prompt)
│ └─ 三阶段 Flow 使用
└─ resolve_stage_models() (选模逻辑)
└─ 三阶段 Agent 配置
```
---
## 相关文档
- [Runtime Database Schema](../runtime/runtime-database.md)
- [AG-UI Protocol](.opencode/skills/ag-ui/SKILL.md)
- [CrewAI Framework](.opencode/skills/crewai/SKILL.md)
+144
View File
@@ -0,0 +1,144 @@
# Agent LLM Config Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** 将 `system_agents.config` 中的 `temperature` / `max_tokens` 以受约束方式加载到运行时,并在调用 LiteLLM 时按需透传。
**Architecture:** 在应用层 `RunService` 读取模型选择时同步读取并校验 `SystemAgents.config`;将校验后的 `SystemAgentLLMConfig` 传入 `CrewAIRuntime`;由 runtime 将配置转交给 LiteLLM client;client 仅在值非 `None` 时向 `completion()` 传参,避免不必要的 provider 兼容风险。
**Tech Stack:** FastAPI, SQLAlchemy (async), Pydantic v2, LiteLLM, pytest
---
## 背景与修正点
- 当前真实调用链为:`RunService._load_agent_model_selection()` -> `create_runtime()` -> `CrewAIRuntime.execute()` -> `run_completion()`,并非 `load_stage_models()`。
- `SystemAgentLLMConfig` 已存在:`backend/src/core/agent/domain/system_agent_config.py`。
- `system_agents.config` 目前在初始化 YAML 侧有约束,但运行时 DB 读取仍需二次校验,防止脏数据绕过。
## 规则约束
- 严格 TDD:先写失败测试,再做实现。
- Python 命令统一使用 `uv run ...`。
- 仅做增量改动,不回滚或覆盖与本任务无关的已有变更。
## 字段映射与透传策略
| 配置字段 | LiteLLM 参数 | 规则 |
|---|---|---|
| `temperature` | `temperature` | `None` 不透传;非空直接透传 |
| `max_tokens` | `max_tokens` | `None` 不透传;非空直接透传 |
---
### Task 1: 应用层加载并校验 Agent LLM Config
**Files:**
- Modify: `backend/src/core/agent/application/run_service.py`
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
**Step 1: 写失败测试(RED)**
新增单测覆盖以下行为:
1. `_load_agent_model_selection()` 返回三元组:`(model_code, provider_name, llm_config)`。
2. 当 DB `config` 为 `{}` 时,`llm_config.temperature/max_tokens` 为 `None`。
3. 当 DB `config` 含非法值(如 `temperature=3`)时抛 `ValueError`。
**Step 2: 运行测试确认失败**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
Expected: 新增断言失败(返回值结构/异常行为不匹配)。
**Step 3: 最小实现(GREEN)**
在 `run_service.py` 中:
1. 查询 `SystemAgents.config`。
2. 用 `SystemAgentLLMConfig.model_validate(config or {})` 校验。
3. 将 `_load_agent_model_selection()` 改为返回三元组。
4. 在 `run()` 中把 `llm_config` 传递到 `create_runtime(...)`。
**Step 4: 运行测试确认通过**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
Expected: PASS。
---
### Task 2: Runtime 与 LiteLLM Client 支持可选参数透传
**Files:**
- Modify: `backend/src/core/agent/infrastructure/crewai/factory.py`
- Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py`
- Modify: `backend/src/core/agent/infrastructure/litellm/client.py`
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
**Step 1: 写失败测试(RED)**
在 `test_crewai_runtime.py` 增加用例:
1. 传入 `temperature/max_tokens` 时,`run_completion` 收到对应参数。
2. 参数为 `None` 时,不应被透传到 LiteLLM。
必要时新增 `backend/tests/unit/core/agent/test_litellm_client.py`,单测 `run_completion` 的 kwargs 组装逻辑。
**Step 2: 运行测试确认失败**
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
Expected: 新增断言失败(参数未透传或未过滤 `None`)。
**Step 3: 最小实现(GREEN)**
1. `create_runtime()` 增加 `llm_config` 参数并传给 `CrewAIRuntime`。
2. `CrewAIRuntime` 保存 `llm_config`,执行时调用:
- `run_completion(..., temperature=llm_config.temperature, max_tokens=llm_config.max_tokens)`
3. `run_completion()` 改为支持可选 `temperature/max_tokens`,内部仅在非 `None` 时加入 kwargs 再调用 `completion()`。
**Step 4: 运行测试确认通过**
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
Expected: PASS。
---
### Task 3: 初始化数据补齐与回归验证
**Files:**
- Modify: `backend/src/core/config/static/database/system_agents.yaml`
- Modify: `backend/src/core/config/initial/init_data.py`(如需补充类型兜底)
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
**Step 1: 写失败测试(RED)**
补充断言:YAML 读取后 `config` 可为空或包含 `max_tokens: null`,初始化逻辑不会报错,且生成结构符合 `SystemAgentLLMConfig`。
**Step 2: 运行测试确认失败**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
Expected: 新增断言失败。
**Step 3: 最小实现(GREEN)**
1. 在 `system_agents.yaml` 为各 agent 配置显式补充 `max_tokens: null`。
2. `init_data.py` 保持 `config: SystemAgentLLMConfig | None = None`,写库时统一序列化为 dict。
**Step 4: 运行测试确认通过**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
Expected: PASS。
---
## 最终验证
1. `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_crewai_runtime.py -q`
2. `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -q`
3. `uv run ruff check backend/src backend/tests`
4. `uv run basedpyright`
预期:全部通过;若集成测试依赖本地 DB 状态导致跳过/失败,需记录原因并给出手工验证步骤。
## 完成标准
- `RunService` 从 DB 读取并校验 `config`
- runtime 到 LiteLLM 链路支持 `temperature/max_tokens` 可选透传。
- `None` 不透传。
- 单测与相关集成测试通过,并给出命令级证据。
+2
View File
@@ -0,0 +1,2 @@
1. memory短期的加载。memory的生命周期为ttl+对话条目+session_id。用crewai
2.