feat(agent): 实现 Agent Runtime LLM 配置与消息元数据结构化支持

This commit is contained in:
qzl
2026-03-05 18:25:51 +08:00
parent c07d339a5f
commit db158de39c
26 changed files with 1215 additions and 2914 deletions
@@ -5,6 +5,10 @@ from uuid import UUID
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from core.agent.application.session_state_persistence import SessionStatePersistence from core.agent.application.session_state_persistence import SessionStatePersistence
from core.agent.domain.message_metadata import (
MessageMetadataAssistantOutput,
MessageMetadataToolResult,
)
from core.agent.infrastructure.persistence.message_repository import MessageRepository from core.agent.infrastructure.persistence.message_repository import MessageRepository
from core.agent.infrastructure.persistence.session_repository import SessionRepository from core.agent.infrastructure.persistence.session_repository import SessionRepository
from core.db import AsyncSessionLocal from core.db import AsyncSessionLocal
@@ -46,14 +50,16 @@ class ResumeService:
seq=next_seq, seq=next_seq,
role=AgentChatMessageRole.TOOL, role=AgentChatMessageRole.TOOL,
content='{"status":"ok"}', content='{"status":"ok"}',
metadata={"type": "tool_result", "tool_call_id": tool_call_id}, metadata=MessageMetadataToolResult(
tool_call_id=tool_call_id,
).model_dump(),
) )
await message_repository.append_message( await message_repository.append_message(
session_id=session_uuid, session_id=session_uuid,
seq=next_seq + 1, seq=next_seq + 1,
role=AgentChatMessageRole.ASSISTANT, role=AgentChatMessageRole.ASSISTANT,
content="Tool result received", content="Tool result received",
metadata={"type": "assistant_output"}, metadata=MessageMetadataAssistantOutput().model_dump(),
) )
snapshot = self._state_persistence.build_completed_snapshot() snapshot = self._state_persistence.build_completed_snapshot()
@@ -3,10 +3,16 @@ from __future__ import annotations
from decimal import Decimal from decimal import Decimal
from uuid import UUID, uuid4 from uuid import UUID, uuid4
from pydantic import ValidationError
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from core.agent.application.session_state_persistence import SessionStatePersistence from core.agent.application.session_state_persistence import SessionStatePersistence
from core.agent.domain.message_metadata import (
MessageMetadataToolCall,
MessageMetadataUserInput,
)
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.crewai.factory import create_runtime from core.agent.infrastructure.crewai.factory import create_runtime
from core.agent.infrastructure.persistence.message_repository import MessageRepository from core.agent.infrastructure.persistence.message_repository import MessageRepository
from core.agent.infrastructure.persistence.session_repository import SessionRepository from core.agent.infrastructure.persistence.session_repository import SessionRepository
@@ -58,10 +64,16 @@ class RunService:
if chat_session is None: if chat_session is None:
raise ValueError("session not found") raise ValueError("session not found")
model_code, provider_name = await self._load_agent_model_selection( (
db_session model_code,
provider_name,
llm_config,
) = await self._load_agent_model_selection(db_session)
runtime = create_runtime(
model_code=model_code,
provider_name=provider_name,
llm_config=llm_config,
) )
runtime = create_runtime(model_code=model_code, provider_name=provider_name)
runtime_result = runtime.execute(user_input=user_input) runtime_result = runtime.execute(user_input=user_input)
assistant_text = str(runtime_result.get("assistant_text", "")) assistant_text = str(runtime_result.get("assistant_text", ""))
prompt_tokens = _to_int(runtime_result.get("prompt_tokens", 0)) prompt_tokens = _to_int(runtime_result.get("prompt_tokens", 0))
@@ -79,7 +91,7 @@ class RunService:
role=AgentChatMessageRole.USER, role=AgentChatMessageRole.USER,
content=user_input, content=user_input,
model_code=model_code, model_code=model_code,
metadata={"type": "user_input"}, metadata=MessageMetadataUserInput().model_dump(),
) )
await message_repository.append_message( await message_repository.append_message(
session_id=session_uuid, session_id=session_uuid,
@@ -87,10 +99,9 @@ class RunService:
role=AgentChatMessageRole.ASSISTANT, role=AgentChatMessageRole.ASSISTANT,
content=assistant_text or "Tool call pending approval", content=assistant_text or "Tool call pending approval",
model_code=model_code, model_code=model_code,
metadata={ metadata=MessageMetadataToolCall(
"type": "tool_call", tool_call_id=pending_tool_call_id,
"tool_call_id": pending_tool_call_id, ).model_dump(),
},
input_tokens=prompt_tokens, input_tokens=prompt_tokens,
output_tokens=completion_tokens, output_tokens=completion_tokens,
cost=cost, cost=cost,
@@ -119,9 +130,9 @@ class RunService:
async def _load_agent_model_selection( async def _load_agent_model_selection(
self, session: AsyncSession self, session: AsyncSession
) -> tuple[str, str]: ) -> tuple[str, str, SystemAgentLLMConfig]:
stmt = ( stmt = (
select(Llm.model_code, LlmFactory.name) select(Llm.model_code, LlmFactory.name, SystemAgents.config)
.join(SystemAgents, SystemAgents.llm_id == Llm.id) .join(SystemAgents, SystemAgents.llm_id == Llm.id)
.join(LlmFactory, LlmFactory.id == Llm.factory_id) .join(LlmFactory, LlmFactory.id == Llm.factory_id)
.where(SystemAgents.status == "active") .where(SystemAgents.status == "active")
@@ -131,4 +142,11 @@ class RunService:
record = (await session.execute(stmt)).one_or_none() record = (await session.execute(stmt)).one_or_none()
if record is None: if record is None:
raise ValueError("active system agent model is required") raise ValueError("active system agent model is required")
return str(record[0]), str(record[1])
raw_config = record[2] if isinstance(record[2], dict) else {}
try:
llm_config = SystemAgentLLMConfig.model_validate(raw_config)
except ValidationError as exc:
raise ValueError("invalid system agent config") from exc
return str(record[0]), str(record[1]), llm_config
@@ -0,0 +1,39 @@
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel
class MessageMetadataUserInput(BaseModel):
    # Metadata model for a message that carries user input.
    # `type` is a fixed literal tag and the model declares no other fields,
    # so `model_dump()` on a default instance yields {"type": "user_input"}.
    type: Literal["user_input"] = "user_input"
class MessageMetadataToolCall(BaseModel):
    # Metadata model for a message that records a tool call.
    # `type` is a fixed literal tag identifying this metadata shape.
    type: Literal["tool_call"] = "tool_call"
    # Identifier of the tool call; required (no default is declared).
    tool_call_id: str
class MessageMetadataToolResult(BaseModel):
    # Metadata model for a message that carries a tool result.
    # Only `tool_call_id` is required; every other field is optional and
    # defaults to None, so a minimal instance needs just the call id.
    # `type` is a fixed literal tag identifying this metadata shape.
    type: Literal["tool_result"] = "tool_result"
    # Identifier of the tool call this result belongs to (required).
    tool_call_id: str
    # NOTE(review): the meanings below are inferred from the field names only;
    # confirm against the code that populates them.
    # Presumably identifiers of the run / turn that produced the result.
    run_id: str | None = None
    turn_id: str | None = None
    # Presumably the name of the tool that was invoked.
    tool_name: str | None = None
    # Presumably the location of an externally stored payload — TODO confirm.
    storage_bucket: str | None = None
    storage_path: str | None = None
    # Presumably the SHA-256 digest, size in bytes and format of that payload.
    payload_sha256: str | None = None
    payload_bytes: int | None = None
    payload_format: str | None = None
class MessageMetadataAssistantOutput(BaseModel):
    # Metadata model for a message that carries assistant output.
    # `type` is a fixed literal tag and the model declares no other fields,
    # so `model_dump()` on a default instance yields
    # {"type": "assistant_output"}.
    type: Literal["assistant_output"] = "assistant_output"
# Union of the four metadata models declared above. This is a plain `|` union
# evaluated at import time; no discriminator is configured here.
MessageMetadata = (
    MessageMetadataUserInput
    | MessageMetadataToolCall
    | MessageMetadataToolResult
    | MessageMetadataAssistantOutput
)
@@ -0,0 +1,8 @@
from __future__ import annotations
from pydantic import BaseModel, Field
class SystemAgentLLMConfig(BaseModel):
    # Optional LLM call settings for a system agent. Both fields default to
    # None, so an empty config validates successfully.
    # Value for the LLM `temperature` parameter; when set it must satisfy
    # 0.0 <= temperature <= 2.0 (inclusive bounds via ge/le).
    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
    # Value for the LLM `max_tokens` parameter; when set it must be >= 1.
    max_tokens: int | None = Field(default=None, ge=1)
@@ -1,5 +1,7 @@
from __future__ import annotations from __future__ import annotations
from core.agent.domain.message_metadata import MessageMetadataToolResult
def reconstruct_tool_call_result_event( def reconstruct_tool_call_result_event(
*, *,
@@ -26,15 +28,14 @@ def build_tool_result_metadata(
payload_bytes: int, payload_bytes: int,
payload_format: str, payload_format: str,
) -> dict[str, object]: ) -> dict[str, object]:
return { return MessageMetadataToolResult(
"type": "tool_result", run_id=run_id,
"run_id": run_id, turn_id=turn_id,
"turn_id": turn_id, tool_call_id=tool_call_id,
"tool_call_id": tool_call_id, tool_name=tool_name,
"tool_name": tool_name, storage_bucket=storage_bucket,
"storage_bucket": storage_bucket, storage_path=storage_path,
"storage_path": storage_path, payload_sha256=payload_sha256,
"payload_sha256": payload_sha256, payload_bytes=payload_bytes,
"payload_bytes": payload_bytes, payload_format=payload_format,
"payload_format": payload_format, ).model_dump()
}
@@ -1,15 +1,20 @@
from __future__ import annotations from __future__ import annotations
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.config.resolver import AgentConfigResolver from core.agent.infrastructure.config.resolver import AgentConfigResolver
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
def create_runtime( def create_runtime(
*, model_code: str | None, provider_name: str | None *,
model_code: str | None,
provider_name: str | None,
llm_config: SystemAgentLLMConfig | None = None,
) -> CrewAIRuntime: ) -> CrewAIRuntime:
resolver = AgentConfigResolver() resolver = AgentConfigResolver()
return CrewAIRuntime( return CrewAIRuntime(
resolver=resolver, resolver=resolver,
model_code=model_code, model_code=model_code,
provider_name=provider_name, provider_name=provider_name,
llm_config=llm_config,
) )
@@ -2,6 +2,7 @@ from __future__ import annotations
from typing import Any from typing import Any
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.agui.bridge import to_agui_events from core.agent.infrastructure.agui.bridge import to_agui_events
from core.agent.infrastructure.config.resolver import ( from core.agent.infrastructure.config.resolver import (
AgentConfigResolver, AgentConfigResolver,
@@ -47,11 +48,13 @@ class CrewAIRuntime:
resolver: AgentConfigResolver, resolver: AgentConfigResolver,
model_code: str | None, model_code: str | None,
provider_name: str | None, provider_name: str | None,
llm_config: SystemAgentLLMConfig | None = None,
) -> None: ) -> None:
self._config: ResolvedAgentConfig = resolver.resolve( self._config: ResolvedAgentConfig = resolver.resolve(
model_code=model_code, model_code=model_code,
provider_name=provider_name, provider_name=provider_name,
) )
self._llm_config = llm_config or SystemAgentLLMConfig()
def map_events(self, internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]: def map_events(self, internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
return to_agui_events(internal_events) return to_agui_events(internal_events)
@@ -65,6 +68,8 @@ class CrewAIRuntime:
model=litellm_model, model=litellm_model,
api_key=self._config.provider_api_key, api_key=self._config.provider_api_key,
messages=[{"role": "user", "content": user_input}], messages=[{"role": "user", "content": user_input}],
temperature=self._llm_config.temperature,
max_tokens=self._llm_config.max_tokens,
) )
if not isinstance(response, dict): if not isinstance(response, dict):
raise ValueError("llm response must be a dict") raise ValueError("llm response must be a dict")
@@ -5,13 +5,26 @@ from typing import Any
from litellm import completion from litellm import completion
def run_completion(*, model: str, api_key: str, messages: list[dict[str, Any]]) -> Any: def run_completion(
response = completion( *,
model=model, model: str,
api_key=api_key, api_key: str,
messages=messages, messages: list[dict[str, Any]],
stream=False, temperature: float | None = None,
) max_tokens: int | None = None,
) -> Any:
kwargs: dict[str, Any] = {
"model": model,
"api_key": api_key,
"messages": messages,
"stream": False,
}
if temperature is not None:
kwargs["temperature"] = temperature
if max_tokens is not None:
kwargs["max_tokens"] = max_tokens
response = completion(**kwargs)
model_dump = getattr(response, "model_dump", None) model_dump = getattr(response, "model_dump", None)
if callable(model_dump): if callable(model_dump):
return model_dump() return model_dump()
+5 -2
View File
@@ -9,6 +9,7 @@ from pydantic import BaseModel, ValidationError
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.db.session import AsyncSessionLocal from core.db.session import AsyncSessionLocal
from core.logging import get_logger from core.logging import get_logger
from models.llm import Llm from models.llm import Llm
@@ -38,7 +39,7 @@ class SystemAgentsSeed(BaseModel):
agent_type: str agent_type: str
llm_model_code: str llm_model_code: str
status: str status: str
config: dict[str, Any] config: SystemAgentLLMConfig | None = None
class SystemAgentsYaml(BaseModel): class SystemAgentsYaml(BaseModel):
@@ -184,7 +185,9 @@ async def initialize_system_agents() -> None:
agent_type=agent["agent_type"], agent_type=agent["agent_type"],
llm_id=llm.id, llm_id=llm.id,
status=agent["status"], status=agent["status"],
config=agent["config"], config=SystemAgentLLMConfig.model_validate(
agent.get("config") or {}
).model_dump(),
) )
logger.info("Initialized system agents") logger.info("Initialized system agents")
@@ -4,15 +4,18 @@ agents:
status: active status: active
config: config:
temperature: 0.7 temperature: 0.7
max_tokens: null
- agent_type: TASK_EXECUTION - agent_type: TASK_EXECUTION
llm_model_code: deepseek-v3.2 llm_model_code: deepseek-v3.2
status: active status: active
config: config:
temperature: 0.7 temperature: 0.7
max_tokens: null
- agent_type: RESULT_REPORTING - agent_type: RESULT_REPORTING
llm_model_code: deepseek-v3.2 llm_model_code: deepseek-v3.2
status: active status: active
config: config:
temperature: 0.7 temperature: 0.7
max_tokens: null
@@ -1,22 +1,26 @@
from __future__ import annotations from __future__ import annotations
from types import SimpleNamespace from types import SimpleNamespace
from typing import cast
from core.agent.infrastructure.config.resolver import AgentConfigResolver from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
def test_runtime_emits_text_tool_reasoning_events() -> None: def test_runtime_emits_text_tool_reasoning_events() -> None:
runtime = CrewAIRuntime( settings = cast(
resolver=AgentConfigResolver( SettingsLike,
settings=SimpleNamespace( SimpleNamespace(
agent_runtime=SimpleNamespace( agent_runtime=SimpleNamespace(
default_model_code="", default_model_code="",
streaming_enabled=True, streaming_enabled=True,
), ),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}), llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
)
), ),
)
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(settings=settings),
model_code="gpt-4o-mini", model_code="gpt-4o-mini",
provider_name="dashscope", provider_name="dashscope",
) )
@@ -46,11 +50,18 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
captured: dict[str, object] = {} captured: dict[str, object] = {}
def _fake_completion( def _fake_completion(
*, model: str, api_key: str, messages: list[dict[str, object]] *,
model: str,
api_key: str,
messages: list[dict[str, object]],
temperature: float | None = None,
max_tokens: int | None = None,
): ):
captured["model"] = model captured["model"] = model
captured["api_key"] = api_key captured["api_key"] = api_key
captured["messages"] = messages captured["messages"] = messages
captured["temperature"] = temperature
captured["max_tokens"] = max_tokens
return { return {
"choices": [ "choices": [
{ {
@@ -75,23 +86,28 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
cost=0.001, cost=0.001,
), ),
) )
settings = cast(
SettingsLike,
SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
),
)
runtime = CrewAIRuntime( runtime = CrewAIRuntime(
resolver=AgentConfigResolver( resolver=AgentConfigResolver(settings=settings),
settings=SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
)
),
model_code="qwen3.5-flash", model_code="qwen3.5-flash",
provider_name="dashscope", provider_name="dashscope",
llm_config=SystemAgentLLMConfig(temperature=0.3, max_tokens=256),
) )
result = runtime.execute(user_input="hi") result = runtime.execute(user_input="hi")
assert captured["model"] == "dashscope/qwen3.5-flash" assert captured["model"] == "dashscope/qwen3.5-flash"
assert captured["api_key"] == "env-api-key" assert captured["api_key"] == "env-api-key"
assert captured["temperature"] == 0.3
assert captured["max_tokens"] == 256
assert result["assistant_text"] == "hello" assert result["assistant_text"] == "hello"
@@ -0,0 +1,14 @@
from __future__ import annotations
from core.config.initial.init_data import load_system_agents
def test_load_system_agents_supports_nullable_max_tokens() -> None:
    """Every loaded agent has a `config` whose `max_tokens` key is None."""
    agents = load_system_agents()["agents"]

    # Guard against a vacuous pass when nothing was loaded.
    assert len(agents) > 0

    for entry in agents:
        assert "config" in entry
        agent_config = entry["config"]
        assert "max_tokens" in agent_config
        assert agent_config["max_tokens"] is None
@@ -0,0 +1,51 @@
from __future__ import annotations
from core.agent.infrastructure.litellm.client import run_completion
def test_run_completion_passes_optional_params_when_provided(monkeypatch) -> None:
    """Explicit temperature / max_tokens are forwarded to `completion`."""
    seen_kwargs: dict[str, object] = {}

    def _recording_completion(**kwargs):  # type: ignore[no-untyped-def]
        # Record exactly what run_completion hands to the patched function.
        seen_kwargs.update(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _recording_completion,
    )

    user_messages = [{"role": "user", "content": "hi"}]
    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=user_messages,
        temperature=0.6,
        max_tokens=120,
    )

    assert seen_kwargs["temperature"] == 0.6
    assert seen_kwargs["max_tokens"] == 120
def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
    """None-valued temperature / max_tokens are not forwarded at all."""
    seen_kwargs: dict[str, object] = {}

    def _recording_completion(**kwargs):  # type: ignore[no-untyped-def]
        # Record exactly what run_completion hands to the patched function.
        seen_kwargs.update(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _recording_completion,
    )

    user_messages = [{"role": "user", "content": "hi"}]
    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=user_messages,
        temperature=None,
        max_tokens=None,
    )

    # The keys must be absent, not merely present with a None value.
    assert "temperature" not in seen_kwargs
    assert "max_tokens" not in seen_kwargs
@@ -4,6 +4,23 @@ import pytest
from core.agent.application.resume_service import ResumeService from core.agent.application.resume_service import ResumeService
from core.agent.application.run_service import RunService from core.agent.application.run_service import RunService
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
class _FakeResult:
    """Test double for a query result exposing only `one_or_none()`."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        # Keep the canned row (or None) to hand back unchanged later.
        self._row = record

    def one_or_none(self) -> tuple[object, object, object] | None:
        """Return the row supplied at construction time, or None."""
        return self._row
class _FakeSession:
    """Test double for a DB session whose `execute` yields a canned row."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        # The row (or None) that every execute() call will wrap and return.
        self._row = record

    async def execute(self, _stmt: object) -> _FakeResult:
        """Ignore the statement and return a result wrapping the canned row."""
        result = _FakeResult(self._row)
        return result
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -20,3 +37,72 @@ async def test_resume_service_requires_pending_tool_call() -> None:
with pytest.raises(ValueError): with pytest.raises(ValueError):
await resume_service.resume(session_id="session-1", tool_call_id="call-1") await resume_service.resume(session_id="session-1", tool_call_id="call-1")
@pytest.mark.asyncio
async def test_load_agent_model_selection_returns_validated_llm_config() -> None:
    """A dict config column is validated into a SystemAgentLLMConfig."""
    row = (
        "qwen3.5-flash",
        "dashscope",
        {"temperature": 0.5, "max_tokens": 512},
    )
    service = RunService()
    session_stub = _FakeSession(row)

    selection = await service._load_agent_model_selection(
        session_stub  # type: ignore[arg-type]
    )
    model_code, provider_name, llm_config = selection

    assert model_code == "qwen3.5-flash"
    assert provider_name == "dashscope"
    assert isinstance(llm_config, SystemAgentLLMConfig)
    assert llm_config.temperature == 0.5
    assert llm_config.max_tokens == 512
@pytest.mark.asyncio
async def test_load_agent_model_selection_rejects_invalid_config() -> None:
    """An out-of-range temperature surfaces as the documented ValueError."""
    # 3.0 exceeds the upper bound declared on SystemAgentLLMConfig.temperature.
    row = ("qwen3.5-flash", "dashscope", {"temperature": 3.0})
    service = RunService()
    session_stub = _FakeSession(row)

    with pytest.raises(ValueError, match="invalid system agent config"):
        await service._load_agent_model_selection(session_stub)  # type: ignore[arg-type]
@pytest.mark.asyncio
async def test_load_agent_model_selection_falls_back_when_config_not_dict() -> None:
    """A non-dict config column yields a config with both fields unset."""
    row = ("qwen3.5-flash", "dashscope", "not-a-dict")
    service = RunService()
    session_stub = _FakeSession(row)

    _model, _provider, llm_config = await service._load_agent_model_selection(
        session_stub  # type: ignore[arg-type]
    )

    assert llm_config.temperature is None
    assert llm_config.max_tokens is None
@pytest.mark.asyncio
async def test_load_agent_model_selection_raises_when_no_active_agent() -> None:
    """When the query returns no row, the loader raises ValueError."""
    service = RunService()
    empty_session = _FakeSession(None)

    with pytest.raises(ValueError, match="active system agent model is required"):
        await service._load_agent_model_selection(empty_session)  # type: ignore[arg-type]
-116
View File
@@ -1,116 +0,0 @@
# 前后端 API 对比分析
**Date:** 2026-03-04
**Status:** Open
**Type:** 架构分析
---
## 一、后端已有、前端缺失的 API
### 1. Friendships API (`/api/v1/friends`)
| 方法 | 路径 | 功能 | 前端状态 |
|------|------|------|----------|
| POST | `/requests` | 发送好友请求 | **缺失** |
| GET | `/requests/inbox` | 获取收件箱 | **缺失** |
| GET | `/requests/outgoing` | 获取发出的请求 | **缺失** |
| POST | `/requests/{id}/accept` | 接受好友请求 | **缺失** |
| POST | `/requests/{id}/decline` | 拒绝好友请求 | **缺失** |
| DELETE | `/requests/{id}` | 取消好友请求 | **缺失** |
| GET | `` | 获取好友列表 | **缺失** |
| DELETE | `/{id}` | 删除好友 | **缺失** |
### 2. Inbox Messages API (`/api/v1/inbox/messages`)
| 方法 | 路径 | 功能 | 前端状态 |
|------|------|------|----------|
| GET | `` | 获取消息列表 | **缺失** |
| POST | `/{id}/accept` | 接受邀请 | **缺失** |
| POST | `/{id}/dismiss` | 忽略消息 | **缺失** |
### 3. Chat/AgUi 流式 API
| 功能 | 前端状态 |
|------|----------|
| 发送消息 SSE 流式 | **仅有 Mock** |
| 加载历史记录 | **仅有 Mock** |
> 前端 `AgUiService` 只有本地 mock (`throw UnimplementedError`),未实现真实 API 调用。
### 4. Infra API
| 方法 | 路径 | 功能 | 前端状态 |
|------|------|------|----------|
| GET | `/infra/health` | 基础设施健康检查 | **未使用** |
---
## 二、前端已有、后端已实现的 API
### Auth API (`/api/v1/auth`)
| 方法 | 路径 | 后端 | 前端 |
|------|------|------|------|
| POST | `/verifications` | ✅ | ✅ |
| POST | `/verifications/verify` | ✅ | ✅ |
| POST | `/verifications/resend` | ✅ | ✅ |
| POST | `/sessions` | ✅ | ✅ |
| POST | `/sessions/refresh` | ✅ | ✅ |
| DELETE | `/sessions` | ✅ | ✅ |
| POST | `/password-reset` | ✅ | ✅ |
| POST | `/password-reset/confirm` | ✅ | ✅ |
| GET | `/users` | ✅ | **未使用** |
### Users API (`/api/v1/users`)
| 方法 | 路径 | 后端 | 前端 |
|------|------|------|------|
| GET | `/me` | ✅ | ✅ |
| PATCH | `/me` | ✅ | ✅ |
| POST | `/search` | ✅ | ✅ |
### Schedule Items API (`/api/v1/schedule-items`)
| 方法 | 路径 | 后端 | 前端 |
|------|------|------|------|
| POST | `` | ✅ | **仅有 Mock** |
| GET | `` (range query) | ✅ | **仅有 Mock** |
| GET | `/{id}` | ✅ | **仅有 Mock** |
| PATCH | `/{id}` | ✅ | **仅有 Mock** |
| DELETE | `/{id}` | ✅ | **仅有 Mock** |
| POST | `/{id}/share` | ✅ | **缺失** |
---
## 三、待实现功能清单
| 优先级 | 功能 | 说明 |
|--------|------|------|
| **P0** | FriendsApi | 前端无 Friendships API 客户端 |
| **P0** | InboxMessagesApi | 前端无 Inbox Messages API 客户端 |
| **P0** | Chat/AgUi 后端连接 | 前端 AgUiService 未实现真实 API |
| **P1** | CalendarService 真实 API | MockCalendarService → 真实 API 调用 |
| **P1** | Schedule Share 接口 | 前端未调用 `POST /{id}/share` |
| **P2** | Infra Health 集成 | 可用于前端健康检查 |
---
## 四、相关文件位置
### 前端 API 客户端
- `apps/lib/features/auth/data/auth_api.dart` - Auth API
- `apps/lib/features/users/data/users_api.dart` - Users API
- `apps/lib/features/calendar/data/services/mock_calendar_service.dart` - Calendar Mock
- `apps/lib/features/chat/data/services/ag_ui_service.dart` - Chat/AgUi Mock
- `apps/lib/features/chat/data/services/mock_history_service.dart` - History Mock
### 后端 Router
- `backend/src/v1/auth/router.py` - Auth 路由
- `backend/src/v1/users/router.py` - Users 路由
- `backend/src/v1/friendships/router.py` - Friendships 路由
- `backend/src/v1/inbox_messages/router.py` - Inbox Messages 路由
- `backend/src/v1/schedule_items/router.py` - Schedule Items 路由
- `backend/src/v1/infra/router.py` - Infra 路由
-145
View File
@@ -1,145 +0,0 @@
# 前后端测试分析报告
**Date:** 2026-03-04
**Status:** Completed
---
## 测试统计
### 后端测试
| 类型 | 数量 | 状态 |
|------|------|------|
| Unit Tests | ~100+ | 可运行 |
| Integration Tests | ~70+ | 可运行 |
| E2E Tests | 5 | **无法运行** (缺少 playwright 依赖) |
### 前端测试
| 类型 | 数量 | 状态 |
|------|------|------|
| Flutter Tests | 140 | ✅ 全部通过 |
---
## 问题发现
### 1. 后端 E2E 测试无法运行 (HIGH)
**问题**: 5 个 E2E 测试文件需要 `playwright` 模块,但依赖未安装。
**影响文件**:
- `tests/e2e/test_auth_flow.py`
- `tests/e2e/test_infra_health_e2e.py`
- `tests/e2e/test_logging_e2e.py`
- `tests/e2e/test_mobile_health_e2e.py`
- `tests/e2e/test_profile_flow.py`
**错误**:
```
ModuleNotFoundError: No module named 'playwright'
```
**建议**:
- 安装 playwright: `uv add playwright && uv run playwright install`
- 或者移除这些无法运行的 E2E 测试文件
---
### 2. 测试文件命名冲突导致收集警告 (LOW)
**问题**: 存在多个同名 `test_schemas.py` 文件在不同目录,导致 pytest 收集时显示警告。
**影响文件**:
- `tests/unit/v1/schedule_items/test_schemas.py`
- `tests/unit/v1/profile/test_schemas.py`
- `tests/unit/v1/inbox_messages/test_schemas.py`
- `tests/unit/v1/friendships/test_schemas.py`
**状态**: 测试实际可以正常运行,只是有警告提示。
**建议**: 可保持现状(这是合理的代码组织方式),或将各文件重命名为互不相同的名称(例如 `test_<模块名>_schemas.py`)以消除警告。
---
### 3. 遗留测试验证旧字段 (INFO)
**文件**: `tests/unit/v1/profile/test_schemas.py`
**测试**: `test_profile_update_rejects_display_name_field`
**说明**: 此测试验证旧的 `display_name` 字段被正确拒绝。字段已在之前的重构中删除。
**状态**: **有效** - 这是一个回归测试,确保旧字段不被使用。
---
## 未发现的问题
### 冗余测试
经过检查,未发现明显冗余的测试:
- 每个模块的测试覆盖不同的功能
- Unit tests、Integration tests、E2E tests 有清晰的职责划分
### 死代码
未发现测试文件中有未使用的:
- imports
- mock 类
- helper 函数
### 缺失测试
未发现对应已实现功能但缺少测试的情况。
---
## 测试覆盖模块
### 后端
| 模块 | Unit | Integration | E2E |
|------|------|-------------|-----|
| Auth | ✅ | ✅ | ❌ |
| Users | - | ✅ | - |
| Profile | ✅ | - | ❌ |
| Friendships | ✅ | ✅ | - |
| Inbox Messages | ✅ | ✅ | - |
| Schedule Items | ✅ | ✅ | - |
| Logging | ✅ | ✅ | ✅ |
| Settings | ✅ | - | - |
### 前端
| 模块 | 测试数 |
|------|--------|
| Auth | ~20 |
| Chat | ~70 |
| Home | ~15 |
| Calendar | ~5 |
| Core (API, Storage) | ~30 |
---
## 建议
1. **立即**: 解决 E2E 测试依赖问题或移除无法运行的测试文件
2. **可选**: 清理 test_schemas.py 重名警告(低优先级)
3. **保持**: 现有的测试结构良好,无需重大重构
---
## 附: 测试代码质量问题
### 测试类未完全实现 Protocol (LSP 警告)
**文件**: `tests/unit/v1/auth/test_auth_service.py`
**问题**: `FakeGateway` 和 `LogoutAssertingGateway` 类没有实现 `AuthServiceGateway` Protocol 的全部方法:
- `request_password_reset`
- `confirm_password_reset`
**影响**: LSP 类型检查器报告错误,但运行时不受影响(因为这些方法在测试中不会被调用)。
**建议**: 可选择补充缺失的方法实现,或使用 `@pytest.mark.skip` 标记不需要的协议方法。
---
*报告生成时间: 2026-03-04*
@@ -1,201 +0,0 @@
# Agent 后端硬切重构设计
## 目标
- 一次性移除现有 Agent 运行时代码、测试和旧文档契约,避免新旧方案并存。
- 仅从后端重新设计 Agent 体系,不依赖前端实现细节。
- 新方案必须满足以下六项要求:
1. 配置层可通过 `.env` 驱动 LLM API Key。
2. 对话与 resume 通过 Celery 队列处理,不阻塞 Web 主线程。
3. `v1/agent` 仅负责路由组织与服务调用,核心逻辑在 `core/agent`。
4. 按 CrewAI 官方模型组织 Agent/Task/Crew/Flow/Tools。
5. 按 AG-UI 协议输出事件,优先使用 `ag-ui-crewai` 适配库。
6. 使用 LiteLLM 统计每次 LLM 调用的 token 和 cost。
## 设计原则
- 单一职责:HTTP 层只做协议和鉴权,编排与执行下沉到核心层。
- 异步优先:长耗时推理、工具调用、恢复流程全部异步化。
- 协议优先:AG-UI 作为唯一事件契约,不维护自定义事件方言。
- 可观测性优先:每次 run、每次 stage、每次 LLM 调用可追踪。
- 配置单一来源:所有密钥和模型配置只走 `core.config.settings`。
## 目标架构
### 1) 分层
- `backend/src/v1/agent/`
- `router.py`: 暴露 HTTP/SSE 接口。
- `schemas.py`: 请求/响应 DTO 和输入校验。
- `dependencies.py`: DI 装配。
- `service.py`: 薄服务,仅调用 `core/agent` 应用服务。
- `backend/src/core/agent/`
- `application/`: run/resume 应用服务。
- `domain/`: run 状态机、resume 幂等语义、错误模型。
- `infrastructure/crewai/`: CrewAI Agent/Task/Crew/Flow 装配与执行。
- `infrastructure/agui/`: AG-UI 事件映射与 SSE 序列化。
- `infrastructure/litellm/`: LiteLLM 客户端与 usage/cost 拦截器。
- `infrastructure/queue/`: Celery task producer/consumer。
### 1.1) 配置来源与合并策略
- Agent 运行配置由两部分组成:
- 数据库存量配置:`system_agents`(每种 agent_type 对应 llm 与 llm_config)。
- 静态模板配置:`backend/src/core/config/static/crewai/*.yaml`(角色描述、任务模板、workflow、tools)。
- 合并策略:
- `llm` 与 `llm_config` 以 `system_agents` 为准。
- prompt 模板、task 描述、flow stage、tool 白名单以 static/crewai 为准。
- 若任一 agent_type 在 `system_agents` 缺失,运行前失败并返回受控错误。
### 2) 核心运行链路
1. `POST /api/v1/agent/runs` 只负责参数校验和鉴权。
2. 路由调用 `AgentRunAppService.enqueue_run()`,写入 run 记录并投递 Celery。
3. Worker 执行 `run_agent_task`:
- 读取 run 上下文。
- 构建 CrewAI `Agent/Task/Crew/Flow`。
- 通过 `ag-ui-crewai` 将执行事件转为 AG-UI 标准事件。
- 每次 LLM 调用由 LiteLLM 中间层记录 token/cost。
4. 事件落库并发布到事件通道(Redis Stream/Channel)。
5. SSE 接口从事件通道读取并持续推送,直到 `RUN_FINISHED` 或 `RUN_ERROR`。
### 3) Resume 链路
1. `POST /api/v1/agent/runs/{run_id}/resume` 校验 `interrupt_id` 与决策 payload。
2. 调用 `enqueue_resume()` 投递 `resume_agent_task`。
3. Worker 在事务内做并发控制:
- `run_id + interrupt_id` 幂等锁。
- 过期校验与状态迁移。
4. 恢复后继续 CrewAI Flow,事件按 AG-UI 继续输出。
### 4) Session 状态持久化
- 使用 `sessions.state_snapshot` 作为运行态单一快照来源。
- 快照至少包含:
- run 上下文(thread_id、run_id、stage)
- pending_tool_calls(tool_call_id、tool_name、args、status、expires_at)
- correlation 索引(tool_call_id -> message_id / step_id)
- 所有中断/恢复均以 `state_snapshot` 事务更新为准,避免内存态漂移。
### 5) 会话与消息落库模型
- 会话主表:`sessions`
- 新建 run 时写入:`id/user_id/session_type/status=running/last_activity_at`
- 运行中持续更新:`status`、`last_activity_at`、`message_count`、`total_tokens`、`total_cost`、`state_snapshot`。
- 运行结束更新:
- 成功:`status=completed`
- 失败:`status=failed`
- 消息表:`messages`
- 用户输入落库为 `role=user`(每次 run 开始时先写入)。
- 模型输出落库为 `role=assistant`(按最终聚合文本落库,保留 metadata 记录增量信息)。
- 工具调用结果落库为 `role=tool`,并写入 `tool_name` 与 `metadata.tool_call_id`。
- `seq` 由每个 `session_id` 内单调递增分配,满足 `uq_messages_session_seq`。
- 计量落库:每次 LLM 调用的 usage/cost 先写消息级,再聚合更新到 session 级。
## 六项要求落地映射
### 要求 1: `.env` 驱动 LLM API Key
- 新增 `LLMSettings` 到 `core.config.settings.Settings`,统一定义:
- `SOCIAL_LLM__PROVIDER_KEYS__DASHSCOPE`
- `SOCIAL_LLM__PROVIDER_KEYS__MINIMAX`
- `SOCIAL_LLM__PROVIDER_KEYS__MOONSHOT`
- `SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK`
- `SOCIAL_LLM__PROVIDER_KEYS__ARK`
- `SOCIAL_LLM__PROVIDER_KEYS__ZAI`
- 禁止 `os.environ` 直接读取密钥。
### 要求 2: 对话和 resume 走 Celery
- Web 层不直接执行编排。
- `run`/`resume` 一律入队,Worker 处理,Web 仅做事件流转发。
- 加入任务级超时、重试、死信策略。
### 要求 3: v1 仅路由与调用
- `v1/agent/service.py` 仅保留应用服务调用和错误映射。
- 任何编排、状态机、工具执行逻辑禁止进入 `v1`。
### 要求 4: CrewAI 官方流程
- 采用 CrewAI 原生对象:`Agent`、`Task`、`Crew`、`Flow`。
- tools 通过 CrewAI Tool 机制注册,不做平行实现。
- 任务模板与 agent 配置集中化(静态模板 + 运行时拼装)。
- 配置拼装明确依赖 `system_agents + static/crewai`,不再使用双套来源。
### 要求 5: AG-UI + ag-ui-crewai
- 事件集遵循 AG-UI 协议,生命周期闭环:
- `RUN_STARTED`
- 流式消息和工具事件
- 终态 `RUN_FINISHED` 或 `RUN_ERROR`
- 优先引入 `ag-ui-crewai` 做 CrewAI 到 AG-UI 的桥接,避免重复造轮子。
### 要求 6: LiteLLM token/cost 统计
- 所有 LLM 调用通过 LiteLLM 统一出入口。
- 按调用粒度记录:`input_tokens`、`output_tokens`、`total_tokens`、`cost`、`currency`。
- 按 run 粒度聚合并落库,支持后续计费和审计。
## 数据与可观测性
- 保留现有 Agent 相关表结构,不在本次硬切做数据库破坏性变更。
- 新增事件日志与调用指标落点(如已有字段不足,后续增量迁移)。
- 日志使用结构化字段:`run_id`、`task_id`、`stage`、`tool_name`、`llm_model`、`latency_ms`。
- 持久化原则:run/resume 的关键状态变更必须可重放,禁止仅保存在内存。
## 事务边界
- `run` 入口事务:创建或加载 `session` + 写入用户消息。
- `worker` 执行事务(可分阶段短事务):
- 阶段开始:更新 `session.status/state_snapshot`
- LLM 返回:写 assistant/tool 消息 + 更新 token/cost 聚合。
- 中断:写 `pending_tool_calls` 到 `state_snapshot` 并提交。
- 完成:更新终态 `session.status` 并提交。
- `resume` 事务:校验 `interrupt_id` 与 ownership,CAS 更新 `state_snapshot`,然后进入后续执行事务。
## 错误处理与安全
- API Key 缺失启动即失败,不进入运行态。
- 外部工具入参统一白名单和 schema 校验。
- resume 决策必须鉴权与会话所有权校验。
- 错误响应遵循 RFC 7807,避免泄漏敏感上下文。
## 工具调用与恢复语义
- 工具分三类:
- 前端工具:由 `RunAgentInput.tools` 提供能力声明,触发 interrupt,由客户端执行并回传 result。
- 后端工具(需审批):先 interrupt 给前端审批;审批通过后由后端执行,不由前端执行。
- 后端工具(直执):后端直接执行。
- 一致性约束:
- 每个 tool_result 必须携带 `tool_call_id`。
- 后端仅接受当前 `state_snapshot.pending_tool_calls` 中存在且状态合法的 `tool_call_id`。
- 若收到未知/已消费/过期 `tool_call_id`,立即产出 `RUN_ERROR` 并记录审计日志。
## 测试策略
- 单元测试:
- 配置解析与 key 解析
- run/resume 状态机与幂等
- LiteLLM usage 聚合
- 集成测试:
- API 入队
- Worker 消费
- SSE 事件顺序与终态
- E2E:
- run 成功链路
- interrupt + resume 链路
- tool 调用链路
## 迁移策略
- 阶段 0(本次):硬切删除旧代码、旧测试、旧文档契约。
- 阶段 1:搭建新架构骨架和最小可运行 run 流程。
- 阶段 2:接入 CrewAI + ag-ui-crewai + LiteLLM 完整链路。
- 阶段 3:补齐可观测性、压测与稳定性治理。
## 验收标准
- 后端仓库不存在旧 `v1/agent` 与 `core/agent` 旧实现。
- 所有 Agent 相关旧测试与旧文档契约已移除。
- 新方案设计文档明确覆盖六项要求并可进入实现阶段。
@@ -1,574 +0,0 @@
# Agent 后端重建 Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** 在后端重建 Agent 运行时,满足队列异步、CrewAI 配置打通、AG-UI 工具中断恢复、LiteLLM 计量、以及 `sessions.state_snapshot` 持久化要求。
**Architecture:** `v1/agent` 仅做 API/鉴权/参数校验与 SSE 输出,`core/agent` 负责编排与执行。Agent 创建配置由 `system_agents`(数据库)+ `core/config/static/crewai/*.yaml`(静态模板)合并生成。run/resume 全链路通过 Celery Worker 执行,状态写入 `sessions.state_snapshot`
**Tech Stack:** FastAPI, Celery, Redis, CrewAI, ag-ui-crewai, LiteLLM, SQLAlchemy, Alembic, pytest
---
### Task 1: 建立配置聚合器(system_agents + static/crewai)
**Files:**
- Create: `backend/src/core/agent/infrastructure/config/resolver.py`
- Modify: `backend/src/core/config/static/crewai/agents.yaml`
- Modify: `backend/src/core/config/static/crewai/tasks.yaml`
- Create: `backend/src/core/config/static/crewai/workflow.yaml`
- Create: `backend/src/core/config/static/crewai/tools.yaml`
- Test: `backend/tests/unit/core/agent/test_config_resolver.py`
**Step 1: Write the failing test**
```python
def test_resolver_merges_system_agents_and_static_templates():
resolved = resolve_agent_runtime_config(...)
assert resolved.intent.llm.model_code == "deepseek-v3.2"
assert "intent" in resolved.workflow_stages
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
Expected: FAIL with `NameError` or import not found
**Step 3: Write minimal implementation**
```python
def resolve_agent_runtime_config(system_agents: list[dict], static_cfg: dict) -> RuntimeConfig:
by_type = {item["agent_type"]: item for item in system_agents}
return RuntimeConfig.from_sources(by_type=by_type, static_cfg=static_cfg)
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/config/resolver.py backend/src/core/config/static/crewai backend/tests/unit/core/agent/test_config_resolver.py
git commit -m "feat: add system_agents and static crewai config resolver"
```
### Task 2: 统一 LLM Key 与模型配置入口
**Files:**
- Modify: `backend/src/core/config/settings.py`
- Modify: `.env.example`
- Create: `backend/tests/unit/core/config/test_llm_settings.py`
**Step 1: Write the failing test**
```python
def test_llm_keys_read_from_settings(monkeypatch):
monkeypatch.setenv("SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK", "k1")
s = Settings()
assert s.llm.provider_keys.deepseek == "k1"
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
Expected: FAIL with missing `llm` field
**Step 3: Write minimal implementation**
```python
class LLMProviderKeys(BaseModel):
deepseek: str | None = None
class LLMSettings(BaseModel):
provider_keys: LLMProviderKeys = LLMProviderKeys()
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/config/settings.py .env.example backend/tests/unit/core/config/test_llm_settings.py
git commit -m "feat: centralize llm provider keys in settings"
```
### Task 3: sessions 表状态快照契约落地
**Files:**
- Create: `backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py`
- Modify: `backend/src/models/agent_chat_session.py`
- Create: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
**Step 1: Write the failing test**
```python
def test_sessions_has_state_snapshot_column(db_inspector):
columns = db_inspector.get_columns("sessions")
assert "state_snapshot" in [c["name"] for c in columns]
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
Expected: FAIL when migration not applied
**Step 3: Write minimal implementation**
```python
def upgrade() -> None:
op.add_column("sessions", sa.Column("state_snapshot", postgresql.JSONB, nullable=True))
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py backend/src/models/agent_chat_session.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
git commit -m "feat(db): enforce sessions state_snapshot contract"
```
### Task 3.1: 会话与消息持久化仓储
**Files:**
- Create: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Create: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
**Step 1: Write the failing test**
```python
def test_run_persists_user_and_assistant_messages(db_session):
run = execute_run(...)
rows = list_messages(session_id=run.session_id)
assert rows[0].role == "user"
assert rows[1].role == "assistant"
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
async def append_message(...):
session.add(AgentChatMessage(...))
async def update_session_aggregate(...):
session_obj.message_count = message_count
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/persistence backend/tests/integration/core/agent/test_session_message_persistence.py
git commit -m "feat: persist session lifecycle and messages for agent runs"
```
### Task 4: 定义 state_snapshot 结构与并发语义
**Files:**
- Create: `backend/src/core/agent/domain/state_snapshot.py`
- Create: `backend/tests/unit/core/agent/test_state_snapshot.py`
**Step 1: Write the failing test**
```python
def test_pending_tool_call_snapshot_contains_correlation_fields():
snap = StateSnapshot.new(...)
pending = snap.pending_tool_calls[0]
assert pending.tool_call_id
assert pending.status == "PENDING_APPROVAL"
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
class PendingToolCall(BaseModel):
tool_call_id: str
tool_name: str
status: Literal["PENDING_APPROVAL", "APPROVED", "EXECUTED", "REJECTED", "EXPIRED"]
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/domain/state_snapshot.py backend/tests/unit/core/agent/test_state_snapshot.py
git commit -m "feat: define sessions state_snapshot schema for run and tool state"
```
### Task 5: 工具路由策略(前端/后端/审批)
**Files:**
- Create: `backend/src/core/agent/domain/tool_policy.py`
- Create: `backend/tests/unit/core/agent/test_tool_policy.py`
**Step 1: Write the failing test**
```python
def test_frontend_tool_requires_interrupt_and_client_execution():
decision = classify_tool_call(name="ui.navigate_to", source="request.tools")
assert decision.mode == "FRONTEND_EXECUTE"
def test_backend_approval_tool_returns_interrupt_but_executes_on_backend_after_approve():
decision = classify_tool_call(name="srv.transfer_funds", requires_approval=True)
assert decision.mode == "BACKEND_APPROVAL_INTERRUPT"
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
if tool_name.startswith("ui."):
return ToolDecision(mode="FRONTEND_EXECUTE")
if requires_approval:
return ToolDecision(mode="BACKEND_APPROVAL_INTERRUPT")
return ToolDecision(mode="BACKEND_DIRECT_EXECUTE")
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/domain/tool_policy.py backend/tests/unit/core/agent/test_tool_policy.py
git commit -m "feat: add frontend/backend tool policy and approval routing"
```
### Task 6: tool_call 与 tool_result 对账机制
**Files:**
- Create: `backend/src/core/agent/domain/tool_correlation.py`
- Create: `backend/tests/unit/core/agent/test_tool_correlation.py`
**Step 1: Write the failing test**
```python
def test_rejects_tool_result_when_tool_call_id_not_pending():
store = PendingToolStore([])
with pytest.raises(ToolCorrelationError):
store.apply_result(tool_call_id="unknown", result={"ok": True})
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def apply_result(self, *, tool_call_id: str, result: dict) -> None:
pending = self._pending.get(tool_call_id)
if pending is None:
raise ToolCorrelationError("tool_call_id not pending")
pending.result = result
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/domain/tool_correlation.py backend/tests/unit/core/agent/test_tool_correlation.py
git commit -m "feat: add tool call/result correlation guard"
```
### Task 7: Celery run/resume 异步任务
**Files:**
- Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
- Create: `backend/src/core/agent/application/run_service.py`
- Create: `backend/src/core/agent/application/resume_service.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
**Step 1: Write the failing test**
```python
def test_run_api_enqueues_celery_task(client):
resp = client.post("/api/v1/agent/runs", json={...})
assert resp.status_code == 202
def test_resume_updates_session_status_and_snapshot(client):
resp = client.post("/api/v1/agent/runs/r1/resume", json={...})
assert resp.status_code == 202
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def enqueue_run(cmd: RunCommand) -> str:
task = run_agent_task.apply_async(args=[cmd.model_dump()])
return task.id
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/application backend/src/core/agent/infrastructure/queue backend/tests/integration/core/agent/test_queue_run_resume.py
git commit -m "feat: add celery-based run and resume tasks"
```
### Task 8: CrewAI 运行时加载与创建
**Files:**
- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
- Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
**Step 1: Write the failing test**
```python
def test_runtime_creates_agents_tasks_from_resolved_config():
runtime = CrewAIRuntime(...)
crew = runtime.build_crew(message="hello")
assert len(crew.agents) >= 1
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def build_crew(self, *, message: str) -> Crew:
agents = self._factory.build_agents(self._config)
tasks = self._factory.build_tasks(self._config, message=message)
return Crew(agents=agents, tasks=tasks)
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/crewai backend/tests/unit/core/agent/test_crewai_runtime.py
git commit -m "feat: create crewai runtime from resolved config"
```
### Task 9: AG-UI 与 ag-ui-crewai 事件桥
**Files:**
- Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
- Create: `backend/src/core/agent/infrastructure/agui/stream.py`
- Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
**Step 1: Write the failing test**
```python
def test_agui_stream_emits_required_lifecycle():
events = to_agui_events(internal_events=[...])
assert events[0]["type"] == "RUN_STARTED"
assert events[-1]["type"] in {"RUN_FINISHED", "RUN_ERROR"}
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def to_agui_events(internal_events: list[dict]) -> list[dict]:
return [map_event(e) for e in internal_events]
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/agui backend/tests/unit/core/agent/test_agui_bridge.py
git commit -m "feat: add ag-ui and ag-ui-crewai event bridge"
```
### Task 10: LiteLLM 调用统计与会话聚合
**Files:**
- Create: `backend/src/core/agent/infrastructure/litellm/client.py`
- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
- Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
**Step 1: Write the failing test**
```python
def test_tracker_aggregates_per_call_usage_and_cost():
t = UsageTracker()
t.add({"input_tokens": 10, "output_tokens": 5, "cost": "0.1"})
assert t.snapshot()["total_tokens"] == 15
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
def add(self, usage: dict[str, object]) -> None:
self.input_tokens += int(usage.get("input_tokens", 0))
self.output_tokens += int(usage.get("output_tokens", 0))
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/litellm backend/tests/unit/core/agent/test_litellm_usage.py
git commit -m "feat: add litellm usage and cost tracking"
```
### Task 11: v1/agent 薄层 API + SSE 出口
**Files:**
- Create: `backend/src/v1/agent/router.py`
- Create: `backend/src/v1/agent/schemas.py`
- Create: `backend/src/v1/agent/dependencies.py`
- Create: `backend/src/v1/agent/service.py`
- Modify: `backend/src/v1/router.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
**Step 1: Write the failing test**
```python
def test_run_endpoint_returns_sse_and_not_blocking(client):
resp = client.post("/api/v1/agent/runs", json={...})
assert resp.status_code == 202
```
**Step 2: Run test to verify it fails**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
Expected: FAIL
**Step 3: Write minimal implementation**
```python
@router.post("/runs", status_code=202)
async def create_run(...):
task_id = service.enqueue_run(input_data)
return {"task_id": task_id}
```
**Step 4: Run test to verify it passes**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
Expected: PASS
**Step 5: Commit**
```bash
git add backend/src/v1/agent backend/src/v1/router.py backend/tests/integration/v1/agent/test_routes.py
git commit -m "feat: add thin v1 agent api and sse endpoints"
```
### Task 12: 端到端验证与文档回填
**Files:**
- Modify: `docs/runtime/runtime-route.md`
- Modify: `docs/runtime/runtime-runbook.md`
**Step 1: Run unit tests**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent backend/tests/unit/core/config backend/tests/unit/database -q`
Expected: PASS
**Step 2: Run integration tests**
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
Expected: PASS
**Step 3: Run lint and typecheck**
Run: `PYTHONPATH=backend/src uv run ruff check backend/src backend/tests`
Expected: PASS
Run: `PYTHONPATH=backend/src uv run basedpyright backend/src`
Expected: PASS
**Step 4: Document protocol contracts**
在运行手册中补充以下固定规则:
- `system_agents` + `static/crewai` 配置合并优先级。
- `sessions.state_snapshot` 字段结构与版本号。
- `messages` 入库顺序与 `sessions` 聚合字段更新规则。
- 工具调用审批与恢复时序图。
- tool_call/result 不匹配时的错误语义(`RUN_ERROR` + 可审计日志)。
**Step 5: Commit**
```bash
git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md
git commit -m "docs: add new agent runtime contracts and operational guide"
```
## Success Criteria
- [ ] Agent 创建配置由 `system_agents` + `core/config/static/crewai` 合并生成。
- [ ] run/resume 仅通过 Celery Worker 执行,Web 不执行编排。
- [ ] `v1/agent` 无业务编排代码。
- [ ] `sessions.state_snapshot` 承担运行态和工具审批恢复状态。
- [ ] 每次 run/resume 的会话状态变更均落库到 `sessions`
- [ ] 用户/助手/工具消息按 `messages` 约束落库,`seq` 单调递增。
- [ ] 前端工具与后端工具(审批/非审批)策略完整可测。
- [ ] tool_call 与 tool_result 具备强关联校验并可恢复/报错。
- [ ] LiteLLM 逐次计量与 run 聚合可落库。
@@ -1,199 +0,0 @@
# Agent Architecture Simplification Design
**Date:** 2026-03-04
**Status:** Approved
**Author:** AI Assistant
## Overview
Simplify the agent configuration architecture by removing the redundant `user_agents` table and renaming `user_agent_catalog` to `system_agents`.
## Problem Statement
Current architecture has redundant data:
- `user_agent_catalog`: System-level agent configurations (3 agent types for all users)
- `user_agents`: Per-user agent instances (copies catalog data for each user)
Since every user has the same 3 agents with identical configurations (from catalog), maintaining `user_agents` table creates unnecessary complexity and data duplication.
## Goals
1. Remove `user_agents` table and related code
2. Rename `user_agent_catalog` to `system_agents` for clarity
3. Preserve ability for future user-level prompt customization via `profiles.settings`
4. Maintain backward compatibility in deployment process
## Non-Goals
- User-level agent configuration (LLM selection, temperature, etc.)
- User-level prompt customization implementation (deferred to future iteration)
## Architecture Changes
### Current Architecture
```
user_agent_catalog (system config)
↓ (trigger copies for each new user)
user_agents (per-user instances)
```
### New Architecture
```
system_agents (shared by all users)
profiles.settings.agent_prompts (future: user-level prompts)
```
### Data Flow
1. System startup: Load `system_agents` from YAML
2. User creation: No longer creates `user_agents` records
3. Runtime (future): Read from `system_agents` + merge with `profiles.settings.agent_prompts`
## Database Migration
### Changes
1. **Delete `memories.agent_id` column**
- Remove foreign key `fk_memories_agent_id`
- Remove check constraint `chk_memory_type_agent_id`
- Remove index `ix_memories_agent_type_status`
- Drop column `agent_id`
2. **Delete `user_agents` table**
- Remove all RLS policies
- Remove indexes: `ix_user_agents_agent_type`, `ix_user_agents_status`
- Remove foreign keys: `fk_user_agents_user_id`, `fk_user_agents_llm_id`, etc.
- Remove check constraint `chk_agent_type`
- Remove unique constraint `uq_user_agents_user_id_agent_type`
- Drop table
3. **Rename `user_agent_catalog` → `system_agents`**
- Remove old RLS policies
- Rename table
- Rename constraints: `fk_user_agent_catalog_llm_id` → `fk_system_agents_llm_id`
- Rename check constraint: `chk_user_agent_catalog_status` → `chk_system_agents_status`
- Re-create RLS policies with new table name
4. **Update trigger `create_profile_for_new_user()`**
- Remove logic that inserts into `user_agents`
- Initialize `profiles.settings.agent_prompts` with empty object
5. **Update existing `profiles.settings`**
- Add `agent_prompts: {}` to all existing profiles
### Downgrade Path
- Re-create `user_agents` table with all constraints and indexes
- Restore `memories.agent_id` column and constraints
- Rename `system_agents` → `user_agent_catalog`
- Restore original trigger
## Code Changes
### Model Layer
**Delete:**
- `backend/src/models/user_agents.py`
**Rename:**
- `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
- Class `UserAgentCatalog` → `SystemAgents`
**Update:**
- `backend/src/models/__init__.py` - Update imports and exports
### Configuration Layer
**Rename:**
- `backend/src/core/config/static/database/user_agent_catalog.yaml`
→ `backend/src/core/config/static/database/system_agents.yaml`
**Update:**
- `backend/src/core/config/initial/init_data.py`
- `UserAgentCatalogSeed` → `SystemAgentsSeed`
- `UserAgentCatalogYaml` → `SystemAgentsYaml`
- Import from `models.system_agents`
- Path: `system_agents.yaml`
- Function: `initialize_user_agent_catalog()` → `initialize_system_agents()`
### Future: Profile Settings Structure (Deferred)
```json
{
"agent_prompts": {
"INTENT_RECOGNITION": "custom prompt...",
"TASK_EXECUTION": "custom prompt...",
"RESULT_REPORTING": "custom prompt..."
}
}
```
## Testing Strategy
### Migration Tests
- Verify `user_agents` table is deleted
- Verify `system_agents` table exists with correct structure
- Verify trigger no longer creates `user_agents` records
- Verify `profiles.settings.agent_prompts` is initialized
- Verify downgrade path works correctly
### Model Tests
- Verify `SystemAgents` model CRUD operations
- Verify `Profile.settings` JSONB storage
### Integration Tests
- Verify `initialize_system_agents()` loads from YAML
- Verify data is correctly inserted into `system_agents` table
## Deployment Considerations
### Pre-deployment
- Backup database (especially `user_agents` if any data exists)
- Confirm production `user_agents` table has no critical data
### Deployment
1. Run migration: `alembic upgrade head`
2. Verify migration success
3. Restart application services
4. Verify new user registration works without `user_agents`
### Post-deployment
- Monitor application logs for any references to deleted `user_agents`
- Verify agent-related functionality still works
## Risks and Mitigations
| Risk | Mitigation |
|------|-----------|
| Existing `user_agents` data loss | Backup before migration; data is redundant anyway |
| Code still references `user_agents` | Comprehensive code search and testing |
| Trigger fails on new user creation | Test migration thoroughly; include rollback plan |
| Future need for user-level config | Can add `agent_overrides` to `profiles.settings` |
## Success Criteria
- [ ] All tests pass
- [ ] Migration runs successfully (upgrade and downgrade)
- [ ] New user registration creates profile without `user_agents` records
- [ ] System agents are loaded from YAML correctly
- [ ] No references to `user_agents` remain in codebase
## Timeline
- Design: 2026-03-04 (Completed)
- Implementation: TBD
- Testing: TBD
- Deployment: TBD
## References
- Migration file: `backend/alembic/versions/YYYYMMDD_simplify_agent_architecture.py`
- Original catalog migration: `backend/alembic/versions/50ae013ce530_add_user_agent_catalog.py`
@@ -1,844 +0,0 @@
# Agent Architecture Simplification Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Simplify agent configuration by removing redundant user_agents table and renaming user_agent_catalog to system_agents
**Architecture:** Delete user_agents table (including memories.agent_id dependency), rename user_agent_catalog to system_agents, update all references in code
**Tech Stack:** Python 3.11+, SQLAlchemy, Alembic, PostgreSQL
---
## Prerequisites
- [ ] Current branch: dev
- [ ] No uncommitted changes
- [ ] Docker services running (Supabase local)
## Task 1: Create Database Migration
**Files:**
- Create: `backend/alembic/versions/20260304_simplify_agent_architecture.py`
**Step 1: Create migration file**
Run: `cd backend && uv run alembic revision -m "simplify_agent_architecture"`
Expected: New migration file created with revision ID
**Step 2: Write migration upgrade logic**
Edit the generated migration file with this complete upgrade function:
```python
def upgrade() -> None:
# 1. Delete memories.agent_id dependencies
op.drop_constraint("fk_memories_agent_id", "memories", type_="foreignkey")
op.drop_constraint("chk_memory_type_agent_id", "memories", type_="check")
op.execute("DROP INDEX IF EXISTS ix_memories_agent_type_status")
op.drop_column("memories", "agent_id")
# 2. Delete user_agents table
_drop_rls("user_agents")
op.drop_constraint("fk_user_agents_updated_by", "user_agents", type_="foreignkey")
op.drop_constraint("fk_user_agents_created_by", "user_agents", type_="foreignkey")
op.drop_constraint("fk_user_agents_llm_id", "user_agents", type_="foreignkey")
op.drop_constraint("fk_user_agents_user_id", "user_agents", type_="foreignkey")
op.drop_constraint("chk_agent_type", "user_agents", type_="check")
op.drop_constraint("uq_user_agents_user_id_agent_type", "user_agents", type_="unique")
op.execute("DROP INDEX IF EXISTS ix_user_agents_status")
op.execute("DROP INDEX IF EXISTS ix_user_agents_agent_type")
op.drop_table("user_agents")
# 3. Rename user_agent_catalog to system_agents
_drop_rls("user_agent_catalog")
op.rename_table("user_agent_catalog", "system_agents")
op.execute(
"ALTER TABLE system_agents RENAME CONSTRAINT fk_user_agent_catalog_llm_id "
"TO fk_system_agents_llm_id"
)
op.execute(
"ALTER TABLE system_agents RENAME CONSTRAINT chk_user_agent_catalog_status "
"TO chk_system_agents_status"
)
_enable_rls("system_agents")
# 4. Update trigger
op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
op.execute("""
CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
RETURNS trigger
LANGUAGE plpgsql
SECURITY DEFINER
SET search_path = public
AS $$
BEGIN
INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
VALUES (
NEW.id,
COALESCE(
NEW.raw_user_meta_data ->> 'username',
split_part(NEW.email, '@', 1),
'user_' || substring(NEW.id::text, 1, 8)
),
NULL,
NULL,
'{"agent_prompts": {}}'::jsonb,
now(),
now()
)
ON CONFLICT (id) DO NOTHING;
RETURN NEW;
END;
$$
""")
op.execute("""
CREATE TRIGGER on_auth_user_created
AFTER INSERT ON auth.users
FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
""")
# 5. Update existing profiles.settings
op.execute("""
UPDATE profiles
SET settings = jsonb_set(
COALESCE(settings, '{}'::jsonb),
'{agent_prompts}',
'{}'::jsonb
)
WHERE NOT settings ? 'agent_prompts'
""")
```
**Step 3: Write migration downgrade logic**
Add this complete downgrade function:
```python
def downgrade() -> None:
# 1. Revert trigger
op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
op.execute("""
CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
RETURNS trigger
LANGUAGE plpgsql
SECURITY DEFINER
SET search_path = public
AS $$
BEGIN
INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
VALUES (
NEW.id,
COALESCE(
NEW.raw_user_meta_data ->> 'username',
split_part(NEW.email, '@', 1),
'user_' || substring(NEW.id::text, 1, 8)
),
NULL,
NULL,
'{}'::jsonb,
now(),
now()
)
ON CONFLICT (id) DO NOTHING;
INSERT INTO public.user_agents (id, user_id, llm_id, agent_type, config, status, created_by, updated_by)
SELECT
gen_random_uuid(),
NEW.id,
uac.llm_id,
uac.agent_type,
uac.config,
uac.status,
NEW.id,
NEW.id
FROM public.user_agent_catalog uac;
RETURN NEW;
END;
$$
""")
op.execute("""
CREATE TRIGGER on_auth_user_created
AFTER INSERT ON auth.users
FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
""")
# 2. Revert rename: system_agents -> user_agent_catalog
_drop_rls("system_agents")
op.rename_table("system_agents", "user_agent_catalog")
op.execute(
"ALTER TABLE user_agent_catalog RENAME CONSTRAINT fk_system_agents_llm_id "
"TO fk_user_agent_catalog_llm_id"
)
op.execute(
"ALTER TABLE user_agent_catalog RENAME CONSTRAINT chk_system_agents_status "
"TO chk_user_agent_catalog_status"
)
_enable_rls("user_agent_catalog")
# 3. Recreate user_agents table
op.create_table(
"user_agents",
sa.Column("id", sa.UUID(), nullable=False),
sa.Column("user_id", sa.UUID(), nullable=False),
sa.Column("llm_id", sa.UUID(), nullable=False),
sa.Column("agent_type", sa.String(length=20), nullable=False),
sa.Column(
"config",
postgresql.JSONB(astext_type=sa.Text()),
server_default="{}",
nullable=False,
),
sa.Column("status", sa.String(length=20), nullable=False),
sa.Column("created_by", sa.UUID(), nullable=True),
sa.Column("updated_by", sa.UUID(), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
op.create_unique_constraint(
"uq_user_agents_user_id_agent_type",
"user_agents",
["user_id", "agent_type"]
)
op.execute(
"CREATE INDEX ix_user_agents_agent_type ON user_agents (agent_type)"
)
op.execute(
"CREATE INDEX ix_user_agents_status ON user_agents (status)"
)
op.execute(
"ALTER TABLE user_agents ADD CONSTRAINT chk_agent_type "
"CHECK (agent_type IN ('INTENT_RECOGNITION', 'TASK_EXECUTION', 'RESULT_REPORTING'))"
)
op.create_foreign_key(
"fk_user_agents_user_id",
"user_agents",
"users",
["user_id"],
["id"],
referent_schema="auth",
ondelete="CASCADE",
)
op.create_foreign_key(
"fk_user_agents_llm_id",
"user_agents",
"llms",
["llm_id"],
["id"],
ondelete="RESTRICT",
)
op.create_foreign_key(
"fk_user_agents_created_by",
"user_agents",
"users",
["created_by"],
["id"],
referent_schema="auth",
ondelete="SET NULL",
)
op.create_foreign_key(
"fk_user_agents_updated_by",
"user_agents",
"users",
["updated_by"],
["id"],
referent_schema="auth",
ondelete="SET NULL",
)
_enable_rls("user_agents")
# 4. Recreate memories.agent_id
op.add_column(
"memories",
sa.Column("agent_id", sa.UUID(), nullable=True)
)
op.create_foreign_key(
"fk_memories_agent_id",
"memories",
"user_agents",
["agent_id"],
["id"],
ondelete="CASCADE",
)
op.execute(
"CREATE INDEX ix_memories_agent_type_status ON memories (agent_id, memory_type, status)"
)
op.execute(
"ALTER TABLE memories ADD CONSTRAINT chk_memory_type_agent_id "
"CHECK ((memory_type = 'work' AND agent_id IS NOT NULL) OR "
"(memory_type = 'user' AND agent_id IS NULL))"
)
```
**Step 4: Add helper functions**
Add these helper functions at the end of the migration file:
```python
def _enable_rls(table_name: str) -> None:
    """Turn on row level security for ``table_name`` with constant-false policies.

    For each of the ``anon`` and ``authenticated`` roles, any policy already
    named ``{role}_{action}_{table_name}`` is dropped, row level security is
    enabled on the table, and one policy per action (select, insert, update,
    delete) is created whose predicate is the literal ``false``.

    Args:
        table_name: Table to alter. It is interpolated directly into the SQL
            text, so it must be a trusted identifier chosen by the migration.
    """
    roles = ("anon", "authenticated")
    # (action, predicate clause) pairs, in the order the policies are created.
    deny_rules = (
        ("select", "USING (false)"),
        ("insert", "WITH CHECK (false)"),
        ("update", "USING (false) WITH CHECK (false)"),
        ("delete", "USING (false)"),
    )

    # Drop same-named policies first so the CREATE POLICY calls cannot collide.
    stale_policies = [
        f"{role}_{action}_{table_name}"
        for role in roles
        for action, _ in deny_rules
    ]
    for policy_name in stale_policies:
        op.execute(f"DROP POLICY IF EXISTS {policy_name} ON {table_name}")

    op.execute(f"ALTER TABLE {table_name} ENABLE ROW LEVEL SECURITY")

    for role in roles:
        for action, predicate in deny_rules:
            op.execute(
                f"CREATE POLICY {role}_{action}_{table_name} ON {table_name} "
                f"FOR {action.upper()} TO {role} {predicate}"
            )
def _drop_rls(table_name: str) -> None:
    """Remove the per-role policies on ``table_name`` and disable row level security.

    Drops the ``{role}_{action}_{table_name}`` policies for the ``anon`` and
    ``authenticated`` roles (delete, update, insert, select — in that order),
    then disables row level security on the table.

    Args:
        table_name: Table to alter. It is interpolated directly into the SQL
            text, so it must be a trusted identifier chosen by the migration.
    """
    for role in ("anon", "authenticated"):
        for action in ("delete", "update", "insert", "select"):
            policy_name = f"{role}_{action}_{table_name}"
            op.execute(f"DROP POLICY IF EXISTS {policy_name} ON {table_name}")

    op.execute(f"ALTER TABLE {table_name} DISABLE ROW LEVEL SECURITY")
```
**Step 5: Verify migration file**
Check that all imports are correct:
```python
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
```
**Step 6: Commit migration**
```bash
git add backend/alembic/versions/20260304_simplify_agent_architecture.py
git commit -m "feat(db): add migration to simplify agent architecture"
```
---
## Task 2: Delete UserAgents Model
**Files:**
- Delete: `backend/src/models/user_agents.py`
- Modify: `backend/src/models/__init__.py`
**Step 1: Remove import from models/__init__.py**
Edit `backend/src/models/__init__.py`:
Remove these lines:
```python
from models.user_agents import UserAgent
```
And remove `"UserAgent"` from `__all__` list.
**Step 2: Delete user_agents.py file**
```bash
rm backend/src/models/user_agents.py
```
**Step 3: Verify no other imports**
Run: `cd backend && grep -r "from models.user_agents" src/`
Expected: No results (or only in __init__.py which we already fixed)
**Step 4: Commit**
```bash
git add backend/src/models/user_agents.py backend/src/models/__init__.py
git commit -m "refactor(models): remove UserAgents model"
```
---
## Task 3: Rename UserAgentCatalog to SystemAgents
**Files:**
- Rename: `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
- Modify: `backend/src/models/__init__.py`
**Step 1: Rename model file**
```bash
mv backend/src/models/user_agent_catalog.py backend/src/models/system_agents.py
```
**Step 2: Update class name in system_agents.py**
Edit `backend/src/models/system_agents.py`:
Change:
```python
class UserAgentCatalog(TimestampMixin, Base):
__tablename__: str = "user_agent_catalog"
```
To:
```python
class SystemAgents(TimestampMixin, Base):
__tablename__: str = "system_agents"
```
**Step 3: Update imports in models/__init__.py**
Edit `backend/src/models/__init__.py`:
Change:
```python
from models.user_agent_catalog import UserAgentCatalog
```
To:
```python
from models.system_agents import SystemAgents
```
And change `"UserAgentCatalog"` to `"SystemAgents"` in `__all__` list.
**Step 4: Commit**
```bash
git add backend/src/models/
git commit -m "refactor(models): rename UserAgentCatalog to SystemAgents"
```
---
## Task 4: Update Configuration Files
**Files:**
- Rename: `backend/src/core/config/static/database/user_agent_catalog.yaml`
  → `backend/src/core/config/static/database/system_agents.yaml`
- Modify: `backend/src/core/config/initial/init_data.py`
**Step 1: Rename YAML file**
```bash
mv backend/src/core/config/static/database/user_agent_catalog.yaml \
backend/src/core/config/static/database/system_agents.yaml
```
**Step 2: Update init_data.py imports**
Edit `backend/src/core/config/initial/init_data.py`:
Change:
```python
from models.user_agent_catalog import UserAgentCatalog
```
To:
```python
from models.system_agents import SystemAgents
```
**Step 3: Update Pydantic models**
Change:
```python
class UserAgentCatalogSeed(BaseModel):
agent_type: str
llm_model_code: str
status: str
config: dict[str, Any]
class UserAgentCatalogYaml(BaseModel):
agents: list[UserAgentCatalogSeed]
```
To:
```python
class SystemAgentsSeed(BaseModel):
agent_type: str
llm_model_code: str
status: str
config: dict[str, Any]
class SystemAgentsYaml(BaseModel):
agents: list[SystemAgentsSeed]
```
**Step 4: Update path function**
Change:
```python
def _default_user_agent_catalog_path() -> Path:
return (
Path(__file__).resolve().parents[1]
/ "static"
/ "database"
/ "user_agent_catalog.yaml"
)
```
To:
```python
def _default_system_agents_path() -> Path:
return (
Path(__file__).resolve().parents[1]
/ "static"
/ "database"
/ "system_agents.yaml"
)
```
**Step 5: Update load function**
Change:
```python
def load_user_agent_catalog(catalog_path: Path | None = None) -> dict[str, Any]:
path = catalog_path or _default_user_agent_catalog_path()
with path.open("r", encoding="utf-8") as file:
loaded = yaml.safe_load(file) or {}
if not isinstance(loaded, dict):
raise ValueError(f"Invalid user agent catalog format: {path}")
raw_agents = loaded.get("agents", [])
if not isinstance(raw_agents, list):
raise ValueError(f"Invalid user agent catalog agents section: {path}")
try:
parsed = UserAgentCatalogYaml.model_validate({"agents": list(raw_agents)})
except ValidationError as exc:
raise ValueError(f"Invalid user agent catalog data: {path}") from exc
return parsed.model_dump()
```
To:
```python
def load_system_agents(catalog_path: Path | None = None) -> dict[str, Any]:
path = catalog_path or _default_system_agents_path()
with path.open("r", encoding="utf-8") as file:
loaded = yaml.safe_load(file) or {}
if not isinstance(loaded, dict):
raise ValueError(f"Invalid system agents format: {path}")
raw_agents = loaded.get("agents", [])
if not isinstance(raw_agents, list):
raise ValueError(f"Invalid system agents agents section: {path}")
try:
parsed = SystemAgentsYaml.model_validate({"agents": list(raw_agents)})
except ValidationError as exc:
raise ValueError(f"Invalid system agents data: {path}") from exc
return parsed.model_dump()
```
**Step 6: Update upsert function**
Change:
```python
async def _upsert_user_agent_catalog(
session: AsyncSession,
*,
agent_type: str,
llm_id: uuid.UUID,
status: str,
config: dict[str, Any],
) -> None:
result = await session.execute(
select(UserAgentCatalog).where(UserAgentCatalog.agent_type == agent_type)
)
catalog_entry = result.scalar_one_or_none()
if catalog_entry is None:
session.add(
UserAgentCatalog(
agent_type=agent_type,
llm_id=llm_id,
status=status,
config=config,
)
)
else:
catalog_entry.llm_id = llm_id
catalog_entry.status = status
catalog_entry.config = config
```
To:
```python
async def _upsert_system_agents(
session: AsyncSession,
*,
agent_type: str,
llm_id: uuid.UUID,
status: str,
config: dict[str, Any],
) -> None:
result = await session.execute(
select(SystemAgents).where(SystemAgents.agent_type == agent_type)
)
catalog_entry = result.scalar_one_or_none()
if catalog_entry is None:
session.add(
SystemAgents(
agent_type=agent_type,
llm_id=llm_id,
status=status,
config=config,
)
)
else:
catalog_entry.llm_id = llm_id
catalog_entry.status = status
catalog_entry.config = config
```
**Step 7: Update initialize function**
Change:
```python
async def initialize_user_agent_catalog() -> None:
"""Initialize user agent catalog from YAML."""
catalog = load_user_agent_catalog()
async with AsyncSessionLocal() as session:
async with session.begin():
for agent in catalog["agents"]:
result = await session.execute(
select(Llm).where(Llm.model_code == agent["llm_model_code"])
)
llm = result.scalar_one_or_none()
if llm is None:
raise RuntimeError(
f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
)
await _upsert_user_agent_catalog(
session,
agent_type=agent["agent_type"],
llm_id=llm.id,
status=agent["status"],
config=agent["config"],
)
logger.info("Initialized user agent catalog")
```
To:
```python
async def initialize_system_agents() -> None:
"""Initialize system agents from YAML."""
catalog = load_system_agents()
async with AsyncSessionLocal() as session:
async with session.begin():
for agent in catalog["agents"]:
result = await session.execute(
select(Llm).where(Llm.model_code == agent["llm_model_code"])
)
llm = result.scalar_one_or_none()
if llm is None:
raise RuntimeError(
f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
)
await _upsert_system_agents(
session,
agent_type=agent["agent_type"],
llm_id=llm.id,
status=agent["status"],
config=agent["config"],
)
logger.info("Initialized system agents")
```
**Step 8: Update initialize_data function**
Change:
```python
async def initialize_data() -> bool:
"""Initialize bootstrap data."""
await initialize_llm_catalog()
await initialize_user_agent_catalog()
return True
```
To:
```python
async def initialize_data() -> bool:
"""Initialize bootstrap data."""
await initialize_llm_catalog()
await initialize_system_agents()
return True
```
**Step 9: Commit**
```bash
git add backend/src/core/config/
git commit -m "refactor(config): rename user_agent_catalog to system_agents"
```
---
## Task 5: Run Migration
**Step 1: Run migration**
```bash
cd backend && uv run alembic upgrade head
```
Expected: Migration runs successfully
**Step 2: Verify tables**
Connect to database and check:
- `user_agents` table should NOT exist
- `system_agents` table should exist
- `memories.agent_id` column should NOT exist
**Step 3: Test downgrade (optional but recommended)**
```bash
cd backend && uv run alembic downgrade -1
```
Expected: Previous migration restored
**Step 4: Re-run upgrade**
```bash
cd backend && uv run alembic upgrade head
```
Expected: Migration runs successfully again
---
## Task 6: Run Tests and Linting
**Step 1: Run type checking**
```bash
cd backend && uv run basedpyright src/
```
Expected: No errors
**Step 2: Run linting**
```bash
cd backend && uv run ruff check src/
```
Expected: No errors
**Step 3: Run tests**
```bash
cd backend && uv run pytest tests/
```
Expected: All tests pass
**Step 4: Fix any failures**
If any tests fail due to UserAgent references, update them to use SystemAgents.
---
## Task 7: Final Verification
**Step 1: Search for any remaining references**
```bash
cd backend && grep -r "user_agents" src/ --include="*.py"
cd backend && grep -r "UserAgent" src/ --include="*.py"
```
Expected: No results (except in migration files)
**Step 2: Test new user registration**
Start the backend server and register a new user. Verify:
- Profile is created
- No user_agents records are created
- profiles.settings contains `agent_prompts: {}`
**Step 3: Commit final changes**
```bash
git add .
git commit -m "feat: complete agent architecture simplification"
```
---
## Success Criteria
- [ ] Migration runs successfully (upgrade and downgrade)
- [ ] No UserAgent model references in code
- [ ] SystemAgents model works correctly
- [ ] All tests pass
- [ ] Linting passes
- [ ] Type checking passes
- [ ] New user registration works without user_agents
## Notes
- Keep the design document updated if any changes are made during implementation
- Test migration thoroughly before deploying to production
- Backup database before running migration in production
@@ -1,81 +0,0 @@
# Agent Runtime Closed Loop E2E Design
## 背景
当前 `test_agent_sse_flow.py` 不能稳定证明真实闭环:
- `session_id` 由随机 UUID 生成,导致 `POST /api/v1/agent/runs` 经常 404。
- 测试脚本存在不可达重复代码,诊断信息不完整。
- 未覆盖首聊自动建会话语义,和真实聊天入口不匹配。
目标是验证真实环境下业务闭环是否可用:
1. 用户请求 `agent` 路由
2. 请求进入异步任务
3. runtime 读取 `system_agents` 与 `llm` 配置并构建执行流程
4. 真实 LLM 请求发出并返回
5. `sessions`/`messages` 正确落库
6. 成本和 token 统计正确
7. 事件按 AG-UI 规范发布并可由 `stream_events` 订阅
## 设计原则
- 真实优先:不使用 mock,不替换 queue/redis/db/llm。
- 双轨验证:
- 诊断脚本用于本地排障(快速观察全链路状态)。
- pytest E2E 用例用于可重复回归。
- 明确前置条件:必须先使用 `infra/scripts/app.sh start` 启动 tmux 服务。
- 本地真实 LLM 基线:DashScope Qwen。
## API 契约调整
### `POST /api/v1/agent/runs`
- 现状:`session_id` 必填且必须存在。
- 新契约:`session_id` 可选。
- 有值:复用现有会话,校验 owner。
- 无值:在服务层先创建会话,再入队 run。
- 响应扩展:返回 `created` 标识是否为首聊自动建会话。
该契约与聊天产品行为一致:用户首条消息即可开始,不需要前置调用创建会话接口。
## 数据关系与删除语义
- `messages.session_id -> sessions.id` 为外键,且硬删除级联(`ondelete=CASCADE`)。
- 软删除需要补齐级联:
- 软删 `sessions` 时,同事务更新对应 `messages.deleted_at`
- E2E 增加验证,确保软删后默认查询不可见。
## 测试架构
### A. 诊断脚本(根目录)
重构 `test_agent_sse_flow.py`:
- 增加环境健康检查(web/redis/db)。
- 支持两种模式:
- `--new-session`:不传 `session_id`,验证首聊自动创建。
- `--reuse-session <id>`:验证复聊路径。
- 输出结构化阶段日志:HTTP、task_id、SSE 事件、数据库断言、失败根因。
### B. pytest E2E(`backend/tests/e2e`)
新增 `test_agent_closed_loop_live.py`:
- 标记为 `live`,默认不在 CI 执行。
- 用真实 JWT、真实 HTTP 请求、真实 SSE 订阅。
- 断言最小闭环标准:
- run 返回 202
- SSE 至少收到 `RUN_STARTED` 与终态(`RUN_FINISHED` 或 `RUN_ERROR`)
- `sessions` 状态和计数更新
- `messages` 有新增记录
- token/cost 字段非负且会话聚合一致
## 验收标准
- `uv run python test_agent_sse_flow.py --new-session` 通过。
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -v -m live` 通过。
- 首聊场景不需要外部先建 `session_id`
- 软删除会话后,消息软删除行为与约束一致。
## 风险与回退
- 真实 LLM 网络抖动会造成不稳定:通过重试和超时策略降低误报。
- 生产契约变更风险:保持字段向后兼容(原 `session_id` 仍可传)。
- 如果新契约引入问题,可临时退回“必传 session_id”路径并保留测试脚本诊断能力。
@@ -1,230 +0,0 @@
# Agent Runtime Closed Loop E2E Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** 让 agent 闭环在真实本地环境中可验证:`runs` 支持首聊自动建会话,并通过真实异步任务、真实 LLM、真实落库与真实 SSE 证明端到端可用。
**Architecture:** 在 `v1/agent` 服务层引入“可选 session_id + 自动建会话”语义;保持已有 owner 鉴权路径。重构诊断脚本并新增 live E2E 用例,统一验证 run 入队、事件流、数据库状态、成本统计与删除语义。通过最小侵入改造现有 run/resume 流程,确保兼容已存在调用。
**Tech Stack:** FastAPI, SQLAlchemy async, Celery, Redis Stream, LiteLLM, PyJWT, pytest, httpx
---
### Task 1: 扩展 API 契约(session_id 可选)
**Files:**
- Modify: `backend/src/v1/agent/schemas.py`
- Modify: `backend/src/v1/agent/router.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
**Step 1: Write the failing test**
在 `test_routes.py` 新增用例:请求体不传 `session_id` 仍返回 202,且响应含 `session_id`。
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -k "runs and session" -v`
Expected: FAIL,提示 `session_id` 缺失导致 422 或 mock 接口签名不匹配。
**Step 3: Write minimal implementation**
- `RunRequest.session_id` 改为可选。
- `enqueue_run` 调用 service 时传可选值。
- `TaskAcceptedResponse` 增加 `created: bool` 字段。
**Step 4: Run test to verify it passes**
Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add backend/src/v1/agent/schemas.py backend/src/v1/agent/router.py backend/tests/integration/v1/agent/test_routes.py
git commit -m "feat: allow agent runs without pre-created session"
```
### Task 2: 服务层支持自动建会话并保持鉴权
**Files:**
- Modify: `backend/src/v1/agent/service.py`
- Modify: `backend/src/v1/agent/repository.py`
- Modify: `backend/src/v1/agent/dependencies.py`
- Test: `backend/tests/unit/v1/agent/test_service.py` (new)
**Step 1: Write the failing test**
新增单测覆盖:
- `session_id is None` 时调用 `create_session_for_user` 并返回 `created=True`
- `session_id 有值` 时复用并校验 owner
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
Expected: FAIL,当前 service 无自动建会话能力。
**Step 3: Write minimal implementation**
- repository 增加 `create_session_for_user(user_id)`
- service `enqueue_run` 处理两条路径:
- 无 `session_id`:先创建 session。
- 有 `session_id`:校验 owner。
- 返回 `TaskAccepted(task_id, session_id, created)`
**Step 4: Run test to verify it passes**
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add backend/src/v1/agent/service.py backend/src/v1/agent/repository.py backend/src/v1/agent/dependencies.py backend/tests/unit/v1/agent/test_service.py
git commit -m "feat: auto-create chat session on first agent run"
```
### Task 3: 对齐 runtime 闭环数据断言(messages/sessions/cost)
**Files:**
- Modify: `backend/src/core/agent/application/run_service.py`
- Modify: `backend/src/core/agent/application/resume_service.py`
- Modify: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
**Step 1: Write the failing test**
在集成测试增加断言:
- `sessions.total_tokens`、`sessions.total_cost` 有更新
- `messages` 的 token/cost 字段与 session 聚合一致
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
Expected: FAIL,当前默认 token/cost 为 0,未做聚合更新。
**Step 3: Write minimal implementation**
- run/resume 流程接入 usage/cost 结果(来自 litellm 返回或 fallback 规则)。
- message 写入时填充 input/output tokens 与 cost。
- session 更新时累加 total_tokens/total_cost。
**Step 4: Run test to verify it passes**
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add backend/src/core/agent/application/run_service.py backend/src/core/agent/application/resume_service.py backend/src/core/agent/infrastructure/persistence/message_repository.py backend/src/core/agent/infrastructure/persistence/session_repository.py backend/tests/integration/core/agent/test_queue_run_resume.py
git commit -m "feat: persist runtime token and cost aggregates"
```
### Task 4: 补齐软删除级联(session -> messages)
**Files:**
- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
- Modify: `backend/src/v1/agent/service.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
**Step 1: Write the failing test**
新增用例:软删 session 后,同会话 messages 的 `deleted_at` 同步写入。
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
Expected: FAIL,当前无软删级联。
**Step 3: Write minimal implementation**
- repository 增加 `soft_delete_session_with_messages(session_id)`
- service 调用时使用同事务批量更新 messages。
**Step 4: Run test to verify it passes**
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/persistence/session_repository.py backend/src/v1/agent/service.py backend/tests/integration/core/agent/test_queue_run_resume.py
git commit -m "fix: cascade soft delete from sessions to messages"
```
### Task 5: 重构诊断脚本并新增 live E2E
**Files:**
- Modify: `test_agent_sse_flow.py`
- Create: `backend/tests/e2e/test_agent_closed_loop_live.py`
- Modify: `docs/bugs/2026-03-05-agent-runtime-bugs.md`
**Step 1: Write the failing test**
新增 live E2E 用例(`@pytest.mark.live`):
- 首聊不传 `session_id` 返回 202
- 订阅 SSE 收到关键事件
- DB 断言 session/messages/tokens/cost
**Step 2: Run test to verify it fails**
Run: `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
Expected: FAIL,当前契约或脚本未对齐。
**Step 3: Write minimal implementation**
- 清理脚本重复/不可达逻辑。
- 增加健康检查、阶段化日志、超时和错误根因输出。
- E2E 用例复用脚本中的 helper(JWT、SSE 解析、DB 断言)。
**Step 4: Run test to verify it passes**
Run:
- `uv run python test_agent_sse_flow.py --new-session`
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
Expected: PASS。
**Step 5: Commit**
```bash
git add test_agent_sse_flow.py backend/tests/e2e/test_agent_closed_loop_live.py docs/bugs/2026-03-05-agent-runtime-bugs.md
git commit -m "test: add live closed-loop agent e2e verification"
```
### Task 6: 全量验证与文档同步
**Files:**
- Modify: `docs/runtime/runtime-runbook.md`
- Modify: `docs/runtime/runtime-route.md`
**Step 1: Run targeted checks**
Run:
- `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
- `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
- `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
Expected: PASS。
**Step 2: Run quality gates**
Run:
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright`
Expected: PASS。
**Step 3: Update docs**
记录本地启动流程、真实 LLM 前置配置、live E2E 执行方式和故障排查。
**Step 4: Commit**
```bash
git add docs/runtime/runtime-runbook.md docs/runtime/runtime-route.md
git commit -m "docs: document live agent closed-loop e2e workflow"
```
@@ -1,469 +0,0 @@
# Agent Runtime Closed Loop Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Build a production-grade closed-loop agent runtime where `frontend -> FastAPI -> Celery -> run/resume service -> CrewAI -> AG-UI events -> Redis Stream -> SSE` is fully connected and verifiable.
**Architecture:** Keep HTTP API as control-plane and worker as data-plane. The API validates auth/ownership and enqueues commands, the Celery worker executes run/resume business logic using DB-driven agent config, runtime emits normalized AG-UI events and usage/cost telemetry, all events are persisted to Redis Stream, and SSE endpoint streams from Redis with resume support (`Last-Event-ID`).
**Tech Stack:** FastAPI, SQLAlchemy AsyncSession, Celery, Redis Streams, CrewAI, LiteLLM, Pydantic, pytest (unit/integration).
**Confirmed Constraints (locked):**
- Persist semantics use existing `messages.role` only (`assistant|user|system|tool`), no new `message_kind` column.
- `tool_result` must be semantically complete (especially UI schema); do not store summary-only payload.
- Store full `tool_result` payload in Supabase Storage (private bucket) and persist durable object reference in DB metadata; do not rely on expiring signed URL as primary reference.
- `metadata` must be fixed and typed via Pydantic model (no free-form drift).
- Do not introduce additional business tables for this scope; keep schema minimal.
- CrewAI runtime must default to streaming mode.
- Full traceability target is final semantic reconstruction of `user/assistant/tool_result`; chunk-level replay is not required.
**Metadata Contract (fixed, Pydantic-enforced):**
- Global required keys for all message metadata: `type`, `run_id`, `turn_id`.
- Global optional keys for all message metadata: `event_id`, `parent_message_id`, `error`.
- `type=user_input`:
- Required: `type`, `run_id`, `turn_id`.
- Optional: `input_source`, `client_ts`.
- `type=assistant_output`:
- Required: `type`, `run_id`, `turn_id`.
- Optional: `finish_reason`, `model_provider`, `cost_source`.
- `type=tool_call` (`role=assistant`):
- Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `tool_args`.
- Optional: `tool_schema_version`, `timeout_ms`.
- `type=tool_result` (`role=tool`):
- Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `storage_bucket`, `storage_path`, `payload_sha256`, `payload_bytes`, `payload_format`.
- Optional: `ui_schema_version`, `compression`, `storage_etag`, `render_hints`.
- Validation rules:
- `messages.role=tool` must use `metadata.type=tool_result`.
- `messages.role=assistant` + tool event must use `metadata.type=tool_call` or `assistant_output`.
- `tool_result` payload in DB must be reconstructable to AG-UI `TOOL_CALL_RESULT` using Storage object + metadata checksum.
---
### Task 1: Add Agent Module Skeleton and Contracts
**Files:**
- Create: `backend/src/core/agent/__init__.py`
- Create: `backend/src/core/agent/application/__init__.py`
- Create: `backend/src/core/agent/domain/__init__.py`
- Create: `backend/src/core/agent/infrastructure/events/__init__.py`
- Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
- Create: `backend/src/core/agent/infrastructure/agui/stream.py`
- Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
**Step 1: Write failing tests for event normalization and SSE formatting**
```python
def test_bridge_normalizes_event_type_to_upper_snake() -> None:
events = [{"type": "runStarted", "data": {"ok": True}}]
out = to_agui_events(events)
assert out[0]["type"] == "RUN_STARTED"
def test_sse_format_includes_id_event_data() -> None:
payload = to_sse_event(stream_id="1-0", event={"type": "RUN_STARTED", "data": {"a": 1}})
assert payload.startswith("id: 1-0\nevent: RUN_STARTED\ndata: {")
```
**Step 2: Run tests and confirm RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`
Expected: FAIL with missing module/function errors.
**Step 3: Implement minimal bridge + stream utilities**
```python
def to_agui_events(internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
...
def to_sse_event(stream_id: str, event: dict[str, Any]) -> str:
...
```
**Step 4: Run tests and confirm GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent backend/tests/unit/core/agent/test_agui_bridge.py
git commit -m "feat(agent): add ag-ui bridge and sse serializer utilities"
```
### Task 2: Implement Redis Stream Event Store and Reader
**Files:**
- Create: `backend/src/core/agent/infrastructure/events/redis_stream.py`
- Modify: `backend/src/core/config/settings.py`
- Test: `backend/tests/unit/core/agent/test_redis_stream.py`
**Step 1: Write failing tests for append/read semantics**
```python
def test_append_event_writes_json_payload() -> None:
...
def test_read_events_respects_last_event_id() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`
Expected: FAIL.
**Step 3: Implement Redis stream adapter**
```python
def append_event_sync(*, session_id: UUID, event: dict[str, Any]) -> str:
...
async def read_events(...):
...
```
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/events/redis_stream.py backend/src/core/config/settings.py backend/tests/unit/core/agent/test_redis_stream.py
git commit -m "feat(agent): add redis stream event transport for run events"
```
### Task 3: Build CrewAI Runtime + AG-UI Event Mapping + Usage Tracking
**Files:**
- Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
- Create: `backend/src/core/agent/infrastructure/litellm/client.py`
- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
- Create: `backend/src/core/agent/infrastructure/config/resolver.py`
- Modify: `backend/src/core/config/settings.py`
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
- Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
- Test: `backend/tests/unit/core/agent/test_config_resolver.py`
**Step 1: Write failing runtime tests (events + cost + strict errors)**
```python
def test_runtime_emits_text_tool_reasoning_events() -> None:
...
def test_runtime_raises_if_model_or_api_key_missing() -> None:
...
def test_usage_tracker_extracts_tokens_and_cost() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`
Expected: FAIL.
**Step 3: Implement runtime and tracker**
- Register CrewAI event handlers (`Task/LLM/Tool/Reasoning`) and map to AG-UI canonical event types.
- Default runtime to streaming mode for CrewAI execution.
- Enforce strict config behavior: no `llm_model_code` or provider key -> raise.
- Use LiteLLM cost calculator for actual cost; if cost cannot be computed, fail closed (raise), do not silently record zero.
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py backend/src/core/config/settings.py
git commit -m "feat(agent): implement crewai runtime events and litellm usage-cost auditing"
```
### Task 4: Implement Run/Resume Application Services (DB Config + Persistence)
**Files:**
- Create: `backend/src/core/agent/application/run_service.py`
- Create: `backend/src/core/agent/application/resume_service.py`
- Create: `backend/src/core/agent/application/session_state_persistence.py`
- Create: `backend/src/core/agent/domain/state_snapshot.py`
- Create: `backend/src/core/agent/domain/tool_correlation.py`
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
- Test: `backend/tests/unit/core/agent/test_state_snapshot.py`
- Test: `backend/tests/unit/core/agent/test_tool_correlation.py`
**Step 1: Write failing tests for DB-driven runtime and aggregate updates**
```python
async def test_run_service_loads_agent_config_from_db_and_persists_messages() -> None:
...
async def test_resume_service_requires_pending_tool_call() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`
Expected: FAIL.
**Step 3: Implement services**
- `run_service`: read session + system agent config from DB, execute runtime, persist user/assistant messages, update session aggregates.
- `resume_service`: validate pending tool call status, enforce idempotency semantics, resume runtime, persist audit fields.
- Persist metadata audit (`tokens`, `cost`, `cost_source`, correlation ids) for every assistant message.
- Persist tool lifecycle with role-only model:
- tool call message uses `role=assistant` with fixed metadata (`type=tool_call`, `tool_call_id`, `tool_name`, arguments reference).
- tool result message uses `role=tool` with fixed metadata (`type=tool_result`, `tool_call_id`, `tool_name`, storage bucket/path, checksum, bytes, schema version).
- `tool_result` full payload (UI schema) is uploaded to Supabase Storage private bucket; DB stores durable reference and verification fields.
- Ensure DB->AG-UI `TOOL_CALL_RESULT` reconstruction is equivalent to SSE-streamed final tool result semantics.
- Enforce metadata contract by Pydantic model at write path and read path (reject malformed metadata early).
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent/application backend/src/core/agent/domain backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py
git commit -m "feat(agent): add run-resume app services with db config and audit persistence"
```
### Task 5: Wire Celery Worker Task to Run/Resume and Publish Runtime Events
**Files:**
- Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
- Modify: `backend/src/core/celery/app.py`
- Test: `backend/tests/unit/core/agent/test_queue_tasks.py`
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
**Step 1: Write failing queue tests**
```python
def test_run_agent_task_emits_started_runtime_and_finished_events() -> None:
...
def test_run_agent_task_emits_error_event_on_exception() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`
Expected: FAIL.
**Step 3: Implement worker task flow**
- Decode command type (`run`/`resume`).
- Emit lifecycle events (`RUN_STARTED/RUN_RESUMED/RUN_FINISHED/RUN_ERROR`).
- Forward runtime callback events to Redis stream immediately.
- Persist session status/snapshot after completion.
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/core/agent/infrastructure/queue/tasks.py backend/src/core/celery/app.py backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py
git commit -m "feat(agent): wire celery run-resume execution and redis event publishing"
```
### Task 6: Implement API Contracts (Run/Resume/SSE) + Auth/Ownership/Idempotency
**Files:**
- Create: `backend/src/v1/agent/schemas.py`
- Create: `backend/src/v1/agent/repository.py`
- Create: `backend/src/v1/agent/service.py`
- Create: `backend/src/v1/agent/router.py`
- Create: `backend/src/v1/agent/dependencies.py`
- Modify: `backend/src/v1/router.py`
- Test: `backend/tests/unit/v1/agent/test_service.py`
- Test: `backend/tests/unit/v1/agent/test_owner_guard.py`
- Test: `backend/tests/integration/v1/agent/test_routes.py`
**Step 1: Write failing API tests**
```python
async def test_run_requires_auth_and_returns_202_task_id() -> None:
...
async def test_stream_reads_from_last_event_id() -> None:
...
def test_resume_idempotency_uses_redis_lock_and_task_key() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`
Expected: FAIL.
**Step 3: Implement API service/router**
- `POST /api/v1/agent/runs` enqueue run command.
- `POST /api/v1/agent/runs/{session_id}/resume` enqueue resume command with async redis lock + dedup task key.
- `GET /api/v1/agent/runs/{session_id}/events` SSE stream from Redis with `Last-Event-ID`.
- Enforce auth and session ownership checks on all endpoints.
- Validate `tool_call_id` and message length/pattern boundaries.
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/src/v1/agent backend/src/v1/router.py backend/tests/unit/v1/agent backend/tests/integration/v1/agent/test_routes.py
git commit -m "feat(agent): add authenticated run-resume-sse api with redis-backed idempotency"
```
### Task 7: Add Schema/Migration Contract for Session Snapshot + Audit Fields
**Files:**
- Create: `backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py`
- Modify: `backend/src/models/agent_chat_session.py`
- Modify: `backend/src/models/agent_chat_message.py`
- Test: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
**Migration scope note:**
- Fix current schema drift: model has `sessions.state_snapshot` but migration chain does not reliably provide this column in current DB state.
- Keep schema minimal; do not add new business tables in this migration.
**Step 1: Write failing migration contract tests**
```python
def test_session_has_state_snapshot_and_status_contract() -> None:
...
def test_message_has_token_cost_and_metadata_contract() -> None:
...
```
**Step 2: Run RED**
Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`
Expected: FAIL.
**Step 3: Implement migration and model alignment**
- Ensure `state_snapshot`, `status`, token/cost/metadata fields are present and nullable constraints are explicit.
- Add/verify indexes needed for role-based semantic reconstruction (`session_id, seq`, and targeted metadata lookups if required).
- Ensure `metadata` structure is validated by fixed Pydantic schema at application boundary.
- Add DB-level guardrails where feasible (check constraints) for role/metadata consistency without introducing new tables.
- Keep reversible downgrade path.
**Step 4: Run GREEN**
Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`
Expected: PASS.
**Step 5: Commit**
```bash
git add backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py backend/src/models/agent_chat_session.py backend/src/models/agent_chat_message.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
git commit -m "feat(agent): add db contract for session snapshot and usage audit fields"
```
### Task 8: End-to-End Closure Verification and Docs Update
**Files:**
- Modify: `docs/runtime/runtime-route.md`
- Modify: `docs/runtime/runtime-runbook.md`
- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
**Step 1: Write integration test for full closure path**
```python
async def test_closed_loop_run_flow_frontend_to_sse() -> None:
# run request -> queue command -> runtime events -> redis stream -> sse read
...
```
Also verify:
- `tool_result` full UI schema is written to Supabase Storage private bucket.
- `messages.role=tool` row contains stable storage reference and checksum metadata.
- Reading from DB can reconstruct final AG-UI `TOOL_CALL_RESULT` event payload semantics.
**Step 2: Run RED**
Run: `uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py -q`
Expected: FAIL.
**Step 3: Implement minimal missing glue and docs**
- Fill any missing wiring revealed by the test.
- Document endpoint contracts, event taxonomy, and operational runbook for redis/celery troubleshooting.
**Step 4: Run GREEN + full gate verification**
Run:
- `PYTHONPATH=backend/src uv run python backend/src/core/runtime/cli.py migrate`
- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright backend/src`
Expected:
- All relevant tests PASS.
- Ruff PASS.
- basedpyright 0 errors (notes/warnings can be documented if pre-existing).
**Step 5: Commit**
```bash
git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md backend/tests/integration/core/agent/test_session_message_persistence.py
git commit -m "docs(agent): document closed-loop runtime and verify end-to-end chain"
```
### Task 9: L2 Mandatory Review Gates
**Files:**
- No direct code changes required; apply fixes if findings appear.
**Step 1: Run required agents**
- `tdd-guide` (already enforced by plan sequence)
- `refactor-cleaner`
- `code-reviewer`
- `security-reviewer`
**Step 2: Fix all CRITICAL/HIGH findings**
Run targeted tests after each fix.
**Step 3: Final verification rerun**
Run:
- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
- `uv run ruff check backend/src backend/tests`
- `uv run basedpyright backend/src`
Expected: no failing tests; no lint errors; no type errors.
**Step 4: Final commit (if review fixes were needed)**
```bash
git add backend/src backend/tests docs/runtime
git commit -m "fix(agent): resolve L2 review findings for closed-loop runtime"
```
@@ -0,0 +1,746 @@
# UserAgentContext & ProfileSettings v1 设计
**Date:** 2026-03-05
**Status:** Approved
---
## 目标
为 Agent Runtime 提供完整的用户画像上下文,通过 Pydantic 约束 profiles.settings 结构,确保:
1. 运行时入口读取 profile(username/bio/settings)
2. settings 结构类型安全、版本可演进
3. 关键配置(语言/时区/国家)符合标准格式
---
## 架构
```
Profile (DB JSONB)
ProfileSettings (Pydantic)
UserAgentContext (DataClass)
build_global_system_prompt(ctx)
```
**设计原则:**
- 唯一入口:`get_user_agent_context(user_id)` 读取并构造上下文
- 不可变:UserAgentContext 使用 frozen dataclass
- 向后兼容:version 字段预留未来演进
---
## ProfileSettings v1 结构
```json
{
"version": 1,
"preferences": {
"interface_language": "zh-CN",
"ai_language": "zh-CN",
"timezone": "Asia/Shanghai",
"country": "CN"
},
"privacy": {},
"notification": {}
}
```
### 字段说明
| 字段 | 类型 | 默认值 | 约束 |
|------|------|--------|------|
| `version` | int | 1 | 必须为 1(v1 锁定) |
| `preferences.interface_language` | str | "zh-CN" | BCP-47 格式 |
| `preferences.ai_language` | str | "zh-CN" | BCP-47 格式 |
| `preferences.timezone` | str | "Asia/Shanghai" | IANA 时区 |
| `preferences.country` | str | "CN" | ISO 3166-1 alpha-2 |
| `privacy` | dict | {} | 空对象(预留) |
| `notification` | dict | {} | 空对象(预留) |
### 约束规则
**1. BCP-47 语言格式**
正则:`^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$`
示例:
- ✅ zh-CN, en-US, zh-TW, ja-JP
- ❌ zh_CN, EN, zh-cn(注意:该正则只校验格式,`chn` 这类三字母小写代码仍会通过)
**2. IANA 时区**
使用 `zoneinfo.ZoneInfo` 校验。
示例:
- ✅ Asia/Shanghai, America/New_York, UTC
- ❌ CST, GMT+8
**3. ISO 3166-1 alpha-2 国家代码**
使用 `pycountry.countries.get(alpha_2=...)` 校验。
示例:
- ✅ CN, US, JP, GB
- ❌ CHN, USA, zz
---
## UserAgentContext 结构
```python
@dataclass(frozen=True)
class UserAgentContext:
user_id: UUID
username: str
bio: str | None
settings: ProfileSettings
```
**设计要点:**
- 不可变(frozen=True):防止运行时修改
- 完整画像:包含身份(username/bio)和配置(settings)
- 唯一构造入口:`get_user_agent_context(user_id)`
---
## Pydantic 模型实现
```python
from pydantic import BaseModel, Field, field_validator
from dataclasses import dataclass
from uuid import UUID
import re
class PreferenceSettings(BaseModel):
    """User preference section of ``profiles.settings`` (schema v1).

    Every field has a default, so an empty ``preferences`` object is valid.
    """

    interface_language: str = "zh-CN"
    ai_language: str = "zh-CN"
    timezone: str = "Asia/Shanghai"
    country: str = "CN"

    @field_validator("interface_language", "ai_language")
    @classmethod
    def validate_bcp47(cls, v: str) -> str:
        """Accept tags shaped like ``language[-Script][-REGION]`` (e.g. ``zh-CN``).

        This is a format check only; it does not consult a language registry.

        Raises:
            ValueError: if ``v`` does not match the expected shape.
        """
        pattern = r"^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$"
        # fullmatch instead of match: with match(), "$" also matches right
        # before a trailing newline, so "zh-CN\n" would be accepted and stored.
        if not re.fullmatch(pattern, v):
            raise ValueError(f"Invalid BCP-47 language tag: {v}")
        return v

    @field_validator("timezone")
    @classmethod
    def validate_iana_timezone(cls, v: str) -> str:
        """Accept only keys that ``zoneinfo.ZoneInfo`` can load.

        Raises:
            ValueError: if the key is unknown or malformed.
        """
        import zoneinfo

        try:
            zoneinfo.ZoneInfo(v)
        except (zoneinfo.ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # Unknown keys raise ZoneInfoNotFoundError, malformed keys raise
            # ValueError, and some path-like keys surface as OSError.
            # Chain the cause so the original failure stays in the traceback.
            raise ValueError(f"Invalid IANA timezone: {v}") from exc
        return v

    @field_validator("country")
    @classmethod
    def validate_iso_country(cls, v: str) -> str:
        """Normalize to upper case and require a known ISO 3166-1 alpha-2 code.

        Raises:
            ValueError: if pycountry has no country for the upper-cased code.
        """
        import pycountry

        code = v.upper()
        if not pycountry.countries.get(alpha_2=code):
            raise ValueError(f"Invalid ISO 3166-1 alpha-2 country code: {v}")
        return code
class ProfileSettings(BaseModel):
version: int = Field(default=1, ge=1, le=1)
preferences: PreferenceSettings = Field(default_factory=PreferenceSettings)
privacy: dict = Field(default_factory=dict)
notification: dict = Field(default_factory=dict)
@dataclass(frozen=True)
class UserAgentContext:
user_id: UUID
username: str
bio: str | None
settings: ProfileSettings
```
---
## 依赖项
需要添加到 `backend/pyproject.toml`:
```toml
[project]
# 追加到现有 dependencies 列表(PEP 621 要求 dependencies 为字符串数组)
dependencies = [
    "pycountry>=23.0.0",
]
```
---
## 迁移策略
**数据库层:**
- profiles.settings 保持 JSONB,不做 schema 变更
- 现有数据默认值:`{"version": 1, "preferences": {"country": "CN"}}`
**应用层:**
- 读取时:`ProfileSettings.model_validate(profile.settings or {})`
- 写入时:`profile.settings = settings.model_dump()`
---
## 未来演进
**版本迁移:**
- Pydantic 支持多版本共存
- 数据库不做破坏性变更
---
---
## AG-UI 事件转发与落库策略
### 核心原则
**1. 事件转发时机:**
- 只有 organization 阶段完成后转发 AG-UI 事件
- AG-UI bridge 已实现底层机制,编排层控制转发时机
**2. 落库时机:**
- 意图识别和任务执行阶段:落库但 seq 取负数(用于审计)
- 结果反馈阶段:seq 取最新 seq 的绝对值 +1(用于展示)
### Seq 设计细节
**意图识别和任务执行阶段(审计用):**
- seq 取负数(如 -1, -2)
- role: "assistant"(标记为 agent 输出)
- content: 阶段的完整输出(用于审计/调试)
- 重建会话时通过 `WHERE seq > 0` 过滤,不展示给用户
**结果反馈阶段(展示用):**
- seq 取正数(取最新负数的绝对值 +1)
- role: "assistant"
- content: OrganizationResult.assistant_text
- 重建会话时通过 `WHERE seq > 0` 展示给用户
**示例:**
```
| seq | role | content | 展示 |
|------|----------|----------------------------|------|
| -2 | assistant| ExecutionResult (完整) | 否 |
| -1 | assistant| IntentResult (完整) | 否 |
| 1 | user | 用户输入 | 是 |
| 2 | assistant| OrganizationResult | 是 |
```
### 编排层职责
```python
@listen(intent_stage)
async def persist_intent(self, state: FlowState) -> FlowState:
# seq 取负数
seq = await message_repo.get_next_negative_seq(state.session_id)
await message_repo.create(
session_id=state.session_id,
seq=seq, # 负数
role="assistant",
content=state.intent_result.model_dump_json(),
...
)
return state
@listen(execution_stage)
async def persist_execution(self, state: FlowState) -> FlowState:
# seq 取负数
seq = await message_repo.get_next_negative_seq(state.session_id)
await message_repo.create(
session_id=state.session_id,
seq=seq, # 负数
role="assistant",
content=state.execution_result.model_dump_json(),
...
)
return state
@listen(organization_stage)
async def finalize_flow(self, state: FlowState) -> FlowState:
result = state.organization_result
# seq 取正数(最新负数绝对值+1)
seq = await message_repo.get_next_positive_seq(state.session_id)
await message_repo.create(
session_id=state.session_id,
seq=seq, # 正数
role="assistant",
content=result.assistant_text,
...
)
# 触发 AG-UI 事件(由 bridge 处理)
return state
```
### Token 和 Cost 累加
**策略:在内存中累加所有阶段的 token 和 cost,organization 完成后统一落库。**
```python
@dataclass
class FlowState:
# ...
tokens: dict[str, dict] = field(default_factory=dict)
cost: Decimal = Decimal("0")
currency: str = "CNY"
```
---
## CrewAI Flow 三阶段设计
### 架构概览
```
User Input + UserAgentContext
@start() begin()
@listen() intent_stage() → 判断 can_answer_directly
↓ (router)
├─ DIRECT_RESPONSE → 直接返回
└─ NEEDS_EXECUTION
@listen() execution_stage() → 任务执行/工具调用
@listen() organization_stage() → 结果组织与表达
返回给用户
```
### 三阶段职责
**1. Intent Recognition(意图识别)**
- Agent Type: `INTENT_RECOGNITION`
- 输出结构(最小化设计):
```python
class IntentResult(BaseModel):
direct_answer: bool # 是否可以直接回答
intent_analysis: str # 意图分析文本(用于调试/审计)
execution_prompt: str # 给 execution 阶段的提示词(direct_answer=false时使用)
direct_response: str # 直接回复文本(direct_answer=true时使用)
```
- 短路逻辑:
- `direct_answer=true` → 完全跳过 execution 和 organization,直接返回 direct_response
- `direct_answer=false` → 进入 execution 阶段
- 输出约束:使用 `output_pydantic=IntentResult`
- **落库策略**:落库到 messages 表,但重建会话时不展示
**2. Task Execution(任务执行)**
- Agent Type: `TASK_EXECUTION`
- 输入:IntentResult.execution_prompt + IntentResult.intent_analysis
- 职责:
- 执行复杂任务(查询数据库、调用工具、多步骤推理)
- 返回结构化执行结果
- 输出结构(最小化设计):
```python
class ExecutionResult(BaseModel):
execution_summary: str # 任务执行摘要(用于调试/审计)
organization_prompt: str # 给 organization 阶段的提示词
execution_data: dict = {} # 执行结果的结构化数据
```
- 输出约束:使用 `output_pydantic=ExecutionResult`
- **落库策略**:落库到 messages 表,但重建会话时不展示
**3. Result Reporting(结果报告)**
- Agent Type: `RESULT_REPORTING`
- 输入:
- IntentResult(意图识别结果)
- ExecutionResult(任务执行情况)
- 职责:
- 结合意图分析和执行结果,格式化为用户友好的响应
- 应用个性化模板(基于 UserAgentContext)
- 输出结构(最小化设计):
```python
class OrganizationResult(BaseModel):
assistant_text: str # 最终回复文本
response_metadata: dict = {} # 响应元数据(可选)
```
- 输出约束:使用 `output_pydantic=OrganizationResult`
- **唯一展示阶段**:重建会话时只展示此阶段的 message
- **唯一转发阶段**:只有此阶段的输出需要通过 AG-UI 事件转发
### Flow 状态管理
```python
@dataclass
class FlowState:
user_input: str
context: UserAgentContext
stage_trace: list[str] = field(default_factory=list)
intent_result: IntentResult | None = None
execution_result: ExecutionResult | None = None
organization_result: OrganizationResult | None = None
assistant_text: str = ""
tokens: dict = field(default_factory=dict)
cost: Decimal = Decimal("0")
```
### 数据流向
```
User Input + UserAgentContext
@start() begin()
@listen() intent_stage()
├─ IntentResult.direct_answer=true
│ ↓
│ 跳过 execution,直接 organization
│ ↓
│ organization_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
│ ↓
│ OrganizationResult → AG-UI 事件 + 落库
└─ IntentResult.direct_answer=false
execution_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
ExecutionResult
organization_stage(ExecutionResult.next_stage_prompt, ExecutionResult.metadata)
OrganizationResult → AG-UI 事件 + 落库
```
### 三阶段输出约束
**所有阶段使用 `output_pydantic` 约束输出:**
```python
from pydantic import BaseModel
class IntentResult(BaseModel):
direct_answer: bool
next_stage_prompt: str
metadata: dict = {}
class ExecutionResult(BaseModel):
next_stage_prompt: str
metadata: dict = {}
class OrganizationResult(BaseModel):
assistant_text: str
metadata: dict = {}
# Task 定义
intent_task = Task(
description="Analyze user intent",
expected_output="Intent analysis",
agent=intent_agent,
output_pydantic=IntentResult,
)
execution_task = Task(
description="Execute tasks",
expected_output="Execution result",
agent=execution_agent,
output_pydantic=ExecutionResult,
)
organization_task = Task(
description="Format response",
expected_output="User-friendly response",
agent=organization_agent,
output_pydantic=OrganizationResult,
)
```
---
## 系统选模逻辑设计
### 问题背景
旧逻辑:`order_by(...).limit(1)` 随机选择一个系统 agent,不区分阶段。
新逻辑:按 `agent_type` 显式映射到三阶段。
### 选模规则
**必需的 Agent Types**
- `INTENT_RECOGNITION` → 用于 intent_stage
- `TASK_EXECUTION` → 用于 execution_stage
- `RESULT_REPORTING` → 用于 organization_stage
**查询逻辑:**
```python
# agent_type values that must each have a system agent row.
REQUIRED_TYPES = {"INTENT_RECOGNITION", "TASK_EXECUTION", "RESULT_REPORTING"}


@dataclass(frozen=True)
class StageModels:
    """System agent row selected for each of the three flow stages."""

    intent: SystemAgentCatalog
    execution: SystemAgentCatalog
    organization: SystemAgentCatalog


def resolve_stage_models(rows: list[SystemAgentCatalog]) -> StageModels:
    """Map system agent rows onto the three flow stages by ``agent_type``.

    Args:
        rows: Candidate rows; each must expose an ``agent_type`` attribute.

    Returns:
        A ``StageModels`` holding the intent, execution and organization rows.

    Raises:
        ValueError: If any type in ``REQUIRED_TYPES`` has no matching row.
    """
    # When several rows share an agent_type, the last one in ``rows`` wins.
    by_type = {row.agent_type: row for row in rows}
    # Sorted so the error text is stable across runs; raw set order is not.
    missing = sorted(REQUIRED_TYPES - set(by_type))
    if missing:
        raise ValueError(f"Missing required agent types: {missing}")
    return StageModels(
        intent=by_type["INTENT_RECOGNITION"],
        execution=by_type["TASK_EXECUTION"],
        organization=by_type["RESULT_REPORTING"],
    )
```
**初始化数据约束:**
- `system_agents` 表必须包含三种类型的记录
- 运行时启动时验证完整性
---
## 人民币结算策略设计
### 设计原则
1. **保留 LiteLLM 语义**:`completion_cost()` 始终返回 USD
2. **业务层映射**:根据用户国家(`profiles.settings.preferences.country`)决定落库货币
3. **默认人民币**:中国用户或无国家信息默认 CNY
4. **汇率配置**:USD/CNY 汇率通过环境变量配置
### 货币来源
```
UserAgentContext.settings.preferences.country
resolve_billing_currency(country)
CN → CNY
US → USD
其他 → USD
```
### 结算流程
```
LiteLLM completion_cost()
↓ (USD)
resolve_billing_cost(usd_cost, country)
├─ country="CN" or None → CNY (乘以汇率)
└─ country="US" → USD (保持原值)
messages.cost + messages.currency
sessions.total_cost (同一货币)
```
### 汇率配置
```python
# 环境变量
BILLING_USD_CNY_RATE=7.2
# 默认值
DEFAULT_USD_CNY_RATE = Decimal("7.2")
```
### 结算模型
```python
@dataclass(frozen=True)
class BillingCost:
currency: str # "CNY" or "USD"
cost: Decimal # 6位小数精度
def resolve_billing_cost(
    usd_cost: Decimal,
    country: str | None,
    usd_cny_rate: Decimal = DEFAULT_USD_CNY_RATE,
) -> BillingCost:
    """Convert a USD cost into the billing currency for the user's country.

    Args:
        usd_cost: Cost in USD.
        country: Country code from the user's preferences; ``None`` or an
            empty string is treated as ``"CN"``.
        usd_cny_rate: USD to CNY exchange rate, used only for CNY billing.

    Returns:
        A ``BillingCost`` with the currency code and the cost rounded to
        six decimal places.

    Raises:
        ValueError: If CNY billing applies and ``usd_cny_rate`` is not a
            positive finite number.
    """
    # "CN" (case-insensitive) or a missing country bills in CNY; all else USD.
    currency = "CNY" if (country or "CN").upper() == "CN" else "USD"
    if currency == "CNY":
        # A zero, negative, NaN or infinite rate would silently store a
        # nonsensical amount, so fail loudly on a misconfigured rate.
        # is_finite() is checked first because comparing NaN raises.
        if not usd_cny_rate.is_finite() or usd_cny_rate <= 0:
            raise ValueError(
                f"usd_cny_rate must be a positive finite Decimal: {usd_cny_rate}"
            )
        cost = usd_cost * usd_cny_rate
    else:
        cost = usd_cost
    return BillingCost(
        currency=currency,
        cost=cost.quantize(Decimal("0.000001")),
    )
```
### 数据库落库
**messages 表:**
- `cost`: NUMERIC(12,6) - 业务货币金额
- `currency`: VARCHAR(3) - "CNY" or "USD"
**sessions 表:**
- `total_cost`: NUMERIC(12,6) - 同一货币累计
**约束:**
- 同一 session 内所有 messages 的 currency 必须一致
- sessions.total_cost 累加时保持货币一致
---
## Session 状态一致性设计
### 问题背景
旧逻辑:
- `sessions.status` 与 `state_snapshot.status` 不同步
- 失败时状态不一致
- title 未自动赋值
### 状态机
```
pending (创建)
running (开始执行)
├─ completed (成功)
└─ failed (异常)
```
### 状态同步规则
**创建时:**
```python
session = AgentChatSession(
user_id=user_uuid,
status=AgentChatSessionStatus.PENDING,
state_snapshot={
"status": "pending",
"pending_tool_call_id": None,
},
)
```
**运行时:**
```python
# 开始执行
session.status = AgentChatSessionStatus.RUNNING
session.state_snapshot["status"] = "running"
# 成功完成
session.status = AgentChatSessionStatus.COMPLETED
session.state_snapshot["status"] = "completed"
# 失败
session.status = AgentChatSessionStatus.FAILED
session.state_snapshot["status"] = "failed"
session.state_snapshot["error_id"] = error_id
```
### 自动 Title 赋值
**规则:**
- 首次运行时,如果 `session.title` 为空,使用 `user_input[:255]` 赋值
- 只在第一次运行时赋值,后续不覆盖
**实现:**
```python
async def _set_title_if_empty(self, session_id: UUID, title: str) -> None:
    """Set the session title only while it is still NULL.

    The title is truncated to 255 characters. The ``title IS NULL`` filter
    makes the UPDATE match no row once a title has been stored.
    """
    truncated_title = title[:255]
    target_session = update(AgentChatSession).where(
        AgentChatSession.id == session_id
    )
    # Only sessions without a title are touched.
    untitled_only = target_session.where(AgentChatSession.title.is_(None))
    await self.db.execute(untitled_only.values(title=truncated_title))
```
### Repository 方法
```python
class SessionRepository:
async def mark_running(self, session_id: UUID) -> None: ...
async def mark_completed(self, session_id: UUID) -> None: ...
async def mark_failed(self, session_id: UUID, error_id: str) -> None: ...
```
---
## 全局 Prompt 构建设计
### 分层结构
```
全局系统 Prompt
├─ 身份段(username/bio)
├─ 偏好段(language/timezone/country)
└─ 阶段段(动态注入)
├─ intent stage prompt
├─ execution stage prompt
└─ organization stage prompt
```
### 构建函数
```python
def build_global_system_prompt(ctx: UserAgentContext) -> str:
    """Build the global system prompt from the user's identity and preferences.

    ``username`` and ``bio`` are free text supplied by the user. Whitespace
    runs, including newlines, are collapsed to single spaces so a value
    cannot start a new line and forge a section such as ``# Instructions``.

    Args:
        ctx: User context providing ``username``, ``bio`` and
            ``settings.preferences``.

    Returns:
        The prompt as newline-joined text.
    """

    def _one_line(value: str) -> str:
        # str.split() with no argument splits on any whitespace run.
        return " ".join(value.split())

    username = _one_line(ctx.username)
    # A missing, empty or whitespace-only bio is rendered as "N/A".
    bio = _one_line(ctx.bio or "") or "N/A"
    preferences = ctx.settings.preferences
    lines = [
        "# User Identity",
        f"username: {username}",
        f"bio: {bio}",
        "",
        "# User Preferences",
        f"interface_language: {preferences.interface_language}",
        f"ai_language: {preferences.ai_language}",
        f"timezone: {preferences.timezone}",
        f"country: {preferences.country}",
        "",
        "# Instructions",
        "Use the user's preferences to personalize responses.",
        "Respond in the user's preferred AI language.",
        "Consider the user's timezone for time-related queries.",
    ]
    return "\n".join(lines)
```
### 阶段注入
每个阶段运行时,在全局 prompt 基础上追加阶段特定的指令:
```python
def build_stage_prompt(
    base_prompt: str,
    stage: str,  # "intent" | "execution" | "organization"
    ctx: UserAgentContext,
) -> str:
    """Append the stage-specific instruction section to the global prompt.

    ``ctx`` is accepted but not read by this function. A ``stage`` outside
    the three known names raises ``KeyError``.
    """
    instruction_by_stage = {
        "intent": "Analyze the user's intent and decide if direct response is possible.",
        "execution": "Execute the required tasks and tools to fulfill the user's request.",
        "organization": "Format the execution results into a user-friendly response.",
    }
    stage_instruction = instruction_by_stage[stage]
    stage_header = f"# Stage: {stage}"
    return f"{base_prompt}\n\n" + stage_header + "\n" + stage_instruction
```
---
## 依赖关系图
```
UserAgentContext (核心上下文)
├─ ProfileSettings (用户配置)
│ └─ preferences.country → 人民币结算
├─ build_global_system_prompt() (全局 Prompt)
│ └─ 三阶段 Flow 使用
└─ resolve_stage_models() (选模逻辑)
└─ 三阶段 Agent 配置
```
---
## 相关文档
- [Runtime Database Schema](../runtime/runtime-database.md)
- [AG-UI Protocol](.opencode/skills/ag-ui/SKILL.md)
- [CrewAI Framework](.opencode/skills/crewai/SKILL.md)
+144
View File
@@ -0,0 +1,144 @@
# Agent LLM Config Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** 将 `system_agents.config` 中的 `temperature` / `max_tokens` 以受约束方式加载到运行时,并在调用 LiteLLM 时按需透传。
**Architecture:** 在应用层 `RunService` 读取模型选择时同步读取并校验 `SystemAgents.config`;将校验后的 `SystemAgentLLMConfig` 传入 `CrewAIRuntime`;由 runtime 将配置转交给 LiteLLM client;client 仅在值非 `None` 时向 `completion()` 传参,避免不必要的 provider 兼容风险。
**Tech Stack:** FastAPI, SQLAlchemy (async), Pydantic v2, LiteLLM, pytest
---
## 背景与修正点
- 当前真实调用链为:`RunService._load_agent_model_selection()` -> `create_runtime()` -> `CrewAIRuntime.execute()` -> `run_completion()`,并非 `load_stage_models()`。
- `SystemAgentLLMConfig` 已存在:`backend/src/core/agent/domain/system_agent_config.py`。
- `system_agents.config` 目前在初始化 YAML 侧有约束,但运行时 DB 读取仍需二次校验,防止脏数据绕过。
## 规则约束
- 严格 TDD:先写失败测试,再做实现。
- Python 命令统一使用 `uv run ...`。
- 仅做增量改动,不回滚或覆盖与本任务无关的已有变更。
## 字段映射与透传策略
| 配置字段 | LiteLLM 参数 | 规则 |
|---|---|---|
| `temperature` | `temperature` | `None` 不透传;非空直接透传 |
| `max_tokens` | `max_tokens` | `None` 不透传;非空直接透传 |
---
### Task 1: 应用层加载并校验 Agent LLM Config
**Files:**
- Modify: `backend/src/core/agent/application/run_service.py`
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
**Step 1: 写失败测试(RED)**
新增单测覆盖以下行为:
1. `_load_agent_model_selection()` 返回三元组:`(model_code, provider_name, llm_config)`。
2. 当 DB `config` 为 `{}` 时,`llm_config.temperature/max_tokens` 为 `None`。
3. 当 DB `config` 含非法值(如 `temperature=3`)时抛 `ValueError`。
**Step 2: 运行测试确认失败**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
Expected: 新增断言失败(返回值结构/异常行为不匹配)。
**Step 3: 最小实现(GREEN)**
在 `run_service.py` 中:
1. 查询 `SystemAgents.config`。
2. 用 `SystemAgentLLMConfig.model_validate(config or {})` 校验。
3. 将 `_load_agent_model_selection()` 改为返回三元组。
4. 在 `run()` 中把 `llm_config` 传递到 `create_runtime(...)`。
**Step 4: 运行测试确认通过**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
Expected: PASS。
---
### Task 2: Runtime 与 LiteLLM Client 支持可选参数透传
**Files:**
- Modify: `backend/src/core/agent/infrastructure/crewai/factory.py`
- Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py`
- Modify: `backend/src/core/agent/infrastructure/litellm/client.py`
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
**Step 1: 写失败测试(RED)**
在 `test_crewai_runtime.py` 增加用例:
1. 传入 `temperature/max_tokens` 时,`run_completion` 收到对应参数。
2. 参数为 `None` 时,不应被透传到 LiteLLM。
必要时新增 `backend/tests/unit/core/agent/test_litellm_client.py`,单测 `run_completion` 的 kwargs 组装逻辑。
**Step 2: 运行测试确认失败**
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
Expected: 新增断言失败(参数未透传或未过滤 `None`)。
**Step 3: 最小实现(GREEN)**
1. `create_runtime()` 增加 `llm_config` 参数并传给 `CrewAIRuntime`。
2. `CrewAIRuntime` 保存 `llm_config`,执行时调用:
- `run_completion(..., temperature=llm_config.temperature, max_tokens=llm_config.max_tokens)`
3. `run_completion()` 改为支持可选 `temperature/max_tokens`,内部仅在非 `None` 时加入 kwargs 再调用 `completion()`
**Step 4: 运行测试确认通过**
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
Expected: PASS。
---
### Task 3: 初始化数据补齐与回归验证
**Files:**
- Modify: `backend/src/core/config/static/database/system_agents.yaml`
- Modify: `backend/src/core/config/initial/init_data.py`(如需补充类型兜底)
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
**Step 1: 写失败测试(RED)**
补充断言:YAML 读取后 `config` 可为空或包含 `max_tokens: null`,初始化逻辑不会报错,且生成结构符合 `SystemAgentLLMConfig`。
**Step 2: 运行测试确认失败**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
Expected: 新增断言失败。
**Step 3: 最小实现(GREEN)**
1. 在 `system_agents.yaml` 中为各 agent 配置显式补充 `max_tokens: null`。
2. `init_data.py` 保持 `config: SystemAgentLLMConfig | None = None`,写库时统一序列化为 dict。
**Step 4: 运行测试确认通过**
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
Expected: PASS。
---
## 最终验证
1. `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_crewai_runtime.py -q`
2. `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -q`
3. `uv run ruff check backend/src backend/tests`
4. `uv run basedpyright`
预期:全部通过;若集成测试依赖本地 DB 状态导致跳过/失败,需记录原因并给出手工验证步骤。
## 完成标准
- `RunService` 从 DB 读取并校验 `config`。
- runtime 到 LiteLLM 链路支持 `temperature/max_tokens` 可选透传。
- `None` 不透传。
- 单测与相关集成测试通过,并给出命令级证据。
+2
View File
@@ -0,0 +1,2 @@
1. memory短期的加载。memory的生命周期为ttl+对话条目+session_id。用crewai
2.