feat(agent): 实现 Agent Runtime LLM 配置与消息元数据结构化支持
This commit is contained in:
@@ -5,6 +5,10 @@ from uuid import UUID
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from core.agent.application.session_state_persistence import SessionStatePersistence
|
||||
from core.agent.domain.message_metadata import (
|
||||
MessageMetadataAssistantOutput,
|
||||
MessageMetadataToolResult,
|
||||
)
|
||||
from core.agent.infrastructure.persistence.message_repository import MessageRepository
|
||||
from core.agent.infrastructure.persistence.session_repository import SessionRepository
|
||||
from core.db import AsyncSessionLocal
|
||||
@@ -46,14 +50,16 @@ class ResumeService:
|
||||
seq=next_seq,
|
||||
role=AgentChatMessageRole.TOOL,
|
||||
content='{"status":"ok"}',
|
||||
metadata={"type": "tool_result", "tool_call_id": tool_call_id},
|
||||
metadata=MessageMetadataToolResult(
|
||||
tool_call_id=tool_call_id,
|
||||
).model_dump(),
|
||||
)
|
||||
await message_repository.append_message(
|
||||
session_id=session_uuid,
|
||||
seq=next_seq + 1,
|
||||
role=AgentChatMessageRole.ASSISTANT,
|
||||
content="Tool result received",
|
||||
metadata={"type": "assistant_output"},
|
||||
metadata=MessageMetadataAssistantOutput().model_dump(),
|
||||
)
|
||||
|
||||
snapshot = self._state_persistence.build_completed_snapshot()
|
||||
|
||||
@@ -3,10 +3,16 @@ from __future__ import annotations
|
||||
from decimal import Decimal
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from pydantic import ValidationError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from core.agent.application.session_state_persistence import SessionStatePersistence
|
||||
from core.agent.domain.message_metadata import (
|
||||
MessageMetadataToolCall,
|
||||
MessageMetadataUserInput,
|
||||
)
|
||||
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
|
||||
from core.agent.infrastructure.crewai.factory import create_runtime
|
||||
from core.agent.infrastructure.persistence.message_repository import MessageRepository
|
||||
from core.agent.infrastructure.persistence.session_repository import SessionRepository
|
||||
@@ -58,10 +64,16 @@ class RunService:
|
||||
if chat_session is None:
|
||||
raise ValueError("session not found")
|
||||
|
||||
model_code, provider_name = await self._load_agent_model_selection(
|
||||
db_session
|
||||
(
|
||||
model_code,
|
||||
provider_name,
|
||||
llm_config,
|
||||
) = await self._load_agent_model_selection(db_session)
|
||||
runtime = create_runtime(
|
||||
model_code=model_code,
|
||||
provider_name=provider_name,
|
||||
llm_config=llm_config,
|
||||
)
|
||||
runtime = create_runtime(model_code=model_code, provider_name=provider_name)
|
||||
runtime_result = runtime.execute(user_input=user_input)
|
||||
assistant_text = str(runtime_result.get("assistant_text", ""))
|
||||
prompt_tokens = _to_int(runtime_result.get("prompt_tokens", 0))
|
||||
@@ -79,7 +91,7 @@ class RunService:
|
||||
role=AgentChatMessageRole.USER,
|
||||
content=user_input,
|
||||
model_code=model_code,
|
||||
metadata={"type": "user_input"},
|
||||
metadata=MessageMetadataUserInput().model_dump(),
|
||||
)
|
||||
await message_repository.append_message(
|
||||
session_id=session_uuid,
|
||||
@@ -87,10 +99,9 @@ class RunService:
|
||||
role=AgentChatMessageRole.ASSISTANT,
|
||||
content=assistant_text or "Tool call pending approval",
|
||||
model_code=model_code,
|
||||
metadata={
|
||||
"type": "tool_call",
|
||||
"tool_call_id": pending_tool_call_id,
|
||||
},
|
||||
metadata=MessageMetadataToolCall(
|
||||
tool_call_id=pending_tool_call_id,
|
||||
).model_dump(),
|
||||
input_tokens=prompt_tokens,
|
||||
output_tokens=completion_tokens,
|
||||
cost=cost,
|
||||
@@ -119,9 +130,9 @@ class RunService:
|
||||
|
||||
async def _load_agent_model_selection(
|
||||
self, session: AsyncSession
|
||||
) -> tuple[str, str]:
|
||||
) -> tuple[str, str, SystemAgentLLMConfig]:
|
||||
stmt = (
|
||||
select(Llm.model_code, LlmFactory.name)
|
||||
select(Llm.model_code, LlmFactory.name, SystemAgents.config)
|
||||
.join(SystemAgents, SystemAgents.llm_id == Llm.id)
|
||||
.join(LlmFactory, LlmFactory.id == Llm.factory_id)
|
||||
.where(SystemAgents.status == "active")
|
||||
@@ -131,4 +142,11 @@ class RunService:
|
||||
record = (await session.execute(stmt)).one_or_none()
|
||||
if record is None:
|
||||
raise ValueError("active system agent model is required")
|
||||
return str(record[0]), str(record[1])
|
||||
|
||||
raw_config = record[2] if isinstance(record[2], dict) else {}
|
||||
try:
|
||||
llm_config = SystemAgentLLMConfig.model_validate(raw_config)
|
||||
except ValidationError as exc:
|
||||
raise ValueError("invalid system agent config") from exc
|
||||
|
||||
return str(record[0]), str(record[1]), llm_config
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class MessageMetadataUserInput(BaseModel):
    """Metadata attached to a message that carries the user's input.

    The only field is the ``type`` tag, fixed to ``"user_input"``, so
    ``model_dump()`` yields ``{"type": "user_input"}``.
    """

    # Tag identifying the metadata variant; the Literal type rejects other values.
    type: Literal["user_input"] = "user_input"
|
||||
|
||||
|
||||
class MessageMetadataToolCall(BaseModel):
    """Metadata attached to an assistant message that requests a tool call.

    ``tool_call_id`` is required; ``type`` is fixed to ``"tool_call"``.
    """

    # Tag identifying the metadata variant.
    type: Literal["tool_call"] = "tool_call"
    # Identifier of the requested tool call (required, no default).
    tool_call_id: str
|
||||
|
||||
|
||||
class MessageMetadataToolResult(BaseModel):
    """Metadata attached to a message that carries a tool result.

    Only ``tool_call_id`` is required. Every other field defaults to ``None``,
    so a plain ``model_dump()`` emits those keys with ``None`` values when the
    caller did not supply them.
    """

    # Tag identifying the metadata variant.
    type: Literal["tool_result"] = "tool_result"
    # Identifier of the tool call this result answers (required).
    tool_call_id: str
    # Optional correlation identifiers.
    run_id: str | None = None
    turn_id: str | None = None
    tool_name: str | None = None
    # Optional payload descriptors; presumably they locate and fingerprint an
    # externally stored result payload -- confirm against the producer.
    storage_bucket: str | None = None
    storage_path: str | None = None
    payload_sha256: str | None = None
    payload_bytes: int | None = None
    payload_format: str | None = None
|
||||
|
||||
|
||||
class MessageMetadataAssistantOutput(BaseModel):
    """Metadata attached to a plain assistant output message.

    The only field is the ``type`` tag, fixed to ``"assistant_output"``.
    """

    # Tag identifying the metadata variant.
    type: Literal["assistant_output"] = "assistant_output"
|
||||
|
||||
|
||||
# Union of every metadata variant defined in this module. Each member carries
# a distinct literal ``type`` tag.
MessageMetadata = (
    MessageMetadataUserInput
    | MessageMetadataToolCall
    | MessageMetadataToolResult
    | MessageMetadataAssistantOutput
)
|
||||
@@ -0,0 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class SystemAgentLLMConfig(BaseModel):
    """Validated LLM tuning options stored with a system agent.

    Both fields are optional and default to ``None`` (not configured).
    Out-of-range values raise ``pydantic.ValidationError`` on validation.
    """

    # Sampling temperature; when provided it must lie within [0.0, 2.0].
    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
    # Completion token limit; when provided it must be at least 1.
    max_tokens: int | None = Field(default=None, ge=1)
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.agent.domain.message_metadata import MessageMetadataToolResult
|
||||
|
||||
|
||||
def reconstruct_tool_call_result_event(
|
||||
*,
|
||||
@@ -26,15 +28,14 @@ def build_tool_result_metadata(
|
||||
payload_bytes: int,
|
||||
payload_format: str,
|
||||
) -> dict[str, object]:
|
||||
return {
|
||||
"type": "tool_result",
|
||||
"run_id": run_id,
|
||||
"turn_id": turn_id,
|
||||
"tool_call_id": tool_call_id,
|
||||
"tool_name": tool_name,
|
||||
"storage_bucket": storage_bucket,
|
||||
"storage_path": storage_path,
|
||||
"payload_sha256": payload_sha256,
|
||||
"payload_bytes": payload_bytes,
|
||||
"payload_format": payload_format,
|
||||
}
|
||||
return MessageMetadataToolResult(
|
||||
run_id=run_id,
|
||||
turn_id=turn_id,
|
||||
tool_call_id=tool_call_id,
|
||||
tool_name=tool_name,
|
||||
storage_bucket=storage_bucket,
|
||||
storage_path=storage_path,
|
||||
payload_sha256=payload_sha256,
|
||||
payload_bytes=payload_bytes,
|
||||
payload_format=payload_format,
|
||||
).model_dump()
|
||||
|
||||
@@ -1,15 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
|
||||
from core.agent.infrastructure.config.resolver import AgentConfigResolver
|
||||
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
|
||||
|
||||
|
||||
def create_runtime(
    *,
    model_code: str | None,
    provider_name: str | None,
    llm_config: SystemAgentLLMConfig | None = None,
) -> CrewAIRuntime:
    """Build a ``CrewAIRuntime`` backed by a fresh ``AgentConfigResolver``.

    Args:
        model_code: Model code forwarded to the runtime; may be ``None``.
        provider_name: Provider name forwarded to the runtime; may be ``None``.
        llm_config: Optional LLM tuning options, forwarded unchanged. Defaults
            to ``None`` so existing two-argument callers keep working.

    Returns:
        The constructed ``CrewAIRuntime``.
    """
    resolver = AgentConfigResolver()
    return CrewAIRuntime(
        resolver=resolver,
        model_code=model_code,
        provider_name=provider_name,
        llm_config=llm_config,
    )
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
|
||||
from core.agent.infrastructure.agui.bridge import to_agui_events
|
||||
from core.agent.infrastructure.config.resolver import (
|
||||
AgentConfigResolver,
|
||||
@@ -47,11 +48,13 @@ class CrewAIRuntime:
|
||||
resolver: AgentConfigResolver,
|
||||
model_code: str | None,
|
||||
provider_name: str | None,
|
||||
llm_config: SystemAgentLLMConfig | None = None,
|
||||
) -> None:
|
||||
self._config: ResolvedAgentConfig = resolver.resolve(
|
||||
model_code=model_code,
|
||||
provider_name=provider_name,
|
||||
)
|
||||
self._llm_config = llm_config or SystemAgentLLMConfig()
|
||||
|
||||
def map_events(self, internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
return to_agui_events(internal_events)
|
||||
@@ -65,6 +68,8 @@ class CrewAIRuntime:
|
||||
model=litellm_model,
|
||||
api_key=self._config.provider_api_key,
|
||||
messages=[{"role": "user", "content": user_input}],
|
||||
temperature=self._llm_config.temperature,
|
||||
max_tokens=self._llm_config.max_tokens,
|
||||
)
|
||||
if not isinstance(response, dict):
|
||||
raise ValueError("llm response must be a dict")
|
||||
|
||||
@@ -5,13 +5,26 @@ from typing import Any
|
||||
from litellm import completion
|
||||
|
||||
|
||||
def run_completion(*, model: str, api_key: str, messages: list[dict[str, Any]]) -> Any:
|
||||
response = completion(
|
||||
model=model,
|
||||
api_key=api_key,
|
||||
messages=messages,
|
||||
stream=False,
|
||||
)
|
||||
def run_completion(
|
||||
*,
|
||||
model: str,
|
||||
api_key: str,
|
||||
messages: list[dict[str, Any]],
|
||||
temperature: float | None = None,
|
||||
max_tokens: int | None = None,
|
||||
) -> Any:
|
||||
kwargs: dict[str, Any] = {
|
||||
"model": model,
|
||||
"api_key": api_key,
|
||||
"messages": messages,
|
||||
"stream": False,
|
||||
}
|
||||
if temperature is not None:
|
||||
kwargs["temperature"] = temperature
|
||||
if max_tokens is not None:
|
||||
kwargs["max_tokens"] = max_tokens
|
||||
|
||||
response = completion(**kwargs)
|
||||
model_dump = getattr(response, "model_dump", None)
|
||||
if callable(model_dump):
|
||||
return model_dump()
|
||||
|
||||
@@ -9,6 +9,7 @@ from pydantic import BaseModel, ValidationError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
|
||||
from core.db.session import AsyncSessionLocal
|
||||
from core.logging import get_logger
|
||||
from models.llm import Llm
|
||||
@@ -38,7 +39,7 @@ class SystemAgentsSeed(BaseModel):
|
||||
agent_type: str
|
||||
llm_model_code: str
|
||||
status: str
|
||||
config: dict[str, Any]
|
||||
config: SystemAgentLLMConfig | None = None
|
||||
|
||||
|
||||
class SystemAgentsYaml(BaseModel):
|
||||
@@ -184,7 +185,9 @@ async def initialize_system_agents() -> None:
|
||||
agent_type=agent["agent_type"],
|
||||
llm_id=llm.id,
|
||||
status=agent["status"],
|
||||
config=agent["config"],
|
||||
config=SystemAgentLLMConfig.model_validate(
|
||||
agent.get("config") or {}
|
||||
).model_dump(),
|
||||
)
|
||||
|
||||
logger.info("Initialized system agents")
|
||||
|
||||
@@ -4,15 +4,18 @@ agents:
|
||||
status: active
|
||||
config:
|
||||
temperature: 0.7
|
||||
max_tokens: null
|
||||
|
||||
- agent_type: TASK_EXECUTION
|
||||
llm_model_code: deepseek-v3.2
|
||||
status: active
|
||||
config:
|
||||
temperature: 0.7
|
||||
max_tokens: null
|
||||
|
||||
- agent_type: RESULT_REPORTING
|
||||
llm_model_code: deepseek-v3.2
|
||||
status: active
|
||||
config:
|
||||
temperature: 0.7
|
||||
max_tokens: null
|
||||
|
||||
@@ -1,22 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from typing import cast
|
||||
|
||||
from core.agent.infrastructure.config.resolver import AgentConfigResolver
|
||||
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
|
||||
from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike
|
||||
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
|
||||
|
||||
|
||||
def test_runtime_emits_text_tool_reasoning_events() -> None:
|
||||
runtime = CrewAIRuntime(
|
||||
resolver=AgentConfigResolver(
|
||||
settings=SimpleNamespace(
|
||||
agent_runtime=SimpleNamespace(
|
||||
default_model_code="",
|
||||
streaming_enabled=True,
|
||||
),
|
||||
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
|
||||
)
|
||||
settings = cast(
|
||||
SettingsLike,
|
||||
SimpleNamespace(
|
||||
agent_runtime=SimpleNamespace(
|
||||
default_model_code="",
|
||||
streaming_enabled=True,
|
||||
),
|
||||
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
|
||||
),
|
||||
)
|
||||
runtime = CrewAIRuntime(
|
||||
resolver=AgentConfigResolver(settings=settings),
|
||||
model_code="gpt-4o-mini",
|
||||
provider_name="dashscope",
|
||||
)
|
||||
@@ -46,11 +50,18 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
|
||||
captured: dict[str, object] = {}
|
||||
|
||||
def _fake_completion(
|
||||
*, model: str, api_key: str, messages: list[dict[str, object]]
|
||||
*,
|
||||
model: str,
|
||||
api_key: str,
|
||||
messages: list[dict[str, object]],
|
||||
temperature: float | None = None,
|
||||
max_tokens: int | None = None,
|
||||
):
|
||||
captured["model"] = model
|
||||
captured["api_key"] = api_key
|
||||
captured["messages"] = messages
|
||||
captured["temperature"] = temperature
|
||||
captured["max_tokens"] = max_tokens
|
||||
return {
|
||||
"choices": [
|
||||
{
|
||||
@@ -75,23 +86,28 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
|
||||
cost=0.001,
|
||||
),
|
||||
)
|
||||
settings = cast(
|
||||
SettingsLike,
|
||||
SimpleNamespace(
|
||||
agent_runtime=SimpleNamespace(
|
||||
default_model_code="",
|
||||
streaming_enabled=True,
|
||||
),
|
||||
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
|
||||
),
|
||||
)
|
||||
|
||||
runtime = CrewAIRuntime(
|
||||
resolver=AgentConfigResolver(
|
||||
settings=SimpleNamespace(
|
||||
agent_runtime=SimpleNamespace(
|
||||
default_model_code="",
|
||||
streaming_enabled=True,
|
||||
),
|
||||
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
|
||||
)
|
||||
),
|
||||
resolver=AgentConfigResolver(settings=settings),
|
||||
model_code="qwen3.5-flash",
|
||||
provider_name="dashscope",
|
||||
llm_config=SystemAgentLLMConfig(temperature=0.3, max_tokens=256),
|
||||
)
|
||||
|
||||
result = runtime.execute(user_input="hi")
|
||||
|
||||
assert captured["model"] == "dashscope/qwen3.5-flash"
|
||||
assert captured["api_key"] == "env-api-key"
|
||||
assert captured["temperature"] == 0.3
|
||||
assert captured["max_tokens"] == 256
|
||||
assert result["assistant_text"] == "hello"
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.config.initial.init_data import load_system_agents
|
||||
|
||||
|
||||
def test_load_system_agents_supports_nullable_max_tokens() -> None:
    """Every loaded system agent has a ``config`` whose ``max_tokens`` is ``None``."""
    loaded = load_system_agents()

    agents = loaded["agents"]
    # Guard against a vacuous pass when no agents are loaded.
    assert len(agents) > 0
    for agent in agents:
        assert "config" in agent
        # The key must be present and explicitly null, not merely absent.
        assert "max_tokens" in agent["config"]
        assert agent["config"]["max_tokens"] is None
|
||||
@@ -0,0 +1,51 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.agent.infrastructure.litellm.client import run_completion
|
||||
|
||||
|
||||
def test_run_completion_passes_optional_params_when_provided(monkeypatch) -> None:
    """Explicit ``temperature`` and ``max_tokens`` reach the ``completion`` call."""
    captured: dict[str, object] = {}

    def _fake_completion(**kwargs):  # type: ignore[no-untyped-def]
        # Record every keyword argument the client passes downstream.
        captured.update(kwargs)
        return {"ok": True}

    # Patch the name as bound inside the client module.
    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _fake_completion,
    )

    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=[{"role": "user", "content": "hi"}],
        temperature=0.6,
        max_tokens=120,
    )

    assert captured["temperature"] == 0.6
    assert captured["max_tokens"] == 120
|
||||
|
||||
|
||||
def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
    """``None`` for ``temperature``/``max_tokens`` is dropped, not forwarded."""
    captured: dict[str, object] = {}

    def _fake_completion(**kwargs):  # type: ignore[no-untyped-def]
        # Record every keyword argument the client passes downstream.
        captured.update(kwargs)
        return {"ok": True}

    # Patch the name as bound inside the client module.
    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _fake_completion,
    )

    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=[{"role": "user", "content": "hi"}],
        temperature=None,
        max_tokens=None,
    )

    # The keys must be absent altogether, not present with a None value.
    assert "temperature" not in captured
    assert "max_tokens" not in captured
|
||||
@@ -4,6 +4,23 @@ import pytest
|
||||
|
||||
from core.agent.application.resume_service import ResumeService
|
||||
from core.agent.application.run_service import RunService
|
||||
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
|
||||
|
||||
|
||||
class _FakeResult:
    """Minimal stand-in for a query result that exposes ``one_or_none()``."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        # The canned row (or None) handed back verbatim by one_or_none().
        self._record = record

    def one_or_none(self) -> tuple[object, object, object] | None:
        """Return the canned row, or ``None`` when no row was configured."""
        return self._record
|
||||
|
||||
|
||||
class _FakeSession:
    """Minimal async session double whose ``execute`` returns a canned row."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        # The row every execute() call will wrap in a _FakeResult.
        self._record = record

    async def execute(self, _stmt: object) -> _FakeResult:
        """Ignore the statement and return the canned row in a ``_FakeResult``."""
        return _FakeResult(self._record)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -20,3 +37,72 @@ async def test_resume_service_requires_pending_tool_call() -> None:
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
await resume_service.resume(session_id="session-1", tool_call_id="call-1")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_load_agent_model_selection_returns_validated_llm_config() -> None:
    """A well-formed config dict comes back as a ``SystemAgentLLMConfig``."""
    run_service = RunService()
    fake_session = _FakeSession(
        (
            "qwen3.5-flash",
            "dashscope",
            {"temperature": 0.5, "max_tokens": 512},
        )
    )

    (
        model_code,
        provider_name,
        llm_config,
    ) = await run_service._load_agent_model_selection(
        fake_session  # type: ignore[arg-type]
    )

    assert model_code == "qwen3.5-flash"
    assert provider_name == "dashscope"
    assert isinstance(llm_config, SystemAgentLLMConfig)
    assert llm_config.temperature == 0.5
    assert llm_config.max_tokens == 512
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_load_agent_model_selection_rejects_invalid_config() -> None:
    """An out-of-range temperature surfaces as ``ValueError``."""
    run_service = RunService()
    fake_session = _FakeSession(
        (
            "qwen3.5-flash",
            "dashscope",
            # 3.0 is above the 2.0 upper bound enforced by SystemAgentLLMConfig.
            {"temperature": 3.0},
        )
    )

    with pytest.raises(ValueError, match="invalid system agent config"):
        await run_service._load_agent_model_selection(fake_session)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_load_agent_model_selection_falls_back_when_config_not_dict() -> None:
    """A non-dict config column yields a default (all-``None``) LLM config."""
    run_service = RunService()
    fake_session = _FakeSession(
        (
            "qwen3.5-flash",
            "dashscope",
            # Not a mapping, so the service is expected to ignore it.
            "not-a-dict",
        )
    )

    _, _, llm_config = await run_service._load_agent_model_selection(
        fake_session  # type: ignore[arg-type]
    )

    assert llm_config.temperature is None
    assert llm_config.max_tokens is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_load_agent_model_selection_raises_when_no_active_agent() -> None:
    """No matching row (``one_or_none()`` returns ``None``) raises ``ValueError``."""
    run_service = RunService()
    fake_session = _FakeSession(None)

    with pytest.raises(ValueError, match="active system agent model is required"):
        await run_service._load_agent_model_selection(fake_session)  # type: ignore[arg-type]
|
||||
|
||||
@@ -1,116 +0,0 @@
|
||||
# 前后端 API 对比分析
|
||||
|
||||
**Date:** 2026-03-04
|
||||
**Status:** Open
|
||||
**Type:** 架构分析
|
||||
|
||||
---
|
||||
|
||||
## 一、后端已有、前端缺失的 API
|
||||
|
||||
### 1. Friendships API (`/api/v1/friends`)
|
||||
|
||||
| 方法 | 路径 | 功能 | 前端状态 |
|
||||
|------|------|------|----------|
|
||||
| POST | `/requests` | 发送好友请求 | **缺失** |
|
||||
| GET | `/requests/inbox` | 获取收件箱 | **缺失** |
|
||||
| GET | `/requests/outgoing` | 获取发出的请求 | **缺失** |
|
||||
| POST | `/requests/{id}/accept` | 接受好友请求 | **缺失** |
|
||||
| POST | `/requests/{id}/decline` | 拒绝好友请求 | **缺失** |
|
||||
| DELETE | `/requests/{id}` | 取消好友请求 | **缺失** |
|
||||
| GET | `` | 获取好友列表 | **缺失** |
|
||||
| DELETE | `/{id}` | 删除好友 | **缺失** |
|
||||
|
||||
### 2. Inbox Messages API (`/api/v1/inbox/messages`)
|
||||
|
||||
| 方法 | 路径 | 功能 | 前端状态 |
|
||||
|------|------|------|----------|
|
||||
| GET | `` | 获取消息列表 | **缺失** |
|
||||
| POST | `/{id}/accept` | 接受邀请 | **缺失** |
|
||||
| POST | `/{id}/dismiss` | 忽略消息 | **缺失** |
|
||||
|
||||
### 3. Chat/AgUi 流式 API
|
||||
|
||||
| 功能 | 前端状态 |
|
||||
|------|----------|
|
||||
| 发送消息 SSE 流式 | **仅有 Mock** |
|
||||
| 加载历史记录 | **仅有 Mock** |
|
||||
|
||||
> 前端 `AgUiService` 只有本地 mock (`throw UnimplementedError`),未实现真实 API 调用。
|
||||
|
||||
### 4. Infra API
|
||||
|
||||
| 方法 | 路径 | 功能 | 前端状态 |
|
||||
|------|------|------|----------|
|
||||
| GET | `/infra/health` | 基础设施健康检查 | **未使用** |
|
||||
|
||||
---
|
||||
|
||||
## 二、前端已有、后端已实现的 API
|
||||
|
||||
### Auth API (`/api/v1/auth`)
|
||||
|
||||
| 方法 | 路径 | 后端 | 前端 |
|
||||
|------|------|------|------|
|
||||
| POST | `/verifications` | ✅ | ✅ |
|
||||
| POST | `/verifications/verify` | ✅ | ✅ |
|
||||
| POST | `/verifications/resend` | ✅ | ✅ |
|
||||
| POST | `/sessions` | ✅ | ✅ |
|
||||
| POST | `/sessions/refresh` | ✅ | ✅ |
|
||||
| DELETE | `/sessions` | ✅ | ✅ |
|
||||
| POST | `/password-reset` | ✅ | ✅ |
|
||||
| POST | `/password-reset/confirm` | ✅ | ✅ |
|
||||
| GET | `/users` | ✅ | **未使用** |
|
||||
|
||||
### Users API (`/api/v1/users`)
|
||||
|
||||
| 方法 | 路径 | 后端 | 前端 |
|
||||
|------|------|------|------|
|
||||
| GET | `/me` | ✅ | ✅ |
|
||||
| PATCH | `/me` | ✅ | ✅ |
|
||||
| POST | `/search` | ✅ | ✅ |
|
||||
|
||||
### Schedule Items API (`/api/v1/schedule-items`)
|
||||
|
||||
| 方法 | 路径 | 后端 | 前端 |
|
||||
|------|------|------|------|
|
||||
| POST | `` | ✅ | **仅有 Mock** |
|
||||
| GET | `` (range query) | ✅ | **仅有 Mock** |
|
||||
| GET | `/{id}` | ✅ | **仅有 Mock** |
|
||||
| PATCH | `/{id}` | ✅ | **仅有 Mock** |
|
||||
| DELETE | `/{id}` | ✅ | **仅有 Mock** |
|
||||
| POST | `/{id}/share` | ✅ | **缺失** |
|
||||
|
||||
---
|
||||
|
||||
## 三、待实现功能清单
|
||||
|
||||
| 优先级 | 功能 | 说明 |
|
||||
|--------|------|------|
|
||||
| **P0** | FriendsApi | 前端无 Friendships API 客户端 |
|
||||
| **P0** | InboxMessagesApi | 前端无 Inbox Messages API 客户端 |
|
||||
| **P0** | Chat/AgUi 后端连接 | 前端 AgUiService 未实现真实 API |
|
||||
| **P1** | CalendarService 真实 API | MockCalendarService → 真实 API 调用 |
|
||||
| **P1** | Schedule Share 接口 | 前端未调用 `POST /{id}/share` |
|
||||
| **P2** | Infra Health 集成 | 可用于前端健康检查 |
|
||||
|
||||
---
|
||||
|
||||
## 四、相关文件位置
|
||||
|
||||
### 前端 API 客户端
|
||||
|
||||
- `apps/lib/features/auth/data/auth_api.dart` - Auth API
|
||||
- `apps/lib/features/users/data/users_api.dart` - Users API
|
||||
- `apps/lib/features/calendar/data/services/mock_calendar_service.dart` - Calendar Mock
|
||||
- `apps/lib/features/chat/data/services/ag_ui_service.dart` - Chat/AgUi Mock
|
||||
- `apps/lib/features/chat/data/services/mock_history_service.dart` - History Mock
|
||||
|
||||
### 后端 Router
|
||||
|
||||
- `backend/src/v1/auth/router.py` - Auth 路由
|
||||
- `backend/src/v1/users/router.py` - Users 路由
|
||||
- `backend/src/v1/friendships/router.py` - Friendships 路由
|
||||
- `backend/src/v1/inbox_messages/router.py` - Inbox Messages 路由
|
||||
- `backend/src/v1/schedule_items/router.py` - Schedule Items 路由
|
||||
- `backend/src/v1/infra/router.py` - Infra 路由
|
||||
@@ -1,145 +0,0 @@
|
||||
# 前后端测试分析报告
|
||||
|
||||
**Date:** 2026-03-04
|
||||
**Status:** Completed
|
||||
|
||||
---
|
||||
|
||||
## 测试统计
|
||||
|
||||
### 后端测试
|
||||
|
||||
| 类型 | 数量 | 状态 |
|
||||
|------|------|------|
|
||||
| Unit Tests | ~100+ | 可运行 |
|
||||
| Integration Tests | ~70+ | 可运行 |
|
||||
| E2E Tests | 5 | **无法运行** (缺少 playwright 依赖) |
|
||||
|
||||
### 前端测试
|
||||
|
||||
| 类型 | 数量 | 状态 |
|
||||
|------|------|------|
|
||||
| Flutter Tests | 140 | ✅ 全部通过 |
|
||||
|
||||
---
|
||||
|
||||
## 问题发现
|
||||
|
||||
### 1. 后端 E2E 测试无法运行 (HIGH)
|
||||
|
||||
**问题**: 5 个 E2E 测试文件需要 `playwright` 模块,但依赖未安装。
|
||||
|
||||
**影响文件**:
|
||||
- `tests/e2e/test_auth_flow.py`
|
||||
- `tests/e2e/test_infra_health_e2e.py`
|
||||
- `tests/e2e/test_logging_e2e.py`
|
||||
- `tests/e2e/test_mobile_health_e2e.py`
|
||||
- `tests/e2e/test_profile_flow.py`
|
||||
|
||||
**错误**:
|
||||
```
|
||||
ModuleNotFoundError: No module named 'playwright'
|
||||
```
|
||||
|
||||
**建议**:
|
||||
- 安装 playwright: `uv add playwright && uv run playwright install`
|
||||
- 或者移除这些无法运行的 E2E 测试文件
|
||||
|
||||
---
|
||||
|
||||
### 2. 测试文件命名冲突导致收集警告 (LOW)
|
||||
|
||||
**问题**: 存在多个同名 `test_schemas.py` 文件在不同目录,导致 pytest 收集时显示警告。
|
||||
|
||||
**影响文件**:
|
||||
- `tests/unit/v1/schedule_items/test_schemas.py`
|
||||
- `tests/unit/v1/profile/test_schemas.py`
|
||||
- `tests/unit/v1/inbox_messages/test_schemas.py`
|
||||
- `tests/unit/v1/friendships/test_schemas.py`
|
||||
|
||||
**状态**: 测试实际可以正常运行,只是有警告提示。
|
||||
|
||||
**建议**: 可保持现状(这是合理的代码组织方式);若要消除警告,可将这些文件重命名为各模块唯一的文件名(如 `test_profile_schemas.py`),或在各测试目录中添加 `__init__.py`。
|
||||
|
||||
---
|
||||
|
||||
### 3. 遗留测试验证旧字段 (INFO)
|
||||
|
||||
**文件**: `tests/unit/v1/profile/test_schemas.py`
|
||||
|
||||
**测试**: `test_profile_update_rejects_display_name_field`
|
||||
|
||||
**说明**: 此测试验证旧的 `display_name` 字段被正确拒绝。字段已在之前的重构中删除。
|
||||
|
||||
**状态**: **有效** - 这是一个回归测试,确保旧字段不被使用。
|
||||
|
||||
---
|
||||
|
||||
## 未发现的问题
|
||||
|
||||
### 冗余测试
|
||||
经过检查,未发现明显冗余的测试:
|
||||
- 每个模块的测试覆盖不同的功能
|
||||
- Unit tests、Integration tests、E2E tests 有清晰的职责划分
|
||||
|
||||
### 死代码
|
||||
未发现测试文件中有未使用的:
|
||||
- imports
|
||||
- mock 类
|
||||
- helper 函数
|
||||
|
||||
### 缺失测试
|
||||
未发现对应已实现功能但缺少测试的情况。
|
||||
|
||||
---
|
||||
|
||||
## 测试覆盖模块
|
||||
|
||||
### 后端
|
||||
| 模块 | Unit | Integration | E2E |
|
||||
|------|------|-------------|-----|
|
||||
| Auth | ✅ | ✅ | ❌ |
|
||||
| Users | - | ✅ | - |
|
||||
| Profile | ✅ | - | ❌ |
|
||||
| Friendships | ✅ | ✅ | - |
|
||||
| Inbox Messages | ✅ | ✅ | - |
|
||||
| Schedule Items | ✅ | ✅ | - |
|
||||
| Logging | ✅ | ✅ | ✅ |
|
||||
| Settings | ✅ | - | - |
|
||||
|
||||
### 前端
|
||||
| 模块 | 测试数 |
|
||||
|------|--------|
|
||||
| Auth | ~20 |
|
||||
| Chat | ~70 |
|
||||
| Home | ~15 |
|
||||
| Calendar | ~5 |
|
||||
| Core (API, Storage) | ~30 |
|
||||
|
||||
---
|
||||
|
||||
## 建议
|
||||
|
||||
1. **立即**: 解决 E2E 测试依赖问题或移除无法运行的测试文件
|
||||
2. **可选**: 清理 test_schemas.py 重名警告(低优先级)
|
||||
3. **保持**: 现有的测试结构良好,无需重大重构
|
||||
|
||||
---
|
||||
|
||||
## 附: 测试代码质量问题
|
||||
|
||||
### 测试类未完全实现 Protocol (LSP 警告)
|
||||
|
||||
**文件**: `tests/unit/v1/auth/test_auth_service.py`
|
||||
|
||||
**问题**: `FakeGateway` 和 `LogoutAssertingGateway` 类没有实现 `AuthServiceGateway` Protocol 的全部方法:
|
||||
- `request_password_reset`
|
||||
- `confirm_password_reset`
|
||||
|
||||
**影响**: LSP 类型检查器报告错误,但运行时不受影响(因为这些方法在测试中不会被调用)。
|
||||
|
||||
**建议**: 可选择为这两个测试替身补充缺失的方法实现(例如直接抛出 `NotImplementedError` 的桩方法),或在传入处使用 `typing.cast` 显式标注类型以消除类型检查告警。注意 `@pytest.mark.skip` 只能用于跳过测试用例,不能用来标记不需要的协议方法。
|
||||
|
||||
---
|
||||
|
||||
*报告生成时间: 2026-03-04*
|
||||
@@ -1,201 +0,0 @@
|
||||
# Agent 后端硬切重构设计
|
||||
|
||||
## 目标
|
||||
|
||||
- 一次性移除现有 Agent 运行时代码、测试和旧文档契约,避免新旧方案并存。
|
||||
- 仅从后端重新设计 Agent 体系,不依赖前端实现细节。
|
||||
- 新方案必须满足以下六项要求:
|
||||
1. 配置层可通过 `.env` 驱动 LLM API Key。
|
||||
2. 对话与 resume 通过 Celery 队列处理,不阻塞 Web 主线程。
|
||||
3. `v1/agent` 仅负责路由组织与服务调用,核心逻辑在 `core/agent`。
|
||||
4. 按 CrewAI 官方模型组织 Agent/Task/Crew/Flow/Tools。
|
||||
5. 按 AG-UI 协议输出事件,优先使用 `ag-ui-crewai` 适配库。
|
||||
6. 使用 LiteLLM 统计每次 LLM 调用的 token 和 cost。
|
||||
|
||||
## 设计原则
|
||||
|
||||
- 单一职责:HTTP 层只做协议和鉴权,编排与执行下沉到核心层。
|
||||
- 异步优先:长耗时推理、工具调用、恢复流程全部异步化。
|
||||
- 协议优先:AG-UI 作为唯一事件契约,不维护自定义事件方言。
|
||||
- 可观测性优先:每次 run、每次 stage、每次 LLM 调用可追踪。
|
||||
- 配置单一来源:所有密钥和模型配置只走 `core.config.settings`。
|
||||
|
||||
## 目标架构
|
||||
|
||||
### 1) 分层
|
||||
|
||||
- `backend/src/v1/agent/`
|
||||
- `router.py`: 暴露 HTTP/SSE 接口。
|
||||
- `schemas.py`: 请求/响应 DTO 和输入校验。
|
||||
- `dependencies.py`: DI 装配。
|
||||
- `service.py`: 薄服务,仅调用 `core/agent` 应用服务。
|
||||
- `backend/src/core/agent/`
|
||||
- `application/`: run/resume 应用服务。
|
||||
- `domain/`: run 状态机、resume 幂等语义、错误模型。
|
||||
- `infrastructure/crewai/`: CrewAI Agent/Task/Crew/Flow 装配与执行。
|
||||
- `infrastructure/agui/`: AG-UI 事件映射与 SSE 序列化。
|
||||
- `infrastructure/litellm/`: LiteLLM 客户端与 usage/cost 拦截器。
|
||||
- `infrastructure/queue/`: Celery task producer/consumer。
|
||||
|
||||
### 1.1) 配置来源与合并策略
|
||||
|
||||
- Agent 运行配置由两部分组成:
|
||||
- 数据库存量配置:`system_agents`(每种 agent_type 对应 llm 与 llm_config)。
|
||||
- 静态模板配置:`backend/src/core/config/static/crewai/*.yaml`(角色描述、任务模板、workflow、tools)。
|
||||
- 合并策略:
|
||||
- `llm` 与 `llm_config` 以 `system_agents` 为准。
|
||||
- prompt 模板、task 描述、flow stage、tool 白名单以 static/crewai 为准。
|
||||
- 若任一 agent_type 在 `system_agents` 缺失,运行前失败并返回受控错误。
|
||||
|
||||
### 2) 核心运行链路
|
||||
|
||||
1. `POST /api/v1/agent/runs` 只负责参数校验和鉴权。
|
||||
2. 路由调用 `AgentRunAppService.enqueue_run()`,写入 run 记录并投递 Celery。
|
||||
3. Worker 执行 `run_agent_task`:
|
||||
- 读取 run 上下文。
|
||||
- 构建 CrewAI `Agent/Task/Crew/Flow`。
|
||||
- 通过 `ag-ui-crewai` 将执行事件转为 AG-UI 标准事件。
|
||||
- 每次 LLM 调用由 LiteLLM 中间层记录 token/cost。
|
||||
4. 事件落库并发布到事件通道(Redis Stream/Channel)。
|
||||
5. SSE 接口从事件通道读取并持续推送,直到 `RUN_FINISHED` 或 `RUN_ERROR`。
|
||||
|
||||
### 3) Resume 链路
|
||||
|
||||
1. `POST /api/v1/agent/runs/{run_id}/resume` 校验 `interrupt_id` 与决策 payload。
|
||||
2. 调用 `enqueue_resume()` 投递 `resume_agent_task`。
|
||||
3. Worker 在事务内做并发控制:
|
||||
- `run_id + interrupt_id` 幂等锁。
|
||||
- 过期校验与状态迁移。
|
||||
4. 恢复后继续 CrewAI Flow,事件按 AG-UI 继续输出。
|
||||
|
||||
### 4) Session 状态持久化
|
||||
|
||||
- 使用 `sessions.state_snapshot` 作为运行态单一快照来源。
|
||||
- 快照至少包含:
|
||||
- run 上下文(thread_id、run_id、stage)
|
||||
- pending_tool_calls(tool_call_id、tool_name、args、status、expires_at)
|
||||
- correlation 索引(tool_call_id -> message_id / step_id)
|
||||
- 所有中断/恢复均以 `state_snapshot` 事务更新为准,避免内存态漂移。
|
||||
|
||||
### 5) 会话与消息落库模型
|
||||
|
||||
- 会话主表:`sessions`
|
||||
- 新建 run 时写入:`id/user_id/session_type/status=running/last_activity_at`。
|
||||
- 运行中持续更新:`status`、`last_activity_at`、`message_count`、`total_tokens`、`total_cost`、`state_snapshot`。
|
||||
- 运行结束更新:
|
||||
- 成功:`status=completed`
|
||||
- 失败:`status=failed`
|
||||
- 消息表:`messages`
|
||||
- 用户输入落库为 `role=user`(每次 run 开始时先写入)。
|
||||
- 模型输出落库为 `role=assistant`(按最终聚合文本落库,保留 metadata 记录增量信息)。
|
||||
- 工具调用结果落库为 `role=tool`,并写入 `tool_name` 与 `metadata.tool_call_id`。
|
||||
- `seq` 由每个 `session_id` 内单调递增分配,满足 `uq_messages_session_seq`。
|
||||
- 计量落库:每次 LLM 调用的 usage/cost 先写消息级,再聚合更新到 session 级。
|
||||
|
||||
## 六项要求落地映射
|
||||
|
||||
### 要求 1: `.env` 驱动 LLM API Key
|
||||
|
||||
- 新增 `LLMSettings` 到 `core.config.settings.Settings`,统一定义:
|
||||
- `SOCIAL_LLM__PROVIDER_KEYS__DASHSCOPE`
|
||||
- `SOCIAL_LLM__PROVIDER_KEYS__MINIMAX`
|
||||
- `SOCIAL_LLM__PROVIDER_KEYS__MOONSHOT`
|
||||
- `SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK`
|
||||
- `SOCIAL_LLM__PROVIDER_KEYS__ARK`
|
||||
- `SOCIAL_LLM__PROVIDER_KEYS__ZAI`
|
||||
- 禁止 `os.environ` 直接读取密钥。
|
||||
|
||||
### 要求 2: 对话和 resume 走 Celery
|
||||
|
||||
- Web 层不直接执行编排。
|
||||
- `run`/`resume` 一律入队,Worker 处理,Web 仅做事件流转发。
|
||||
- 加入任务级超时、重试、死信策略。
|
||||
|
||||
### 要求 3: v1 仅路由与调用
|
||||
|
||||
- `v1/agent/service.py` 仅保留应用服务调用和错误映射。
|
||||
- 任何编排、状态机、工具执行逻辑禁止进入 `v1`。
|
||||
|
||||
### 要求 4: CrewAI 官方流程
|
||||
|
||||
- 采用 CrewAI 原生对象:`Agent`、`Task`、`Crew`、`Flow`。
|
||||
- tools 通过 CrewAI Tool 机制注册,不做平行实现。
|
||||
- 任务模板与 agent 配置集中化(静态模板 + 运行时拼装)。
|
||||
- 配置拼装明确依赖 `system_agents + static/crewai`,不再使用双套来源。
|
||||
|
||||
### 要求 5: AG-UI + ag-ui-crewai
|
||||
|
||||
- 事件集遵循 AG-UI 协议,生命周期闭环:
|
||||
- `RUN_STARTED`
|
||||
- 流式消息和工具事件
|
||||
- 终态 `RUN_FINISHED` 或 `RUN_ERROR`
|
||||
- 优先引入 `ag-ui-crewai` 做 CrewAI 到 AG-UI 的桥接,避免重复造轮子。
|
||||
|
||||
### 要求 6: LiteLLM token/cost 统计
|
||||
|
||||
- 所有 LLM 调用通过 LiteLLM 统一出入口。
|
||||
- 按调用粒度记录:`input_tokens`、`output_tokens`、`total_tokens`、`cost`、`currency`。
|
||||
- 按 run 粒度聚合并落库,支持后续计费和审计。
|
||||
|
||||
## 数据与可观测性
|
||||
|
||||
- 保留现有 Agent 相关表结构,不在本次硬切做数据库破坏性变更。
|
||||
- 新增事件日志与调用指标落点(如已有字段不足,后续增量迁移)。
|
||||
- 日志使用结构化字段:`run_id`、`task_id`、`stage`、`tool_name`、`llm_model`、`latency_ms`。
|
||||
- 持久化原则:run/resume 的关键状态变更必须可重放,禁止仅保存在内存。
|
||||
|
||||
## 事务边界
|
||||
|
||||
- `run` 入口事务:创建或加载 `session` + 写入用户消息。
|
||||
- `worker` 执行事务(可分阶段短事务):
|
||||
- 阶段开始:更新 `session.status/state_snapshot`。
|
||||
- LLM 返回:写 assistant/tool 消息 + 更新 token/cost 聚合。
|
||||
- 中断:写 `pending_tool_calls` 到 `state_snapshot` 并提交。
|
||||
- 完成:更新终态 `session.status` 并提交。
|
||||
- `resume` 事务:校验 `interrupt_id` 与 ownership,CAS 更新 `state_snapshot`,然后进入后续执行事务。
|
||||
|
||||
## 错误处理与安全
|
||||
|
||||
- API Key 缺失启动即失败,不进入运行态。
|
||||
- 外部工具入参统一白名单和 schema 校验。
|
||||
- resume 决策必须鉴权与会话所有权校验。
|
||||
- 错误响应遵循 RFC 7807,避免泄漏敏感上下文。
|
||||
|
||||
## 工具调用与恢复语义
|
||||
|
||||
- 工具分三类:
|
||||
- 前端工具:由 `RunAgentInput.tools` 提供能力声明,触发 interrupt,由客户端执行并回传 result。
|
||||
- 后端工具(需审批):先 interrupt 给前端审批;审批通过后由后端执行,不由前端执行。
|
||||
- 后端工具(直执):后端直接执行。
|
||||
- 一致性约束:
|
||||
- 每个 tool_result 必须携带 `tool_call_id`。
|
||||
- 后端仅接受当前 `state_snapshot.pending_tool_calls` 中存在且状态合法的 `tool_call_id`。
|
||||
- 若收到未知/已消费/过期 `tool_call_id`,立即产出 `RUN_ERROR` 并记录审计日志。
|
||||
|
||||
## 测试策略
|
||||
|
||||
- 单元测试:
|
||||
- 配置解析与 key 解析
|
||||
- run/resume 状态机与幂等
|
||||
- LiteLLM usage 聚合
|
||||
- 集成测试:
|
||||
- API 入队
|
||||
- Worker 消费
|
||||
- SSE 事件顺序与终态
|
||||
- E2E:
|
||||
- run 成功链路
|
||||
- interrupt + resume 链路
|
||||
- tool 调用链路
|
||||
|
||||
## 迁移策略
|
||||
|
||||
- 阶段 0(本次):硬切删除旧代码、旧测试、旧文档契约。
|
||||
- 阶段 1:搭建新架构骨架和最小可运行 run 流程。
|
||||
- 阶段 2:接入 CrewAI + ag-ui-crewai + LiteLLM 完整链路。
|
||||
- 阶段 3:补齐可观测性、压测与稳定性治理。
|
||||
|
||||
## 验收标准
|
||||
|
||||
- 后端仓库不存在旧 `v1/agent` 和 `core/agent` 旧实现。
|
||||
- 所有 Agent 相关旧测试与旧文档契约已移除。
|
||||
- 新方案设计文档明确覆盖六项要求并可进入实现阶段。
|
||||
@@ -1,574 +0,0 @@
|
||||
# Agent 后端重建 Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** 在后端重建 Agent 运行时,满足队列异步、CrewAI 配置打通、AG-UI 工具中断恢复、LiteLLM 计量、以及 `sessions.state_snapshot` 持久化要求。
|
||||
|
||||
**Architecture:** `v1/agent` 仅做 API/鉴权/参数校验与 SSE 输出,`core/agent` 负责编排与执行。Agent 创建配置由 `system_agents`(数据库)+ `core/config/static/crewai/*.yaml`(静态模板)合并生成。run/resume 全链路通过 Celery Worker 执行,状态写入 `sessions.state_snapshot`。
|
||||
|
||||
**Tech Stack:** FastAPI, Celery, Redis, CrewAI, ag-ui-crewai, LiteLLM, SQLAlchemy, Alembic, pytest
|
||||
|
||||
---
|
||||
|
||||
### Task 1: 建立配置聚合器(system_agents + static/crewai)
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/config/resolver.py`
|
||||
- Modify: `backend/src/core/config/static/crewai/agents.yaml`
|
||||
- Modify: `backend/src/core/config/static/crewai/tasks.yaml`
|
||||
- Create: `backend/src/core/config/static/crewai/workflow.yaml`
|
||||
- Create: `backend/src/core/config/static/crewai/tools.yaml`
|
||||
- Test: `backend/tests/unit/core/agent/test_config_resolver.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_resolver_merges_system_agents_and_static_templates():
|
||||
resolved = resolve_agent_runtime_config(...)
|
||||
assert resolved.intent.llm.model_code == "deepseek-v3.2"
|
||||
assert "intent" in resolved.workflow_stages
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
|
||||
Expected: FAIL with `NameError` or `ImportError` (`resolve_agent_runtime_config` is not defined yet)
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
def resolve_agent_runtime_config(system_agents: list[dict], static_cfg: dict) -> RuntimeConfig:
|
||||
by_type = {item["agent_type"]: item for item in system_agents}
|
||||
return RuntimeConfig.from_sources(by_type=by_type, static_cfg=static_cfg)
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure/config/resolver.py backend/src/core/config/static/crewai backend/tests/unit/core/agent/test_config_resolver.py
|
||||
git commit -m "feat: add system_agents and static crewai config resolver"
|
||||
```
|
||||
|
||||
### Task 2: 统一 LLM Key 与模型配置入口
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/src/core/config/settings.py`
|
||||
- Modify: `.env.example`
|
||||
- Create: `backend/tests/unit/core/config/test_llm_settings.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_llm_keys_read_from_settings(monkeypatch):
|
||||
monkeypatch.setenv("SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK", "k1")
|
||||
s = Settings()
|
||||
assert s.llm.provider_keys.deepseek == "k1"
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
|
||||
Expected: FAIL with missing `llm` field
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
class LLMProviderKeys(BaseModel):
|
||||
deepseek: str | None = None
|
||||
|
||||
class LLMSettings(BaseModel):
|
||||
provider_keys: LLMProviderKeys = LLMProviderKeys()
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/config/settings.py .env.example backend/tests/unit/core/config/test_llm_settings.py
|
||||
git commit -m "feat: centralize llm provider keys in settings"
|
||||
```
|
||||
|
||||
### Task 3: sessions 表状态快照契约落地
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py`
|
||||
- Modify: `backend/src/models/agent_chat_session.py`
|
||||
- Create: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_sessions_has_state_snapshot_column(db_inspector):
|
||||
columns = db_inspector.get_columns("sessions")
|
||||
assert "state_snapshot" in [c["name"] for c in columns]
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
|
||||
Expected: FAIL when migration not applied
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
def upgrade() -> None:
|
||||
op.add_column("sessions", sa.Column("state_snapshot", postgresql.JSONB, nullable=True))
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py backend/src/models/agent_chat_session.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
|
||||
git commit -m "feat(db): enforce sessions state_snapshot contract"
|
||||
```
|
||||
|
||||
### Task 3.1: 会话与消息持久化仓储
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
|
||||
- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_run_persists_user_and_assistant_messages(db_session):
|
||||
run = execute_run(...)
|
||||
rows = list_messages(session_id=run.session_id)
|
||||
assert rows[0].role == "user"
|
||||
assert rows[1].role == "assistant"
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
async def append_message(...):
|
||||
session.add(AgentChatMessage(...))
|
||||
|
||||
async def update_session_aggregate(...):
|
||||
session_obj.message_count = message_count
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure/persistence backend/tests/integration/core/agent/test_session_message_persistence.py
|
||||
git commit -m "feat: persist session lifecycle and messages for agent runs"
|
||||
```
|
||||
|
||||
### Task 4: 定义 state_snapshot 结构与并发语义
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/domain/state_snapshot.py`
|
||||
- Create: `backend/tests/unit/core/agent/test_state_snapshot.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_pending_tool_call_snapshot_contains_correlation_fields():
|
||||
snap = StateSnapshot.new(...)
|
||||
pending = snap.pending_tool_calls[0]
|
||||
assert pending.tool_call_id
|
||||
assert pending.status == "PENDING_APPROVAL"
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
class PendingToolCall(BaseModel):
|
||||
tool_call_id: str
|
||||
tool_name: str
|
||||
status: Literal["PENDING_APPROVAL", "APPROVED", "EXECUTED", "REJECTED", "EXPIRED"]
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/domain/state_snapshot.py backend/tests/unit/core/agent/test_state_snapshot.py
|
||||
git commit -m "feat: define sessions state_snapshot schema for run and tool state"
|
||||
```
|
||||
|
||||
### Task 5: 工具路由策略(前端/后端/审批)
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/domain/tool_policy.py`
|
||||
- Create: `backend/tests/unit/core/agent/test_tool_policy.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_frontend_tool_requires_interrupt_and_client_execution():
|
||||
decision = classify_tool_call(name="ui.navigate_to", source="request.tools")
|
||||
assert decision.mode == "FRONTEND_EXECUTE"
|
||||
|
||||
def test_backend_approval_tool_returns_interrupt_but_executes_on_backend_after_approve():
|
||||
decision = classify_tool_call(name="srv.transfer_funds", requires_approval=True)
|
||||
assert decision.mode == "BACKEND_APPROVAL_INTERRUPT"
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
if tool_name.startswith("ui."):
|
||||
return ToolDecision(mode="FRONTEND_EXECUTE")
|
||||
if requires_approval:
|
||||
return ToolDecision(mode="BACKEND_APPROVAL_INTERRUPT")
|
||||
return ToolDecision(mode="BACKEND_DIRECT_EXECUTE")
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/domain/tool_policy.py backend/tests/unit/core/agent/test_tool_policy.py
|
||||
git commit -m "feat: add frontend/backend tool policy and approval routing"
|
||||
```
|
||||
|
||||
### Task 6: tool_call 与 tool_result 对账机制
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/domain/tool_correlation.py`
|
||||
- Create: `backend/tests/unit/core/agent/test_tool_correlation.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_rejects_tool_result_when_tool_call_id_not_pending():
|
||||
store = PendingToolStore([])
|
||||
with pytest.raises(ToolCorrelationError):
|
||||
store.apply_result(tool_call_id="unknown", result={"ok": True})
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
def apply_result(self, *, tool_call_id: str, result: dict) -> None:
|
||||
pending = self._pending.get(tool_call_id)
|
||||
if pending is None:
|
||||
raise ToolCorrelationError("tool_call_id not pending")
|
||||
pending.result = result
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/domain/tool_correlation.py backend/tests/unit/core/agent/test_tool_correlation.py
|
||||
git commit -m "feat: add tool call/result correlation guard"
|
||||
```
|
||||
|
||||
### Task 7: Celery run/resume 异步任务
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
|
||||
- Create: `backend/src/core/agent/application/run_service.py`
|
||||
- Create: `backend/src/core/agent/application/resume_service.py`
|
||||
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_run_api_enqueues_celery_task(client):
|
||||
resp = client.post("/api/v1/agent/runs", json={...})
|
||||
assert resp.status_code == 202
|
||||
|
||||
def test_resume_updates_session_status_and_snapshot(client):
|
||||
resp = client.post("/api/v1/agent/runs/r1/resume", json={...})
|
||||
assert resp.status_code == 202
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
def enqueue_run(cmd: RunCommand) -> str:
|
||||
task = run_agent_task.apply_async(args=[cmd.model_dump()])
|
||||
return task.id
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/application backend/src/core/agent/infrastructure/queue backend/tests/integration/core/agent/test_queue_run_resume.py
|
||||
git commit -m "feat: add celery-based run and resume tasks"
|
||||
```
|
||||
|
||||
### Task 8: CrewAI 运行时加载与创建
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_runtime_creates_agents_tasks_from_resolved_config():
|
||||
runtime = CrewAIRuntime(...)
|
||||
crew = runtime.build_crew(message="hello")
|
||||
assert len(crew.agents) >= 1
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
def build_crew(self, *, message: str) -> Crew:
|
||||
agents = self._factory.build_agents(self._config)
|
||||
tasks = self._factory.build_tasks(self._config, message=message)
|
||||
return Crew(agents=agents, tasks=tasks)
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure/crewai backend/tests/unit/core/agent/test_crewai_runtime.py
|
||||
git commit -m "feat: create crewai runtime from resolved config"
|
||||
```
|
||||
|
||||
### Task 9: AG-UI 与 ag-ui-crewai 事件桥
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/agui/stream.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_agui_stream_emits_required_lifecycle():
|
||||
events = to_agui_events(internal_events=[...])
|
||||
assert events[0]["type"] == "RUN_STARTED"
|
||||
assert events[-1]["type"] in {"RUN_FINISHED", "RUN_ERROR"}
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
def to_agui_events(internal_events: list[dict]) -> list[dict]:
|
||||
return [map_event(e) for e in internal_events]
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure/agui backend/tests/unit/core/agent/test_agui_bridge.py
|
||||
git commit -m "feat: add ag-ui and ag-ui-crewai event bridge"
|
||||
```
|
||||
|
||||
### Task 10: LiteLLM 调用统计与会话聚合
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/litellm/client.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_tracker_aggregates_per_call_usage_and_cost():
|
||||
t = UsageTracker()
|
||||
t.add({"input_tokens": 10, "output_tokens": 5, "cost": "0.1"})
|
||||
assert t.snapshot()["total_tokens"] == 15
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
def add(self, usage: dict[str, object]) -> None:
|
||||
self.input_tokens += int(usage.get("input_tokens", 0))
|
||||
self.output_tokens += int(usage.get("output_tokens", 0))
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure/litellm backend/tests/unit/core/agent/test_litellm_usage.py
|
||||
git commit -m "feat: add litellm usage and cost tracking"
|
||||
```
|
||||
|
||||
### Task 11: v1/agent 薄层 API + SSE 出口
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/v1/agent/router.py`
|
||||
- Create: `backend/src/v1/agent/schemas.py`
|
||||
- Create: `backend/src/v1/agent/dependencies.py`
|
||||
- Create: `backend/src/v1/agent/service.py`
|
||||
- Modify: `backend/src/v1/router.py`
|
||||
- Test: `backend/tests/integration/v1/agent/test_routes.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
```python
|
||||
def test_run_endpoint_returns_sse_and_not_blocking(client):
|
||||
resp = client.post("/api/v1/agent/runs", json={...})
|
||||
assert resp.status_code == 202
|
||||
```
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
```python
|
||||
@router.post("/runs", status_code=202)
|
||||
async def create_run(...):
|
||||
task_id = service.enqueue_run(input_data)
|
||||
return {"task_id": task_id}
|
||||
```
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/v1/agent backend/src/v1/router.py backend/tests/integration/v1/agent/test_routes.py
|
||||
git commit -m "feat: add thin v1 agent api and sse endpoints"
|
||||
```
|
||||
|
||||
### Task 12: 端到端验证与文档回填
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/runtime/runtime-route.md`
|
||||
- Modify: `docs/runtime/runtime-runbook.md`
|
||||
|
||||
**Step 1: Run unit tests**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent backend/tests/unit/core/config backend/tests/unit/database -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 2: Run integration tests**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
|
||||
Expected: PASS
|
||||
|
||||
**Step 3: Run lint and typecheck**
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run ruff check backend/src backend/tests`
|
||||
Expected: PASS
|
||||
|
||||
Run: `PYTHONPATH=backend/src uv run basedpyright backend/src`
|
||||
Expected: PASS
|
||||
|
||||
**Step 4: Document protocol contracts**
|
||||
|
||||
在运行手册中补充以下固定规则:
|
||||
- `system_agents` + `static/crewai` 配置合并优先级。
|
||||
- `sessions.state_snapshot` 字段结构与版本号。
|
||||
- `messages` 入库顺序与 `sessions` 聚合字段更新规则。
|
||||
- 工具调用审批与恢复时序图。
|
||||
- tool_call/result 不匹配时的错误语义(`RUN_ERROR` + 可审计日志)。
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md
|
||||
git commit -m "docs: add new agent runtime contracts and operational guide"
|
||||
```
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Agent 创建配置由 `system_agents` 与 `core/config/static/crewai` 合并生成。
|
||||
- [ ] run/resume 仅通过 Celery Worker 执行,Web 不执行编排。
|
||||
- [ ] `v1/agent` 无业务编排代码。
|
||||
- [ ] `sessions.state_snapshot` 承担运行态和工具审批恢复状态。
|
||||
- [ ] 每次 run/resume 的会话状态变更均落库到 `sessions`。
|
||||
- [ ] 用户/助手/工具消息按 `messages` 约束落库,`seq` 单调递增。
|
||||
- [ ] 前端工具与后端工具(审批/非审批)策略完整可测。
|
||||
- [ ] tool_call 与 tool_result 具备强关联校验并可恢复/报错。
|
||||
- [ ] LiteLLM 逐次计量与 run 聚合可落库。
|
||||
@@ -1,199 +0,0 @@
|
||||
# Agent Architecture Simplification Design
|
||||
|
||||
**Date:** 2026-03-04
|
||||
**Status:** Approved
|
||||
**Author:** AI Assistant
|
||||
|
||||
## Overview
|
||||
|
||||
Simplify the agent configuration architecture by removing the redundant `user_agents` table and renaming `user_agent_catalog` to `system_agents`.
|
||||
|
||||
## Problem Statement
|
||||
|
||||
The current architecture stores redundant data:
|
||||
- `user_agent_catalog`: System-level agent configurations (3 agent types for all users)
|
||||
- `user_agents`: Per-user agent instances (copies catalog data for each user)
|
||||
|
||||
Since every user has the same 3 agents with identical configurations (copied from the catalog), maintaining the `user_agents` table creates unnecessary complexity and data duplication.
|
||||
|
||||
## Goals
|
||||
|
||||
1. Remove `user_agents` table and related code
|
||||
2. Rename `user_agent_catalog` to `system_agents` for clarity
|
||||
3. Preserve ability for future user-level prompt customization via `profiles.settings`
|
||||
4. Maintain backward compatibility in the deployment process
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- User-level agent configuration (LLM selection, temperature, etc.)
|
||||
- User-level prompt customization implementation (deferred to future iteration)
|
||||
|
||||
## Architecture Changes
|
||||
|
||||
### Current Architecture
|
||||
|
||||
```
|
||||
user_agent_catalog (system config)
|
||||
↓ (trigger copies for each new user)
|
||||
user_agents (per-user instances)
|
||||
```
|
||||
|
||||
### New Architecture
|
||||
|
||||
```
|
||||
system_agents (shared by all users)
|
||||
profiles.settings.agent_prompts (future: user-level prompts)
|
||||
```
|
||||
|
||||
### Data Flow
|
||||
|
||||
1. System startup: Load `system_agents` from YAML
|
||||
2. User creation: No longer creates `user_agents` records
|
||||
3. Runtime (future): Read from `system_agents` + merge with `profiles.settings.agent_prompts`
|
||||
|
||||
## Database Migration
|
||||
|
||||
### Changes
|
||||
|
||||
1. **Delete `memories.agent_id` column**
|
||||
- Remove foreign key `fk_memories_agent_id`
|
||||
- Remove check constraint `chk_memory_type_agent_id`
|
||||
- Remove index `ix_memories_agent_type_status`
|
||||
- Drop column `agent_id`
|
||||
|
||||
2. **Delete `user_agents` table**
|
||||
- Remove all RLS policies
|
||||
- Remove indexes: `ix_user_agents_agent_type`, `ix_user_agents_status`
|
||||
- Remove foreign keys: `fk_user_agents_user_id`, `fk_user_agents_llm_id`, etc.
|
||||
- Remove check constraint `chk_agent_type`
|
||||
- Remove unique constraint `uq_user_agents_user_id_agent_type`
|
||||
- Drop table
|
||||
|
||||
3. **Rename `user_agent_catalog` → `system_agents`**
|
||||
- Remove old RLS policies
|
||||
- Rename table
|
||||
- Rename constraints: `fk_user_agent_catalog_llm_id` → `fk_system_agents_llm_id`
|
||||
- Rename check constraint: `chk_user_agent_catalog_status` → `chk_system_agents_status`
|
||||
- Re-create RLS policies with new table name
|
||||
|
||||
4. **Update trigger `create_profile_for_new_user()`**
|
||||
- Remove logic that inserts into `user_agents`
|
||||
- Initialize `profiles.settings.agent_prompts` with empty object
|
||||
|
||||
5. **Update existing `profiles.settings`**
|
||||
- Add `agent_prompts: {}` to all existing profiles
|
||||
|
||||
### Downgrade Path
|
||||
|
||||
- Re-create `user_agents` table with all constraints and indexes
|
||||
- Restore `memories.agent_id` column and constraints
|
||||
- Rename `system_agents` → `user_agent_catalog`
|
||||
- Restore original trigger
|
||||
|
||||
## Code Changes
|
||||
|
||||
### Model Layer
|
||||
|
||||
**Delete:**
|
||||
- `backend/src/models/user_agents.py`
|
||||
|
||||
**Rename:**
|
||||
- `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
|
||||
- Class `UserAgentCatalog` → `SystemAgents`
|
||||
|
||||
**Update:**
|
||||
- `backend/src/models/__init__.py` - Update imports and exports
|
||||
|
||||
### Configuration Layer
|
||||
|
||||
**Rename:**
|
||||
- `backend/src/core/config/static/database/user_agent_catalog.yaml`
|
||||
→ `backend/src/core/config/static/database/system_agents.yaml`
|
||||
|
||||
**Update:**
|
||||
- `backend/src/core/config/initial/init_data.py`
|
||||
- `UserAgentCatalogSeed` → `SystemAgentsSeed`
|
||||
- `UserAgentCatalogYaml` → `SystemAgentsYaml`
|
||||
- Import from `models.system_agents`
|
||||
- Path: `system_agents.yaml`
|
||||
- Function: `initialize_user_agent_catalog()` → `initialize_system_agents()`
|
||||
|
||||
### Future: Profile Settings Structure (Deferred)
|
||||
|
||||
```json
|
||||
{
|
||||
"agent_prompts": {
|
||||
"INTENT_RECOGNITION": "custom prompt...",
|
||||
"TASK_EXECUTION": "custom prompt...",
|
||||
"RESULT_REPORTING": "custom prompt..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Migration Tests
|
||||
|
||||
- Verify `user_agents` table is deleted
|
||||
- Verify `system_agents` table exists with correct structure
|
||||
- Verify trigger no longer creates `user_agents` records
|
||||
- Verify `profiles.settings.agent_prompts` is initialized
|
||||
- Verify downgrade path works correctly
|
||||
|
||||
### Model Tests
|
||||
|
||||
- Verify `SystemAgents` model CRUD operations
|
||||
- Verify `Profile.settings` JSONB storage
|
||||
|
||||
### Integration Tests
|
||||
|
||||
- Verify `initialize_system_agents()` loads from YAML
|
||||
- Verify data is correctly inserted into `system_agents` table
|
||||
|
||||
## Deployment Considerations
|
||||
|
||||
### Pre-deployment
|
||||
|
||||
- Back up the database (especially `user_agents`, if it contains any data)
|
||||
- Confirm the production `user_agents` table has no critical data
|
||||
|
||||
### Deployment
|
||||
|
||||
1. Run migration: `alembic upgrade head`
|
||||
2. Verify migration success
|
||||
3. Restart application services
|
||||
4. Verify new user registration works without `user_agents`
|
||||
|
||||
### Post-deployment
|
||||
|
||||
- Monitor application logs for any references to deleted `user_agents`
|
||||
- Verify agent-related functionality still works
|
||||
|
||||
## Risks and Mitigations
|
||||
|
||||
| Risk | Mitigation |
|
||||
|------|-----------|
|
||||
| Existing `user_agents` data loss | Back up before migration; data is redundant anyway |
|
||||
| Code still references `user_agents` | Comprehensive code search and testing |
|
||||
| Trigger fails on new user creation | Test migration thoroughly; include rollback plan |
|
||||
| Future need for user-level config | Can add `agent_overrides` to `profiles.settings` |
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] All tests pass
|
||||
- [ ] Migration runs successfully (upgrade and downgrade)
|
||||
- [ ] New user registration creates profile without `user_agents` records
|
||||
- [ ] System agents are loaded from YAML correctly
|
||||
- [ ] No references to `user_agents` remain in codebase
|
||||
|
||||
## Timeline
|
||||
|
||||
- Design: 2026-03-04 (Completed)
|
||||
- Implementation: TBD
|
||||
- Testing: TBD
|
||||
- Deployment: TBD
|
||||
|
||||
## References
|
||||
|
||||
- Migration file: `backend/alembic/versions/YYYYMMDD_simplify_agent_architecture.py`
|
||||
- Original catalog migration: `backend/alembic/versions/50ae013ce530_add_user_agent_catalog.py`
|
||||
@@ -1,844 +0,0 @@
|
||||
# Agent Architecture Simplification Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Simplify agent configuration by removing the redundant `user_agents` table and renaming `user_agent_catalog` to `system_agents`.
|
||||
|
||||
**Architecture:** Delete the `user_agents` table (including the `memories.agent_id` dependency), rename `user_agent_catalog` to `system_agents`, and update all references in code.
|
||||
|
||||
**Tech Stack:** Python 3.11+, SQLAlchemy, Alembic, PostgreSQL
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- [ ] Current branch: dev
|
||||
- [ ] No uncommitted changes
|
||||
- [ ] Docker services running (Supabase local)
|
||||
|
||||
## Task 1: Create Database Migration
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/alembic/versions/20260304_simplify_agent_architecture.py`
|
||||
|
||||
**Step 1: Create migration file**
|
||||
|
||||
Run: `cd backend && uv run alembic revision -m "simplify_agent_architecture"`
|
||||
|
||||
Expected: New migration file created with revision ID
|
||||
|
||||
**Step 2: Write migration upgrade logic**
|
||||
|
||||
Edit the generated migration file with this complete upgrade function:
|
||||
|
||||
```python
|
||||
def upgrade() -> None:
|
||||
# 1. Delete memories.agent_id dependencies
|
||||
op.drop_constraint("fk_memories_agent_id", "memories", type_="foreignkey")
|
||||
op.drop_constraint("chk_memory_type_agent_id", "memories", type_="check")
|
||||
op.execute("DROP INDEX IF EXISTS ix_memories_agent_type_status")
|
||||
op.drop_column("memories", "agent_id")
|
||||
|
||||
# 2. Delete user_agents table
|
||||
_drop_rls("user_agents")
|
||||
|
||||
op.drop_constraint("fk_user_agents_updated_by", "user_agents", type_="foreignkey")
|
||||
op.drop_constraint("fk_user_agents_created_by", "user_agents", type_="foreignkey")
|
||||
op.drop_constraint("fk_user_agents_llm_id", "user_agents", type_="foreignkey")
|
||||
op.drop_constraint("fk_user_agents_user_id", "user_agents", type_="foreignkey")
|
||||
op.drop_constraint("chk_agent_type", "user_agents", type_="check")
|
||||
op.drop_constraint("uq_user_agents_user_id_agent_type", "user_agents", type_="unique")
|
||||
|
||||
op.execute("DROP INDEX IF EXISTS ix_user_agents_status")
|
||||
op.execute("DROP INDEX IF EXISTS ix_user_agents_agent_type")
|
||||
|
||||
op.drop_table("user_agents")
|
||||
|
||||
# 3. Rename user_agent_catalog to system_agents
|
||||
_drop_rls("user_agent_catalog")
|
||||
|
||||
op.rename_table("user_agent_catalog", "system_agents")
|
||||
|
||||
op.execute(
|
||||
"ALTER TABLE system_agents RENAME CONSTRAINT fk_user_agent_catalog_llm_id "
|
||||
"TO fk_system_agents_llm_id"
|
||||
)
|
||||
op.execute(
|
||||
"ALTER TABLE system_agents RENAME CONSTRAINT chk_user_agent_catalog_status "
|
||||
"TO chk_system_agents_status"
|
||||
)
|
||||
|
||||
_enable_rls("system_agents")
|
||||
|
||||
# 4. Update trigger
|
||||
op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
|
||||
op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
|
||||
|
||||
op.execute("""
|
||||
CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
|
||||
RETURNS trigger
|
||||
LANGUAGE plpgsql
|
||||
SECURITY DEFINER
|
||||
SET search_path = public
|
||||
AS $$
|
||||
BEGIN
|
||||
INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
|
||||
VALUES (
|
||||
NEW.id,
|
||||
COALESCE(
|
||||
NEW.raw_user_meta_data ->> 'username',
|
||||
split_part(NEW.email, '@', 1),
|
||||
'user_' || substring(NEW.id::text, 1, 8)
|
||||
),
|
||||
NULL,
|
||||
NULL,
|
||||
'{"agent_prompts": {}}'::jsonb,
|
||||
now(),
|
||||
now()
|
||||
)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$
|
||||
""")
|
||||
|
||||
op.execute("""
|
||||
CREATE TRIGGER on_auth_user_created
|
||||
AFTER INSERT ON auth.users
|
||||
FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
|
||||
""")
|
||||
|
||||
# 5. Update existing profiles.settings
|
||||
op.execute("""
|
||||
UPDATE profiles
|
||||
SET settings = jsonb_set(
|
||||
COALESCE(settings, '{}'::jsonb),
|
||||
'{agent_prompts}',
|
||||
'{}'::jsonb
|
||||
)
|
||||
WHERE NOT settings ? 'agent_prompts'
|
||||
""")
|
||||
```
|
||||
|
||||
**Step 3: Write migration downgrade logic**
|
||||
|
||||
Add this complete downgrade function:
|
||||
|
||||
```python
|
||||
def downgrade() -> None:
|
||||
# 1. Revert trigger
|
||||
op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users")
|
||||
op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()")
|
||||
|
||||
op.execute("""
|
||||
CREATE OR REPLACE FUNCTION public.create_profile_for_new_user()
|
||||
RETURNS trigger
|
||||
LANGUAGE plpgsql
|
||||
SECURITY DEFINER
|
||||
SET search_path = public
|
||||
AS $$
|
||||
BEGIN
|
||||
INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at)
|
||||
VALUES (
|
||||
NEW.id,
|
||||
COALESCE(
|
||||
NEW.raw_user_meta_data ->> 'username',
|
||||
split_part(NEW.email, '@', 1),
|
||||
'user_' || substring(NEW.id::text, 1, 8)
|
||||
),
|
||||
NULL,
|
||||
NULL,
|
||||
'{}'::jsonb,
|
||||
now(),
|
||||
now()
|
||||
)
|
||||
ON CONFLICT (id) DO NOTHING;
|
||||
|
||||
INSERT INTO public.user_agents (id, user_id, llm_id, agent_type, config, status, created_by, updated_by)
|
||||
SELECT
|
||||
gen_random_uuid(),
|
||||
NEW.id,
|
||||
uac.llm_id,
|
||||
uac.agent_type,
|
||||
uac.config,
|
||||
uac.status,
|
||||
NEW.id,
|
||||
NEW.id
|
||||
FROM public.user_agent_catalog uac;
|
||||
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$
|
||||
""")
|
||||
|
||||
op.execute("""
|
||||
CREATE TRIGGER on_auth_user_created
|
||||
AFTER INSERT ON auth.users
|
||||
FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user()
|
||||
""")
|
||||
|
||||
# 2. Revert rename: system_agents -> user_agent_catalog
|
||||
_drop_rls("system_agents")
|
||||
|
||||
op.rename_table("system_agents", "user_agent_catalog")
|
||||
|
||||
op.execute(
|
||||
"ALTER TABLE user_agent_catalog RENAME CONSTRAINT fk_system_agents_llm_id "
|
||||
"TO fk_user_agent_catalog_llm_id"
|
||||
)
|
||||
op.execute(
|
||||
"ALTER TABLE user_agent_catalog RENAME CONSTRAINT chk_system_agents_status "
|
||||
"TO chk_user_agent_catalog_status"
|
||||
)
|
||||
|
||||
_enable_rls("user_agent_catalog")
|
||||
|
||||
# 3. Recreate user_agents table
|
||||
op.create_table(
|
||||
"user_agents",
|
||||
sa.Column("id", sa.UUID(), nullable=False),
|
||||
sa.Column("user_id", sa.UUID(), nullable=False),
|
||||
sa.Column("llm_id", sa.UUID(), nullable=False),
|
||||
sa.Column("agent_type", sa.String(length=20), nullable=False),
|
||||
sa.Column(
|
||||
"config",
|
||||
postgresql.JSONB(astext_type=sa.Text()),
|
||||
server_default="{}",
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("status", sa.String(length=20), nullable=False),
|
||||
sa.Column("created_by", sa.UUID(), nullable=True),
|
||||
sa.Column("updated_by", sa.UUID(), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"updated_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.text("now()"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
||||
op.create_unique_constraint(
|
||||
"uq_user_agents_user_id_agent_type",
|
||||
"user_agents",
|
||||
["user_id", "agent_type"]
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"CREATE INDEX ix_user_agents_agent_type ON user_agents (agent_type)"
|
||||
)
|
||||
op.execute(
|
||||
"CREATE INDEX ix_user_agents_status ON user_agents (status)"
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"ALTER TABLE user_agents ADD CONSTRAINT chk_agent_type "
|
||||
"CHECK (agent_type IN ('INTENT_RECOGNITION', 'TASK_EXECUTION', 'RESULT_REPORTING'))"
|
||||
)
|
||||
|
||||
op.create_foreign_key(
|
||||
"fk_user_agents_user_id",
|
||||
"user_agents",
|
||||
"users",
|
||||
["user_id"],
|
||||
["id"],
|
||||
referent_schema="auth",
|
||||
ondelete="CASCADE",
|
||||
)
|
||||
op.create_foreign_key(
|
||||
"fk_user_agents_llm_id",
|
||||
"user_agents",
|
||||
"llms",
|
||||
["llm_id"],
|
||||
["id"],
|
||||
ondelete="RESTRICT",
|
||||
)
|
||||
op.create_foreign_key(
|
||||
"fk_user_agents_created_by",
|
||||
"user_agents",
|
||||
"users",
|
||||
["created_by"],
|
||||
["id"],
|
||||
referent_schema="auth",
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
op.create_foreign_key(
|
||||
"fk_user_agents_updated_by",
|
||||
"user_agents",
|
||||
"users",
|
||||
["updated_by"],
|
||||
["id"],
|
||||
referent_schema="auth",
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
|
||||
_enable_rls("user_agents")
|
||||
|
||||
# 4. Recreate memories.agent_id
|
||||
op.add_column(
|
||||
"memories",
|
||||
sa.Column("agent_id", sa.UUID(), nullable=True)
|
||||
)
|
||||
|
||||
op.create_foreign_key(
|
||||
"fk_memories_agent_id",
|
||||
"memories",
|
||||
"user_agents",
|
||||
["agent_id"],
|
||||
["id"],
|
||||
ondelete="CASCADE",
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"CREATE INDEX ix_memories_agent_type_status ON memories (agent_id, memory_type, status)"
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"ALTER TABLE memories ADD CONSTRAINT chk_memory_type_agent_id "
|
||||
"CHECK ((memory_type = 'work' AND agent_id IS NOT NULL) OR "
|
||||
"(memory_type = 'user' AND agent_id IS NULL))"
|
||||
)
|
||||
```
|
||||
|
||||
**Step 4: Add helper functions**
|
||||
|
||||
Add these helper functions at the end of the migration file:
|
||||
|
||||
```python
|
||||
def _enable_rls(table_name: str) -> None:
|
||||
for role in ["anon", "authenticated"]:
|
||||
for action in ["select", "insert", "update", "delete"]:
|
||||
op.execute(
|
||||
f"DROP POLICY IF EXISTS {role}_{action}_{table_name} ON {table_name}"
|
||||
)
|
||||
op.execute(f"ALTER TABLE {table_name} ENABLE ROW LEVEL SECURITY")
|
||||
for role in ["anon", "authenticated"]:
|
||||
op.execute(
|
||||
f"CREATE POLICY {role}_select_{table_name} ON {table_name} "
|
||||
f"FOR SELECT TO {role} USING (false)"
|
||||
)
|
||||
op.execute(
|
||||
f"CREATE POLICY {role}_insert_{table_name} ON {table_name} "
|
||||
f"FOR INSERT TO {role} WITH CHECK (false)"
|
||||
)
|
||||
op.execute(
|
||||
f"CREATE POLICY {role}_update_{table_name} ON {table_name} "
|
||||
f"FOR UPDATE TO {role} USING (false) WITH CHECK (false)"
|
||||
)
|
||||
op.execute(
|
||||
f"CREATE POLICY {role}_delete_{table_name} ON {table_name} "
|
||||
f"FOR DELETE TO {role} USING (false)"
|
||||
)
|
||||
|
||||
|
||||
def _drop_rls(table_name: str) -> None:
|
||||
for role in ["anon", "authenticated"]:
|
||||
op.execute(f"DROP POLICY IF EXISTS {role}_delete_{table_name} ON {table_name}")
|
||||
op.execute(f"DROP POLICY IF EXISTS {role}_update_{table_name} ON {table_name}")
|
||||
op.execute(f"DROP POLICY IF EXISTS {role}_insert_{table_name} ON {table_name}")
|
||||
op.execute(f"DROP POLICY IF EXISTS {role}_select_{table_name} ON {table_name}")
|
||||
op.execute(f"ALTER TABLE {table_name} DISABLE ROW LEVEL SECURITY")
|
||||
```
|
||||
|
||||
**Step 5: Verify migration file**
|
||||
|
||||
Check that all imports are correct:
|
||||
|
||||
```python
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
```
|
||||
|
||||
**Step 6: Commit migration**
|
||||
|
||||
```bash
|
||||
git add backend/alembic/versions/20260304_simplify_agent_architecture.py
|
||||
git commit -m "feat(db): add migration to simplify agent architecture"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Delete UserAgents Model
|
||||
|
||||
**Files:**
|
||||
- Delete: `backend/src/models/user_agents.py`
|
||||
- Modify: `backend/src/models/__init__.py`
|
||||
|
||||
**Step 1: Remove import from models/__init__.py**
|
||||
|
||||
Edit `backend/src/models/__init__.py`:
|
||||
|
||||
Remove these lines:
|
||||
```python
|
||||
from models.user_agents import UserAgent
|
||||
```
|
||||
|
||||
And remove `"UserAgent"` from `__all__` list.
|
||||
|
||||
**Step 2: Delete user_agents.py file**
|
||||
|
||||
```bash
|
||||
rm backend/src/models/user_agents.py
|
||||
```
|
||||
|
||||
**Step 3: Verify no other imports**
|
||||
|
||||
Run: `cd backend && grep -r "from models.user_agents" src/`
|
||||
|
||||
Expected: No results (the only import, in `__init__.py`, was already removed in Step 1)
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/models/user_agents.py backend/src/models/__init__.py
|
||||
git commit -m "refactor(models): remove UserAgents model"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Rename UserAgentCatalog to SystemAgents
|
||||
|
||||
**Files:**
|
||||
- Rename: `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py`
|
||||
- Modify: `backend/src/models/__init__.py`
|
||||
|
||||
**Step 1: Rename model file**
|
||||
|
||||
```bash
|
||||
mv backend/src/models/user_agent_catalog.py backend/src/models/system_agents.py
|
||||
```
|
||||
|
||||
**Step 2: Update class name in system_agents.py**
|
||||
|
||||
Edit `backend/src/models/system_agents.py`:
|
||||
|
||||
Change:
|
||||
```python
|
||||
class UserAgentCatalog(TimestampMixin, Base):
|
||||
__tablename__: str = "user_agent_catalog"
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
class SystemAgents(TimestampMixin, Base):
|
||||
__tablename__: str = "system_agents"
|
||||
```
|
||||
|
||||
**Step 3: Update imports in models/__init__.py**
|
||||
|
||||
Edit `backend/src/models/__init__.py`:
|
||||
|
||||
Change:
|
||||
```python
|
||||
from models.user_agent_catalog import UserAgentCatalog
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
from models.system_agents import SystemAgents
|
||||
```
|
||||
|
||||
And change `"UserAgentCatalog"` to `"SystemAgents"` in `__all__` list.
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/models/
|
||||
git commit -m "refactor(models): rename UserAgentCatalog to SystemAgents"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Update Configuration Files
|
||||
|
||||
**Files:**
|
||||
- Rename: `backend/src/core/config/static/database/user_agent_catalog.yaml`
|
||||
→ `backend/src/core/config/static/database/system_agents.yaml`
|
||||
- Modify: `backend/src/core/config/initial/init_data.py`
|
||||
|
||||
**Step 1: Rename YAML file**
|
||||
|
||||
```bash
|
||||
mv backend/src/core/config/static/database/user_agent_catalog.yaml \
|
||||
backend/src/core/config/static/database/system_agents.yaml
|
||||
```
|
||||
|
||||
**Step 2: Update init_data.py imports**
|
||||
|
||||
Edit `backend/src/core/config/initial/init_data.py`:
|
||||
|
||||
Change:
|
||||
```python
|
||||
from models.user_agent_catalog import UserAgentCatalog
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
from models.system_agents import SystemAgents
|
||||
```
|
||||
|
||||
**Step 3: Update Pydantic models**
|
||||
|
||||
Change:
|
||||
```python
|
||||
class UserAgentCatalogSeed(BaseModel):
|
||||
agent_type: str
|
||||
llm_model_code: str
|
||||
status: str
|
||||
config: dict[str, Any]
|
||||
|
||||
|
||||
class UserAgentCatalogYaml(BaseModel):
|
||||
agents: list[UserAgentCatalogSeed]
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
class SystemAgentsSeed(BaseModel):
|
||||
agent_type: str
|
||||
llm_model_code: str
|
||||
status: str
|
||||
config: dict[str, Any]
|
||||
|
||||
|
||||
class SystemAgentsYaml(BaseModel):
|
||||
agents: list[SystemAgentsSeed]
|
||||
```
|
||||
|
||||
**Step 4: Update path function**
|
||||
|
||||
Change:
|
||||
```python
|
||||
def _default_user_agent_catalog_path() -> Path:
|
||||
return (
|
||||
Path(__file__).resolve().parents[1]
|
||||
/ "static"
|
||||
/ "database"
|
||||
/ "user_agent_catalog.yaml"
|
||||
)
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
def _default_system_agents_path() -> Path:
|
||||
return (
|
||||
Path(__file__).resolve().parents[1]
|
||||
/ "static"
|
||||
/ "database"
|
||||
/ "system_agents.yaml"
|
||||
)
|
||||
```
|
||||
|
||||
**Step 5: Update load function**
|
||||
|
||||
Change:
|
||||
```python
|
||||
def load_user_agent_catalog(catalog_path: Path | None = None) -> dict[str, Any]:
|
||||
path = catalog_path or _default_user_agent_catalog_path()
|
||||
with path.open("r", encoding="utf-8") as file:
|
||||
loaded = yaml.safe_load(file) or {}
|
||||
if not isinstance(loaded, dict):
|
||||
raise ValueError(f"Invalid user agent catalog format: {path}")
|
||||
raw_agents = loaded.get("agents", [])
|
||||
if not isinstance(raw_agents, list):
|
||||
raise ValueError(f"Invalid user agent catalog agents section: {path}")
|
||||
try:
|
||||
parsed = UserAgentCatalogYaml.model_validate({"agents": list(raw_agents)})
|
||||
except ValidationError as exc:
|
||||
raise ValueError(f"Invalid user agent catalog data: {path}") from exc
|
||||
|
||||
return parsed.model_dump()
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
def load_system_agents(catalog_path: Path | None = None) -> dict[str, Any]:
|
||||
path = catalog_path or _default_system_agents_path()
|
||||
with path.open("r", encoding="utf-8") as file:
|
||||
loaded = yaml.safe_load(file) or {}
|
||||
if not isinstance(loaded, dict):
|
||||
raise ValueError(f"Invalid system agents format: {path}")
|
||||
raw_agents = loaded.get("agents", [])
|
||||
if not isinstance(raw_agents, list):
|
||||
raise ValueError(f"Invalid system agents agents section: {path}")
|
||||
try:
|
||||
parsed = SystemAgentsYaml.model_validate({"agents": list(raw_agents)})
|
||||
except ValidationError as exc:
|
||||
raise ValueError(f"Invalid system agents data: {path}") from exc
|
||||
|
||||
return parsed.model_dump()
|
||||
```
|
||||
|
||||
**Step 6: Update upsert function**
|
||||
|
||||
Change:
|
||||
```python
|
||||
async def _upsert_user_agent_catalog(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
agent_type: str,
|
||||
llm_id: uuid.UUID,
|
||||
status: str,
|
||||
config: dict[str, Any],
|
||||
) -> None:
|
||||
result = await session.execute(
|
||||
select(UserAgentCatalog).where(UserAgentCatalog.agent_type == agent_type)
|
||||
)
|
||||
catalog_entry = result.scalar_one_or_none()
|
||||
|
||||
if catalog_entry is None:
|
||||
session.add(
|
||||
UserAgentCatalog(
|
||||
agent_type=agent_type,
|
||||
llm_id=llm_id,
|
||||
status=status,
|
||||
config=config,
|
||||
)
|
||||
)
|
||||
else:
|
||||
catalog_entry.llm_id = llm_id
|
||||
catalog_entry.status = status
|
||||
catalog_entry.config = config
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
async def _upsert_system_agents(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
agent_type: str,
|
||||
llm_id: uuid.UUID,
|
||||
status: str,
|
||||
config: dict[str, Any],
|
||||
) -> None:
|
||||
result = await session.execute(
|
||||
select(SystemAgents).where(SystemAgents.agent_type == agent_type)
|
||||
)
|
||||
catalog_entry = result.scalar_one_or_none()
|
||||
|
||||
if catalog_entry is None:
|
||||
session.add(
|
||||
SystemAgents(
|
||||
agent_type=agent_type,
|
||||
llm_id=llm_id,
|
||||
status=status,
|
||||
config=config,
|
||||
)
|
||||
)
|
||||
else:
|
||||
catalog_entry.llm_id = llm_id
|
||||
catalog_entry.status = status
|
||||
catalog_entry.config = config
|
||||
```
|
||||
|
||||
**Step 7: Update initialize function**
|
||||
|
||||
Change:
|
||||
```python
|
||||
async def initialize_user_agent_catalog() -> None:
|
||||
"""Initialize user agent catalog from YAML."""
|
||||
catalog = load_user_agent_catalog()
|
||||
|
||||
async with AsyncSessionLocal() as session:
|
||||
async with session.begin():
|
||||
for agent in catalog["agents"]:
|
||||
result = await session.execute(
|
||||
select(Llm).where(Llm.model_code == agent["llm_model_code"])
|
||||
)
|
||||
llm = result.scalar_one_or_none()
|
||||
if llm is None:
|
||||
raise RuntimeError(
|
||||
f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
|
||||
)
|
||||
|
||||
await _upsert_user_agent_catalog(
|
||||
session,
|
||||
agent_type=agent["agent_type"],
|
||||
llm_id=llm.id,
|
||||
status=agent["status"],
|
||||
config=agent["config"],
|
||||
)
|
||||
|
||||
logger.info("Initialized user agent catalog")
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
async def initialize_system_agents() -> None:
|
||||
"""Initialize system agents from YAML."""
|
||||
catalog = load_system_agents()
|
||||
|
||||
async with AsyncSessionLocal() as session:
|
||||
async with session.begin():
|
||||
for agent in catalog["agents"]:
|
||||
result = await session.execute(
|
||||
select(Llm).where(Llm.model_code == agent["llm_model_code"])
|
||||
)
|
||||
llm = result.scalar_one_or_none()
|
||||
if llm is None:
|
||||
raise RuntimeError(
|
||||
f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'"
|
||||
)
|
||||
|
||||
await _upsert_system_agents(
|
||||
session,
|
||||
agent_type=agent["agent_type"],
|
||||
llm_id=llm.id,
|
||||
status=agent["status"],
|
||||
config=agent["config"],
|
||||
)
|
||||
|
||||
logger.info("Initialized system agents")
|
||||
```
|
||||
|
||||
**Step 8: Update initialize_data function**
|
||||
|
||||
Change:
|
||||
```python
|
||||
async def initialize_data() -> bool:
|
||||
"""Initialize bootstrap data."""
|
||||
await initialize_llm_catalog()
|
||||
await initialize_user_agent_catalog()
|
||||
|
||||
return True
|
||||
```
|
||||
|
||||
To:
|
||||
```python
|
||||
async def initialize_data() -> bool:
|
||||
"""Initialize bootstrap data."""
|
||||
await initialize_llm_catalog()
|
||||
await initialize_system_agents()
|
||||
|
||||
return True
|
||||
```
|
||||
|
||||
**Step 9: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/config/
|
||||
git commit -m "refactor(config): rename user_agent_catalog to system_agents"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Run Migration
|
||||
|
||||
**Step 1: Run migration**
|
||||
|
||||
```bash
|
||||
cd backend && uv run alembic upgrade head
|
||||
```
|
||||
|
||||
Expected: Migration runs successfully
|
||||
|
||||
**Step 2: Verify tables**
|
||||
|
||||
Connect to database and check:
|
||||
- `user_agents` table should NOT exist
|
||||
- `system_agents` table should exist
|
||||
- `memories.agent_id` column should NOT exist
|
||||
|
||||
**Step 3: Test downgrade (required by the success criteria)**
|
||||
|
||||
```bash
|
||||
cd backend && uv run alembic downgrade -1
|
||||
```
|
||||
|
||||
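Expected: Downgrade completes without errors; the `user_agents` table, the `user_agent_catalog` table name, and the `memories.agent_id` column are restored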
|
||||
|
||||
**Step 4: Re-run upgrade**
|
||||
|
||||
```bash
|
||||
cd backend && uv run alembic upgrade head
|
||||
```
|
||||
|
||||
Expected: Migration runs successfully again
|
||||
|
||||
---
|
||||
|
||||
## Task 6: Run Tests and Linting
|
||||
|
||||
**Step 1: Run type checking**
|
||||
|
||||
```bash
|
||||
cd backend && uv run basedpyright src/
|
||||
```
|
||||
|
||||
Expected: No errors
|
||||
|
||||
**Step 2: Run linting**
|
||||
|
||||
```bash
|
||||
cd backend && uv run ruff check src/
|
||||
```
|
||||
|
||||
Expected: No errors
|
||||
|
||||
**Step 3: Run tests**
|
||||
|
||||
```bash
|
||||
cd backend && uv run pytest tests/
|
||||
```
|
||||
|
||||
Expected: All tests pass
|
||||
|
||||
**Step 4: Fix any failures**
|
||||
|
||||
If any tests fail due to `UserAgentCatalog` references, update them to use `SystemAgents`; tests that depend on the deleted `UserAgent` model must be removed or rewritten.
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Final Verification
|
||||
|
||||
**Step 1: Search for any remaining references**
|
||||
|
||||
```bash
|
||||
cd backend && grep -r "user_agents" src/ --include="*.py"
|
||||
cd backend && grep -r "UserAgent" src/ --include="*.py"
|
||||
```
|
||||
|
||||
Expected: No results (migration files under `backend/alembic/` still mention these names, but they are outside `src/` and are not searched)
|
||||
|
||||
**Step 2: Test new user registration**
|
||||
|
||||
Start the backend server and register a new user. Verify:
|
||||
- Profile is created
|
||||
- No user_agents records are created
|
||||
- profiles.settings contains `agent_prompts: {}`
|
||||
|
||||
**Step 3: Commit final changes**
|
||||
|
||||
```bash
|
||||
git add .
|
||||
git commit -m "feat: complete agent architecture simplification"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Migration runs successfully (upgrade and downgrade)
|
||||
- [ ] No UserAgent model references in code
|
||||
- [ ] SystemAgents model works correctly
|
||||
- [ ] All tests pass
|
||||
- [ ] Linting passes
|
||||
- [ ] Type checking passes
|
||||
- [ ] New user registration works without user_agents
|
||||
|
||||
## Notes
|
||||
|
||||
- Keep the design document updated if any changes are made during implementation
|
||||
- Test migration thoroughly before deploying to production
|
||||
- Backup database before running migration in production
|
||||
@@ -1,81 +0,0 @@
|
||||
# Agent Runtime Closed Loop E2E Design
|
||||
|
||||
## 背景
|
||||
|
||||
当前 `test_agent_sse_flow.py` 不能稳定证明真实闭环:
|
||||
- `session_id` 由随机 UUID 生成,导致 `POST /api/v1/agent/runs` 经常 404。
|
||||
- 测试脚本存在不可达重复代码,诊断信息不完整。
|
||||
- 未覆盖首聊自动建会话语义,和真实聊天入口不匹配。
|
||||
|
||||
目标是验证真实环境下业务闭环是否可用:
|
||||
1. 用户请求 `agent` 路由
|
||||
2. 请求进入异步任务
|
||||
3. runtime 读取 `system_agents` 和 `llm` 配置并构建执行流程
|
||||
4. 真实 LLM 请求发出并返回
|
||||
5. `sessions`/`messages` 正确落库
|
||||
6. 成本和 token 统计正确
|
||||
7. 事件按 AG-UI 规范发布并可由 `stream_events` 订阅
|
||||
|
||||
## 设计原则
|
||||
|
||||
- 真实优先:不使用 mock,不替换 queue/redis/db/llm。
|
||||
- 双轨验证:
|
||||
- 诊断脚本用于本地排障(快速观察全链路状态)。
|
||||
- pytest E2E 用例用于可重复回归。
|
||||
- 明确前置条件:必须先使用 `infra/scripts/app.sh start` 启动 tmux 服务。
|
||||
- 本地真实 LLM 基线:DashScope Qwen。
|
||||
|
||||
## API 契约调整
|
||||
|
||||
### `POST /api/v1/agent/runs`
|
||||
|
||||
- 现状:`session_id` 必填且必须存在。
|
||||
- 新契约:`session_id` 可选。
|
||||
- 有值:复用现有会话,校验 owner。
|
||||
- 无值:在服务层先创建会话,再入队 run。
|
||||
- 响应扩展:返回 `created` 标识是否为首聊自动建会话。
|
||||
|
||||
该契约与聊天产品行为一致:用户首条消息即可开始,不需要前置调用创建会话接口。
|
||||
|
||||
## 数据关系与删除语义
|
||||
|
||||
- `messages.session_id -> sessions.id` 为外键,且硬删除级联(`ondelete=CASCADE`)。
|
||||
- 软删除需要补齐级联:
|
||||
- 软删 `sessions` 时,同事务更新对应 `messages.deleted_at`。
|
||||
- E2E 增加验证,确保软删后默认查询不可见。
|
||||
|
||||
## 测试架构
|
||||
|
||||
### A. 诊断脚本(根目录)
|
||||
|
||||
重构 `test_agent_sse_flow.py`:
|
||||
- 增加环境健康检查(web/redis/db)。
|
||||
- 支持两种模式:
|
||||
- `--new-session`:不传 `session_id`,验证首聊自动创建。
|
||||
- `--reuse-session <id>`:验证复聊路径。
|
||||
- 输出结构化阶段日志:HTTP、task_id、SSE 事件、数据库断言、失败根因。
|
||||
|
||||
### B. pytest E2E(`backend/tests/e2e`)
|
||||
|
||||
新增 `test_agent_closed_loop_live.py`:
|
||||
- 标记为 `live`,默认不在 CI 执行。
|
||||
- 用真实 JWT、真实 HTTP 请求、真实 SSE 订阅。
|
||||
- 断言最小闭环标准:
|
||||
- run 返回 202
|
||||
- SSE 至少收到 `RUN_STARTED` 与终态(`RUN_FINISHED` 或 `RUN_ERROR`)
|
||||
- `sessions` 状态和计数更新
|
||||
- `messages` 有新增记录
|
||||
- token/cost 字段非负且会话聚合一致
|
||||
|
||||
## 验收标准
|
||||
|
||||
- `uv run python test_agent_sse_flow.py --new-session` 通过。
|
||||
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -v -m live` 通过。
|
||||
- 首聊场景不需要外部先建 `session_id`。
|
||||
- 软删除会话后,消息软删除行为与约束一致。
|
||||
|
||||
## 风险与回退
|
||||
|
||||
- 真实 LLM 网络抖动会造成不稳定:通过重试和超时策略降低误报。
|
||||
- 生产契约变更风险:保持字段向后兼容(原 `session_id` 仍可传)。
|
||||
- 如果新契约引入问题,可临时退回“必传 session_id”路径并保留测试脚本诊断能力。
|
||||
@@ -1,230 +0,0 @@
|
||||
# Agent Runtime Closed Loop E2E Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** 让 agent 闭环在真实本地环境中可验证:`runs` 支持首聊自动建会话,并通过真实异步任务、真实 LLM、真实落库与真实 SSE 证明端到端可用。
|
||||
|
||||
**Architecture:** 在 `v1/agent` 服务层引入“可选 session_id + 自动建会话”语义;保持已有 owner 鉴权路径。重构诊断脚本并新增 live E2E 用例,统一验证 run 入队、事件流、数据库状态、成本统计与删除语义。通过最小侵入改造现有 run/resume 流程,确保兼容已存在调用。
|
||||
|
||||
**Tech Stack:** FastAPI, SQLAlchemy async, Celery, Redis Stream, LiteLLM, PyJWT, pytest, httpx
|
||||
|
||||
---
|
||||
|
||||
### Task 1: 扩展 API 契约(session_id 可选)
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/src/v1/agent/schemas.py`
|
||||
- Modify: `backend/src/v1/agent/router.py`
|
||||
- Test: `backend/tests/integration/v1/agent/test_routes.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
在 `test_routes.py` 新增用例:请求体不传 `session_id` 仍返回 202,且响应含 `session_id`。
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -k "runs and session" -v`
|
||||
Expected: FAIL,提示 `session_id` 缺失导致 422 或 mock 接口签名不匹配。
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
- `RunRequest.session_id` 改为可选。
|
||||
- `enqueue_run` 调用 service 时传可选值。
|
||||
- `TaskAcceptedResponse` 增加 `created: bool` 字段。
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
|
||||
Expected: PASS。
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/v1/agent/schemas.py backend/src/v1/agent/router.py backend/tests/integration/v1/agent/test_routes.py
|
||||
git commit -m "feat: allow agent runs without pre-created session"
|
||||
```
|
||||
|
||||
### Task 2: 服务层支持自动建会话并保持鉴权
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/src/v1/agent/service.py`
|
||||
- Modify: `backend/src/v1/agent/repository.py`
|
||||
- Modify: `backend/src/v1/agent/dependencies.py`
|
||||
- Test: `backend/tests/unit/v1/agent/test_service.py` (new)
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
新增单测覆盖:
|
||||
- `session_id is None` 时调用 `create_session_for_user` 并返回 `created=True`
|
||||
- `session_id 有值` 时复用并校验 owner
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
|
||||
Expected: FAIL,当前 service 无自动建会话能力。
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
- repository 增加 `create_session_for_user(user_id)`。
|
||||
- service `enqueue_run` 处理两条路径:
|
||||
- 无 `session_id`:先创建 session。
|
||||
- 有 `session_id`:校验 owner。
|
||||
- 返回 `TaskAccepted(task_id, session_id, created)`。
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
|
||||
Expected: PASS。
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/v1/agent/service.py backend/src/v1/agent/repository.py backend/src/v1/agent/dependencies.py backend/tests/unit/v1/agent/test_service.py
|
||||
git commit -m "feat: auto-create chat session on first agent run"
|
||||
```
|
||||
|
||||
### Task 3: 对齐 runtime 闭环数据断言(messages/sessions/cost)
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/src/core/agent/application/run_service.py`
|
||||
- Modify: `backend/src/core/agent/application/resume_service.py`
|
||||
- Modify: `backend/src/core/agent/infrastructure/persistence/message_repository.py`
|
||||
- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
|
||||
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
在集成测试增加断言:
|
||||
- `sessions.total_tokens`、`sessions.total_cost` 有更新
|
||||
- `messages` 的 token/cost 字段与 session 聚合一致
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
|
||||
Expected: FAIL,当前默认 token/cost 为 0,未做聚合更新。
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
- run/resume 流程接入 usage/cost 结果(来自 litellm 返回或 fallback 规则)。
|
||||
- message 写入时填充 input/output tokens 与 cost。
|
||||
- session 更新时累加 total_tokens/total_cost。
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
|
||||
Expected: PASS。
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/application/run_service.py backend/src/core/agent/application/resume_service.py backend/src/core/agent/infrastructure/persistence/message_repository.py backend/src/core/agent/infrastructure/persistence/session_repository.py backend/tests/integration/core/agent/test_queue_run_resume.py
|
||||
git commit -m "feat: persist runtime token and cost aggregates"
|
||||
```
|
||||
|
||||
### Task 4: 补齐软删除级联(session -> messages)
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py`
|
||||
- Modify: `backend/src/v1/agent/service.py`
|
||||
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
新增用例:软删 session 后,同会话 messages 的 `deleted_at` 同步写入。
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
|
||||
Expected: FAIL,当前无软删级联。
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
- repository 增加 `soft_delete_session_with_messages(session_id)`。
|
||||
- service 调用时使用同事务批量更新 messages。
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v`
|
||||
Expected: PASS。
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure/persistence/session_repository.py backend/src/v1/agent/service.py backend/tests/integration/core/agent/test_queue_run_resume.py
|
||||
git commit -m "fix: cascade soft delete from sessions to messages"
|
||||
```
|
||||
|
||||
### Task 5: 重构诊断脚本并新增 live E2E
|
||||
|
||||
**Files:**
|
||||
- Modify: `test_agent_sse_flow.py`
|
||||
- Create: `backend/tests/e2e/test_agent_closed_loop_live.py`
|
||||
- Modify: `docs/bugs/2026-03-05-agent-runtime-bugs.md`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
新增 live E2E 用例(`@pytest.mark.live`):
|
||||
- 首聊不传 `session_id` 返回 202
|
||||
- 订阅 SSE 收到关键事件
|
||||
- DB 断言 session/messages/tokens/cost
|
||||
|
||||
**Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
|
||||
Expected: FAIL,当前契约或脚本未对齐。
|
||||
|
||||
**Step 3: Write minimal implementation**
|
||||
|
||||
- 清理脚本重复/不可达逻辑。
|
||||
- 增加健康检查、阶段化日志、超时和错误根因输出。
|
||||
- E2E 用例复用脚本中的 helper(JWT、SSE 解析、DB 断言)。
|
||||
|
||||
**Step 4: Run test to verify it passes**
|
||||
|
||||
Run:
|
||||
- `uv run python test_agent_sse_flow.py --new-session`
|
||||
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
|
||||
|
||||
Expected: PASS。
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add test_agent_sse_flow.py backend/tests/e2e/test_agent_closed_loop_live.py docs/bugs/2026-03-05-agent-runtime-bugs.md
|
||||
git commit -m "test: add live closed-loop agent e2e verification"
|
||||
```
|
||||
|
||||
### Task 6: 全量验证与文档同步
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/runtime/runtime-runbook.md`
|
||||
- Modify: `docs/runtime/runtime-route.md`
|
||||
|
||||
**Step 1: Run targeted checks**
|
||||
|
||||
Run:
|
||||
- `uv run pytest backend/tests/unit/v1/agent/test_service.py -v`
|
||||
- `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v`
|
||||
- `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v`
|
||||
- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v`
|
||||
|
||||
Expected: PASS。
|
||||
|
||||
**Step 2: Run quality gates**
|
||||
|
||||
Run:
|
||||
- `uv run ruff check backend/src backend/tests`
|
||||
- `uv run basedpyright`
|
||||
|
||||
Expected: PASS。
|
||||
|
||||
**Step 3: Update docs**
|
||||
|
||||
记录本地启动流程、真实 LLM 前置配置、live E2E 执行方式和故障排查。
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/runtime/runtime-runbook.md docs/runtime/runtime-route.md
|
||||
git commit -m "docs: document live agent closed-loop e2e workflow"
|
||||
```
|
||||
@@ -1,469 +0,0 @@
|
||||
# Agent Runtime Closed Loop Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Build a production-grade closed-loop agent runtime where `frontend -> FastAPI -> Celery -> run/resume service -> CrewAI -> AG-UI events -> Redis Stream -> SSE` is fully connected and verifiable.
|
||||
|
||||
**Architecture:** Keep HTTP API as control-plane and worker as data-plane. The API validates auth/ownership and enqueues commands, the Celery worker executes run/resume business logic using DB-driven agent config, runtime emits normalized AG-UI events and usage/cost telemetry, all events are persisted to Redis Stream, and SSE endpoint streams from Redis with resume support (`Last-Event-ID`).
|
||||
|
||||
**Tech Stack:** FastAPI, SQLAlchemy AsyncSession, Celery, Redis Streams, CrewAI, LiteLLM, Pydantic, pytest (unit/integration).
|
||||
|
||||
**Confirmed Constraints (locked):**
|
||||
- Persist semantics use existing `messages.role` only (`assistant|user|system|tool`), no new `message_kind` column.
|
||||
- `tool_result` must be semantically complete (especially UI schema); do not store summary-only payload.
|
||||
- Store full `tool_result` payload in Supabase Storage (private bucket) and persist durable object reference in DB metadata; do not rely on expiring signed URL as primary reference.
|
||||
- `metadata` must be fixed and typed via Pydantic model (no free-form drift).
|
||||
- Do not introduce additional business tables for this scope; keep schema minimal.
|
||||
- CrewAI runtime must default to streaming mode.
|
||||
- Full traceability target is final semantic reconstruction of `user/assistant/tool_result`; chunk-level replay is not required.
|
||||
|
||||
**Metadata Contract (fixed, Pydantic-enforced):**
|
||||
- Global required keys for all message metadata: `type`, `run_id`, `turn_id`.
|
||||
- Global optional keys for all message metadata: `event_id`, `parent_message_id`, `error`.
|
||||
- `type=user_input`:
|
||||
- Required: `type`, `run_id`, `turn_id`.
|
||||
- Optional: `input_source`, `client_ts`.
|
||||
- `type=assistant_output`:
|
||||
- Required: `type`, `run_id`, `turn_id`.
|
||||
- Optional: `finish_reason`, `model_provider`, `cost_source`.
|
||||
- `type=tool_call` (`role=assistant`):
|
||||
- Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `tool_args`.
|
||||
- Optional: `tool_schema_version`, `timeout_ms`.
|
||||
- `type=tool_result` (`role=tool`):
|
||||
- Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `storage_bucket`, `storage_path`, `payload_sha256`, `payload_bytes`, `payload_format`.
|
||||
- Optional: `ui_schema_version`, `compression`, `storage_etag`, `render_hints`.
|
||||
- Validation rules:
|
||||
- `messages.role=tool` must use `metadata.type=tool_result`.
|
||||
- `messages.role=assistant` + tool event must use `metadata.type=tool_call` or `assistant_output`.
|
||||
- `tool_result` payload in DB must be reconstructable to AG-UI `TOOL_CALL_RESULT` using Storage object + metadata checksum.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Add Agent Module Skeleton and Contracts
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/__init__.py`
|
||||
- Create: `backend/src/core/agent/application/__init__.py`
|
||||
- Create: `backend/src/core/agent/domain/__init__.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/events/__init__.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/agui/bridge.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/agui/stream.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_agui_bridge.py`
|
||||
|
||||
**Step 1: Write failing tests for event normalization and SSE formatting**
|
||||
|
||||
```python
|
||||
def test_bridge_normalizes_event_type_to_upper_snake() -> None:
|
||||
events = [{"type": "runStarted", "data": {"ok": True}}]
|
||||
out = to_agui_events(events)
|
||||
assert out[0]["type"] == "RUN_STARTED"
|
||||
|
||||
|
||||
def test_sse_format_includes_id_event_data() -> None:
|
||||
payload = to_sse_event(stream_id="1-0", event={"type": "RUN_STARTED", "data": {"a": 1}})
|
||||
assert payload.startswith("id: 1-0\nevent: RUN_STARTED\ndata: {")
|
||||
```
|
||||
|
||||
**Step 2: Run tests and confirm RED**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`
|
||||
Expected: FAIL with missing module/function errors.
|
||||
|
||||
**Step 3: Implement minimal bridge + stream utilities**
|
||||
|
||||
```python
|
||||
def to_agui_events(internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
...
|
||||
|
||||
|
||||
def to_sse_event(stream_id: str, event: dict[str, Any]) -> str:
|
||||
...
|
||||
```
|
||||
|
||||
**Step 4: Run tests and confirm GREEN**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q`
|
||||
Expected: PASS.
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent backend/tests/unit/core/agent/test_agui_bridge.py
|
||||
git commit -m "feat(agent): add ag-ui bridge and sse serializer utilities"
|
||||
```
|
||||
|
||||
### Task 2: Implement Redis Stream Event Store and Reader
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/events/redis_stream.py`
|
||||
- Modify: `backend/src/core/config/settings.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_redis_stream.py`
|
||||
|
||||
**Step 1: Write failing tests for append/read semantics**
|
||||
|
||||
```python
|
||||
def test_append_event_writes_json_payload() -> None:
|
||||
...
|
||||
|
||||
|
||||
def test_read_events_respects_last_event_id() -> None:
|
||||
...
|
||||
```
|
||||
|
||||
**Step 2: Run RED**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`
|
||||
Expected: FAIL.
|
||||
|
||||
**Step 3: Implement Redis stream adapter**
|
||||
|
||||
```python
|
||||
def append_event_sync(*, session_id: UUID, event: dict[str, Any]) -> str:
|
||||
...
|
||||
|
||||
|
||||
async def read_events(...):
|
||||
...
|
||||
```
|
||||
|
||||
**Step 4: Run GREEN**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q`
|
||||
Expected: PASS.
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure/events/redis_stream.py backend/src/core/config/settings.py backend/tests/unit/core/agent/test_redis_stream.py
|
||||
git commit -m "feat(agent): add redis stream event transport for run events"
|
||||
```
|
||||
|
||||
### Task 3: Build CrewAI Runtime + AG-UI Event Mapping + Usage Tracking
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/crewai/factory.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/litellm/client.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py`
|
||||
- Create: `backend/src/core/agent/infrastructure/config/resolver.py`
|
||||
- Modify: `backend/src/core/config/settings.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_litellm_usage.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_config_resolver.py`
|
||||
|
||||
**Step 1: Write failing runtime tests (events + cost + strict errors)**
|
||||
|
||||
```python
|
||||
def test_runtime_emits_text_tool_reasoning_events() -> None:
|
||||
...
|
||||
|
||||
|
||||
def test_runtime_raises_if_model_or_api_key_missing() -> None:
|
||||
...
|
||||
|
||||
|
||||
def test_usage_tracker_extracts_tokens_and_cost() -> None:
|
||||
...
|
||||
```
|
||||
|
||||
**Step 2: Run RED**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`
|
||||
Expected: FAIL.
|
||||
|
||||
**Step 3: Implement runtime and tracker**
|
||||
|
||||
- Register CrewAI event handlers (`Task/LLM/Tool/Reasoning`) and map to AG-UI canonical event types.
|
||||
- Default runtime to streaming mode for CrewAI execution.
|
||||
- Enforce strict config behavior: no `llm_model_code` or provider key -> raise.
|
||||
- Use LiteLLM cost calculator for actual cost; if cost cannot be computed, fail closed (raise), do not silently record zero.
|
||||
|
||||
**Step 4: Run GREEN**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q`
|
||||
Expected: PASS.
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py backend/src/core/config/settings.py
|
||||
git commit -m "feat(agent): implement crewai runtime events and litellm usage-cost auditing"
|
||||
```
|
||||
|
||||
### Task 4: Implement Run/Resume Application Services (DB Config + Persistence)
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/application/run_service.py`
|
||||
- Create: `backend/src/core/agent/application/resume_service.py`
|
||||
- Create: `backend/src/core/agent/application/session_state_persistence.py`
|
||||
- Create: `backend/src/core/agent/domain/state_snapshot.py`
|
||||
- Create: `backend/src/core/agent/domain/tool_correlation.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_state_snapshot.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_tool_correlation.py`
|
||||
|
||||
**Step 1: Write failing tests for DB-driven runtime and aggregate updates**
|
||||
|
||||
```python
|
||||
async def test_run_service_loads_agent_config_from_db_and_persists_messages() -> None:
|
||||
...
|
||||
|
||||
|
||||
async def test_resume_service_requires_pending_tool_call() -> None:
|
||||
...
|
||||
```
|
||||
|
||||
**Step 2: Run RED**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`
|
||||
Expected: FAIL.
|
||||
|
||||
**Step 3: Implement services**
|
||||
|
||||
- `run_service`: read session + system agent config from DB, execute runtime, persist user/assistant messages, update session aggregates.
|
||||
- `resume_service`: validate pending tool call status, enforce idempotency semantics, resume runtime, persist audit fields.
|
||||
- Persist metadata audit (`tokens`, `cost`, `cost_source`, correlation ids) for every assistant message.
|
||||
- Persist tool lifecycle with role-only model:
|
||||
- tool call message uses `role=assistant` with fixed metadata (`type=tool_call`, `tool_call_id`, `tool_name`, arguments reference).
|
||||
- tool result message uses `role=tool` with fixed metadata (`type=tool_result`, `tool_call_id`, `tool_name`, storage bucket/path, checksum, bytes, schema version).
|
||||
- `tool_result` full payload (UI schema) is uploaded to Supabase Storage private bucket; DB stores durable reference and verification fields.
|
||||
- Ensure DB->AG-UI `TOOL_CALL_RESULT` reconstruction is equivalent to SSE-streamed final tool result semantics.
|
||||
- Enforce metadata contract by Pydantic model at write path and read path (reject malformed metadata early).
|
||||
|
||||
**Step 4: Run GREEN**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q`
|
||||
Expected: PASS.
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/application backend/src/core/agent/domain backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py
|
||||
git commit -m "feat(agent): add run-resume app services with db config and audit persistence"
|
||||
```
|
||||
|
||||
### Task 5: Wire Celery Worker Task to Run/Resume and Publish Runtime Events
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/core/agent/infrastructure/queue/tasks.py`
|
||||
- Modify: `backend/src/core/celery/app.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_queue_tasks.py`
|
||||
- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py`
|
||||
|
||||
**Step 1: Write failing queue tests**
|
||||
|
||||
```python
|
||||
def test_run_agent_task_emits_started_runtime_and_finished_events() -> None:
|
||||
...
|
||||
|
||||
|
||||
def test_run_agent_task_emits_error_event_on_exception() -> None:
|
||||
...
|
||||
```
|
||||
|
||||
**Step 2: Run RED**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`
|
||||
Expected: FAIL.
|
||||
|
||||
**Step 3: Implement worker task flow**
|
||||
|
||||
- Decode command type (`run`/`resume`).
|
||||
- Emit lifecycle events (`RUN_STARTED/RUN_RESUMED/RUN_FINISHED/RUN_ERROR`).
|
||||
- Forward runtime callback events to Redis stream immediately.
|
||||
- Persist session status/snapshot after completion.
|
||||
|
||||
**Step 4: Run GREEN**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q`
|
||||
Expected: PASS.
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/core/agent/infrastructure/queue/tasks.py backend/src/core/celery/app.py backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py
|
||||
git commit -m "feat(agent): wire celery run-resume execution and redis event publishing"
|
||||
```
|
||||
|
||||
### Task 6: Implement API Contracts (Run/Resume/SSE) + Auth/Ownership/Idempotency
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/src/v1/agent/schemas.py`
|
||||
- Create: `backend/src/v1/agent/repository.py`
|
||||
- Create: `backend/src/v1/agent/service.py`
|
||||
- Create: `backend/src/v1/agent/router.py`
|
||||
- Create: `backend/src/v1/agent/dependencies.py`
|
||||
- Modify: `backend/src/v1/router.py`
|
||||
- Test: `backend/tests/unit/v1/agent/test_service.py`
|
||||
- Test: `backend/tests/unit/v1/agent/test_owner_guard.py`
|
||||
- Test: `backend/tests/integration/v1/agent/test_routes.py`
|
||||
|
||||
**Step 1: Write failing API tests**
|
||||
|
||||
```python
|
||||
async def test_run_requires_auth_and_returns_202_task_id() -> None:
|
||||
...
|
||||
|
||||
|
||||
async def test_stream_reads_from_last_event_id() -> None:
|
||||
...
|
||||
|
||||
|
||||
def test_resume_idempotency_uses_redis_lock_and_task_key() -> None:
|
||||
...
|
||||
```
|
||||
|
||||
**Step 2: Run RED**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`
|
||||
Expected: FAIL.
|
||||
|
||||
**Step 3: Implement API service/router**
|
||||
|
||||
- `POST /api/v1/agent/runs` enqueue run command.
|
||||
- `POST /api/v1/agent/runs/{session_id}/resume` enqueue resume command with async redis lock + dedup task key.
|
||||
- `GET /api/v1/agent/runs/{session_id}/events` SSE stream from Redis with `Last-Event-ID`.
|
||||
- Enforce auth and session ownership checks on all endpoints.
|
||||
- Validate `tool_call_id` and message length/pattern boundaries.
|
||||
|
||||
**Step 4: Run GREEN**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q`
|
||||
Expected: PASS.
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/src/v1/agent backend/src/v1/router.py backend/tests/unit/v1/agent backend/tests/integration/v1/agent/test_routes.py
|
||||
git commit -m "feat(agent): add authenticated run-resume-sse api with redis-backed idempotency"
|
||||
```
|
||||
|
||||
### Task 7: Add Schema/Migration Contract for Session Snapshot + Audit Fields
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py`
|
||||
- Modify: `backend/src/models/agent_chat_session.py`
|
||||
- Modify: `backend/src/models/agent_chat_message.py`
|
||||
- Test: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py`
|
||||
|
||||
**Migration scope note:**
|
||||
- Fix current schema drift: model has `sessions.state_snapshot` but migration chain does not reliably provide this column in current DB state.
|
||||
- Keep schema minimal; do not add new business tables in this migration.
|
||||
|
||||
**Step 1: Write failing migration contract tests**
|
||||
|
||||
```python
|
||||
def test_session_has_state_snapshot_and_status_contract() -> None:
|
||||
...
|
||||
|
||||
|
||||
def test_message_has_token_cost_and_metadata_contract() -> None:
|
||||
...
|
||||
```
|
||||
|
||||
**Step 2: Run RED**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`
|
||||
Expected: FAIL.
|
||||
|
||||
**Step 3: Implement migration and model alignment**
|
||||
|
||||
- Ensure `state_snapshot`, `status`, token/cost/metadata fields are present and nullable constraints are explicit.
|
||||
- Add/verify indexes needed for role-based semantic reconstruction (`session_id, seq`, and targeted metadata lookups if required).
|
||||
- Ensure `metadata` structure is validated by fixed Pydantic schema at application boundary.
|
||||
- Add DB-level guardrails where feasible (check constraints) for role/metadata consistency without introducing new tables.
|
||||
- Keep reversible downgrade path.
|
||||
|
||||
**Step 4: Run GREEN**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q`
|
||||
Expected: PASS.
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py backend/src/models/agent_chat_session.py backend/src/models/agent_chat_message.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py
|
||||
git commit -m "feat(agent): add db contract for session snapshot and usage audit fields"
|
||||
```
|
||||
|
||||
### Task 8: End-to-End Closure Verification and Docs Update
|
||||
|
||||
**Files:**
|
||||
- Modify: `docs/runtime/runtime-route.md`
|
||||
- Modify: `docs/runtime/runtime-runbook.md`
|
||||
- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py`
|
||||
|
||||
**Step 1: Write integration test for full closure path**
|
||||
|
||||
```python
|
||||
async def test_closed_loop_run_flow_frontend_to_sse() -> None:
|
||||
# run request -> queue command -> runtime events -> redis stream -> sse read
|
||||
...
|
||||
```
|
||||
|
||||
Also verify:
|
||||
- `tool_result` full UI schema is written to Supabase Storage private bucket.
|
||||
- `messages.role=tool` row contains stable storage reference and checksum metadata.
|
||||
- Reading from DB can reconstruct final AG-UI `TOOL_CALL_RESULT` event payload semantics.
|
||||
|
||||
**Step 2: Run RED**
|
||||
|
||||
Run: `uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py -q`
|
||||
Expected: FAIL.
|
||||
|
||||
**Step 3: Implement minimal missing glue and docs**
|
||||
|
||||
- Fill any missing wiring revealed by the test.
|
||||
- Document endpoint contracts, event taxonomy, and operational runbook for redis/celery troubleshooting.
|
||||
|
||||
**Step 4: Run GREEN + full gate verification**
|
||||
|
||||
Run:
|
||||
- `PYTHONPATH=backend/src uv run python backend/src/core/runtime/cli.py migrate`
|
||||
- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
|
||||
- `uv run ruff check backend/src backend/tests`
|
||||
- `uv run basedpyright backend/src`
|
||||
|
||||
Expected:
|
||||
- All relevant tests PASS.
|
||||
- Ruff PASS.
|
||||
- basedpyright 0 errors (notes/warnings can be documented if pre-existing).
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md backend/tests/integration/core/agent/test_session_message_persistence.py
|
||||
git commit -m "docs(agent): document closed-loop runtime and verify end-to-end chain"
|
||||
```
|
||||
|
||||
### Task 9: L2 Mandatory Review Gates
|
||||
|
||||
**Files:**
|
||||
- No direct code changes required; apply fixes if findings appear.
|
||||
|
||||
**Step 1: Run required agents**
|
||||
|
||||
- `tdd-guide` (already enforced by plan sequence)
|
||||
- `refactor-cleaner`
|
||||
- `code-reviewer`
|
||||
- `security-reviewer`
|
||||
|
||||
**Step 2: Fix all CRITICAL/HIGH findings**
|
||||
|
||||
Run targeted tests after each fix.
|
||||
|
||||
**Step 3: Final verification rerun**
|
||||
|
||||
Run:
|
||||
- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q`
|
||||
- `uv run ruff check backend/src backend/tests`
|
||||
- `uv run basedpyright backend/src`
|
||||
|
||||
Expected: no failing tests; no lint errors; no type errors.
|
||||
|
||||
**Step 4: Final commit (if review fixes were needed)**
|
||||
|
||||
```bash
|
||||
git add backend/src backend/tests docs/runtime
|
||||
git commit -m "fix(agent): resolve L2 review findings for closed-loop runtime"
|
||||
```
|
||||
@@ -0,0 +1,746 @@
|
||||
# UserAgentContext & ProfileSettings v1 设计
|
||||
|
||||
**Date:** 2026-03-05
|
||||
**Status:** Approved
|
||||
|
||||
---
|
||||
|
||||
## 目标
|
||||
|
||||
为 Agent Runtime 提供完整的用户画像上下文,通过 Pydantic 约束 profiles.settings 结构,确保:
|
||||
|
||||
1. 运行时入口读取 profile(username/bio/settings)
|
||||
2. settings 结构类型安全、版本可演进
|
||||
3. 关键配置(语言/时区/国家)符合标准格式
|
||||
|
||||
---
|
||||
|
||||
## 架构
|
||||
|
||||
```
|
||||
Profile (DB JSONB)
|
||||
↓
|
||||
ProfileSettings (Pydantic)
|
||||
↓
|
||||
UserAgentContext (DataClass)
|
||||
↓
|
||||
build_global_system_prompt(ctx)
|
||||
```
|
||||
|
||||
**设计原则:**
|
||||
- 唯一入口:`get_user_agent_context(user_id)` 读取并构造上下文
|
||||
- 不可变:UserAgentContext 使用 frozen dataclass
|
||||
- 向后兼容:version 字段预留未来演进
|
||||
|
||||
---
|
||||
|
||||
## ProfileSettings v1 结构
|
||||
|
||||
```json
|
||||
{
|
||||
"version": 1,
|
||||
"preferences": {
|
||||
"interface_language": "zh-CN",
|
||||
"ai_language": "zh-CN",
|
||||
"timezone": "Asia/Shanghai",
|
||||
"country": "CN"
|
||||
},
|
||||
"privacy": {},
|
||||
"notification": {}
|
||||
}
|
||||
```
|
||||
|
||||
### 字段说明
|
||||
|
||||
| 字段 | 类型 | 默认值 | 约束 |
|
||||
|------|------|--------|------|
|
||||
| `version` | int | 1 | 必须为 1(v1 锁定) |
|
||||
| `preferences.interface_language` | str | "zh-CN" | BCP-47 格式 |
|
||||
| `preferences.ai_language` | str | "zh-CN" | BCP-47 格式 |
|
||||
| `preferences.timezone` | str | "Asia/Shanghai" | IANA 时区 |
|
||||
| `preferences.country` | str | "CN" | ISO 3166-1 alpha-2 |
|
||||
| `privacy` | dict | {} | 空对象(预留) |
|
||||
| `notification` | dict | {} | 空对象(预留) |
|
||||
|
||||
### 约束规则
|
||||
|
||||
**1. BCP-47 语言格式**
|
||||
|
||||
正则:`^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$`
|
||||
|
||||
示例:
|
||||
- ✅ zh-CN, en-US, zh-TW, ja-JP
|
||||
- ❌ zh_CN, EN, zh-cn
|
||||
|
||||
**2. IANA 时区**
|
||||
|
||||
使用 `zoneinfo.ZoneInfo` 校验。
|
||||
|
||||
示例:
|
||||
- ✅ Asia/Shanghai, America/New_York, UTC
|
||||
- ❌ CST, GMT+8
|
||||
|
||||
**3. ISO 3166-1 alpha-2 国家代码**
|
||||
|
||||
使用 `pycountry.countries.get(alpha_2=...)` 校验。
|
||||
|
||||
示例:
|
||||
- ✅ CN, US, JP, GB
|
||||
- ❌ CHN, USA, zz
|
||||
|
||||
---
|
||||
|
||||
## UserAgentContext 结构
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class UserAgentContext:
|
||||
user_id: UUID
|
||||
username: str
|
||||
bio: str | None
|
||||
settings: ProfileSettings
|
||||
```
|
||||
|
||||
**设计要点:**
|
||||
- 不可变(frozen=True):防止运行时修改
|
||||
- 完整画像:包含身份(username/bio)和配置(settings)
|
||||
- 唯一构造入口:`get_user_agent_context(user_id)`
|
||||
|
||||
---
|
||||
|
||||
## Pydantic 模型实现
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from dataclasses import dataclass
|
||||
from uuid import UUID
|
||||
import re
|
||||
|
||||
class PreferenceSettings(BaseModel):
|
||||
interface_language: str = "zh-CN"
|
||||
ai_language: str = "zh-CN"
|
||||
timezone: str = "Asia/Shanghai"
|
||||
country: str = "CN"
|
||||
|
||||
@field_validator("interface_language", "ai_language")
|
||||
@classmethod
|
||||
def validate_bcp47(cls, v: str) -> str:
|
||||
pattern = r"^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$"
|
||||
if not re.match(pattern, v):
|
||||
raise ValueError(f"Invalid BCP-47 language tag: {v}")
|
||||
return v
|
||||
|
||||
@field_validator("timezone")
|
||||
@classmethod
|
||||
def validate_iana_timezone(cls, v: str) -> str:
|
||||
import zoneinfo
|
||||
try:
|
||||
zoneinfo.ZoneInfo(v)
|
||||
except Exception:
|
||||
raise ValueError(f"Invalid IANA timezone: {v}")
|
||||
return v
|
||||
|
||||
@field_validator("country")
|
||||
@classmethod
|
||||
def validate_iso_country(cls, v: str) -> str:
|
||||
import pycountry
|
||||
if not pycountry.countries.get(alpha_2=v.upper()):
|
||||
raise ValueError(f"Invalid ISO 3166-1 alpha-2 country code: {v}")
|
||||
return v.upper()
|
||||
|
||||
class ProfileSettings(BaseModel):
|
||||
version: int = Field(default=1, ge=1, le=1)
|
||||
preferences: PreferenceSettings = Field(default_factory=PreferenceSettings)
|
||||
privacy: dict = Field(default_factory=dict)
|
||||
notification: dict = Field(default_factory=dict)
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class UserAgentContext:
|
||||
user_id: UUID
|
||||
username: str
|
||||
bio: str | None
|
||||
settings: ProfileSettings
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 依赖项
|
||||
|
||||
需要添加到 `backend/pyproject.toml`:
|
||||
|
||||
```toml
|
||||
[project]
|
||||
dependencies = ["pycountry>=23.0.0"]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 迁移策略
|
||||
|
||||
**数据库层:**
|
||||
- profiles.settings 保持 JSONB,不做 schema 变更
|
||||
- 现有数据默认值:`{"version": 1, "preferences": {"country": "CN"}}`
|
||||
|
||||
**应用层:**
|
||||
- 读取时:`ProfileSettings.model_validate(profile.settings or {})`
|
||||
- 写入时:`profile.settings = settings.model_dump()`
|
||||
|
||||
---
|
||||
|
||||
## 未来演进
|
||||
|
||||
|
||||
**版本迁移:**
|
||||
- Pydantic 支持多版本共存
|
||||
- 数据库不做破坏性变更
|
||||
|
||||
---
|
||||
|
||||
---
|
||||
|
||||
## AG-UI 事件转发与落库策略
|
||||
|
||||
### 核心原则
|
||||
|
||||
**1. 事件转发时机:**
|
||||
- 只有在 organization 阶段完成后才转发 AG-UI 事件
|
||||
- AG-UI bridge 已实现底层机制,编排层控制转发时机
|
||||
|
||||
**2. 落库时机:**
|
||||
- 意图识别和任务执行阶段:落库但 seq 取负数(用于审计)
|
||||
- 结果反馈阶段:seq 取最新 seq 的绝对值 +1(用于展示)
|
||||
|
||||
### Seq 设计细节
|
||||
|
||||
**意图识别和任务执行阶段(审计用):**
|
||||
- seq 取负数(如 -1, -2)
|
||||
- role: "assistant"(标记为 agent 输出)
|
||||
- content: 阶段的完整输出(用于审计/调试)
|
||||
- 重建会话时通过 `WHERE seq > 0` 过滤,不展示给用户
|
||||
|
||||
**结果反馈阶段(展示用):**
|
||||
- seq 取正数(取最新负数的绝对值 +1)
|
||||
- role: "assistant"
|
||||
- content: OrganizationResult.assistant_text
|
||||
- 重建会话时通过 `WHERE seq > 0` 展示给用户
|
||||
|
||||
**示例:**
|
||||
```
|
||||
| seq | role | content | 展示 |
|
||||
|------|----------|----------------------------|------|
|
||||
| -2 | assistant| ExecutionResult (完整) | 否 |
|
||||
| -1 | assistant| IntentResult (完整) | 否 |
|
||||
| 1 | user | 用户输入 | 是 |
|
||||
| 2 | assistant| OrganizationResult | 是 |
|
||||
```
|
||||
|
||||
### 编排层职责
|
||||
|
||||
```python
|
||||
@listen(intent_stage)
|
||||
async def persist_intent(self, state: FlowState) -> FlowState:
|
||||
# seq 取负数
|
||||
seq = await message_repo.get_next_negative_seq(state.session_id)
|
||||
await message_repo.create(
|
||||
session_id=state.session_id,
|
||||
seq=seq, # 负数
|
||||
role="assistant",
|
||||
content=state.intent_result.model_dump_json(),
|
||||
...
|
||||
)
|
||||
return state
|
||||
|
||||
@listen(execution_stage)
|
||||
async def persist_execution(self, state: FlowState) -> FlowState:
|
||||
# seq 取负数
|
||||
seq = await message_repo.get_next_negative_seq(state.session_id)
|
||||
await message_repo.create(
|
||||
session_id=state.session_id,
|
||||
seq=seq, # 负数
|
||||
role="assistant",
|
||||
content=state.execution_result.model_dump_json(),
|
||||
...
|
||||
)
|
||||
return state
|
||||
|
||||
@listen(organization_stage)
|
||||
async def finalize_flow(self, state: FlowState) -> FlowState:
|
||||
result = state.organization_result
|
||||
|
||||
# seq 取正数(最新负数绝对值+1)
|
||||
seq = await message_repo.get_next_positive_seq(state.session_id)
|
||||
await message_repo.create(
|
||||
session_id=state.session_id,
|
||||
seq=seq, # 正数
|
||||
role="assistant",
|
||||
content=result.assistant_text,
|
||||
...
|
||||
)
|
||||
|
||||
# 触发 AG-UI 事件(由 bridge 处理)
|
||||
return state
|
||||
```
|
||||
|
||||
### Token 和 Cost 累加
|
||||
|
||||
**策略:在内存中累加所有阶段的 token 和 cost,organization 完成后统一落库。**
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class FlowState:
|
||||
# ...
|
||||
tokens: dict[str, dict] = field(default_factory=dict)
|
||||
cost: Decimal = Decimal("0")
|
||||
currency: str = "CNY"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CrewAI Flow 三阶段设计
|
||||
|
||||
### 架构概览
|
||||
|
||||
```
|
||||
User Input + UserAgentContext
|
||||
↓
|
||||
@start() begin()
|
||||
↓
|
||||
@listen() intent_stage() → 判断 direct_answer
|
||||
↓ (router)
|
||||
├─ DIRECT_RESPONSE → 直接返回
|
||||
└─ NEEDS_EXECUTION
|
||||
↓
|
||||
@listen() execution_stage() → 任务执行/工具调用
|
||||
↓
|
||||
@listen() organization_stage() → 结果组织与表达
|
||||
↓
|
||||
返回给用户
|
||||
```
|
||||
|
||||
### 三阶段职责
|
||||
|
||||
**1. Intent Recognition(意图识别)**
|
||||
- Agent Type: `INTENT_RECOGNITION`
|
||||
- 输出结构(最小化设计):
|
||||
```python
|
||||
class IntentResult(BaseModel):
|
||||
direct_answer: bool # 是否可以直接回答
|
||||
intent_analysis: str # 意图分析文本(用于调试/审计)
|
||||
execution_prompt: str # 给 execution 阶段的提示词(direct_answer=false时使用)
|
||||
direct_response: str # 直接回复文本(direct_answer=true时使用)
|
||||
```
|
||||
- 短路逻辑:
|
||||
- `direct_answer=true` → 完全跳过 execution 和 organization,直接返回 direct_response
|
||||
- `direct_answer=false` → 进入 execution 阶段
|
||||
- 输出约束:使用 `output_pydantic=IntentResult`
|
||||
- **落库策略**:落库到 messages 表,但重建会话时不展示
|
||||
|
||||
**2. Task Execution(任务执行)**
|
||||
- Agent Type: `TASK_EXECUTION`
|
||||
- 输入:IntentResult.execution_prompt + IntentResult.intent_analysis
|
||||
- 职责:
|
||||
- 执行复杂任务(查询数据库、调用工具、多步骤推理)
|
||||
- 返回结构化执行结果
|
||||
- 输出结构(最小化设计):
|
||||
```python
|
||||
class ExecutionResult(BaseModel):
|
||||
execution_summary: str # 任务执行摘要(用于调试/审计)
|
||||
organization_prompt: str # 给 organization 阶段的提示词
|
||||
execution_data: dict = {} # 执行结果的结构化数据
|
||||
```
|
||||
- 输出约束:使用 `output_pydantic=ExecutionResult`
|
||||
- **落库策略**:落库到 messages 表,但重建会话时不展示
|
||||
|
||||
**3. Result Reporting(结果报告)**
|
||||
- Agent Type: `RESULT_REPORTING`
|
||||
- 输入:
|
||||
- IntentResult(意图识别结果)
|
||||
- ExecutionResult(任务执行情况)
|
||||
- 职责:
|
||||
- 结合意图分析和执行结果,格式化为用户友好的响应
|
||||
- 应用个性化模板(基于 UserAgentContext)
|
||||
- 输出结构(最小化设计):
|
||||
```python
|
||||
class OrganizationResult(BaseModel):
|
||||
assistant_text: str # 最终回复文本
|
||||
response_metadata: dict = {} # 响应元数据(可选)
|
||||
```
|
||||
- 输出约束:使用 `output_pydantic=OrganizationResult`
|
||||
- **唯一展示阶段**:重建会话时只展示此阶段的 message
|
||||
- **唯一转发阶段**:只有此阶段的输出需要通过 AG-UI 事件转发
|
||||
|
||||
### Flow 状态管理
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class FlowState:
|
||||
user_input: str
|
||||
context: UserAgentContext
|
||||
stage_trace: list[str] = field(default_factory=list)
|
||||
intent_result: IntentResult | None = None
|
||||
execution_result: ExecutionResult | None = None
|
||||
organization_result: OrganizationResult | None = None
|
||||
assistant_text: str = ""
|
||||
tokens: dict = field(default_factory=dict)
|
||||
cost: Decimal = Decimal("0")
|
||||
```
|
||||
|
||||
### 数据流向
|
||||
|
||||
```
|
||||
User Input + UserAgentContext
|
||||
↓
|
||||
@start() begin()
|
||||
↓
|
||||
@listen() intent_stage()
|
||||
├─ IntentResult.direct_answer=true
|
||||
│ ↓
|
||||
│ 跳过 execution,直接 organization
|
||||
│ ↓
|
||||
│ organization_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
|
||||
│ ↓
|
||||
│ OrganizationResult → AG-UI 事件 + 落库
|
||||
│
|
||||
└─ IntentResult.direct_answer=false
|
||||
↓
|
||||
execution_stage(IntentResult.next_stage_prompt, IntentResult.metadata)
|
||||
↓
|
||||
ExecutionResult
|
||||
↓
|
||||
organization_stage(ExecutionResult.next_stage_prompt, ExecutionResult.metadata)
|
||||
↓
|
||||
OrganizationResult → AG-UI 事件 + 落库
|
||||
```
|
||||
|
||||
### 三阶段输出约束
|
||||
|
||||
**所有阶段使用 `output_pydantic` 约束输出:**
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
|
||||
class IntentResult(BaseModel):
|
||||
direct_answer: bool
|
||||
next_stage_prompt: str
|
||||
metadata: dict = {}
|
||||
|
||||
class ExecutionResult(BaseModel):
|
||||
next_stage_prompt: str
|
||||
metadata: dict = {}
|
||||
|
||||
class OrganizationResult(BaseModel):
|
||||
assistant_text: str
|
||||
metadata: dict = {}
|
||||
|
||||
# Task 定义
|
||||
intent_task = Task(
|
||||
description="Analyze user intent",
|
||||
expected_output="Intent analysis",
|
||||
agent=intent_agent,
|
||||
output_pydantic=IntentResult,
|
||||
)
|
||||
|
||||
execution_task = Task(
|
||||
description="Execute tasks",
|
||||
expected_output="Execution result",
|
||||
agent=execution_agent,
|
||||
output_pydantic=ExecutionResult,
|
||||
)
|
||||
|
||||
organization_task = Task(
|
||||
description="Format response",
|
||||
expected_output="User-friendly response",
|
||||
agent=organization_agent,
|
||||
output_pydantic=OrganizationResult,
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 系统选模逻辑设计
|
||||
|
||||
### 问题背景
|
||||
|
||||
旧逻辑:`order_by(...).limit(1)` 只取排序后的第一条系统 agent 记录,不区分阶段。
|
||||
|
||||
新逻辑:按 `agent_type` 显式映射到三阶段。
|
||||
|
||||
### 选模规则
|
||||
|
||||
**必需的 Agent Types:**
|
||||
- `INTENT_RECOGNITION` → 用于 intent_stage
|
||||
- `TASK_EXECUTION` → 用于 execution_stage
|
||||
- `RESULT_REPORTING` → 用于 organization_stage
|
||||
|
||||
**查询逻辑:**
|
||||
|
||||
```python
|
||||
REQUIRED_TYPES = {"INTENT_RECOGNITION", "TASK_EXECUTION", "RESULT_REPORTING"}
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class StageModels:
|
||||
intent: SystemAgentCatalog
|
||||
execution: SystemAgentCatalog
|
||||
organization: SystemAgentCatalog
|
||||
|
||||
def resolve_stage_models(rows: list[SystemAgentCatalog]) -> StageModels:
|
||||
by_type = {row.agent_type: row for row in rows}
|
||||
missing = REQUIRED_TYPES - set(by_type.keys())
|
||||
if missing:
|
||||
raise ValueError(f"Missing required agent types: {missing}")
|
||||
|
||||
return StageModels(
|
||||
intent=by_type["INTENT_RECOGNITION"],
|
||||
execution=by_type["TASK_EXECUTION"],
|
||||
organization=by_type["RESULT_REPORTING"],
|
||||
)
|
||||
```
|
||||
|
||||
**初始化数据约束:**
|
||||
- `system_agents` 表必须包含三种类型的记录
|
||||
- 运行时启动时验证完整性
|
||||
|
||||
---
|
||||
|
||||
## 人民币结算策略设计
|
||||
|
||||
### 设计原则
|
||||
|
||||
1. **保留 LiteLLM 语义**:`completion_cost()` 始终返回 USD
|
||||
2. **业务层映射**:根据用户国家(`profiles.settings.preferences.country`)决定落库货币
|
||||
3. **默认人民币**:中国用户或无国家信息默认 CNY
|
||||
4. **汇率配置**:USD/CNY 汇率通过环境变量配置
|
||||
|
||||
### 货币来源
|
||||
|
||||
```
|
||||
UserAgentContext.settings.preferences.country
|
||||
↓
|
||||
resolve_billing_currency(country)
|
||||
↓
|
||||
CN 或未设置 → CNY
|
||||
US → USD
|
||||
其他 → USD
|
||||
```
|
||||
|
||||
### 结算流程
|
||||
|
||||
```
|
||||
LiteLLM completion_cost()
|
||||
↓ (USD)
|
||||
resolve_billing_cost(usd_cost, country)
|
||||
↓
|
||||
├─ country="CN" or None → CNY (乘以汇率)
|
||||
└─ 其他(如 country="US") → USD (保持原值)
|
||||
↓
|
||||
messages.cost + messages.currency
|
||||
sessions.total_cost (同一货币)
|
||||
```
|
||||
|
||||
### 汇率配置
|
||||
|
||||
```python
|
||||
# 环境变量
|
||||
BILLING_USD_CNY_RATE=7.2
|
||||
|
||||
# 默认值
|
||||
DEFAULT_USD_CNY_RATE = Decimal("7.2")
|
||||
```
|
||||
|
||||
### 结算模型
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class BillingCost:
|
||||
currency: str # "CNY" or "USD"
|
||||
cost: Decimal # 6位小数精度
|
||||
|
||||
def resolve_billing_cost(
|
||||
usd_cost: Decimal,
|
||||
country: str | None,
|
||||
usd_cny_rate: Decimal = DEFAULT_USD_CNY_RATE,
|
||||
) -> BillingCost:
|
||||
currency = "CNY" if (country or "CN").upper() == "CN" else "USD"
|
||||
if currency == "CNY":
|
||||
cost = usd_cost * usd_cny_rate
|
||||
else:
|
||||
cost = usd_cost
|
||||
return BillingCost(
|
||||
currency=currency,
|
||||
cost=cost.quantize(Decimal("0.000001"))
|
||||
)
|
||||
```
|
||||
|
||||
### 数据库落库
|
||||
|
||||
**messages 表:**
|
||||
- `cost`: NUMERIC(12,6) - 业务货币金额
|
||||
- `currency`: VARCHAR(3) - "CNY" or "USD"
|
||||
|
||||
**sessions 表:**
|
||||
- `total_cost`: NUMERIC(12,6) - 同一货币累计
|
||||
|
||||
**约束:**
|
||||
- 同一 session 内所有 messages 的 currency 必须一致
|
||||
- sessions.total_cost 累加时保持货币一致
|
||||
|
||||
---
|
||||
|
||||
## Session 状态一致性设计
|
||||
|
||||
### 问题背景
|
||||
|
||||
旧逻辑:
|
||||
- `sessions.status` 与 `state_snapshot.status` 不同步
|
||||
- 失败时状态不一致
|
||||
- title 未自动赋值
|
||||
|
||||
### 状态机
|
||||
|
||||
```
|
||||
pending (创建)
|
||||
↓
|
||||
running (开始执行)
|
||||
↓
|
||||
├─ completed (成功)
|
||||
└─ failed (异常)
|
||||
```
|
||||
|
||||
### 状态同步规则
|
||||
|
||||
**创建时:**
|
||||
```python
|
||||
session = AgentChatSession(
|
||||
user_id=user_uuid,
|
||||
status=AgentChatSessionStatus.PENDING,
|
||||
state_snapshot={
|
||||
"status": "pending",
|
||||
"pending_tool_call_id": None,
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**运行时:**
|
||||
```python
|
||||
# 开始执行
|
||||
session.status = AgentChatSessionStatus.RUNNING
|
||||
session.state_snapshot["status"] = "running"
|
||||
|
||||
# 成功完成
|
||||
session.status = AgentChatSessionStatus.COMPLETED
|
||||
session.state_snapshot["status"] = "completed"
|
||||
|
||||
# 失败
|
||||
session.status = AgentChatSessionStatus.FAILED
|
||||
session.state_snapshot["status"] = "failed"
|
||||
session.state_snapshot["error_id"] = error_id
|
||||
```
|
||||
|
||||
### 自动 Title 赋值
|
||||
|
||||
**规则:**
|
||||
- 首次运行时,如果 `session.title` 为 `NULL`(尚未设置),使用 `user_input[:255]` 赋值
|
||||
- 只在第一次运行时赋值,后续不覆盖
|
||||
|
||||
**实现:**
|
||||
```python
|
||||
async def _set_title_if_empty(self, session_id: UUID, title: str) -> None:
|
||||
stmt = (
|
||||
update(AgentChatSession)
|
||||
.where(AgentChatSession.id == session_id)
|
||||
.where(AgentChatSession.title.is_(None))
|
||||
.values(title=title[:255])
|
||||
)
|
||||
await self.db.execute(stmt)
|
||||
```
|
||||
|
||||
### Repository 方法
|
||||
|
||||
```python
|
||||
class SessionRepository:
|
||||
async def mark_running(self, session_id: UUID) -> None: ...
|
||||
async def mark_completed(self, session_id: UUID) -> None: ...
|
||||
async def mark_failed(self, session_id: UUID, error_id: str) -> None: ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 全局 Prompt 构建设计
|
||||
|
||||
### 分层结构
|
||||
|
||||
```
|
||||
全局系统 Prompt
|
||||
├─ 身份段(username/bio)
|
||||
├─ 偏好段(language/timezone/country)
|
||||
└─ 阶段段(动态注入)
|
||||
├─ intent stage prompt
|
||||
├─ execution stage prompt
|
||||
└─ organization stage prompt
|
||||
```
|
||||
|
||||
### 构建函数
|
||||
|
||||
```python
|
||||
def build_global_system_prompt(ctx: UserAgentContext) -> str:
|
||||
lines = [
|
||||
"# User Identity",
|
||||
f"username: {ctx.username}",
|
||||
f"bio: {ctx.bio or 'N/A'}",
|
||||
"",
|
||||
"# User Preferences",
|
||||
f"interface_language: {ctx.settings.preferences.interface_language}",
|
||||
f"ai_language: {ctx.settings.preferences.ai_language}",
|
||||
f"timezone: {ctx.settings.preferences.timezone}",
|
||||
f"country: {ctx.settings.preferences.country}",
|
||||
"",
|
||||
"# Instructions",
|
||||
"Use the user's preferences to personalize responses.",
|
||||
"Respond in the user's preferred AI language.",
|
||||
"Consider the user's timezone for time-related queries.",
|
||||
]
|
||||
return "\n".join(lines)
|
||||
```
|
||||
|
||||
### 阶段注入
|
||||
|
||||
每个阶段运行时,在全局 prompt 基础上追加阶段特定的指令:
|
||||
|
||||
```python
|
||||
def build_stage_prompt(
|
||||
base_prompt: str,
|
||||
stage: str, # "intent" | "execution" | "organization"
|
||||
ctx: UserAgentContext,
|
||||
) -> str:
|
||||
stage_prompts = {
|
||||
"intent": "Analyze the user's intent and decide if direct response is possible.",
|
||||
"execution": "Execute the required tasks and tools to fulfill the user's request.",
|
||||
"organization": "Format the execution results into a user-friendly response.",
|
||||
}
|
||||
return f"{base_prompt}\n\n# Stage: {stage}\n{stage_prompts[stage]}"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 依赖关系图
|
||||
|
||||
```
|
||||
UserAgentContext (核心上下文)
|
||||
↓
|
||||
├─ ProfileSettings (用户配置)
|
||||
│ └─ preferences.country → 人民币结算
|
||||
│
|
||||
├─ build_global_system_prompt() (全局 Prompt)
|
||||
│ └─ 三阶段 Flow 使用
|
||||
│
|
||||
└─ resolve_stage_models() (选模逻辑)
|
||||
└─ 三阶段 Agent 配置
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [Runtime Database Schema](../runtime/runtime-database.md)
|
||||
- [AG-UI Protocol](.opencode/skills/ag-ui/SKILL.md)
|
||||
- [CrewAI Framework](.opencode/skills/crewai/SKILL.md)
|
||||
@@ -0,0 +1,144 @@
|
||||
# Agent LLM Config Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** 将 `system_agents.config` 中的 `temperature` / `max_tokens` 以受约束方式加载到运行时,并在调用 LiteLLM 时按需透传。
|
||||
|
||||
**Architecture:** 在应用层 `RunService` 读取模型选择时同步读取并校验 `SystemAgents.config`;将校验后的 `SystemAgentLLMConfig` 传入 `CrewAIRuntime`;由 runtime 将配置转交给 LiteLLM client,client 仅在值非 `None` 时向 `completion()` 传参,避免不必要的 provider 兼容风险。
|
||||
|
||||
**Tech Stack:** FastAPI, SQLAlchemy (async), Pydantic v2, LiteLLM, pytest
|
||||
|
||||
---
|
||||
|
||||
## 背景与修正点
|
||||
|
||||
- 当前真实调用链为:`RunService._load_agent_model_selection()` -> `create_runtime()` -> `CrewAIRuntime.execute()` -> `run_completion()`,并非 `load_stage_models()`。
|
||||
- `SystemAgentLLMConfig` 已存在:`backend/src/core/agent/domain/system_agent_config.py`。
|
||||
- `system_agents.config` 目前在初始化 YAML 侧有约束,但运行时 DB 读取仍需二次校验,防止脏数据绕过。
|
||||
|
||||
## 规则约束
|
||||
|
||||
- 严格 TDD:先写失败测试,再做实现。
|
||||
- Python 命令统一使用 `uv run ...`。
|
||||
- 仅做增量改动,不回滚或覆盖与本任务无关的已有变更。
|
||||
|
||||
## 字段映射与透传策略
|
||||
|
||||
| 配置字段 | LiteLLM 参数 | 规则 |
|
||||
|---|---|---|
|
||||
| `temperature` | `temperature` | `None` 不透传;非空直接透传 |
|
||||
| `max_tokens` | `max_tokens` | `None` 不透传;非空直接透传 |
|
||||
|
||||
---
|
||||
|
||||
### Task 1: 应用层加载并校验 Agent LLM Config
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/src/core/agent/application/run_service.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
|
||||
|
||||
**Step 1: 写失败测试(RED)**
|
||||
|
||||
新增单测覆盖以下行为:
|
||||
1. `_load_agent_model_selection()` 返回三元组:`(model_code, provider_name, llm_config)`。
|
||||
2. 当 DB `config` 为 `{}` 时,`llm_config.temperature/max_tokens` 为 `None`。
|
||||
3. 当 DB `config` 含非法值(如 `temperature=3`)时抛 `ValueError`。
|
||||
|
||||
**Step 2: 运行测试确认失败**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
|
||||
Expected: 新增断言失败(返回值结构/异常行为不匹配)。
|
||||
|
||||
**Step 3: 最小实现(GREEN)**
|
||||
|
||||
在 `run_service.py`:
|
||||
1. 查询 `SystemAgents.config`。
|
||||
2. 用 `SystemAgentLLMConfig.model_validate(config or {})` 校验。
|
||||
3. 将 `_load_agent_model_selection()` 改为返回三元组。
|
||||
4. 在 `run()` 中把 `llm_config` 传递到 `create_runtime(...)`。
|
||||
|
||||
**Step 4: 运行测试确认通过**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
|
||||
Expected: PASS。
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Runtime 与 LiteLLM Client 支持可选参数透传
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/src/core/agent/infrastructure/crewai/factory.py`
|
||||
- Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py`
|
||||
- Modify: `backend/src/core/agent/infrastructure/litellm/client.py`
|
||||
- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py`
|
||||
|
||||
**Step 1: 写失败测试(RED)**
|
||||
|
||||
在 `test_crewai_runtime.py` 增加用例:
|
||||
1. 传入 `temperature/max_tokens` 时,`run_completion` 收到对应参数。
|
||||
2. 参数为 `None` 时,不应被透传到 LiteLLM。
|
||||
|
||||
必要时新增 `backend/tests/unit/core/agent/test_litellm_client.py`,单测 `run_completion` 的 kwargs 组装逻辑。
|
||||
|
||||
**Step 2: 运行测试确认失败**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
|
||||
Expected: 新增断言失败(参数未透传或未过滤 `None`)。
|
||||
|
||||
**Step 3: 最小实现(GREEN)**
|
||||
|
||||
1. `create_runtime()` 增加 `llm_config` 参数并传给 `CrewAIRuntime`。
|
||||
2. `CrewAIRuntime` 保存 `llm_config`,执行时调用:
|
||||
- `run_completion(..., temperature=llm_config.temperature, max_tokens=llm_config.max_tokens)`
|
||||
3. `run_completion()` 改为支持可选 `temperature/max_tokens`,内部仅在非 `None` 时加入 kwargs 再调用 `completion()`。
|
||||
|
||||
**Step 4: 运行测试确认通过**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q`
|
||||
Expected: PASS。
|
||||
|
||||
---
|
||||
|
||||
### Task 3: 初始化数据补齐与回归验证
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/src/core/config/static/database/system_agents.yaml`
|
||||
- Modify: `backend/src/core/config/initial/init_data.py`(如需补充类型兜底)
|
||||
- Test: `backend/tests/unit/core/agent/test_run_resume_service.py`
|
||||
|
||||
**Step 1: 写失败测试(RED)**
|
||||
|
||||
补充断言:YAML 读取后 `config` 可为空或包含 `max_tokens: null`,初始化逻辑不会报错,且生成结构符合 `SystemAgentLLMConfig`。
|
||||
|
||||
**Step 2: 运行测试确认失败**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
|
||||
Expected: 新增断言失败。
|
||||
|
||||
**Step 3: 最小实现(GREEN)**
|
||||
|
||||
1. 在 `system_agents.yaml` 为各 agent 配置显式补充 `max_tokens: null`。
|
||||
2. `init_data.py` 保持 `config: SystemAgentLLMConfig | None = None`,写库时统一序列化为 dict。
|
||||
|
||||
**Step 4: 运行测试确认通过**
|
||||
|
||||
Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q`
|
||||
Expected: PASS。
|
||||
|
||||
---
|
||||
|
||||
## 最终验证
|
||||
|
||||
1. `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_crewai_runtime.py -q`
|
||||
2. `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -q`
|
||||
3. `uv run ruff check backend/src backend/tests`
|
||||
4. `uv run basedpyright`
|
||||
|
||||
预期:全部通过;若集成测试依赖本地 DB 状态导致跳过/失败,需记录原因并给出手工验证步骤。
|
||||
|
||||
## 完成标准
|
||||
|
||||
- `RunService` 从 DB 读取并校验 `config`。
|
||||
- runtime 到 LiteLLM 链路支持 `temperature/max_tokens` 可选透传。
|
||||
- `None` 不透传。
|
||||
- 单测与相关集成测试通过,并给出命令级证据。
|
||||
@@ -0,0 +1,2 @@
|
||||
1. memory短期的加载。memory的生命周期为ttl+对话条目+session_id。用crewai
|
||||
2.
|
||||
Reference in New Issue
Block a user