From db158de39c07d6879db446e6dc8468b028db15f8 Mon Sep 17 00:00:00 2001 From: qzl Date: Thu, 5 Mar 2026 18:25:51 +0800 Subject: [PATCH] =?UTF-8?q?feat(agent):=20=E5=AE=9E=E7=8E=B0=20Agent=20Run?= =?UTF-8?q?time=20LLM=20=E9=85=8D=E7=BD=AE=E4=B8=8E=E6=B6=88=E6=81=AF?= =?UTF-8?q?=E5=85=83=E6=95=B0=E6=8D=AE=E7=BB=93=E6=9E=84=E5=8C=96=E6=94=AF?= =?UTF-8?q?=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/agent/application/resume_service.py | 10 +- .../src/core/agent/application/run_service.py | 40 +- .../src/core/agent/domain/message_metadata.py | 39 + .../core/agent/domain/system_agent_config.py | 8 + .../src/core/agent/domain/tool_correlation.py | 25 +- .../agent/infrastructure/crewai/factory.py | 7 +- .../agent/infrastructure/crewai/runtime.py | 5 + .../agent/infrastructure/litellm/client.py | 27 +- backend/src/core/config/initial/init_data.py | 7 +- .../config/static/database/system_agents.yaml | 3 + .../unit/core/agent/test_crewai_runtime.py | 56 +- .../tests/unit/core/agent/test_init_data.py | 14 + .../unit/core/agent/test_litellm_client.py | 51 ++ .../core/agent/test_run_resume_service.py | 86 ++ docs/bugs/api-mismatch.md | 116 --- docs/bugs/test-analysis.md | 145 --- .../2026-03-04-agent-hard-reset-design.md | 201 ----- .../plans/2026-03-04-agent-hard-reset-plan.md | 574 ------------ ...3-04-simplify-agent-architecture-design.md | 199 ----- .../2026-03-04-simplify-agent-architecture.md | 844 ------------------ ...05-agent-runtime-closed-loop-e2e-design.md | 81 -- ...3-05-agent-runtime-closed-loop-e2e-plan.md | 230 ----- ...runtime-closed-loop-implementation-plan.md | 469 ---------- ...3-05-user-agent-context-settings-design.md | 746 ++++++++++++++++ docs/plans/agent-llm-config.md | 144 +++ docs/todo/todo.md | 2 + 26 files changed, 1215 insertions(+), 2914 deletions(-) create mode 100644 backend/src/core/agent/domain/message_metadata.py create mode 100644 
backend/src/core/agent/domain/system_agent_config.py create mode 100644 backend/tests/unit/core/agent/test_init_data.py create mode 100644 backend/tests/unit/core/agent/test_litellm_client.py delete mode 100644 docs/bugs/api-mismatch.md delete mode 100644 docs/bugs/test-analysis.md delete mode 100644 docs/plans/2026-03-04-agent-hard-reset-design.md delete mode 100644 docs/plans/2026-03-04-agent-hard-reset-plan.md delete mode 100644 docs/plans/2026-03-04-simplify-agent-architecture-design.md delete mode 100644 docs/plans/2026-03-04-simplify-agent-architecture.md delete mode 100644 docs/plans/2026-03-05-agent-runtime-closed-loop-e2e-design.md delete mode 100644 docs/plans/2026-03-05-agent-runtime-closed-loop-e2e-plan.md delete mode 100644 docs/plans/2026-03-05-agent-runtime-closed-loop-implementation-plan.md create mode 100644 docs/plans/2026-03-05-user-agent-context-settings-design.md create mode 100644 docs/plans/agent-llm-config.md create mode 100644 docs/todo/todo.md diff --git a/backend/src/core/agent/application/resume_service.py b/backend/src/core/agent/application/resume_service.py index 87675f0..d79d62d 100644 --- a/backend/src/core/agent/application/resume_service.py +++ b/backend/src/core/agent/application/resume_service.py @@ -5,6 +5,10 @@ from uuid import UUID from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from core.agent.application.session_state_persistence import SessionStatePersistence +from core.agent.domain.message_metadata import ( + MessageMetadataAssistantOutput, + MessageMetadataToolResult, +) from core.agent.infrastructure.persistence.message_repository import MessageRepository from core.agent.infrastructure.persistence.session_repository import SessionRepository from core.db import AsyncSessionLocal @@ -46,14 +50,16 @@ class ResumeService: seq=next_seq, role=AgentChatMessageRole.TOOL, content='{"status":"ok"}', - metadata={"type": "tool_result", "tool_call_id": tool_call_id}, + metadata=MessageMetadataToolResult( + 
tool_call_id=tool_call_id, + ).model_dump(), ) await message_repository.append_message( session_id=session_uuid, seq=next_seq + 1, role=AgentChatMessageRole.ASSISTANT, content="Tool result received", - metadata={"type": "assistant_output"}, + metadata=MessageMetadataAssistantOutput().model_dump(), ) snapshot = self._state_persistence.build_completed_snapshot() diff --git a/backend/src/core/agent/application/run_service.py b/backend/src/core/agent/application/run_service.py index 60056b4..8c1538d 100644 --- a/backend/src/core/agent/application/run_service.py +++ b/backend/src/core/agent/application/run_service.py @@ -3,10 +3,16 @@ from __future__ import annotations from decimal import Decimal from uuid import UUID, uuid4 +from pydantic import ValidationError from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from core.agent.application.session_state_persistence import SessionStatePersistence +from core.agent.domain.message_metadata import ( + MessageMetadataToolCall, + MessageMetadataUserInput, +) +from core.agent.domain.system_agent_config import SystemAgentLLMConfig from core.agent.infrastructure.crewai.factory import create_runtime from core.agent.infrastructure.persistence.message_repository import MessageRepository from core.agent.infrastructure.persistence.session_repository import SessionRepository @@ -58,10 +64,16 @@ class RunService: if chat_session is None: raise ValueError("session not found") - model_code, provider_name = await self._load_agent_model_selection( - db_session + ( + model_code, + provider_name, + llm_config, + ) = await self._load_agent_model_selection(db_session) + runtime = create_runtime( + model_code=model_code, + provider_name=provider_name, + llm_config=llm_config, ) - runtime = create_runtime(model_code=model_code, provider_name=provider_name) runtime_result = runtime.execute(user_input=user_input) assistant_text = str(runtime_result.get("assistant_text", "")) prompt_tokens = 
_to_int(runtime_result.get("prompt_tokens", 0)) @@ -79,7 +91,7 @@ class RunService: role=AgentChatMessageRole.USER, content=user_input, model_code=model_code, - metadata={"type": "user_input"}, + metadata=MessageMetadataUserInput().model_dump(), ) await message_repository.append_message( session_id=session_uuid, @@ -87,10 +99,9 @@ class RunService: role=AgentChatMessageRole.ASSISTANT, content=assistant_text or "Tool call pending approval", model_code=model_code, - metadata={ - "type": "tool_call", - "tool_call_id": pending_tool_call_id, - }, + metadata=MessageMetadataToolCall( + tool_call_id=pending_tool_call_id, + ).model_dump(), input_tokens=prompt_tokens, output_tokens=completion_tokens, cost=cost, @@ -119,9 +130,9 @@ class RunService: async def _load_agent_model_selection( self, session: AsyncSession - ) -> tuple[str, str]: + ) -> tuple[str, str, SystemAgentLLMConfig]: stmt = ( - select(Llm.model_code, LlmFactory.name) + select(Llm.model_code, LlmFactory.name, SystemAgents.config) .join(SystemAgents, SystemAgents.llm_id == Llm.id) .join(LlmFactory, LlmFactory.id == Llm.factory_id) .where(SystemAgents.status == "active") @@ -131,4 +142,11 @@ class RunService: record = (await session.execute(stmt)).one_or_none() if record is None: raise ValueError("active system agent model is required") - return str(record[0]), str(record[1]) + + raw_config = record[2] if isinstance(record[2], dict) else {} + try: + llm_config = SystemAgentLLMConfig.model_validate(raw_config) + except ValidationError as exc: + raise ValueError("invalid system agent config") from exc + + return str(record[0]), str(record[1]), llm_config diff --git a/backend/src/core/agent/domain/message_metadata.py b/backend/src/core/agent/domain/message_metadata.py new file mode 100644 index 0000000..e9165d4 --- /dev/null +++ b/backend/src/core/agent/domain/message_metadata.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel + + +class 
MessageMetadataUserInput(BaseModel): + type: Literal["user_input"] = "user_input" + + +class MessageMetadataToolCall(BaseModel): + type: Literal["tool_call"] = "tool_call" + tool_call_id: str + + +class MessageMetadataToolResult(BaseModel): + type: Literal["tool_result"] = "tool_result" + tool_call_id: str + run_id: str | None = None + turn_id: str | None = None + tool_name: str | None = None + storage_bucket: str | None = None + storage_path: str | None = None + payload_sha256: str | None = None + payload_bytes: int | None = None + payload_format: str | None = None + + +class MessageMetadataAssistantOutput(BaseModel): + type: Literal["assistant_output"] = "assistant_output" + + +MessageMetadata = ( + MessageMetadataUserInput + | MessageMetadataToolCall + | MessageMetadataToolResult + | MessageMetadataAssistantOutput +) diff --git a/backend/src/core/agent/domain/system_agent_config.py b/backend/src/core/agent/domain/system_agent_config.py new file mode 100644 index 0000000..1fc0927 --- /dev/null +++ b/backend/src/core/agent/domain/system_agent_config.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from pydantic import BaseModel, Field + + +class SystemAgentLLMConfig(BaseModel): + temperature: float | None = Field(default=None, ge=0.0, le=2.0) + max_tokens: int | None = Field(default=None, ge=1) diff --git a/backend/src/core/agent/domain/tool_correlation.py b/backend/src/core/agent/domain/tool_correlation.py index d66b471..7068413 100644 --- a/backend/src/core/agent/domain/tool_correlation.py +++ b/backend/src/core/agent/domain/tool_correlation.py @@ -1,5 +1,7 @@ from __future__ import annotations +from core.agent.domain.message_metadata import MessageMetadataToolResult + def reconstruct_tool_call_result_event( *, @@ -26,15 +28,14 @@ def build_tool_result_metadata( payload_bytes: int, payload_format: str, ) -> dict[str, object]: - return { - "type": "tool_result", - "run_id": run_id, - "turn_id": turn_id, - "tool_call_id": tool_call_id, - "tool_name": 
tool_name, - "storage_bucket": storage_bucket, - "storage_path": storage_path, - "payload_sha256": payload_sha256, - "payload_bytes": payload_bytes, - "payload_format": payload_format, - } + return MessageMetadataToolResult( + run_id=run_id, + turn_id=turn_id, + tool_call_id=tool_call_id, + tool_name=tool_name, + storage_bucket=storage_bucket, + storage_path=storage_path, + payload_sha256=payload_sha256, + payload_bytes=payload_bytes, + payload_format=payload_format, + ).model_dump() diff --git a/backend/src/core/agent/infrastructure/crewai/factory.py b/backend/src/core/agent/infrastructure/crewai/factory.py index 054bbd3..d98a7fc 100644 --- a/backend/src/core/agent/infrastructure/crewai/factory.py +++ b/backend/src/core/agent/infrastructure/crewai/factory.py @@ -1,15 +1,20 @@ from __future__ import annotations +from core.agent.domain.system_agent_config import SystemAgentLLMConfig from core.agent.infrastructure.config.resolver import AgentConfigResolver from core.agent.infrastructure.crewai.runtime import CrewAIRuntime def create_runtime( - *, model_code: str | None, provider_name: str | None + *, + model_code: str | None, + provider_name: str | None, + llm_config: SystemAgentLLMConfig | None = None, ) -> CrewAIRuntime: resolver = AgentConfigResolver() return CrewAIRuntime( resolver=resolver, model_code=model_code, provider_name=provider_name, + llm_config=llm_config, ) diff --git a/backend/src/core/agent/infrastructure/crewai/runtime.py b/backend/src/core/agent/infrastructure/crewai/runtime.py index 3076f69..f9cfb85 100644 --- a/backend/src/core/agent/infrastructure/crewai/runtime.py +++ b/backend/src/core/agent/infrastructure/crewai/runtime.py @@ -2,6 +2,7 @@ from __future__ import annotations from typing import Any +from core.agent.domain.system_agent_config import SystemAgentLLMConfig from core.agent.infrastructure.agui.bridge import to_agui_events from core.agent.infrastructure.config.resolver import ( AgentConfigResolver, @@ -47,11 +48,13 @@ class 
CrewAIRuntime: resolver: AgentConfigResolver, model_code: str | None, provider_name: str | None, + llm_config: SystemAgentLLMConfig | None = None, ) -> None: self._config: ResolvedAgentConfig = resolver.resolve( model_code=model_code, provider_name=provider_name, ) + self._llm_config = llm_config or SystemAgentLLMConfig() def map_events(self, internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]: return to_agui_events(internal_events) @@ -65,6 +68,8 @@ class CrewAIRuntime: model=litellm_model, api_key=self._config.provider_api_key, messages=[{"role": "user", "content": user_input}], + temperature=self._llm_config.temperature, + max_tokens=self._llm_config.max_tokens, ) if not isinstance(response, dict): raise ValueError("llm response must be a dict") diff --git a/backend/src/core/agent/infrastructure/litellm/client.py b/backend/src/core/agent/infrastructure/litellm/client.py index 0303f87..5534d7f 100644 --- a/backend/src/core/agent/infrastructure/litellm/client.py +++ b/backend/src/core/agent/infrastructure/litellm/client.py @@ -5,13 +5,26 @@ from typing import Any from litellm import completion -def run_completion(*, model: str, api_key: str, messages: list[dict[str, Any]]) -> Any: - response = completion( - model=model, - api_key=api_key, - messages=messages, - stream=False, - ) +def run_completion( + *, + model: str, + api_key: str, + messages: list[dict[str, Any]], + temperature: float | None = None, + max_tokens: int | None = None, +) -> Any: + kwargs: dict[str, Any] = { + "model": model, + "api_key": api_key, + "messages": messages, + "stream": False, + } + if temperature is not None: + kwargs["temperature"] = temperature + if max_tokens is not None: + kwargs["max_tokens"] = max_tokens + + response = completion(**kwargs) model_dump = getattr(response, "model_dump", None) if callable(model_dump): return model_dump() diff --git a/backend/src/core/config/initial/init_data.py b/backend/src/core/config/initial/init_data.py index a26781e..c615483 100644 
--- a/backend/src/core/config/initial/init_data.py +++ b/backend/src/core/config/initial/init_data.py @@ -9,6 +9,7 @@ from pydantic import BaseModel, ValidationError from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from core.agent.domain.system_agent_config import SystemAgentLLMConfig from core.db.session import AsyncSessionLocal from core.logging import get_logger from models.llm import Llm @@ -38,7 +39,7 @@ class SystemAgentsSeed(BaseModel): agent_type: str llm_model_code: str status: str - config: dict[str, Any] + config: SystemAgentLLMConfig | None = None class SystemAgentsYaml(BaseModel): @@ -184,7 +185,9 @@ async def initialize_system_agents() -> None: agent_type=agent["agent_type"], llm_id=llm.id, status=agent["status"], - config=agent["config"], + config=SystemAgentLLMConfig.model_validate( + agent.get("config") or {} + ).model_dump(), ) logger.info("Initialized system agents") diff --git a/backend/src/core/config/static/database/system_agents.yaml b/backend/src/core/config/static/database/system_agents.yaml index 82a8546..9d7ca25 100644 --- a/backend/src/core/config/static/database/system_agents.yaml +++ b/backend/src/core/config/static/database/system_agents.yaml @@ -4,15 +4,18 @@ agents: status: active config: temperature: 0.7 + max_tokens: null - agent_type: TASK_EXECUTION llm_model_code: deepseek-v3.2 status: active config: temperature: 0.7 + max_tokens: null - agent_type: RESULT_REPORTING llm_model_code: deepseek-v3.2 status: active config: temperature: 0.7 + max_tokens: null diff --git a/backend/tests/unit/core/agent/test_crewai_runtime.py b/backend/tests/unit/core/agent/test_crewai_runtime.py index 0be8d0c..0163b8e 100644 --- a/backend/tests/unit/core/agent/test_crewai_runtime.py +++ b/backend/tests/unit/core/agent/test_crewai_runtime.py @@ -1,22 +1,26 @@ from __future__ import annotations from types import SimpleNamespace +from typing import cast -from core.agent.infrastructure.config.resolver import 
AgentConfigResolver +from core.agent.domain.system_agent_config import SystemAgentLLMConfig +from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike from core.agent.infrastructure.crewai.runtime import CrewAIRuntime def test_runtime_emits_text_tool_reasoning_events() -> None: - runtime = CrewAIRuntime( - resolver=AgentConfigResolver( - settings=SimpleNamespace( - agent_runtime=SimpleNamespace( - default_model_code="", - streaming_enabled=True, - ), - llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}), - ) + settings = cast( + SettingsLike, + SimpleNamespace( + agent_runtime=SimpleNamespace( + default_model_code="", + streaming_enabled=True, + ), + llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}), ), + ) + runtime = CrewAIRuntime( + resolver=AgentConfigResolver(settings=settings), model_code="gpt-4o-mini", provider_name="dashscope", ) @@ -46,11 +50,18 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model( captured: dict[str, object] = {} def _fake_completion( - *, model: str, api_key: str, messages: list[dict[str, object]] + *, + model: str, + api_key: str, + messages: list[dict[str, object]], + temperature: float | None = None, + max_tokens: int | None = None, ): captured["model"] = model captured["api_key"] = api_key captured["messages"] = messages + captured["temperature"] = temperature + captured["max_tokens"] = max_tokens return { "choices": [ { @@ -75,23 +86,28 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model( cost=0.001, ), ) + settings = cast( + SettingsLike, + SimpleNamespace( + agent_runtime=SimpleNamespace( + default_model_code="", + streaming_enabled=True, + ), + llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}), + ), + ) runtime = CrewAIRuntime( - resolver=AgentConfigResolver( - settings=SimpleNamespace( - agent_runtime=SimpleNamespace( - default_model_code="", - streaming_enabled=True, - ), - llm=SimpleNamespace(provider_keys={"dashscope": 
"env-api-key"}), - ) - ), + resolver=AgentConfigResolver(settings=settings), model_code="qwen3.5-flash", provider_name="dashscope", + llm_config=SystemAgentLLMConfig(temperature=0.3, max_tokens=256), ) result = runtime.execute(user_input="hi") assert captured["model"] == "dashscope/qwen3.5-flash" assert captured["api_key"] == "env-api-key" + assert captured["temperature"] == 0.3 + assert captured["max_tokens"] == 256 assert result["assistant_text"] == "hello" diff --git a/backend/tests/unit/core/agent/test_init_data.py b/backend/tests/unit/core/agent/test_init_data.py new file mode 100644 index 0000000..cae8f39 --- /dev/null +++ b/backend/tests/unit/core/agent/test_init_data.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from core.config.initial.init_data import load_system_agents + + +def test_load_system_agents_supports_nullable_max_tokens() -> None: + loaded = load_system_agents() + + agents = loaded["agents"] + assert len(agents) > 0 + for agent in agents: + assert "config" in agent + assert "max_tokens" in agent["config"] + assert agent["config"]["max_tokens"] is None diff --git a/backend/tests/unit/core/agent/test_litellm_client.py b/backend/tests/unit/core/agent/test_litellm_client.py new file mode 100644 index 0000000..be61909 --- /dev/null +++ b/backend/tests/unit/core/agent/test_litellm_client.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from core.agent.infrastructure.litellm.client import run_completion + + +def test_run_completion_passes_optional_params_when_provided(monkeypatch) -> None: + captured: dict[str, object] = {} + + def _fake_completion(**kwargs): # type: ignore[no-untyped-def] + captured.update(kwargs) + return {"ok": True} + + monkeypatch.setattr( + "core.agent.infrastructure.litellm.client.completion", + _fake_completion, + ) + + run_completion( + model="dashscope/qwen3.5-flash", + api_key="key", + messages=[{"role": "user", "content": "hi"}], + temperature=0.6, + max_tokens=120, + ) + + assert 
captured["temperature"] == 0.6 + assert captured["max_tokens"] == 120 + + +def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None: + captured: dict[str, object] = {} + + def _fake_completion(**kwargs): # type: ignore[no-untyped-def] + captured.update(kwargs) + return {"ok": True} + + monkeypatch.setattr( + "core.agent.infrastructure.litellm.client.completion", + _fake_completion, + ) + + run_completion( + model="dashscope/qwen3.5-flash", + api_key="key", + messages=[{"role": "user", "content": "hi"}], + temperature=None, + max_tokens=None, + ) + + assert "temperature" not in captured + assert "max_tokens" not in captured diff --git a/backend/tests/unit/core/agent/test_run_resume_service.py b/backend/tests/unit/core/agent/test_run_resume_service.py index 54548a9..4fbd846 100644 --- a/backend/tests/unit/core/agent/test_run_resume_service.py +++ b/backend/tests/unit/core/agent/test_run_resume_service.py @@ -4,6 +4,23 @@ import pytest from core.agent.application.resume_service import ResumeService from core.agent.application.run_service import RunService +from core.agent.domain.system_agent_config import SystemAgentLLMConfig + + +class _FakeResult: + def __init__(self, record: tuple[object, object, object] | None) -> None: + self._record = record + + def one_or_none(self) -> tuple[object, object, object] | None: + return self._record + + +class _FakeSession: + def __init__(self, record: tuple[object, object, object] | None) -> None: + self._record = record + + async def execute(self, _stmt: object) -> _FakeResult: + return _FakeResult(self._record) @pytest.mark.asyncio @@ -20,3 +37,72 @@ async def test_resume_service_requires_pending_tool_call() -> None: with pytest.raises(ValueError): await resume_service.resume(session_id="session-1", tool_call_id="call-1") + + +@pytest.mark.asyncio +async def test_load_agent_model_selection_returns_validated_llm_config() -> None: + run_service = RunService() + fake_session = _FakeSession( + ( + "qwen3.5-flash", 
+ "dashscope", + {"temperature": 0.5, "max_tokens": 512}, + ) + ) + + ( + model_code, + provider_name, + llm_config, + ) = await run_service._load_agent_model_selection( + fake_session # type: ignore[arg-type] + ) + + assert model_code == "qwen3.5-flash" + assert provider_name == "dashscope" + assert isinstance(llm_config, SystemAgentLLMConfig) + assert llm_config.temperature == 0.5 + assert llm_config.max_tokens == 512 + + +@pytest.mark.asyncio +async def test_load_agent_model_selection_rejects_invalid_config() -> None: + run_service = RunService() + fake_session = _FakeSession( + ( + "qwen3.5-flash", + "dashscope", + {"temperature": 3.0}, + ) + ) + + with pytest.raises(ValueError, match="invalid system agent config"): + await run_service._load_agent_model_selection(fake_session) # type: ignore[arg-type] + + +@pytest.mark.asyncio +async def test_load_agent_model_selection_falls_back_when_config_not_dict() -> None: + run_service = RunService() + fake_session = _FakeSession( + ( + "qwen3.5-flash", + "dashscope", + "not-a-dict", + ) + ) + + _, _, llm_config = await run_service._load_agent_model_selection( + fake_session # type: ignore[arg-type] + ) + + assert llm_config.temperature is None + assert llm_config.max_tokens is None + + +@pytest.mark.asyncio +async def test_load_agent_model_selection_raises_when_no_active_agent() -> None: + run_service = RunService() + fake_session = _FakeSession(None) + + with pytest.raises(ValueError, match="active system agent model is required"): + await run_service._load_agent_model_selection(fake_session) # type: ignore[arg-type] diff --git a/docs/bugs/api-mismatch.md b/docs/bugs/api-mismatch.md deleted file mode 100644 index f2ab077..0000000 --- a/docs/bugs/api-mismatch.md +++ /dev/null @@ -1,116 +0,0 @@ -# 前后端 API 对比分析 - -**Date:** 2026-03-04 -**Status:** Open -**Type:** 架构分析 - ---- - -## 一、后端已有、前端缺失的 API - -### 1. 
Friendships API (`/api/v1/friends`) - -| 方法 | 路径 | 功能 | 前端状态 | -|------|------|------|----------| -| POST | `/requests` | 发送好友请求 | **缺失** | -| GET | `/requests/inbox` | 获取收件箱 | **缺失** | -| GET | `/requests/outgoing` | 获取发出的请求 | **缺失** | -| POST | `/requests/{id}/accept` | 接受好友请求 | **缺失** | -| POST | `/requests/{id}/decline` | 拒绝好友请求 | **缺失** | -| DELETE | `/requests/{id}` | 取消好友请求 | **缺失** | -| GET | `` | 获取好友列表 | **缺失** | -| DELETE | `/{id}` | 删除好友 | **缺失** | - -### 2. Inbox Messages API (`/api/v1/inbox/messages`) - -| 方法 | 路径 | 功能 | 前端状态 | -|------|------|------|----------| -| GET | `` | 获取消息列表 | **缺失** | -| POST | `/{id}/accept` | 接受邀请 | **缺失** | -| POST | `/{id}/dismiss` | 忽略消息 | **缺失** | - -### 3. Chat/AgUi 流式 API - -| 功能 | 前端状态 | -|------|----------| -| 发送消息 SSE 流式 | **仅有 Mock** | -| 加载历史记录 | **仅有 Mock** | - -> 前端 `AgUiService` 只有本地 mock (`throw UnimplementedError`),未实现真实 API 调用。 - -### 4. Infra API - -| 方法 | 路径 | 功能 | 前端状态 | -|------|------|------|----------| -| GET | `/infra/health` | 基础设施健康检查 | **未使用** | - ---- - -## 二、前端已有、后端已实现的 API - -### Auth API (`/api/v1/auth`) - -| 方法 | 路径 | 后端 | 前端 | -|------|------|------|------| -| POST | `/verifications` | ✅ | ✅ | -| POST | `/verifications/verify` | ✅ | ✅ | -| POST | `/verifications/resend` | ✅ | ✅ | -| POST | `/sessions` | ✅ | ✅ | -| POST | `/sessions/refresh` | ✅ | ✅ | -| DELETE | `/sessions` | ✅ | ✅ | -| POST | `/password-reset` | ✅ | ✅ | -| POST | `/password-reset/confirm` | ✅ | ✅ | -| GET | `/users` | ✅ | **未使用** | - -### Users API (`/api/v1/users`) - -| 方法 | 路径 | 后端 | 前端 | -|------|------|------|------| -| GET | `/me` | ✅ | ✅ | -| PATCH | `/me` | ✅ | ✅ | -| POST | `/search` | ✅ | ✅ | - -### Schedule Items API (`/api/v1/schedule-items`) - -| 方法 | 路径 | 后端 | 前端 | -|------|------|------|------| -| POST | `` | ✅ | **仅有 Mock** | -| GET | `` (range query) | ✅ | **仅有 Mock** | -| GET | `/{id}` | ✅ | **仅有 Mock** | -| PATCH | `/{id}` | ✅ | **仅有 Mock** | -| DELETE | `/{id}` | ✅ | **仅有 Mock** | -| POST | `/{id}/share` 
| ✅ | **缺失** | - ---- - -## 三、待实现功能清单 - -| 优先级 | 功能 | 说明 | -|--------|------|------| -| **P0** | FriendsApi | 前端无 Friendships API 客户端 | -| **P0** | InboxMessagesApi | 前端无 Inbox Messages API 客户端 | -| **P0** | Chat/AgUi 后端连接 | 前端 AgUiService 未实现真实 API | -| **P1** | CalendarService 真实 API | MockCalendarService → 真实 API 调用 | -| **P1** | Schedule Share 接口 | 前端未调用 `POST /{id}/share` | -| **P2** | Infra Health 集成 | 可用于前端健康检查 | - ---- - -## 四、相关文件位置 - -### 前端 API 客户端 - -- `apps/lib/features/auth/data/auth_api.dart` - Auth API -- `apps/lib/features/users/data/users_api.dart` - Users API -- `apps/lib/features/calendar/data/services/mock_calendar_service.dart` - Calendar Mock -- `apps/lib/features/chat/data/services/ag_ui_service.dart` - Chat/AgUi Mock -- `apps/lib/features/chat/data/services/mock_history_service.dart` - History Mock - -### 后端 Router - -- `backend/src/v1/auth/router.py` - Auth 路由 -- `backend/src/v1/users/router.py` - Users 路由 -- `backend/src/v1/friendships/router.py` - Friendships 路由 -- `backend/src/v1/inbox_messages/router.py` - Inbox Messages 路由 -- `backend/src/v1/schedule_items/router.py` - Schedule Items 路由 -- `backend/src/v1/infra/router.py` - Infra 路由 diff --git a/docs/bugs/test-analysis.md b/docs/bugs/test-analysis.md deleted file mode 100644 index f930356..0000000 --- a/docs/bugs/test-analysis.md +++ /dev/null @@ -1,145 +0,0 @@ -# 前后端测试分析报告 - -**Date:** 2026-03-04 -**Status:** Completed - ---- - -## 测试统计 - -### 后端测试 - -| 类型 | 数量 | 状态 | -|------|------|------| -| Unit Tests | ~100+ | 可运行 | -| Integration Tests | ~70+ | 可运行 | -| E2E Tests | 5 | **无法运行** (缺少 playwright 依赖) | - -### 前端测试 - -| 类型 | 数量 | 状态 | -|------|------|------| -| Flutter Tests | 140 | ✅ 全部通过 | - ---- - -## 问题发现 - -### 1. 
后端 E2E 测试无法运行 (HIGH) - -**问题**: 5 个 E2E 测试文件需要 `playwright` 模块,但依赖未安装。 - -**影响文件**: -- `tests/e2e/test_auth_flow.py` -- `tests/e2e/test_infra_health_e2e.py` -- `tests/e2e/test_logging_e2e.py` -- `tests/e2e/test_mobile_health_e2e.py` -- `tests/e2e/test_profile_flow.py` - -**错误**: -``` -ModuleNotFoundError: No module named 'playwright' -``` - -**建议**: -- 安装 playwright: `uv add playwright && uv run playwright install` -- 或者移除这些无法运行的 E2E 测试文件 - ---- - -### 2. 测试文件命名冲突导致收集警告 (LOW) - -**问题**: 存在多个同名 `test_schemas.py` 文件在不同目录,导致 pytest 收集时显示警告。 - -**影响文件**: -- `tests/unit/v1/schedule_items/test_schemas.py` -- `tests/unit/v1/profile/test_schemas.py` -- `tests/unit/v1/inbox_messages/test_schemas.py` -- `tests/unit/v1/friendships/test_schemas.py` - -**状态**: 测试实际可以正常运行,只是有警告提示。 - -**建议**: 可保持现状(这是合理的代码组织方式),或重命名为 `test_*.py` 以消除警告。 - ---- - -### 3. 遗留测试验证旧字段 (INFO) - -**文件**: `tests/unit/v1/profile/test_schemas.py` - -**测试**: `test_profile_update_rejects_display_name_field` - -**说明**: 此测试验证旧的 `display_name` 字段被正确拒绝。字段已在之前的重构中删除。 - -**状态**: **有效** - 这是一个回归测试,确保旧字段不被使用。 - ---- - -## 未发现的问题 - -### 冗余测试 -经过检查,未发现明显冗余的测试: -- 每个模块的测试覆盖不同的功能 -- Unit tests、Integration tests、E2E tests 有清晰的职责划分 - -### 死代码 -未发现测试文件中有未使用的: -- imports -- mock 类 -- helper 函数 - -### 缺失测试 -未发现对应已实现功能但缺少测试的情况。 - ---- - -## 测试覆盖模块 - -### 后端 -| 模块 | Unit | Integration | E2E | -|------|------|-------------|-----| -| Auth | ✅ | ✅ | ❌ | -| Users | - | ✅ | - | -| Profile | ✅ | - | ❌ | -| Friendships | ✅ | ✅ | - | -| Inbox Messages | ✅ | ✅ | - | -| Schedule Items | ✅ | ✅ | - | -| Logging | ✅ | ✅ | ✅ | -| Settings | ✅ | - | - | - -### 前端 -| 模块 | 测试数 | -|------|--------| -| Auth | ~20 | -| Chat | ~70 | -| Home | ~15 | -| Calendar | ~5 | -| Core (API, Storage) | ~30 | - ---- - -## 建议 - -1. **立即**: 解决 E2E 测试依赖问题或移除无法运行的测试文件 -2. **可选**: 清理 test_schemas.py 重名警告(低优先级) -3. 
**保持**: 现有的测试结构良好,无需重大重构 - ---- - -## 附: 测试代码质量问题 - -### 测试类未完全实现 Protocol (LSP 警告) - -**文件**: `tests/unit/v1/auth/test_auth_service.py` - -**问题**: `FakeGateway` 和 `LogoutAssertingGateway` 类没有实现 `AuthServiceGateway` Protocol 的全部方法: -- `request_password_reset` -- `confirm_password_reset` - -**影响**: LSP 类型检查器报告错误,但运行时不受影响(因为这些方法在测试中不会被调用)。 - -**建议**: 可选择补充缺失的方法实现,或使用 `@pytest.mark.skip` 标记不需要的协议方法。 - ---- - -*报告生成时间: 2026-03-04* diff --git a/docs/plans/2026-03-04-agent-hard-reset-design.md b/docs/plans/2026-03-04-agent-hard-reset-design.md deleted file mode 100644 index 401b67c..0000000 --- a/docs/plans/2026-03-04-agent-hard-reset-design.md +++ /dev/null @@ -1,201 +0,0 @@ -# Agent 后端硬切重构设计 - -## 目标 - -- 一次性移除现有 Agent 运行时代码、测试和旧文档契约,避免新旧方案并存。 -- 仅从后端重新设计 Agent 体系,不依赖前端实现细节。 -- 新方案必须满足以下六项要求: - 1. 配置层可通过 `.env` 驱动 LLM API Key。 - 2. 对话与 resume 通过 Celery 队列处理,不阻塞 Web 主线程。 - 3. `v1/agent` 仅负责路由组织与服务调用,核心逻辑在 `core/agent`。 - 4. 按 CrewAI 官方模型组织 Agent/Task/Crew/Flow/Tools。 - 5. 按 AG-UI 协议输出事件,优先使用 `ag-ui-crewai` 适配库。 - 6. 
使用 LiteLLM 统计每次 LLM 调用的 token 和 cost。 - -## 设计原则 - -- 单一职责:HTTP 层只做协议和鉴权,编排与执行下沉到核心层。 -- 异步优先:长耗时推理、工具调用、恢复流程全部异步化。 -- 协议优先:AG-UI 作为唯一事件契约,不维护自定义事件方言。 -- 可观测性优先:每次 run、每次 stage、每次 LLM 调用可追踪。 -- 配置单一来源:所有密钥和模型配置只走 `core.config.settings`。 - -## 目标架构 - -### 1) 分层 - -- `backend/src/v1/agent/` - - `router.py`: 暴露 HTTP/SSE 接口。 - - `schemas.py`: 请求/响应 DTO 和输入校验。 - - `dependencies.py`: DI 装配。 - - `service.py`: 薄服务,仅调用 `core/agent` 应用服务。 -- `backend/src/core/agent/` - - `application/`: run/resume 应用服务。 - - `domain/`: run 状态机、resume 幂等语义、错误模型。 - - `infrastructure/crewai/`: CrewAI Agent/Task/Crew/Flow 装配与执行。 - - `infrastructure/agui/`: AG-UI 事件映射与 SSE 序列化。 - - `infrastructure/litellm/`: LiteLLM 客户端与 usage/cost 拦截器。 - - `infrastructure/queue/`: Celery task producer/consumer。 - -### 1.1) 配置来源与合并策略 - -- Agent 运行配置由两部分组成: - - 数据库存量配置:`system_agents`(每种 agent_type 对应 llm 与 llm_config)。 - - 静态模板配置:`backend/src/core/config/static/crewai/*.yaml`(角色描述、任务模板、workflow、tools)。 -- 合并策略: - - `llm` 与 `llm_config` 以 `system_agents` 为准。 - - prompt 模板、task 描述、flow stage、tool 白名单以 static/crewai 为准。 - - 若任一 agent_type 在 `system_agents` 缺失,运行前失败并返回受控错误。 - -### 2) 核心运行链路 - -1. `POST /api/v1/agent/runs` 只负责参数校验和鉴权。 -2. 路由调用 `AgentRunAppService.enqueue_run()`,写入 run 记录并投递 Celery。 -3. Worker 执行 `run_agent_task`: - - 读取 run 上下文。 - - 构建 CrewAI `Agent/Task/Crew/Flow`。 - - 通过 `ag-ui-crewai` 将执行事件转为 AG-UI 标准事件。 - - 每次 LLM 调用由 LiteLLM 中间层记录 token/cost。 -4. 事件落库并发布到事件通道(Redis Stream/Channel)。 -5. SSE 接口从事件通道读取并持续推送,直到 `RUN_FINISHED` 或 `RUN_ERROR`。 - -### 3) Resume 链路 - -1. `POST /api/v1/agent/runs/{run_id}/resume` 校验 `interrupt_id` 与决策 payload。 -2. 调用 `enqueue_resume()` 投递 `resume_agent_task`。 -3. Worker 在事务内做并发控制: - - `run_id + interrupt_id` 幂等锁。 - - 过期校验与状态迁移。 -4. 
恢复后继续 CrewAI Flow,事件按 AG-UI 继续输出。 - -### 4) Session 状态持久化 - -- 使用 `sessions.state_snapshot` 作为运行态单一快照来源。 -- 快照至少包含: - - run 上下文(thread_id、run_id、stage) - - pending_tool_calls(tool_call_id、tool_name、args、status、expires_at) - - correlation 索引(tool_call_id -> message_id / step_id) -- 所有中断/恢复均以 `state_snapshot` 事务更新为准,避免内存态漂移。 - -### 5) 会话与消息落库模型 - -- 会话主表:`sessions` - - 新建 run 时写入:`id/user_id/session_type/status=running/last_activity_at`。 - - 运行中持续更新:`status`、`last_activity_at`、`message_count`、`total_tokens`、`total_cost`、`state_snapshot`。 - - 运行结束更新: - - 成功:`status=completed` - - 失败:`status=failed` -- 消息表:`messages` - - 用户输入落库为 `role=user`(每次 run 开始时先写入)。 - - 模型输出落库为 `role=assistant`(按最终聚合文本落库,保留 metadata 记录增量信息)。 - - 工具调用结果落库为 `role=tool`,并写入 `tool_name` 与 `metadata.tool_call_id`。 - - `seq` 由每个 `session_id` 内单调递增分配,满足 `uq_messages_session_seq`。 -- 计量落库:每次 LLM 调用的 usage/cost 先写消息级,再聚合更新到 session 级。 - -## 六项要求落地映射 - -### 要求 1: `.env` 驱动 LLM API Key - -- 新增 `LLMSettings` 到 `core.config.settings.Settings`,统一定义: - - `SOCIAL_LLM__PROVIDER_KEYS__DASHSCOPE` - - `SOCIAL_LLM__PROVIDER_KEYS__MINIMAX` - - `SOCIAL_LLM__PROVIDER_KEYS__MOONSHOT` - - `SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK` - - `SOCIAL_LLM__PROVIDER_KEYS__ARK` - - `SOCIAL_LLM__PROVIDER_KEYS__ZAI` -- 禁止 `os.environ` 直接读取密钥。 - -### 要求 2: 对话和 resume 走 Celery - -- Web 层不直接执行编排。 -- `run`/`resume` 一律入队,Worker 处理,Web 仅做事件流转发。 -- 加入任务级超时、重试、死信策略。 - -### 要求 3: v1 仅路由与调用 - -- `v1/agent/service.py` 仅保留应用服务调用和错误映射。 -- 任何编排、状态机、工具执行逻辑禁止进入 `v1`。 - -### 要求 4: CrewAI 官方流程 - -- 采用 CrewAI 原生对象:`Agent`、`Task`、`Crew`、`Flow`。 -- tools 通过 CrewAI Tool 机制注册,不做平行实现。 -- 任务模板与 agent 配置集中化(静态模板 + 运行时拼装)。 -- 配置拼装明确依赖 `system_agents + static/crewai`,不再使用双套来源。 - -### 要求 5: AG-UI + ag-ui-crewai - -- 事件集遵循 AG-UI 协议,生命周期闭环: - - `RUN_STARTED` - - 流式消息和工具事件 - - 终态 `RUN_FINISHED` 或 `RUN_ERROR` -- 优先引入 `ag-ui-crewai` 做 CrewAI 到 AG-UI 的桥接,避免重复造轮子。 - -### 要求 6: LiteLLM token/cost 统计 - -- 所有 LLM 调用通过 LiteLLM 统一出入口。 -- 
按调用粒度记录:`input_tokens`、`output_tokens`、`total_tokens`、`cost`、`currency`。 -- 按 run 粒度聚合并落库,支持后续计费和审计。 - -## 数据与可观测性 - -- 保留现有 Agent 相关表结构,不在本次硬切做数据库破坏性变更。 -- 新增事件日志与调用指标落点(如已有字段不足,后续增量迁移)。 -- 日志使用结构化字段:`run_id`、`task_id`、`stage`、`tool_name`、`llm_model`、`latency_ms`。 -- 持久化原则:run/resume 的关键状态变更必须可重放,禁止仅保存在内存。 - -## 事务边界 - -- `run` 入口事务:创建或加载 `session` + 写入用户消息。 -- `worker` 执行事务(可分阶段短事务): - - 阶段开始:更新 `session.status/state_snapshot`。 - - LLM 返回:写 assistant/tool 消息 + 更新 token/cost 聚合。 - - 中断:写 `pending_tool_calls` 到 `state_snapshot` 并提交。 - - 完成:更新终态 `session.status` 并提交。 -- `resume` 事务:校验 `interrupt_id` 与 ownership,CAS 更新 `state_snapshot`,然后进入后续执行事务。 - -## 错误处理与安全 - -- API Key 缺失启动即失败,不进入运行态。 -- 外部工具入参统一白名单和 schema 校验。 -- resume 决策必须鉴权与会话所有权校验。 -- 错误响应遵循 RFC 7807,避免泄漏敏感上下文。 - -## 工具调用与恢复语义 - -- 工具分三类: - - 前端工具:由 `RunAgentInput.tools` 提供能力声明,触发 interrupt,由客户端执行并回传 result。 - - 后端工具(需审批):先 interrupt 给前端审批;审批通过后由后端执行,不由前端执行。 - - 后端工具(直执):后端直接执行。 -- 一致性约束: - - 每个 tool_result 必须携带 `tool_call_id`。 - - 后端仅接受当前 `state_snapshot.pending_tool_calls` 中存在且状态合法的 `tool_call_id`。 - - 若收到未知/已消费/过期 `tool_call_id`,立即产出 `RUN_ERROR` 并记录审计日志。 - -## 测试策略 - -- 单元测试: - - 配置解析与 key 解析 - - run/resume 状态机与幂等 - - LiteLLM usage 聚合 -- 集成测试: - - API 入队 - - Worker 消费 - - SSE 事件顺序与终态 -- E2E: - - run 成功链路 - - interrupt + resume 链路 - - tool 调用链路 - -## 迁移策略 - -- 阶段 0(本次):硬切删除旧代码、旧测试、旧文档契约。 -- 阶段 1:搭建新架构骨架和最小可运行 run 流程。 -- 阶段 2:接入 CrewAI + ag-ui-crewai + LiteLLM 完整链路。 -- 阶段 3:补齐可观测性、压测与稳定性治理。 - -## 验收标准 - -- 后端仓库不存在旧 `v1/agent` 和 `core/agent` 旧实现。 -- 所有 Agent 相关旧测试与旧文档契约已移除。 -- 新方案设计文档明确覆盖六项要求并可进入实现阶段。 diff --git a/docs/plans/2026-03-04-agent-hard-reset-plan.md b/docs/plans/2026-03-04-agent-hard-reset-plan.md deleted file mode 100644 index c665090..0000000 --- a/docs/plans/2026-03-04-agent-hard-reset-plan.md +++ /dev/null @@ -1,574 +0,0 @@ -# Agent 后端重建 Implementation Plan - -> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. 
- -**Goal:** 在后端重建 Agent 运行时,满足队列异步、CrewAI 配置打通、AG-UI 工具中断恢复、LiteLLM 计量、以及 `sessions.state_snapshot` 持久化要求。 - -**Architecture:** `v1/agent` 仅做 API/鉴权/参数校验与 SSE 输出,`core/agent` 负责编排与执行。Agent 创建配置由 `system_agents`(数据库)+ `core/config/static/crewai/*.yaml`(静态模板)合并生成。run/resume 全链路通过 Celery Worker 执行,状态写入 `sessions.state_snapshot`。 - -**Tech Stack:** FastAPI, Celery, Redis, CrewAI, ag-ui-crewai, LiteLLM, SQLAlchemy, Alembic, pytest - ---- - -### Task 1: 建立配置聚合器(system_agents + static/crewai) - -**Files:** -- Create: `backend/src/core/agent/infrastructure/config/resolver.py` -- Modify: `backend/src/core/config/static/crewai/agents.yaml` -- Modify: `backend/src/core/config/static/crewai/tasks.yaml` -- Create: `backend/src/core/config/static/crewai/workflow.yaml` -- Create: `backend/src/core/config/static/crewai/tools.yaml` -- Test: `backend/tests/unit/core/agent/test_config_resolver.py` - -**Step 1: Write the failing test** - -```python -def test_resolver_merges_system_agents_and_static_templates(): - resolved = resolve_agent_runtime_config(...) 
- assert resolved.intent.llm.model_code == "deepseek-v3.2" - assert "intent" in resolved.workflow_stages -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q` -Expected: FAIL with `NameError` or import not found - -**Step 3: Write minimal implementation** - -```python -def resolve_agent_runtime_config(system_agents: list[dict], static_cfg: dict) -> RuntimeConfig: - by_type = {item["agent_type"]: item for item in system_agents} - return RuntimeConfig.from_sources(by_type=by_type, static_cfg=static_cfg) -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_config_resolver.py::test_resolver_merges_system_agents_and_static_templates -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure/config/resolver.py backend/src/core/config/static/crewai backend/tests/unit/core/agent/test_config_resolver.py -git commit -m "feat: add system_agents and static crewai config resolver" -``` - -### Task 2: 统一 LLM Key 与模型配置入口 - -**Files:** -- Modify: `backend/src/core/config/settings.py` -- Modify: `.env.example` -- Create: `backend/tests/unit/core/config/test_llm_settings.py` - -**Step 1: Write the failing test** - -```python -def test_llm_keys_read_from_settings(monkeypatch): - monkeypatch.setenv("SOCIAL_LLM__PROVIDER_KEYS__DEEPSEEK", "k1") - s = Settings() - assert s.llm.provider_keys.deepseek == "k1" -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q` -Expected: FAIL with missing `llm` field - -**Step 3: Write minimal implementation** - -```python -class LLMProviderKeys(BaseModel): - deepseek: str | None = None - -class LLMSettings(BaseModel): - provider_keys: LLMProviderKeys = 
LLMProviderKeys() -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/config/test_llm_settings.py::test_llm_keys_read_from_settings -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/config/settings.py .env.example backend/tests/unit/core/config/test_llm_settings.py -git commit -m "feat: centralize llm provider keys in settings" -``` - -### Task 3: sessions 表状态快照契约落地 - -**Files:** -- Create: `backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py` -- Modify: `backend/src/models/agent_chat_session.py` -- Create: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py` - -**Step 1: Write the failing test** - -```python -def test_sessions_has_state_snapshot_column(db_inspector): - columns = db_inspector.get_columns("sessions") - assert "state_snapshot" in [c["name"] for c in columns] -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q` -Expected: FAIL when migration not applied - -**Step 3: Write minimal implementation** - -```python -def upgrade() -> None: - op.add_column("sessions", sa.Column("state_snapshot", postgresql.JSONB, nullable=True)) -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py::test_sessions_has_state_snapshot_column -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/alembic/versions/20260304_add_sessions_state_snapshot_contract.py backend/src/models/agent_chat_session.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py -git commit -m "feat(db): enforce sessions state_snapshot contract" -``` - -### Task 3.1: 会话与消息持久化仓储 - -**Files:** -- Create: `backend/src/core/agent/infrastructure/persistence/session_repository.py` -- 
Create: `backend/src/core/agent/infrastructure/persistence/message_repository.py` -- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py` - -**Step 1: Write the failing test** - -```python -def test_run_persists_user_and_assistant_messages(db_session): - run = execute_run(...) - rows = list_messages(session_id=run.session_id) - assert rows[0].role == "user" - assert rows[1].role == "assistant" -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -async def append_message(...): - session.add(AgentChatMessage(...)) - -async def update_session_aggregate(...): - session_obj.message_count = message_count -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py::test_run_persists_user_and_assistant_messages -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure/persistence backend/tests/integration/core/agent/test_session_message_persistence.py -git commit -m "feat: persist session lifecycle and messages for agent runs" -``` - -### Task 4: 定义 state_snapshot 结构与并发语义 - -**Files:** -- Create: `backend/src/core/agent/domain/state_snapshot.py` -- Create: `backend/tests/unit/core/agent/test_state_snapshot.py` - -**Step 1: Write the failing test** - -```python -def test_pending_tool_call_snapshot_contains_correlation_fields(): - snap = StateSnapshot.new(...) 
- pending = snap.pending_tool_calls[0] - assert pending.tool_call_id - assert pending.status == "PENDING_APPROVAL" -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -class PendingToolCall(BaseModel): - tool_call_id: str - tool_name: str - status: Literal["PENDING_APPROVAL", "APPROVED", "EXECUTED", "REJECTED", "EXPIRED"] -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_state_snapshot.py::test_pending_tool_call_snapshot_contains_correlation_fields -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/domain/state_snapshot.py backend/tests/unit/core/agent/test_state_snapshot.py -git commit -m "feat: define sessions state_snapshot schema for run and tool state" -``` - -### Task 5: 工具路由策略(前端/后端/审批) - -**Files:** -- Create: `backend/src/core/agent/domain/tool_policy.py` -- Create: `backend/tests/unit/core/agent/test_tool_policy.py` - -**Step 1: Write the failing test** - -```python -def test_frontend_tool_requires_interrupt_and_client_execution(): - decision = classify_tool_call(name="ui.navigate_to", source="request.tools") - assert decision.mode == "FRONTEND_EXECUTE" - -def test_backend_approval_tool_returns_interrupt_but_executes_on_backend_after_approve(): - decision = classify_tool_call(name="srv.transfer_funds", requires_approval=True) - assert decision.mode == "BACKEND_APPROVAL_INTERRUPT" -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -if tool_name.startswith("ui."): - return ToolDecision(mode="FRONTEND_EXECUTE") -if requires_approval: - return 
ToolDecision(mode="BACKEND_APPROVAL_INTERRUPT") -return ToolDecision(mode="BACKEND_DIRECT_EXECUTE") -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_policy.py -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/domain/tool_policy.py backend/tests/unit/core/agent/test_tool_policy.py -git commit -m "feat: add frontend/backend tool policy and approval routing" -``` - -### Task 6: tool_call 与 tool_result 对账机制 - -**Files:** -- Create: `backend/src/core/agent/domain/tool_correlation.py` -- Create: `backend/tests/unit/core/agent/test_tool_correlation.py` - -**Step 1: Write the failing test** - -```python -def test_rejects_tool_result_when_tool_call_id_not_pending(): - store = PendingToolStore([]) - with pytest.raises(ToolCorrelationError): - store.apply_result(tool_call_id="unknown", result={"ok": True}) -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -def apply_result(self, *, tool_call_id: str, result: dict) -> None: - pending = self._pending.get(tool_call_id) - if pending is None: - raise ToolCorrelationError("tool_call_id not pending") - pending.result = result -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_tool_correlation.py::test_rejects_tool_result_when_tool_call_id_not_pending -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/domain/tool_correlation.py backend/tests/unit/core/agent/test_tool_correlation.py -git commit -m "feat: add tool call/result correlation guard" -``` - -### Task 7: Celery run/resume 异步任务 - -**Files:** -- Create: `backend/src/core/agent/infrastructure/queue/tasks.py` -- Create: 
`backend/src/core/agent/application/run_service.py` -- Create: `backend/src/core/agent/application/resume_service.py` -- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py` - -**Step 1: Write the failing test** - -```python -def test_run_api_enqueues_celery_task(client): - resp = client.post("/api/v1/agent/runs", json={...}) - assert resp.status_code == 202 - -def test_resume_updates_session_status_and_snapshot(client): - resp = client.post("/api/v1/agent/runs/r1/resume", json={...}) - assert resp.status_code == 202 -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -def enqueue_run(cmd: RunCommand) -> str: - task = run_agent_task.apply_async(args=[cmd.model_dump()]) - return task.id -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py::test_run_api_enqueues_celery_task -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/application backend/src/core/agent/infrastructure/queue backend/tests/integration/core/agent/test_queue_run_resume.py -git commit -m "feat: add celery-based run and resume tasks" -``` - -### Task 8: CrewAI 运行时加载与创建 - -**Files:** -- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py` -- Create: `backend/src/core/agent/infrastructure/crewai/factory.py` -- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py` - -**Step 1: Write the failing test** - -```python -def test_runtime_creates_agents_tasks_from_resolved_config(): - runtime = CrewAIRuntime(...) 
- crew = runtime.build_crew(message="hello") - assert len(crew.agents) >= 1 -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -def build_crew(self, *, message: str) -> Crew: - agents = self._factory.build_agents(self._config) - tasks = self._factory.build_tasks(self._config, message=message) - return Crew(agents=agents, tasks=tasks) -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py::test_runtime_creates_agents_tasks_from_resolved_config -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure/crewai backend/tests/unit/core/agent/test_crewai_runtime.py -git commit -m "feat: create crewai runtime from resolved config" -``` - -### Task 9: AG-UI 与 ag-ui-crewai 事件桥 - -**Files:** -- Create: `backend/src/core/agent/infrastructure/agui/bridge.py` -- Create: `backend/src/core/agent/infrastructure/agui/stream.py` -- Test: `backend/tests/unit/core/agent/test_agui_bridge.py` - -**Step 1: Write the failing test** - -```python -def test_agui_stream_emits_required_lifecycle(): - events = to_agui_events(internal_events=[...]) - assert events[0]["type"] == "RUN_STARTED" - assert events[-1]["type"] in {"RUN_FINISHED", "RUN_ERROR"} -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -def to_agui_events(internal_events: list[dict]) -> list[dict]: - return [map_event(e) for e in internal_events] -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest 
backend/tests/unit/core/agent/test_agui_bridge.py::test_agui_stream_emits_required_lifecycle -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure/agui backend/tests/unit/core/agent/test_agui_bridge.py -git commit -m "feat: add ag-ui and ag-ui-crewai event bridge" -``` - -### Task 10: LiteLLM 调用统计与会话聚合 - -**Files:** -- Create: `backend/src/core/agent/infrastructure/litellm/client.py` -- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py` -- Test: `backend/tests/unit/core/agent/test_litellm_usage.py` - -**Step 1: Write the failing test** - -```python -def test_tracker_aggregates_per_call_usage_and_cost(): - t = UsageTracker() - t.add({"input_tokens": 10, "output_tokens": 5, "cost": "0.1"}) - assert t.snapshot()["total_tokens"] == 15 -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -def add(self, usage: dict[str, object]) -> None: - self.input_tokens += int(usage.get("input_tokens", 0)) - self.output_tokens += int(usage.get("output_tokens", 0)) -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent/test_litellm_usage.py::test_tracker_aggregates_per_call_usage_and_cost -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure/litellm backend/tests/unit/core/agent/test_litellm_usage.py -git commit -m "feat: add litellm usage and cost tracking" -``` - -### Task 11: v1/agent 薄层 API + SSE 出口 - -**Files:** -- Create: `backend/src/v1/agent/router.py` -- Create: `backend/src/v1/agent/schemas.py` -- Create: `backend/src/v1/agent/dependencies.py` -- Create: `backend/src/v1/agent/service.py` -- Modify: `backend/src/v1/router.py` -- Test: 
`backend/tests/integration/v1/agent/test_routes.py` - -**Step 1: Write the failing test** - -```python -def test_run_endpoint_returns_sse_and_not_blocking(client): - resp = client.post("/api/v1/agent/runs", json={...}) - assert resp.status_code == 202 -``` - -**Step 2: Run test to verify it fails** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q` -Expected: FAIL - -**Step 3: Write minimal implementation** - -```python -@router.post("/runs", status_code=202) -async def create_run(...): - task_id = service.enqueue_run(input_data) - return {"task_id": task_id} -``` - -**Step 4: Run test to verify it passes** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/v1/agent/test_routes.py::test_run_endpoint_returns_sse_and_not_blocking -q` -Expected: PASS - -**Step 5: Commit** - -```bash -git add backend/src/v1/agent backend/src/v1/router.py backend/tests/integration/v1/agent/test_routes.py -git commit -m "feat: add thin v1 agent api and sse endpoints" -``` - -### Task 12: 端到端验证与文档回填 - -**Files:** -- Modify: `docs/runtime/runtime-route.md` -- Modify: `docs/runtime/runtime-runbook.md` - -**Step 1: Run unit tests** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/unit/core/agent backend/tests/unit/core/config backend/tests/unit/database -q` -Expected: PASS - -**Step 2: Run integration tests** - -Run: `PYTHONPATH=backend/src uv run pytest backend/tests/integration/core/agent backend/tests/integration/v1/agent -q` -Expected: PASS - -**Step 3: Run lint and typecheck** - -Run: `PYTHONPATH=backend/src uv run ruff check backend/src backend/tests` -Expected: PASS - -Run: `PYTHONPATH=backend/src uv run basedpyright backend/src` -Expected: PASS - -**Step 4: Document protocol contracts** - -在运行手册中补充以下固定规则: -- `system_agents` + `static/crewai` 配置合并优先级。 -- `sessions.state_snapshot` 字段结构与版本号。 -- `messages` 入库顺序与 `sessions` 聚合字段更新规则。 -- 工具调用审批与恢复时序图。 -- 
tool_call/result 不匹配时的错误语义(`RUN_ERROR` + 可审计日志)。 - -**Step 5: Commit** - -```bash -git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md -git commit -m "docs: add new agent runtime contracts and operational guide" -``` - -## Success Criteria - -- [ ] Agent 创建配置由 `system_agents` 与 `core/config/static/crewai` 合并生成。 -- [ ] run/resume 仅通过 Celery Worker 执行,Web 不执行编排。 -- [ ] `v1/agent` 无业务编排代码。 -- [ ] `sessions.state_snapshot` 承担运行态和工具审批恢复状态。 -- [ ] 每次 run/resume 的会话状态变更均落库到 `sessions`。 -- [ ] 用户/助手/工具消息按 `messages` 约束落库,`seq` 单调递增。 -- [ ] 前端工具与后端工具(审批/非审批)策略完整可测。 -- [ ] tool_call 与 tool_result 具备强关联校验并可恢复/报错。 -- [ ] LiteLLM 逐次计量与 run 聚合可落库。 diff --git a/docs/plans/2026-03-04-simplify-agent-architecture-design.md b/docs/plans/2026-03-04-simplify-agent-architecture-design.md deleted file mode 100644 index a5cd637..0000000 --- a/docs/plans/2026-03-04-simplify-agent-architecture-design.md +++ /dev/null @@ -1,199 +0,0 @@ -# Agent Architecture Simplification Design - -**Date:** 2026-03-04 -**Status:** Approved -**Author:** AI Assistant - -## Overview - -Simplify the agent configuration architecture by removing the redundant `user_agents` table and renaming `user_agent_catalog` to `system_agents`. - -## Problem Statement - -Current architecture has redundant data: -- `user_agent_catalog`: System-level agent configurations (3 agent types for all users) -- `user_agents`: Per-user agent instances (copies catalog data for each user) - -Since every user has the same 3 agents with identical configurations (from catalog), maintaining `user_agents` table creates unnecessary complexity and data duplication. - -## Goals - -1. Remove `user_agents` table and related code -2. Rename `user_agent_catalog` to `system_agents` for clarity -3. Preserve ability for future user-level prompt customization via `profiles.settings` -4. Maintain backward compatibility in deployment process - -## Non-Goals - -- User-level agent configuration (LLM selection, temperature, etc.) 
-- User-level prompt customization implementation (deferred to future iteration) - -## Architecture Changes - -### Current Architecture - -``` -user_agent_catalog (system config) - ↓ (trigger copies for each new user) -user_agents (per-user instances) -``` - -### New Architecture - -``` -system_agents (shared by all users) -profiles.settings.agent_prompts (future: user-level prompts) -``` - -### Data Flow - -1. System startup: Load `system_agents` from YAML -2. User creation: No longer creates `user_agents` records -3. Runtime (future): Read from `system_agents` + merge with `profiles.settings.agent_prompts` - -## Database Migration - -### Changes - -1. **Delete `memories.agent_id` column** - - Remove foreign key `fk_memories_agent_id` - - Remove check constraint `chk_memory_type_agent_id` - - Remove index `ix_memories_agent_type_status` - - Drop column `agent_id` - -2. **Delete `user_agents` table** - - Remove all RLS policies - - Remove indexes: `ix_user_agents_agent_type`, `ix_user_agents_status` - - Remove foreign keys: `fk_user_agents_user_id`, `fk_user_agents_llm_id`, etc. - - Remove check constraint `chk_agent_type` - - Remove unique constraint `uq_user_agents_user_id_agent_type` - - Drop table - -3. **Rename `user_agent_catalog` → `system_agents`** - - Remove old RLS policies - - Rename table - - Rename constraints: `fk_user_agent_catalog_llm_id` → `fk_system_agents_llm_id` - - Rename check constraint: `chk_user_agent_catalog_status` → `chk_system_agents_status` - - Re-create RLS policies with new table name - -4. **Update trigger `create_profile_for_new_user()`** - - Remove logic that inserts into `user_agents` - - Initialize `profiles.settings.agent_prompts` with empty object - -5. 
**Update existing `profiles.settings`** - - Add `agent_prompts: {}` to all existing profiles - -### Downgrade Path - -- Re-create `user_agents` table with all constraints and indexes -- Restore `memories.agent_id` column and constraints -- Rename `system_agents` → `user_agent_catalog` -- Restore original trigger - -## Code Changes - -### Model Layer - -**Delete:** -- `backend/src/models/user_agents.py` - -**Rename:** -- `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py` -- Class `UserAgentCatalog` → `SystemAgents` - -**Update:** -- `backend/src/models/__init__.py` - Update imports and exports - -### Configuration Layer - -**Rename:** -- `backend/src/core/config/static/database/user_agent_catalog.yaml` - → `backend/src/core/config/static/database/system_agents.yaml` - -**Update:** -- `backend/src/core/config/initial/init_data.py` - - `UserAgentCatalogSeed` → `SystemAgentsSeed` - - `UserAgentCatalogYaml` → `SystemAgentsYaml` - - Import from `models.system_agents` - - Path: `system_agents.yaml` - - Function: `initialize_user_agent_catalog()` → `initialize_system_agents()` - -### Future: Profile Settings Structure (Deferred) - -```json -{ - "agent_prompts": { - "INTENT_RECOGNITION": "custom prompt...", - "TASK_EXECUTION": "custom prompt...", - "RESULT_REPORTING": "custom prompt..." 
- } -} -``` - -## Testing Strategy - -### Migration Tests - -- Verify `user_agents` table is deleted -- Verify `system_agents` table exists with correct structure -- Verify trigger no longer creates `user_agents` records -- Verify `profiles.settings.agent_prompts` is initialized -- Verify downgrade path works correctly - -### Model Tests - -- Verify `SystemAgents` model CRUD operations -- Verify `Profile.settings` JSONB storage - -### Integration Tests - -- Verify `initialize_system_agents()` loads from YAML -- Verify data is correctly inserted into `system_agents` table - -## Deployment Considerations - -### Pre-deployment - -- Backup database (especially `user_agents` if any data exists) -- Confirm production `user_agents` table has no critical data - -### Deployment - -1. Run migration: `alembic upgrade head` -2. Verify migration success -3. Restart application services -4. Verify new user registration works without `user_agents` - -### Post-deployment - -- Monitor application logs for any references to deleted `user_agents` -- Verify agent-related functionality still works - -## Risks and Mitigations - -| Risk | Mitigation | -|------|-----------| -| Existing `user_agents` data loss | Backup before migration; data is redundant anyway | -| Code still references `user_agents` | Comprehensive code search and testing | -| Trigger fails on new user creation | Test migration thoroughly; include rollback plan | -| Future need for user-level config | Can add `agent_overrides` to `profiles.settings` | - -## Success Criteria - -- [ ] All tests pass -- [ ] Migration runs successfully (upgrade and downgrade) -- [ ] New user registration creates profile without `user_agents` records -- [ ] System agents are loaded from YAML correctly -- [ ] No references to `user_agents` remain in codebase - -## Timeline - -- Design: 2026-03-04 (Completed) -- Implementation: TBD -- Testing: TBD -- Deployment: TBD - -## References - -- Migration file: 
`backend/alembic/versions/YYYYMMDD_simplify_agent_architecture.py` -- Original catalog migration: `backend/alembic/versions/50ae013ce530_add_user_agent_catalog.py` diff --git a/docs/plans/2026-03-04-simplify-agent-architecture.md b/docs/plans/2026-03-04-simplify-agent-architecture.md deleted file mode 100644 index 186e86d..0000000 --- a/docs/plans/2026-03-04-simplify-agent-architecture.md +++ /dev/null @@ -1,844 +0,0 @@ -# Agent Architecture Simplification Implementation Plan - -> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Simplify agent configuration by removing redundant user_agents table and renaming user_agent_catalog to system_agents - -**Architecture:** Delete user_agents table (including memories.agent_id dependency), rename user_agent_catalog to system_agents, update all references in code - -**Tech Stack:** Python 3.11+, SQLAlchemy, Alembic, PostgreSQL - ---- - -## Prerequisites - -- [ ] Current branch: dev -- [ ] No uncommitted changes -- [ ] Docker services running (Supabase local) - -## Task 1: Create Database Migration - -**Files:** -- Create: `backend/alembic/versions/20260304_simplify_agent_architecture.py` - -**Step 1: Create migration file** - -Run: `cd backend && uv run alembic revision -m "simplify_agent_architecture"` - -Expected: New migration file created with revision ID - -**Step 2: Write migration upgrade logic** - -Edit the generated migration file with this complete upgrade function: - -```python -def upgrade() -> None: - # 1. Delete memories.agent_id dependencies - op.drop_constraint("fk_memories_agent_id", "memories", type_="foreignkey") - op.drop_constraint("chk_memory_type_agent_id", "memories", type_="check") - op.execute("DROP INDEX IF EXISTS ix_memories_agent_type_status") - op.drop_column("memories", "agent_id") - - # 2. 
Delete user_agents table - _drop_rls("user_agents") - - op.drop_constraint("fk_user_agents_updated_by", "user_agents", type_="foreignkey") - op.drop_constraint("fk_user_agents_created_by", "user_agents", type_="foreignkey") - op.drop_constraint("fk_user_agents_llm_id", "user_agents", type_="foreignkey") - op.drop_constraint("fk_user_agents_user_id", "user_agents", type_="foreignkey") - op.drop_constraint("chk_agent_type", "user_agents", type_="check") - op.drop_constraint("uq_user_agents_user_id_agent_type", "user_agents", type_="unique") - - op.execute("DROP INDEX IF EXISTS ix_user_agents_status") - op.execute("DROP INDEX IF EXISTS ix_user_agents_agent_type") - - op.drop_table("user_agents") - - # 3. Rename user_agent_catalog to system_agents - _drop_rls("user_agent_catalog") - - op.rename_table("user_agent_catalog", "system_agents") - - op.execute( - "ALTER TABLE system_agents RENAME CONSTRAINT fk_user_agent_catalog_llm_id " - "TO fk_system_agents_llm_id" - ) - op.execute( - "ALTER TABLE system_agents RENAME CONSTRAINT chk_user_agent_catalog_status " - "TO chk_system_agents_status" - ) - - _enable_rls("system_agents") - - # 4. 
Update trigger - op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users") - op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()") - - op.execute(""" - CREATE OR REPLACE FUNCTION public.create_profile_for_new_user() - RETURNS trigger - LANGUAGE plpgsql - SECURITY DEFINER - SET search_path = public - AS $$ - BEGIN - INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at) - VALUES ( - NEW.id, - COALESCE( - NEW.raw_user_meta_data ->> 'username', - split_part(NEW.email, '@', 1), - 'user_' || substring(NEW.id::text, 1, 8) - ), - NULL, - NULL, - '{"agent_prompts": {}}'::jsonb, - now(), - now() - ) - ON CONFLICT (id) DO NOTHING; - - RETURN NEW; - END; - $$ - """) - - op.execute(""" - CREATE TRIGGER on_auth_user_created - AFTER INSERT ON auth.users - FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user() - """) - - # 5. Update existing profiles.settings - op.execute(""" - UPDATE profiles - SET settings = jsonb_set( - COALESCE(settings, '{}'::jsonb), - '{agent_prompts}', - '{}'::jsonb - ) - WHERE NOT settings ? 'agent_prompts' - """) -``` - -**Step 3: Write migration downgrade logic** - -Add this complete downgrade function: - -```python -def downgrade() -> None: - # 1. 
Revert trigger - op.execute("DROP TRIGGER IF EXISTS on_auth_user_created ON auth.users") - op.execute("DROP FUNCTION IF EXISTS public.create_profile_for_new_user()") - - op.execute(""" - CREATE OR REPLACE FUNCTION public.create_profile_for_new_user() - RETURNS trigger - LANGUAGE plpgsql - SECURITY DEFINER - SET search_path = public - AS $$ - BEGIN - INSERT INTO public.profiles (id, username, avatar_url, bio, settings, created_at, updated_at) - VALUES ( - NEW.id, - COALESCE( - NEW.raw_user_meta_data ->> 'username', - split_part(NEW.email, '@', 1), - 'user_' || substring(NEW.id::text, 1, 8) - ), - NULL, - NULL, - '{}'::jsonb, - now(), - now() - ) - ON CONFLICT (id) DO NOTHING; - - INSERT INTO public.user_agents (id, user_id, llm_id, agent_type, config, status, created_by, updated_by) - SELECT - gen_random_uuid(), - NEW.id, - uac.llm_id, - uac.agent_type, - uac.config, - uac.status, - NEW.id, - NEW.id - FROM public.user_agent_catalog uac; - - RETURN NEW; - END; - $$ - """) - - op.execute(""" - CREATE TRIGGER on_auth_user_created - AFTER INSERT ON auth.users - FOR EACH ROW EXECUTE FUNCTION public.create_profile_for_new_user() - """) - - # 2. Revert rename: system_agents -> user_agent_catalog - _drop_rls("system_agents") - - op.rename_table("system_agents", "user_agent_catalog") - - op.execute( - "ALTER TABLE user_agent_catalog RENAME CONSTRAINT fk_system_agents_llm_id " - "TO fk_user_agent_catalog_llm_id" - ) - op.execute( - "ALTER TABLE user_agent_catalog RENAME CONSTRAINT chk_system_agents_status " - "TO chk_user_agent_catalog_status" - ) - - _enable_rls("user_agent_catalog") - - # 3. 
Recreate user_agents table - op.create_table( - "user_agents", - sa.Column("id", sa.UUID(), nullable=False), - sa.Column("user_id", sa.UUID(), nullable=False), - sa.Column("llm_id", sa.UUID(), nullable=False), - sa.Column("agent_type", sa.String(length=20), nullable=False), - sa.Column( - "config", - postgresql.JSONB(astext_type=sa.Text()), - server_default="{}", - nullable=False, - ), - sa.Column("status", sa.String(length=20), nullable=False), - sa.Column("created_by", sa.UUID(), nullable=True), - sa.Column("updated_by", sa.UUID(), nullable=True), - sa.Column( - "created_at", - sa.DateTime(timezone=True), - server_default=sa.text("now()"), - nullable=False, - ), - sa.Column( - "updated_at", - sa.DateTime(timezone=True), - server_default=sa.text("now()"), - nullable=False, - ), - sa.Column("deleted_at", sa.DateTime(timezone=True), nullable=True), - sa.PrimaryKeyConstraint("id"), - ) - - op.create_unique_constraint( - "uq_user_agents_user_id_agent_type", - "user_agents", - ["user_id", "agent_type"] - ) - - op.execute( - "CREATE INDEX ix_user_agents_agent_type ON user_agents (agent_type)" - ) - op.execute( - "CREATE INDEX ix_user_agents_status ON user_agents (status)" - ) - - op.execute( - "ALTER TABLE user_agents ADD CONSTRAINT chk_agent_type " - "CHECK (agent_type IN ('INTENT_RECOGNITION', 'TASK_EXECUTION', 'RESULT_REPORTING'))" - ) - - op.create_foreign_key( - "fk_user_agents_user_id", - "user_agents", - "users", - ["user_id"], - ["id"], - referent_schema="auth", - ondelete="CASCADE", - ) - op.create_foreign_key( - "fk_user_agents_llm_id", - "user_agents", - "llms", - ["llm_id"], - ["id"], - ondelete="RESTRICT", - ) - op.create_foreign_key( - "fk_user_agents_created_by", - "user_agents", - "users", - ["created_by"], - ["id"], - referent_schema="auth", - ondelete="SET NULL", - ) - op.create_foreign_key( - "fk_user_agents_updated_by", - "user_agents", - "users", - ["updated_by"], - ["id"], - referent_schema="auth", - ondelete="SET NULL", - ) - - 
_enable_rls("user_agents") - - # 4. Recreate memories.agent_id - op.add_column( - "memories", - sa.Column("agent_id", sa.UUID(), nullable=True) - ) - - op.create_foreign_key( - "fk_memories_agent_id", - "memories", - "user_agents", - ["agent_id"], - ["id"], - ondelete="CASCADE", - ) - - op.execute( - "CREATE INDEX ix_memories_agent_type_status ON memories (agent_id, memory_type, status)" - ) - - op.execute( - "ALTER TABLE memories ADD CONSTRAINT chk_memory_type_agent_id " - "CHECK ((memory_type = 'work' AND agent_id IS NOT NULL) OR " - "(memory_type = 'user' AND agent_id IS NULL))" - ) -``` - -**Step 4: Add helper functions** - -Add these helper functions at the end of the migration file: - -```python -def _enable_rls(table_name: str) -> None: - for role in ["anon", "authenticated"]: - for action in ["select", "insert", "update", "delete"]: - op.execute( - f"DROP POLICY IF EXISTS {role}_{action}_{table_name} ON {table_name}" - ) - op.execute(f"ALTER TABLE {table_name} ENABLE ROW LEVEL SECURITY") - for role in ["anon", "authenticated"]: - op.execute( - f"CREATE POLICY {role}_select_{table_name} ON {table_name} " - f"FOR SELECT TO {role} USING (false)" - ) - op.execute( - f"CREATE POLICY {role}_insert_{table_name} ON {table_name} " - f"FOR INSERT TO {role} WITH CHECK (false)" - ) - op.execute( - f"CREATE POLICY {role}_update_{table_name} ON {table_name} " - f"FOR UPDATE TO {role} USING (false) WITH CHECK (false)" - ) - op.execute( - f"CREATE POLICY {role}_delete_{table_name} ON {table_name} " - f"FOR DELETE TO {role} USING (false)" - ) - - -def _drop_rls(table_name: str) -> None: - for role in ["anon", "authenticated"]: - op.execute(f"DROP POLICY IF EXISTS {role}_delete_{table_name} ON {table_name}") - op.execute(f"DROP POLICY IF EXISTS {role}_update_{table_name} ON {table_name}") - op.execute(f"DROP POLICY IF EXISTS {role}_insert_{table_name} ON {table_name}") - op.execute(f"DROP POLICY IF EXISTS {role}_select_{table_name} ON {table_name}") - op.execute(f"ALTER 
TABLE {table_name} DISABLE ROW LEVEL SECURITY") -``` - -**Step 5: Verify migration file** - -Check that all imports are correct: - -```python -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql -``` - -**Step 6: Commit migration** - -```bash -git add backend/alembic/versions/20260304_simplify_agent_architecture.py -git commit -m "feat(db): add migration to simplify agent architecture" -``` - ---- - -## Task 2: Delete UserAgents Model - -**Files:** -- Delete: `backend/src/models/user_agents.py` -- Modify: `backend/src/models/__init__.py` - -**Step 1: Remove import from models/__init__.py** - -Edit `backend/src/models/__init__.py`: - -Remove these lines: -```python -from models.user_agents import UserAgent -``` - -And remove `"UserAgent"` from `__all__` list. - -**Step 2: Delete user_agents.py file** - -```bash -rm backend/src/models/user_agents.py -``` - -**Step 3: Verify no other imports** - -Run: `cd backend && grep -r "from models.user_agents" src/` - -Expected: No results (or only in __init__.py which we already fixed) - -**Step 4: Commit** - -```bash -git add backend/src/models/user_agents.py backend/src/models/__init__.py -git commit -m "refactor(models): remove UserAgents model" -``` - ---- - -## Task 3: Rename UserAgentCatalog to SystemAgents - -**Files:** -- Rename: `backend/src/models/user_agent_catalog.py` → `backend/src/models/system_agents.py` -- Modify: `backend/src/models/__init__.py` - -**Step 1: Rename model file** - -```bash -mv backend/src/models/user_agent_catalog.py backend/src/models/system_agents.py -``` - -**Step 2: Update class name in system_agents.py** - -Edit `backend/src/models/system_agents.py`: - -Change: -```python -class UserAgentCatalog(TimestampMixin, Base): - __tablename__: str = "user_agent_catalog" -``` - -To: -```python -class SystemAgents(TimestampMixin, Base): - __tablename__: str = "system_agents" -``` - -**Step 3: Update imports in 
models/__init__.py** - -Edit `backend/src/models/__init__.py`: - -Change: -```python -from models.user_agent_catalog import UserAgentCatalog -``` - -To: -```python -from models.system_agents import SystemAgents -``` - -And change `"UserAgentCatalog"` to `"SystemAgents"` in `__all__` list. - -**Step 4: Commit** - -```bash -git add backend/src/models/ -git commit -m "refactor(models): rename UserAgentCatalog to SystemAgents" -``` - ---- - -## Task 4: Update Configuration Files - -**Files:** -- Rename: `backend/src/core/config/static/database/user_agent_catalog.yaml` - → `backend/src/core/config/static/database/system_agents.yaml` -- Modify: `backend/src/core/config/initial/init_data.py` - -**Step 1: Rename YAML file** - -```bash -mv backend/src/core/config/static/database/user_agent_catalog.yaml \ - backend/src/core/config/static/database/system_agents.yaml -``` - -**Step 2: Update init_data.py imports** - -Edit `backend/src/core/config/initial/init_data.py`: - -Change: -```python -from models.user_agent_catalog import UserAgentCatalog -``` - -To: -```python -from models.system_agents import SystemAgents -``` - -**Step 3: Update Pydantic models** - -Change: -```python -class UserAgentCatalogSeed(BaseModel): - agent_type: str - llm_model_code: str - status: str - config: dict[str, Any] - - -class UserAgentCatalogYaml(BaseModel): - agents: list[UserAgentCatalogSeed] -``` - -To: -```python -class SystemAgentsSeed(BaseModel): - agent_type: str - llm_model_code: str - status: str - config: dict[str, Any] - - -class SystemAgentsYaml(BaseModel): - agents: list[SystemAgentsSeed] -``` - -**Step 4: Update path function** - -Change: -```python -def _default_user_agent_catalog_path() -> Path: - return ( - Path(__file__).resolve().parents[1] - / "static" - / "database" - / "user_agent_catalog.yaml" - ) -``` - -To: -```python -def _default_system_agents_path() -> Path: - return ( - Path(__file__).resolve().parents[1] - / "static" - / "database" - / "system_agents.yaml" - ) -``` - 
-**Step 5: Update load function** - -Change: -```python -def load_user_agent_catalog(catalog_path: Path | None = None) -> dict[str, Any]: - path = catalog_path or _default_user_agent_catalog_path() - with path.open("r", encoding="utf-8") as file: - loaded = yaml.safe_load(file) or {} - if not isinstance(loaded, dict): - raise ValueError(f"Invalid user agent catalog format: {path}") - raw_agents = loaded.get("agents", []) - if not isinstance(raw_agents, list): - raise ValueError(f"Invalid user agent catalog agents section: {path}") - try: - parsed = UserAgentCatalogYaml.model_validate({"agents": list(raw_agents)}) - except ValidationError as exc: - raise ValueError(f"Invalid user agent catalog data: {path}") from exc - - return parsed.model_dump() -``` - -To: -```python -def load_system_agents(catalog_path: Path | None = None) -> dict[str, Any]: - path = catalog_path or _default_system_agents_path() - with path.open("r", encoding="utf-8") as file: - loaded = yaml.safe_load(file) or {} - if not isinstance(loaded, dict): - raise ValueError(f"Invalid system agents format: {path}") - raw_agents = loaded.get("agents", []) - if not isinstance(raw_agents, list): - raise ValueError(f"Invalid system agents agents section: {path}") - try: - parsed = SystemAgentsYaml.model_validate({"agents": list(raw_agents)}) - except ValidationError as exc: - raise ValueError(f"Invalid system agents data: {path}") from exc - - return parsed.model_dump() -``` - -**Step 6: Update upsert function** - -Change: -```python -async def _upsert_user_agent_catalog( - session: AsyncSession, - *, - agent_type: str, - llm_id: uuid.UUID, - status: str, - config: dict[str, Any], -) -> None: - result = await session.execute( - select(UserAgentCatalog).where(UserAgentCatalog.agent_type == agent_type) - ) - catalog_entry = result.scalar_one_or_none() - - if catalog_entry is None: - session.add( - UserAgentCatalog( - agent_type=agent_type, - llm_id=llm_id, - status=status, - config=config, - ) - ) - else: - 
catalog_entry.llm_id = llm_id - catalog_entry.status = status - catalog_entry.config = config -``` - -To: -```python -async def _upsert_system_agents( - session: AsyncSession, - *, - agent_type: str, - llm_id: uuid.UUID, - status: str, - config: dict[str, Any], -) -> None: - result = await session.execute( - select(SystemAgents).where(SystemAgents.agent_type == agent_type) - ) - catalog_entry = result.scalar_one_or_none() - - if catalog_entry is None: - session.add( - SystemAgents( - agent_type=agent_type, - llm_id=llm_id, - status=status, - config=config, - ) - ) - else: - catalog_entry.llm_id = llm_id - catalog_entry.status = status - catalog_entry.config = config -``` - -**Step 7: Update initialize function** - -Change: -```python -async def initialize_user_agent_catalog() -> None: - """Initialize user agent catalog from YAML.""" - catalog = load_user_agent_catalog() - - async with AsyncSessionLocal() as session: - async with session.begin(): - for agent in catalog["agents"]: - result = await session.execute( - select(Llm).where(Llm.model_code == agent["llm_model_code"]) - ) - llm = result.scalar_one_or_none() - if llm is None: - raise RuntimeError( - f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'" - ) - - await _upsert_user_agent_catalog( - session, - agent_type=agent["agent_type"], - llm_id=llm.id, - status=agent["status"], - config=agent["config"], - ) - - logger.info("Initialized user agent catalog") -``` - -To: -```python -async def initialize_system_agents() -> None: - """Initialize system agents from YAML.""" - catalog = load_system_agents() - - async with AsyncSessionLocal() as session: - async with session.begin(): - for agent in catalog["agents"]: - result = await session.execute( - select(Llm).where(Llm.model_code == agent["llm_model_code"]) - ) - llm = result.scalar_one_or_none() - if llm is None: - raise RuntimeError( - f"LLM model '{agent['llm_model_code']}' not found for agent type '{agent['agent_type']}'" 
- ) - - await _upsert_system_agents( - session, - agent_type=agent["agent_type"], - llm_id=llm.id, - status=agent["status"], - config=agent["config"], - ) - - logger.info("Initialized system agents") -``` - -**Step 8: Update initialize_data function** - -Change: -```python -async def initialize_data() -> bool: - """Initialize bootstrap data.""" - await initialize_llm_catalog() - await initialize_user_agent_catalog() - - return True -``` - -To: -```python -async def initialize_data() -> bool: - """Initialize bootstrap data.""" - await initialize_llm_catalog() - await initialize_system_agents() - - return True -``` - -**Step 9: Commit** - -```bash -git add backend/src/core/config/ -git commit -m "refactor(config): rename user_agent_catalog to system_agents" -``` - ---- - -## Task 5: Run Migration - -**Step 1: Run migration** - -```bash -cd backend && uv run alembic upgrade head -``` - -Expected: Migration runs successfully - -**Step 2: Verify tables** - -Connect to database and check: -- `user_agents` table should NOT exist -- `system_agents` table should exist -- `memories.agent_id` column should NOT exist - -**Step 3: Test downgrade (optional but recommended)** - -```bash -cd backend && uv run alembic downgrade -1 -``` - -Expected: Previous migration restored - -**Step 4: Re-run upgrade** - -```bash -cd backend && uv run alembic upgrade head -``` - -Expected: Migration runs successfully again - ---- - -## Task 6: Run Tests and Linting - -**Step 1: Run type checking** - -```bash -cd backend && uv run basedpyright src/ -``` - -Expected: No errors - -**Step 2: Run linting** - -```bash -cd backend && uv run ruff check src/ -``` - -Expected: No errors - -**Step 3: Run tests** - -```bash -cd backend && uv run pytest tests/ -``` - -Expected: All tests pass - -**Step 4: Fix any failures** - -If any tests fail due to UserAgent references, update them to use SystemAgents. 
- ---- - -## Task 7: Final Verification - -**Step 1: Search for any remaining references** - -```bash -cd backend && grep -r "user_agents" src/ --include="*.py" -cd backend && grep -r "UserAgent" src/ --include="*.py" -``` - -Expected: No results (except in migration files) - -**Step 2: Test new user registration** - -Start the backend server and register a new user. Verify: -- Profile is created -- No user_agents records are created -- profiles.settings contains `agent_prompts: {}` - -**Step 3: Commit final changes** - -```bash -git add . -git commit -m "feat: complete agent architecture simplification" -``` - ---- - -## Success Criteria - -- [ ] Migration runs successfully (upgrade and downgrade) -- [ ] No UserAgent model references in code -- [ ] SystemAgents model works correctly -- [ ] All tests pass -- [ ] Linting passes -- [ ] Type checking passes -- [ ] New user registration works without user_agents - -## Notes - -- Keep the design document updated if any changes are made during implementation -- Test migration thoroughly before deploying to production -- Backup database before running migration in production diff --git a/docs/plans/2026-03-05-agent-runtime-closed-loop-e2e-design.md b/docs/plans/2026-03-05-agent-runtime-closed-loop-e2e-design.md deleted file mode 100644 index 14b3f73..0000000 --- a/docs/plans/2026-03-05-agent-runtime-closed-loop-e2e-design.md +++ /dev/null @@ -1,81 +0,0 @@ -# Agent Runtime Closed Loop E2E Design - -## 背景 - -当前 `test_agent_sse_flow.py` 不能稳定证明真实闭环: -- `session_id` 由随机 UUID 生成,导致 `POST /api/v1/agent/runs` 经常 404。 -- 测试脚本存在不可达重复代码,诊断信息不完整。 -- 未覆盖首聊自动建会话语义,和真实聊天入口不匹配。 - -目标是验证真实环境下业务闭环是否可用: -1. 用户请求 `agent` 路由 -2. 请求进入异步任务 -3. runtime 读取 `system_agents` 和 `llm` 配置并构建执行流程 -4. 真实 LLM 请求发出并返回 -5. `sessions`/`messages` 正确落库 -6. 成本和 token 统计正确 -7. 
事件按 AG-UI 规范发布并可由 `stream_events` 订阅 - -## 设计原则 - -- 真实优先:不使用 mock,不替换 queue/redis/db/llm。 -- 双轨验证: - - 诊断脚本用于本地排障(快速观察全链路状态)。 - - pytest E2E 用例用于可重复回归。 -- 明确前置条件:必须先使用 `infra/scripts/app.sh start` 启动 tmux 服务。 -- 本地真实 LLM 基线:DashScope Qwen。 - -## API 契约调整 - -### `POST /api/v1/agent/runs` - -- 现状:`session_id` 必填且必须存在。 -- 新契约:`session_id` 可选。 - - 有值:复用现有会话,校验 owner。 - - 无值:在服务层先创建会话,再入队 run。 -- 响应扩展:返回 `created` 标识是否为首聊自动建会话。 - -该契约与聊天产品行为一致:用户首条消息即可开始,不需要前置调用创建会话接口。 - -## 数据关系与删除语义 - -- `messages.session_id -> sessions.id` 为外键,且硬删除级联(`ondelete=CASCADE`)。 -- 软删除需要补齐级联: - - 软删 `sessions` 时,同事务更新对应 `messages.deleted_at`。 - - E2E 增加验证,确保软删后默认查询不可见。 - -## 测试架构 - -### A. 诊断脚本(根目录) - -重构 `test_agent_sse_flow.py`: -- 增加环境健康检查(web/redis/db)。 -- 支持两种模式: - - `--new-session`:不传 `session_id`,验证首聊自动创建。 - - `--reuse-session `:验证复聊路径。 -- 输出结构化阶段日志:HTTP、task_id、SSE 事件、数据库断言、失败根因。 - -### B. pytest E2E(`backend/tests/e2e`) - -新增 `test_agent_closed_loop_live.py`: -- 标记为 `live`,默认不在 CI 执行。 -- 用真实 JWT、真实 HTTP 请求、真实 SSE 订阅。 -- 断言最小闭环标准: - - run 返回 202 - - SSE 至少收到 `RUN_STARTED` 与终态(`RUN_FINISHED` 或 `RUN_ERROR`) - - `sessions` 状态和计数更新 - - `messages` 有新增记录 - - token/cost 字段非负且会话聚合一致 - -## 验收标准 - -- `uv run python test_agent_sse_flow.py --new-session` 通过。 -- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -v -m live` 通过。 -- 首聊场景不需要外部先建 `session_id`。 -- 软删除会话后,消息软删除行为与约束一致。 - -## 风险与回退 - -- 真实 LLM 网络抖动会造成不稳定:通过重试和超时策略降低误报。 -- 生产契约变更风险:保持字段向后兼容(原 `session_id` 仍可传)。 -- 如果新契约引入问题,可临时退回“必传 session_id”路径并保留测试脚本诊断能力。 diff --git a/docs/plans/2026-03-05-agent-runtime-closed-loop-e2e-plan.md b/docs/plans/2026-03-05-agent-runtime-closed-loop-e2e-plan.md deleted file mode 100644 index 2cea817..0000000 --- a/docs/plans/2026-03-05-agent-runtime-closed-loop-e2e-plan.md +++ /dev/null @@ -1,230 +0,0 @@ -# Agent Runtime Closed Loop E2E Implementation Plan - -> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. 
- -**Goal:** 让 agent 闭环在真实本地环境中可验证:`runs` 支持首聊自动建会话,并通过真实异步任务、真实 LLM、真实落库与真实 SSE 证明端到端可用。 - -**Architecture:** 在 `v1/agent` 服务层引入“可选 session_id + 自动建会话”语义;保持已有 owner 鉴权路径。重构诊断脚本并新增 live E2E 用例,统一验证 run 入队、事件流、数据库状态、成本统计与删除语义。通过最小侵入改造现有 run/resume 流程,确保兼容已存在调用。 - -**Tech Stack:** FastAPI, SQLAlchemy async, Celery, Redis Stream, LiteLLM, PyJWT, pytest, httpx - ---- - -### Task 1: 扩展 API 契约(session_id 可选) - -**Files:** -- Modify: `backend/src/v1/agent/schemas.py` -- Modify: `backend/src/v1/agent/router.py` -- Test: `backend/tests/integration/v1/agent/test_routes.py` - -**Step 1: Write the failing test** - -在 `test_routes.py` 新增用例:请求体不传 `session_id` 仍返回 202,且响应含 `session_id`。 - -**Step 2: Run test to verify it fails** - -Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -k "runs and session" -v` -Expected: FAIL,提示 `session_id` 缺失导致 422 或 mock 接口签名不匹配。 - -**Step 3: Write minimal implementation** - -- `RunRequest.session_id` 改为可选。 -- `enqueue_run` 调用 service 时传可选值。 -- `TaskAcceptedResponse` 增加 `created: bool` 字段。 - -**Step 4: Run test to verify it passes** - -Run: `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v` -Expected: PASS。 - -**Step 5: Commit** - -```bash -git add backend/src/v1/agent/schemas.py backend/src/v1/agent/router.py backend/tests/integration/v1/agent/test_routes.py -git commit -m "feat: allow agent runs without pre-created session" -``` - -### Task 2: 服务层支持自动建会话并保持鉴权 - -**Files:** -- Modify: `backend/src/v1/agent/service.py` -- Modify: `backend/src/v1/agent/repository.py` -- Modify: `backend/src/v1/agent/dependencies.py` -- Test: `backend/tests/unit/v1/agent/test_service.py` (new) - -**Step 1: Write the failing test** - -新增单测覆盖: -- `session_id is None` 时调用 `create_session_for_user` 并返回 `created=True` -- `session_id 有值` 时复用并校验 owner - -**Step 2: Run test to verify it fails** - -Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v` -Expected: FAIL,当前 service 无自动建会话能力。 - -**Step 3: Write minimal 
implementation** - -- repository 增加 `create_session_for_user(user_id)`。 -- service `enqueue_run` 处理两条路径: - - 无 `session_id`:先创建 session。 - - 有 `session_id`:校验 owner。 -- 返回 `TaskAccepted(task_id, session_id, created)`。 - -**Step 4: Run test to verify it passes** - -Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py -v` -Expected: PASS。 - -**Step 5: Commit** - -```bash -git add backend/src/v1/agent/service.py backend/src/v1/agent/repository.py backend/src/v1/agent/dependencies.py backend/tests/unit/v1/agent/test_service.py -git commit -m "feat: auto-create chat session on first agent run" -``` - -### Task 3: 对齐 runtime 闭环数据断言(messages/sessions/cost) - -**Files:** -- Modify: `backend/src/core/agent/application/run_service.py` -- Modify: `backend/src/core/agent/application/resume_service.py` -- Modify: `backend/src/core/agent/infrastructure/persistence/message_repository.py` -- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py` -- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py` - -**Step 1: Write the failing test** - -在集成测试增加断言: -- `sessions.total_tokens`、`sessions.total_cost` 有更新 -- `messages` 的 token/cost 字段与 session 聚合一致 - -**Step 2: Run test to verify it fails** - -Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v` -Expected: FAIL,当前默认 token/cost 为 0,未做聚合更新。 - -**Step 3: Write minimal implementation** - -- run/resume 流程接入 usage/cost 结果(来自 litellm 返回或 fallback 规则)。 -- message 写入时填充 input/output tokens 与 cost。 -- session 更新时累加 total_tokens/total_cost。 - -**Step 4: Run test to verify it passes** - -Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v` -Expected: PASS。 - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/application/run_service.py backend/src/core/agent/application/resume_service.py backend/src/core/agent/infrastructure/persistence/message_repository.py 
backend/src/core/agent/infrastructure/persistence/session_repository.py backend/tests/integration/core/agent/test_queue_run_resume.py -git commit -m "feat: persist runtime token and cost aggregates" -``` - -### Task 4: 补齐软删除级联(session -> messages) - -**Files:** -- Modify: `backend/src/core/agent/infrastructure/persistence/session_repository.py` -- Modify: `backend/src/v1/agent/service.py` -- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py` - -**Step 1: Write the failing test** - -新增用例:软删 session 后,同会话 messages 的 `deleted_at` 同步写入。 - -**Step 2: Run test to verify it fails** - -Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v` -Expected: FAIL,当前无软删级联。 - -**Step 3: Write minimal implementation** - -- repository 增加 `soft_delete_session_with_messages(session_id)`。 -- service 调用时使用同事务批量更新 messages。 - -**Step 4: Run test to verify it passes** - -Run: `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -k soft_delete -v` -Expected: PASS。 - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure/persistence/session_repository.py backend/src/v1/agent/service.py backend/tests/integration/core/agent/test_queue_run_resume.py -git commit -m "fix: cascade soft delete from sessions to messages" -``` - -### Task 5: 重构诊断脚本并新增 live E2E - -**Files:** -- Modify: `test_agent_sse_flow.py` -- Create: `backend/tests/e2e/test_agent_closed_loop_live.py` -- Modify: `docs/bugs/2026-03-05-agent-runtime-bugs.md` - -**Step 1: Write the failing test** - -新增 live E2E 用例(`@pytest.mark.live`): -- 首聊不传 `session_id` 返回 202 -- 订阅 SSE 收到关键事件 -- DB 断言 session/messages/tokens/cost - -**Step 2: Run test to verify it fails** - -Run: `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v` -Expected: FAIL,当前契约或脚本未对齐。 - -**Step 3: Write minimal implementation** - -- 清理脚本重复/不可达逻辑。 -- 增加健康检查、阶段化日志、超时和错误根因输出。 -- E2E 用例复用脚本中的 helper(JWT、SSE 解析、DB 断言)。 - -**Step 4: Run test to 
verify it passes** - -Run: -- `uv run python test_agent_sse_flow.py --new-session` -- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v` - -Expected: PASS。 - -**Step 5: Commit** - -```bash -git add test_agent_sse_flow.py backend/tests/e2e/test_agent_closed_loop_live.py docs/bugs/2026-03-05-agent-runtime-bugs.md -git commit -m "test: add live closed-loop agent e2e verification" -``` - -### Task 6: 全量验证与文档同步 - -**Files:** -- Modify: `docs/runtime/runtime-runbook.md` -- Modify: `docs/runtime/runtime-route.md` - -**Step 1: Run targeted checks** - -Run: -- `uv run pytest backend/tests/unit/v1/agent/test_service.py -v` -- `uv run pytest backend/tests/integration/v1/agent/test_routes.py -v` -- `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -v` -- `uv run pytest backend/tests/e2e/test_agent_closed_loop_live.py -m live -v` - -Expected: PASS。 - -**Step 2: Run quality gates** - -Run: -- `uv run ruff check backend/src backend/tests` -- `uv run basedpyright` - -Expected: PASS。 - -**Step 3: Update docs** - -记录本地启动流程、真实 LLM 前置配置、live E2E 执行方式和故障排查。 - -**Step 4: Commit** - -```bash -git add docs/runtime/runtime-runbook.md docs/runtime/runtime-route.md -git commit -m "docs: document live agent closed-loop e2e workflow" -``` diff --git a/docs/plans/2026-03-05-agent-runtime-closed-loop-implementation-plan.md b/docs/plans/2026-03-05-agent-runtime-closed-loop-implementation-plan.md deleted file mode 100644 index ea20286..0000000 --- a/docs/plans/2026-03-05-agent-runtime-closed-loop-implementation-plan.md +++ /dev/null @@ -1,469 +0,0 @@ -# Agent Runtime Closed Loop Implementation Plan - -> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. - -**Goal:** Build a production-grade closed-loop agent runtime where `frontend -> FastAPI -> Celery -> run/resume service -> CrewAI -> AG-UI events -> Redis Stream -> SSE` is fully connected and verifiable. 
- -**Architecture:** Keep HTTP API as control-plane and worker as data-plane. The API validates auth/ownership and enqueues commands, the Celery worker executes run/resume business logic using DB-driven agent config, runtime emits normalized AG-UI events and usage/cost telemetry, all events are persisted to Redis Stream, and SSE endpoint streams from Redis with resume support (`Last-Event-ID`). - -**Tech Stack:** FastAPI, SQLAlchemy AsyncSession, Celery, Redis Streams, CrewAI, LiteLLM, Pydantic, pytest (unit/integration). - -**Confirmed Constraints (locked):** -- Persist semantics use existing `messages.role` only (`assistant|user|system|tool`), no new `message_kind` column. -- `tool_result` must be semantically complete (especially UI schema); do not store summary-only payload. -- Store full `tool_result` payload in Supabase Storage (private bucket) and persist durable object reference in DB metadata; do not rely on expiring signed URL as primary reference. -- `metadata` must be fixed and typed via Pydantic model (no free-form drift). -- Do not introduce additional business tables for this scope; keep schema minimal. -- CrewAI runtime must default to streaming mode. -- Full traceability target is final semantic reconstruction of `user/assistant/tool_result`; chunk-level replay is not required. - -**Metadata Contract (fixed, Pydantic-enforced):** -- Global required keys for all message metadata: `type`, `run_id`, `turn_id`. -- Global optional keys for all message metadata: `event_id`, `parent_message_id`, `error`. -- `type=user_input`: - - Required: `type`, `run_id`, `turn_id`. - - Optional: `input_source`, `client_ts`. -- `type=assistant_output`: - - Required: `type`, `run_id`, `turn_id`. - - Optional: `finish_reason`, `model_provider`, `cost_source`. -- `type=tool_call` (`role=assistant`): - - Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `tool_args`. - - Optional: `tool_schema_version`, `timeout_ms`. 
-- `type=tool_result` (`role=tool`): - - Required: `type`, `run_id`, `turn_id`, `tool_call_id`, `tool_name`, `storage_bucket`, `storage_path`, `payload_sha256`, `payload_bytes`, `payload_format`. - - Optional: `ui_schema_version`, `compression`, `storage_etag`, `render_hints`. -- Validation rules: - - `messages.role=tool` must use `metadata.type=tool_result`. - - `messages.role=assistant` + tool event must use `metadata.type=tool_call` or `assistant_output`. - - `tool_result` payload in DB must be reconstructable to AG-UI `TOOL_CALL_RESULT` using Storage object + metadata checksum. - ---- - -### Task 1: Add Agent Module Skeleton and Contracts - -**Files:** -- Create: `backend/src/core/agent/__init__.py` -- Create: `backend/src/core/agent/application/__init__.py` -- Create: `backend/src/core/agent/domain/__init__.py` -- Create: `backend/src/core/agent/infrastructure/events/__init__.py` -- Create: `backend/src/core/agent/infrastructure/agui/bridge.py` -- Create: `backend/src/core/agent/infrastructure/agui/stream.py` -- Test: `backend/tests/unit/core/agent/test_agui_bridge.py` - -**Step 1: Write failing tests for event normalization and SSE formatting** - -```python -def test_bridge_normalizes_event_type_to_upper_snake() -> None: - events = [{"type": "runStarted", "data": {"ok": True}}] - out = to_agui_events(events) - assert out[0]["type"] == "RUN_STARTED" - - -def test_sse_format_includes_id_event_data() -> None: - payload = to_sse_event(stream_id="1-0", event={"type": "RUN_STARTED", "data": {"a": 1}}) - assert payload.startswith("id: 1-0\nevent: RUN_STARTED\ndata: {") -``` - -**Step 2: Run tests and confirm RED** - -Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q` -Expected: FAIL with missing module/function errors. - -**Step 3: Implement minimal bridge + stream utilities** - -```python -def to_agui_events(internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]: - ... 
- - -def to_sse_event(stream_id: str, event: dict[str, Any]) -> str: - ... -``` - -**Step 4: Run tests and confirm GREEN** - -Run: `uv run pytest backend/tests/unit/core/agent/test_agui_bridge.py -q` -Expected: PASS. - -**Step 5: Commit** - -```bash -git add backend/src/core/agent backend/tests/unit/core/agent/test_agui_bridge.py -git commit -m "feat(agent): add ag-ui bridge and sse serializer utilities" -``` - -### Task 2: Implement Redis Stream Event Store and Reader - -**Files:** -- Create: `backend/src/core/agent/infrastructure/events/redis_stream.py` -- Modify: `backend/src/core/config/settings.py` -- Test: `backend/tests/unit/core/agent/test_redis_stream.py` - -**Step 1: Write failing tests for append/read semantics** - -```python -def test_append_event_writes_json_payload() -> None: - ... - - -def test_read_events_respects_last_event_id() -> None: - ... -``` - -**Step 2: Run RED** - -Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q` -Expected: FAIL. - -**Step 3: Implement Redis stream adapter** - -```python -def append_event_sync(*, session_id: UUID, event: dict[str, Any]) -> str: - ... - - -async def read_events(...): - ... -``` - -**Step 4: Run GREEN** - -Run: `uv run pytest backend/tests/unit/core/agent/test_redis_stream.py -q` -Expected: PASS. 
- -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure/events/redis_stream.py backend/src/core/config/settings.py backend/tests/unit/core/agent/test_redis_stream.py -git commit -m "feat(agent): add redis stream event transport for run events" -``` - -### Task 3: Build CrewAI Runtime + AG-UI Event Mapping + Usage Tracking - -**Files:** -- Create: `backend/src/core/agent/infrastructure/crewai/factory.py` -- Create: `backend/src/core/agent/infrastructure/crewai/runtime.py` -- Create: `backend/src/core/agent/infrastructure/litellm/client.py` -- Create: `backend/src/core/agent/infrastructure/litellm/usage_tracker.py` -- Create: `backend/src/core/agent/infrastructure/config/resolver.py` -- Modify: `backend/src/core/config/settings.py` -- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py` -- Test: `backend/tests/unit/core/agent/test_litellm_usage.py` -- Test: `backend/tests/unit/core/agent/test_config_resolver.py` - -**Step 1: Write failing runtime tests (events + cost + strict errors)** - -```python -def test_runtime_emits_text_tool_reasoning_events() -> None: - ... - - -def test_runtime_raises_if_model_or_api_key_missing() -> None: - ... - - -def test_usage_tracker_extracts_tokens_and_cost() -> None: - ... -``` - -**Step 2: Run RED** - -Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q` -Expected: FAIL. - -**Step 3: Implement runtime and tracker** - -- Register CrewAI event handlers (`Task/LLM/Tool/Reasoning`) and map to AG-UI canonical event types. -- Default runtime to streaming mode for CrewAI execution. -- Enforce strict config behavior: no `llm_model_code` or provider key -> raise. -- Use LiteLLM cost calculator for actual cost; if cost cannot be computed, fail closed (raise), do not silently record zero. 
- -**Step 4: Run GREEN** - -Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py -q` -Expected: PASS. - -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure backend/tests/unit/core/agent/test_crewai_runtime.py backend/tests/unit/core/agent/test_litellm_usage.py backend/tests/unit/core/agent/test_config_resolver.py backend/src/core/config/settings.py -git commit -m "feat(agent): implement crewai runtime events and litellm usage-cost auditing" -``` - -### Task 4: Implement Run/Resume Application Services (DB Config + Persistence) - -**Files:** -- Create: `backend/src/core/agent/application/run_service.py` -- Create: `backend/src/core/agent/application/resume_service.py` -- Create: `backend/src/core/agent/application/session_state_persistence.py` -- Create: `backend/src/core/agent/domain/state_snapshot.py` -- Create: `backend/src/core/agent/domain/tool_correlation.py` -- Test: `backend/tests/unit/core/agent/test_run_resume_service.py` -- Test: `backend/tests/unit/core/agent/test_state_snapshot.py` -- Test: `backend/tests/unit/core/agent/test_tool_correlation.py` - -**Step 1: Write failing tests for DB-driven runtime and aggregate updates** - -```python -async def test_run_service_loads_agent_config_from_db_and_persists_messages() -> None: - ... - - -async def test_resume_service_requires_pending_tool_call() -> None: - ... -``` - -**Step 2: Run RED** - -Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q` -Expected: FAIL. - -**Step 3: Implement services** - -- `run_service`: read session + system agent config from DB, execute runtime, persist user/assistant messages, update session aggregates. 
-- `resume_service`: validate pending tool call status, enforce idempotency semantics, resume runtime, persist audit fields. -- Persist metadata audit (`tokens`, `cost`, `cost_source`, correlation ids) for every assistant message. -- Persist tool lifecycle with role-only model: - - tool call message uses `role=assistant` with fixed metadata (`type=tool_call`, `tool_call_id`, `tool_name`, arguments reference). - - tool result message uses `role=tool` with fixed metadata (`type=tool_result`, `tool_call_id`, `tool_name`, storage bucket/path, checksum, bytes, schema version). -- `tool_result` full payload (UI schema) is uploaded to Supabase Storage private bucket; DB stores durable reference and verification fields. -- Ensure DB->AG-UI `TOOL_CALL_RESULT` reconstruction is equivalent to SSE-streamed final tool result semantics. -- Enforce metadata contract by Pydantic model at write path and read path (reject malformed metadata early). - -**Step 4: Run GREEN** - -Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -q` -Expected: PASS. 
- -**Step 5: Commit** - -```bash -git add backend/src/core/agent/application backend/src/core/agent/domain backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_state_snapshot.py backend/tests/unit/core/agent/test_tool_correlation.py -git commit -m "feat(agent): add run-resume app services with db config and audit persistence" -``` - -### Task 5: Wire Celery Worker Task to Run/Resume and Publish Runtime Events - -**Files:** -- Create: `backend/src/core/agent/infrastructure/queue/tasks.py` -- Modify: `backend/src/core/celery/app.py` -- Test: `backend/tests/unit/core/agent/test_queue_tasks.py` -- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py` - -**Step 1: Write failing queue tests** - -```python -def test_run_agent_task_emits_started_runtime_and_finished_events() -> None: - ... - - -def test_run_agent_task_emits_error_event_on_exception() -> None: - ... -``` - -**Step 2: Run RED** - -Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q` -Expected: FAIL. - -**Step 3: Implement worker task flow** - -- Decode command type (`run`/`resume`). -- Emit lifecycle events (`RUN_STARTED/RUN_RESUMED/RUN_FINISHED/RUN_ERROR`). -- Forward runtime callback events to Redis stream immediately. -- Persist session status/snapshot after completion. - -**Step 4: Run GREEN** - -Run: `uv run pytest backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -q` -Expected: PASS. 
- -**Step 5: Commit** - -```bash -git add backend/src/core/agent/infrastructure/queue/tasks.py backend/src/core/celery/app.py backend/tests/unit/core/agent/test_queue_tasks.py backend/tests/integration/core/agent/test_queue_run_resume.py -git commit -m "feat(agent): wire celery run-resume execution and redis event publishing" -``` - -### Task 6: Implement API Contracts (Run/Resume/SSE) + Auth/Ownership/Idempotency - -**Files:** -- Create: `backend/src/v1/agent/schemas.py` -- Create: `backend/src/v1/agent/repository.py` -- Create: `backend/src/v1/agent/service.py` -- Create: `backend/src/v1/agent/router.py` -- Create: `backend/src/v1/agent/dependencies.py` -- Modify: `backend/src/v1/router.py` -- Test: `backend/tests/unit/v1/agent/test_service.py` -- Test: `backend/tests/unit/v1/agent/test_owner_guard.py` -- Test: `backend/tests/integration/v1/agent/test_routes.py` - -**Step 1: Write failing API tests** - -```python -async def test_run_requires_auth_and_returns_202_task_id() -> None: - ... - - -async def test_stream_reads_from_last_event_id() -> None: - ... - - -def test_resume_idempotency_uses_redis_lock_and_task_key() -> None: - ... -``` - -**Step 2: Run RED** - -Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q` -Expected: FAIL. - -**Step 3: Implement API service/router** - -- `POST /api/v1/agent/runs` enqueue run command. -- `POST /api/v1/agent/runs/{session_id}/resume` enqueue resume command with async redis lock + dedup task key. -- `GET /api/v1/agent/runs/{session_id}/events` SSE stream from Redis with `Last-Event-ID`. -- Enforce auth and session ownership checks on all endpoints. -- Validate `tool_call_id` and message length/pattern boundaries. 
- -**Step 4: Run GREEN** - -Run: `uv run pytest backend/tests/unit/v1/agent/test_service.py backend/tests/unit/v1/agent/test_owner_guard.py backend/tests/integration/v1/agent/test_routes.py -q` -Expected: PASS. - -**Step 5: Commit** - -```bash -git add backend/src/v1/agent backend/src/v1/router.py backend/tests/unit/v1/agent backend/tests/integration/v1/agent/test_routes.py -git commit -m "feat(agent): add authenticated run-resume-sse api with redis-backed idempotency" -``` - -### Task 7: Add Schema/Migration Contract for Session Snapshot + Audit Fields - -**Files:** -- Create: `backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py` -- Modify: `backend/src/models/agent_chat_session.py` -- Modify: `backend/src/models/agent_chat_message.py` -- Test: `backend/tests/unit/database/test_sessions_state_snapshot_contract.py` - -**Migration scope note:** -- Fix current schema drift: model has `sessions.state_snapshot` but migration chain does not reliably provide this column in current DB state. -- Keep schema minimal; do not add new business tables in this migration. - -**Step 1: Write failing migration contract tests** - -```python -def test_session_has_state_snapshot_and_status_contract() -> None: - ... - - -def test_message_has_token_cost_and_metadata_contract() -> None: - ... -``` - -**Step 2: Run RED** - -Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q` -Expected: FAIL. - -**Step 3: Implement migration and model alignment** - -- Ensure `state_snapshot`, `status`, token/cost/metadata fields are present and nullable constraints are explicit. -- Add/verify indexes needed for role-based semantic reconstruction (`session_id, seq`, and targeted metadata lookups if required). -- Ensure `metadata` structure is validated by fixed Pydantic schema at application boundary. -- Add DB-level guardrails where feasible (check constraints) for role/metadata consistency without introducing new tables. 
-- Keep reversible downgrade path. - -**Step 4: Run GREEN** - -Run: `uv run pytest backend/tests/unit/database/test_sessions_state_snapshot_contract.py -q` -Expected: PASS. - -**Step 5: Commit** - -```bash -git add backend/alembic/versions/20260305_agent_runtime_closed_loop_contract.py backend/src/models/agent_chat_session.py backend/src/models/agent_chat_message.py backend/tests/unit/database/test_sessions_state_snapshot_contract.py -git commit -m "feat(agent): add db contract for session snapshot and usage audit fields" -``` - -### Task 8: End-to-End Closure Verification and Docs Update - -**Files:** -- Modify: `docs/runtime/runtime-route.md` -- Modify: `docs/runtime/runtime-runbook.md` -- Create: `backend/tests/integration/core/agent/test_session_message_persistence.py` - -**Step 1: Write integration test for full closure path** - -```python -async def test_closed_loop_run_flow_frontend_to_sse() -> None: - # run request -> queue command -> runtime events -> redis stream -> sse read - ... -``` - -Also verify: -- `tool_result` full UI schema is written to Supabase Storage private bucket. -- `messages.role=tool` row contains stable storage reference and checksum metadata. -- Reading from DB can reconstruct final AG-UI `TOOL_CALL_RESULT` event payload semantics. - -**Step 2: Run RED** - -Run: `uv run pytest backend/tests/integration/core/agent/test_session_message_persistence.py -q` -Expected: FAIL. - -**Step 3: Implement minimal missing glue and docs** - -- Fill any missing wiring revealed by the test. -- Document endpoint contracts, event taxonomy, and operational runbook for redis/celery troubleshooting. 
- -**Step 4: Run GREEN + full gate verification** - -Run: -- `PYTHONPATH=backend/src uv run python backend/src/core/runtime/cli.py migrate` -- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q` -- `uv run ruff check backend/src backend/tests` -- `uv run basedpyright backend/src` - -Expected: -- All relevant tests PASS. -- Ruff PASS. -- basedpyright 0 errors (notes/warnings can be documented if pre-existing). - -**Step 5: Commit** - -```bash -git add docs/runtime/runtime-route.md docs/runtime/runtime-runbook.md backend/tests/integration/core/agent/test_session_message_persistence.py -git commit -m "docs(agent): document closed-loop runtime and verify end-to-end chain" -``` - -### Task 9: L2 Mandatory Review Gates - -**Files:** -- No direct code changes required; apply fixes if findings appear. - -**Step 1: Run required agents** - -- `tdd-guide` (already enforced by plan sequence) -- `refactor-cleaner` -- `code-reviewer` -- `security-reviewer` - -**Step 2: Fix all CRITICAL/HIGH findings** - -Run targeted tests after each fix. - -**Step 3: Final verification rerun** - -Run: -- `uv run pytest backend/tests/unit/core/agent backend/tests/unit/v1/agent backend/tests/integration/core/agent backend/tests/integration/v1/agent -q` -- `uv run ruff check backend/src backend/tests` -- `uv run basedpyright backend/src` - -Expected: no failing tests; no lint errors; no type errors. 
- -**Step 4: Final commit (if review fixes were needed)** - -```bash -git add backend/src backend/tests docs/runtime -git commit -m "fix(agent): resolve L2 review findings for closed-loop runtime" -``` diff --git a/docs/plans/2026-03-05-user-agent-context-settings-design.md b/docs/plans/2026-03-05-user-agent-context-settings-design.md new file mode 100644 index 0000000..ff3db7f --- /dev/null +++ b/docs/plans/2026-03-05-user-agent-context-settings-design.md @@ -0,0 +1,746 @@ +# UserAgentContext & ProfileSettings v1 设计 + +**Date:** 2026-03-05 +**Status:** Approved + +--- + +## 目标 + +为 Agent Runtime 提供完整的用户画像上下文,通过 Pydantic 约束 profiles.settings 结构,确保: + +1. 运行时入口读取 profile(username/bio/settings) +2. settings 结构类型安全、版本可演进 +3. 关键配置(语言/时区/国家)符合标准格式 + +--- + +## 架构 + +``` +Profile (DB JSONB) + ↓ +ProfileSettings (Pydantic) + ↓ +UserAgentContext (DataClass) + ↓ +build_global_system_prompt(ctx) +``` + +**设计原则:** +- 唯一入口:`get_user_agent_context(user_id)` 读取并构造上下文 +- 不可变:UserAgentContext 使用 frozen dataclass +- 向后兼容:version 字段预留未来演进 + +--- + +## ProfileSettings v1 结构 + +```json +{ + "version": 1, + "preferences": { + "interface_language": "zh-CN", + "ai_language": "zh-CN", + "timezone": "Asia/Shanghai", + "country": "CN" + }, + "privacy": {}, + "notification": {} +} +``` + +### 字段说明 + +| 字段 | 类型 | 默认值 | 约束 | +|------|------|--------|------| +| `version` | int | 1 | 必须为 1(v1 锁定) | +| `preferences.interface_language` | str | "zh-CN" | BCP-47 格式 | +| `preferences.ai_language` | str | "zh-CN" | BCP-47 格式 | +| `preferences.timezone` | str | "Asia/Shanghai" | IANA 时区 | +| `preferences.country` | str | "CN" | ISO 3166-1 alpha-2 | +| `privacy` | dict | {} | 空对象(预留) | +| `notification` | dict | {} | 空对象(预留) | + +### 约束规则 + +**1. BCP-47 语言格式** + +正则:`^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$` + +示例: +- ✅ zh-CN, en-US, zh-TW, ja-JP +- ❌ zh_CN, EN, chn + +**2. IANA 时区** + +使用 `zoneinfo.ZoneInfo` 校验。 + +示例: +- ✅ Asia/Shanghai, America/New_York, UTC +- ❌ CST, GMT+8 + +**3. 
ISO 3166-1 alpha-2 国家代码** + +使用 `pycountry.countries.get(alpha_2=...)` 校验。 + +示例: +- ✅ CN, US, JP, GB +- ❌ CHN, USA, zz + +--- + +## UserAgentContext 结构 + +```python +@dataclass(frozen=True) +class UserAgentContext: + user_id: UUID + username: str + bio: str | None + settings: ProfileSettings +``` + +**设计要点:** +- 不可变(frozen=True):防止运行时修改 +- 完整画像:包含身份(username/bio)和配置(settings) +- 唯一构造入口:`get_user_agent_context(user_id)` + +--- + +## Pydantic 模型实现 + +```python +from pydantic import BaseModel, Field, field_validator +from dataclasses import dataclass +from uuid import UUID +import re + +class PreferenceSettings(BaseModel): + interface_language: str = "zh-CN" + ai_language: str = "zh-CN" + timezone: str = "Asia/Shanghai" + country: str = "CN" + + @field_validator("interface_language", "ai_language") + @classmethod + def validate_bcp47(cls, v: str) -> str: + pattern = r"^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$" + if not re.match(pattern, v): + raise ValueError(f"Invalid BCP-47 language tag: {v}") + return v + + @field_validator("timezone") + @classmethod + def validate_iana_timezone(cls, v: str) -> str: + import zoneinfo + try: + zoneinfo.ZoneInfo(v) + except Exception: + raise ValueError(f"Invalid IANA timezone: {v}") + return v + + @field_validator("country") + @classmethod + def validate_iso_country(cls, v: str) -> str: + import pycountry + if not pycountry.countries.get(alpha_2=v.upper()): + raise ValueError(f"Invalid ISO 3166-1 alpha-2 country code: {v}") + return v.upper() + +class ProfileSettings(BaseModel): + version: int = Field(default=1, ge=1, le=1) + preferences: PreferenceSettings = Field(default_factory=PreferenceSettings) + privacy: dict = Field(default_factory=dict) + notification: dict = Field(default_factory=dict) + +@dataclass(frozen=True) +class UserAgentContext: + user_id: UUID + username: str + bio: str | None + settings: ProfileSettings +``` + +--- + +## 依赖项 + +需要添加到 `backend/pyproject.toml`: + +```toml +[project.dependencies] +pycountry = 
">=23.0.0" +``` + +--- + +## 迁移策略 + +**数据库层:** +- profiles.settings 保持 JSONB,不做 schema 变更 +- 现有数据默认值:`{"version": 1, "preferences": {"country": "CN"}}` + +**应用层:** +- 读取时:`ProfileSettings.model_validate(profile.settings or {})` +- 写入时:`profile.settings = settings.model_dump()` + +--- + +## 未来演进 + + +**版本迁移:** +- Pydantic 支持多版本共存 +- 数据库不做破坏性变更 + +--- + +--- + +## AG-UI 事件转发与落库策略 + +### 核心原则 + +**1. 事件转发时机:** +- 只有 organization 阶段完成后转发 AG-UI 事件 +- AG-UI bridge 已实现底层机制,编排层控制转发时机 + +**2. 落库时机:** +- 意图识别和任务执行阶段:落库但 seq 取负数(用于审计) +- 结果反馈阶段:seq 取最新 seq 的绝对值 +1(用于展示) + +### Seq 设计细节 + +**意图识别和任务执行阶段(审计用):** +- seq 取负数(如 -1, -2) +- role: "assistant"(标记为 agent 输出) +- content: 阶段的完整输出(用于审计/调试) +- 重建会话时通过 `WHERE seq > 0` 过滤,不展示给用户 + +**结果反馈阶段(展示用):** +- seq 取正数(取最新负数的绝对值 +1) +- role: "assistant" +- content: OrganizationResult.assistant_text +- 重建会话时通过 `WHERE seq > 0` 展示给用户 + +**示例:** +``` +| seq | role | content | 展示 | +|------|----------|----------------------------|------| +| -2 | assistant| ExecutionResult (完整) | 否 | +| -1 | assistant| IntentResult (完整) | 否 | +| 1 | user | 用户输入 | 是 | +| 2 | assistant| OrganizationResult | 是 | +``` + +### 编排层职责 + +```python +@listen(intent_stage) +async def persist_intent(self, state: FlowState) -> FlowState: + # seq 取负数 + seq = await message_repo.get_next_negative_seq(state.session_id) + await message_repo.create( + session_id=state.session_id, + seq=seq, # 负数 + role="assistant", + content=state.intent_result.model_dump_json(), + ... + ) + return state + +@listen(execution_stage) +async def persist_execution(self, state: FlowState) -> FlowState: + # seq 取负数 + seq = await message_repo.get_next_negative_seq(state.session_id) + await message_repo.create( + session_id=state.session_id, + seq=seq, # 负数 + role="assistant", + content=state.execution_result.model_dump_json(), + ... 
+ ) + return state + +@listen(organization_stage) +async def finalize_flow(self, state: FlowState) -> FlowState: + result = state.organization_result + + # seq 取正数(最新负数绝对值+1) + seq = await message_repo.get_next_positive_seq(state.session_id) + await message_repo.create( + session_id=state.session_id, + seq=seq, # 正数 + role="assistant", + content=result.assistant_text, + ... + ) + + # 触发 AG-UI 事件(由 bridge 处理) + return state +``` + +### Token 和 Cost 累加 + +**策略:在内存中累加所有阶段的 token 和 cost,organization 完成后统一落库。** + +```python +@dataclass +class FlowState: + # ... + tokens: dict[str, dict] = field(default_factory=dict) + cost: Decimal = Decimal("0") + currency: str = "CNY" +``` + +--- + +## CrewAI Flow 三阶段设计 + +### 架构概览 + +``` +User Input + UserAgentContext + ↓ +@start() begin() + ↓ +@listen() intent_stage() → 判断 can_answer_directly + ↓ (router) + ├─ DIRECT_RESPONSE → 直接返回 + └─ NEEDS_EXECUTION + ↓ + @listen() execution_stage() → 任务执行/工具调用 + ↓ + @listen() organization_stage() → 结果组织与表达 + ↓ + 返回给用户 +``` + +### 三阶段职责 + +**1. Intent Recognition(意图识别)** +- Agent Type: `INTENT_RECOGNITION` +- 输出结构(最小化设计): + ```python + class IntentResult(BaseModel): + direct_answer: bool # 是否可以直接回答 + intent_analysis: str # 意图分析文本(用于调试/审计) + execution_prompt: str # 给 execution 阶段的提示词(direct_answer=false时使用) + direct_response: str # 直接回复文本(direct_answer=true时使用) + ``` +- 短路逻辑: + - `direct_answer=true` → 完全跳过 execution 和 organization,直接返回 direct_response + - `direct_answer=false` → 进入 execution 阶段 +- 输出约束:使用 `output_pydantic=IntentResult` +- **落库策略**:落库到 messages 表,但重建会话时不展示 + +**2. 
Task Execution(任务执行)** +- Agent Type: `TASK_EXECUTION` +- 输入:IntentResult.execution_prompt + IntentResult.intent_analysis +- 职责: + - 执行复杂任务(查询数据库、调用工具、多步骤推理) + - 返回结构化执行结果 +- 输出结构(最小化设计): + ```python + class ExecutionResult(BaseModel): + execution_summary: str # 任务执行摘要(用于调试/审计) + organization_prompt: str # 给 organization 阶段的提示词 + execution_data: dict = {} # 执行结果的结构化数据 + ``` +- 输出约束:使用 `output_pydantic=ExecutionResult` +- **落库策略**:落库到 messages 表,但重建会话时不展示 + +**3. Result Reporting(结果报告)** +- Agent Type: `RESULT_REPORTING` +- 输入: + - IntentResult(意图识别结果) + - ExecutionResult(任务执行情况) +- 职责: + - 结合意图分析和执行结果,格式化为用户友好的响应 + - 应用个性化模板(基于 UserAgentContext) +- 输出结构(最小化设计): + ```python + class OrganizationResult(BaseModel): + assistant_text: str # 最终回复文本 + response_metadata: dict = {} # 响应元数据(可选) + ``` +- 输出约束:使用 `output_pydantic=OrganizationResult` +- **唯一展示阶段**:重建会话时只展示此阶段的 message +- **唯一转发阶段**:只有此阶段的输出需要通过 AG-UI 事件转发 + +### Flow 状态管理 + +```python +@dataclass +class FlowState: + user_input: str + context: UserAgentContext + stage_trace: list[str] = field(default_factory=list) + intent_result: IntentResult | None = None + execution_result: ExecutionResult | None = None + organization_result: OrganizationResult | None = None + assistant_text: str = "" + tokens: dict = field(default_factory=dict) + cost: Decimal = Decimal("0") +``` + +### 数据流向 + +``` +User Input + UserAgentContext + ↓ +@start() begin() + ↓ +@listen() intent_stage() + ├─ IntentResult.direct_answer=true + │ ↓ + │ 跳过 execution,直接 organization + │ ↓ + │ organization_stage(IntentResult.next_stage_prompt, IntentResult.metadata) + │ ↓ + │ OrganizationResult → AG-UI 事件 + 落库 + │ + └─ IntentResult.direct_answer=false + ↓ + execution_stage(IntentResult.next_stage_prompt, IntentResult.metadata) + ↓ + ExecutionResult + ↓ + organization_stage(ExecutionResult.next_stage_prompt, ExecutionResult.metadata) + ↓ + OrganizationResult → AG-UI 事件 + 落库 +``` + +### 三阶段输出约束 + +**所有阶段使用 `output_pydantic` 约束输出:** + +```python +from 
pydantic import BaseModel + +class IntentResult(BaseModel): + direct_answer: bool + next_stage_prompt: str + metadata: dict = {} + +class ExecutionResult(BaseModel): + next_stage_prompt: str + metadata: dict = {} + +class OrganizationResult(BaseModel): + assistant_text: str + metadata: dict = {} + +# Task 定义 +intent_task = Task( + description="Analyze user intent", + expected_output="Intent analysis", + agent=intent_agent, + output_pydantic=IntentResult, +) + +execution_task = Task( + description="Execute tasks", + expected_output="Execution result", + agent=execution_agent, + output_pydantic=ExecutionResult, +) + +organization_task = Task( + description="Format response", + expected_output="User-friendly response", + agent=organization_agent, + output_pydantic=OrganizationResult, +) +``` + +--- + +## 系统选模逻辑设计 + +### 问题背景 + +旧逻辑:`order_by(...).limit(1)` 随机选择一个系统 agent,不区分阶段。 + +新逻辑:按 `agent_type` 显式映射到三阶段。 + +### 选模规则 + +**必需的 Agent Types:** +- `INTENT_RECOGNITION` → 用于 intent_stage +- `TASK_EXECUTION` → 用于 execution_stage +- `RESULT_REPORTING` → 用于 organization_stage + +**查询逻辑:** + +```python +REQUIRED_TYPES = {"INTENT_RECOGNITION", "TASK_EXECUTION", "RESULT_REPORTING"} + +@dataclass(frozen=True) +class StageModels: + intent: SystemAgentCatalog + execution: SystemAgentCatalog + organization: SystemAgentCatalog + +def resolve_stage_models(rows: list[SystemAgentCatalog]) -> StageModels: + by_type = {row.agent_type: row for row in rows} + missing = REQUIRED_TYPES - set(by_type.keys()) + if missing: + raise ValueError(f"Missing required agent types: {missing}") + + return StageModels( + intent=by_type["INTENT_RECOGNITION"], + execution=by_type["TASK_EXECUTION"], + organization=by_type["RESULT_REPORTING"], + ) +``` + +**初始化数据约束:** +- `system_agents` 表必须包含三种类型的记录 +- 运行时启动时验证完整性 + +--- + +## 人民币结算策略设计 + +### 设计原则 + +1. **保留 LiteLLM 语义**:`completion_cost()` 始终返回 USD +2. **业务层映射**:根据用户国家(`profiles.settings.preferences.country`)决定落库货币 +3. **默认人民币**:中国用户或无国家信息默认 CNY +4. 
**汇率配置**:USD/CNY 汇率通过环境变量配置 + +### 货币来源 + +``` +UserAgentContext.settings.preferences.country + ↓ +resolve_billing_currency(country) + ↓ +CN → CNY +US → USD +其他 → USD +``` + +### 结算流程 + +``` +LiteLLM completion_cost() + ↓ (USD) +resolve_billing_cost(usd_cost, country) + ↓ + ├─ country="CN" or None → CNY (乘以汇率) + └─ country="US" → USD (保持原值) + ↓ +messages.cost + messages.currency +sessions.total_cost (同一货币) +``` + +### 汇率配置 + +```python +# 环境变量 +BILLING_USD_CNY_RATE=7.2 + +# 默认值 +DEFAULT_USD_CNY_RATE = Decimal("7.2") +``` + +### 结算模型 + +```python +@dataclass(frozen=True) +class BillingCost: + currency: str # "CNY" or "USD" + cost: Decimal # 6位小数精度 + +def resolve_billing_cost( + usd_cost: Decimal, + country: str | None, + usd_cny_rate: Decimal = DEFAULT_USD_CNY_RATE, +) -> BillingCost: + currency = "CNY" if (country or "CN").upper() == "CN" else "USD" + if currency == "CNY": + cost = usd_cost * usd_cny_rate + else: + cost = usd_cost + return BillingCost( + currency=currency, + cost=cost.quantize(Decimal("0.000001")) + ) +``` + +### 数据库落库 + +**messages 表:** +- `cost`: NUMERIC(12,6) - 业务货币金额 +- `currency`: VARCHAR(3) - "CNY" or "USD" + +**sessions 表:** +- `total_cost`: NUMERIC(12,6) - 同一货币累计 + +**约束:** +- 同一 session 内所有 messages 的 currency 必须一致 +- sessions.total_cost 累加时保持货币一致 + +--- + +## Session 状态一致性设计 + +### 问题背景 + +旧逻辑: +- `sessions.status` 与 `state_snapshot.status` 不同步 +- 失败时状态不一致 +- title 未自动赋值 + +### 状态机 + +``` +pending (创建) + ↓ +running (开始执行) + ↓ + ├─ completed (成功) + └─ failed (异常) +``` + +### 状态同步规则 + +**创建时:** +```python +session = AgentChatSession( + user_id=user_uuid, + status=AgentChatSessionStatus.PENDING, + state_snapshot={ + "status": "pending", + "pending_tool_call_id": None, + }, +) +``` + +**运行时:** +```python +# 开始执行 +session.status = AgentChatSessionStatus.RUNNING +session.state_snapshot["status"] = "running" + +# 成功完成 +session.status = AgentChatSessionStatus.COMPLETED +session.state_snapshot["status"] = "completed" + +# 失败 +session.status = 
AgentChatSessionStatus.FAILED +session.state_snapshot["status"] = "failed" +session.state_snapshot["error_id"] = error_id +``` + +### 自动 Title 赋值 + +**规则:** +- 首次运行时,如果 `session.title` 为空,使用 `user_input[:255]` 赋值 +- 只在第一次运行时赋值,后续不覆盖 + +**实现:** +```python +async def _set_title_if_empty(self, session_id: UUID, title: str) -> None: + stmt = ( + update(AgentChatSession) + .where(AgentChatSession.id == session_id) + .where(AgentChatSession.title.is_(None)) + .values(title=title[:255]) + ) + await self.db.execute(stmt) +``` + +### Repository 方法 + +```python +class SessionRepository: + async def mark_running(self, session_id: UUID) -> None: ... + async def mark_completed(self, session_id: UUID) -> None: ... + async def mark_failed(self, session_id: UUID, error_id: str) -> None: ... +``` + +--- + +## 全局 Prompt 构建设计 + +### 分层结构 + +``` +全局系统 Prompt +├─ 身份段(username/bio) +├─ 偏好段(language/timezone/country) +└─ 阶段段(动态注入) + ├─ intent stage prompt + ├─ execution stage prompt + └─ organization stage prompt +``` + +### 构建函数 + +```python +def build_global_system_prompt(ctx: UserAgentContext) -> str: + lines = [ + "# User Identity", + f"username: {ctx.username}", + f"bio: {ctx.bio or 'N/A'}", + "", + "# User Preferences", + f"interface_language: {ctx.settings.preferences.interface_language}", + f"ai_language: {ctx.settings.preferences.ai_language}", + f"timezone: {ctx.settings.preferences.timezone}", + f"country: {ctx.settings.preferences.country}", + "", + "# Instructions", + "Use the user's preferences to personalize responses.", + "Respond in the user's preferred AI language.", + "Consider the user's timezone for time-related queries.", + ] + return "\n".join(lines) +``` + +### 阶段注入 + +每个阶段运行时,在全局 prompt 基础上追加阶段特定的指令: + +```python +def build_stage_prompt( + base_prompt: str, + stage: str, # "intent" | "execution" | "organization" + ctx: UserAgentContext, +) -> str: + stage_prompts = { + "intent": "Analyze the user's intent and decide if direct response is possible.", + 
"execution": "Execute the required tasks and tools to fulfill the user's request.", + "organization": "Format the execution results into a user-friendly response.", + } + return f"{base_prompt}\n\n# Stage: {stage}\n{stage_prompts[stage]}" +``` + +--- + +## 依赖关系图 + +``` +UserAgentContext (核心上下文) + ↓ + ├─ ProfileSettings (用户配置) + │ └─ preferences.country → 人民币结算 + │ + ├─ build_global_system_prompt() (全局 Prompt) + │ └─ 三阶段 Flow 使用 + │ + └─ resolve_stage_models() (选模逻辑) + └─ 三阶段 Agent 配置 +``` + +--- + +## 相关文档 + +- [Runtime Database Schema](../runtime/runtime-database.md) +- [AG-UI Protocol](.opencode/skills/ag-ui/SKILL.md) +- [CrewAI Framework](.opencode/skills/crewai/SKILL.md) diff --git a/docs/plans/agent-llm-config.md b/docs/plans/agent-llm-config.md new file mode 100644 index 0000000..b961b2d --- /dev/null +++ b/docs/plans/agent-llm-config.md @@ -0,0 +1,144 @@ +# Agent LLM Config Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. 
+ +**Goal:** 将 `system_agents.config` 中的 `temperature` / `max_tokens` 以受约束方式加载到运行时,并在调用 LiteLLM 时按需透传。 + +**Architecture:** 在应用层 `RunService` 读取模型选择时同步读取并校验 `SystemAgents.config`;将校验后的 `SystemAgentLLMConfig` 传入 `CrewAIRuntime`;由 runtime 将配置转交给 LiteLLM client,client 仅在值非 `None` 时向 `completion()` 传参,避免不必要的 provider 兼容风险。 + +**Tech Stack:** FastAPI, SQLAlchemy (async), Pydantic v2, LiteLLM, pytest + +--- + +## 背景与修正点 + +- 当前真实调用链为:`RunService._load_agent_model_selection()` -> `create_runtime()` -> `CrewAIRuntime.execute()` -> `run_completion()`,并非 `load_stage_models()`。 +- `SystemAgentLLMConfig` 已存在:`backend/src/core/agent/domain/system_agent_config.py`。 +- `system_agents.config` 目前在初始化 YAML 侧有约束,但运行时 DB 读取仍需二次校验,防止脏数据绕过。 + +## 规则约束 + +- 严格 TDD:先写失败测试,再做实现。 +- Python 命令统一使用 `uv run ...`。 +- 仅做增量改动,不回滚或覆盖与本任务无关的已有变更。 + +## 字段映射与透传策略 + +| 配置字段 | LiteLLM 参数 | 规则 | +|---|---|---| +| `temperature` | `temperature` | `None` 不透传;非空直接透传 | +| `max_tokens` | `max_tokens` | `None` 不透传;非空直接透传 | + +--- + +### Task 1: 应用层加载并校验 Agent LLM Config + +**Files:** +- Modify: `backend/src/core/agent/application/run_service.py` +- Test: `backend/tests/unit/core/agent/test_run_resume_service.py` + +**Step 1: 写失败测试(RED)** + +新增单测覆盖以下行为: +1. `_load_agent_model_selection()` 返回三元组:`(model_code, provider_name, llm_config)`。 +2. 当 DB `config` 为 `{}` 时,`llm_config.temperature/max_tokens` 为 `None`。 +3. 当 DB `config` 含非法值(如 `temperature=3`)时抛 `ValueError`。 + +**Step 2: 运行测试确认失败** + +Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q` +Expected: 新增断言失败(返回值结构/异常行为不匹配)。 + +**Step 3: 最小实现(GREEN)** + +在 `run_service.py`: +1. 查询 `SystemAgents.config`。 +2. 用 `SystemAgentLLMConfig.model_validate(config or {})` 校验。 +3. 将 `_load_agent_model_selection()` 改为返回三元组。 +4. 
在 `run()` 中把 `llm_config` 传递到 `create_runtime(...)`。 + +**Step 4: 运行测试确认通过** + +Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q` +Expected: PASS。 + +--- + +### Task 2: Runtime 与 LiteLLM Client 支持可选参数透传 + +**Files:** +- Modify: `backend/src/core/agent/infrastructure/crewai/factory.py` +- Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py` +- Modify: `backend/src/core/agent/infrastructure/litellm/client.py` +- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py` + +**Step 1: 写失败测试(RED)** + +在 `test_crewai_runtime.py` 增加用例: +1. 传入 `temperature/max_tokens` 时,`run_completion` 收到对应参数。 +2. 参数为 `None` 时,不应被透传到 LiteLLM。 + +必要时新增 `backend/tests/unit/core/agent/test_litellm_client.py`,单测 `run_completion` 的 kwargs 组装逻辑。 + +**Step 2: 运行测试确认失败** + +Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q` +Expected: 新增断言失败(参数未透传或未过滤 `None`)。 + +**Step 3: 最小实现(GREEN)** + +1. `create_runtime()` 增加 `llm_config` 参数并传给 `CrewAIRuntime`。 +2. `CrewAIRuntime` 保存 `llm_config`,执行时调用: + - `run_completion(..., temperature=llm_config.temperature, max_tokens=llm_config.max_tokens)` +3. `run_completion()` 改为支持可选 `temperature/max_tokens`,内部仅在非 `None` 时加入 kwargs 再调用 `completion()`。 + +**Step 4: 运行测试确认通过** + +Run: `uv run pytest backend/tests/unit/core/agent/test_crewai_runtime.py -q` +Expected: PASS。 + +--- + +### Task 3: 初始化数据补齐与回归验证 + +**Files:** +- Modify: `backend/src/core/config/static/database/system_agents.yaml` +- Modify: `backend/src/core/config/initial/init_data.py`(如需补充类型兜底) +- Test: `backend/tests/unit/core/agent/test_run_resume_service.py` + +**Step 1: 写失败测试(RED)** + +补充断言:YAML 读取后 `config` 可为空或包含 `max_tokens: null`,初始化逻辑不会报错,且生成结构符合 `SystemAgentLLMConfig`。 + +**Step 2: 运行测试确认失败** + +Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q` +Expected: 新增断言失败。 + +**Step 3: 最小实现(GREEN)** + +1. 在 `system_agents.yaml` 为各 agent 配置显式补充 `max_tokens: null`。 +2. 
`init_data.py` 保持 `config: SystemAgentLLMConfig | None = None`,写库时统一序列化为 dict。 + +**Step 4: 运行测试确认通过** + +Run: `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py -q` +Expected: PASS。 + +--- + +## 最终验证 + +1. `uv run pytest backend/tests/unit/core/agent/test_run_resume_service.py backend/tests/unit/core/agent/test_crewai_runtime.py -q` +2. `uv run pytest backend/tests/integration/core/agent/test_queue_run_resume.py -q` +3. `uv run ruff check backend/src backend/tests` +4. `uv run basedpyright` + +预期:全部通过;若集成测试依赖本地 DB 状态导致跳过/失败,需记录原因并给出手工验证步骤。 + +## 完成标准 + +- `RunService` 从 DB 读取并校验 `config`。 +- runtime 到 LiteLLM 链路支持 `temperature/max_tokens` 可选透传。 +- `None` 不透传。 +- 单测与相关集成测试通过,并给出命令级证据。 diff --git a/docs/todo/todo.md b/docs/todo/todo.md new file mode 100644 index 0000000..c1e7feb --- /dev/null +++ b/docs/todo/todo.md @@ -0,0 +1,2 @@ +1. 短期 memory 的加载:memory 的生命周期由 TTL、对话条目和 session_id 共同界定,使用 CrewAI 实现。 +2.