From 5ada60e8344b911a8e5219122e5970ea56536d6d Mon Sep 17 00:00:00 2001 From: zl-q Date: Sun, 8 Mar 2026 16:03:02 +0800 Subject: [PATCH] docs(agent): add Task2/Task3 architecture and implementation artifacts --- .../2026-03-08-agent-tool-architecture.md | 323 +++++++++++++ ...26-03-08-agent-tool-architecture-design.md | 207 ++++++++ ...t-tool-architecture-implementation-plan.md | 447 ++++++++++++++++++ 3 files changed, 977 insertions(+) create mode 100644 docs/bugs/2026-03-08-agent-tool-architecture.md create mode 100644 docs/plans/2026-03-08-agent-tool-architecture-design.md create mode 100644 docs/plans/2026-03-08-agent-tool-architecture-implementation-plan.md diff --git a/docs/bugs/2026-03-08-agent-tool-architecture.md b/docs/bugs/2026-03-08-agent-tool-architecture.md new file mode 100644 index 0000000..ace5431 --- /dev/null +++ b/docs/bugs/2026-03-08-agent-tool-architecture.md @@ -0,0 +1,323 @@ +# Agent 模块审查报告 - 工具架构 + +**日期**: 2026-03-08 +**范围**: `backend/src/core/agent` +**状态**: 待评估 + +--- + +## 🟡 MEDIUM - 工具架构问题 + +### 1. 未使用 CrewAI 工具模块,工具硬编码 + +**文件**: +- `application/run_service.py:406` - `_execute_backend_tool()` +- `infrastructure/crewai/runtime.py` - 三阶段流程 + +**问题**: + +当前 agent 只使用了 CrewAI 的 **agent/task 配置模板**(YAML),但**没有使用 CrewAI 的工具系统**: + +``` +已用到: +├── agents.yaml (agent 角色定义) +└── tasks.yaml (task 定义) + +未用到: +├── @tool 装饰器 +├── BaseTool 类 +└── Tools 工具注册表 +``` + +**当前实现**: +```python +# run_service.py:406 +async def _execute_backend_tool(self, *, tool_name, tool_args, ...): + if tool_name != "create_calendar_event": # 硬编码判断 + raise ValueError(f"unsupported backend tool: {tool_name}") + # 手动执行工具... +``` + +**影响**: +1. 每新增一个工具需要修改 `_execute_backend_tool()` 代码 +2. 无法利用 CrewAI 的工具选择、执行结果处理等能力 +3. 与 CrewAI 集成度低,无法发挥框架优势 +4. 无法将工具描述等prompt信息自动注入agent中 + +--- + +## 🟡 MEDIUM - 工具结果存储问题 + +### 2. 
工具结果存储到对象存储的功能未启用 + +**文件**: +- `application/session_state_persistence.py:52` - `persist_tool_result_payload()` +- `models/agent_chat_message.py` - messages 表 + +**问题**: + +已定义 `persist_tool_result_payload()` 函数,可将工具结果上传到对象存储(MinIO/Supabase Storage),但**该函数未被调用**。 + +当前实现: +- 工具结果直接存在数据库 `messages.content` 字段 +- `metadata_json` 中定义了 `storage_bucket`, `storage_path` 等字段,但都是 `None` + +```python +# message_metadata.py:17-27 +class MessageMetadataToolResult(BaseModel): + storage_bucket: str | None = None # 当前未使用 + storage_path: str | None = None # 当前未使用 + payload_sha256: str | None = None # 当前未使用 +``` + +**影响**: +1. 工具结果(尤其是 UI 组件等大数据)存在数据库,增加 DB 负担 +2. 已定义的存储接口未被使用,代码冗余 +3. 无法利用对象存储的 CDN 加速和带宽优势 + +--- + +## 🟡 MEDIUM - 工具输出格式问题 + +### 3. 工具输出不是 UI Schema,前端无法直接渲染 + +**文件**: +- `application/run_service.py:456-479` - `_execute_backend_tool()` + +**问题**: + +当前 `create_calendar_event` 工具返回的只是**简单的状态结构**(`status` / `event_id`),不是前端可渲染的 UI Schema: + +```python +# run_service.py:456-479 +event_id = str(schedule_item.id) +ui_card = { + "type": "calendar_card.v1", + "version": "v1", + "data": {...}, + "actions": [...] +} +# ui_card 构建了但没有作为 tool result 返回 +return {"status": "ok", "event_id": event_id} # 只返回了简单结构 +``` + +**当前输出**: +```json +{ + "status": "ok", + "event_id": "xxx" +} +``` + +**期望输出**(UI Schema): +```json +{ + "type": "calendar_card.v1", + "version": "v1", + "data": { + "id": "xxx", + "title": "会议", + "startAt": "2026-03-08T15:00:00Z", + ... + }, + "actions": [ + {"type": "link", "label": "查看详情", "target": "/calendar/events/xxx"} + ] +} +``` + +**影响**: +1. 前端无法直接渲染丰富的 UI 组件 +2. 需要前端自行解析返回结果再拼装渲染,增加前端工作量 +3. 无法利用 AG-UI 协议的 `ui_schema` 能力 + +--- + +## 🟡 MEDIUM - 阶段配置问题 + +### 4. 
三阶段流程参数硬编码,无法为每个阶段配置不同策略 + +**文件**: +- `infrastructure/crewai/runtime.py:190-277` - `CrewAIRuntime.execute()` + +**问题**: + +当前三阶段流程(intent → execution → organization)是硬编码在 `CrewAIRuntime.execute()` 中的,无法为每个阶段配置不同的参数,如每个阶段可以使用的工具: + +```python +# runtime.py:203-277 +# intent 阶段 +intent_text, intent_usage = _run_stage( + litellm_model=litellm_model, + api_key=..., + llm_config=self._llm_config, # 同一套配置 + stage="intent", + ... +) + +# execution 阶段(如果有) +execution_text, execution_usage = _run_stage( + litellm_model=litellm_model, + api_key=..., + llm_config=self._llm_config, # 同一套配置 + stage="execution", + ... +) + +# organization 阶段 +organization_text, organization_usage = _run_stage( + litellm_model=litellm_model, + api_key=..., + llm_config=self._llm_config, # 同一套配置 + stage="organization", + ... +) +``` + +**当前限制**: +1. 无法为 intent 阶段设置只读 LLM(不允许工具调用) + + +**影响**: +1. 无法精细控制每个阶段的 LLM 行为 +2. 意图识别阶段可能误触发工具调用 +3. 增加不必要的 LLM 调用成本 +4. 降低了架构的灵活性 + +--- + +## 🔴 HIGH - Agent Loop 断裂问题 + +### 5. 工具审批后未继续 Agent Loop + +**文件**: +- `application/resume_service.py:121-158` + +**问题**: + +前端审批工具调用后,后端返回 tool result,但**没有继续执行 agent loop**,直接标记 session 为 COMPLETED 结束。 + +当前流程: +```python +# resume_service.py:121-127 +snapshot = self._state_persistence.build_completed_snapshot() +await session_repository.update_runtime_state( + chat_session=chat_session, + status=AgentChatSessionStatus.COMPLETED, # 直接完成 + state_snapshot=snapshot, + ... +) +``` + +缺失的流程: +``` +1. 接收 tool result +2. 将 tool result 作为 message 存入上下文 +3. 再次调用 LLM(带 tool result) +4. 生成最终回复 +5. 标记为 COMPLETED +``` + +**影响**: +1. 用户审批工具后,agent 不会继续生成回复 +2. 整个 agent loop 在工具审批后断裂 +3. 用户体验不完整 + +--- + +## 🔴 HIGH - 对话历史和用户上下文架构错误 + +### 6. 
对话历史由前端维护,违反后端架构设计 + +**文件**: +- `application/run_service.py:89-124` +- `domain/agui_input.py` + +**问题**: + +当前架构中,**对话历史完全由前端维护并传递**: + +``` +前端 → GET /runs/{thread_id}/history → 后端返回历史 messages +前端 → POST /runs/{thread_id}/run → 前端把 history 放入 run_input.messages 传给后端 +后端 → 只读取 run_input 中的最新 user_input,不读取数据库历史 +``` + +代码证据 (`run_service.py:89-124`): +```python +async def run(self, *, run_input: RunAgentInput): + user_input = extract_latest_user_text(run_input) # 只取最新用户消息 + + runtime_result = await asyncio.to_thread( + runtime.execute, + user_input=user_input, # 只传最新输入 + system_prompt=system_prompt, + ) +``` + +**影响**: +1. **高危安全风险**:前端可以篡改对话历史,伪造上下文 +2. **架构违反**:用户上下文和对话历史都应该由后端维护 +3. **数据不一致**:前端可能遗漏或错误处理历史消息 +4. **无法支持多端同步**:不同前端设备看到的历史可能不同 +5. **带宽浪费**:每次请求都要传递完整历史,增加请求体积 +6. **偏离原计划**:原计划文档已明确要求后端通过 Redis 缓存对话历史,并以数据库读取作为回退策略,当前实现与之不符 + +--- + +## 🟡 MEDIUM - 多模态输入支持问题 + +### 7. 不支持图片等多模态输入 + +**文件**: +- `domain/agui_input.py:64-86` - `extract_latest_user_text()` +- `infrastructure/crewai/runtime.py:121-136` - `_run_stage()` +- `infrastructure/litellm/client.py` + +**问题**: + +当前架构**只支持纯文本输入**,图片等多模态内容被丢弃: + +代码证据 (`agui_input.py:64-86`): +```python +def extract_latest_user_text(run_input: RunAgentInput) -> str: + if isinstance(content, list): + for item in content: + if getattr(item, "type", None) != "text": + continue # ❌ 跳过非 text 类型(图片被丢弃) +``` + +代码证据 (`runtime.py:125`): +```python +messages.append({"role": "user", "content": user_content}) # 只传 str +``` + +**影响**: +1. 用户无法发送图片进行多模态交互 +2. 浪费多模态 LLM 能力 +3. 无法实现"上传图片让 AI 分析"等场景 + +--- + +## 🟡 MEDIUM - 缺失语音识别 (ASR) 功能 + +### 8. 未实现 fun-asr-realtime 语音识别 API 相关路由 + +**文件**: +- 无(功能缺失) + +**问题**: + +后端**未实现语音识别功能**,无法处理前端传入的音频数据: + +当前状态: +- `dashscope` 只用于 LLM(qwen3.5-flash 等) +- 没有任何 fun-asr、ASR、audio、transcribe 相关代码 +- v1 路由中无语音/音频相关 API + +**影响**: +1. 用户无法发送语音消息 +2. 无法实现实时语音对话场景 +3. 
需要前端自行完成 ASR,增大前端负担 + +--- diff --git a/docs/plans/2026-03-08-agent-tool-architecture-design.md b/docs/plans/2026-03-08-agent-tool-architecture-design.md new file mode 100644 index 0000000..4f5c55a --- /dev/null +++ b/docs/plans/2026-03-08-agent-tool-architecture-design.md @@ -0,0 +1,207 @@ +# Agent Tool Architecture Design + +**Date:** 2026-03-08 +**Source:** `docs/bugs/2026-03-08-agent-tool-architecture.md` +**Scope:** `backend/src/core/agent` +**Status:** Approved for planning + +--- + +## 1. Objective + +修复 Agent 工具架构相关 8 个问题,优先恢复端到端闭环能力(工具审批后继续推理并产出最终回复),并在同版本内补齐工具输出结构化、存储分层、阶段策略解耦、多模态与语音输入能力。 + +--- + +## 2. Deliverables + +1. 两阶段修复蓝图(Phase 1 + Phase 2) +2. 统一事件与状态机设计(AG-UI Step 事件 + 审批恢复) +3. 接口边界与职责重划分(run/resume/runtime/persistence) +4. 风险与回滚策略 +5. 验收标准(双金路径) + +--- + +## 3. Constraints And Decisions + +### 3.1 Release Strategy + +- 一次性切换 +- 不做灰度 +- 不做双轨 +- 不留兼容代码 + +### 3.2 Contract Decisions + +- `run` 接口允许破坏性变更:移除前端传完整历史 `messages` 的语义 +- 前端只传本次输入,历史以后端为准 +- Phase 1 不引入 client hint +- 工具架构在 Phase 1 完整迁移至 CrewAI Tools(非桥接) + +### 3.3 AG-UI Event Decisions + +- 三阶段固定发 `StepStarted/StepFinished`:`intent`, `execution`, `organization` +- 等待工具审批不单独新增 step,归属 execution 内部状态 +- 后端只发英文机器名,前端自行文案化 + +### 3.4 ASR / Multimodal Decisions + +- 多模态首版只支持文件上传(不支持 URL) +- ASR 首版为“录音结束后上传音频 -> 后端同步返回 transcript” +- 前端将 transcript 回填输入框,再调用 run + +--- + +## 4. Complexity And Risk + +- **Complexity:** S2(跨多个核心模块的架构调整) +- **Risk Tier:** L2(包含高危安全项:前端可篡改历史) + +风险驱动原则:先修复闭环与安全问题,再扩展能力面。 + +--- + +## 5. Phased Plan + +## Phase 1 - Close Loop And Stop Security Bleeding + +**Bugs:** #1, #5, #6 + +### Goals + +1. 后端成为历史与上下文唯一事实源 +2. 工具审批后恢复并继续 Agent Loop +3. 
工具执行完整迁移到 CrewAI Tools 注册体系 + +### Module Boundaries + +- `backend/src/core/agent/application/run_service.py` + - 仅负责本次输入解析、后端上下文组装、触发 runtime + - 移除前端历史信任路径 + - 移除硬编码工具分发 + +- `backend/src/core/agent/application/resume_service.py` + - 审批确认后触发异步续跑,立即返回 `accepted` + - 不可在工具执行后直接置 `COMPLETED` + - 增加 `approval_request_id` 幂等保护 + +- `backend/src/core/agent/infrastructure/crewai/runtime.py` + - 引入 CrewAI Tools 注册与注入 + - 按 agent/stage 装配工具集 + - 三阶段统一发 Step start/end 事件 + +- `backend/src/core/agent/application/session_state_persistence.py` + - 保障审批状态、工具结果、续跑状态一致性落库 + - 为 Phase 2 元数据扩展保留一致接口 + +### Runtime Flow (Phase 1) + +1. `run` 接收本次输入 +2. 后端读取 Redis/DB 重建历史 +3. 进入 intent/execution/organization 三阶段 +4. execution 中若触发工具审批:进入 `WAITING_APPROVAL` +5. 前端审批后调用 `resume` +6. `resume` 异步触发续跑:执行工具 -> 写 tool result -> 继续 loop +7. 生成最终 assistant 回复并 `RunFinished` + +--- + +## Phase 2 - Capability Completion In Same Version + +**Order:** #3 -> #2 -> #4 -> #7 -> #8 + +### #3 Tool Output As UI Schema v1 + +- 统一工具输出结构:`type/version/data/actions` +- 单一版本 `v1`,短期不做多版本并行 + +### #2 Tool Result Object Storage + +- 大 payload 存对象存储 +- DB 仅存摘要、索引、校验信息 +- 启用 `storage_bucket/storage_path/payload_sha256` + +### #4 Stage-Level Strategy Decoupling + +- intent/execution/organization 支持独立参数与工具策略 +- intent 阶段可配置为只读(禁工具) + +### #7 Multimodal Input + +- 首版支持图片文件上传输入 +- 不再丢弃非 text 内容 + +### #8 ASR API + +- 新增语音转写 API(同步返回 transcript) +- 语音转写与 agent run 解耦 + +--- + +## 6. Session State And Events + +推荐状态机: + +`RUNNING -> WAITING_APPROVAL -> RESUMING -> RUNNING -> COMPLETED/FAILED` + +关键约束: + +- 重复审批请求不得重复执行工具(幂等) +- `COMPLETED` 仅在 loop 自然结束时设置 +- Step 事件覆盖三阶段完整生命周期 + +--- + +## 7. 
Acceptance Criteria + +### 7.1 Golden Path A (No Tool) + +用户输入后,完整经历三阶段并产出最终回复;前端收到完整 step 事件与 `RunFinished`。 + +### 7.2 Golden Path B (Tool + Approval + Resume) + +用户触发工具调用,审批后系统异步续跑并最终产出 assistant 回复;会话不在审批后直接结束。 + +### 7.3 Security Validation + +前端即使提交伪造历史字段,也不会影响后端实际上下文。 + +### 7.4 Event Validation + +每轮 run 必须包含 `intent/execution/organization` 的 `StepStarted/StepFinished`。 + +--- + +## 8. Risk And Rollback + +### High Risk: #6 Context Ownership Migration + +- 风险:上下文错绑、历史缺失 +- 控制:会话归属校验 + Redis/DB 一致性读取 +- 回滚:可退到“后端 DB-only 历史重建” + +### High Risk: #5 Async Resume Consistency + +- 风险:重复审批、状态卡死 +- 控制:审批幂等键 + 状态跃迁约束 + 超时终态 +- 回滚:降级为“仅返回工具结果,不自动续跑” + +### Medium Risk: #2 Storage Split Consistency + +- 风险:对象存储与 DB 元数据不一致 +- 控制:先对象后元数据 + 失败补偿清理 +- 回滚:临时退回 DB 内联存储 + +--- + +## 9. Bug-To-Phase Mapping + +- **Phase 1:** #1, #5, #6 +- **Phase 2:** #2, #3, #4, #7, #8 + +--- + +## 10. Next Step + +进入 implementation planning:将本设计拆解为任务级可执行计划(文件、测试、命令、验收证据)。 diff --git a/docs/plans/2026-03-08-agent-tool-architecture-implementation-plan.md b/docs/plans/2026-03-08-agent-tool-architecture-implementation-plan.md new file mode 100644 index 0000000..99d3c2c --- /dev/null +++ b/docs/plans/2026-03-08-agent-tool-architecture-implementation-plan.md @@ -0,0 +1,447 @@ +# Agent Tool Architecture Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. 
+ +**Goal:** 修复 agent 工具架构 8 个问题,先恢复端到端闭环与安全正确性,再补齐 UI Schema、对象存储、阶段解耦、多模态与 ASR。 + +**Architecture:** 采用两阶段落地。Phase 1 先完成后端上下文主控、CrewAI Tools 完整迁移、审批后异步续跑闭环;Phase 2 按 `#3 -> #2 -> #4 -> #7 -> #8` 逐项扩展能力。所有变更遵循 AG-UI 事件流语义,三阶段固定发送 StepStarted/StepFinished。 + +**Tech Stack:** FastAPI, Pydantic, CrewAI, LiteLLM, Redis, Postgres, MinIO/Supabase Storage, pytest + +--- + +### Task 1: 锁定 Phase 1 契约(移除前端历史语义) + +**Files:** +- Modify: `backend/src/core/agent/domain/agui_input.py` +- Modify: `backend/src/core/agent/application/run_service.py` +- Modify: `backend/src/v1/agent/schemas.py` +- Test: `backend/tests/unit/core/agent/test_run_resume_service.py` + +**Step 1: Write the failing test** + +```python +def test_run_ignores_client_history_messages(fake_run_input_with_messages): + result = service.run(run_input=fake_run_input_with_messages) + assert result.used_context_source == "backend" +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_run_resume_service.py -k ignores_client_history -v` +Expected: FAIL,当前实现仍读取/依赖前端 history。 + +**Step 3: Write minimal implementation** + +```python +# run_service.py +user_input = extract_latest_user_text(run_input) +history = await load_context_from_backend_sources(session_id) +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_run_resume_service.py -k ignores_client_history -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/core/agent/domain/agui_input.py backend/src/core/agent/application/run_service.py backend/src/v1/agent/schemas.py backend/tests/unit/core/agent/test_run_resume_service.py +git commit -m "refactor(agent): make backend own conversation context" +``` + +### Task 2: CrewAI Tools 完整迁移(替换硬编码分发) + +**Files:** +- Create: `backend/src/core/agent/infrastructure/crewai/tools_registry.py` +- Create: `backend/src/core/agent/infrastructure/crewai/tools/create_calendar_event_tool.py` +- 
Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py` +- Modify: `backend/src/core/agent/application/run_service.py` +- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py` + +**Step 1: Write the failing test** + +```python +def test_runtime_uses_registered_crewai_tools(): + runtime = build_runtime_with_registry(["create_calendar_event"]) + result = runtime.execute(user_input="帮我创建日历事件", system_prompt="x") + assert result.tool_calls[0].tool_name == "create_calendar_event" +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_crewai_runtime.py -k registered_crewai_tools -v` +Expected: FAIL,当前路径仍是 run_service 硬编码。 + +**Step 3: Write minimal implementation** + +```python +# tools_registry.py +TOOLS = {"create_calendar_event": CreateCalendarEventTool()} + +def tools_for_stage(stage: str) -> list[BaseTool]: + return STAGE_TOOL_MAP.get(stage, []) +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_crewai_runtime.py -k registered_crewai_tools -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/core/agent/infrastructure/crewai/tools_registry.py backend/src/core/agent/infrastructure/crewai/tools/create_calendar_event_tool.py backend/src/core/agent/infrastructure/crewai/runtime.py backend/src/core/agent/application/run_service.py backend/tests/unit/core/agent/test_crewai_runtime.py +git commit -m "feat(agent): migrate backend tools to crewai tool registry" +``` + +### Task 3: 修复审批后异步续跑闭环(#5) + +**Files:** +- Modify: `backend/src/core/agent/application/resume_service.py` +- Modify: `backend/src/core/agent/infrastructure/queue/tasks.py` +- Modify: `backend/src/core/agent/application/session_state_persistence.py` +- Test: `backend/tests/integration/core/agent/test_queue_run_resume.py` + +**Step 1: Write the failing test** + +```python +def test_resume_triggers_async_loop_until_final_assistant_message(client): + response 
= client.post("/v1/agent/runs/{id}/resume", json={"approve": True}) + assert response.status_code == 202 + assert eventually_has_final_assistant_message(id) +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/integration/core/agent/test_queue_run_resume.py -k triggers_async_loop -v` +Expected: FAIL,当前审批后直接完成。 + +**Step 3: Write minimal implementation** + +```python +# resume_service.py +await mark_session_resuming(...) +await enqueue_resume_task(...) +return ResumeAccepted(...) +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/integration/core/agent/test_queue_run_resume.py -k triggers_async_loop -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/core/agent/application/resume_service.py backend/src/core/agent/infrastructure/queue/tasks.py backend/src/core/agent/application/session_state_persistence.py backend/tests/integration/core/agent/test_queue_run_resume.py +git commit -m "fix(agent): continue agent loop asynchronously after tool approval" +``` + +### Task 4: 三阶段 Step 事件完整化(intent/execution/organization) + +**Files:** +- Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py` +- Modify: `backend/src/core/agent/infrastructure/agui/bridge.py` +- Test: `backend/tests/unit/core/agent/test_agui_bridge.py` +- Test: `backend/tests/integration/v1/agent/test_sse_flow_live.py` + +**Step 1: Write the failing test** + +```python +def test_each_stage_emits_step_started_and_finished(): + events = collect_events_from_run(...) 
+ assert has_step_pair(events, "intent") + assert has_step_pair(events, "execution") + assert has_step_pair(events, "organization") +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/integration/v1/agent/test_sse_flow_live.py -k emits_step_started_and_finished -v` +Expected: FAIL,至少一个阶段事件缺失。 + +**Step 3: Write minimal implementation** + +```python +emit_step_started(stage) +stage_output = run_stage(stage) +emit_step_finished(stage) +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/integration/v1/agent/test_sse_flow_live.py -k emits_step_started_and_finished -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/core/agent/infrastructure/crewai/runtime.py backend/src/core/agent/infrastructure/agui/bridge.py backend/tests/unit/core/agent/test_agui_bridge.py backend/tests/integration/v1/agent/test_sse_flow_live.py +git commit -m "feat(agent): emit ag-ui step events for three-stage flow" +``` + +### Task 5: 工具输出统一为 UI Schema v1(#3) + +**Files:** +- Modify: `backend/src/core/agent/infrastructure/crewai/tools/create_calendar_event_tool.py` +- Modify: `backend/src/core/agent/domain/message_metadata.py` +- Test: `backend/tests/unit/core/agent/test_run_resume_service.py` + +**Step 1: Write the failing test** + +```python +def test_calendar_tool_returns_ui_schema_v1(): + result = run_calendar_tool(...) 
+ assert result["type"] == "calendar_card.v1" + assert result["version"] == "v1" +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_run_resume_service.py -k returns_ui_schema_v1 -v` +Expected: FAIL,当前返回简单 status/event_id。 + +**Step 3: Write minimal implementation** + +```python +return { + "type": "calendar_card.v1", + "version": "v1", + "data": {...}, + "actions": [...], +} +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_run_resume_service.py -k returns_ui_schema_v1 -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/core/agent/infrastructure/crewai/tools/create_calendar_event_tool.py backend/src/core/agent/domain/message_metadata.py backend/tests/unit/core/agent/test_run_resume_service.py +git commit -m "feat(agent): return tool results as ui schema v1" +``` + +### Task 6: 工具结果对象存储(#2) + +**Files:** +- Modify: `backend/src/core/agent/application/session_state_persistence.py` +- Modify: `backend/src/core/agent/domain/message_metadata.py` +- Test: `backend/tests/integration/core/agent/test_session_message_persistence.py` + +**Step 1: Write the failing test** + +```python +def test_large_tool_payload_persisted_to_object_storage(): + meta = persist_large_tool_result(...) + assert meta.storage_bucket is not None + assert meta.storage_path is not None +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/integration/core/agent/test_session_message_persistence.py -k object_storage -v` +Expected: FAIL,当前 metadata 为空。 + +**Step 3: Write minimal implementation** + +```python +payload_ref = await persist_tool_result_payload(...) 
+metadata.storage_bucket = payload_ref.bucket +metadata.storage_path = payload_ref.path +metadata.payload_sha256 = payload_ref.sha256 +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/integration/core/agent/test_session_message_persistence.py -k object_storage -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/core/agent/application/session_state_persistence.py backend/src/core/agent/domain/message_metadata.py backend/tests/integration/core/agent/test_session_message_persistence.py +git commit -m "feat(agent): persist large tool results to object storage" +``` + +### Task 7: 三阶段参数解耦(#4) + +**Files:** +- Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py` +- Modify: `backend/src/core/agent/infrastructure/config/resolver.py` +- Test: `backend/tests/unit/core/agent/test_config_resolver.py` +- Test: `backend/tests/unit/core/agent/test_crewai_runtime.py` + +**Step 1: Write the failing test** + +```python +def test_intent_stage_can_disable_tools(): + cfg = load_stage_config(intent_tools=[]) + result = run_intent_stage(cfg) + assert result.tool_calls == [] +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_crewai_runtime.py -k intent_stage_can_disable_tools -v` +Expected: FAIL,当前三阶段共享同一 llm/tools 配置。 + +**Step 3: Write minimal implementation** + +```python +stage_cfg = config.for_stage(stage) +run_stage(..., llm_config=stage_cfg.llm, tools=stage_cfg.tools) +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_crewai_runtime.py -k intent_stage_can_disable_tools -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/core/agent/infrastructure/crewai/runtime.py backend/src/core/agent/infrastructure/config/resolver.py backend/tests/unit/core/agent/test_config_resolver.py backend/tests/unit/core/agent/test_crewai_runtime.py +git commit -m "refactor(agent): 
decouple llm and tool strategy by stage" +``` + +### Task 8: 多模态图片输入(文件上传)支持(#7) + +**Files:** +- Modify: `backend/src/core/agent/domain/agui_input.py` +- Modify: `backend/src/core/agent/infrastructure/crewai/runtime.py` +- Modify: `backend/src/core/agent/infrastructure/litellm/client.py` +- Test: `backend/tests/unit/core/agent/test_litellm_client.py` + +**Step 1: Write the failing test** + +```python +def test_image_content_block_is_preserved_for_llm(): + payload = build_multimodal_payload(text="分析图片", image_file="a.png") + assert payload_contains_image_block(payload) +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_litellm_client.py -k image_content_block_is_preserved -v` +Expected: FAIL,当前非 text 被丢弃。 + +**Step 3: Write minimal implementation** + +```python +if item.type == "image": + blocks.append({"type": "image_url", "image_url": {"url": signed_file_url}}) +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/unit/core/agent/test_litellm_client.py -k image_content_block_is_preserved -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/core/agent/domain/agui_input.py backend/src/core/agent/infrastructure/crewai/runtime.py backend/src/core/agent/infrastructure/litellm/client.py backend/tests/unit/core/agent/test_litellm_client.py +git commit -m "feat(agent): support multimodal image input blocks" +``` + +### Task 9: 新增 ASR 同步转写 API(#8) + +**Files:** +- Create: `backend/src/v1/agent/asr_router.py` +- Modify: `backend/src/v1/agent/router.py` +- Create: `backend/src/v1/agent/asr_service.py` +- Create: `backend/src/v1/agent/asr_schemas.py` +- Test: `backend/tests/integration/v1/agent/test_routes.py` + +**Step 1: Write the failing test** + +```python +def test_asr_transcribe_returns_sync_transcript(client, wav_file): + resp = client.post("/v1/agent/asr/transcribe", files={"audio": wav_file}) + assert resp.status_code == 200 + assert 
resp.json()["transcript"] +``` + +**Step 2: Run test to verify it fails** + +Run: `cd backend && uv run pytest tests/integration/v1/agent/test_routes.py -k asr_transcribe_returns_sync_transcript -v` +Expected: FAIL,当前无路由。 + +**Step 3: Write minimal implementation** + +```python +@router.post("/asr/transcribe") +async def transcribe(audio: UploadFile) -> AsrTranscribeResponse: + text = await asr_service.transcribe(audio) + return AsrTranscribeResponse(transcript=text) +``` + +**Step 4: Run test to verify it passes** + +Run: `cd backend && uv run pytest tests/integration/v1/agent/test_routes.py -k asr_transcribe_returns_sync_transcript -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add backend/src/v1/agent/asr_router.py backend/src/v1/agent/router.py backend/src/v1/agent/asr_service.py backend/src/v1/agent/asr_schemas.py backend/tests/integration/v1/agent/test_routes.py +git commit -m "feat(agent): add synchronous asr transcription endpoint" +``` + +### Task 10: 全量验证与文档对齐 + +**Files:** +- Modify: `docs/runtime/runtime-route.md` +- Modify: `docs/bugs/2026-03-08-agent-tool-architecture.md` (状态回填) + +**Step 1: Run targeted unit suite** + +Run: `cd backend && uv run pytest tests/unit/core/agent -v` +Expected: PASS + +**Step 2: Run targeted integration suite** + +Run: `cd backend && uv run pytest tests/integration/core/agent tests/integration/v1/agent -v` +Expected: PASS + +**Step 3: Run e2e smoke for agent flow** + +Run: `cd backend && uv run pytest tests/e2e -k "agent or mobile_health" -v` +Expected: PASS 或明确记录跳过原因 + +**Step 4: Run quality gates** + +Run: `cd backend && uv run ruff check src tests && uv run basedpyright` +Expected: PASS + +**Step 5: Final commit** + +```bash +git add docs/runtime/runtime-route.md docs/bugs/2026-03-08-agent-tool-architecture.md +git commit -m "docs(agent): align runtime docs with new tool architecture" +``` + +--- + +## Verification Evidence Requirements + +实施完成时必须输出: + +1. 双金路径验证结果(无工具 + 工具审批后续跑) +2. 
三阶段 StepStarted/StepFinished 事件日志片段 +3. 安全验证结果(前端 history 篡改无效) +4. ASR 同步转写接口请求/响应样例 +5. 关键命令输出摘要(pytest/ruff/basedpyright) + +--- + +## Notes + +- 本计划不包含兼容逻辑保留。 +- 本计划采用一次性切换。 +- 若实施中出现 S2 -> S3 范围升级,先暂停并更新计划,再继续执行。