fix(agent): stabilize live e2e tool execution and loop isolation
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
# Live E2E Test Suite
|
||||
|
||||
`backend/tests/e2e/test_agent_live_flow.py` 是真实依赖端到端测试,依赖真实 LLM、Supabase DB、Supabase Storage。
|
||||
|
||||
## Command Split
|
||||
|
||||
- CI 默认测试(不跑 live):
|
||||
|
||||
```bash
|
||||
uv run pytest -m "not live"
|
||||
```
|
||||
|
||||
- 手动运行 live 真实端到端:
|
||||
|
||||
```bash
|
||||
AGENT_LIVE_E2E=1 uv run pytest backend/tests/e2e/test_agent_live_flow.py -m live -v
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- live 用例默认通过 marker 与常规回归隔离,避免 CI 因外部环境波动失败。
|
||||
- tool result 存储使用私有 bucket 读取校验,不依赖公共下载链接。
|
||||
@@ -0,0 +1,562 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import delete, select
|
||||
|
||||
from core.agent.application.resume_service import ResumeService
|
||||
from core.agent.application.run_service import RunService
|
||||
from core.agent.infrastructure.queue.tasks import run_agent_task
|
||||
from core.agent.infrastructure.storage.tool_result_storage import (
|
||||
create_tool_result_storage,
|
||||
)
|
||||
from core.db import AsyncSessionLocal, engine
|
||||
from models.agent_chat_message import AgentChatMessage, AgentChatMessageRole
|
||||
from models.agent_chat_session import AgentChatSession, AgentChatSessionStatus
|
||||
from models.llm import Llm
|
||||
from models.llm_factory import LlmFactory
|
||||
from models.profile import Profile
|
||||
from models.schedule_items import ScheduleItem
|
||||
from models.system_agents import SystemAgents
|
||||
from services.base.supabase import supabase_service
|
||||
|
||||
IMAGE_FIXTURE = (
|
||||
Path(__file__).resolve().parents[1] / "fixtures" / "images" / "calendar_text_cn.png"
|
||||
)
|
||||
|
||||
|
||||
def _live_enabled() -> bool:
|
||||
return os.getenv("AGENT_LIVE_E2E") == "1"
|
||||
|
||||
|
||||
async def _init_supabase_admin_client():
    """Initialize the shared Supabase service and hand back its admin client.

    The calling test is skipped (via ``pytest.skip``) when
    ``supabase_service.initialize()`` reports a falsy result.

    Returns:
        Whatever ``supabase_service.get_admin_client()`` returns.
    """
    service = supabase_service
    if not await service.initialize():
        pytest.skip("Supabase service unavailable")
    return service.get_admin_client()
|
||||
|
||||
|
||||
async def _create_owner_profile(admin_client) -> tuple[uuid.UUID, str]:
|
||||
user_email = f"agent-live-{uuid.uuid4().hex[:8]}@example.com"
|
||||
created = admin_client.auth.admin.create_user(
|
||||
{
|
||||
"email": user_email,
|
||||
"password": "Passw0rd!123",
|
||||
"email_confirm": True,
|
||||
}
|
||||
)
|
||||
user_id = str(created.user.id)
|
||||
owner_id = uuid.UUID(user_id)
|
||||
return owner_id, user_id
|
||||
|
||||
|
||||
async def _resolve_llm_id(
    *,
    target_model_code: str = "deepseek-chat",
    target_factory_name: str = "deepseek",
) -> tuple[uuid.UUID, uuid.UUID | None, uuid.UUID | None]:
    """Find an ``Llm`` row for a model code, creating one when none exists.

    Args:
        target_model_code: Value matched against ``Llm.model_code``.
        target_factory_name: Value matched against ``LlmFactory.name`` when a
            new ``Llm`` row has to be attached to a factory.

    Returns:
        ``(llm_id, llm_id_to_cleanup, factory_id_to_cleanup)``. The two
        cleanup ids are ``None`` for rows that already existed and are set
        only for rows this call inserted, so the caller can delete them.
    """
    # NOTE(review): presumably drops pooled connections bound to an earlier
    # event loop before this test touches the database - confirm.
    await engine.dispose()

    llm_lookup = select(Llm.id).where(Llm.model_code == target_model_code).limit(1)
    async with AsyncSessionLocal() as session:
        found_llm_id = (await session.execute(llm_lookup)).scalar_one_or_none()
    if found_llm_id is not None:
        # Pre-existing model row: nothing for the caller to clean up.
        return found_llm_id, None, None

    new_llm_id = uuid.uuid4()
    factory_cleanup_id: uuid.UUID | None = None
    factory_lookup = (
        select(LlmFactory.id).where(LlmFactory.name == target_factory_name).limit(1)
    )
    async with AsyncSessionLocal() as session:
        factory_id = (await session.execute(factory_lookup)).scalar_one_or_none()
        if factory_id is None:
            # No factory with that name yet: insert a placeholder one and
            # remember it so the caller can remove it afterwards.
            factory_id = uuid.uuid4()
            session.add(
                LlmFactory(
                    id=factory_id,
                    name=target_factory_name,
                    request_url=f"https://{target_factory_name}.example",
                )
            )
            await session.commit()
            factory_cleanup_id = factory_id

    async with AsyncSessionLocal() as session:
        session.add(
            Llm(
                id=new_llm_id,
                factory_id=factory_id,
                model_code=target_model_code,
            )
        )
        await session.commit()
    return new_llm_id, new_llm_id, factory_cleanup_id
|
||||
|
||||
|
||||
async def _seed_session_with_active_agent(
    *,
    session_id: uuid.UUID,
    owner_id: uuid.UUID,
    agent_type: str,
    llm_id: uuid.UUID,
) -> None:
    """Insert the rows a live run needs: one active agent and one chat session.

    Args:
        session_id: Primary key for the new ``AgentChatSession``.
        owner_id: Stored as the chat session's ``user_id``.
        agent_type: ``agent_type`` of the new ``SystemAgents`` row.
        llm_id: Model the new agent row points at.
    """
    # The engine is disposed before the database is touched, as in the other
    # helpers of this module.
    await engine.dispose()

    seed_rows = [
        SystemAgents(agent_type=agent_type, llm_id=llm_id, status="active"),
        AgentChatSession(id=session_id, user_id=owner_id),
    ]
    async with AsyncSessionLocal() as session:
        session.add_all(seed_rows)
        await session.commit()
|
||||
|
||||
|
||||
async def _cleanup_session_and_agent(
    *,
    session_id: uuid.UUID,
    agent_type: str,
    owner_id: uuid.UUID,
    llm_id_to_cleanup: uuid.UUID | None,
    factory_id_to_cleanup: uuid.UUID | None,
) -> None:
    """Delete the database rows one live test created, in a single commit.

    Args:
        session_id: ``AgentChatSession`` row to delete.
        agent_type: ``SystemAgents`` rows with this type are deleted.
        owner_id: ``Profile`` row to delete.
        llm_id_to_cleanup: ``Llm`` row to delete, or ``None`` to keep it.
        factory_id_to_cleanup: ``LlmFactory`` row to delete, or ``None``.
    """
    # Statements are executed in this order: session, agent, profile, then the
    # optional model row before its optional factory row.
    statements = [
        delete(AgentChatSession).where(AgentChatSession.id == session_id),
        delete(SystemAgents).where(SystemAgents.agent_type == agent_type),
        delete(Profile).where(Profile.id == owner_id),
    ]
    if llm_id_to_cleanup is not None:
        statements.append(delete(Llm).where(Llm.id == llm_id_to_cleanup))
    if factory_id_to_cleanup is not None:
        statements.append(
            delete(LlmFactory).where(LlmFactory.id == factory_id_to_cleanup)
        )

    async with AsyncSessionLocal() as session:
        for statement in statements:
            await session.execute(statement)
        await session.commit()
|
||||
|
||||
|
||||
async def _cleanup_auth_user(*, admin_client, user_id: str | None) -> None:
|
||||
if user_id is None:
|
||||
return
|
||||
try:
|
||||
admin_client.auth.admin.delete_user(user_id)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
|
||||
def _encode_fixture_image_base64() -> str:
    """Return the bytes of ``IMAGE_FIXTURE`` as a base64 ASCII string."""
    raw_bytes = IMAGE_FIXTURE.read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "ascii")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.live
async def test_agent_live_intent_only_no_tool() -> None:
    """Run a plain-text prompt through ``run_agent_task`` with no tools.

    Expects no pending tool call, a COMPLETED chat session, and exactly one
    USER message followed by one ASSISTANT message.

    Teardown is registered on an ``AsyncExitStack`` as soon as each resource
    exists. Callbacks run in reverse order (database rows, auth user, Supabase
    client) and each one runs even if setup, the test body, or another
    callback fails.
    """
    # Local import: the module header does not import ``contextlib``.
    from contextlib import AsyncExitStack

    if not _live_enabled():
        pytest.skip("Live test disabled")
    session_id = uuid.uuid4()
    agent_type = f"LIVE_E2E_{uuid.uuid4().hex[:8]}"
    admin_client = await _init_supabase_admin_client()

    async with AsyncExitStack() as cleanup:
        cleanup.push_async_callback(supabase_service.close)

        owner_id, test_user_id = await _create_owner_profile(admin_client)
        cleanup.push_async_callback(
            _cleanup_auth_user, admin_client=admin_client, user_id=test_user_id
        )

        llm_id, llm_cleanup_id, factory_cleanup_id = await _resolve_llm_id()
        cleanup.push_async_callback(
            _cleanup_session_and_agent,
            session_id=session_id,
            agent_type=agent_type,
            owner_id=owner_id,
            llm_id_to_cleanup=llm_cleanup_id,
            factory_id_to_cleanup=factory_cleanup_id,
        )

        await _seed_session_with_active_agent(
            session_id=session_id,
            owner_id=owner_id,
            agent_type=agent_type,
            llm_id=llm_id,
        )

        result = await run_agent_task(
            {
                "command": "run",
                "run_input": {
                    "threadId": str(session_id),
                    "runId": "run-live-intent-1",
                    "state": {},
                    "messages": [
                        {
                            "id": "u1",
                            "role": "user",
                            "content": "请用一句话介绍你是谁。",
                        }
                    ],
                    "tools": [],
                    "context": [],
                    "forwardedProps": {},
                },
            },
            run_service=RunService(),
            resume_service=ResumeService(),
        )

        assert result["pending_tool_call_id"] is None

        # The engine is disposed again before the verification queries.
        await engine.dispose()
        async with AsyncSessionLocal() as session:
            chat_session = await session.get(AgentChatSession, session_id)
            assert chat_session is not None
            assert chat_session.status == AgentChatSessionStatus.COMPLETED
            rows = await session.execute(
                select(AgentChatMessage)
                .where(AgentChatMessage.session_id == session_id)
                .order_by(AgentChatMessage.seq.asc())
            )
            messages = list(rows.scalars().all())
            assert [m.role for m in messages] == [
                AgentChatMessageRole.USER,
                AgentChatMessageRole.ASSISTANT,
            ]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.live
async def test_agent_live_image_calendar_tool_persistence() -> None:
    """Send an image prompt and verify the calendar tool result is persisted.

    Checks that the run completes without a pending tool call, that a
    ``ScheduleItem`` exists for the test owner, and that the newest TOOL
    message points at an object in the ``private`` bucket whose JSON payload
    names ``back.create_calendar_event``.

    Teardown is registered on an ``AsyncExitStack`` as each resource is
    created. It runs in reverse order (uploaded objects, schedule items,
    session/agent rows, auth user, Supabase client), and every step runs even
    when an earlier step or the test body fails. The client is also closed on
    the skip paths.
    """
    # Local import: the module header does not import ``contextlib``.
    from contextlib import AsyncExitStack

    if not _live_enabled():
        pytest.skip("Live test disabled")

    admin_client = await _init_supabase_admin_client()

    async with AsyncExitStack() as cleanup:
        cleanup.push_async_callback(supabase_service.close)

        tool_result_storage = create_tool_result_storage()
        if tool_result_storage is None:
            pytest.skip("Tool result storage unavailable")

        storage = admin_client.storage
        try:
            storage.get_bucket("private")
        except Exception:
            # Any lookup failure is treated as "bucket missing".
            storage.create_bucket("private", "private", {"public": False})

        # Write and remove a probe object to confirm the bucket is writable.
        probe_path = f"tool-results/probe/{uuid.uuid4().hex}.json"
        try:
            storage.from_("private").upload(probe_path, b"{}")
            storage.from_("private").remove([probe_path])
        except Exception:
            pytest.skip("Supabase private storage bucket is not writable")

        owner_id, test_user_id = await _create_owner_profile(admin_client)
        cleanup.push_async_callback(
            _cleanup_auth_user, admin_client=admin_client, user_id=test_user_id
        )

        llm_id, llm_cleanup_id, factory_cleanup_id = await _resolve_llm_id(
            target_model_code="qwen3.5-flash",
            target_factory_name="dashscope",
        )
        session_id = uuid.uuid4()
        agent_type = f"LIVE_E2E_{uuid.uuid4().hex[:8]}"
        uploaded_paths: list[str] = []
        cleanup.push_async_callback(
            _cleanup_session_and_agent,
            session_id=session_id,
            agent_type=agent_type,
            owner_id=owner_id,
            llm_id_to_cleanup=llm_cleanup_id,
            factory_id_to_cleanup=factory_cleanup_id,
        )

        async def _delete_schedule_items() -> None:
            """Remove every schedule item owned by the test user."""
            async with AsyncSessionLocal() as cleanup_session:
                await cleanup_session.execute(
                    delete(ScheduleItem).where(ScheduleItem.owner_id == owner_id)
                )
                await cleanup_session.commit()

        # Registered after the session/agent cleanup so it runs before it.
        cleanup.push_async_callback(_delete_schedule_items)

        def _remove_uploaded_results() -> None:
            """Best-effort removal of tool-result objects recorded by the test."""
            if not uploaded_paths:
                return
            try:
                storage.from_("private").remove(uploaded_paths)
            except Exception:
                # Storage cleanup must not mask the test outcome.
                pass

        cleanup.callback(_remove_uploaded_results)

        await _seed_session_with_active_agent(
            session_id=session_id,
            owner_id=owner_id,
            agent_type=agent_type,
            llm_id=llm_id,
        )

        image_b64 = _encode_fixture_image_base64()
        result = await run_agent_task(
            {
                "command": "run",
                "run_input": {
                    "threadId": str(session_id),
                    "runId": "run-live-image-1",
                    "state": {},
                    "messages": [
                        {
                            "id": "u1",
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": (
                                        "请先识别图片中的日程文字,然后调用后端日历工具创建事件。"
                                        "返回时请确保标题和开始时间不为空。"
                                    ),
                                },
                                {
                                    "type": "binary",
                                    "mimeType": "image/png",
                                    "data": image_b64,
                                },
                            ],
                        }
                    ],
                    "tools": [],
                    "context": [],
                    "forwardedProps": {},
                },
            },
            run_service=RunService(
                tool_result_storage=tool_result_storage,
                # NOTE(review): a 1-byte threshold presumably forces every
                # tool result to be offloaded to storage - confirm.
                tool_result_offload_threshold_bytes=1,
                tool_result_bucket="private",
                tool_result_prefix="tool-results",
            ),
            resume_service=ResumeService(),
        )

        assert result["pending_tool_call_id"] is None

        await engine.dispose()
        async with AsyncSessionLocal() as session:
            chat_session = await session.get(AgentChatSession, session_id)
            assert chat_session is not None
            assert chat_session.status == AgentChatSessionStatus.COMPLETED

            schedule_rows = await session.execute(
                select(ScheduleItem)
                .where(ScheduleItem.owner_id == owner_id)
                .order_by(ScheduleItem.created_at.desc())
            )
            created_items = list(schedule_rows.scalars().all())
            assert created_items, (
                "Expected schedule item created by backend calendar tool"
            )
            created_item = created_items[0]
            assert created_item.title
            assert created_item.timezone
            assert created_item.start_at is not None

            tool_rows = await session.execute(
                select(AgentChatMessage)
                .where(AgentChatMessage.session_id == session_id)
                .where(AgentChatMessage.role == AgentChatMessageRole.TOOL)
                .order_by(AgentChatMessage.seq.desc())
            )
            tool_message = tool_rows.scalars().first()
            assert tool_message is not None
            metadata = tool_message.metadata_json or {}
            storage_bucket = metadata.get("storage_bucket")
            storage_path = metadata.get("storage_path")
            assert storage_bucket == "private"
            assert isinstance(storage_path, str)
            assert storage_path.startswith("tool-results/")
            uploaded_paths.append(storage_path)

        # Read the object back through the admin client rather than a public URL.
        downloaded = storage.from_("private").download(uploaded_paths[0])
        if isinstance(downloaded, bytes):
            payload = json.loads(downloaded.decode("utf-8"))
        else:
            payload = json.loads(str(downloaded))

        assert payload["toolName"] == "back.create_calendar_event"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.live
async def test_agent_live_front_tool_interrupt_resume_continue() -> None:
    """Exercise the frontend-tool flow: run, interrupt, resume, follow-up.

    The first run must stop with a pending tool call and a nonce in the state
    snapshot. The test then resumes with a tool message carrying that nonce,
    expects exactly one follow-up command to be enqueued, executes it, and
    finally checks the session is COMPLETED, contains a TOOL message, has a
    non-negative cost, and that RUN_STARTED / RUN_FINISHED were published.

    Teardown is registered on an ``AsyncExitStack`` as each resource is
    created. It runs in reverse order (database rows, auth user, Supabase
    client), and every step runs even when setup, the test body, or an
    earlier teardown step fails.
    """
    # Local import: the module header does not import ``contextlib``.
    from contextlib import AsyncExitStack

    if not _live_enabled():
        pytest.skip("Live test disabled")

    admin_client = await _init_supabase_admin_client()

    async with AsyncExitStack() as cleanup:
        cleanup.push_async_callback(supabase_service.close)

        owner_id, test_user_id = await _create_owner_profile(admin_client)
        cleanup.push_async_callback(
            _cleanup_auth_user, admin_client=admin_client, user_id=test_user_id
        )

        llm_id, llm_cleanup_id, factory_cleanup_id = await _resolve_llm_id()
        session_id = uuid.uuid4()
        agent_type = f"LIVE_E2E_{uuid.uuid4().hex[:8]}"
        cleanup.push_async_callback(
            _cleanup_session_and_agent,
            session_id=session_id,
            agent_type=agent_type,
            owner_id=owner_id,
            llm_id_to_cleanup=llm_cleanup_id,
            factory_id_to_cleanup=factory_cleanup_id,
        )

        queued_commands: list[dict[str, object]] = []
        published_events: list[str] = []

        async def _publish(event: dict[str, object]) -> None:
            """Record the type of every published event that has a string type."""
            event_type = event.get("type")
            if isinstance(event_type, str):
                published_events.append(event_type)

        async def _enqueue(command: dict[str, object]) -> str:
            """Capture follow-up commands instead of sending them to a queue."""
            queued_commands.append(command)
            return "task-followup-live"

        await _seed_session_with_active_agent(
            session_id=session_id,
            owner_id=owner_id,
            agent_type=agent_type,
            llm_id=llm_id,
        )

        run_result = await run_agent_task(
            {
                "command": "run",
                "run_input": {
                    "threadId": str(session_id),
                    "runId": "run-live-front-1",
                    "state": {},
                    "messages": [
                        {
                            "id": "u1",
                            "role": "user",
                            "content": "你必须调用 front.navigate_to_route 工具跳转到 /calendar/dayweek。",
                        }
                    ],
                    "tools": [
                        {
                            "name": "front.navigate_to_route",
                            "description": "Navigate frontend route; runtime raises approval interrupt when called.",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "target": {"type": "string"},
                                    "replace": {"type": "boolean"},
                                },
                                "required": ["target"],
                            },
                        }
                    ],
                    "context": [],
                    "forwardedProps": {},
                },
            },
            publish_event=_publish,
            enqueue_command=_enqueue,
            run_service=RunService(),
            resume_service=ResumeService(),
        )

        pending_tool_call_id = run_result["pending_tool_call_id"]
        assert isinstance(pending_tool_call_id, str), (
            f"Expected pending tool call, got result: {json.dumps(run_result, ensure_ascii=False)}"
        )
        snapshot = run_result["state_snapshot"]
        assert isinstance(snapshot, dict)
        pending_tool_nonce = snapshot.get("pending_tool_nonce")
        assert isinstance(pending_tool_nonce, str)

        # Recover the tool arguments from the first TOOL_CALL_ARGS event for
        # the pending call whose delta parses as a JSON object.
        guarded_tool_args: dict[str, object] | None = None
        has_matching_tool_args_event = False
        events = run_result.get("events")
        if isinstance(events, list):
            for event in events:
                if not isinstance(event, dict):
                    continue
                if event.get("type") != "TOOL_CALL_ARGS":
                    continue
                if event.get("toolCallId") != pending_tool_call_id:
                    continue
                has_matching_tool_args_event = True
                delta = event.get("delta")
                if not isinstance(delta, str):
                    continue
                try:
                    parsed_delta = json.loads(delta)
                except (TypeError, ValueError):
                    continue
                if isinstance(parsed_delta, dict):
                    guarded_tool_args = parsed_delta
                    break
        if has_matching_tool_args_event:
            assert guarded_tool_args is not None
        if guarded_tool_args is None:
            # No args event was returned: fall back to the arguments the
            # prompt asked for, stamped with the nonce from the snapshot.
            guarded_tool_args = {
                "target": "/calendar/dayweek",
                "replace": False,
                "__nonce": pending_tool_nonce,
            }
        assert guarded_tool_args.get("__nonce") == pending_tool_nonce

        await run_agent_task(
            {
                "command": "resume",
                "run_input": {
                    "threadId": str(session_id),
                    "runId": "run-live-front-2",
                    "state": {},
                    "messages": [
                        {
                            "id": "tool-1",
                            "role": "tool",
                            "toolCallId": pending_tool_call_id,
                            "content": json.dumps(
                                {
                                    "toolName": "front.navigate_to_route",
                                    "toolArgs": guarded_tool_args,
                                    "nonce": pending_tool_nonce,
                                    "result": {
                                        "ok": True,
                                        "route": "/calendar/dayweek",
                                    },
                                },
                                ensure_ascii=True,
                                separators=(",", ":"),
                            ),
                        }
                    ],
                    "tools": [],
                    "context": [],
                    "forwardedProps": {},
                },
            },
            publish_event=_publish,
            enqueue_command=_enqueue,
            run_service=RunService(),
            resume_service=ResumeService(),
        )

        # The resume is expected to enqueue exactly one follow-up command,
        # which is then executed directly.
        assert len(queued_commands) == 1
        await run_agent_task(
            queued_commands[0],
            publish_event=_publish,
            enqueue_command=_enqueue,
            run_service=RunService(),
            resume_service=ResumeService(),
        )

        await engine.dispose()
        async with AsyncSessionLocal() as session:
            chat_session = await session.get(AgentChatSession, session_id)
            assert chat_session is not None
            assert chat_session.status == AgentChatSessionStatus.COMPLETED
            rows = await session.execute(
                select(AgentChatMessage)
                .where(AgentChatMessage.session_id == session_id)
                .order_by(AgentChatMessage.seq.asc())
            )
            messages = list(rows.scalars().all())
            assert any(m.role == AgentChatMessageRole.TOOL for m in messages)
            assert chat_session.total_cost >= Decimal("0")

        assert "RUN_STARTED" in published_events
        assert "RUN_FINISHED" in published_events
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 34 KiB |
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.agent.domain.agui_input import extract_latest_user_payload, parse_run_input
|
||||
|
||||
|
||||
def test_parse_run_input_accepts_binary_multimodal_content() -> None:
    """A user message mixing text and a binary PNG part parses correctly.

    The extracted text must be the text part, and the last extracted block
    must be an ``image_url`` block holding a base64 ``data:`` URL.
    """
    text_part = {"type": "text", "text": "extract image"}
    image_part = {
        "type": "binary",
        "mimeType": "image/png",
        "data": "ZmFrZS1iYXNlNjQ=",
    }
    raw_input = {
        "threadId": "00000000-0000-0000-0000-000000000001",
        "runId": "run-1",
        "state": {},
        "messages": [
            {"id": "u1", "role": "user", "content": [text_part, image_part]},
        ],
        "tools": [],
        "context": [],
        "forwardedProps": {},
    }
    expected_last_block = {
        "type": "image_url",
        "image_url": {"url": "data:image/png;base64,ZmFrZS1iYXNlNjQ="},
    }

    parsed = parse_run_input(raw_input)
    user_text, blocks = extract_latest_user_payload(parsed)

    assert user_text == "extract image"
    assert blocks[-1] == expected_last_block
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from core.agent.infrastructure.crewai.loader import (
|
||||
@@ -35,31 +33,3 @@ def test_load_agent_task_template_returns_matching_pair() -> None:
|
||||
def test_load_agent_task_template_rejects_unknown_stage() -> None:
    """An unrecognized stage name makes the loader raise ``ValueError``."""
    expect_unknown_stage = pytest.raises(ValueError, match="Unknown CrewAI stage")
    with expect_unknown_stage:
        load_agent_task_template(stage="unknown")
|
||||
|
||||
|
||||
def test_load_crewai_agent_templates_rejects_invalid_yaml_shape(
    tmp_path: Path,
) -> None:
    """A YAML document whose top level is a list is rejected by the loader.

    The scratch file is written under pytest's ``tmp_path`` instead of the
    source config directory, so a crashed run cannot leave a stray template
    behind and parallel runs cannot collide on the same file name.
    """
    path = tmp_path / "agents.invalid-shape.yaml"
    path.write_text("- invalid\n", encoding="utf-8")
    with pytest.raises(ValueError, match="Invalid CrewAI template format"):
        load_crewai_agent_templates(path)
|
||||
|
||||
|
||||
def test_load_crewai_agent_templates_rejects_missing_required_fields(
    tmp_path: Path,
) -> None:
    """An agent entry that only defines ``role`` is rejected by the loader.

    The scratch file is written under pytest's ``tmp_path`` instead of the
    source config directory, so a crashed run cannot leave a stray template
    behind and parallel runs cannot collide on the same file name.
    """
    path = tmp_path / "agents.invalid.yaml"
    path.write_text("intent:\n role: Intent Agent\n", encoding="utf-8")
    with pytest.raises(ValueError, match="Invalid CrewAI agent template"):
        load_crewai_agent_templates(path)
|
||||
|
||||
@@ -3,8 +3,10 @@ from __future__ import annotations
|
||||
from types import MethodType, SimpleNamespace
|
||||
from typing import cast
|
||||
|
||||
import core.agent.infrastructure.crewai.runtime as runtime_module
|
||||
import core.agent.infrastructure.crewai.runtime_stage_runner as stage_runner_module
|
||||
from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike
|
||||
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
|
||||
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime, _parse_intent_result
|
||||
from core.agent.infrastructure.litellm.usage_tracker import UsageCost
|
||||
|
||||
|
||||
@@ -127,6 +129,298 @@ def test_runtime_needs_execution_and_collects_front_tool_call() -> None:
|
||||
assert result["total_tokens"] == 6
|
||||
|
||||
|
||||
def test_runtime_extracts_pending_front_tool_from_execution_data() -> None:
    """A front tool described in ``execution_data`` becomes the pending tool.

    The stubbed execution stage returns the tool name and arguments with
    ``result_status`` set to ``pending_approval``; ``execute`` must report
    that call under ``pending_front_tool`` with target ``frontend``.
    """
    intent_json = '{"route":"NEEDS_EXECUTION","intent_summary":"navigate","execution_brief":"call tool","safety_flags":[]}'
    execution_json = '{"status":"SUCCESS","execution_summary":"done","execution_data":{"tool_name":"front.navigate_to_route","arguments":{"target":"/calendar/dayweek","replace":false},"result_status":"pending_approval"},"report_brief":"awaiting approval"}'
    organization_json = '{"assistant_text":"final answer","response_metadata":{"source":"organization"}}'

    def _scripted_stage(self, **kwargs):
        """Return a canned (text, usage, calls, pending) tuple per stage."""
        stage_name = kwargs["stage"]
        if stage_name == "intent":
            return intent_json, UsageCost(1, 1, 2, 0.01), [], None
        if stage_name == "execution":
            return execution_json, UsageCost(2, 2, 4, 0.02), [], None
        return organization_json, UsageCost(3, 3, 6, 0.03), [], None

    navigate_tool = {
        "name": "front.navigate_to_route",
        "description": "navigate",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
            "required": ["target"],
        },
    }

    runtime = _build_runtime()
    runtime._run_stage_with_crewai = MethodType(_scripted_stage, runtime)  # type: ignore[method-assign]
    outcome = runtime.execute(user_input="go", tools=[navigate_tool])

    assert outcome["pending_front_tool"] == {
        "name": "front.navigate_to_route",
        "args": {"target": "/calendar/dayweek", "replace": False},
        "target": "frontend",
    }
|
||||
|
||||
|
||||
def test_runtime_multimodal_intent_receives_execution_tool_awareness() -> None:
    """With multimodal input, the intent stage is told about backend tools.

    Every stage invocation is recorded; the first recorded ``tools_payload``
    must contain an entry named ``back.create_calendar_event`` even though
    the caller passed an empty tool list.
    """
    intent_json = '{"route":"NEEDS_EXECUTION","intent_summary":"need tool","execution_brief":"call back.create_calendar_event","safety_flags":[]}'
    execution_json = '{"status":"SUCCESS","execution_summary":"done","execution_data":{},"report_brief":"ok"}'
    organization_json = '{"assistant_text":"final answer","response_metadata":{"source":"organization"}}'
    observed: list[dict[str, object]] = []

    def _scripted_stage(self, **kwargs):
        """Record the stage and its tools, then return a canned result."""
        stage_name = kwargs["stage"]
        stage_tools = kwargs["tools_payload"]
        observed.append({"stage": stage_name, "tools": stage_tools})
        if stage_name == "intent":
            return intent_json, UsageCost(1, 1, 2, 0.01), [], None
        if stage_name == "execution":
            return execution_json, UsageCost(2, 2, 4, 0.02), [], None
        return organization_json, UsageCost(3, 3, 6, 0.03), [], None

    runtime = _build_runtime()
    runtime._run_stage_with_crewai = MethodType(_scripted_stage, runtime)  # type: ignore[method-assign]
    runtime.execute(
        user_input="go",
        user_input_multimodal=[{"type": "text", "text": "hello"}],
        tools=[],
    )

    first_call_tools = cast(list[dict[str, object]], observed[0]["tools"])
    assert any(
        tool.get("name") == "back.create_calendar_event" for tool in first_call_tools
    )
|
||||
|
||||
|
||||
def test_runtime_synthesizes_backend_call_when_model_skips_react_tool_call() -> None:
    """The runtime calls the backend tool itself when the model did not.

    The stubbed execution stage reports no tool calls but returns event
    fields in ``execution_data``. The registered backend handler must be
    invoked once with ``back.create_calendar_event`` and those fields, and a
    matching backend entry must appear in the result's ``tool_calls``.
    """
    intent_json = '{"route":"NEEDS_EXECUTION","intent_summary":"create event","execution_brief":"create via backend tool","safety_flags":[]}'
    execution_json = '{"status":"SUCCESS","execution_summary":"created","execution_data":{"title":"项目评审","timezone":"Asia/Shanghai"},"report_brief":"done"}'
    organization_json = '{"assistant_text":"ok","response_metadata":{}}'
    handler_invocations: list[tuple[str, dict[str, object]]] = []

    def _backend_handler(
        tool_name: str, tool_args: dict[str, object]
    ) -> dict[str, object]:
        """Record the call and answer with a minimal calendar card."""
        handler_invocations.append((tool_name, tool_args))
        card_data = {"id": "evt-1", "title": str(tool_args.get("title", ""))}
        return {
            "type": "calendar_card.v1",
            "version": "v1",
            "data": card_data,
            "actions": [],
        }

    def _scripted_stage(self, **kwargs):
        """Return a canned (text, usage, calls, pending) tuple per stage."""
        stage_name = kwargs["stage"]
        if stage_name == "intent":
            return intent_json, UsageCost(1, 1, 2, 0.01), [], None
        if stage_name == "execution":
            return execution_json, UsageCost(2, 2, 4, 0.02), [], None
        return organization_json, UsageCost(1, 1, 2, 0.01), [], None

    runtime = _build_runtime()
    runtime.set_backend_tool_handler(_backend_handler)
    runtime._run_stage_with_crewai = MethodType(_scripted_stage, runtime)  # type: ignore[method-assign]
    outcome = runtime.execute(user_input="创建日程", tools=[])

    expected_invocation = (
        "back.create_calendar_event",
        {"title": "项目评审", "timezone": "Asia/Shanghai"},
    )
    assert handler_invocations == [expected_invocation]

    recorded_calls = cast(list[dict[str, object]], outcome["tool_calls"])
    assert any(
        entry.get("target") == "backend"
        and entry.get("name") == "back.create_calendar_event"
        for entry in recorded_calls
    )
|
||||
|
||||
|
||||
def test_runtime_extracts_pending_front_tool_from_approval_required_shape() -> None:
    """The flat ``approval_required`` payload shape also yields a pending tool.

    Here ``execution_data`` carries ``tool_name`` and ``target`` at the top
    level with ``approval_required`` true and no ``replace`` key; the
    expected pending call has ``replace`` set to ``False``.
    """
    intent_json = '{"route":"NEEDS_EXECUTION","intent_summary":"navigate","execution_brief":"call tool","safety_flags":[]}'
    execution_json = '{"status":"PARTIAL","execution_summary":"approval needed","execution_data":{"tool_name":"front.navigate_to_route","target":"/calendar/dayweek","approval_required":true},"report_brief":"await approval"}'
    organization_json = '{"assistant_text":"final answer","response_metadata":{"source":"organization"}}'

    def _scripted_stage(self, **kwargs):
        """Return a canned (text, usage, calls, pending) tuple per stage."""
        stage_name = kwargs["stage"]
        if stage_name == "intent":
            return intent_json, UsageCost(1, 1, 2, 0.01), [], None
        if stage_name == "execution":
            return execution_json, UsageCost(2, 2, 4, 0.02), [], None
        return organization_json, UsageCost(3, 3, 6, 0.03), [], None

    navigate_tool = {
        "name": "front.navigate_to_route",
        "description": "navigate",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
            "required": ["target"],
        },
    }

    runtime = _build_runtime()
    runtime._run_stage_with_crewai = MethodType(_scripted_stage, runtime)  # type: ignore[method-assign]
    outcome = runtime.execute(user_input="go", tools=[navigate_tool])

    assert outcome["pending_front_tool"] == {
        "name": "front.navigate_to_route",
        "args": {"target": "/calendar/dayweek", "replace": False},
        "target": "frontend",
    }
|
||||
|
||||
|
||||
def test_runtime_resume_from_execution_stage_keeps_valid_intent_payload() -> None:
    """Resuming at the execution stage still produces a usable result.

    The stub answers the execution stage with one payload and every other
    stage with the organization payload; the test pins the resulting
    ``assistant_text`` to ``"ok"``.
    """
    execution_json = '{"status":"SUCCESS","execution_summary":"done","execution_data":{},"report_brief":"ok"}'
    organization_json = '{"assistant_text":"final answer","response_metadata":{"source":"organization"}}'

    def _scripted_stage(self, **kwargs):
        """Return a canned (text, usage, calls, pending) tuple per stage."""
        if kwargs["stage"] == "execution":
            return execution_json, UsageCost(2, 2, 4, 0.02), [], None
        return organization_json, UsageCost(3, 3, 6, 0.03), [], None

    runtime = _build_runtime()
    runtime._run_stage_with_crewai = MethodType(_scripted_stage, runtime)  # type: ignore[method-assign]
    outcome = runtime.execute(
        user_input="resume",
        tools=[],
        resume_from_stage="execution",
    )

    assert outcome["assistant_text"] == "ok"
|
||||
|
||||
|
||||
def test_run_stage_with_crewai_uses_output_pydantic_for_stage(
    monkeypatch,
) -> None:
    """The intent stage passes ``IntentResult`` as the task's ``output_pydantic``.

    ``LLM``, ``Agent``, ``Task`` and ``Crew`` in the stage-runner module are
    replaced with fakes that record their constructor kwargs. The fake crew
    returns a structured ``IntentResult`` plus token usage; the test then
    checks the recorded task kwargs and the tuple returned by
    ``_run_stage_with_crewai``.
    """
    runtime = _build_runtime()
    # Constructor kwargs of each fake, keyed by "<kind>_kwargs".
    captured: dict[str, object] = {}

    class _FakeLLM:
        def __init__(self, **kwargs):
            captured["llm_kwargs"] = kwargs

    class _FakeAgent:
        def __init__(self, **kwargs):
            captured["agent_kwargs"] = kwargs
            # NOTE(review): exposed presumably because the runner reads
            # ``agent.llm`` - confirm against the stage runner.
            self.llm = kwargs.get("llm")

    class _FakeTask:
        def __init__(self, **kwargs):
            captured["task_kwargs"] = kwargs

    class _FakeCrew:
        def __init__(self, **kwargs):
            captured["crew_kwargs"] = kwargs

        def kickoff(self):
            # ``raw`` is a placeholder string; the structured ``pydantic``
            # value is what the assertions below expect to see serialized.
            return SimpleNamespace(
                raw="ignored",
                pydantic=runtime_module.IntentResult(
                    route="DIRECT_EXECUTION",
                    intent_summary="intent",
                    assistant_text="ok",
                    safety_flags=[],
                ),
                json_dict=None,
                token_usage=SimpleNamespace(
                    prompt_tokens=1,
                    completion_tokens=2,
                    total_tokens=3,
                ),
            )

    monkeypatch.setattr(stage_runner_module, "LLM", _FakeLLM)
    monkeypatch.setattr(stage_runner_module, "Agent", _FakeAgent)
    monkeypatch.setattr(stage_runner_module, "Task", _FakeTask)
    monkeypatch.setattr(stage_runner_module, "Crew", _FakeCrew)

    text, usage, calls, pending = runtime._run_stage_with_crewai(
        stage="intent",
        user_content="hello",
        system_prompt="",
        tools_payload=[],
        litellm_model="dashscope/qwen3.5-flash",
    )

    task_kwargs = cast(dict[str, object], captured["task_kwargs"])
    assert task_kwargs.get("output_pydantic") is runtime_module.IntentResult
    # The returned text must be JSON that validates as an IntentResult.
    assert runtime_module.IntentResult.model_validate_json(text).assistant_text == "ok"
    assert usage.total_tokens == 3
    assert calls == []
    assert pending is None
|
||||
|
||||
|
||||
def test_runtime_backend_registry_check() -> None:
    """``back.create_calendar_event`` is reported as a registered backend tool."""
    is_registered = _build_runtime().is_registered_backend_tool(
        "back.create_calendar_event"
    )
    assert is_registered is True
|
||||
@@ -179,3 +473,184 @@ def test_runtime_emits_step_started_finished_for_all_three_stages() -> None:
|
||||
"organization",
|
||||
"organization",
|
||||
]
|
||||
|
||||
|
||||
def test_parse_intent_result_accepts_markdown_json_fence() -> None:
    """Check that an intent payload wrapped in a Markdown json fence parses.

    The input is a JSON object surrounded by a json code fence; the parsed
    result must expose the route and assistant text carried by that object.
    """
    fenced_payload = """```json
{
\"route\": \"DIRECT_EXECUTION\",
\"intent_summary\": \"navigate\",
\"assistant_text\": \"ok\",
\"safety_flags\": []
}
```"""

    parsed = _parse_intent_result(fenced_payload)

    assert parsed.route == "DIRECT_EXECUTION"
    assert parsed.assistant_text == "ok"
|
||||
|
||||
|
||||
def test_parse_intent_result_coerces_structured_fields() -> None:
    """Check coercion when the model returns objects where strings/lists go.

    The payload supplies ``execution_brief`` and ``safety_flags`` as JSON
    objects together with route ``DIRECT_EXECUTION``. The test expects the
    parsed route to be ``NEEDS_EXECUTION``, the brief to be a non-None value
    that still mentions the tool name, and the flags to be reduced to the
    single entry ``requires_confirmation``.
    """
    raw_payload = """{
"route": "DIRECT_EXECUTION",
"intent_summary": "navigate",
"assistant_text": "",
"execution_brief": {
"action": "front.navigate_to_route",
"target": "/calendar/dayweek"
},
"safety_flags": {
"security_concern": false,
"requires_confirmation": true
}
}"""

    parsed = _parse_intent_result(raw_payload)

    assert parsed.route == "NEEDS_EXECUTION"
    brief = parsed.execution_brief
    assert brief is not None
    assert "front.navigate_to_route" in brief
    assert parsed.safety_flags == ["requires_confirmation"]
|
||||
|
||||
|
||||
def test_parse_intent_result_coerces_structured_intent_summary() -> None:
    """Check that an object-valued ``intent_summary`` ends up as text.

    The payload gives ``intent_summary`` as a JSON object. The test expects
    the parsed summary to be a string that starts with ``{`` and still
    contains the original ``intent_type`` value, with the route unchanged.
    """
    raw_payload = """{
"route": "NEEDS_EXECUTION",
"intent_summary": {
"intent_type": "Navigation Request",
"confidence": 0.93
},
"execution_brief": "call front tool",
"safety_flags": []
}"""

    parsed = _parse_intent_result(raw_payload)
    summary = parsed.intent_summary

    assert parsed.route == "NEEDS_EXECUTION"
    assert summary.startswith("{")
    assert "Navigation Request" in summary
|
||||
|
||||
|
||||
def test_runtime_uses_prompt_module_for_stage_descriptions(monkeypatch) -> None:
    """Check that the Task description comes from the stage-prompt module.

    The CrewAI classes on ``stage_runner_module`` are replaced with local
    fakes and ``build_stage_task_description`` is replaced with a function
    returning a sentinel. After running the intent stage, the sentinel must
    have been requested and must be the ``description`` handed to ``Task``.
    """
    runtime = _build_runtime()
    # Records whether the patched prompt builder ran and what Task received.
    observed: dict[str, object] = {"called": False}

    class _FakeLLM:
        def __init__(self, **kwargs):
            del kwargs

    class _FakeAgent:
        def __init__(self, **kwargs):
            self.llm = kwargs.get("llm")

    class _FakeTask:
        def __init__(self, **kwargs):
            observed["description"] = kwargs.get("description")

    class _FakeCrew:
        def __init__(self, **kwargs):
            del kwargs

        def kickoff(self):
            intent = runtime_module.IntentResult(
                route="DIRECT_EXECUTION",
                intent_summary="intent",
                assistant_text="ok",
                safety_flags=[],
            )
            usage = SimpleNamespace(
                prompt_tokens=1,
                completion_tokens=2,
                total_tokens=3,
            )
            return SimpleNamespace(
                raw="ignored",
                pydantic=intent,
                json_dict=None,
                token_usage=usage,
            )

    def _fake_build_stage_task_description(**kwargs):
        del kwargs
        observed["called"] = True
        return "PROMPT_FROM_MODULE"

    crewai_fakes = {
        "LLM": _FakeLLM,
        "Agent": _FakeAgent,
        "Task": _FakeTask,
        "Crew": _FakeCrew,
    }
    for attr_name, fake_cls in crewai_fakes.items():
        monkeypatch.setattr(stage_runner_module, attr_name, fake_cls)
    monkeypatch.setattr(
        stage_runner_module.runtime_stage_prompts,
        "build_stage_task_description",
        _fake_build_stage_task_description,
    )

    runtime._run_stage_with_crewai(
        stage="intent",
        user_content="hello",
        system_prompt="",
        tools_payload=[],
        litellm_model="dashscope/qwen3.5-flash",
    )

    assert observed["called"] is True
    assert observed["description"] == "PROMPT_FROM_MODULE"
|
||||
|
||||
|
||||
def test_run_stage_with_crewai_does_not_force_execution_output_pydantic(
    monkeypatch,
) -> None:
    """Check that the execution stage builds its Task without output_pydantic.

    The CrewAI classes on ``stage_runner_module`` are swapped for local
    fakes; the fake Task records the ``output_pydantic`` keyword it was
    given (``None`` when absent). Running the execution stage with one
    frontend tool must leave that recorded value as ``None``.
    """
    runtime = _build_runtime()
    observed: dict[str, object] = {}

    class _FakeLLM:
        def __init__(self, **kwargs):
            del kwargs

    class _FakeAgent:
        def __init__(self, **kwargs):
            self.llm = kwargs.get("llm")

    class _FakeTask:
        def __init__(self, **kwargs):
            observed["output_pydantic"] = kwargs.get("output_pydantic")

    class _FakeCrew:
        def __init__(self, **kwargs):
            del kwargs

        def kickoff(self):
            raw_output = (
                '{"status":"SUCCESS","execution_summary":"done",'
                '"execution_data":{},"report_brief":"ok"}'
            )
            usage = SimpleNamespace(
                prompt_tokens=1,
                completion_tokens=2,
                total_tokens=3,
            )
            return SimpleNamespace(
                raw=raw_output,
                pydantic=None,
                json_dict=None,
                token_usage=usage,
            )

    crewai_fakes = {
        "LLM": _FakeLLM,
        "Agent": _FakeAgent,
        "Task": _FakeTask,
        "Crew": _FakeCrew,
    }
    for attr_name, fake_cls in crewai_fakes.items():
        monkeypatch.setattr(stage_runner_module, attr_name, fake_cls)

    navigate_tool = {
        "name": "front.navigate_to_route",
        "description": "navigate",
        "parameters": {
            "type": "object",
            "properties": {"target": {"type": "string"}},
            "required": ["target"],
        },
    }

    runtime._run_stage_with_crewai(
        stage="execution",
        user_content='{"user_input":"go","intent_summary":"navigate"}',
        system_prompt="",
        tools_payload=[navigate_tool],
        litellm_model="dashscope/qwen3.5-flash",
    )

    assert observed["output_pydantic"] is None
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.agent.infrastructure.crewai.runtime_parsers import parse_execution_result
|
||||
|
||||
|
||||
def test_parse_execution_result_preserves_execution_data_for_interrupted_status() -> (
    None
):
    """Check that ``execution_data`` survives an ``interrupted`` status.

    The raw JSON reports status ``interrupted`` with the tool name and input
    under ``execution_data``. The test expects the parsed status to be
    ``PARTIAL`` and the tool name and input to remain readable.
    """
    raw_output = (
        '{"status":"interrupted","execution_summary":"approval needed",'
        '"execution_data":{"tool_called":"front.navigate_to_route",'
        '"input":{"target":"/calendar/dayweek"},'
        '"error":"frontend tool requires approval"},'
        '"report_brief":"await approval"}'
    )

    parsed = parse_execution_result(raw_output)

    assert parsed.status == "PARTIAL"
    assert parsed.execution_data.get("tool_called") == "front.navigate_to_route"
    assert parsed.execution_data.get("input") == {"target": "/calendar/dayweek"}
|
||||
@@ -0,0 +1,223 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from crewai.agents import parser as crew_parser
|
||||
|
||||
from core.agent.infrastructure.crewai.runtime_tools import (
|
||||
PendingFrontendToolCall,
|
||||
extract_pending_front_tool,
|
||||
resolve_stage_crewai_tools,
|
||||
)
|
||||
|
||||
|
||||
def test_frontend_tool_accepts_direct_kwargs_and_raises_pending() -> None:
    """Check that running a frontend tool with kwargs raises a pending call.

    A single ``front.navigate_to_route`` tool is resolved with no backend
    handler. Calling ``run`` with keyword arguments must raise
    ``PendingFrontendToolCall`` whose payload carries the tool name and the
    exact arguments that were passed.
    """
    navigate_spec = {
        "name": "front.navigate_to_route",
        "description": "Navigate to route",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
            "required": ["target"],
        },
    }
    recorded_calls: list[dict[str, object]] = []
    stage_tools = resolve_stage_crewai_tools(
        tools_payload=[navigate_spec],
        calls=recorded_calls,
        backend_handler=None,
    )
    frontend_tool = stage_tools[0]

    with pytest.raises(PendingFrontendToolCall) as raised:
        frontend_tool.run(target="/calendar/dayweek", replace=False)

    expected_args = {
        "target": "/calendar/dayweek",
        "replace": False,
    }
    assert raised.value.payload["name"] == "front.navigate_to_route"
    assert raised.value.payload["args"] == expected_args
|
||||
|
||||
|
||||
def test_react_action_text_can_address_frontend_tool_name() -> None:
    """Check that a ReAct ``Action:`` line can name a dotted frontend tool.

    The ReAct text is parsed with the CrewAI parser and must yield an
    ``AgentAction``. The tool whose name equals the parsed tool name is then
    looked up among the resolved stage tools and run; it must raise
    ``PendingFrontendToolCall`` carrying the frontend tool name.
    """
    react_text = (
        "Thought: need route change\n"
        "Action: front.navigate_to_route\n"
        'Action Input: {"target":"/calendar/dayweek","replace":false}'
    )
    parsed = crew_parser.parse(react_text)
    assert isinstance(parsed, crew_parser.AgentAction)

    navigate_spec = {
        "name": "front.navigate_to_route",
        "description": "Navigate to route",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
            "required": ["target"],
        },
    }
    recorded_calls: list[dict[str, object]] = []
    stage_tools = resolve_stage_crewai_tools(
        tools_payload=[navigate_spec],
        calls=recorded_calls,
        backend_handler=None,
    )
    # First tool whose name matches what the parser extracted.
    matched_tool = next(
        candidate for candidate in stage_tools if candidate.name == parsed.tool
    )

    with pytest.raises(PendingFrontendToolCall) as raised:
        matched_tool.run(target="/calendar/dayweek", replace=False)

    assert raised.value.payload["name"] == "front.navigate_to_route"
|
||||
|
||||
|
||||
def test_dynamic_tool_args_schema_follows_tool_parameters() -> None:
    """Check that a resolved tool's args schema mirrors its parameters.

    The JSON schema produced from the first resolved tool's ``args_schema``
    must list both declared properties and mark only ``target`` as required.
    """
    navigate_spec = {
        "name": "front.navigate_to_route",
        "description": "Navigate to route",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
            "required": ["target"],
        },
    }
    recorded_calls: list[dict[str, object]] = []
    stage_tools = resolve_stage_crewai_tools(
        tools_payload=[navigate_spec],
        calls=recorded_calls,
        backend_handler=None,
    )

    json_schema = stage_tools[0].args_schema.model_json_schema()
    declared_props = json_schema.get("properties", {})
    required_names = json_schema.get("required", [])

    assert isinstance(declared_props, dict)
    assert "target" in declared_props
    assert "replace" in declared_props
    assert required_names == ["target"]
|
||||
|
||||
|
||||
def test_extract_pending_front_tool_supports_tool_called_and_input_fields() -> None:
    """Check extraction from ``tool_called`` / ``input`` execution data.

    With no explicit pending call, the execution data names the tool under
    ``tool_called`` and its arguments under ``input`` (only ``target``).
    The expected pending call targets the frontend and has ``replace``
    present as ``False`` alongside the supplied ``target``.
    """
    navigate_tool = {
        "name": "front.navigate_to_route",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
        },
    }
    reported_data = {
        "tool_called": "front.navigate_to_route",
        "input": {"target": "/calendar/dayweek"},
        "status": "pending_approval",
    }

    pending = extract_pending_front_tool(
        execution_tools=[navigate_tool],
        pending_call=None,
        execution_data=reported_data,
    )

    expected_pending = {
        "name": "front.navigate_to_route",
        "args": {"target": "/calendar/dayweek", "replace": False},
        "target": "frontend",
    }
    assert pending == expected_pending
|
||||
|
||||
|
||||
def test_extract_pending_front_tool_supports_interrupted_status_with_error() -> None:
    """Check extraction when execution data reports ``interrupted`` + error.

    With no explicit pending call, the execution data carries status
    ``interrupted``, the tool name, its ``parameters`` and an approval error
    message. The expected result is a frontend-targeted pending call with
    the same name and arguments.
    """
    navigate_tool = {
        "name": "front.navigate_to_route",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
        },
    }
    reported_data = {
        "status": "interrupted",
        "tool_called": "front.navigate_to_route",
        "parameters": {"target": "/calendar/dayweek", "replace": False},
        "error": "frontend tool requires approval",
    }

    pending = extract_pending_front_tool(
        execution_tools=[navigate_tool],
        pending_call=None,
        execution_data=reported_data,
    )

    expected_pending = {
        "name": "front.navigate_to_route",
        "args": {"target": "/calendar/dayweek", "replace": False},
        "target": "frontend",
    }
    assert pending == expected_pending
|
||||
|
||||
|
||||
def test_extract_pending_front_tool_supports_approval_result_field() -> None:
    """Check extraction when approval is signalled through ``result``.

    With no explicit pending call, the execution data carries the tool name,
    its ``parameters`` and a ``result`` of ``approval_required_error``. The
    expected result is a frontend-targeted pending call with the same name
    and arguments.
    """
    navigate_tool = {
        "name": "front.navigate_to_route",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
        },
    }
    reported_data = {
        "tool_called": "front.navigate_to_route",
        "parameters": {"target": "/calendar/dayweek", "replace": False},
        "result": "approval_required_error",
    }

    pending = extract_pending_front_tool(
        execution_tools=[navigate_tool],
        pending_call=None,
        execution_data=reported_data,
    )

    expected_pending = {
        "name": "front.navigate_to_route",
        "args": {"target": "/calendar/dayweek", "replace": False},
        "target": "frontend",
    }
    assert pending == expected_pending
|
||||
|
||||
|
||||
def test_extract_pending_front_tool_supports_observation_field() -> None:
    """Check extraction when approval is signalled through ``observation``.

    With no explicit pending call, the execution data carries the tool name,
    its ``parameters`` and an ``observation`` sentence saying the frontend
    tool requires approval. The expected result is a frontend-targeted
    pending call with the same name and arguments.
    """
    navigate_tool = {
        "name": "front.navigate_to_route",
        "parameters": {
            "type": "object",
            "properties": {
                "target": {"type": "string"},
                "replace": {"type": "boolean"},
            },
        },
    }
    reported_data = {
        "tool_called": "front.navigate_to_route",
        "parameters": {"target": "/calendar/dayweek", "replace": False},
        "observation": "frontend tool requires approval.",
    }

    pending = extract_pending_front_tool(
        execution_tools=[navigate_tool],
        pending_call=None,
        execution_data=reported_data,
    )

    expected_pending = {
        "name": "front.navigate_to_route",
        "args": {"target": "/calendar/dayweek", "replace": False},
        "target": "frontend",
    }
    assert pending == expected_pending
|
||||
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.agent.prompt.runtime_stage_prompts import build_stage_task_description
|
||||
|
||||
|
||||
def test_execution_stage_prompt_includes_react_tool_invocation_rule() -> None:
    """Check that the execution-stage prompt spells out the ReAct markers.

    The prompt built for the ``execution`` stage with one frontend tool must
    contain both the ``Action:`` and ``Action Input:`` markers.
    """
    available_tools = [{"name": "front.navigate_to_route"}]

    prompt = build_stage_task_description(
        stage="execution",
        task_description="execute",
        tools_payload=available_tools,
        system_prompt="",
        user_content="go",
    )

    for react_marker in ("Action:", "Action Input:"):
        assert react_marker in prompt
|
||||
@@ -0,0 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
import core.agent.infrastructure.crewai.tools.stage_tool_allowlist as allowlist_module
|
||||
|
||||
|
||||
def test_load_crewai_stage_tools_returns_expected_defaults() -> None:
    """Check the default per-stage tool allowlist.

    Only the ``execution`` stage is expected to list a tool
    (``back.create_calendar_event``); ``intent`` and ``organization`` are
    expected to be empty.
    """
    expected_defaults = {
        "intent": [],
        "execution": ["back.create_calendar_event"],
        "organization": [],
    }

    loaded = allowlist_module.load_crewai_stage_tools()

    assert loaded == expected_defaults
|
||||
|
||||
|
||||
def test_load_crewai_stage_tools_rejects_unknown_backend_tool(monkeypatch) -> None:
    """Check that an allowlist naming an unknown backend tool is rejected.

    ``STAGE_TOOL_ALLOWLIST`` is replaced with an entry for ``back.unknown``;
    loading the stage tools must then raise ``ValueError`` whose message
    matches ``unknown backend tool``.
    """
    bogus_allowlist = {"execution": ["back.unknown"]}
    monkeypatch.setattr(allowlist_module, "STAGE_TOOL_ALLOWLIST", bogus_allowlist)

    with pytest.raises(ValueError, match="unknown backend tool"):
        allowlist_module.load_crewai_stage_tools()
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
|
||||
from core.config.settings import RedisSettings
|
||||
@@ -107,7 +109,9 @@ async def test_get_or_init_redis_client_initializes_when_needed(
|
||||
async def _fake_initialize() -> bool:
|
||||
return True
|
||||
|
||||
monkeypatch.setattr(type(redis_service), "is_initialized", property(lambda _: False))
|
||||
monkeypatch.setattr(
|
||||
type(redis_service), "is_initialized", property(lambda _: False)
|
||||
)
|
||||
monkeypatch.setattr(redis_service, "initialize", _fake_initialize)
|
||||
monkeypatch.setattr(redis_service, "get_client", lambda: fake_client)
|
||||
|
||||
@@ -123,8 +127,40 @@ async def test_get_or_init_redis_client_raises_when_init_fails(
|
||||
async def _fake_initialize() -> bool:
|
||||
return False
|
||||
|
||||
monkeypatch.setattr(type(redis_service), "is_initialized", property(lambda _: False))
|
||||
monkeypatch.setattr(
|
||||
type(redis_service), "is_initialized", property(lambda _: False)
|
||||
)
|
||||
monkeypatch.setattr(redis_service, "initialize", _fake_initialize)
|
||||
|
||||
with pytest.raises(RuntimeError, match="Redis service initialization failed"):
|
||||
await get_or_init_redis_client()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_or_init_redis_client_reinitializes_when_event_loop_changes(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that a client bound to another event loop triggers re-init.

    The service is seeded with a stale client, ``_initialized`` set to True
    and ``_loop_id`` set to 123, while ``asyncio.get_running_loop`` is
    patched to return a placeholder object. The test expects ``initialize``
    to be awaited exactly once and the fresh client to be returned.
    """
    stale_client = _FakeRedisClient()
    fresh_client = _FakeRedisClient()
    # One entry is appended per awaited initialize() call.
    init_calls: list[str] = []

    async def _fake_initialize() -> bool:
        init_calls.append("initialize")
        return True

    class _Loop:
        """Opaque stand-in returned in place of the running event loop."""

    loop_obj = _Loop()

    monkeypatch.setattr(asyncio, "get_running_loop", lambda: loop_obj)
    monkeypatch.setattr(redis_service, "initialize", _fake_initialize)
    monkeypatch.setattr(redis_service, "get_client", lambda: fresh_client)
    # NOTE(review): presumably the service compares _loop_id with the current
    # loop's identity; 123 is just a value that will not match - confirm.
    stale_state = (
        ("_client", stale_client),
        ("_loop_id", 123),
        ("_initialized", True),
    )
    for attr_name, attr_value in stale_state:
        monkeypatch.setattr(redis_service, attr_name, attr_value, raising=False)

    client = await get_or_init_redis_client()

    assert len(init_calls) == 1
    assert client is fresh_client
|
||||
|
||||
Reference in New Issue
Block a user