# Live end-to-end agent tests (listing metadata: 563 lines, 20 KiB, Python).
from __future__ import annotations
|
|
|
|
import base64
|
|
import json
|
|
import os
|
|
import uuid
|
|
from decimal import Decimal
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from sqlalchemy import delete, select
|
|
|
|
from core.agent.application.resume_service import ResumeService
|
|
from core.agent.application.run_service import RunService
|
|
from core.agent.infrastructure.queue.tasks import run_agent_task
|
|
from core.agent.infrastructure.storage.tool_result_storage import (
|
|
create_tool_result_storage,
|
|
)
|
|
from core.db import AsyncSessionLocal, engine
|
|
from models.agent_chat_message import AgentChatMessage, AgentChatMessageRole
|
|
from models.agent_chat_session import AgentChatSession, AgentChatSessionStatus
|
|
from models.llm import Llm
|
|
from models.llm_factory import LlmFactory
|
|
from models.profile import Profile
|
|
from models.schedule_items import ScheduleItem
|
|
from models.system_agents import SystemAgents
|
|
from services.base.supabase import supabase_service
|
|
|
|
# Calendar image fixture used by the image/calendar live test. The path is
# anchored at the parent of this file's directory: <parent>/fixtures/images/.
IMAGE_FIXTURE = Path(__file__).resolve().parents[1].joinpath(
    "fixtures", "images", "calendar_text_cn.png"
)
|
|
|
|
|
|
def _live_enabled() -> bool:
    """Report whether the live end-to-end tests are switched on.

    Returns:
        ``True`` only when the ``AGENT_LIVE_E2E`` environment variable is
        exactly ``"1"``; an unset variable or any other value gives ``False``.
    """
    flag = os.environ.get("AGENT_LIVE_E2E")
    return flag == "1"
|
|
|
|
|
|
async def _init_supabase_admin_client():
    """Initialise the shared Supabase service and hand back its admin client.

    The calling test is skipped (via ``pytest.skip``) when
    ``supabase_service.initialize()`` returns a falsy value.

    Returns:
        Whatever ``supabase_service.get_admin_client()`` returns.
    """
    service_ready = await supabase_service.initialize()
    if not service_ready:
        pytest.skip("Supabase service unavailable")
    admin_client = supabase_service.get_admin_client()
    return admin_client
|
|
|
|
|
|
async def _create_owner_profile(admin_client) -> tuple[uuid.UUID, str]:
    """Create a throwaway, email-confirmed auth user for one live test.

    Args:
        admin_client: Object exposing ``auth.admin.create_user(payload)``.

    Returns:
        ``(owner_id, user_id)``: the new user's id as a ``uuid.UUID`` and as
        its string form.
    """
    # A random suffix keeps concurrent or repeated runs from colliding on email.
    suffix = uuid.uuid4().hex[:8]
    payload = {
        "email": f"agent-live-{suffix}@example.com",
        "password": "Passw0rd!123",
        "email_confirm": True,
    }
    response = admin_client.auth.admin.create_user(payload)
    user_id = str(response.user.id)
    return uuid.UUID(user_id), user_id
|
|
|
|
|
|
async def _resolve_llm_id(
    *,
    target_model_code: str = "deepseek-chat",
    target_factory_name: str = "deepseek",
) -> tuple[uuid.UUID, uuid.UUID | None, uuid.UUID | None]:
    """Find an ``Llm`` row for the model code, creating one if none exists.

    Args:
        target_model_code: Value matched against ``Llm.model_code``.
        target_factory_name: Value matched against ``LlmFactory.name`` when a
            new ``Llm`` row has to be created.

    Returns:
        ``(llm_id, llm_id_to_cleanup, factory_id_to_cleanup)``. The second
        element is ``None`` when the ``Llm`` row already existed; the third is
        ``None`` unless this call also inserted the ``LlmFactory`` row.
    """
    await engine.dispose()

    # Fast path: reuse an existing row, in which case there is nothing to clean up.
    async with AsyncSessionLocal() as lookup_session:
        lookup = await lookup_session.execute(
            select(Llm.id).where(Llm.model_code == target_model_code).limit(1)
        )
        found_llm_id = lookup.scalar_one_or_none()
    if found_llm_id is not None:
        return found_llm_id, None, None

    factory_id = uuid.uuid4()
    new_llm_id = uuid.uuid4()

    # Reuse a factory with the requested name, or insert a placeholder one.
    async with AsyncSessionLocal() as factory_session:
        factory_lookup = await factory_session.execute(
            select(LlmFactory.id).where(LlmFactory.name == target_factory_name).limit(1)
        )
        known_factory_id = factory_lookup.scalar_one_or_none()
        created_factory = known_factory_id is None
        if created_factory:
            factory_session.add(
                LlmFactory(
                    id=factory_id,
                    name=target_factory_name,
                    request_url=f"https://{target_factory_name}.example",
                )
            )
            await factory_session.commit()
        else:
            factory_id = known_factory_id

    async with AsyncSessionLocal() as llm_session:
        llm_session.add(
            Llm(
                id=new_llm_id,
                factory_id=factory_id,
                model_code=target_model_code,
            )
        )
        await llm_session.commit()

    factory_cleanup_id = factory_id if created_factory else None
    return new_llm_id, new_llm_id, factory_cleanup_id
|
|
|
|
|
|
async def _seed_session_with_active_agent(
    *,
    session_id: uuid.UUID,
    owner_id: uuid.UUID,
    agent_type: str,
    llm_id: uuid.UUID,
) -> None:
    """Insert the rows a live run needs: an active agent and a chat session.

    Args:
        session_id: Primary key for the new ``AgentChatSession`` row.
        owner_id: Stored as the chat session's ``user_id``.
        agent_type: ``agent_type`` of the new ``SystemAgents`` row.
        llm_id: ``llm_id`` the new ``SystemAgents`` row points at.
    """
    await engine.dispose()

    seed_rows = [
        SystemAgents(agent_type=agent_type, llm_id=llm_id, status="active"),
        AgentChatSession(id=session_id, user_id=owner_id),
    ]
    async with AsyncSessionLocal() as db:
        db.add_all(seed_rows)
        await db.commit()
|
|
|
|
|
|
async def _cleanup_session_and_agent(
    *,
    session_id: uuid.UUID,
    agent_type: str,
    owner_id: uuid.UUID,
    llm_id_to_cleanup: uuid.UUID | None,
    factory_id_to_cleanup: uuid.UUID | None,
) -> None:
    """Delete the rows a live test seeded, in a single transaction.

    Args:
        session_id: ``AgentChatSession.id`` to delete.
        agent_type: ``SystemAgents.agent_type`` to delete.
        owner_id: ``Profile.id`` to delete.
        llm_id_to_cleanup: ``Llm.id`` to delete, or ``None`` to leave LLM rows alone.
        factory_id_to_cleanup: ``LlmFactory.id`` to delete, or ``None`` to
            leave factory rows alone.
    """
    statements = [
        delete(AgentChatSession).where(AgentChatSession.id == session_id),
        delete(SystemAgents).where(SystemAgents.agent_type == agent_type),
        delete(Profile).where(Profile.id == owner_id),
    ]
    # The LLM row is removed before the factory row it references.
    if llm_id_to_cleanup is not None:
        statements.append(delete(Llm).where(Llm.id == llm_id_to_cleanup))
    if factory_id_to_cleanup is not None:
        statements.append(
            delete(LlmFactory).where(LlmFactory.id == factory_id_to_cleanup)
        )

    async with AsyncSessionLocal() as db:
        for statement in statements:
            await db.execute(statement)
        await db.commit()
|
|
|
|
|
|
async def _cleanup_auth_user(*, admin_client, user_id: str | None) -> None:
    """Delete the auth user created for a live test, on a best-effort basis.

    Args:
        admin_client: Object exposing ``auth.admin.delete_user(user_id)``.
        user_id: Id of the user to delete; ``None`` means nothing was created,
            so the call is a no-op.

    A failed deletion never propagates, so it cannot mask the real test
    outcome, but it is logged so leaked test users remain visible.
    """
    import logging  # local import keeps this helper free of new file-level deps

    if user_id is None:
        return
    try:
        admin_client.auth.admin.delete_user(user_id)
    except Exception:
        # Deliberately swallowed: cleanup must not turn a passing test red.
        logging.getLogger(__name__).warning(
            "Failed to delete live-test auth user %s", user_id, exc_info=True
        )
|
|
|
|
|
|
def _encode_fixture_image_base64() -> str:
    """Return the contents of ``IMAGE_FIXTURE`` as a base64 ASCII string."""
    encoded = base64.b64encode(IMAGE_FIXTURE.read_bytes())
    return str(encoded, "ascii")
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.live
async def test_agent_live_intent_only_no_tool() -> None:
    """Run a plain-text prompt through the agent and expect no tool call.

    Asserts that the run reports no pending tool call, that the chat session
    ends ``COMPLETED``, and that exactly a user message followed by an
    assistant message was persisted.

    Every resource is created inside the ``try`` so that a failure part-way
    through setup still removes the auth user and closes the Supabase service.
    """
    if not _live_enabled():
        pytest.skip("Live test disabled")
    session_id = uuid.uuid4()
    agent_type = f"LIVE_E2E_{uuid.uuid4().hex[:8]}"
    admin_client = await _init_supabase_admin_client()

    # Cleanup handles start empty so `finally` can tell what was actually created.
    owner_id: uuid.UUID | None = None
    test_user_id: str | None = None
    llm_cleanup_id: uuid.UUID | None = None
    factory_cleanup_id: uuid.UUID | None = None

    try:
        owner_id, test_user_id = await _create_owner_profile(admin_client)
        llm_id, llm_cleanup_id, factory_cleanup_id = await _resolve_llm_id()

        await _seed_session_with_active_agent(
            session_id=session_id,
            owner_id=owner_id,
            agent_type=agent_type,
            llm_id=llm_id,
        )

        result = await run_agent_task(
            {
                "command": "run",
                "run_input": {
                    "threadId": str(session_id),
                    "runId": "run-live-intent-1",
                    "state": {},
                    "messages": [
                        {
                            "id": "u1",
                            "role": "user",
                            "content": "请用一句话介绍你是谁。",
                        }
                    ],
                    "tools": [],
                    "context": [],
                    "forwardedProps": {},
                },
            },
            run_service=RunService(),
            resume_service=ResumeService(),
        )

        assert result["pending_tool_call_id"] is None

        await engine.dispose()
        async with AsyncSessionLocal() as session:
            chat_session = await session.get(AgentChatSession, session_id)
            assert chat_session is not None
            assert chat_session.status == AgentChatSessionStatus.COMPLETED
            rows = await session.execute(
                select(AgentChatMessage)
                .where(AgentChatMessage.session_id == session_id)
                .order_by(AgentChatMessage.seq.asc())
            )
            messages = list(rows.scalars().all())
            assert [m.role for m in messages] == [
                AgentChatMessageRole.USER,
                AgentChatMessageRole.ASSISTANT,
            ]
    finally:
        # Nested try/finally: a failing step must not skip the ones after it.
        try:
            # Without an owner nothing was written to the database yet.
            if owner_id is not None:
                await _cleanup_session_and_agent(
                    session_id=session_id,
                    agent_type=agent_type,
                    owner_id=owner_id,
                    llm_id_to_cleanup=llm_cleanup_id,
                    factory_id_to_cleanup=factory_cleanup_id,
                )
        finally:
            try:
                await _cleanup_auth_user(
                    admin_client=admin_client, user_id=test_user_id
                )
            finally:
                await supabase_service.close()
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.live
async def test_agent_live_image_calendar_tool_persistence() -> None:
    """Send an image prompt and check the calendar tool result is persisted.

    Asserts that the run finishes without a pending tool call, that a
    ``ScheduleItem`` owned by the test user exists with title, timezone and
    start time, and that the latest tool message points at an object under
    ``tool-results/`` in the ``private`` bucket whose JSON payload names
    ``back.create_calendar_event``.

    Everything after the Supabase service is initialised runs inside the
    ``try``, so skips and setup failures still close the service and remove
    whatever was created.
    """
    if not _live_enabled():
        pytest.skip("Live test disabled")

    admin_client = await _init_supabase_admin_client()
    storage = admin_client.storage

    session_id = uuid.uuid4()
    agent_type = f"LIVE_E2E_{uuid.uuid4().hex[:8]}"
    uploaded_paths: list[str] = []

    # Cleanup handles start empty so `finally` can tell what was actually created.
    owner_id: uuid.UUID | None = None
    test_user_id: str | None = None
    llm_cleanup_id: uuid.UUID | None = None
    factory_cleanup_id: uuid.UUID | None = None

    try:
        tool_result_storage = create_tool_result_storage()
        if tool_result_storage is None:
            pytest.skip("Tool result storage unavailable")

        # Make sure the bucket exists; creation is attempted only if lookup fails.
        try:
            storage.get_bucket("private")
        except Exception:
            storage.create_bucket("private", "private", {"public": False})

        # Probe write access up front so an unwritable bucket skips, not fails.
        probe_path = f"tool-results/probe/{uuid.uuid4().hex}.json"
        try:
            storage.from_("private").upload(probe_path, b"{}")
            storage.from_("private").remove([probe_path])
        except Exception:
            pytest.skip("Supabase private storage bucket is not writable")

        owner_id, test_user_id = await _create_owner_profile(admin_client)
        llm_id, llm_cleanup_id, factory_cleanup_id = await _resolve_llm_id(
            target_model_code="qwen3.5-flash",
            target_factory_name="dashscope",
        )

        await _seed_session_with_active_agent(
            session_id=session_id,
            owner_id=owner_id,
            agent_type=agent_type,
            llm_id=llm_id,
        )

        image_b64 = _encode_fixture_image_base64()
        result = await run_agent_task(
            {
                "command": "run",
                "run_input": {
                    "threadId": str(session_id),
                    "runId": "run-live-image-1",
                    "state": {},
                    "messages": [
                        {
                            "id": "u1",
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": (
                                        "请先识别图片中的日程文字,然后调用后端日历工具创建事件。"
                                        "返回时请确保标题和开始时间不为空。"
                                    ),
                                },
                                {
                                    "type": "binary",
                                    "mimeType": "image/png",
                                    "data": image_b64,
                                },
                            ],
                        }
                    ],
                    "tools": [],
                    "context": [],
                    "forwardedProps": {},
                },
            },
            run_service=RunService(
                tool_result_storage=tool_result_storage,
                # A 1-byte threshold is passed so any tool result qualifies
                # for offloading (presumably; semantics live in RunService).
                tool_result_offload_threshold_bytes=1,
                tool_result_bucket="private",
                tool_result_prefix="tool-results",
            ),
            resume_service=ResumeService(),
        )

        assert result["pending_tool_call_id"] is None

        await engine.dispose()
        async with AsyncSessionLocal() as session:
            chat_session = await session.get(AgentChatSession, session_id)
            assert chat_session is not None
            assert chat_session.status == AgentChatSessionStatus.COMPLETED

            schedule_rows = await session.execute(
                select(ScheduleItem)
                .where(ScheduleItem.owner_id == owner_id)
                .order_by(ScheduleItem.created_at.desc())
            )
            created_items = list(schedule_rows.scalars().all())
            assert created_items, (
                "Expected schedule item created by backend calendar tool"
            )
            created_item = created_items[0]
            assert created_item.title
            assert created_item.timezone
            assert created_item.start_at is not None

            tool_rows = await session.execute(
                select(AgentChatMessage)
                .where(AgentChatMessage.session_id == session_id)
                .where(AgentChatMessage.role == AgentChatMessageRole.TOOL)
                .order_by(AgentChatMessage.seq.desc())
            )
            tool_message = tool_rows.scalars().first()
            assert tool_message is not None
            metadata = tool_message.metadata_json or {}
            storage_bucket = metadata.get("storage_bucket")
            storage_path = metadata.get("storage_path")
            assert storage_bucket == "private"
            assert isinstance(storage_path, str)
            assert storage_path.startswith("tool-results/")
            uploaded_paths.append(storage_path)

        downloaded = storage.from_("private").download(uploaded_paths[0])
        if isinstance(downloaded, (bytes, bytearray)):
            # Decode binary payloads explicitly; str() on them yields a repr.
            payload = json.loads(bytes(downloaded).decode("utf-8"))
        else:
            payload = json.loads(str(downloaded))

        assert payload["toolName"] == "back.create_calendar_event"
    finally:
        if uploaded_paths:
            try:
                storage.from_("private").remove(uploaded_paths)
            except Exception:
                # Best-effort: a leftover object must not hide the test result.
                pass
        # Nested try/finally: a failing step must not skip the ones after it.
        try:
            # Without an owner nothing was written to the database yet.
            if owner_id is not None:
                async with AsyncSessionLocal() as cleanup_session:
                    await cleanup_session.execute(
                        delete(ScheduleItem).where(ScheduleItem.owner_id == owner_id)
                    )
                    await cleanup_session.commit()
        finally:
            try:
                if owner_id is not None:
                    await _cleanup_session_and_agent(
                        session_id=session_id,
                        agent_type=agent_type,
                        owner_id=owner_id,
                        llm_id_to_cleanup=llm_cleanup_id,
                        factory_id_to_cleanup=factory_cleanup_id,
                    )
            finally:
                try:
                    await _cleanup_auth_user(
                        admin_client=admin_client, user_id=test_user_id
                    )
                finally:
                    await supabase_service.close()
|
|
|
|
|
|
def _find_guarded_tool_args(
    events: object, tool_call_id: str
) -> tuple[bool, dict[str, object] | None]:
    """Scan run events for the JSON arguments of one tool call.

    Args:
        events: The ``events`` value from a run result; anything that is not
            a list is treated as "no events".
        tool_call_id: ``toolCallId`` the ``TOOL_CALL_ARGS`` event must carry.

    Returns:
        ``(saw_matching_event, args)``. ``saw_matching_event`` is ``True`` once
        any ``TOOL_CALL_ARGS`` event for the call was seen; ``args`` is the
        first ``delta`` that parses as a JSON object, or ``None`` if none did.
    """
    saw_matching_event = False
    if not isinstance(events, list):
        return saw_matching_event, None
    for event in events:
        if not isinstance(event, dict):
            continue
        if event.get("type") != "TOOL_CALL_ARGS":
            continue
        if event.get("toolCallId") != tool_call_id:
            continue
        saw_matching_event = True
        delta = event.get("delta")
        if not isinstance(delta, str):
            continue
        try:
            parsed_delta = json.loads(delta)
        except (TypeError, ValueError):
            # Not valid JSON on its own; keep looking at later events.
            continue
        if isinstance(parsed_delta, dict):
            return saw_matching_event, parsed_delta
    return saw_matching_event, None


@pytest.mark.asyncio
@pytest.mark.live
async def test_agent_live_front_tool_interrupt_resume_continue() -> None:
    """Exercise a front-end tool interrupt, its resume, and the follow-up run.

    The first run must stop with a pending ``front.navigate_to_route`` call.
    The test then resumes with a tool result carrying the pending nonce, runs
    the single follow-up command that was enqueued, and asserts the session
    ends ``COMPLETED`` with a tool message persisted and both ``RUN_STARTED``
    and ``RUN_FINISHED`` published.

    Every resource is created inside the ``try`` so that a failure part-way
    through setup still removes the auth user and closes the Supabase service.
    """
    if not _live_enabled():
        pytest.skip("Live test disabled")

    admin_client = await _init_supabase_admin_client()
    session_id = uuid.uuid4()
    agent_type = f"LIVE_E2E_{uuid.uuid4().hex[:8]}"
    queued_commands: list[dict[str, object]] = []
    published_events: list[str] = []

    # Cleanup handles start empty so `finally` can tell what was actually created.
    owner_id: uuid.UUID | None = None
    test_user_id: str | None = None
    llm_cleanup_id: uuid.UUID | None = None
    factory_cleanup_id: uuid.UUID | None = None

    async def _publish(event: dict[str, object]) -> None:
        """Record the type of every published event that has a string type."""
        event_type = event.get("type")
        if isinstance(event_type, str):
            published_events.append(event_type)

    async def _enqueue(command: dict[str, object]) -> str:
        """Capture follow-up commands instead of sending them to a real queue."""
        queued_commands.append(command)
        return "task-followup-live"

    try:
        owner_id, test_user_id = await _create_owner_profile(admin_client)
        llm_id, llm_cleanup_id, factory_cleanup_id = await _resolve_llm_id()

        await _seed_session_with_active_agent(
            session_id=session_id,
            owner_id=owner_id,
            agent_type=agent_type,
            llm_id=llm_id,
        )

        run_result = await run_agent_task(
            {
                "command": "run",
                "run_input": {
                    "threadId": str(session_id),
                    "runId": "run-live-front-1",
                    "state": {},
                    "messages": [
                        {
                            "id": "u1",
                            "role": "user",
                            "content": "你必须调用 front.navigate_to_route 工具跳转到 /calendar/dayweek。",
                        }
                    ],
                    "tools": [
                        {
                            "name": "front.navigate_to_route",
                            "description": "Navigate frontend route; runtime raises approval interrupt when called.",
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "target": {"type": "string"},
                                    "replace": {"type": "boolean"},
                                },
                                "required": ["target"],
                            },
                        }
                    ],
                    "context": [],
                    "forwardedProps": {},
                },
            },
            publish_event=_publish,
            enqueue_command=_enqueue,
            run_service=RunService(),
            resume_service=ResumeService(),
        )

        pending_tool_call_id = run_result["pending_tool_call_id"]
        assert isinstance(pending_tool_call_id, str), (
            f"Expected pending tool call, got result: {json.dumps(run_result, ensure_ascii=False)}"
        )
        snapshot = run_result["state_snapshot"]
        assert isinstance(snapshot, dict)
        pending_tool_nonce = snapshot.get("pending_tool_nonce")
        assert isinstance(pending_tool_nonce, str)

        has_matching_tool_args_event, guarded_tool_args = _find_guarded_tool_args(
            run_result.get("events"), pending_tool_call_id
        )
        # If the run emitted args for this call, they must have been parseable.
        if has_matching_tool_args_event:
            assert guarded_tool_args is not None
        if guarded_tool_args is None:
            # No args event at all: fall back to the arguments the prompt implies.
            guarded_tool_args = {
                "target": "/calendar/dayweek",
                "replace": False,
                "__nonce": pending_tool_nonce,
            }
        assert guarded_tool_args.get("__nonce") == pending_tool_nonce

        await run_agent_task(
            {
                "command": "resume",
                "run_input": {
                    "threadId": str(session_id),
                    "runId": "run-live-front-2",
                    "state": {},
                    "messages": [
                        {
                            "id": "tool-1",
                            "role": "tool",
                            "toolCallId": pending_tool_call_id,
                            "content": json.dumps(
                                {
                                    "toolName": "front.navigate_to_route",
                                    "toolArgs": guarded_tool_args,
                                    "nonce": pending_tool_nonce,
                                    "result": {
                                        "ok": True,
                                        "route": "/calendar/dayweek",
                                    },
                                },
                                ensure_ascii=True,
                                separators=(",", ":"),
                            ),
                        }
                    ],
                    "tools": [],
                    "context": [],
                    "forwardedProps": {},
                },
            },
            publish_event=_publish,
            enqueue_command=_enqueue,
            run_service=RunService(),
            resume_service=ResumeService(),
        )

        # The resume is expected to enqueue exactly one follow-up command.
        assert len(queued_commands) == 1
        await run_agent_task(
            queued_commands[0],
            publish_event=_publish,
            enqueue_command=_enqueue,
            run_service=RunService(),
            resume_service=ResumeService(),
        )

        await engine.dispose()
        async with AsyncSessionLocal() as session:
            chat_session = await session.get(AgentChatSession, session_id)
            assert chat_session is not None
            assert chat_session.status == AgentChatSessionStatus.COMPLETED
            rows = await session.execute(
                select(AgentChatMessage)
                .where(AgentChatMessage.session_id == session_id)
                .order_by(AgentChatMessage.seq.asc())
            )
            messages = list(rows.scalars().all())
            assert any(m.role == AgentChatMessageRole.TOOL for m in messages)
            assert chat_session.total_cost >= Decimal("0")

        assert "RUN_STARTED" in published_events
        assert "RUN_FINISHED" in published_events
    finally:
        # Nested try/finally: a failing step must not skip the ones after it.
        try:
            # Without an owner nothing was written to the database yet.
            if owner_id is not None:
                await _cleanup_session_and_agent(
                    session_id=session_id,
                    agent_type=agent_type,
                    owner_id=owner_id,
                    llm_id_to_cleanup=llm_cleanup_id,
                    factory_id_to_cleanup=factory_cleanup_id,
                )
        finally:
            try:
                await _cleanup_auth_user(
                    admin_client=admin_client, user_id=test_user_id
                )
            finally:
                await supabase_service.close()
|