feat: 添加 Agent 步骤事件与图片附件功能

- 新增 stepStarted/stepFinished 事件类型支持
- 前端实现图片附件上传和预览功能
- 后端增强工具结果存储和事件处理
- 完善相关单元测试和集成测试
This commit is contained in:
zl-q
2026-03-12 09:29:57 +08:00
parent 87215f9d41
commit 7b8865e256
45 changed files with 3869 additions and 308 deletions
@@ -40,3 +40,34 @@ def test_reserved_keys_in_data_cannot_override_wire_fields() -> None:
assert result["threadId"] == "thread-1"
assert result["runId"] == "run-1"
assert result["message"] == "ok"
def test_tool_result_wire_event_filters_sensitive_fields() -> None:
    """Check which fields of a ``tool.result`` event reach the wire format.

    The converted event must expose the display-oriented fields
    (``messageId``, ``toolCallId``, ``toolName``, ``content``, ``ui``) and
    must not expose ``args``, ``result`` or ``error`` at the top level.
    """
    tool_data = {
        "messageId": "tool-result-1",
        "toolCallId": "call-1",
        "callId": "call-1",
        "toolName": "calendar_write",
        "content": "summary",
        "ui": {"type": "calendar_operation.v1", "data": {"ok": True}},
        "args": {"token": "secret"},
        "result": {"raw": "secret"},
        "error": "stack trace",
    }
    wire = to_agui_wire_event(
        {
            "type": "tool.result",
            "threadId": "thread-1",
            "runId": "run-1",
            "data": tool_data,
        }
    )

    # Fields that are expected to be copied through verbatim.
    expected_fields = (
        ("type", "TOOL_CALL_RESULT"),
        ("messageId", "tool-result-1"),
        ("toolCallId", "call-1"),
        ("toolName", "calendar_write"),
        ("content", "summary"),
    )
    for key, expected in expected_fields:
        assert wire[key] == expected
    assert isinstance(wire.get("ui"), dict)

    # Raw tool inputs/outputs must not leak into the wire event.
    for leaked_key in ("args", "result", "error"):
        assert leaked_key not in wire
@@ -28,6 +28,27 @@ class _FakeSessionCtx:
del exc_type, exc, tb
class _FakeToolResultStorage:
def __init__(self) -> None:
self.upload_calls: list[dict[str, object]] = []
async def upload_json(
self,
*,
bucket: str,
path: str,
payload: dict[str, object],
) -> str:
self.upload_calls.append(
{
"bucket": bucket,
"path": path,
"payload": payload,
}
)
return path
@pytest.mark.asyncio
async def test_store_marks_session_running_on_run_started(
monkeypatch: pytest.MonkeyPatch,
@@ -300,7 +321,12 @@ async def test_store_persists_tool_call_result_as_tool_message(
monkeypatch.setattr(store_module, "MessageRepository", _FakeMessageRepository)
monkeypatch.setattr(store_module, "AgentChatSessionStatus", _SessionStatus)
store = store_module.SqlAlchemyEventStore(session_factory=lambda: _FakeSessionCtx())
fake_storage = _FakeToolResultStorage()
store = store_module.SqlAlchemyEventStore(
session_factory=lambda: _FakeSessionCtx(),
tool_result_storage=fake_storage,
tool_result_bucket="agent-tool-results",
)
await store.persist(
{
"type": "TOOL_CALL_RESULT",
@@ -310,7 +336,7 @@ async def test_store_persists_tool_call_result_as_tool_message(
"taskId": "t1",
"stage": "execution",
"args": {"title": "A"},
"result": {"event_id": "evt-1"},
"result": {"event_id": "evt-1", "token": "secret"},
}
)
@@ -318,9 +344,94 @@ async def test_store_persists_tool_call_result_as_tool_message(
assert getattr(append_kwargs["role"], "value", None) == "tool"
assert append_kwargs["tool_name"] == "calendar_write"
assert append_kwargs["metadata"]["task_id"] == "t1"
tool_call_id = append_kwargs["metadata"]["tool_call_id"]
assert isinstance(tool_call_id, str)
assert tool_call_id.startswith("run-1-t1-")
assert append_kwargs["metadata"]["storage_bucket"] == "agent-tool-results"
assert isinstance(append_kwargs["metadata"]["storage_path"], str)
assert append_kwargs["content"].startswith("已创建日程")
assert len(fake_storage.upload_calls) == 1
uploaded = fake_storage.upload_calls[0]
assert uploaded["bucket"] == "agent-tool-results"
payload = cast(dict[str, Any], uploaded["payload"])
assert payload["toolName"] == "calendar_write"
assert "args" not in payload
assert isinstance(payload.get("result"), dict)
assert payload["result"]["token"] == "[REDACTED]"
assert captured["message_delta"] == 1
@pytest.mark.asyncio
async def test_store_sanitizes_nested_sensitive_fields_in_result_payload(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Verify that secrets nested inside a tool result are redacted on upload.

    A ``TOOL_CALL_RESULT`` event is persisted whose ``result`` carries
    token-like keys at several depths (dict, nested dict, list of dicts).
    The payload handed to the fake storage must have each of those values
    replaced with ``"[REDACTED]"``.
    """
    captured: dict[str, object] = {}
    fake_chat_session = SimpleNamespace(state_snapshot={}, message_count=0)

    class _FakeSessionRepository:
        """Session repository stub that always yields the same fake session."""

        def __init__(self, session: object) -> None:
            del session

        async def get_session(self, *, session_id):  # noqa: ANN001
            del session_id
            return fake_chat_session

        async def lock_session_for_update(self, *, session_id):  # noqa: ANN001
            del session_id
            return fake_chat_session

        async def update_runtime_state(self, **kwargs):  # noqa: ANN003
            captured.update(kwargs)

    class _FakeMessageRepository:
        """Message repository stub that records the appended message kwargs."""

        def __init__(self, session: object) -> None:
            del session

        async def append_message(self, **kwargs):  # noqa: ANN003
            captured["append_kwargs"] = kwargs

    monkeypatch.setattr(store_module, "SessionRepository", _FakeSessionRepository)
    monkeypatch.setattr(store_module, "MessageRepository", _FakeMessageRepository)
    monkeypatch.setattr(store_module, "AgentChatSessionStatus", _SessionStatus)

    fake_storage = _FakeToolResultStorage()
    store = store_module.SqlAlchemyEventStore(
        session_factory=lambda: _FakeSessionCtx(),
        tool_result_storage=fake_storage,
        tool_result_bucket="agent-tool-results",
    )

    await store.persist(
        {
            "type": "TOOL_CALL_RESULT",
            "threadId": "00000000-0000-0000-0000-000000000001",
            "runId": "run-1",
            "toolName": "calendar_write",
            "result": {
                "data": {
                    "ok": True,
                    "accessToken": "secret-a",
                    "nested": {
                        "refresh_token": "secret-b",
                    },
                    "items": [
                        {"authorizationHeader": "secret-c"},
                    ],
                }
            },
        }
    )

    # Guard the index below so a missing upload fails as a readable assertion
    # instead of an IndexError.
    assert fake_storage.upload_calls, "expected the tool result to be uploaded"

    payload = cast(dict[str, Any], fake_storage.upload_calls[0]["payload"])
    stored_result = cast(dict[str, Any], payload["result"])
    data = cast(dict[str, Any], stored_result["data"])
    assert data["accessToken"] == "[REDACTED]"

    nested = cast(dict[str, Any], data["nested"])
    assert nested["refresh_token"] == "[REDACTED]"

    items = cast(list[Any], data["items"])
    assert isinstance(items[0], dict)
    assert items[0]["authorizationHeader"] == "[REDACTED]"
@pytest.mark.asyncio
async def test_store_drops_buffer_when_session_missing(
monkeypatch: pytest.MonkeyPatch,
@@ -0,0 +1,73 @@
from __future__ import annotations
from core.agentscope.events.tool_result_summary import build_tool_content_summary
def test_summary_prioritizes_error() -> None:
    """When an error is supplied, the summary reports it instead of the result."""
    summary_kwargs = {
        "tool_name": "calendar_write",
        "args": {"title": "A"},
        "result": {"message": "ignored"},
        "error": {"message": "denied"},
    }
    summary = build_tool_content_summary(**summary_kwargs)
    expected = "calendar_write 执行失败:denied"
    assert summary == expected
def test_summary_for_calendar_write() -> None:
    """A ``calendar_write`` summary combines the title and the start time."""
    summary_kwargs = {
        "tool_name": "calendar_write",
        "args": {"title": "项目评审"},
        "result": {"startAt": "明天 10:00"},
        "error": None,
    }
    summary = build_tool_content_summary(**summary_kwargs)
    expected = "已创建日程:项目评审(明天 10:00)"
    assert summary == expected
def test_summary_for_calendar_read() -> None:
    """A ``calendar_read`` summary reports the total count and the query."""
    summary_kwargs = {
        "tool_name": "calendar_read",
        "args": {"query": "今天"},
        "result": {"data": {"total": 3}},
        "error": None,
    }
    summary = build_tool_content_summary(**summary_kwargs)
    expected = "查询到 3 条日程(今天)"
    assert summary == expected
def test_summary_falls_back_to_result_content() -> None:
    """For an unknown tool the summary starts with ``result['content']``, capped at 80 chars."""
    long_content = "这是非常长的说明" * 20
    summary = build_tool_content_summary(
        tool_name="unknown_tool",
        args=None,
        result={"content": long_content},
        error=None,
    )
    assert summary.startswith("这是非常长的说明")
    # The summary is expected to be truncated rather than returned in full.
    assert len(summary) <= 80
def test_summary_default_done() -> None:
    """With no args, result or error the summary is the generic completion text."""
    summary_kwargs = {
        "tool_name": "unknown_tool",
        "args": None,
        "result": None,
        "error": None,
    }
    summary = build_tool_content_summary(**summary_kwargs)
    expected = "unknown_tool 执行完成"
    assert summary == expected
def test_summary_marks_business_failure_when_ok_false() -> None:
    """A result whose ``data.ok`` is false is summarised as a failure.

    No ``error`` argument is passed; the failure text is expected to come from
    ``result['data']['message']``.
    """
    failed_operation = {
        "type": "calendar_operation.v1",
        "data": {
            "ok": False,
            "code": "UNAUTHORIZED",
            "message": "calendar.write requires validated user token",
        },
    }
    summary = build_tool_content_summary(
        tool_name="calendar_write",
        args={"title": "上学"},
        result=failed_operation,
        error=None,
    )
    expected = "calendar_write 执行失败:calendar.write requires validated user token"
    assert summary == expected
@@ -109,7 +109,6 @@ async def test_runtime_emits_started_text_and_finished_events() -> None:
"step.start",
"step.finish",
"step.start",
"step.finish",
"text.start",
"text.delta",
"text.end",
@@ -117,6 +116,7 @@ async def test_runtime_emits_started_text_and_finished_events() -> None:
"text.delta",
"text.end",
"tool.result",
"step.finish",
"step.start",
"text.start",
"text.delta",
@@ -127,10 +127,14 @@ async def test_runtime_emits_started_text_and_finished_events() -> None:
assert calls[1]["data"]["stepName"] == "intent"
assert calls[2]["data"]["stepName"] == "intent"
assert calls[3]["data"]["stepName"] == "execution"
assert calls[4]["data"]["stepName"] == "execution"
assert calls[5]["data"]["stage"] == "intent"
assert calls[8]["data"]["stage"] == "execution"
assert calls[11]["data"]["toolName"] == "calendar_write"
assert calls[4]["data"]["stage"] == "intent"
assert calls[7]["data"]["stage"] == "execution"
assert calls[10]["data"]["toolName"] == "calendar_write"
assert calls[10]["data"]["toolCallId"] == "run-1-t1-1"
assert calls[10]["data"]["messageId"] == "tool-result-run-1-t1-1"
tool_content = calls[10]["data"]["content"]
assert tool_content == "calendar_write 执行完成"
assert calls[11]["data"]["stepName"] == "execution"
assert calls[12]["data"]["stepName"] == "report"
assert calls[14]["data"]["delta"] == "hello world"
assert calls[13]["data"]["messageId"] == calls[14]["data"]["messageId"]
@@ -305,3 +309,300 @@ async def test_runtime_direct_response_finishes_without_report_stage() -> None:
]
assert calls[3]["data"]["stage"] == "intent"
assert calls[4]["data"]["delta"] == "direct-answer"
@pytest.mark.asyncio
async def test_runtime_tool_result_parses_json_string_ui_payload() -> None:
    """A tool result given as a JSON string must surface as a dict under ``ui``."""
    emitted: list[dict[str, Any]] = []

    class _FakePipeline:
        """Pipeline stub that records every emitted event."""

        async def emit(self, *, session_id: str, event: dict[str, object]) -> str:
            assert session_id == "thread-1"
            emitted.append(event)
            return f"{len(emitted)}-0"

    class _FakeOrchestrator:
        """Orchestrator stub returning one successful task with one tool call."""

        async def run(self, **_: object) -> RuntimeOutput:
            intent = IntentOutput(
                route="TASK_EXECUTION",
                intent_summary="summary",
                direct_response=None,
                tasks=[IntentTask(task_id="t1", title="exec", objective="do")],
                complexity="complex",
                response_metadata={},
            )
            # The tool result is a JSON document serialised as a string.
            tool_call = ExecutionToolCall(
                tool_name="calendar_write",
                args={"title": "A"},
                result='{"type":"calendar_card.v1","version":"v1","data":{"ok":true,"title":"A"},"actions":[]}',
            )
            task_result = ExecutionTaskOutput(
                task_id="t1",
                status="SUCCESS",
                execution_summary="execution-ok",
                execution_data={},
                user_feedback_needs=[],
                response_metadata={},
                tool_calls=[tool_call],
            )
            execution = ExecutionBatchOutput(
                task_results=[task_result],
                overall_status="SUCCESS",
                aggregate_summary="ok",
            )
            report = ReportOutput(
                assistant_text="hello world",
                response_metadata={},
            )
            return RuntimeOutput(intent=intent, execution=execution, report=report)

    runtime = AgentRouteRuntime(
        orchestrator=_FakeOrchestrator(), pipeline=_FakePipeline()
    )
    await runtime.run(
        command=RunCommand(threadId="thread-1", runId="run-1", messages=[]),
        owner_id=uuid4(),
        user_token="token",
        user_context=_user_context(),
        session=cast(AsyncSession, object()),
    )

    tool_events = [evt for evt in emitted if evt.get("type") == "tool.result"]
    assert len(tool_events) == 1
    data = tool_events[0]["data"]
    assert isinstance(data, dict)
    ui_payload = data.get("ui")
    assert isinstance(ui_payload, dict)
    assert data["ui"]["type"] == "calendar_card.v1"
@pytest.mark.asyncio
async def test_runtime_tool_result_keeps_plain_text_content() -> None:
    """A plain-text tool result must be emitted unchanged as the event ``content``."""
    emitted: list[dict[str, Any]] = []

    class _FakePipeline:
        """Pipeline stub that records every emitted event."""

        async def emit(self, *, session_id: str, event: dict[str, object]) -> str:
            assert session_id == "thread-1"
            emitted.append(event)
            return f"{len(emitted)}-0"

    class _FakeOrchestrator:
        """Orchestrator stub returning one successful task with one tool call."""

        async def run(self, **_: object) -> RuntimeOutput:
            intent = IntentOutput(
                route="TASK_EXECUTION",
                intent_summary="summary",
                direct_response=None,
                tasks=[IntentTask(task_id="t1", title="exec", objective="do")],
                complexity="complex",
                response_metadata={},
            )
            # The tool result is a non-JSON, human-readable string.
            tool_call = ExecutionToolCall(
                tool_name="calendar_write",
                args={"title": "A"},
                result="created successfully",
            )
            task_result = ExecutionTaskOutput(
                task_id="t1",
                status="SUCCESS",
                execution_summary="execution-ok",
                execution_data={},
                user_feedback_needs=[],
                response_metadata={},
                tool_calls=[tool_call],
            )
            execution = ExecutionBatchOutput(
                task_results=[task_result],
                overall_status="SUCCESS",
                aggregate_summary="ok",
            )
            report = ReportOutput(
                assistant_text="hello world",
                response_metadata={},
            )
            return RuntimeOutput(intent=intent, execution=execution, report=report)

    runtime = AgentRouteRuntime(
        orchestrator=_FakeOrchestrator(), pipeline=_FakePipeline()
    )
    await runtime.run(
        command=RunCommand(threadId="thread-1", runId="run-1", messages=[]),
        owner_id=uuid4(),
        user_token="token",
        user_context=_user_context(),
        session=cast(AsyncSession, object()),
    )

    tool_events = [evt for evt in emitted if evt.get("type") == "tool.result"]
    assert len(tool_events) == 1
    data = tool_events[0]["data"]
    assert isinstance(data, dict)
    assert data["content"] == "created successfully"
@pytest.mark.asyncio
async def test_runtime_tool_result_sanitizes_sensitive_payload() -> None:
    """Secrets in a tool call's args, result and error must be redacted.

    The emitted ``tool.result`` event is checked for ``"[REDACTED]"`` in place
    of token-like values, both for sensitive keys and for credentials embedded
    inside free-text strings, while a harmless key such as ``author`` is kept.
    """
    emitted: list[dict[str, Any]] = []

    class _FakePipeline:
        """Pipeline stub that records every emitted event."""

        async def emit(self, *, session_id: str, event: dict[str, object]) -> str:
            assert session_id == "thread-1"
            emitted.append(event)
            return f"{len(emitted)}-0"

    class _FakeOrchestrator:
        """Orchestrator stub whose single tool call is full of secrets."""

        async def run(self, **_: object) -> RuntimeOutput:
            intent = IntentOutput(
                route="TASK_EXECUTION",
                intent_summary="summary",
                direct_response=None,
                tasks=[IntentTask(task_id="t1", title="exec", objective="do")],
                complexity="complex",
                response_metadata={},
            )
            tool_call = ExecutionToolCall(
                tool_name="calendar_write",
                args={
                    "title": "A",
                    "accessToken": "arg-secret",
                    "author": "alice",
                },
                result={
                    "ok": True,
                    "accessToken": "secret-token",
                    "message": "Authorization: Bearer inline-token",
                    "nested": [
                        {
                            "authorizationHeader": "Bearer abc",
                        }
                    ],
                },
                error="failed authorization=Bearer abc123 detail",
            )
            task_result = ExecutionTaskOutput(
                task_id="t1",
                status="SUCCESS",
                execution_summary="execution-ok",
                execution_data={},
                user_feedback_needs=[],
                response_metadata={},
                tool_calls=[tool_call],
            )
            execution = ExecutionBatchOutput(
                task_results=[task_result],
                overall_status="SUCCESS",
                aggregate_summary="ok",
            )
            report = ReportOutput(
                assistant_text="hello world",
                response_metadata={},
            )
            return RuntimeOutput(intent=intent, execution=execution, report=report)

    runtime = AgentRouteRuntime(
        orchestrator=_FakeOrchestrator(), pipeline=_FakePipeline()
    )
    await runtime.run(
        command=RunCommand(threadId="thread-1", runId="run-1", messages=[]),
        owner_id=uuid4(),
        user_token="token",
        user_context=_user_context(),
        session=cast(AsyncSession, object()),
    )

    tool_events = [evt for evt in emitted if evt.get("type") == "tool.result"]
    assert len(tool_events) == 1
    data = tool_events[0]["data"]
    assert isinstance(data, dict)

    # Result: sensitive keys and inline credentials are both masked.
    assert isinstance(data["result"], dict)
    assert data["result"]["accessToken"] == "[REDACTED]"
    assert data["result"]["message"] == "Authorization=[REDACTED]"
    nested = data["result"]["nested"]
    assert isinstance(nested, list)
    assert nested[0]["authorizationHeader"] == "[REDACTED]"

    # Args: only the token is masked; ``author`` must survive untouched.
    assert isinstance(data["args"], dict)
    assert data["args"]["accessToken"] == "[REDACTED]"
    assert data["args"]["author"] == "alice"

    # Error text: the credential inside the message is masked in place.
    assert data["error"] == "failed authorization=[REDACTED] detail"
@pytest.mark.asyncio
async def test_runtime_tool_result_keeps_non_object_result() -> None:
    """A list-valued tool result must be emitted as a dict with the list under ``value``."""
    emitted: list[dict[str, Any]] = []

    class _FakePipeline:
        """Pipeline stub that records every emitted event."""

        async def emit(self, *, session_id: str, event: dict[str, object]) -> str:
            assert session_id == "thread-1"
            emitted.append(event)
            return f"{len(emitted)}-0"

    class _FakeOrchestrator:
        """Orchestrator stub returning one successful task with one tool call."""

        async def run(self, **_: object) -> RuntimeOutput:
            intent = IntentOutput(
                route="TASK_EXECUTION",
                intent_summary="summary",
                direct_response=None,
                tasks=[IntentTask(task_id="t1", title="exec", objective="do")],
                complexity="complex",
                response_metadata={},
            )
            # The tool result is a list rather than a mapping.
            tool_call = ExecutionToolCall(
                tool_name="calendar_write",
                args={"title": "A"},
                result=["evt-1", "evt-2"],
            )
            task_result = ExecutionTaskOutput(
                task_id="t1",
                status="SUCCESS",
                execution_summary="execution-ok",
                execution_data={},
                user_feedback_needs=[],
                response_metadata={},
                tool_calls=[tool_call],
            )
            execution = ExecutionBatchOutput(
                task_results=[task_result],
                overall_status="SUCCESS",
                aggregate_summary="ok",
            )
            report = ReportOutput(
                assistant_text="hello world",
                response_metadata={},
            )
            return RuntimeOutput(intent=intent, execution=execution, report=report)

    runtime = AgentRouteRuntime(
        orchestrator=_FakeOrchestrator(), pipeline=_FakePipeline()
    )
    await runtime.run(
        command=RunCommand(threadId="thread-1", runId="run-1", messages=[]),
        owner_id=uuid4(),
        user_token="token",
        user_context=_user_context(),
        session=cast(AsyncSession, object()),
    )

    tool_events = [evt for evt in emitted if evt.get("type") == "tool.result"]
    assert len(tool_events) == 1
    data = tool_events[0]["data"]
    assert isinstance(data, dict)
    assert isinstance(data["result"], dict)
    assert data["result"]["value"] == ["evt-1", "evt-2"]
@@ -212,6 +212,9 @@ def test_merge_stage_response_metadata_estimates_cost_from_pricing(
model="qwen3.5-flash",
),
latency_ms=50,
system_prompt="system",
user_prompt="user",
assistant_text='{"route":"DIRECT_RESPONSE"}',
)
metadata = payload["response_metadata"]
@@ -50,6 +50,10 @@ async def test_run_agentscope_task_calls_runtime_run(
async def _fake_get_redis_client() -> object:
return object()
async def _empty_context(**kwargs: object) -> list[dict[str, Any]]:
del kwargs
return []
monkeypatch.setattr(tasks_module, "AgentRouteRuntime", _FakeRuntime)
monkeypatch.setattr(
tasks_module,
@@ -60,7 +64,7 @@ async def test_run_agentscope_task_calls_runtime_run(
monkeypatch.setattr(
tasks_module,
"_build_recent_context_messages",
lambda **_: [],
_empty_context,
)
result = await tasks_module.run_agentscope_task(
@@ -101,6 +105,10 @@ async def test_run_agentscope_task_includes_recent_context_messages(
async def _fake_get_redis_client() -> object:
return object()
async def _empty_context(**kwargs: object) -> list[dict[str, Any]]:
del kwargs
return []
async def _fake_context(**kwargs: object) -> list[dict[str, Any]]:
del kwargs
return [{"id": "ctx-1", "role": "assistant", "content": "历史上下文"}]
@@ -115,7 +123,7 @@ async def test_run_agentscope_task_includes_recent_context_messages(
monkeypatch.setattr(
tasks_module,
"_build_recent_context_messages",
lambda **_: [],
_empty_context,
)
monkeypatch.setattr(
tasks_module,
@@ -94,3 +94,46 @@ def test_validate_run_request_messages_contract_requires_single_user_message() -
match="RunAgentInput.messages must contain exactly one user message",
):
validate_run_request_messages_contract(run_input)
def test_validate_run_request_messages_contract_accepts_binary_url_blocks() -> None:
    """A user message with a text block and a URL-backed binary block passes validation."""
    text_block = {"type": "text", "text": "请分析"}
    image_block = {
        "type": "binary",
        "mimeType": "image/png",
        "url": "https://signed.example/a.png",
    }
    user_message = {
        "id": "u1",
        "role": "user",
        "content": [text_block, image_block],
    }

    payload = _base_payload()
    payload["messages"] = [user_message]

    # Passing means no exception is raised by the contract check.
    validate_run_request_messages_contract(parse_run_input(payload))
def test_validate_run_request_messages_contract_rejects_binary_data_block() -> None:
    """A binary block carrying inline ``data`` instead of a ``url`` is rejected."""
    text_block = {"type": "text", "text": "请分析"}
    inline_image_block = {
        "type": "binary",
        "mimeType": "image/png",
        "data": "aGVsbG8=",
    }
    user_message = {
        "id": "u1",
        "role": "user",
        "content": [text_block, inline_image_block],
    }

    payload = _base_payload()
    payload["messages"] = [user_message]

    # Parsing happens outside the raises-context: only validation may fail.
    run_input = parse_run_input(payload)
    with pytest.raises(ValueError, match="binary content requires url"):
        validate_run_request_messages_contract(run_input)
@@ -54,3 +54,20 @@ def test_build_intent_user_prompt_filters_non_image_binary_block() -> None:
assert isinstance(prompt, list)
image_blocks = [item for item in prompt if item.get("type") == "image"]
assert image_blocks == []
def test_build_intent_user_prompt_includes_previous_context_messages() -> None:
    """Earlier turns of the conversation must show up in the intent prompt text."""
    conversation = [
        {"id": "u1", "role": "user", "content": "我的口令是蓝鲸42"},
        {"id": "a1", "role": "assistant", "content": "已记住"},
        {"id": "u2", "role": "user", "content": "请重复口令"},
    ]
    prompt = build_intent_user_prompt(user_input=conversation)

    assert isinstance(prompt, list)
    assert prompt
    first_block = prompt[0]
    instruction = first_block.get("text", "")
    assert isinstance(instruction, str)
    assert "[Conversation Context]" in instruction
    # The check looks for the backslash-escaped form of the earlier message —
    # presumably the context is serialised with ASCII escaping; TODO confirm.
    assert "\\u84dd\\u9cb842" in instruction