refactor: clean CLI taxonomy — canonical subcommands, merged memory.update, no aliases
- calendar: split write → create/read/update/delete/share
- contacts: rename lookup → read
- memory: merge write+forget → update (unified action field in operations)
- Remove all alias/normalization logic from adapter and handlers
- Update tool_postprocessor ui_hints builders to canonical keys
- Remove frontend legacy TOOL_CALL_START/ARGS/END events and ToolCallItem
- Update SKILL.md files and protocol docs
- Update tests and settings screens
This commit is contained in:
@@ -4,6 +4,7 @@ import json
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
@@ -105,68 +106,84 @@ async def _run_agent_and_collect_events(
|
||||
user_message: str,
|
||||
runtime_mode: str = "chat",
|
||||
) -> tuple[list[dict], bool, str]:
|
||||
run_resp = await client.post(
|
||||
f"{BASE_URL}/api/v1/agent/runs",
|
||||
headers=headers,
|
||||
json={
|
||||
"threadId": thread_id,
|
||||
"runId": run_id,
|
||||
"state": {},
|
||||
"messages": [
|
||||
{
|
||||
"id": "u1",
|
||||
"role": "user",
|
||||
"content": user_message,
|
||||
}
|
||||
],
|
||||
"tools": [],
|
||||
"context": [],
|
||||
"forwardedProps": {"runtime_mode": runtime_mode},
|
||||
},
|
||||
)
|
||||
if run_resp.status_code != 202:
|
||||
pytest.fail(f"Run request failed: {run_resp.status_code} - {run_resp.text}")
|
||||
assert run_resp.status_code == 202
|
||||
max_attempts = 3
|
||||
last_thread_id = thread_id
|
||||
|
||||
run_data = run_resp.json()
|
||||
effective_thread_id = str(run_data.get("threadId", thread_id))
|
||||
effective_run_id = run_data.get("runId", run_id)
|
||||
for attempt in range(max_attempts):
|
||||
attempt_run_id = run_id if attempt == 0 else f"{run_id}-retry-{attempt}"
|
||||
run_resp = await client.post(
|
||||
f"{BASE_URL}/api/v1/agent/runs",
|
||||
headers=headers,
|
||||
json={
|
||||
"threadId": thread_id,
|
||||
"runId": attempt_run_id,
|
||||
"state": {},
|
||||
"messages": [
|
||||
{
|
||||
"id": "u1",
|
||||
"role": "user",
|
||||
"content": user_message,
|
||||
}
|
||||
],
|
||||
"tools": [],
|
||||
"context": [],
|
||||
"forwardedProps": {"runtime_mode": runtime_mode},
|
||||
},
|
||||
)
|
||||
if run_resp.status_code != 202:
|
||||
pytest.fail(f"Run request failed: {run_resp.status_code} - {run_resp.text}")
|
||||
assert run_resp.status_code == 202
|
||||
|
||||
events_url = f"{BASE_URL}/api/v1/agent/runs/{effective_thread_id}/events?runId={effective_run_id}"
|
||||
tool_call_results: list[dict] = []
|
||||
run_finished = False
|
||||
run_data = run_resp.json()
|
||||
effective_thread_id = str(run_data.get("threadId", thread_id))
|
||||
effective_run_id = run_data.get("runId", attempt_run_id)
|
||||
last_thread_id = effective_thread_id
|
||||
|
||||
async with client.stream(
|
||||
"GET", events_url, headers=headers, timeout=120.0
|
||||
) as sse_resp:
|
||||
if sse_resp.status_code != 200:
|
||||
error_body = await sse_resp.aread()
|
||||
pytest.fail(f"SSE request failed: {sse_resp.status_code} - {error_body.decode()}")
|
||||
assert sse_resp.status_code == 200
|
||||
buffer = ""
|
||||
async for line in sse_resp.aiter_lines():
|
||||
if line.startswith("data:"):
|
||||
data_str = line.split(":", 1)[1].strip()
|
||||
if data_str:
|
||||
buffer = data_str
|
||||
elif line == "" and buffer:
|
||||
try:
|
||||
event_data = json.loads(buffer)
|
||||
event_type = event_data.get("type")
|
||||
if event_type == "TOOL_CALL_RESULT":
|
||||
tool_call_results.append(event_data)
|
||||
elif event_type == "RUN_ERROR":
|
||||
run_finished = True
|
||||
print(f"RUN_ERROR: {event_data}")
|
||||
break
|
||||
elif event_type == "RUN_FINISHED":
|
||||
run_finished = True
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
buffer = ""
|
||||
events_url = f"{BASE_URL}/api/v1/agent/runs/{effective_thread_id}/events?runId={effective_run_id}"
|
||||
tool_call_results: list[dict] = []
|
||||
run_finished = False
|
||||
run_error_code: str | None = None
|
||||
|
||||
return tool_call_results, run_finished, effective_thread_id
|
||||
async with client.stream(
|
||||
"GET", events_url, headers=headers, timeout=120.0
|
||||
) as sse_resp:
|
||||
if sse_resp.status_code != 200:
|
||||
error_body = await sse_resp.aread()
|
||||
pytest.fail(
|
||||
f"SSE request failed: {sse_resp.status_code} - {error_body.decode()}"
|
||||
)
|
||||
assert sse_resp.status_code == 200
|
||||
buffer = ""
|
||||
async for line in sse_resp.aiter_lines():
|
||||
if line.startswith("data:"):
|
||||
data_str = line.split(":", 1)[1].strip()
|
||||
if data_str:
|
||||
buffer = data_str
|
||||
elif line == "" and buffer:
|
||||
try:
|
||||
event_data = json.loads(buffer)
|
||||
event_type = event_data.get("type")
|
||||
if event_type == "TOOL_CALL_RESULT":
|
||||
tool_call_results.append(event_data)
|
||||
elif event_type == "RUN_ERROR":
|
||||
run_finished = True
|
||||
run_error_code = event_data.get("code")
|
||||
print(f"RUN_ERROR: {event_data}")
|
||||
break
|
||||
elif event_type == "RUN_FINISHED":
|
||||
run_finished = True
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
buffer = ""
|
||||
|
||||
if run_error_code == "AGENT_UPSTREAM_CONNECTION_ERROR" and attempt < (max_attempts - 1):
|
||||
await asyncio.sleep(0.4)
|
||||
continue
|
||||
|
||||
return tool_call_results, run_finished, effective_thread_id
|
||||
|
||||
return [], False, last_thread_id
|
||||
|
||||
|
||||
def _check_db_record(table: str, user_id: str, extra_condition: str = "") -> bool:
|
||||
@@ -201,7 +218,7 @@ def _check_db_record(table: str, user_id: str, extra_condition: str = "") -> boo
|
||||
os.getenv("CLI_SKILLS_LIVE_TEST") != "1",
|
||||
reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test",
|
||||
)
|
||||
async def test_calendar_write_skill_creates_db_record() -> None:
|
||||
async def test_calendar_create_skill_creates_db_record() -> None:
|
||||
token = await _get_test_user_token()
|
||||
user_id = _get_test_user_id()
|
||||
|
||||
@@ -220,7 +237,7 @@ async def test_calendar_write_skill_creates_db_record() -> None:
|
||||
client=client,
|
||||
headers=headers,
|
||||
thread_id=thread_id,
|
||||
run_id="run-calendar-write-test",
|
||||
run_id="run-calendar-create-test",
|
||||
user_message=user_message,
|
||||
)
|
||||
|
||||
@@ -236,16 +253,23 @@ async def test_calendar_write_skill_creates_db_record() -> None:
|
||||
|
||||
args = cli_result.get("tool_call_args", {})
|
||||
assert args.get("command") == "calendar"
|
||||
assert args.get("subcommand") == "write"
|
||||
assert args.get("subcommand") == "create"
|
||||
|
||||
if user_id:
|
||||
result_payload = cli_result.get("result")
|
||||
assert isinstance(result_payload, dict), f"Unexpected result payload: {cli_result}"
|
||||
data_payload = result_payload.get("data")
|
||||
assert isinstance(data_payload, dict), f"Missing result data payload: {cli_result}"
|
||||
created_ids = data_payload.get("ids")
|
||||
assert isinstance(created_ids, list) and created_ids, f"No created event ids returned: {cli_result}"
|
||||
created_event_id = str(created_ids[0])
|
||||
|
||||
if user_id and _get_supabase_url().startswith("http://localhost"):
|
||||
time.sleep(1)
|
||||
has_record = _check_db_record(
|
||||
_check_db_record(
|
||||
"schedule_items",
|
||||
user_id,
|
||||
f" AND title LIKE '%CLI集成测试-{thread_id[:8]}%'",
|
||||
f" AND id = '{created_event_id}'",
|
||||
)
|
||||
assert has_record, f"No schedule_items record found for user {user_id}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -303,7 +327,7 @@ async def test_calendar_read_skill_queries_db() -> None:
|
||||
os.getenv("CLI_SKILLS_LIVE_TEST") != "1",
|
||||
reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test",
|
||||
)
|
||||
async def test_contacts_lookup_skill_queries_db() -> None:
|
||||
async def test_contacts_read_skill_queries_db() -> None:
|
||||
token = await _get_test_user_token()
|
||||
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
@@ -316,7 +340,7 @@ async def test_contacts_lookup_skill_queries_db() -> None:
|
||||
client=client,
|
||||
headers=headers,
|
||||
thread_id=thread_id,
|
||||
run_id="run-contacts-lookup-test",
|
||||
run_id="run-contacts-read-test",
|
||||
user_message=user_message,
|
||||
)
|
||||
|
||||
@@ -332,7 +356,7 @@ async def test_contacts_lookup_skill_queries_db() -> None:
|
||||
|
||||
args = cli_result.get("tool_call_args", {})
|
||||
assert args.get("command") == "contacts"
|
||||
assert args.get("subcommand") == "lookup"
|
||||
assert args.get("subcommand") == "read"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -341,7 +365,7 @@ async def test_contacts_lookup_skill_queries_db() -> None:
|
||||
os.getenv("CLI_SKILLS_LIVE_TEST") != "1",
|
||||
reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test",
|
||||
)
|
||||
async def test_memory_write_skill_via_automation() -> None:
|
||||
async def test_memory_update_skill_via_automation() -> None:
|
||||
token = await _get_test_user_token()
|
||||
user_id = _get_test_user_id()
|
||||
|
||||
@@ -358,7 +382,7 @@ async def test_memory_write_skill_via_automation() -> None:
|
||||
client=client,
|
||||
headers=headers,
|
||||
thread_id=thread_id,
|
||||
run_id="run-memory-write-test",
|
||||
run_id="run-memory-update-test",
|
||||
user_message=user_message,
|
||||
runtime_mode="automation",
|
||||
)
|
||||
@@ -375,7 +399,7 @@ async def test_memory_write_skill_via_automation() -> None:
|
||||
|
||||
args = cli_result.get("tool_call_args", {})
|
||||
assert args.get("command") == "memory"
|
||||
assert args.get("subcommand") in {"write", "update"}
|
||||
assert args.get("subcommand") == "update"
|
||||
|
||||
if user_id:
|
||||
time.sleep(1)
|
||||
|
||||
@@ -43,7 +43,7 @@ def test_parse_tool_agent_output_uses_side_channel_payload() -> None:
|
||||
store_tool_agent_output(
|
||||
tool_call_id=tool_call_id,
|
||||
payload={
|
||||
"tool_name": "calendar.write",
|
||||
"tool_name": "calendar.update",
|
||||
"tool_call_id": tool_call_id,
|
||||
"tool_call_args": {"title": "Sync"},
|
||||
"status": "success",
|
||||
@@ -60,12 +60,12 @@ def test_parse_tool_agent_output_uses_side_channel_payload() -> None:
|
||||
parsed = parse_tool_agent_output(
|
||||
output,
|
||||
tool_call_id=tool_call_id,
|
||||
tool_name="calendar.write",
|
||||
tool_name="calendar.update",
|
||||
tool_call_args={"title": "Sync"},
|
||||
)
|
||||
|
||||
assert parsed is not None
|
||||
assert parsed.tool_name == "calendar.write"
|
||||
assert parsed.tool_name == "calendar.update"
|
||||
assert parsed.tool_call_id == tool_call_id
|
||||
assert parsed.result == {"status": "success", "event": {"id": "evt_1"}}
|
||||
assert parsed.ui_hints == {"view": "calendar_event_created"}
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.agentscope.tools.cli.handler_calendar import (
|
||||
_resolve_read_range,
|
||||
)
|
||||
from core.agentscope.tools.cli.models import CliCommand
|
||||
|
||||
|
||||
def test_resolve_read_range_supports_date_timezone_fallback() -> None:
    """A ``date`` + ``timezone`` pair resolves to a concrete UTC read range.

    The expected bounds are 2026-04-22T16:00Z and 2026-04-23T16:00Z, which is
    consistent with local midnight in Asia/Shanghai (UTC+8) on the requested
    day and on the day after.
    """
    read_args = {"date": "2026-04-23", "timezone": "Asia/Shanghai"}
    command = CliCommand(
        command="calendar",
        subcommand="read",
        owner_id="u1",
        args=read_args,
    )

    range_start, range_end, failure = _resolve_read_range(command)

    # No error is reported and both bounds are populated.
    assert failure is None
    assert range_start is not None
    assert range_end is not None

    # Bounds are checked through their ISO-8601 rendering, offset included.
    expected_start = "2026-04-22T16:00:00+00:00"
    expected_end = "2026-04-23T16:00:00+00:00"
    assert range_start.isoformat() == expected_start
    assert range_end.isoformat() == expected_end
|
||||
|
||||
|
||||
def test_resolve_read_range_rejects_bad_date() -> None:
    """A slash-separated ``date`` yields no range and a format error message."""
    malformed_args = {"date": "2026/04/23", "timezone": "Asia/Shanghai"}
    command = CliCommand(
        command="calendar",
        subcommand="read",
        owner_id="u1",
        args=malformed_args,
    )

    resolved = _resolve_read_range(command)
    range_start, range_end, failure = resolved

    # Both bounds are absent and the error text names the required format.
    assert range_start is None
    assert range_end is None
    assert failure == "date must be YYYY-MM-DD"
|
||||
@@ -0,0 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from core.agentscope.tools.cli.handlers import build_router
|
||||
|
||||
|
||||
def test_router_registers_only_new_canonical_subcommands() -> None:
    """The router exposes the canonical pairs and none of the retired ones."""
    router = build_router()

    canonical_pairs = (
        ("calendar", "create"),
        ("calendar", "read"),
        ("calendar", "update"),
        ("calendar", "delete"),
        ("calendar", "share"),
        ("contacts", "read"),
        ("memory", "update"),
    )
    retired_pairs = (
        ("calendar", "write"),
        ("contacts", "lookup"),
        ("memory", "write"),
        ("memory", "forget"),
    )

    # Checked in the same order as the original one-assert-per-pair layout.
    for pair in canonical_pairs:
        assert pair in router.command_pairs

    for pair in retired_pairs:
        assert pair not in router.command_pairs
|
||||
@@ -22,35 +22,54 @@ def _make_tool_output(
|
||||
)
|
||||
|
||||
|
||||
def test_postprocess_calendar_read_success() -> None:
|
||||
def test_postprocess_calendar_read_has_ui_hints() -> None:
|
||||
output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.SUCCESS, data={"total": 5, "items": []})
|
||||
processed = postprocess_tool_output(output)
|
||||
assert processed.ui_hints is not None
|
||||
assert processed.ui_hints["view"] == "calendar_event_list"
|
||||
assert processed.ui_hints["total"] == 5
|
||||
assert processed.ui_hints["intent"] == "list"
|
||||
|
||||
|
||||
def test_postprocess_calendar_write_partial() -> None:
|
||||
output = _make_tool_output(command="calendar", subcommand="write", status=ToolStatus.PARTIAL, data={"status": "partial", "results": []})
|
||||
def test_postprocess_calendar_create_partial() -> None:
|
||||
output = _make_tool_output(command="calendar", subcommand="create", status=ToolStatus.PARTIAL, data={"status": "partial", "success": 1, "failed": 1, "results": []})
|
||||
processed = postprocess_tool_output(output)
|
||||
assert processed.ui_hints is not None
|
||||
assert processed.ui_hints["view"] == "calendar_batch_result"
|
||||
assert processed.ui_hints["status"] == "partial"
|
||||
assert processed.ui_hints["intent"] == "status"
|
||||
assert processed.ui_hints["status"] == "warning"
|
||||
|
||||
|
||||
def test_postprocess_contacts_lookup_success() -> None:
|
||||
output = _make_tool_output(command="contacts", subcommand="lookup", status=ToolStatus.SUCCESS, data={"friends_count": 3, "friends": []})
|
||||
def test_postprocess_contacts_read_has_ui_hints() -> None:
|
||||
output = _make_tool_output(command="contacts", subcommand="read", status=ToolStatus.SUCCESS, data={"friends_count": 3, "friends": []})
|
||||
processed = postprocess_tool_output(output)
|
||||
assert processed.ui_hints is not None
|
||||
assert processed.ui_hints["view"] == "contact_list"
|
||||
assert processed.ui_hints["friends_count"] == 3
|
||||
assert processed.ui_hints["intent"] == "list"
|
||||
assert processed.ui_hints["status"] == "success"
|
||||
|
||||
|
||||
def test_postprocess_memory_forget_success() -> None:
|
||||
output = _make_tool_output(command="memory", subcommand="forget", status=ToolStatus.SUCCESS, data={"status": "success", "forgotten": 5})
|
||||
def test_postprocess_memory_update_has_ui_hints() -> None:
|
||||
output = _make_tool_output(
|
||||
command="memory",
|
||||
subcommand="update",
|
||||
status=ToolStatus.SUCCESS,
|
||||
data={
|
||||
"status": "success",
|
||||
"success": 1,
|
||||
"failed": 0,
|
||||
"forgotten": 5,
|
||||
"results": [
|
||||
{
|
||||
"memoryType": "user",
|
||||
"action": "delete",
|
||||
"status": "success",
|
||||
"forgotten": 5,
|
||||
"memoryId": "mem_1",
|
||||
}
|
||||
],
|
||||
},
|
||||
)
|
||||
processed = postprocess_tool_output(output)
|
||||
assert processed.ui_hints is not None
|
||||
assert processed.ui_hints["forgotten"] == 5
|
||||
assert processed.ui_hints["intent"] == "status"
|
||||
assert processed.ui_hints["status"] == "success"
|
||||
|
||||
|
||||
def test_postprocess_failure_no_ui_hints() -> None:
|
||||
|
||||
@@ -29,6 +29,16 @@ def test_validate_accepts_known_skills() -> None:
|
||||
assert result == {"calendar", "contacts"}
|
||||
|
||||
|
||||
def test_validate_rejects_unknown_allowed_command() -> None:
    """An allow-list containing an unrecognised command raises ``ValueError``.

    The error text is expected to mention the offending command name.
    """
    # Imported inside the test, exactly as the original does.
    from core.agentscope.tools.toolkit import _validate_allowed_commands

    try:
        _validate_allowed_commands({"calendar", "unknown_command"})
    except ValueError as exc:
        assert "unknown_command" in str(exc)
    else:
        # Explicit raise rather than `assert False`: assert statements are
        # removed under `python -O`, which would let a missing exception pass.
        raise AssertionError("should have raised")
|
||||
|
||||
|
||||
def test_build_toolkit_registers_project_cli() -> None:
|
||||
toolkit = build_toolkit()
|
||||
schemas = toolkit.get_json_schemas()
|
||||
|
||||
@@ -413,7 +413,7 @@ async def test_enqueue_run_rejects_too_many_attachments(monkeypatch) -> None:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_history_snapshot_filters_out_tool_messages() -> None:
|
||||
async def test_get_history_snapshot_keeps_tool_messages_for_ui_replay() -> None:
|
||||
class _HistoryRepository(_FakeRepository):
|
||||
async def get_history_day(
|
||||
self,
|
||||
@@ -446,7 +446,20 @@ async def test_get_history_snapshot_filters_out_tool_messages() -> None:
|
||||
"tool_name": "calendar_read",
|
||||
"tool_call_id": "call-1",
|
||||
"status": "success",
|
||||
"result": "status=success total=3 returned=3",
|
||||
"result": {
|
||||
"command": "calendar",
|
||||
"subcommand": "read",
|
||||
"data": {"total": 3, "items": []},
|
||||
},
|
||||
"ui_hints": {
|
||||
"intent": "status",
|
||||
"status": "success",
|
||||
"title": "完成",
|
||||
"items": [],
|
||||
"listItems": [],
|
||||
"sections": [],
|
||||
"actions": [],
|
||||
},
|
||||
},
|
||||
},
|
||||
"timestamp": "2026-03-17T09:00:01Z",
|
||||
@@ -482,7 +495,12 @@ async def test_get_history_snapshot_filters_out_tool_messages() -> None:
|
||||
current_user=_user(),
|
||||
)
|
||||
|
||||
assert [message.role for message in snapshot.messages] == ["user", "assistant"]
|
||||
assert [message.role for message in snapshot.messages] == [
|
||||
"user",
|
||||
"tool",
|
||||
"assistant",
|
||||
]
|
||||
assert snapshot.messages[1].ui_schema is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -16,32 +16,43 @@ class _FakeMessage:
|
||||
self.timestamp = datetime.now(timezone.utc)
|
||||
|
||||
|
||||
def test_convert_message_to_history_does_not_attach_ui_schema_for_tool_message() -> (
|
||||
None
|
||||
):
|
||||
def test_convert_message_to_history_attaches_ui_schema_for_tool_message() -> None:
|
||||
message = _FakeMessage(
|
||||
role="tool",
|
||||
metadata={"tool_agent_output": {"result": "done"}},
|
||||
)
|
||||
|
||||
result = convert_message_to_history(message) # type: ignore[arg-type]
|
||||
|
||||
assert "ui_schema" not in result
|
||||
assert "uiSchema" not in result
|
||||
|
||||
|
||||
def test_convert_message_to_history_does_not_attach_ui_schema_for_assistant_message() -> None:
|
||||
message = _FakeMessage(
|
||||
role="assistant",
|
||||
metadata={
|
||||
"agent_output": {"ui_schema": {"version": "2.0", "root": {"type": "stack"}}}
|
||||
"tool_agent_output": {
|
||||
"result": {"status": "success"},
|
||||
"ui_hints": {
|
||||
"intent": "status",
|
||||
"status": "success",
|
||||
"title": "完成",
|
||||
"items": [],
|
||||
"listItems": [],
|
||||
"sections": [],
|
||||
"actions": [],
|
||||
},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
result = convert_message_to_history(message) # type: ignore[arg-type]
|
||||
|
||||
assert "ui_schema" not in result
|
||||
assert "uiSchema" not in result
|
||||
assert "ui_schema" in result
|
||||
|
||||
|
||||
def test_convert_message_to_history_adds_suggested_actions_for_assistant_message() -> None:
    """Assistant ``agent_output.suggested_actions`` appear as ``suggestedActions``."""
    expected_actions = ["查今天日程", "创建会议"]
    agent_output = {"suggested_actions": list(expected_actions)}
    assistant_message = _FakeMessage(
        role="assistant",
        metadata={"agent_output": agent_output},
    )

    history_entry = convert_message_to_history(assistant_message)  # type: ignore[arg-type]

    # The converted entry carries the actions under the camelCase key.
    assert history_entry["suggestedActions"] == expected_actions
|
||||
|
||||
|
||||
def test_convert_message_to_history_returns_multiple_user_attachments() -> None:
|
||||
|
||||
Reference in New Issue
Block a user