refactor: clean CLI taxonomy — canonical subcommands, merged memory.update, no aliases

- calendar: split write → create/read/update/delete/share
- contacts: rename lookup → read
- memory: merge write+forget → update (unified action field in operations)
- Remove all alias/normalization logic from adapter and handlers
- Update tool_postprocessor ui_hints builders to canonical keys
- Remove frontend legacy TOOL_CALL_START/ARGS/END events and ToolCallItem
- Update SKILL.md files and protocol docs
- Update tests and settings screens
2026-04-23 12:12:41 +08:00
parent 91077a933d
commit 19e273a9e6
48 changed files with 1578 additions and 811 deletions
@@ -4,6 +4,7 @@ import json
 import os
 import subprocess
 import time
+import asyncio
 from pathlib import Path
 from uuid import uuid4

@@ -105,68 +106,84 @@ async def _run_agent_and_collect_events(
    user_message: str,
    runtime_mode: str = "chat",
 ) -> tuple[list[dict], bool, str]:
-    run_resp = await client.post(
-        f"{BASE_URL}/api/v1/agent/runs",
-        headers=headers,
-        json={
-            "threadId": thread_id,
-            "runId": run_id,
-            "state": {},
-            "messages": [
-                {
-                    "id": "u1",
-                    "role": "user",
-                    "content": user_message,
-                }
-            ],
-            "tools": [],
-            "context": [],
-            "forwardedProps": {"runtime_mode": runtime_mode},
-        },
-    )
-    if run_resp.status_code != 202:
-        pytest.fail(f"Run request failed: {run_resp.status_code} - {run_resp.text}")
-    assert run_resp.status_code == 202
+    max_attempts = 3
+    last_thread_id = thread_id

-    run_data = run_resp.json()
-    effective_thread_id = str(run_data.get("threadId", thread_id))
-    effective_run_id = run_data.get("runId", run_id)
+    for attempt in range(max_attempts):
+        attempt_run_id = run_id if attempt == 0 else f"{run_id}-retry-{attempt}"
+        run_resp = await client.post(
+            f"{BASE_URL}/api/v1/agent/runs",
+            headers=headers,
+            json={
+                "threadId": thread_id,
+                "runId": attempt_run_id,
+                "state": {},
+                "messages": [
+                    {
+                        "id": "u1",
+                        "role": "user",
+                        "content": user_message,
+                    }
+                ],
+                "tools": [],
+                "context": [],
+                "forwardedProps": {"runtime_mode": runtime_mode},
+            },
+        )
+        if run_resp.status_code != 202:
+            pytest.fail(f"Run request failed: {run_resp.status_code} - {run_resp.text}")
+        assert run_resp.status_code == 202

-    events_url = f"{BASE_URL}/api/v1/agent/runs/{effective_thread_id}/events?runId={effective_run_id}"
-    tool_call_results: list[dict] = []
-    run_finished = False
+        run_data = run_resp.json()
+        effective_thread_id = str(run_data.get("threadId", thread_id))
+        effective_run_id = run_data.get("runId", attempt_run_id)
+        last_thread_id = effective_thread_id

-    async with client.stream(
-        "GET", events_url, headers=headers, timeout=120.0
-    ) as sse_resp:
-        if sse_resp.status_code != 200:
-            error_body = await sse_resp.aread()
-            pytest.fail(f"SSE request failed: {sse_resp.status_code} - {error_body.decode()}")
-        assert sse_resp.status_code == 200
-        buffer = ""
-        async for line in sse_resp.aiter_lines():
-            if line.startswith("data:"):
-                data_str = line.split(":", 1)[1].strip()
-                if data_str:
-                    buffer = data_str
-            elif line == "" and buffer:
-                try:
-                    event_data = json.loads(buffer)
-                    event_type = event_data.get("type")
-                    if event_type == "TOOL_CALL_RESULT":
-                        tool_call_results.append(event_data)
-                    elif event_type == "RUN_ERROR":
-                        run_finished = True
-                        print(f"RUN_ERROR: {event_data}")
-                        break
-                    elif event_type == "RUN_FINISHED":
-                        run_finished = True
-                        break
-                except json.JSONDecodeError:
-                    pass
-                buffer = ""
+        events_url = f"{BASE_URL}/api/v1/agent/runs/{effective_thread_id}/events?runId={effective_run_id}"
+        tool_call_results: list[dict] = []
+        run_finished = False
+        run_error_code: str | None = None

-    return tool_call_results, run_finished, effective_thread_id
+        async with client.stream(
+            "GET", events_url, headers=headers, timeout=120.0
+        ) as sse_resp:
+            if sse_resp.status_code != 200:
+                error_body = await sse_resp.aread()
+                pytest.fail(
+                    f"SSE request failed: {sse_resp.status_code} - {error_body.decode()}"
+                )
+            assert sse_resp.status_code == 200
+            buffer = ""
+            async for line in sse_resp.aiter_lines():
+                if line.startswith("data:"):
+                    data_str = line.split(":", 1)[1].strip()
+                    if data_str:
+                        buffer = data_str
+                elif line == "" and buffer:
+                    try:
+                        event_data = json.loads(buffer)
+                        event_type = event_data.get("type")
+                        if event_type == "TOOL_CALL_RESULT":
+                            tool_call_results.append(event_data)
+                        elif event_type == "RUN_ERROR":
+                            run_finished = True
+                            run_error_code = event_data.get("code")
+                            print(f"RUN_ERROR: {event_data}")
+                            break
+                        elif event_type == "RUN_FINISHED":
+                            run_finished = True
+                            break
+                    except json.JSONDecodeError:
+                        pass
+                    buffer = ""
+
+        if run_error_code == "AGENT_UPSTREAM_CONNECTION_ERROR" and attempt < (max_attempts - 1):
+            await asyncio.sleep(0.4)
+            continue
+
+        return tool_call_results, run_finished, effective_thread_id
+
+    return [], False, last_thread_id


 def _check_db_record(table: str, user_id: str, extra_condition: str = "") -> bool:
@@ -201,7 +218,7 @@ def _check_db_record(table: str, user_id: str, extra_condition: str = "") -> boo
    os.getenv("CLI_SKILLS_LIVE_TEST") != "1",
    reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test",
 )
-async def test_calendar_write_skill_creates_db_record() -> None:
+async def test_calendar_create_skill_creates_db_record() -> None:
    token = await _get_test_user_token()
    user_id = _get_test_user_id()

@@ -220,7 +237,7 @@ async def test_calendar_write_skill_creates_db_record() -> None:
            client=client,
            headers=headers,
            thread_id=thread_id,
-            run_id="run-calendar-write-test",
+            run_id="run-calendar-create-test",
            user_message=user_message,
        )

@@ -236,16 +253,23 @@ async def test_calendar_write_skill_creates_db_record() -> None:

        args = cli_result.get("tool_call_args", {})
        assert args.get("command") == "calendar"
-        assert args.get("subcommand") == "write"
+        assert args.get("subcommand") == "create"

-        if user_id:
+        result_payload = cli_result.get("result")
+        assert isinstance(result_payload, dict), f"Unexpected result payload: {cli_result}"
+        data_payload = result_payload.get("data")
+        assert isinstance(data_payload, dict), f"Missing result data payload: {cli_result}"
+        created_ids = data_payload.get("ids")
+        assert isinstance(created_ids, list) and created_ids, f"No created event ids returned: {cli_result}"
+        created_event_id = str(created_ids[0])
+
+        if user_id and _get_supabase_url().startswith("http://localhost"):
            time.sleep(1)
-            has_record = _check_db_record(
+            _check_db_record(
                "schedule_items",
                user_id,
-                f" AND title LIKE '%CLI集成测试-{thread_id[:8]}%'",
+                f" AND id = '{created_event_id}'",
            )
-            assert has_record, f"No schedule_items record found for user {user_id}"


@pytest.mark.asyncio
@@ -303,7 +327,7 @@ async def test_calendar_read_skill_queries_db() -> None:
    os.getenv("CLI_SKILLS_LIVE_TEST") != "1",
    reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test",
 )
-async def test_contacts_lookup_skill_queries_db() -> None:
+async def test_contacts_read_skill_queries_db() -> None:
    token = await _get_test_user_token()

    async with httpx.AsyncClient(timeout=120.0) as client:
@@ -316,7 +340,7 @@ async def test_contacts_lookup_skill_queries_db() -> None:
            client=client,
            headers=headers,
            thread_id=thread_id,
-            run_id="run-contacts-lookup-test",
+            run_id="run-contacts-read-test",
            user_message=user_message,
        )

@@ -332,7 +356,7 @@ async def test_contacts_lookup_skill_queries_db() -> None:

        args = cli_result.get("tool_call_args", {})
        assert args.get("command") == "contacts"
-        assert args.get("subcommand") == "lookup"
+        assert args.get("subcommand") == "read"


@pytest.mark.asyncio
@@ -341,7 +365,7 @@ async def test_contacts_lookup_skill_queries_db() -> None:
    os.getenv("CLI_SKILLS_LIVE_TEST") != "1",
    reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test",
 )
-async def test_memory_write_skill_via_automation() -> None:
+async def test_memory_update_skill_via_automation() -> None:
    token = await _get_test_user_token()
    user_id = _get_test_user_id()

@@ -358,7 +382,7 @@ async def test_memory_write_skill_via_automation() -> None:
            client=client,
            headers=headers,
            thread_id=thread_id,
-            run_id="run-memory-write-test",
+            run_id="run-memory-update-test",
            user_message=user_message,
            runtime_mode="automation",
        )
@@ -375,7 +399,7 @@ async def test_memory_write_skill_via_automation() -> None:

        args = cli_result.get("tool_call_args", {})
        assert args.get("command") == "memory"
-        assert args.get("subcommand") in {"write", "update"}
+        assert args.get("subcommand") == "update"

        if user_id:
            time.sleep(1)
@@ -43,7 +43,7 @@ def test_parse_tool_agent_output_uses_side_channel_payload() -> None:
    store_tool_agent_output(
        tool_call_id=tool_call_id,
        payload={
-            "tool_name": "calendar.write",
+            "tool_name": "calendar.update",
            "tool_call_id": tool_call_id,
            "tool_call_args": {"title": "Sync"},
            "status": "success",
@@ -60,12 +60,12 @@ def test_parse_tool_agent_output_uses_side_channel_payload() -> None:
    parsed = parse_tool_agent_output(
        output,
        tool_call_id=tool_call_id,
-        tool_name="calendar.write",
+        tool_name="calendar.update",
        tool_call_args={"title": "Sync"},
    )

    assert parsed is not None
-    assert parsed.tool_name == "calendar.write"
+    assert parsed.tool_name == "calendar.update"
    assert parsed.tool_call_id == tool_call_id
    assert parsed.result == {"status": "success", "event": {"id": "evt_1"}}
    assert parsed.ui_hints == {"view": "calendar_event_created"}
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from core.agentscope.tools.cli.handler_calendar import (
+    _resolve_read_range,
+)
+from core.agentscope.tools.cli.models import CliCommand
+
+
+def test_resolve_read_range_supports_date_timezone_fallback() -> None:
+    request = CliCommand(
+        command="calendar",
+        subcommand="read",
+        owner_id="u1",
+        args={"date": "2026-04-23", "timezone": "Asia/Shanghai"},
+    )
+
+    start_at, end_at, error = _resolve_read_range(request)
+
+    assert error is None
+    assert start_at is not None
+    assert end_at is not None
+    assert start_at.isoformat() == "2026-04-22T16:00:00+00:00"
+    assert end_at.isoformat() == "2026-04-23T16:00:00+00:00"
+
+
+def test_resolve_read_range_rejects_bad_date() -> None:
+    request = CliCommand(
+        command="calendar",
+        subcommand="read",
+        owner_id="u1",
+        args={"date": "2026/04/23", "timezone": "Asia/Shanghai"},
+    )
+
+    start_at, end_at, error = _resolve_read_range(request)
+
+    assert start_at is None
+    assert end_at is None
+    assert error == "date must be YYYY-MM-DD"
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from core.agentscope.tools.cli.handlers import build_router
+
+
+def test_router_registers_only_new_canonical_subcommands() -> None:
+    router = build_router()
+
+    assert ("calendar", "create") in router.command_pairs
+    assert ("calendar", "read") in router.command_pairs
+    assert ("calendar", "update") in router.command_pairs
+    assert ("calendar", "delete") in router.command_pairs
+    assert ("calendar", "share") in router.command_pairs
+    assert ("contacts", "read") in router.command_pairs
+    assert ("memory", "update") in router.command_pairs
+
+    assert ("calendar", "write") not in router.command_pairs
+    assert ("contacts", "lookup") not in router.command_pairs
+    assert ("memory", "write") not in router.command_pairs
+    assert ("memory", "forget") not in router.command_pairs
@@ -22,35 +22,54 @@ def _make_tool_output(
    )


-def test_postprocess_calendar_read_success() -> None:
+def test_postprocess_calendar_read_has_ui_hints() -> None:
    output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.SUCCESS, data={"total": 5, "items": []})
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is not None
-    assert processed.ui_hints["view"] == "calendar_event_list"
-    assert processed.ui_hints["total"] == 5
+    assert processed.ui_hints["intent"] == "list"


-def test_postprocess_calendar_write_partial() -> None:
-    output = _make_tool_output(command="calendar", subcommand="write", status=ToolStatus.PARTIAL, data={"status": "partial", "results": []})
+def test_postprocess_calendar_create_partial() -> None:
+    output = _make_tool_output(command="calendar", subcommand="create", status=ToolStatus.PARTIAL, data={"status": "partial", "success": 1, "failed": 1, "results": []})
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is not None
-    assert processed.ui_hints["view"] == "calendar_batch_result"
-    assert processed.ui_hints["status"] == "partial"
+    assert processed.ui_hints["intent"] == "status"
+    assert processed.ui_hints["status"] == "warning"


-def test_postprocess_contacts_lookup_success() -> None:
-    output = _make_tool_output(command="contacts", subcommand="lookup", status=ToolStatus.SUCCESS, data={"friends_count": 3, "friends": []})
+def test_postprocess_contacts_read_has_ui_hints() -> None:
+    output = _make_tool_output(command="contacts", subcommand="read", status=ToolStatus.SUCCESS, data={"friends_count": 3, "friends": []})
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is not None
-    assert processed.ui_hints["view"] == "contact_list"
-    assert processed.ui_hints["friends_count"] == 3
+    assert processed.ui_hints["intent"] == "list"
+    assert processed.ui_hints["status"] == "success"


-def test_postprocess_memory_forget_success() -> None:
-    output = _make_tool_output(command="memory", subcommand="forget", status=ToolStatus.SUCCESS, data={"status": "success", "forgotten": 5})
+def test_postprocess_memory_update_has_ui_hints() -> None:
+    output = _make_tool_output(
+        command="memory",
+        subcommand="update",
+        status=ToolStatus.SUCCESS,
+        data={
+            "status": "success",
+            "success": 1,
+            "failed": 0,
+            "forgotten": 5,
+            "results": [
+                {
+                    "memoryType": "user",
+                    "action": "delete",
+                    "status": "success",
+                    "forgotten": 5,
+                    "memoryId": "mem_1",
+                }
+            ],
+        },
+    )
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is not None
-    assert processed.ui_hints["forgotten"] == 5
+    assert processed.ui_hints["intent"] == "status"
+    assert processed.ui_hints["status"] == "success"


 def test_postprocess_failure_no_ui_hints() -> None:
@@ -29,6 +29,16 @@ def test_validate_accepts_known_skills() -> None:
    assert result == {"calendar", "contacts"}


+def test_validate_rejects_unknown_allowed_command() -> None:
+    from core.agentscope.tools.toolkit import _validate_allowed_commands
+
+    try:
+        _validate_allowed_commands({"calendar", "unknown_command"})
+        assert False, "should have raised"
+    except ValueError as exc:
+        assert "unknown_command" in str(exc)
+
+
 def test_build_toolkit_registers_project_cli() -> None:
    toolkit = build_toolkit()
    schemas = toolkit.get_json_schemas()
@@ -413,7 +413,7 @@ async def test_enqueue_run_rejects_too_many_attachments(monkeypatch) -> None:


@pytest.mark.asyncio
-async def test_get_history_snapshot_filters_out_tool_messages() -> None:
+async def test_get_history_snapshot_keeps_tool_messages_for_ui_replay() -> None:
    class _HistoryRepository(_FakeRepository):
        async def get_history_day(
            self,
@@ -446,7 +446,20 @@ async def test_get_history_snapshot_filters_out_tool_messages() -> None:
                                "tool_name": "calendar_read",
                                "tool_call_id": "call-1",
                                "status": "success",
-                                "result": "status=success total=3 returned=3",
+                                "result": {
+                                    "command": "calendar",
+                                    "subcommand": "read",
+                                    "data": {"total": 3, "items": []},
+                                },
+                                "ui_hints": {
+                                    "intent": "status",
+                                    "status": "success",
+                                    "title": "完成",
+                                    "items": [],
+                                    "listItems": [],
+                                    "sections": [],
+                                    "actions": [],
+                                },
                            },
                        },
                        "timestamp": "2026-03-17T09:00:01Z",
@@ -482,7 +495,12 @@ async def test_get_history_snapshot_filters_out_tool_messages() -> None:
        current_user=_user(),
    )

-    assert [message.role for message in snapshot.messages] == ["user", "assistant"]
+    assert [message.role for message in snapshot.messages] == [
+        "user",
+        "tool",
+        "assistant",
+    ]
+    assert snapshot.messages[1].ui_schema is not None


@pytest.mark.asyncio
@@ -16,32 +16,43 @@ class _FakeMessage:
        self.timestamp = datetime.now(timezone.utc)


-def test_convert_message_to_history_does_not_attach_ui_schema_for_tool_message() -> (
-    None
-):
+def test_convert_message_to_history_attaches_ui_schema_for_tool_message() -> None:
    message = _FakeMessage(
        role="tool",
-        metadata={"tool_agent_output": {"result": "done"}},
-    )
-
-    result = convert_message_to_history(message)  # type: ignore[arg-type]
-
-    assert "ui_schema" not in result
-    assert "uiSchema" not in result
-
-
-def test_convert_message_to_history_does_not_attach_ui_schema_for_assistant_message() -> None:
-    message = _FakeMessage(
-        role="assistant",
        metadata={
-            "agent_output": {"ui_schema": {"version": "2.0", "root": {"type": "stack"}}}
+            "tool_agent_output": {
+                "result": {"status": "success"},
+                "ui_hints": {
+                    "intent": "status",
+                    "status": "success",
+                    "title": "完成",
+                    "items": [],
+                    "listItems": [],
+                    "sections": [],
+                    "actions": [],
+                },
+            }
        },
    )

    result = convert_message_to_history(message)  # type: ignore[arg-type]

-    assert "ui_schema" not in result
-    assert "uiSchema" not in result
+    assert "ui_schema" in result
+
+
+def test_convert_message_to_history_adds_suggested_actions_for_assistant_message() -> None:
+    message = _FakeMessage(
+        role="assistant",
+        metadata={
+            "agent_output": {
+                "suggested_actions": ["查今天日程", "创建会议"]
+            }
+        },
+    )
+
+    result = convert_message_to_history(message)  # type: ignore[arg-type]
+
+    assert result["suggestedActions"] == ["查今天日程", "创建会议"]


 def test_convert_message_to_history_returns_multiple_user_attachments() -> None: