feat(agent): redesign project_cli with module/method/input protocol

- Replace command/subcommand/args with module/method/input envelope
- Calendar handler uses discriminated union (mode) for read operations
- Strict Pydantic models with extra='forbid' for all calendar methods
- Worker max_iters=7, router prompt simplified (removed project_cli_defaults)
- Skill index cards + per-action files for progressive disclosure
- Frontend/AG-UI aligned to module/method dispatch
- Protocol docs updated to module/method/input contract

WIP: action cards still need the envelope fix, two tests still need
updating, and the memory handler still needs Pydantic models.
This commit is contained in:
qzl
2026-04-24 13:24:13 +08:00
parent ab526af2c4
commit d060962a5f
62 changed files with 4802 additions and 805 deletions
@@ -7,6 +7,7 @@ from ag_ui.core import RunAgentInput
import core.agentscope.runtime.runner as runner_module
from core.agentscope.runtime.runner import AgentScopeRunner
from schemas.agent.runtime_models import (
RunStatus,
RouterAgentOutput,
WorkerAgentOutputLite,
)
@@ -60,6 +61,31 @@ def test_build_worker_input_messages_only_contains_router_contract() -> None:
assert "[RouterAgentOutput]" in str(input_messages[0].content)
def test_build_agent_sets_worker_max_iters(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``_build_agent`` passes ``max_iters=7`` for the worker.

    ``runner_module.JsonReActAgent`` is replaced by a stub that records the
    keyword arguments it is constructed with, so the forwarded value can be
    inspected without building a real agent.
    """
    seen_kwargs: dict[str, object] = {}

    class _RecordingAgent:
        """Stand-in agent that only remembers its constructor kwargs."""

        def __init__(self, **kwargs: object) -> None:
            seen_kwargs.update(kwargs)

    monkeypatch.setattr(runner_module, "JsonReActAgent", _RecordingAgent)

    runner = AgentScopeRunner()
    tracking_model = runner_module.TrackingChatModel(object())
    build_kwargs = {
        "agent_name": "worker",
        "system_prompt": "test",
        "toolkit": object(),
        "model": tracking_model,
    }
    built = runner._build_agent(**build_kwargs)

    assert isinstance(built, _RecordingAgent)
    assert seen_kwargs["max_iters"] == 7
def test_build_router_messages_injects_user_input_when_context_last_not_user() -> None:
runner = AgentScopeRunner()
run_input = _run_input()
@@ -119,6 +145,45 @@ def test_build_router_messages_appends_user_input_to_context_tail() -> None:
assert messages[0].content == "上一轮回复"
def test_enforce_tool_evidence_contract_keeps_success_when_tool_succeeds() -> None:
    """A SUCCESS output is kept as-is when a tool call has succeeded.

    With ``requires_tool_evidence=True`` and
    ``has_successful_tool_result=True`` the status, answer and suggested
    actions must come back as given, and no error may be attached.
    """
    runner = AgentScopeRunner()
    draft = WorkerAgentOutputLite(
        status=RunStatus.SUCCESS,
        answer="今天没有日程",
        suggested_actions=["查明天"],
    )

    enforced = runner._enforce_tool_evidence_contract(
        worker_output=draft,
        requires_tool_evidence=True,
        has_successful_tool_result=True,
    )

    assert enforced.status == RunStatus.SUCCESS
    assert enforced.answer == "今天没有日程"
    assert enforced.suggested_actions == ["查明天"]
    assert enforced.error is None
def test_enforce_tool_evidence_contract_forces_failure_without_successful_tool() -> None:
    """A SUCCESS output is downgraded when no tool call has succeeded.

    With ``requires_tool_evidence=True`` and
    ``has_successful_tool_result=False`` the result must be FAILED, carry
    the fixed fallback answer, drop the suggested actions and expose the
    ``TOOL_EVIDENCE_MISSING`` error code.
    """
    runner = AgentScopeRunner()
    draft = WorkerAgentOutputLite(
        status=RunStatus.SUCCESS,
        answer="今天没有日程",
        suggested_actions=["查明天"],
    )

    enforced = runner._enforce_tool_evidence_contract(
        worker_output=draft,
        requires_tool_evidence=True,
        has_successful_tool_result=False,
    )

    assert enforced.status == RunStatus.FAILED
    assert enforced.answer == "无法确认结果:所需工具调用未成功完成。"
    assert enforced.suggested_actions == []
    # Guard against None before reading the code.
    assert enforced.error is not None
    assert enforced.error.code == "TOOL_EVIDENCE_MISSING"
def test_build_model_omits_none_generate_kwargs(
monkeypatch: pytest.MonkeyPatch,
) -> None:
@@ -1,6 +1,10 @@
from __future__ import annotations
from core.agentscope.prompts.agent_prompt import build_agent_prompt
from core.agentscope.prompts.agent_prompt import (
build_agent_prompt,
build_worker_contract_prompt,
)
from schemas.agent.runtime_models import RouterAgentOutput
from schemas.agent.system_agent import AgentType, SystemAgentLLMConfig
@@ -18,9 +22,12 @@ def test_build_agent_prompt_for_worker_contains_runtime_config() -> None:
assert "<!-- AGENT_START -->" in prompt
assert "- type: worker" in prompt
assert "context_messages.mode=number" in prompt
assert "context_messages.count=20" in prompt
assert "enabled_skills=calendar,contacts" in prompt
assert "Use objective plus context_summary as the primary execution guide from the router." in prompt
assert "When requires_tool_evidence=true, do not finalize an answer from failed tool calls; either recover with a corrected tool call or explicitly surface that execution failed." in prompt
assert "If all tool calls fail under requires_tool_evidence=true, set status=failed and populate error; do not present a factual answer as confirmed." in prompt
assert "context_messages.mode=number" not in prompt
assert "context_messages.count=20" not in prompt
def test_build_agent_prompt_for_router_contains_identity_and_config() -> None:
@@ -35,5 +42,20 @@ def test_build_agent_prompt_for_router_contains_identity_and_config() -> None:
assert "- type: router" in prompt
assert "[Router Agent]" in prompt
assert "When the task will require project_cli, include canonical tool input defaults in context_summary using the exact shape `project_cli_defaults={\"module\":...,\"method\":...,\"input\":{...}}` whenever they can be determined safely." in prompt
assert "Standardize every time value mentioned in context_summary to the exact project_cli input format that would be required downstream: dates as `YYYY-MM-DD`, local datetimes as RFC3339 with timezone offset, and event ids as raw UUID strings." in prompt
assert "For relative time requests like today, tomorrow, or next Monday, resolve them using system_time_local and place the resolved standardized value into project_cli_defaults.input instead of leaving natural-language time phrases." in prompt
assert "context_messages.mode=day" in prompt
assert "context_messages.count=2" in prompt
def test_build_worker_contract_prompt_prefers_resolved_dates_from_context_summary() -> None:
    """The worker contract prompt must contain the project_cli_defaults rule.

    The prompt is built from a router output whose ``context_summary``
    already carries a resolved date.
    """
    router_output = RouterAgentOutput(
        objective="查询今天日程",
        context_summary="目标日期: 2026-04-24",
        requires_tool_evidence=True,
    )

    prompt = build_worker_contract_prompt(router_output=router_output)

    expected_rule = "If context_summary contains project_cli_defaults, prefer using those exact module/method/input values directly."
    assert expected_rule in prompt
@@ -0,0 +1,84 @@
from __future__ import annotations
import json
import pytest
from core.agentscope.tools.cli.adapter import invoke_cli_tool
@pytest.mark.asyncio
async def test_project_cli_requires_module_and_method() -> None:
    """A project_cli call without ``method`` fails with INVALID_ARGUMENT.

    The JSON payload in the first content block must echo the module,
    report an empty method and carry the error code.
    """
    call_args = {
        "module": "calendar",
        "input": {},
    }

    response = await invoke_cli_tool(
        tool_name="project_cli",
        tool_call_args=call_args,
        allowed_commands={"calendar"},
    )

    assert response.content
    first_block = response.content[0]
    # Content blocks may be plain dicts or objects exposing ``.text``.
    if isinstance(first_block, dict):
        raw_text = first_block["text"]
    else:
        raw_text = first_block.text
    payload = json.loads(raw_text)

    assert payload["ok"] is False
    assert payload["module"] == "calendar"
    assert payload["method"] == ""
    assert payload["error"]["code"] == "INVALID_ARGUMENT"
@pytest.mark.asyncio
async def test_project_cli_failure_includes_method_contract_in_side_channel() -> None:
    """An invalid ``calendar.read`` input stores method guidance for the call.

    The tool is invoked with an empty ``input`` while a tool-call id and a
    tool credential are set. The returned payload must report
    INVALID_ACTION_INPUT, and the output looked up by the tool-call id must
    carry the input schema, an example input and a guidance message.
    """
    from core.agentscope.tools.tool_call_context import (
        peek_tool_agent_output,
        reset_current_tool_call_id,
        set_current_tool_call_id,
    )
    from core.auth.credential_issuer import create_credential_issuer
    from core.auth.tool_credential_context import reset_tool_credential, set_tool_credential

    call_id_token = set_current_tool_call_id("call-test-guidance")
    issued_credential = create_credential_issuer().issue(
        owner_id="00000000-0000-0000-0000-000000000001",
        mode="chat",
    )
    credential_token = set_tool_credential(issued_credential)
    try:
        response = await invoke_cli_tool(
            tool_name="project_cli",
            tool_call_args={
                "module": "calendar",
                "method": "read",
                "input": {},
            },
            allowed_commands={"calendar"},
        )
    finally:
        # Reset the credential first, then the tool-call id.
        reset_tool_credential(credential_token)
        reset_current_tool_call_id(call_id_token)

    assert response.content
    first_block = response.content[0]
    # Content blocks may be plain dicts or objects exposing ``.text``.
    if isinstance(first_block, dict):
        raw_text = first_block["text"]
    else:
        raw_text = first_block.text
    payload = json.loads(raw_text)

    assert payload["ok"] is False
    assert payload["module"] == "calendar"
    assert payload["method"] == "read"
    assert payload["data"] is None
    assert payload["error"]["code"] == "INVALID_ACTION_INPUT"

    stored = peek_tool_agent_output(tool_call_id="call-test-guidance")
    assert stored is not None
    stored_error = stored.get("error")
    assert isinstance(stored_error, dict)
    assert stored_error["code"] == "INVALID_ACTION_INPUT"

    details = stored_error["details"]
    assert details["input_schema"]["mode"] == "string enum(day|range|event)"
    expected_first_example = {
        "mode": "day",
        "date": "2026-04-24",
        "timezone": "Asia/Shanghai",
    }
    assert details["expected_input_examples"][0] == expected_first_example
    assert "resolve the day to a concrete input.date value" in stored_error["message"]
@@ -1,38 +1,96 @@
from __future__ import annotations
import pytest
from core.agentscope.tools.cli.handler_calendar import (
_resolve_read_range,
_day_input_to_range_input,
_CalendarReadDayInput,
handle_calendar_create_event,
handle_calendar_list_day,
)
from core.agentscope.tools.cli.models import CliCommand
def test_resolve_read_range_supports_date_timezone_fallback() -> None:
request = CliCommand(
command="calendar",
subcommand="read",
owner_id="u1",
args={"date": "2026-04-23", "timezone": "Asia/Shanghai"},
def test_day_input_converts_to_tz_range() -> None:
payload = _CalendarReadDayInput.model_validate(
{"mode": "day", "date": "2026-04-23", "timezone": "Asia/Shanghai"}
)
start_at, end_at, error = _resolve_read_range(request)
result = _day_input_to_range_input(payload)
assert error is None
assert start_at is not None
assert end_at is not None
assert start_at.isoformat() == "2026-04-22T16:00:00+00:00"
assert end_at.isoformat() == "2026-04-23T16:00:00+00:00"
assert result == {
"mode": "range",
"start_at": "2026-04-23T00:00:00+08:00",
"end_at": "2026-04-24T00:00:00+08:00",
}
def test_resolve_read_range_rejects_bad_date() -> None:
@pytest.mark.asyncio
async def test_calendar_read_rejects_bad_date_format() -> None:
request = CliCommand(
command="calendar",
subcommand="read",
module="calendar",
method="read",
owner_id="u1",
args={"date": "2026/04/23", "timezone": "Asia/Shanghai"},
input={"mode": "day", "date": "2026/04/23", "timezone": "Asia/Shanghai"},
)
start_at, end_at, error = _resolve_read_range(request)
result = await handle_calendar_list_day(request)
assert start_at is None
assert end_at is None
assert error == "date must be YYYY-MM-DD"
assert result.ok is False
assert result.error is not None
assert result.error.code == "INVALID_ACTION_INPUT"
assert result.error.details == {
"missing_fields": [],
"invalid_fields": ["day.date"],
}
@pytest.mark.asyncio
async def test_calendar_read_range_requires_timezone_aware_datetimes() -> None:
    """Range reads with offset-less datetimes are rejected.

    Both ``start_at`` and ``end_at`` are given without a timezone offset;
    the handler must fail with INVALID_ACTION_INPUT and list both fields
    as invalid.
    """
    range_input = {
        "mode": "range",
        "start_at": "2026-04-23T00:00:00",
        "end_at": "2026-04-24T00:00:00",
    }
    request = CliCommand(
        module="calendar",
        method="read",
        owner_id="u1",
        input=range_input,
    )

    outcome = await handle_calendar_list_day(request)

    assert outcome.ok is False
    assert outcome.error is not None
    assert outcome.error.code == "INVALID_ACTION_INPUT"
    # Sort so the check does not depend on the order fields are reported in.
    reported_invalid = sorted(outcome.error.details["invalid_fields"])
    assert reported_invalid == ["range.end_at", "range.start_at"]
@pytest.mark.asyncio
async def test_create_event_rejects_legacy_field_aliases_with_corrections() -> None:
    """Legacy create-event field names are rejected with rename hints.

    The input uses ``start_time`` / ``end_time`` / ``event_timezone``; the
    handler must fail with INVALID_ACTION_INPUT and report the missing
    fields, the invalid fields and the alias-to-canonical mapping.
    """
    legacy_input = {
        "title": "Project sync",
        "start_time": "2026-04-23T10:00:00+08:00",
        "end_time": "2026-04-23T11:00:00+08:00",
        "event_timezone": "Asia/Shanghai",
    }
    request = CliCommand(
        module="calendar",
        method="create",
        owner_id="u1",
        input=legacy_input,
    )

    outcome = await handle_calendar_create_event(request)

    assert outcome.ok is False
    assert outcome.error is not None
    assert outcome.error.code == "INVALID_ACTION_INPUT"

    expected_details = {
        "missing_fields": ["start_at", "timezone"],
        "invalid_fields": ["end_time", "event_timezone", "start_time"],
        "alias_corrections": {
            "start_time": "start_at",
            "end_time": "end_at",
            "event_timezone": "timezone",
        },
    }
    assert outcome.error.details == expected_details
@@ -3,18 +3,21 @@ from __future__ import annotations
from core.agentscope.tools.cli.handlers import build_router
def test_router_registers_only_new_canonical_subcommands() -> None:
def test_router_registers_only_new_canonical_actions() -> None:
router = build_router()
assert ("calendar", "create") in router.command_pairs
assert ("calendar", "read") in router.command_pairs
assert ("calendar", "update") in router.command_pairs
assert ("calendar", "delete") in router.command_pairs
assert ("calendar", "share") in router.command_pairs
assert ("contacts", "read") in router.command_pairs
assert ("memory", "update") in router.command_pairs
assert ("calendar", "read") in router.module_methods
assert ("calendar", "create") in router.module_methods
assert ("calendar", "update") in router.module_methods
assert ("calendar", "delete") in router.module_methods
assert ("calendar", "share") in router.module_methods
assert ("calendar", "accept_invite") in router.module_methods
assert ("calendar", "reject_invite") in router.module_methods
assert ("contacts", "read") in router.module_methods
assert ("memory", "update") in router.module_methods
assert ("calendar", "write") not in router.command_pairs
assert ("contacts", "lookup") not in router.command_pairs
assert ("memory", "write") not in router.command_pairs
assert ("memory", "forget") not in router.command_pairs
assert ("calendar", "list_day") not in router.module_methods
assert ("calendar", "get_event") not in router.module_methods
assert ("contacts", "lookup") not in router.module_methods
assert ("memory", "write") not in router.module_methods
assert ("memory", "forget") not in router.module_methods
@@ -11,13 +11,13 @@ async def test_router_register_and_dispatch() -> None:
router = CommandRouter()
async def mock_handler(request: CliCommand) -> CliCommandResult:
return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand, data={"name": request.args["name"]})
return CliCommandResult(ok=True, module=request.module, method=request.method, data={"name": request.input["name"]})
router.register(command="test", subcommand="run", handler=mock_handler)
router.register(module="test", method="run", handler=mock_handler)
assert ("test", "run") in router.command_pairs
assert ("test", "run") in router.module_methods
result = await router.dispatch(CliCommand(command="test", subcommand="run", args={"name": "demo"}, owner_id="u1"))
result = await router.dispatch(CliCommand(module="test", method="run", input={"name": "demo"}, owner_id="u1"))
assert result.ok is True
assert result.data == {"name": "demo"}
@@ -25,10 +25,10 @@ async def test_router_register_and_dispatch() -> None:
@pytest.mark.asyncio
async def test_router_unknown_command() -> None:
router = CommandRouter()
result = await router.dispatch(CliCommand(command="unknown", subcommand="run", args={}, owner_id="u1"))
result = await router.dispatch(CliCommand(module="unknown", method="run", input={}, owner_id="u1"))
assert result.ok is False
assert result.error is not None
assert result.error.code == "UNKNOWN_COMMAND"
assert result.error.code == "UNKNOWN_METHOD"
@pytest.mark.asyncio
@@ -39,9 +39,9 @@ async def test_router_handler_exception() -> None:
del request
raise ValueError("intentional error")
router.register(command="fail", subcommand="run", handler=failing_handler)
router.register(module="fail", method="run", handler=failing_handler)
result = await router.dispatch(CliCommand(command="fail", subcommand="run", args={}, owner_id="u1"))
result = await router.dispatch(CliCommand(module="fail", method="run", input={}, owner_id="u1"))
assert result.ok is False
assert result.error is not None
assert result.error.code == "HANDLER_ERROR"
@@ -51,12 +51,12 @@ def test_router_duplicate_register() -> None:
router = CommandRouter()
async def handler1(request: CliCommand) -> CliCommandResult:
return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand)
return CliCommandResult(ok=True, module=request.module, method=request.method)
async def handler2(request: CliCommand) -> CliCommandResult:
return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand)
return CliCommandResult(ok=True, module=request.module, method=request.method)
router.register(command="cmd", subcommand="one", handler=handler1)
router.register(module="cmd", method="one", handler=handler1)
with pytest.raises(ValueError, match="already registered"):
router.register(command="cmd", subcommand="one", handler=handler2)
router.register(module="cmd", method="one", handler=handler2)
@@ -6,31 +6,53 @@ from schemas.agent.runtime_models import ToolAgentOutput, ToolStatus
def _make_tool_output(
*,
command: str,
subcommand: str,
module: str,
method: str,
status: ToolStatus,
data: dict | None = None,
) -> ToolAgentOutput:
return ToolAgentOutput(
tool_name="project_cli",
tool_call_id="test_call_id",
tool_call_args={"command": command, "subcommand": subcommand, "args": {}},
tool_call_args={"module": module, "method": method, "input": {}},
status=status,
result={"command": command, "subcommand": subcommand, "data": data or {}},
result={"module": module, "method": method, "data": data or {}},
error=None,
ui_hints=None,
)
def test_postprocess_calendar_read_has_ui_hints() -> None:
output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.SUCCESS, data={"total": 5, "items": []})
output = _make_tool_output(
module="calendar",
method="read",
status=ToolStatus.SUCCESS,
data={"total": 5, "items": []},
)
processed = postprocess_tool_output(output)
assert processed.ui_hints is not None
assert processed.ui_hints["intent"] == "list"
def test_postprocess_calendar_read_event_detail_has_ui_hints() -> None:
    """A successful ``calendar.read`` with single-event data gets UI hints.

    The data carries ``id`` / ``title`` / ``start_at``; the post-processed
    output must have ``ui_hints`` with the title "日程详情".
    """
    event_data = {
        "id": "evt_1",
        "title": "Project sync",
        "start_at": "2026-04-21T10:00:00+08:00",
    }
    raw_output = _make_tool_output(
        module="calendar",
        method="read",
        status=ToolStatus.SUCCESS,
        data=event_data,
    )

    processed = postprocess_tool_output(raw_output)

    assert processed.ui_hints is not None
    assert processed.ui_hints["title"] == "日程详情"
def test_postprocess_calendar_create_partial() -> None:
output = _make_tool_output(command="calendar", subcommand="create", status=ToolStatus.PARTIAL, data={"status": "partial", "success": 1, "failed": 1, "results": []})
output = _make_tool_output(
module="calendar",
method="create",
status=ToolStatus.PARTIAL,
data={"status": "partial", "success": 1, "failed": 1, "results": []},
)
processed = postprocess_tool_output(output)
assert processed.ui_hints is not None
assert processed.ui_hints["intent"] == "status"
@@ -39,8 +61,8 @@ def test_postprocess_calendar_create_partial() -> None:
def test_postprocess_calendar_share_has_ui_hints() -> None:
output = _make_tool_output(
command="calendar",
subcommand="share",
module="calendar",
method="share",
status=ToolStatus.SUCCESS,
data={
"status": "success",
@@ -60,7 +82,12 @@ def test_postprocess_calendar_share_has_ui_hints() -> None:
def test_postprocess_contacts_read_has_ui_hints() -> None:
output = _make_tool_output(command="contacts", subcommand="read", status=ToolStatus.SUCCESS, data={"friends_count": 3, "friends": []})
output = _make_tool_output(
module="contacts",
method="read",
status=ToolStatus.SUCCESS,
data={"friends_count": 3, "friends": []},
)
processed = postprocess_tool_output(output)
assert processed.ui_hints is not None
assert processed.ui_hints["intent"] == "list"
@@ -69,8 +96,8 @@ def test_postprocess_contacts_read_has_ui_hints() -> None:
def test_postprocess_memory_update_has_ui_hints() -> None:
output = _make_tool_output(
command="memory",
subcommand="update",
module="memory",
method="update",
status=ToolStatus.SUCCESS,
data={
"status": "success",
@@ -95,19 +122,19 @@ def test_postprocess_memory_update_has_ui_hints() -> None:
def test_postprocess_failure_no_ui_hints() -> None:
output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.FAILURE, data=None)
output = _make_tool_output(module="calendar", method="read", status=ToolStatus.FAILURE, data=None)
processed = postprocess_tool_output(output)
assert processed.ui_hints is None
def test_postprocess_unknown_command_no_ui_hints() -> None:
output = _make_tool_output(command="unknown", subcommand="run", status=ToolStatus.SUCCESS, data={"data": "test"})
output = _make_tool_output(module="unknown", method="run", status=ToolStatus.SUCCESS, data={"data": "test"})
processed = postprocess_tool_output(output)
assert processed.ui_hints is None
def test_postprocess_preserves_existing_ui_hints() -> None:
output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.SUCCESS, data={"total": 5})
output = _make_tool_output(module="calendar", method="read", status=ToolStatus.SUCCESS, data={"total": 5})
output = output.model_copy(update={"ui_hints": {"view": "custom_view", "custom": True}})
processed = postprocess_tool_output(output)
assert processed.ui_hints["view"] == "custom_view"
@@ -3,6 +3,7 @@ import asyncio
from core.agentscope.tools.internal.project_cli import PROJECT_CLI_TOOL_NAME
from core.agentscope.tools.internal.view_skill_file import VIEW_SKILL_FILE_TOOL_NAME
from core.agentscope.tools.internal import make_view_skill_file_wrapper
from core.agentscope.tools.skill_session import SkillSessionState
from core.agentscope.tools.toolkit import build_toolkit
from schemas.agent.skill_config import SkillName
@@ -48,8 +49,22 @@ def test_build_toolkit_registers_project_cli() -> None:
}
def test_build_toolkit_uses_custom_agent_skill_prompt_contract() -> None:
    """The skill prompt for an enabled ``calendar`` skill follows the contract.

    It must contain the skill-index notice and the ``calendar/SKILL.md``
    file path, and must not contain the substring ``/home/``.
    """
    skill_prompt = build_toolkit(
        enabled_skill_names={"calendar"},
    ).get_agent_skill_prompt()

    assert skill_prompt is not None
    index_notice = "The entries below are skill indexes, not full execution instructions."
    assert index_notice in skill_prompt
    assert 'file_path="calendar/SKILL.md"' in skill_prompt
    assert "/home/" not in skill_prompt
def test_view_skill_file_rejects_path_outside_enabled_skill_dirs() -> None:
wrapper = make_view_skill_file_wrapper(enabled_skill_names={"calendar"})
wrapper = make_view_skill_file_wrapper(
enabled_skill_names={"calendar"},
skill_session=SkillSessionState(),
)
response = asyncio.run(
wrapper(file_path="/tmp/not-allowed.txt", ranges=None),
@@ -62,10 +77,48 @@ def test_view_skill_file_rejects_path_outside_enabled_skill_dirs() -> None:
def test_view_skill_file_reads_enabled_skill_file() -> None:
wrapper = make_view_skill_file_wrapper(enabled_skill_names={"calendar"})
skill_session = SkillSessionState()
wrapper = make_view_skill_file_wrapper(
enabled_skill_names={"calendar"},
skill_session=skill_session,
)
response = asyncio.run(wrapper(file_path="calendar/SKILL.md", ranges=[1, 10]))
assert response.content
block = response.content[0]
text = block["text"] if isinstance(block, dict) else block.text
assert "Calendar Skill" in text or "name: calendar" in text
assert skill_session.has_read(skill_name="calendar") is True
def test_view_skill_file_reads_calendar_action_card() -> None:
    """Reading ``calendar/actions/get_event.md`` returns the action card.

    The returned text must mention ``get_event`` and its ``"action"``
    entry, and the session must record the calendar skill as read.
    """
    session = SkillSessionState()
    view_file = make_view_skill_file_wrapper(
        enabled_skill_names={"calendar"},
        skill_session=session,
    )

    response = asyncio.run(
        view_file(file_path="calendar/actions/get_event.md", ranges=[1, 20])
    )

    assert response.content
    first_block = response.content[0]
    # Content blocks may be plain dicts or objects exposing ``.text``.
    if isinstance(first_block, dict):
        text = first_block["text"]
    else:
        text = first_block.text

    assert "get_event" in text
    assert '"action": "get_event"' in text
    assert session.has_read(skill_name="calendar") is True
def test_view_skill_file_rejects_action_card_for_disabled_skill() -> None:
    """A calendar action card is denied when only ``contacts`` is enabled.

    The text of the first content block must contain ``ACCESS_DENIED``.
    """
    view_file = make_view_skill_file_wrapper(
        enabled_skill_names={"contacts"},
        skill_session=SkillSessionState(),
    )

    response = asyncio.run(
        view_file(file_path="calendar/actions/get_event.md", ranges=[1, 20])
    )

    assert response.content
    first_block = response.content[0]
    # Content blocks may be plain dicts or objects exposing ``.text``.
    if isinstance(first_block, dict):
        text = first_block["text"]
    else:
        text = first_block.text

    assert "ACCESS_DENIED" in text