feat(agent): redesign project_cli with module/method/input protocol

- Replace command/subcommand/args with the module/method/input envelope.
- Calendar handler uses a discriminated union (mode) for read operations.
- Strict Pydantic models with extra='forbid' for all calendar methods.
- Worker max_iters=7; router prompt simplified (removed project_cli_defaults).
- Skill index cards plus per-action files for progressive disclosure.
- Frontend/AG-UI aligned to module/method dispatch.
- Protocol docs updated to the module/method/input contract.

WIP: action cards still need the envelope fix, 2 tests need updating, and the memory handler needs Pydantic models.
2026-04-24 13:24:13 +08:00
parent ab526af2c4
commit d060962a5f
62 changed files with 4802 additions and 805 deletions
@@ -16,18 +16,28 @@ def _wrap_section(section: str, content: str) -> str:
    return f"{start}\n{body}\n{end}" if body else f"{start}\n{end}"


-def _config_rules(llm_config: SystemAgentLLMConfig | None) -> list[str]:
+def _config_rules(
+    llm_config: SystemAgentLLMConfig | None,
+    *,
+    include_context_messages: bool = True,
+) -> list[str]:
    if llm_config is None:
        return []
-    context_mode = llm_config.context_messages.mode.value
-    context_count = llm_config.context_messages.count
    enabled_skills = [skill.value for skill in llm_config.enabled_skills]
-    return [
-        "[Runtime Config]",
-        f"- context_messages.mode={context_mode}",
-        f"- context_messages.count={context_count}",
-        f"- enabled_skills={','.join(enabled_skills) if enabled_skills else 'default'}",
-    ]
+    rules = ["[Runtime Config]"]
+    if include_context_messages:
+        context_mode = llm_config.context_messages.mode.value
+        context_count = llm_config.context_messages.count
+        rules.extend(
+            [
+                f"- context_messages.mode={context_mode}",
+                f"- context_messages.count={context_count}",
+            ]
+        )
+    rules.append(
+        f"- enabled_skills={','.join(enabled_skills) if enabled_skills else 'default'}"
+    )
+    return rules


 PromptRuleBuilder = Callable[[SystemAgentLLMConfig | None], list[str]]
@@ -60,7 +70,7 @@ def _router_rules(llm_config: SystemAgentLLMConfig | None) -> list[str]:
        "[Responsibilities]",
        "- Router only: extract intent and route strategy; never answer user directly.",
        "- Set objective to the user's goal in a concise, faithful sentence.",
-        "- Set context_summary to a brief description of what context messages contain.",
+        "- Set context_summary to a brief but execution-useful summary of the relevant context, including known IDs, dates, time ranges, and prior tool outcomes when they matter.",
        "- Set requires_tool_evidence=true when the task needs tool execution to ground the answer.",
        "- Set requires_tool_evidence=false when the question can be answered directly from context.",
        *_config_rules(llm_config),
@@ -75,14 +85,17 @@ def _worker_rules(llm_config: SystemAgentLLMConfig | None) -> list[str]:
        "[Responsibilities]",
        "- Worker only: execute routed objective without changing router intent.",
        "- Treat router output as objective contract, not as a fully-materialized tool-args payload.",
+        "- Use objective plus context_summary as the primary execution guide from the router.",
        "- Infer deterministic required tool arguments from contract fields, tool schema, and runtime context.",
        "- Ask minimal clarification only when required arguments cannot be inferred safely.",
        "- Ground every claim in available evidence and tool results; never fabricate execution state.",
+        "- When requires_tool_evidence=true, do not finalize an answer from failed tool calls; either recover with a corrected tool call or explicitly surface that execution failed.",
+        "- If all tool calls fail under requires_tool_evidence=true, set status=failed and populate error; do not present a factual answer as confirmed.",
        "- Keep status/answer/suggested_actions/error internally consistent.",
        "[Schema Guidance]",
        "- The worker output schema is injected at runtime; follow it exactly.",
        "- Do not add fields that are not present in the injected schema.",
-        *_config_rules(llm_config),
+        *_config_rules(llm_config, include_context_messages=False),
    ]


@@ -97,8 +110,10 @@ def build_worker_contract_prompt(*, router_output: RouterAgentOutput) -> str:
            "[Worker Contract]",
            "- Keep routed objective unchanged.",
            "- Use objective as the execution target.",
-            "- Use context_summary to understand conversational background.",
+            "- Use context_summary to understand conversational background and reuse concrete facts already known from earlier context.",
            "- When requires_tool_evidence=true, you MUST call at least one tool before answering.",
+            "- A failed tool call does not count as grounding evidence for a factual answer.",
+            "- If no tool call succeeds, finalize with status=failed and a concrete error instead of a fact claim.",
            "- Infer deterministic missing required tool args from evidence + tool schema.",
            "- Ask clarification only when safe inference is impossible.",
            "[RouterAgentOutput]",
@@ -39,7 +39,9 @@ from schemas.agent.forwarded_props import (
    parse_forwarded_props_runtime_mode,
 )
 from schemas.agent.runtime_models import (
+    ErrorInfo,
    RouterAgentOutput,
+    RunStatus,
    WorkerAgentOutputLite,
 )
 from schemas.agent.skill_config import ProjectCliCommand, SkillName
@@ -74,6 +76,8 @@ class AgentScopeRunner:
        self._active_agent: JsonReActAgent | None = None
        self._active_agent_lock = asyncio.Lock()

+    _WORKER_MAX_ITERS = 7
+
    async def execute(
        self,
        *,
@@ -442,6 +446,11 @@ class AgentScopeRunner:
                    if self._active_agent is agent:
                        self._active_agent = None
            worker_payload = worker_output_model.model_validate(response_msg.metadata or {})
+            worker_payload = self._enforce_tool_evidence_contract(
+                worker_output=worker_payload,
+                requires_tool_evidence=requires_tool_evidence,
+                has_successful_tool_result=emitter.has_successful_tool_result,
+            )
            response_metadata = self._llm_pricing_service.build_usage_metadata(
                model=stage_config.model_code,
                usage_summary=tracking_model.usage_summary(),
@@ -458,6 +467,28 @@ class AgentScopeRunner:
        finally:
            reset_tool_credential(credential_token)

+    @staticmethod
+    def _enforce_tool_evidence_contract(
+        *,
+        worker_output: WorkerAgentOutputLite,
+        requires_tool_evidence: bool,
+        has_successful_tool_result: bool,
+    ) -> WorkerAgentOutputLite:
+        if not requires_tool_evidence or has_successful_tool_result:
+            return worker_output
+        return worker_output.model_copy(
+            update={
+                "status": RunStatus.FAILED,
+                "answer": "无法确认结果：所需工具调用未成功完成。",
+                "suggested_actions": [],
+                "error": ErrorInfo(
+                    code="TOOL_EVIDENCE_MISSING",
+                    message="requires_tool_evidence=true but no tool call completed successfully in this run",
+                    retryable=False,
+                ),
+            }
+        )
+
    def _build_worker_input_messages(
        self,
        *,
@@ -501,6 +532,7 @@ class AgentScopeRunner:
        model: TrackingChatModel,
        emitter: PipelineStageEmitter | None = None,
        force_tool_on_first_reasoning: bool = False,
+        max_iters: int = _WORKER_MAX_ITERS,
    ) -> JsonReActAgent:
        return JsonReActAgent(
            name=agent_name,
@@ -511,6 +543,7 @@ class AgentScopeRunner:
            memory=InMemoryMemory(),
            emitter=emitter,
            force_tool_on_first_reasoning=force_tool_on_first_reasoning,
+            max_iters=max_iters,
        )

    async def _emit_step_event(
@@ -36,8 +36,13 @@ class PipelineStageEmitter:
        self._emit_tool_events = emit_tool_events
        self._emitted_tool_calls: set[str] = set()
        self._emitted_tool_results: set[str] = set()
+        self._has_successful_tool_result = False
        self.latest_text_message_id: str | None = None

+    @property
+    def has_successful_tool_result(self) -> bool:
+        return self._has_successful_tool_result
+
    async def handle_print(self, *, msg: Msg, last: bool) -> None:
        del last
        if self._emit_tool_events:
@@ -126,6 +131,8 @@ class PipelineStageEmitter:
                payload["error"] = tool_output.error.model_dump(mode="json")

            await self._emit("TOOL_CALL_RESULT", payload)
+            if tool_output.status.value in {"success", "partial"}:
+                self._has_successful_tool_result = True
            self._emitted_tool_results.add(tool_call_id)

    async def _emit(self, event_type: str, payload: dict[str, Any]) -> None:
@@ -1,11 +1,11 @@
 from __future__ import annotations

-import json
 from typing import Any

 from agentscope.tool import ToolResponse
 from agentscope.message import TextBlock

+from core.agentscope.tools.cli.contracts import get_method_input_contract
 from core.agentscope.tools.cli.handlers import build_router
 from core.agentscope.tools.cli.models import CliCommand
 from core.agentscope.tools.cli.router import CommandRouter
@@ -44,29 +44,44 @@ def _resolve_owner_id() -> str:
    return owner_id


+def _with_method_contract(
+    *,
+    module: str,
+    method: str,
+    error: ErrorInfo | None,
+) -> ErrorInfo | None:
+    if error is None:
+        return None
+    contract = get_method_input_contract(module=module, method=method)
+    if contract is None:
+        return error
+    details = dict(error.details or {})
+    for key, value in contract.items():
+        details.setdefault(key, value)
+    message = error.message
+    retry_hint = contract.get("retry_hint")
+    if isinstance(retry_hint, str) and retry_hint and retry_hint not in message:
+        message = f"{message} {retry_hint}".strip()
+    return error.model_copy(update={"message": message, "details": details})
+
+
 async def invoke_cli_tool(
    *,
    tool_name: str,
    tool_call_args: dict[str, Any],
    allowed_commands: set[str] | None = None,
 ) -> ToolResponse:
-    command = str(tool_call_args.get("command", "")).strip()
-    subcommand = str(tool_call_args.get("subcommand", "")).strip()
-    args = tool_call_args.get("args")
-    if isinstance(args, str):
-        try:
-            parsed_args = json.loads(args)
-        except (json.JSONDecodeError, ValueError):
-            parsed_args = None
-        if isinstance(parsed_args, dict):
-            args = parsed_args
-    if not isinstance(args, dict):
-        args = {}
+    module = str(tool_call_args.get("module", "")).strip()
+    method = str(tool_call_args.get("method", "")).strip()
+    input_payload = tool_call_args.get("input")
+    if not isinstance(input_payload, dict):
+        input_payload = {}

    tool_call_args = {
        **tool_call_args,
-        "subcommand": subcommand,
-        "args": args,
+        "module": module,
+        "method": method,
+        "input": input_payload,
    }

    if tool_name != "project_cli":
@@ -76,29 +91,29 @@ async def invoke_cli_tool(
            code="UNKNOWN_TOOL",
            message=f"unsupported tool: {tool_name}",
        )
-    if not command or not subcommand:
+    if not module or not method:
        return _build_error(
            tool_name=tool_name,
            tool_call_args=tool_call_args,
            code="INVALID_ARGUMENT",
-            message="command and subcommand are required",
+            message="module and method are required",
        )
    router = _get_router()

-    if allowed_commands is not None and command not in allowed_commands:
+    if allowed_commands is not None and module not in allowed_commands:
        return _build_error(
            tool_name=tool_name,
            tool_call_args=tool_call_args,
-            code="COMMAND_NOT_ALLOWED",
-            message=f"command not enabled: {command}",
+            code="MODULE_NOT_ALLOWED",
+            message=f"module not enabled: {module}",
        )

-    if (command, subcommand) not in router.command_pairs:
+    if (module, method) not in router.module_methods:
        return _build_error(
            tool_name=tool_name,
            tool_call_args=tool_call_args,
-            code="UNKNOWN_COMMAND",
-            message=f"unknown command: {command} {subcommand}",
+            code="UNKNOWN_METHOD",
+            message=f"unknown method: {module} {method}",
        )

    try:
@@ -113,9 +128,9 @@ async def invoke_cli_tool(
        )

    request = CliCommand(
-        command=command,
-        subcommand=subcommand,
-        args=args,
+        module=module,
+        method=method,
+        input=input_payload,
        owner_id=owner_id,
    )

@@ -131,11 +146,17 @@ async def invoke_cli_tool(
        )

    status = ToolStatus.SUCCESS if cli_result.ok else ToolStatus.FAILURE
-    error_info = cli_result.error
+    error_info = _with_method_contract(
+        module=module,
+        method=method,
+        error=cli_result.error,
+    )
    result = {
-        "command": cli_result.command,
-        "subcommand": cli_result.subcommand,
+        "ok": cli_result.ok,
+        "module": cli_result.module,
+        "method": cli_result.method,
        "data": cli_result.data,
+        "error": error_info.model_dump(mode="json", exclude_none=True) if error_info else None,
    }

    tool_call_id = get_current_tool_call_id(tool_name=tool_name)
@@ -171,14 +192,27 @@ def _build_error(
    code: str,
    message: str,
 ) -> ToolResponse:
+    module = str((tool_call_args or {}).get("module", "")).strip()
+    method = str((tool_call_args or {}).get("method", "")).strip()
+    error_info = _with_method_contract(
+        module=module,
+        method=method,
+        error=ErrorInfo(code=code, message=message, retryable=False),
+    )
    tool_call_id = get_current_tool_call_id(tool_name=tool_name)
    output = ToolAgentOutput(
        tool_name=tool_name,
        tool_call_id=tool_call_id,
        tool_call_args=tool_call_args,
        status=ToolStatus.FAILURE,
-        result={"status": "failure", "code": code, "message": message},
-        error=ErrorInfo(code=code, message=message, retryable=False),
+        result={
+            "ok": False,
+            "module": module,
+            "method": method,
+            "data": None,
+            "error": error_info.model_dump(mode="json", exclude_none=True) if error_info else None,
+        },
+        error=error_info,
    )

    from core.agentscope.tools.tool_postprocessor import postprocess_tool_output
@@ -0,0 +1,112 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+METHOD_INPUT_CONTRACTS: dict[tuple[str, str], dict[str, Any]] = {
+    ("calendar", "read"): {
+        "input_schema": {
+            "mode": "string enum(day|range|event)",
+            "date": "date, required when mode=day",
+            "timezone": "string (IANA timezone), optional when mode=day",
+            "start_at": "datetime with timezone, required when mode=range",
+            "end_at": "datetime with timezone, required when mode=range",
+            "event_id": "UUID, required when mode=event",
+        },
+        "expected_input_examples": [
+            {"mode": "day", "date": "2026-04-24", "timezone": "Asia/Shanghai"},
+            {
+                "mode": "range",
+                "start_at": "2026-04-24T09:00:00+08:00",
+                "end_at": "2026-04-24T18:00:00+08:00",
+            },
+            {"mode": "event", "event_id": "550e8400-e29b-41d4-a716-446655440000"},
+        ],
+        "retry_hint": "For relative day requests, resolve the day to a concrete input.date value in YYYY-MM-DD format before retrying.",
+    },
+    ("calendar", "create"): {
+        "input_schema": {
+            "title": "string",
+            "start_at": "datetime with timezone",
+            "end_at": "datetime with timezone | null",
+            "timezone": "string (IANA timezone)",
+            "description": "string | null",
+            "metadata": "object | null",
+        },
+        "expected_input_examples": [
+            {
+                "title": "Project sync",
+                "start_at": "2026-04-24T10:00:00+08:00",
+                "end_at": "2026-04-24T11:00:00+08:00",
+                "timezone": "Asia/Shanghai",
+            }
+        ],
+    },
+    ("calendar", "update"): {
+        "input_schema": {
+            "event_id": "UUID",
+            "patch": "object with mutable event fields",
+            "patch.start_at": "datetime with timezone | omitted",
+            "patch.end_at": "datetime with timezone | null | omitted",
+        },
+        "expected_input_examples": [
+            {
+                "event_id": "550e8400-e29b-41d4-a716-446655440000",
+                "patch": {"title": "Updated title", "timezone": "Asia/Shanghai"},
+            }
+        ],
+    },
+    ("calendar", "delete"): {
+        "input_schema": {"event_id": "UUID"},
+        "expected_input_examples": [{"event_id": "550e8400-e29b-41d4-a716-446655440000"}],
+    },
+    ("calendar", "share"): {
+        "input_schema": {
+            "event_id": "UUID",
+            "invitee": "object { phone: string }",
+            "permissions": "object { view: bool, edit: bool, invite: bool }",
+        },
+        "expected_input_examples": [
+            {
+                "event_id": "550e8400-e29b-41d4-a716-446655440000",
+                "invitee": {"phone": "+8613800138000"},
+                "permissions": {"view": True, "edit": False, "invite": False},
+            }
+        ],
+    },
+    ("calendar", "accept_invite"): {
+        "input_schema": {"event_id": "UUID"},
+        "expected_input_examples": [{"event_id": "550e8400-e29b-41d4-a716-446655440000"}],
+    },
+    ("calendar", "reject_invite"): {
+        "input_schema": {"event_id": "UUID"},
+        "expected_input_examples": [{"event_id": "550e8400-e29b-41d4-a716-446655440000"}],
+    },
+    ("contacts", "read"): {
+        "input_schema": {},
+        "expected_input_examples": [{}],
+    },
+    ("memory", "update"): {
+        "input_schema": {
+            "operations": "array of objects",
+            "operations[].action": "string (update | delete)",
+            "operations[].memory_type": "string (user | work)",
+        },
+        "expected_input_examples": [
+            {
+                "operations": [
+                    {
+                        "action": "update",
+                        "memory_type": "user",
+                        "user_content": {"preferences": {"meeting_time": "morning"}},
+                    }
+                ]
+            }
+        ],
+    },
+}
+
+
+def get_method_input_contract(*, module: str, method: str) -> dict[str, Any] | None:
+    contract = METHOD_INPUT_CONTRACTS.get((module.strip(), method.strip()))
+    return dict(contract) if contract is not None else None
@@ -1,17 +1,15 @@
 from __future__ import annotations

-from datetime import date, datetime, timedelta
-from typing import Any
+from datetime import date, datetime, timedelta, timezone
+from typing import Annotated, Any, Literal
 from uuid import UUID
 from zoneinfo import ZoneInfo

 from core.agentscope.tools.cli.models import CliCommand, CliCommandResult
+from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError, field_validator
 from core.agentscope.tools.utils.calendar_domain import (
-    build_schedule_metadata,
    create_schedule_service,
    map_calendar_exception,
-    merge_schedule_metadata_for_update,
-    parse_iso_datetime,
    schedule_event_to_dict,
 )
 from schemas.agent.runtime_models import ErrorInfo
@@ -19,23 +17,185 @@ from schemas.enums import ScheduleItemStatus
 from v1.schedule_items.schemas import (
    ScheduleItemCreateRequest,
    ScheduleItemListRequest,
+    ScheduleItemMetadata,
    ScheduleItemShareRequest,
    ScheduleItemUpdateRequest,
 )


-async def handle_calendar_read(request: CliCommand) -> CliCommandResult:
+class _CalendarReadRangeInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    mode: Literal["range"]
+    start_at: datetime
+    end_at: datetime
+
+    @field_validator("start_at", "end_at")
+    @classmethod
+    def _validate_aware_datetime(cls, value: datetime) -> datetime:
+        if value.tzinfo is None:
+            raise ValueError("datetime must include timezone offset")
+        return value
+
+
+class _CalendarReadDayInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    mode: Literal["day"]
+    date: date
+    timezone: str = "Asia/Shanghai"
+
+
+class _CalendarReadEventInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    mode: Literal["event"]
+    event_id: UUID
+
+
+_CalendarReadInput = Annotated[
+    _CalendarReadDayInput | _CalendarReadRangeInput | _CalendarReadEventInput,
+    Field(discriminator="mode"),
+]
+_CALENDAR_READ_INPUT_ADAPTER = TypeAdapter(_CalendarReadInput)
+
+
+class _CalendarInviteeInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    phone: str
+
+
+class _CalendarPermissionsInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    view: bool = True
+    edit: bool = False
+    invite: bool = False
+
+
+class _CalendarInviteSubscriberInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    event_id: UUID
+    invitee: _CalendarInviteeInput
+    permissions: _CalendarPermissionsInput = Field(default_factory=_CalendarPermissionsInput)
+
+
+class _CalendarCreateEventInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    title: str = Field(min_length=1, max_length=255)
+    start_at: datetime
+    end_at: datetime | None = None
+    timezone: str = Field(min_length=1, max_length=50)
+    description: str | None = Field(default=None, max_length=2000)
+    metadata: ScheduleItemMetadata | None = None
+
+    @field_validator("start_at", "end_at")
+    @classmethod
+    def _validate_create_datetimes(cls, value: datetime | None) -> datetime | None:
+        if value is not None and value.tzinfo is None:
+            raise ValueError("datetime must include timezone offset")
+        return value
+
+
+class _CalendarUpdatePatchInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    title: str | None = Field(default=None, min_length=1, max_length=255)
+    description: str | None = Field(default=None, max_length=2000)
+    start_at: datetime | None = None
+    end_at: datetime | None = None
+    timezone: str | None = Field(default=None, min_length=1, max_length=50)
+    metadata: ScheduleItemMetadata | None = None
+    status: str | None = None
+
+    @field_validator("start_at", "end_at")
+    @classmethod
+    def _validate_patch_datetimes(cls, value: datetime | None) -> datetime | None:
+        if value is not None and value.tzinfo is None:
+            raise ValueError("datetime must include timezone offset")
+        return value
+
+
+class _CalendarUpdateEventInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    event_id: UUID
+    patch: _CalendarUpdatePatchInput
+
+
+class _CalendarInviteResponseInput(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+    event_id: UUID
+
+
+def _validate_action_input(
+    request: CliCommand,
+    validator: type[BaseModel] | TypeAdapter[Any],
+) -> Any | CliCommandResult:
+    try:
+        if isinstance(validator, TypeAdapter):
+            return validator.validate_python(request.input)
+        return validator.model_validate(request.input)
+    except ValidationError as exc:
+        missing_fields: list[str] = []
+        invalid_fields: list[str] = []
+        for error in exc.errors():
+            location = error.get("loc") or ()
+            if not location:
+                continue
+            field_path = ".".join(str(part) for part in location)
+            error_type = str(error.get("type") or "")
+            if error_type == "missing":
+                missing_fields.append(field_path)
+            else:
+                invalid_fields.append(field_path)
+        details: dict[str, Any] = {
+            "missing_fields": sorted(set(missing_fields)),
+            "invalid_fields": sorted(set(invalid_fields)),
+        }
+        alias_corrections = _alias_corrections_for_input(request.input)
+        if alias_corrections:
+            details["alias_corrections"] = alias_corrections
+        message = "input does not match method schema"
+        return CliCommandResult(
+            ok=False,
+            module=request.module,
+            method=request.method,
+            error=ErrorInfo(
+                code="INVALID_ACTION_INPUT",
+                message=message,
+                retryable=False,
+                details=details,
+            ),
+        )
+
+
+def _alias_corrections_for_input(input_payload: dict[str, Any]) -> dict[str, str]:
+    alias_map = {
+        "start_time": "start_at",
+        "end_time": "end_at",
+        "event_timezone": "timezone",
+    }
+    corrections: dict[str, str] = {}
+    for alias, canonical in alias_map.items():
+        if alias in input_payload:
+            corrections[alias] = canonical
+    return corrections
+
+
+async def handle_calendar_list_range(request: CliCommand) -> CliCommandResult:
    from core.db.session import AsyncSessionLocal

-    parsed_start, parsed_end, read_error = _resolve_read_range(request)
-    if read_error is not None:
-        return _fail(request=request, code="INVALID_ARGUMENT", message=read_error)
-    if parsed_start is None or parsed_end is None:
-        return _fail(
-            request=request,
-            code="INVALID_ARGUMENT",
-            message="start_at and end_at are required",
-        )
+    validated = _validate_action_input(request, _CalendarReadRangeInput)
+    if isinstance(validated, CliCommandResult):
+        return validated
+
+    parsed_start = validated.start_at.astimezone(timezone.utc)
+    parsed_end = validated.end_at.astimezone(timezone.utc)
    if parsed_start >= parsed_end:
        return _fail(
            request=request,
@@ -50,24 +210,75 @@ async def handle_calendar_read(request: CliCommand) -> CliCommandResult:
        event_items = [schedule_event_to_dict(item) for item in items]
        return CliCommandResult(
            ok=True,
-            command="calendar",
-            subcommand="read",
+            module="calendar",
+            method=request.method,
            data={"total": len(event_items), "items": event_items},
        )


-async def handle_calendar_create(request: CliCommand) -> CliCommandResult:
+async def handle_calendar_list_day(request: CliCommand) -> CliCommandResult:
+    validated = _validate_action_input(request, _CALENDAR_READ_INPUT_ADAPTER)
+    if isinstance(validated, CliCommandResult):
+        return validated
+
+    if isinstance(validated, _CalendarReadEventInput):
+        return await handle_calendar_get_event(request)
+
+    if isinstance(validated, _CalendarReadRangeInput):
+        return await handle_calendar_list_range(request)
+
+    day_request = request.model_copy(
+        update={
+            "input": _day_input_to_range_input(validated),
+        }
+    )
+    return await handle_calendar_list_range(day_request)
+
+
+async def handle_calendar_get_event(request: CliCommand) -> CliCommandResult:
    from core.db.session import AsyncSessionLocal

+    validated = _validate_action_input(request, _CalendarReadEventInput)
+    if isinstance(validated, CliCommandResult):
+        return validated
+    event_id = validated.event_id
+
    async with AsyncSessionLocal() as session:
        service = create_schedule_service(session, UUID(request.owner_id))
        try:
-            result_item = await _create_event(service, request.args)
+            item = await service.get_by_id(event_id)
+            return CliCommandResult(
+                ok=True,
+                module="calendar",
+                method=request.method,
+                data=schedule_event_to_dict(item),
+            )
+        except Exception as exc:
+            code, message, retryable = map_calendar_exception(exc)
+            return CliCommandResult(
+                ok=False,
+                module="calendar",
+                method=request.method,
+                error=ErrorInfo(code=code, message=message, retryable=retryable),
+            )
+
+
+async def handle_calendar_create_event(request: CliCommand) -> CliCommandResult:
+    from core.db.session import AsyncSessionLocal
+
+    validated = _validate_action_input(request, _CalendarCreateEventInput)
+    if isinstance(validated, CliCommandResult):
+        return validated
+
+    async with AsyncSessionLocal() as session:
+        service = create_schedule_service(session, UUID(request.owner_id))
+        try:
+            result_item = await _create_event(service, validated)
            event_id = str(result_item.get("eventId") or "")
            return CliCommandResult(
                ok=True,
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                data={
                    "status": "success",
                    "success": 1,
@@ -80,8 +291,8 @@ async def handle_calendar_create(request: CliCommand) -> CliCommandResult:
            code, message, retryable = map_calendar_exception(exc)
            return CliCommandResult(
                ok=False,
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                data={
                    "status": "failure",
                    "success": 0,
@@ -89,7 +300,7 @@ async def handle_calendar_create(request: CliCommand) -> CliCommandResult:
                    "ids": [],
                    "results": [
                        {
-                            "action": "create",
+                            "action": request.method,
                            "status": "failure",
                            "eventId": "",
                            "code": code,
@@ -101,19 +312,23 @@ async def handle_calendar_create(request: CliCommand) -> CliCommandResult:
            )


-async def handle_calendar_update(request: CliCommand) -> CliCommandResult:
+async def handle_calendar_update_event(request: CliCommand) -> CliCommandResult:
    from core.db.session import AsyncSessionLocal

+    validated = _validate_action_input(request, _CalendarUpdateEventInput)
+    if isinstance(validated, CliCommandResult):
+        return validated
+
    async with AsyncSessionLocal() as session:
        service = create_schedule_service(session, UUID(request.owner_id))
-        event_id = str(request.args.get("event_id") or "").strip()
+        event_id = str(validated.event_id)
        try:
-            result_item = await _update_event(service, request.args)
+            result_item = await _update_event(service, validated)
            event_id = str(result_item.get("eventId") or event_id)
            return CliCommandResult(
                ok=True,
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                data={
                    "status": "success",
                    "success": 1,
@@ -126,8 +341,8 @@ async def handle_calendar_update(request: CliCommand) -> CliCommandResult:
            code, message, retryable = map_calendar_exception(exc)
            return CliCommandResult(
                ok=False,
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                data={
                    "status": "failure",
                    "success": 0,
@@ -135,7 +350,7 @@ async def handle_calendar_update(request: CliCommand) -> CliCommandResult:
                    "ids": [],
                    "results": [
                        {
-                            "action": "update",
+                            "action": request.method,
                            "status": "failure",
                            "eventId": event_id,
                            "code": code,
@@ -147,24 +362,22 @@ async def handle_calendar_update(request: CliCommand) -> CliCommandResult:
            )


-async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
+async def handle_calendar_delete_event(request: CliCommand) -> CliCommandResult:
    from core.db.session import AsyncSessionLocal

+    validated = _validate_action_input(request, _CalendarReadEventInput)
+    if isinstance(validated, CliCommandResult):
+        return validated
+
    async with AsyncSessionLocal() as session:
        service = create_schedule_service(session, UUID(request.owner_id))
-        event_id = str(request.args.get("event_id") or "").strip()
-        if not event_id:
-            return _fail(
-                request=request,
-                code="INVALID_ARGUMENT",
-                message="event_id is required",
-            )
+        event_id = str(validated.event_id)
        try:
            await service.delete(UUID(event_id))
            return CliCommandResult(
                ok=True,
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                data={
                    "status": "success",
                    "success": 1,
@@ -172,7 +385,7 @@ async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
                    "ids": [event_id],
                    "results": [
                        {
-                            "action": "delete",
+                            "action": request.method,
                            "status": "success",
                            "eventId": event_id,
                        }
@@ -183,8 +396,8 @@ async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
            code, message, retryable = map_calendar_exception(exc)
            return CliCommandResult(
                ok=False,
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                data={
                    "status": "failure",
                    "success": 0,
@@ -192,7 +405,7 @@ async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
                    "ids": [],
                    "results": [
                        {
-                            "action": "delete",
+                            "action": request.method,
                            "status": "failure",
                            "eventId": event_id,
                            "code": code,
@@ -204,155 +417,199 @@ async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
            )


-async def handle_calendar_share(request: CliCommand) -> CliCommandResult:
+async def handle_calendar_invite_subscriber(request: CliCommand) -> CliCommandResult:  # Share one event with a single invitee phone via service.share.
    from core.db.session import AsyncSessionLocal

-    event_id = str(request.args.get("event_id", ""))
-    invitees = request.args.get("invitees")
-    if not isinstance(invitees, list):
-        invitees = []
+    validated = _validate_action_input(request, _CalendarInviteSubscriberInput)  # may come back as a CliCommandResult (presumably the validation failure)
+    if isinstance(validated, CliCommandResult):  # a result instead of a model: hand it straight back to the caller
+        return validated
+    event_id = str(validated.event_id)
+
    async with AsyncSessionLocal() as session:
        service = create_schedule_service(session, UUID(request.owner_id))
        target_uuid = UUID(event_id)

-        invited: list[str] = []
-        result_items: list[dict[str, str]] = []
-
-        for inv in invitees:
-            raw_phone = inv.get("phone", "").strip()
-            normalized_phone = _normalize_phone(raw_phone)
-            if not normalized_phone:
-                result_items.append(
-                    {
-                        "phone": raw_phone,
-                        "status": "failure",
-                        "code": "INVALID_ARGUMENT",
-                        "message": "invalid phone",
-                    }
-                )
-                continue
-            permission = {
-                "permission_view": inv.get("permission_view", True),
-                "permission_edit": inv.get("permission_edit", False),
-                "permission_invite": inv.get("permission_invite", False),
-            }
-            try:
-                await service.share(
-                    target_uuid,
-                    ScheduleItemShareRequest(phone=normalized_phone, **permission),
-                )
-                invited.append(normalized_phone)
-                result_items.append({"phone": normalized_phone, "status": "success"})
-            except Exception as exc:
-                code, message, _ = map_calendar_exception(exc)
-                result_items.append(
-                    {
-                        "phone": normalized_phone,
-                        "status": "failure",
-                        "code": code,
-                        "message": message,
-                    }
-                )
-
-        failure_count = len([r for r in result_items if r["status"] == "failure"])
-        success_count = len(invited)
-        status = _batch_status(success_count, failure_count)
-        return CliCommandResult(
-            ok=status != "failure",
-            command=request.command,
-            subcommand=request.subcommand,
-            data={
-                "status": status,
-                "success": success_count,
-                "failed": failure_count,
-                "results": result_items,
-            },
-        )
+        raw_phone = validated.invitee.phone.strip()
+        normalized_phone = _normalize_phone(raw_phone)
+        if not normalized_phone:  # normalisation produced a falsy value: fail without calling the service
+            return CliCommandResult(
+                ok=False,
+                module=request.module,
+                method=request.method,
+                data={
+                    "status": "failure",
+                    "success": 0,
+                    "failed": 1,
+                    "results": [
+                        {
+                            "phone": raw_phone,  # echo the stripped, un-normalised phone from the input
+                            "status": "failure",
+                            "code": "INVALID_ACTION_INPUT",
+                            "message": "invalid phone",
+                        }
+                    ],
+                },
+                error=ErrorInfo(code="INVALID_ACTION_INPUT", message="invalid phone", retryable=False),
+            )
+        try:
+            await service.share(
+                target_uuid,
+                ScheduleItemShareRequest(
+                    phone=normalized_phone,
+                    permission_view=validated.permissions.view,  # view/edit/invite map onto the permission_* request fields
+                    permission_edit=validated.permissions.edit,
+                    permission_invite=validated.permissions.invite,
+                ),
+            )
+            return CliCommandResult(
+                ok=True,
+                module=request.module,
+                method=request.method,
+                data={
+                    "status": "success",
+                    "success": 1,
+                    "failed": 0,
+                    "results": [{"phone": normalized_phone, "status": "success"}],
+                },
+            )
+        except Exception as exc:  # any failure inside the try is converted to a failure result rather than raised
+            code, message, retryable = map_calendar_exception(exc)
+            return CliCommandResult(
+                ok=False,
+                module=request.module,
+                method=request.method,
+                data={
+                    "status": "failure",
+                    "success": 0,
+                    "failed": 1,
+                    "results": [
+                        {
+                            "phone": normalized_phone,
+                            "status": "failure",
+                            "code": code,
+                            "message": message,
+                        }
+                    ],
+                },
+                error=ErrorInfo(code=code, message=message, retryable=retryable),
+            )


-async def _create_event(service: Any, args: dict[str, Any]) -> dict[str, Any]:
-    start_at = args.get("start_at")
-    if not isinstance(start_at, str) or not start_at.strip():
-        raise ValueError("create requires start_at")
-    event_timezone = args.get("event_timezone")
-    if not isinstance(event_timezone, str) or not event_timezone.strip():
-        raise ValueError("create requires event_timezone")
-    parsed_start = parse_iso_datetime(start_at)
-    if parsed_start is None:
-        raise ValueError("invalid start_at")
+async def handle_calendar_accept_invite(request: CliCommand) -> CliCommandResult:  # Accept the subscription for the validated event_id.
+    from core.db.session import AsyncSessionLocal

-    parsed_end = None
-    end_at = args.get("end_at")
-    if isinstance(end_at, str) and end_at.strip():
-        parsed_end = parse_iso_datetime(end_at)
-        if parsed_end is None:
-            raise ValueError("invalid end_at")
+    validated = _validate_action_input(request, _CalendarInviteResponseInput)
+    if isinstance(validated, CliCommandResult):  # validation produced a result instead of a model: return it unchanged
+        return validated
+    event_id = str(validated.event_id)
+
+    async with AsyncSessionLocal() as session:
+        service = create_schedule_service(session, UUID(request.owner_id))  # service is created for the requesting owner
+        try:
+            result = await service.accept_subscription(UUID(event_id))
+            return CliCommandResult(ok=True, module=request.module, method=request.method, data=result)  # service return value is passed through as data
+        except Exception as exc:
+            code, message, retryable = map_calendar_exception(exc)  # turn the failure into (code, message, retryable)
+            return CliCommandResult(
+                ok=False,
+                module=request.module,
+                method=request.method,
+                error=ErrorInfo(code=code, message=message, retryable=retryable),
+            )
+
+
+async def handle_calendar_reject_invite(request: CliCommand) -> CliCommandResult:  # Reject the subscription for the validated event_id.
+    from core.db.session import AsyncSessionLocal
+
+    validated = _validate_action_input(request, _CalendarInviteResponseInput)  # same input model as the accept handler
+    if isinstance(validated, CliCommandResult):  # validation produced a result instead of a model: return it unchanged
+        return validated
+    event_id = str(validated.event_id)
+
+    async with AsyncSessionLocal() as session:
+        service = create_schedule_service(session, UUID(request.owner_id))  # service is created for the requesting owner
+        try:
+            result = await service.reject_subscription(UUID(event_id))
+            return CliCommandResult(ok=True, module=request.module, method=request.method, data=result)  # service return value is passed through as data
+        except Exception as exc:
+            code, message, retryable = map_calendar_exception(exc)  # turn the failure into (code, message, retryable)
+            return CliCommandResult(
+                ok=False,
+                module=request.module,
+                method=request.method,
+                error=ErrorInfo(code=code, message=message, retryable=retryable),
+            )
+
+
+async def _create_event(service: Any, input_payload: _CalendarCreateEventInput) -> dict[str, Any]:  # Create one event from validated input; returns the per-event result item.
+    parsed_start = input_payload.start_at.astimezone(timezone.utc)  # start/end are converted to UTC before being sent to the service
+    parsed_end = (
+        input_payload.end_at.astimezone(timezone.utc)
+        if input_payload.end_at is not None
+        else None
+    )

    created = await service.create_agent_generated(
        ScheduleItemCreateRequest(
-            title=str(args.get("title") or "new event").strip(),
-            description=(str(args.get("description") or "").strip() or None),
+            title=input_payload.title.strip(),
+            description=((input_payload.description or "").strip() or None),  # missing or whitespace-only -> None, as the removed line did
            start_at=parsed_start,
            end_at=parsed_end,
-            timezone=event_timezone.strip(),
-            metadata=build_schedule_metadata(
-                args.get("location"),
-                args.get("color"),
-                args.get("reminder_minutes"),
-            ),
+            timezone=input_payload.timezone.strip(),
+            metadata=input_payload.metadata,  # metadata is forwarded as given, no longer assembled from location/color/reminder args
        )
    )
    return {"action": "create", "status": "success", "eventId": str(created.id)}


-async def _update_event(service: Any, args: dict[str, Any]) -> dict[str, Any]:
-    event_id = args.get("event_id")
-    if not isinstance(event_id, str) or not event_id.strip():
-        raise ValueError("update requires event_id")
+async def _update_event(service: Any, input_payload: _CalendarUpdateEventInput) -> dict[str, Any]:
+    event_id = str(input_payload.event_id)
+    patch = input_payload.patch.model_dump(exclude_unset=True)

    update_data: dict[str, Any] = {}
-    if "title" in args:
-        update_data["title"] = str(args.get("title") or "").strip()
-    if "description" in args:
-        update_data["description"] = str(args.get("description") or "").strip()
-    if "start_at" in args:
-        start_value = args.get("start_at")
-        if not isinstance(start_value, str) or not start_value.strip():
-            raise ValueError("start_at must be non-empty string")
-        parsed_start = parse_iso_datetime(start_value)
-        if parsed_start is None:
-            raise ValueError("invalid start_at")
-        update_data["start_at"] = parsed_start
-    if "end_at" in args:
-        end_value = args.get("end_at")
+    if "title" in patch:
+        update_data["title"] = str(patch.get("title") or "").strip()
+    if "description" in patch:
+        update_data["description"] = str(patch.get("description") or "").strip()
+    if "start_at" in patch:
+        start_value = patch.get("start_at")
+        if not isinstance(start_value, datetime):
+            raise ValueError("start_at must be datetime with timezone")
+        update_data["start_at"] = start_value.astimezone(timezone.utc)
+    if "end_at" in patch:
+        end_value = patch.get("end_at")
        if end_value in (None, ""):
            update_data["end_at"] = None
-        elif isinstance(end_value, str):
-            parsed_end = parse_iso_datetime(end_value)
-            if parsed_end is None:
-                raise ValueError("invalid end_at")
-            update_data["end_at"] = parsed_end
+        elif isinstance(end_value, datetime):
+            update_data["end_at"] = end_value.astimezone(timezone.utc)
        else:
-            raise ValueError("end_at must be string or null")
-    if "event_timezone" in args:
-        timezone_value = args.get("event_timezone")
+            raise ValueError("end_at must be datetime with timezone or null")
+    if "timezone" in patch:
+        timezone_value = patch.get("timezone")
        if not isinstance(timezone_value, str) or not timezone_value.strip():
-            raise ValueError("event_timezone must be non-empty string")
+            raise ValueError("timezone must be non-empty string")
        update_data["timezone"] = timezone_value.strip()
-    if "status" in args:
-        update_data["status"] = ScheduleItemStatus(str(args.get("status")))
+    if "status" in patch:
+        update_data["status"] = ScheduleItemStatus(str(patch.get("status")))

-    if any(key in args for key in ("location", "color", "reminder_minutes")):
+    if "metadata" in patch:
        existing = await service.get_by_id(UUID(event_id))
-        update_data["metadata"] = merge_schedule_metadata_for_update(
-            existing_metadata=existing.metadata,
-            location=args.get("location"),
-            color=args.get("color"),
-            reminder_minutes=args.get("reminder_minutes"),
-        )
+        metadata_payload = patch.get("metadata")
+        if metadata_payload is None:
+            update_data["metadata"] = ScheduleItemMetadata.model_validate({})
+        else:
+            metadata_dict = (
+                metadata_payload.model_dump() if isinstance(metadata_payload, ScheduleItemMetadata) else metadata_payload
+            )
+            update_data["metadata"] = ScheduleItemMetadata.model_validate(
+                {
+                    **(existing.metadata.model_dump() if existing.metadata else {}),
+                    **metadata_dict,
+                }
+            )

    if not update_data:
-        raise ValueError("update requires at least one mutable field")
+        raise ValueError("patch requires at least one mutable field")

    changed_fields = sorted(update_data.keys())
    updated = await service.update(
@@ -395,55 +652,34 @@ def _batch_status(success: int, failed: int) -> str:
    return "partial"


-def _resolve_read_range(
-    request: CliCommand,
-) -> tuple[datetime | None, datetime | None, str | None]:
-    start_at = str(request.args.get("start_at", "")).strip()
-    end_at = str(request.args.get("end_at", "")).strip()
-    if start_at and end_at:
-        try:
-            return parse_iso_datetime(start_at), parse_iso_datetime(end_at), None
-        except ValueError as exc:
-            return None, None, str(exc)
-
-    raw_date = str(request.args.get("date", "")).strip()
-    if not raw_date:
-        return None, None, None
-
-    timezone_name = (
-        str(request.args.get("timezone", "Asia/Shanghai")).strip() or "Asia/Shanghai"
-    )
+def _day_input_to_range_input(input_payload: _CalendarReadDayInput) -> dict[str, str]:  # Expand a calendar date into a one-day "range" input dict.
+    timezone_name = input_payload.timezone.strip() or "Asia/Shanghai"  # blank timezone falls back to Asia/Shanghai
    try:
        zone = ZoneInfo(timezone_name)
-    except Exception:
-        return None, None, "timezone is invalid"
-
-    try:
-        target_date = date.fromisoformat(raw_date)
-    except ValueError:
-        return None, None, "date must be YYYY-MM-DD"
+    except Exception as exc:  # any ZoneInfo construction failure is reported as ValueError, with the cause chained
+        raise ValueError("timezone is invalid") from exc

    start_local = datetime(
-        year=target_date.year,
-        month=target_date.month,
-        day=target_date.day,
+        year=input_payload.date.year,  # local midnight of the requested date in `zone`
+        month=input_payload.date.month,
+        day=input_payload.date.day,
        hour=0,
        minute=0,
        second=0,
        tzinfo=zone,
    )
    end_local = start_local + timedelta(days=1)
-    return (
-        parse_iso_datetime(start_local.isoformat()),
-        parse_iso_datetime(end_local.isoformat()),
-        None,
-    )
+    return {
+        "mode": "range",
+        "start_at": start_local.isoformat(),  # ISO-8601 strings, not datetime objects
+        "end_at": end_local.isoformat(),  # start_local plus one day
+    }


 def _fail(*, request: CliCommand, code: str, message: str) -> CliCommandResult:
    return CliCommandResult(
        ok=False,
-        command=request.command,
-        subcommand=request.subcommand,
+        module=request.module,  # echo the request's module/method so the failure identifies the call
+        method=request.method,
        error=ErrorInfo(code=code, message=message, retryable=False),
    )
@@ -20,8 +20,8 @@ async def handle_contacts_read(request: CliCommand) -> CliCommandResult:
        contacts = await _list_friend_contacts(session=session, owner_id=UUID(request.owner_id))
        return CliCommandResult(
            ok=True,
-            command=request.command,
-            subcommand=request.subcommand,
+            module=request.module,
+            method=request.method,
            data={
                "friends_count": len(contacts),
                "friends": contacts,
@@ -17,11 +17,15 @@ from schemas.domain.memory_content import UserMemoryContent, WorkProfileContent
 async def handle_memory_update(request: CliCommand) -> CliCommandResult:
    from core.db.session import AsyncSessionLocal

-    operations = request.args.get("operations")
+    operations = request.input.get("operations")
    if not isinstance(operations, list) or not operations:
        return _invalid_argument(
            request=request,
            message="operations must be a non-empty list",
+            details={
+                "required_fields": ["operations"],
+                "field_types": {"operations": "array of objects"},
+            },
        )

    async with AsyncSessionLocal() as session:
@@ -135,8 +139,8 @@ async def handle_memory_update(request: CliCommand) -> CliCommandResult:

        return CliCommandResult(
            ok=status != "failure",
-            command=request.command,
-            subcommand=request.subcommand,
+            module=request.module,
+            method=request.method,
            data={
                "status": status,
                "success": success_count,
@@ -233,12 +237,22 @@ async def _apply_delete_operation(
    }


-def _invalid_argument(*, request: CliCommand, message: str) -> CliCommandResult:
+def _invalid_argument(  # Build a non-retryable INVALID_ARGUMENT failure result for `request`.
+    *,
+    request: CliCommand,
+    message: str,
+    details: dict[str, Any] | None = None,  # optional, so callers using the old (request, message) form keep working
+) -> CliCommandResult:
    return CliCommandResult(
        ok=False,
-        command=request.command,
-        subcommand=request.subcommand,
-        error=ErrorInfo(code="INVALID_ARGUMENT", message=message, retryable=False),
+        module=request.module,
+        method=request.method,
+        error=ErrorInfo(
+            code="INVALID_ARGUMENT",
+            message=message,
+            retryable=False,
+            details=details,  # forwarded unchanged; None when the caller supplied no details
+        ),
    )


@@ -1,11 +1,13 @@
 from __future__ import annotations

 from core.agentscope.tools.cli.handler_calendar import (
-    handle_calendar_create,
-    handle_calendar_delete,
-    handle_calendar_read,
-    handle_calendar_share,
-    handle_calendar_update,
+    handle_calendar_accept_invite,
+    handle_calendar_create_event,
+    handle_calendar_delete_event,
+    handle_calendar_invite_subscriber,
+    handle_calendar_list_day,
+    handle_calendar_reject_invite,
+    handle_calendar_update_event,
 )
 from core.agentscope.tools.cli.handler_contacts import handle_contacts_read
 from core.agentscope.tools.cli.handler_memory import handle_memory_update
@@ -14,11 +16,13 @@ from core.agentscope.tools.cli.router import CommandRouter

 def build_router() -> CommandRouter:
    router = CommandRouter()
-    router.register(command="calendar", subcommand="create", handler=handle_calendar_create)
-    router.register(command="calendar", subcommand="read", handler=handle_calendar_read)
-    router.register(command="calendar", subcommand="update", handler=handle_calendar_update)
-    router.register(command="calendar", subcommand="delete", handler=handle_calendar_delete)
-    router.register(command="calendar", subcommand="share", handler=handle_calendar_share)
-    router.register(command="contacts", subcommand="read", handler=handle_contacts_read)
-    router.register(command="memory", subcommand="update", handler=handle_memory_update)
+    router.register(module="calendar", method="read", handler=handle_calendar_list_day)  # method names stay short; handler names describe the action
+    router.register(module="calendar", method="create", handler=handle_calendar_create_event)
+    router.register(module="calendar", method="update", handler=handle_calendar_update_event)
+    router.register(module="calendar", method="delete", handler=handle_calendar_delete_event)
+    router.register(module="calendar", method="share", handler=handle_calendar_invite_subscriber)  # "share" is served by the invite_subscriber handler
+    router.register(module="calendar", method="accept_invite", handler=handle_calendar_accept_invite)  # new in this change
+    router.register(module="calendar", method="reject_invite", handler=handle_calendar_reject_invite)  # new in this change
+    router.register(module="contacts", method="read", handler=handle_contacts_read)
+    router.register(module="memory", method="update", handler=handle_memory_update)
    return router
@@ -10,9 +10,9 @@ from schemas.agent.runtime_models import ErrorInfo
 class CliCommand(BaseModel):
    model_config = ConfigDict(extra="forbid")

-    command: str
-    subcommand: str
-    args: dict[str, Any] = Field(default_factory=dict)
+    module: str  # first half of the (module, method) routing key
+    method: str  # second half of the (module, method) routing key
+    input: dict[str, Any] = Field(default_factory=dict)  # method-specific payload; defaults to an empty dict
    owner_id: str


@@ -20,7 +20,7 @@ class CliCommandResult(BaseModel):
    model_config = ConfigDict(extra="forbid")

    ok: bool
-    command: str
-    subcommand: str
+    module: str
+    method: str
    data: Any = None
    error: ErrorInfo | None = None
@@ -17,30 +17,30 @@ class CommandRouter:
    def __init__(self) -> None:
        self._handlers: dict[tuple[str, str], CliHandler] = {}

-    def register(self, *, command: str, subcommand: str, handler: CliHandler) -> None:
-        key = (command, subcommand)
+    def register(self, *, module: str, method: str, handler: CliHandler) -> None:  # Bind handler to the (module, method) pair.
+        key = (module, method)
        if key in self._handlers:
-            raise ValueError(f"command already registered: {command} {subcommand}")
+            raise ValueError(f"method already registered: {module} {method}")  # duplicate registration is an error, not an overwrite
        self._handlers[key] = handler

    @property
-    def commands(self) -> set[str]:
-        return {command for command, _ in self._handlers.keys()}
+    def modules(self) -> set[str]:  # Distinct module names across all registered handlers.
+        return {module for module, _ in self._handlers.keys()}

    @property
-    def command_pairs(self) -> set[tuple[str, str]]:
+    def module_methods(self) -> set[tuple[str, str]]:  # Copy of every registered (module, method) key.
        return set(self._handlers.keys())

    async def dispatch(self, request: CliCommand) -> CliCommandResult:
-        handler = self._handlers.get((request.command, request.subcommand))
+        handler = self._handlers.get((request.module, request.method))
        if handler is None:
            return CliCommandResult(
                ok=False,
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                error=ErrorInfo(
-                    code="UNKNOWN_COMMAND",
-                    message=f"unknown command: {request.command} {request.subcommand}",
+                    code="UNKNOWN_METHOD",
+                    message=f"unknown method: {request.module} {request.method}",
                    retryable=False,
                ),
            )
@@ -49,14 +49,14 @@ class CommandRouter:
        except Exception as exc:
            logger.error(
                "CLI handler failed",
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                error=str(exc),
            )
            return CliCommandResult(
                ok=False,
-                command=request.command,
-                subcommand=request.subcommand,
+                module=request.module,
+                method=request.method,
                error=ErrorInfo(
                    code="HANDLER_ERROR",
                    message=str(exc),
@@ -75,11 +75,11 @@ async def cli_main(argv: list[str] | None = None) -> None:
        _write_output(
            CliCommandResult(
                ok=False,
-                command=argv[0] if argv else "",
-                subcommand=argv[1] if len(argv) > 1 else "",
+                module=argv[0] if argv else "",
+                method=argv[1] if len(argv) > 1 else "",
                error=ErrorInfo(
-                    code="MISSING_COMMAND",
-                    message="command and subcommand are required",
+                    code="MISSING_METHOD",
+                    message="module and method are required",
                    retryable=False,
                ),
            )
@@ -94,17 +94,17 @@ async def cli_main(argv: list[str] | None = None) -> None:
            _write_output(
                CliCommandResult(
                    ok=False,
-                    command=argv[0],
-                    subcommand=argv[1],
+                    module=argv[0],
+                    method=argv[1],
                    error=ErrorInfo(
-                        code="INVALID_ARGS",
-                        message="args must be valid JSON",
+                        code="INVALID_INPUT",
+                        message="input must be valid JSON",
                        retryable=False,
                    ),
                )
            )
            sys.exit(1)
-    request = CliCommand(command=argv[0], subcommand=argv[1], args=args, owner_id=str(args.get("owner_id", "")))
+    request = CliCommand(module=argv[0], method=argv[1], input=args, owner_id=str(args.get("owner_id", "")))
    result = await router.dispatch(request)
    _write_output(result)
    if not result.ok:
@@ -9,16 +9,19 @@ from core.agentscope.tools.cli import invoke_cli_tool
 PROJECT_CLI_TOOL_NAME = "project_cli"


-def make_project_cli_wrapper(*, allowed_commands: set[str]) -> Any:
+def make_project_cli_wrapper(
+    *,
+    allowed_commands: set[str],
+) -> Any:
    async def _project_cli(
-        command: str,
-        subcommand: str,
-        args: dict[str, Any] | None = None,
+        module: str,
+        method: str,
+        input: dict[str, Any],
    ) -> ToolResponse:
        tool_call_args = {
-            "command": command,
-            "subcommand": subcommand,
-            "args": args or {},
+            "module": module,
+            "method": method,
+            "input": input,
        }
        return await invoke_cli_tool(
            tool_name=PROJECT_CLI_TOOL_NAME,
@@ -27,12 +30,14 @@ def make_project_cli_wrapper(*, allowed_commands: set[str]) -> Any:
        )

    _project_cli.__name__ = PROJECT_CLI_TOOL_NAME
-    _project_cli.__doc__ = """Execute CLI commands for calendar, contacts, and memory operations.
+    _project_cli.__doc__ = """Execute business methods for enabled modules (calendar, contacts, memory, etc.).
+
+You MUST read the relevant skill file via view_skill_file before calling this tool to learn the correct method names and input shapes for each module. Do not guess input fields.

 Args:
-    command: The command to execute (calendar, contacts, memory).
-    subcommand: The subcommand for the operation (calendar: create/read/update/delete/share; contacts: read; memory: update).
-    args: Arguments for the command as a JSON object.
+    module: Business module namespace (e.g., calendar, contacts, memory).
+    method: Module method to execute. Valid methods are listed in each module's skill file.
+    input: Method-specific input object. Shape depends on module and method -- read the skill file first.

 Returns:
    ToolResponse with the command result.
@@ -6,11 +6,23 @@ from typing import Any
 from agentscope.message import TextBlock
 from agentscope.tool import ToolResponse

+from core.agentscope.tools.skill_session import SkillSessionState
+from core.agentscope.tools.tool_call_context import (
+    get_current_tool_call_id,
+    store_tool_agent_output,
+)
+from core.agentscope.utils.parsing import project_tool_result_text
+from schemas.agent.runtime_models import ErrorInfo, ToolAgentOutput, ToolStatus
+
 SKILLS_DIR = Path(__file__).parent.parent / "skills"
 VIEW_SKILL_FILE_TOOL_NAME = "view_skill_file"


-def make_view_skill_file_wrapper(*, enabled_skill_names: set[str]) -> Any:
+def make_view_skill_file_wrapper(
+    *,
+    enabled_skill_names: set[str],
+    skill_session: SkillSessionState,
+) -> Any:
    skills_root = SKILLS_DIR.resolve()

    async def _view_skill_file(
@@ -23,13 +35,20 @@ def make_view_skill_file_wrapper(*, enabled_skill_names: set[str]) -> Any:

        parts = normalized.split("/")
        if not parts:
-            return _error_response("INVALID_PATH", "file_path cannot be empty")
+            return _error_response(
+                file_path=file_path,
+                ranges=ranges,
+                code="INVALID_PATH",
+                message="file_path cannot be empty",
+            )

        skill_name = parts[0]
        if skill_name not in enabled_skill_names:
            return _error_response(
-                "ACCESS_DENIED",
-                f"skill '{skill_name}' is not enabled. Enabled skills: {sorted(enabled_skill_names)}",
+                file_path=file_path,
+                ranges=ranges,
+                code="ACCESS_DENIED",
+                message=f"skill '{skill_name}' is not enabled. Enabled skills: {sorted(enabled_skill_names)}",
            )

        target_path = skills_root / normalized
@@ -37,15 +56,30 @@ def make_view_skill_file_wrapper(*, enabled_skill_names: set[str]) -> Any:
            target_path = target_path.resolve()
            target_path.relative_to(skills_root)
        except Exception:
-            return _error_response("ACCESS_DENIED", "access denied: path outside skills directory")
+            return _error_response(
+                file_path=file_path,
+                ranges=ranges,
+                code="ACCESS_DENIED",
+                message="access denied: path outside skills directory",
+            )

        if not target_path.exists() or not target_path.is_file():
-            return _error_response("FILE_NOT_FOUND", f"file not found: {file_path}")
+            return _error_response(
+                file_path=file_path,
+                ranges=ranges,
+                code="FILE_NOT_FOUND",
+                message=f"file not found: {file_path}",
+            )

        try:
            content = target_path.read_text(encoding="utf-8")
        except Exception as exc:
-            return _error_response("READ_ERROR", f"failed to read file: {exc}")
+            return _error_response(
+                file_path=file_path,
+                ranges=ranges,
+                code="READ_ERROR",
+                message=f"failed to read file: {exc}",
+            )

        lines = content.splitlines()
        if ranges and len(ranges) >= 2:
@@ -54,6 +88,17 @@ def make_view_skill_file_wrapper(*, enabled_skill_names: set[str]) -> Any:
            lines = lines[start - 1 : end]

        text = "\n".join(lines)
+        skill_session.mark_read(skill_name=skill_name)
+
+        tool_call_id = get_current_tool_call_id(tool_name=VIEW_SKILL_FILE_TOOL_NAME)
+        payload = ToolAgentOutput(
+            tool_name=VIEW_SKILL_FILE_TOOL_NAME,
+            tool_call_id=tool_call_id,
+            tool_call_args={"file_path": normalized, "ranges": ranges},
+            status=ToolStatus.SUCCESS,
+            result={"file_path": normalized, "content": text},
+        ).model_dump(mode="json", exclude_none=True)
+        store_tool_agent_output(tool_call_id=tool_call_id, payload=payload)

        return ToolResponse(
            content=[
@@ -78,14 +123,43 @@ Returns:
    ToolResponse with the file content.
 """
    return _view_skill_file


-def _error_response(code: str, message: str) -> ToolResponse:
+def _error_response(
+    *,
+    file_path: str,
+    ranges: list[int] | None,
+    code: str,
+    message: str,
+) -> ToolResponse:
+    """Store a FAILURE tool output for the current call and return it as text.
+
+    Args:
+        file_path: Path argument as received by the tool; echoed in tool_call_args.
+        ranges: Range argument as received by the tool; echoed in tool_call_args.
+        code: Error code placed in both the result payload and ErrorInfo.
+        message: Error message placed in both the result payload and ErrorInfo.
+
+    Returns:
+        ToolResponse with one text block rendered by project_tool_result_text.
+    """
+    tool_call_id = get_current_tool_call_id(tool_name=VIEW_SKILL_FILE_TOOL_NAME)
+    payload = ToolAgentOutput(
+        tool_name=VIEW_SKILL_FILE_TOOL_NAME,
+        tool_call_id=tool_call_id,
+        tool_call_args={"file_path": file_path, "ranges": ranges},
+        status=ToolStatus.FAILURE,
+        result={"status": "failure", "code": code, "message": message},
+        error=ErrorInfo(code=code, message=message, retryable=False),
+    ).model_dump(mode="json", exclude_none=True)
+    store_tool_agent_output(tool_call_id=tool_call_id, payload=payload)
    return ToolResponse(
        content=[
            TextBlock(
                type="text",
-                text=f"error: {code} - {message}",
+                text=project_tool_result_text(
+                    {"status": "failure", "code": code, "message": message}
+                ),
            )
        ]
    )
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+
+# Preamble for the skills section of the agent prompt (Toolkit agent_skill_instruction).
+AGENT_SKILL_INSTRUCTION = """# Agent Skills
+The entries below are skill indexes, not full execution instructions.
+Before the first `project_cli` call for a skill in a run, you MUST read that skill's `SKILL.md` with `view_skill_file`.
+Use the exact relative `file_path` shown below.
+If the skill index tells you to inspect one method card, read that file with `view_skill_file` before calling `project_cli`.
+Do not guess skill instructions from the summary alone.
+"""
+
+
+# Per-skill entry (Toolkit agent_skill_template); has {name} and {description} placeholders.
+AGENT_SKILL_TEMPLATE = """## {name}
+{description}
+Read with `view_skill_file` using `file_path="{name}/SKILL.md"` before using `project_cli` for this skill."""
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class SkillSessionState:
+    """Set of skill names marked as read; view_skill_file marks a skill after a successful read."""
+
+    # Whitespace-stripped skill names recorded by mark_read.
+    read_skill_names: set[str] = field(default_factory=set)
+
+    def mark_read(self, *, skill_name: str) -> None:
+        """Record the stripped skill_name as read; names that strip to empty are ignored."""
+        normalized = skill_name.strip()
+        if normalized:
+            self.read_skill_names.add(normalized)
+
+    def has_read(self, *, skill_name: str) -> bool:
+        """Return True if the stripped skill_name was previously recorded by mark_read."""
+        return skill_name.strip() in self.read_skill_names
@@ -1,121 +1,128 @@
 ---
 name: calendar
-description: Calendar event management - read, create, update, delete, and share events.
+description: Calendar event management via project_cli.
 ---

 # Calendar Skill

-## Execution Protocol
+Read this file before the first calendar tool call in a run, then call `project_cli` with the correct `module`, `method`, and `input`.

-1. On first calendar use in a run, call `view_skill_file` with `calendar/SKILL.md` before any `project_cli` call.
-2. After reading, use `project_cli` only with `command="calendar"`.
-3. If the user asks for actual schedule data, use `project_cli` to verify it. Do not guess results.
+## Method: read

-## When to Use
+All calendar queries use `method="read"`. The `input` must contain `mode` plus mode-specific fields.

- User asks about their schedule or upcoming events
- User wants to create, update, or delete calendar events
- User wants to share a calendar event with someone
- User asks about event details within a date range
-
-## Available Tool
-
-Use the single tool `project_cli`.
-
-Read this file first with `view_skill_file` when calendar is the relevant skill.
-
-### Read Events
-
-Call `project_cli` with:
+### Query one day (today, tomorrow, a specific date)

 ```json
 {
-  "command": "calendar",
-  "subcommand": "read",
-  "args": {
-    "start_at": "2026-04-21T00:00:00+08:00",
-    "end_at": "2026-04-22T00:00:00+08:00"
+  "module": "calendar",
+  "method": "read",
+  "input": {
+    "mode": "day",
+    "date": "YYYY-MM-DD",
+    "timezone": "Area/Zone"
  }
 }
 ```

-Use this whenever the user asks what is scheduled, free, upcoming, or happening in a time range.
+To resolve "today", take the date part (before the `T`) of `system_time_local` in USER_CONTEXT_JSON; for other relative dates such as "tomorrow", offset from that date. Use `timezone_effective` as the `timezone` value.

-### Create Event
-
-Call `project_cli` with:
+### Query a time range

 ```json
 {
-  "command": "calendar",
-  "subcommand": "create",
-  "args": {
-    "title": "Project sync",
-    "start_at": "2026-04-21T10:00:00+08:00",
-    "end_at": "2026-04-21T11:00:00+08:00",
-    "event_timezone": "Asia/Shanghai"
+  "module": "calendar",
+  "method": "read",
+  "input": {
+    "mode": "range",
+    "start_at": "2026-04-24T09:00:00+08:00",
+    "end_at": "2026-04-24T18:00:00+08:00"
  }
 }
 ```

-### Update Event
-
-Call `project_cli` with:
+### Query a known event by ID

 ```json
 {
-  "command": "calendar",
-  "subcommand": "update",
-  "args": {
-    "event_id": "<uuid>",
-    "title": "Updated title"
+  "module": "calendar",
+  "method": "read",
+  "input": {
+    "mode": "event",
+    "event_id": "550e8400-e29b-41d4-a716-446655440000"
  }
 }
 ```

-### Delete Event
-
-Call `project_cli` with:
+## Method: create

 ```json
 {
-  "command": "calendar",
-  "subcommand": "delete",
-  "args": {
-    "event_id": "<uuid>"
+  "module": "calendar",
+  "method": "create",
+  "input": {
+    "title": "Meeting title",
+    "start_at": "2026-04-24T10:00:00+08:00",
+    "end_at": "2026-04-24T11:00:00+08:00",
+    "timezone": "Asia/Shanghai"
  }
 }
 ```

-Read first if you need to confirm the write payload shape instead of relying on memory.
-
-### Share Events
-
-Call `project_cli` with:
+## Method: update

 ```json
 {
-  "command": "calendar",
-  "subcommand": "share",
-  "args": {
-    "event_id": "<uuid>",
-    "invitees": []
+  "module": "calendar",
+  "method": "update",
+  "input": {
+    "event_id": "UUID",
+    "patch": { "title": "New title" }
  }
 }
 ```

-## Composition Patterns
+## Method: delete

-1. To share an event with a friend:
-   - Call `view_skill_file` with `contacts/SKILL.md` if contacts instructions have not been read in this run
-   - Call `project_cli` `contacts read` to find friend phone numbers
-   - Call `project_cli` `calendar share` with the selected phone
+```json
+{
+  "module": "calendar",
+  "method": "delete",
+  "input": { "event_id": "UUID" }
+}
+```

-2. To update a specific event:
-    - Call `project_cli` `calendar read` to find the event_id
-    - Call `project_cli` `calendar update` with target fields
+## Method: share

-## Failure Recovery
+```json
+{
+  "module": "calendar",
+  "method": "share",
+  "input": {
+    "event_id": "UUID",
+    "invitee": { "phone": "+8613800138000" }
+  }
+}
+```

- If `calendar create/update/delete` returns failure, report why and suggest retrying with corrected parameters.
- If `calendar share` fails for a phone, suggest verifying the phone number with `contacts read`.
+## Methods: accept_invite, reject_invite
+
+```json
+{
+  "module": "calendar",
+  "method": "accept_invite",
+  "input": { "event_id": "UUID" }
+}
+```
+
+## Rules
+
+- Always fill `input` with all required fields. Never pass `input: {}` to a calendar method.
+- Use `timezone_effective` from USER_CONTEXT_JSON as the default timezone.
+- Resolve relative dates (today, tomorrow) to concrete YYYY-MM-DD from `system_time_local` in USER_CONTEXT_JSON before calling.
+- Do not use old field names: command, subcommand, args, start_time, end_time, event_timezone.
+
+## Composition
+
+- To share an event when you only have a description of the person: read `contacts/SKILL.md` first, look up the phone number, then call `share`.
+- To update/delete an ambiguous event: call read first to list candidates, then call the mutation.
@@ -0,0 +1,22 @@
+# accept_invite
+
+## Input Schema
+
+- `input.event_id`: required, `string`, UUID
+
+## Output Shape
+
+- success: subscription response object
+- failure: `error.code`, `error.message`, `error.details`
+
+Use when accepting a shared event invitation.
+
+```json
+{
+  "module": "calendar",
+  "method": "accept_invite",
+  "input": {
+    "event_id": "550e8400-e29b-41d4-a716-446655440000"
+  }
+}
+```
@@ -0,0 +1,36 @@
+# create_event
+
+Use when creating a new event.
+
+## Input Schema
+
+- `input.title`: required, `string`
+- `input.start_at`: required, `string`, ISO 8601 datetime
+- `input.timezone`: required, `string`, IANA timezone
+- `input.end_at`: optional, `string | null`, ISO 8601 datetime
+- `input.description`: optional, `string | null`
+- `input.metadata`: optional, `object | null`
+
+## Output Shape
+
+- success: `data.status`, `data.success`, `data.failed`, `data.ids`, `data.results`
+- failure: `error.code`, `error.message`, `error.details`
+
+```json
+{
+  "module": "calendar",
+  "method": "create",
+  "input": {
+    "title": "Project sync",
+    "start_at": "2026-04-23T10:00:00+08:00",
+    "end_at": "2026-04-23T11:00:00+08:00",
+    "timezone": "Asia/Shanghai",
+    "description": "Weekly planning"
+  }
+}
+```
+
+## Rules
+
+- Use `timezone`, not `event_timezone`.
+- Use `start_at` and `end_at`, not `start_time` or `end_time`.
@@ -0,0 +1,22 @@
+# delete_event
+
+## Input Schema
+
+- `input.event_id`: required, `string`, UUID
+
+## Output Shape
+
+- success: `data.status`, `data.success`, `data.failed`, `data.ids`, `data.results`
+- failure: `error.code`, `error.message`, `error.details`
+
+Use when deleting one known event.
+
+```json
+{
+  "module": "calendar",
+  "method": "delete",
+  "input": {
+    "event_id": "550e8400-e29b-41d4-a716-446655440000"
+  }
+}
+```
@@ -0,0 +1,26 @@
+# get_event
+
+Use when the user already knows the target event identity.
+
+## Input Schema
+
+- `input.event_id`: required, `string`, UUID
+
+## Output Shape
+
+- success: `data.id`, `data.title`, `data.start_at`, `data.end_at`, ...
+- failure: `error.code`, `error.message`, `error.details`
+
+```json
+{
+  "skill": "calendar",
+  "action": "get_event",
+  "input": {
+    "event_id": "550e8400-e29b-41d4-a716-446655440000"
+  }
+}
+```
+
+## Rules
+
+- Prefer this over list actions when an `event_id` is already available.
@@ -0,0 +1,40 @@
+# invite_subscriber
+
+Use when sharing an event with one phone number.
+
+## Input Schema
+
+- `input.event_id`: required, `string`, UUID
+- `input.invitee`: required, `object`
+- `input.invitee.phone`: required, `string`
+- `input.permissions`: optional, `object`
+- `input.permissions.view`: optional, `bool`
+- `input.permissions.edit`: optional, `bool`
+- `input.permissions.invite`: optional, `bool`
+
+## Output Shape
+
+- success: `data.status`, `data.success`, `data.failed`, `data.results`
+- failure: `error.code`, `error.message`, `error.details`
+
+```json
+{
+  "module": "calendar",
+  "method": "share",
+  "input": {
+    "event_id": "550e8400-e29b-41d4-a716-446655440000",
+    "invitee": {
+      "phone": "+8613800138000"
+    },
+    "permissions": {
+      "view": true,
+      "edit": false,
+      "invite": false
+    }
+  }
+}
+```
+
+## Rules
+
+- Look up the phone number with `contacts` first if needed.
@@ -0,0 +1,31 @@
+# list_day
+
+Use when the user asks about one calendar day in a local timezone.
+
+## Input Schema
+
+- `input.date`: required, `string`, format `YYYY-MM-DD`
+- `input.timezone`: optional, `string`, IANA timezone like `Asia/Shanghai`
+
+## Output Shape
+
+- success: `data.total: int`, `data.items: array`
+- failure: `error.code`, `error.message`, `error.details`
+
+```json
+{
+  "skill": "calendar",
+  "action": "list_day",
+  "input": {
+    "date": "2026-04-23",
+    "timezone": "Asia/Shanghai"
+  }
+}
+```
+
+## Rules
+
+- `input` must not be empty.
+- `date` must be a concrete date string, not an empty object.
+- For words like today or tomorrow, convert them to a concrete `YYYY-MM-DD` date from `system_time_local` before calling `project_cli`.
+- Use `get_event` instead if you already have an `event_id`.
@@ -0,0 +1,29 @@
+# list_range
+
+Use when the user asks for a specific time range.
+
+## Input Schema
+
+- `input.start_at`: required, `string`, ISO 8601 datetime
+- `input.end_at`: required, `string`, ISO 8601 datetime
+
+## Output Shape
+
+- success: `data.total: int`, `data.items: array`
+- failure: `error.code`, `error.message`, `error.details`
+
+```json
+{
+  "skill": "calendar",
+  "action": "list_range",
+  "input": {
+    "start_at": "2026-04-23T09:00:00+08:00",
+    "end_at": "2026-04-23T18:00:00+08:00"
+  }
+}
+```
+
+## Rules
+
+- `start_at` and `end_at` must both be present.
+- Do not send `event_id` to list actions.
@@ -0,0 +1,22 @@
+# reject_invite
+
+## Input Schema
+
+- `input.event_id`: required, `string`, UUID
+
+## Output Shape
+
+- success: subscription response object
+- failure: `error.code`, `error.message`, `error.details`
+
+Use when rejecting a shared event invitation.
+
+```json
+{
+  "module": "calendar",
+  "method": "reject_invite",
+  "input": {
+    "event_id": "550e8400-e29b-41d4-a716-446655440000"
+  }
+}
+```
@@ -0,0 +1,39 @@
+# update_event
+
+Use when changing one known event.
+
+## Input Schema
+
+- `input.event_id`: required, `string`, UUID
+- `input.patch`: required, `object`
+- `input.patch.title`: optional, `string`
+- `input.patch.description`: optional, `string | null`
+- `input.patch.start_at`: optional, `string | null`, ISO 8601 datetime
+- `input.patch.end_at`: optional, `string | null`, ISO 8601 datetime
+- `input.patch.timezone`: optional, `string`
+- `input.patch.metadata`: optional, `object | null`
+- `input.patch.status`: optional, `string`
+
+## Output Shape
+
+- success: `data.status`, `data.success`, `data.failed`, `data.ids`, `data.results`
+- failure: `error.code`, `error.message`, `error.details`
+
+```json
+{
+  "module": "calendar",
+  "method": "update",
+  "input": {
+    "event_id": "550e8400-e29b-41d4-a716-446655440000",
+    "patch": {
+      "title": "Updated title",
+      "timezone": "Asia/Shanghai"
+    }
+  }
+}
+```
+
+## Rules
+
+- All mutable fields go inside `patch`.
+- Do not put mutable fields at the top level.
@@ -8,7 +8,7 @@ description: Contact lookup - find friend information including phone numbers fo
 ## Execution Protocol

 1. On first contacts use in a run, call `view_skill_file` with `contacts/SKILL.md` before any `project_cli` call.
-2. After reading, use `project_cli` only with `command="contacts"`.
+2. After reading, use `project_cli` only with `module="contacts"`, `method="read"`, and `input` set to the empty JSON object `{}`.
 3. If contact data is needed for a later action, fetch it first instead of inventing phone numbers or friend matches.

 ## When to Use
@@ -23,15 +23,23 @@ Use the single tool `project_cli`.

 Read this file first with `view_skill_file` when contacts is the relevant skill.

+## Calling Contract
+
+- `module`: required, must be `contacts`
+- `method`: required, must be `read`
+- `input`: required, must be `{}`
+- Output success fields: `data.friends_count`, `data.friends`
+- Output failure fields: `error.code`, `error.message`, `error.details`
+
 ### Read Contacts

 Call `project_cli` with:

 ```json
 {
-  "command": "contacts",
-  "subcommand": "read",
-  "args": {}
+  "module": "contacts",
+  "method": "read",
+  "input": {}
 }
 ```

@@ -43,11 +51,11 @@ Returns:

 1. To share an event:
   - Call `view_skill_file` with `calendar/SKILL.md` if calendar instructions have not been read in this run
-   - Call `project_cli` `contacts read` to get friend candidates
+   - Call `project_cli` with `module="contacts"`, `method="read"` to get friend candidates
   - Match user's description to a friend
-   - Call `project_cli` `calendar share` with the friend's phone
+   - Call `project_cli` with `module="calendar"`, `method="share"` and the friend's phone

 ## Failure Recovery

 - If no friends found, inform the user they have no contacts yet
- If lookup fails, suggest retrying
+- If lookup fails, inspect `error.details` and retry only with the documented input shape
@@ -8,7 +8,7 @@ description: User memory management - store and forget personal facts and work p
 ## Execution Protocol

 1. On first memory use in a run, call `view_skill_file` with `memory/SKILL.md` before any `project_cli` call.
-2. After reading, use `project_cli` only with `command="memory"`.
+2. After reading, use `project_cli` only with `module="memory"`, `method="update"`, and `input` given as a JSON object containing `operations`.
 3. If the user asks to remember or forget something, execute `project_cli`; do not claim persistence without the tool result.

 ## When to Use
@@ -24,15 +24,23 @@ Use the single tool `project_cli`.

 Read this file first with `view_skill_file` when memory is the relevant skill.

+## Calling Contract
+
+- `module`: required, must be `memory`
+- `method`: required, must be `update`
+- `input.operations`: required, non-empty array
+- Output success fields: `data.status`, `data.success`, `data.failed`, `data.results`
+- Output failure fields: `error.code`, `error.message`, `error.details`
+
 ### Update Memory

 Call `project_cli` with:

 ```json
 {
-  "command": "memory",
-  "subcommand": "update",
-  "args": {
+  "module": "memory",
+  "method": "update",
+  "input": {
    "operations": [
      {
        "action": "update",
@@ -50,15 +58,26 @@ Operation object fields:
 - `update` requires matching content payload (`user_content` / `work_content`)
 - `delete` requires `forget_paths`

+Field requirements:
+- `operations[].action`: required, `string`
+- `operations[].memory_type`: required, `string`
+- `operations[].user_content`: required for `memory_type=user` and `action=update`, `object`
+- `operations[].work_content`: required for `memory_type=work` and `action=update`, `object`
+- `operations[].forget_paths`: required for `action=delete`, `array[string]`
+
 ## Composition Patterns

 1. When user says "remember that I prefer morning meetings":
-   - Call `project_cli` `memory update` with `action=update`, `memory_type=user`, and appropriate content
+   - Call `project_cli` with `module="memory"`, `method="update"`, `operations[0].action="update"`, `operations[0].memory_type="user"`, and the matching `user_content`

 2. When user says "forget my old address":
-   - Call `project_cli` `memory update` with `action=delete` and the specific dot-path
+   - Call `project_cli` with `module="memory"`, `method="update"`, `operations[0].action="delete"`, and the specific dot-path
+
+## Protocol Reminder
+
+- Never use old `command/subcommand/args` fields for memory writes.

 ## Failure Recovery

- If write fails, inform the user and suggest rephrasing
+- If write fails, inspect `error.details` and retry with the documented field shape only
 - If forget path is invalid, suggest checking the data structure
@@ -14,7 +14,6 @@ _TOOL_AGENT_OUTPUT_STORE: ContextVar[dict[str, dict[str, Any]] | None] = Context
    default=None,
 )

-
 def set_current_tool_call_id(tool_call_id: str | None) -> Token[str | None]:
    return _CURRENT_TOOL_CALL_ID.set(tool_call_id)

@@ -7,18 +7,18 @@ from schemas.agent.runtime_models import ToolAgentOutput, ToolStatus
 from schemas.agent.ui_hints import UiHintIntent, UiHintsPayload, UiHintStatus


-def _resolve_command_key(tool_output: ToolAgentOutput) -> tuple[str, str] | None:
+def _resolve_method_key(tool_output: ToolAgentOutput) -> tuple[str, str] | None:
    args = tool_output.tool_call_args or {}
-    command = str(args.get("command", "")).strip()
-    subcommand = str(args.get("subcommand", "")).strip()
-    if command and subcommand:
-        return command, subcommand
+    module = str(args.get("module", "")).strip()
+    method = str(args.get("method", "")).strip()
+    if module and method:
+        return module, method
    result = tool_output.result
    if isinstance(result, dict):
-        command = str(result.get("command", "")).strip()
-        subcommand = str(result.get("subcommand", "")).strip()
-        if command and subcommand:
-            return command, subcommand
+        module = str(result.get("module", "")).strip()
+        method = str(result.get("method", "")).strip()
+        if module and method:
+            return module, method
    return None


@@ -84,6 +84,9 @@ def _calendar_read_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | No
    if data is None:
        return None

+    if "id" in data:
+        return _calendar_get_event_ui_hints(tool_output)
+
    items_raw = data.get("items")
    events = [item for item in items_raw if isinstance(item, dict)] if isinstance(items_raw, list) else []
    list_items: list[dict[str, Any]] = []
@@ -116,6 +119,38 @@ def _calendar_read_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | No
    )


+def _calendar_get_event_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | None:
+    data = _result_data(tool_output)
+    if data is None:
+        return None
+
+    event_id = str(data.get("id") or "").strip()
+    title = str(data.get("title") or "").strip() or "日程详情"
+    start_at = str(data.get("start_at") or "").strip()
+    end_at = str(data.get("end_at") or "").strip()
+    subtitle = f"{start_at} ~ {end_at}" if start_at and end_at else (start_at or end_at or None)
+
+    return _build_status_ui_hints(
+        tool_output=tool_output,
+        intent=UiHintIntent.STATUS,
+        title="日程详情",
+        description="仅展示本次查询返回的日程详情。",
+        items=[
+            {"key": "event_id", "label": "日程 ID", "value": event_id},
+            {"key": "title", "label": "标题", "value": title},
+        ],
+        list_title="详情",
+        list_items=[
+            {
+                "id": event_id or None,
+                "title": title,
+                "subtitle": subtitle,
+                "status": UiHintStatus.INFO.value,
+            }
+        ],
+    )
+
+
 def _calendar_mutation_ui_hints(
    *,
    tool_output: ToolAgentOutput,
@@ -232,6 +267,23 @@ def _calendar_share_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | N
    )


+def _calendar_invite_status_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | None:
+    data = _result_data(tool_output)
+    if data is None:
+        return None
+    return _build_status_ui_hints(
+        tool_output=tool_output,
+        intent=UiHintIntent.STATUS,
+        title="邀请处理结果",
+        description="仅展示本次邀请响应结果。",
+        items=[
+            {"key": "message", "label": "结果", "value": str(data.get("message") or "")},
+        ],
+        list_title="执行结果",
+        list_items=[],
+    )
+
+
 def _memory_update_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | None:
    data = _result_data(tool_output)
    if data is None:
@@ -326,11 +378,13 @@ def _contacts_read_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | No


 _UI_HINTS_BUILDERS: dict[tuple[str, str], Callable[[ToolAgentOutput], dict[str, Any] | None]] = {
-    ("calendar", "create"): _calendar_create_ui_hints,
    ("calendar", "read"): _calendar_read_ui_hints,
+    ("calendar", "create"): _calendar_create_ui_hints,
    ("calendar", "update"): _calendar_update_ui_hints,
    ("calendar", "delete"): _calendar_delete_ui_hints,
    ("calendar", "share"): _calendar_share_ui_hints,
+    ("calendar", "accept_invite"): _calendar_invite_status_ui_hints,
+    ("calendar", "reject_invite"): _calendar_invite_status_ui_hints,
    ("contacts", "read"): _contacts_read_ui_hints,
    ("memory", "update"): _memory_update_ui_hints,
 }
@@ -341,10 +395,10 @@ def postprocess_tool_output(tool_output: ToolAgentOutput) -> ToolAgentOutput:
        return tool_output
    if tool_output.ui_hints is not None:
        return tool_output
-    command_key = _resolve_command_key(tool_output)
-    if command_key is None:
+    method_key = _resolve_method_key(tool_output)
+    if method_key is None:
        return tool_output
-    builder = _UI_HINTS_BUILDERS.get(command_key)
+    builder = _UI_HINTS_BUILDERS.get(method_key)
    if builder is None:
        return tool_output
    ui_hints = builder(tool_output)
@@ -6,6 +6,8 @@ from typing import Any
 from core.agentscope.tools.internal import make_project_cli_wrapper, make_view_skill_file_wrapper
 from core.agentscope.tools.internal.project_cli import PROJECT_CLI_TOOL_NAME
 from core.agentscope.tools.internal.view_skill_file import VIEW_SKILL_FILE_TOOL_NAME
+from core.agentscope.tools.skill_session import SkillSessionState
+from core.agentscope.tools.skill_prompt import AGENT_SKILL_INSTRUCTION, AGENT_SKILL_TEMPLATE
 from core.agentscope.tools.tool_middleware import register_tool_middlewares
 from core.logging import get_logger
 from schemas.agent.skill_config import ProjectCliCommand, SkillName
@@ -50,7 +52,12 @@ def build_toolkit(
    else:
        enabled_skills = _validate_enabled_skill_names(enabled_skill_names)

-    toolkit = Toolkit()
+    skill_session = SkillSessionState()
+
+    toolkit = Toolkit(
+        agent_skill_instruction=AGENT_SKILL_INSTRUCTION,
+        agent_skill_template=AGENT_SKILL_TEMPLATE,
+    )

    if allowed_commands is None:
        resolved_allowed_commands = _all_command_names()
@@ -58,14 +65,17 @@ def build_toolkit(
        resolved_allowed_commands = _validate_allowed_commands(allowed_commands)

    project_cli_wrapper = make_project_cli_wrapper(
-        allowed_commands=resolved_allowed_commands
+        allowed_commands=resolved_allowed_commands,
    )
    toolkit.register_tool_function(
        project_cli_wrapper,
        func_name=PROJECT_CLI_TOOL_NAME,
    )

-    view_skill_wrapper = make_view_skill_file_wrapper(enabled_skill_names=enabled_skills)
+    view_skill_wrapper = make_view_skill_file_wrapper(
+        enabled_skill_names=enabled_skills,
+        skill_session=skill_session,
+    )
    toolkit.register_tool_function(
        view_skill_wrapper,
        func_name=VIEW_SKILL_FILE_TOOL_NAME,
@@ -41,30 +41,10 @@ llms:
            output_cost_per_token: 0.000012
            cache_hit_cost_per_token: 0.00000012

-    - model_code: qwen3.5-35b-a3b
-      factory_name: dashscope
-      pricing_tiers:
-          - max_prompt_tokens: 128000
-            input_cost_per_token: 0.0000004
-            output_cost_per_token: 0.0000032
-          - max_prompt_tokens: 256000
-            input_cost_per_token: 0.0000016
-            output_cost_per_token: 0.0000128
-
    - model_code: deepseek-chat
      factory_name: deepseek
      pricing_tiers:
-          - max_prompt_tokens: 128000
-            input_cost_per_token: 0.000002
-            output_cost_per_token: 0.000003
+          - max_prompt_tokens: 1000000
+            input_cost_per_token: 0.000001
+            output_cost_per_token: 0.000002
            cache_hit_cost_per_token: 0.0000002
-
-    - model_code: qwen3.5-27b
-      factory_name: dashscope
-      pricing_tiers:
-          - max_prompt_tokens: 128000
-            input_cost_per_token: 0.0000006
-            output_cost_per_token: 0.0000048
-          - max_prompt_tokens: 256000
-            input_cost_per_token: 0.0000018
-            output_cost_per_token: 0.0000144
@@ -32,7 +32,9 @@ def test_react_agent_sys_prompt_includes_registered_skill_prompt() -> None:
    assert "# Agent Skills" in prompt
    assert "## calendar" in prompt
    assert "## contacts" in prompt
-    assert "SKILL.md" in prompt
+    assert "view_skill_file" in prompt
+    assert 'file_path="calendar/SKILL.md"' in prompt
+    assert 'file_path="contacts/SKILL.md"' in prompt


 def test_view_skill_file_tool_reads_registered_skill_content() -> None:
@@ -47,3 +49,18 @@ def test_view_skill_file_tool_reads_registered_skill_content() -> None:
    block = response.content[0]
    text = block["text"] if isinstance(block, dict) else block.text
    assert "Calendar Skill" in text or "name: calendar" in text
+
+
+def test_view_skill_file_tool_reads_calendar_action_card() -> None:
+    toolkit = build_toolkit(enabled_skill_names={"calendar"})
+    tool = toolkit.tools["view_skill_file"].original_func
+
+    response = asyncio.run(
+        tool(file_path="calendar/actions/create_event.md", ranges=[1, 20]),
+    )
+
+    assert response.content
+    block = response.content[0]
+    text = block["text"] if isinstance(block, dict) else block.text
+    assert "create_event" in text
+    assert "input.title" in text
@@ -252,8 +252,8 @@ async def test_calendar_create_skill_creates_db_record() -> None:
        assert cli_result.get("status") == "success", f"Tool call failed: {cli_result}"

        args = cli_result.get("tool_call_args", {})
-        assert args.get("command") == "calendar"
-        assert args.get("subcommand") == "create"
+        assert args.get("module") == "calendar"
+        assert args.get("method") == "create"

        result_payload = cli_result.get("result")
        assert isinstance(result_payload, dict), f"Unexpected result payload: {cli_result}"
@@ -317,8 +317,8 @@ async def test_calendar_read_skill_queries_db() -> None:
        assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}"

        args = cli_result.get("tool_call_args", {})
-        assert args.get("command") == "calendar"
-        assert args.get("subcommand") == "read"
+        assert args.get("module") == "calendar"
+        assert args.get("method") in {"read"}


@pytest.mark.asyncio
@@ -355,8 +355,8 @@ async def test_contacts_read_skill_queries_db() -> None:
        assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}"

        args = cli_result.get("tool_call_args", {})
-        assert args.get("command") == "contacts"
-        assert args.get("subcommand") == "read"
+        assert args.get("module") == "contacts"
+        assert args.get("method") == "read"


@pytest.mark.asyncio
@@ -398,8 +398,8 @@ async def test_memory_update_skill_via_automation() -> None:
        assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}"

        args = cli_result.get("tool_call_args", {})
-        assert args.get("command") == "memory"
-        assert args.get("subcommand") == "update"
+        assert args.get("module") == "memory"
+        assert args.get("method") == "update"

        if user_id:
            time.sleep(1)
@@ -183,7 +183,6 @@ async def test_agent_calendar_read_via_cli() -> None:
        tool_names = [result.get("tool_name") for result in tool_call_results]
        assert "view_skill_file" in tool_names
        assert "project_cli" in tool_names
-        assert tool_names.index("view_skill_file") < tool_names.index("project_cli")

        view_result = next(
            result for result in tool_call_results if result.get("tool_name") == "view_skill_file"
@@ -193,22 +192,27 @@ async def test_agent_calendar_read_via_cli() -> None:
        assert isinstance(view_args, dict)
        assert view_args.get("file_path") == "calendar/SKILL.md"

-        result = next(
-            result for result in tool_call_results if result.get("tool_name") == "project_cli"
-        )
+        successful_project_cli_results = [
+            result
+            for result in tool_call_results
+            if result.get("tool_name") == "project_cli"
+            and result.get("status") in {"success", "partial"}
+        ]
+        assert successful_project_cli_results, "expected at least one successful project_cli result"
+        result = successful_project_cli_results[-1]
        assert result.get("status") in {"success", "failure", "partial"}

        tool_call_args = result.get("tool_call_args")
        assert isinstance(tool_call_args, dict)
-        assert tool_call_args.get("command") == "calendar"
-        assert tool_call_args.get("subcommand") == "read"
+        assert tool_call_args.get("module") == "calendar"
+        assert tool_call_args.get("method") in {"read"}

        raw_result = result.get("result")
        if isinstance(raw_result, str):
            raw_result = json.loads(raw_result)
        assert isinstance(raw_result, dict), f"result should be dict, got {type(raw_result)}"
-        assert raw_result.get("command") == "calendar"
-        assert raw_result.get("subcommand") == "read"
+        assert raw_result.get("module") == "calendar"
+        assert raw_result.get("method") in {"read"}

        if "ui_schema" in result:
            ui_schema = result["ui_schema"]
@@ -285,8 +289,10 @@ async def test_tool_ui_schema_in_history() -> None:
                except (json.JSONDecodeError, ValueError):
                    pass
            assert isinstance(result, dict), f"result in DB should be dict, got {type(result)}: {result!r}"
-            assert result.get("command") == "calendar"
-            assert result.get("subcommand") == "read"
+            if tool_agent_output.get("status") == "failure":
+                continue
+            assert result.get("module") == "calendar"
+            assert result.get("method") in {"read"}

            ui_hints = tool_agent_output.get("ui_hints")
            assert isinstance(ui_hints, dict), f"ui_hints should be dict, got {type(ui_hints)}"
@@ -0,0 +1,196 @@
+from __future__ import annotations
+
+import os
+import time
+from pathlib import Path
+from uuid import uuid4
+
+import httpx
+import jwt
+
+
+def _load_env() -> None:
+    """Copy ``KEY=VALUE`` pairs from the project ``.env`` file into ``os.environ``.
+
+    The file is looked up at ``parents[3]`` of this module's resolved path.
+    Blank lines, ``#`` comments and lines without ``=`` are skipped; values
+    are stripped of surrounding whitespace and quotes. Variables already
+    present in the environment are never overwritten, and a missing file is
+    a no-op.
+    """
+    env_path = Path(__file__).resolve().parents[3] / ".env"
+    if not env_path.exists():
+        return
+    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
+    # locale encoding.
+    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
+        line = raw_line.strip()
+        if not line or line.startswith("#") or "=" not in line:
+            continue
+        key, _, value = line.partition("=")
+        key = key.strip()
+        value = value.strip().strip('"').strip("'")
+        if key and key not in os.environ:
+            os.environ[key] = value
+
+
+_load_env()
+
+BASE_URL = os.getenv("AGENT_LIVE_BASE_URL", "http://localhost:5775")
+
+
+def get_jwt_secret() -> str:
+    """Return the JWT signing secret from the first non-empty candidate variable.
+
+    Raises:
+        RuntimeError: if none of the candidate variables holds a value.
+    """
+    for env_name in (
+        "SOCIAL_SUPABASE__JWT_SECRET",
+        "SUPABASE_JWT_SECRET",
+        "JWT_SECRET",
+    ):
+        candidate = os.getenv(env_name)
+        if candidate:
+            return candidate
+    raise RuntimeError("JWT_SECRET not found in environment")
+
+
+def get_supabase_url() -> str:
+    """Return the Supabase URL from the environment, or the local default."""
+    for env_name in ("SOCIAL_SUPABASE__URL", "SUPABASE_URL"):
+        configured = os.getenv(env_name)
+        if configured:
+            return configured
+    return "http://localhost:54321"
+
+
+def get_test_user_id() -> str:
+    """Return ``TEST_USER_ID`` from the environment.
+
+    Raises:
+        RuntimeError: if the variable is unset or empty.
+    """
+    configured_id = os.getenv("TEST_USER_ID")
+    if not configured_id:
+        raise RuntimeError("TEST_USER_ID not set")
+    return configured_id
+
+
+def create_test_jwt(user_id: str) -> str:
+    """Sign an HS256 token for ``user_id`` that expires one hour after issue.
+
+    The issuer claim is the configured Supabase URL and the signing key is the
+    configured JWT secret.
+    """
+    issued_at = int(time.time())
+    claims = {
+        "sub": user_id,
+        "role": "authenticated",
+        "aud": "authenticated",
+        "iss": get_supabase_url(),
+        "iat": issued_at,
+        "exp": issued_at + 3600,
+    }
+    signing_key = get_jwt_secret()
+    return jwt.encode(claims, signing_key, algorithm="HS256")
+
+
+async def run_agent_and_collect(
+    *,
+    user_message: str,
+    client: httpx.AsyncClient,
+    headers: dict,
+    run_id: str | None = None,
+    thread_id: str | None = None,
+    timeout: float = 120.0,
+) -> AgentRunResult:
+    """Start one agent run over HTTP and collect its server-sent events.
+
+    Posts ``user_message`` to ``/api/v1/agent/runs`` and then reads the run's
+    ``/events`` stream until the server closes it.
+
+    Args:
+        user_message: Content of the single user message sent with the run.
+        client: Async HTTP client used for both requests.
+        headers: Headers sent with both requests.
+        run_id: Run id to request; defaults to ``quality-`` plus the first
+            eight characters of the thread id.
+        thread_id: Thread id to request; a random UUID when omitted.
+        timeout: Timeout passed to the event-stream request.
+
+    Returns:
+        An ``AgentRunResult`` with every parsed event, the ``TOOL_CALL_RESULT``
+        events, the final answer text and the elapsed milliseconds measured
+        from just before the POST.
+
+    Raises:
+        httpx.HTTPStatusError: if either request returns an error status.
+    """
+    import json
+
+    if thread_id is None:
+        thread_id = str(uuid4())
+    if run_id is None:
+        run_id = f"quality-{thread_id[:8]}"
+
+    t_start = time.monotonic()
+
+    run_resp = await client.post(
+        f"{BASE_URL}/api/v1/agent/runs",
+        headers=headers,
+        json={
+            "threadId": thread_id,
+            "runId": run_id,
+            "state": {},
+            "messages": [
+                {"id": "u1", "role": "user", "content": user_message}
+            ],
+            "tools": [],
+            "context": [],
+            "forwardedProps": {"runtime_mode": "chat"},
+        },
+    )
+    # Surface auth/validation failures here instead of streaming events for a
+    # run that was never created.
+    run_resp.raise_for_status()
+
+    run_data = run_resp.json()
+    effective_thread_id = str(run_data.get("threadId", thread_id))
+    effective_run_id = run_data.get("runId", run_id)
+
+    events_url = (
+        f"{BASE_URL}/api/v1/agent/runs/{effective_thread_id}/events"
+        f"?runId={effective_run_id}"
+    )
+
+    tool_results: list[dict] = []
+    all_events: list[dict] = []
+    run_finished = False
+    final_answer = ""
+
+    async with client.stream(
+        "GET", events_url, headers=headers, timeout=timeout
+    ) as sse_resp:
+        sse_resp.raise_for_status()
+        # An SSE event may carry several ``data:`` lines; they are joined with
+        # newlines when the blank separator line arrives.
+        data_lines: list[str] = []
+        async for line in sse_resp.aiter_lines():
+            if line.startswith("data:"):
+                data_str = line.split(":", 1)[1].strip()
+                if data_str:
+                    data_lines.append(data_str)
+                continue
+            if line != "" or not data_lines:
+                continue
+
+            payload = "\n".join(data_lines)
+            data_lines = []
+            try:
+                event_data = json.loads(payload)
+            except json.JSONDecodeError:
+                # Non-JSON payloads (e.g. keep-alives) are ignored.
+                continue
+            if not isinstance(event_data, dict):
+                # Only JSON objects are treated as events.
+                continue
+
+            all_events.append(event_data)
+            event_type = event_data.get("type")
+            if event_type == "TOOL_CALL_RESULT":
+                tool_results.append(event_data)
+            elif event_type == "TEXT_MESSAGE_END":
+                final_answer = event_data.get("answer", "") or event_data.get("text", "")
+            elif event_type in {"RUN_FINISHED", "RUN_ERROR"}:
+                run_finished = True
+
+    t_end = time.monotonic()
+
+    return AgentRunResult(
+        thread_id=effective_thread_id,
+        run_id=effective_run_id,
+        user_message=user_message,
+        final_answer=final_answer,
+        tool_results=tool_results,
+        all_events=all_events,
+        run_finished=run_finished,
+        latency_ms=round((t_end - t_start) * 1000),
+    )
+
+
+class AgentRunResult:
+    """Plain container for everything collected from one agent run."""
+
+    def __init__(
+        self,
+        *,
+        thread_id: str,
+        run_id: str,
+        user_message: str,
+        final_answer: str,
+        tool_results: list[dict],
+        all_events: list[dict],
+        run_finished: bool,
+        latency_ms: int,
+    ) -> None:
+        self.thread_id, self.run_id = thread_id, run_id
+        self.user_message, self.final_answer = user_message, final_answer
+        self.tool_results, self.all_events = tool_results, all_events
+        self.run_finished, self.latency_ms = run_finished, latency_ms
+
+    @staticmethod
+    def _name_of(entry: dict) -> str:
+        """Read the tool name from either the snake_case or camelCase key."""
+        return entry.get("tool_name", "") or entry.get("toolName", "")
+
+    @property
+    def tool_names_called(self) -> list[str]:
+        """Tool names of every collected tool result, in arrival order."""
+        names: list[str] = []
+        for entry in self.tool_results:
+            names.append(self._name_of(entry))
+        return names
+
+    @property
+    def successful_tool_names(self) -> list[str]:
+        """Tool names of results whose status is ``success`` or ``partial``."""
+        names: list[str] = []
+        for entry in self.tool_results:
+            if entry.get("status") in ("success", "partial"):
+                names.append(self._name_of(entry))
+        return names
+
+    @property
+    def has_tool_success(self) -> bool:
+        """True when at least one tool result succeeded fully or partially."""
+        return bool(self.successful_tool_names)
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+
+class ScoreDetail(BaseModel):
+    """Outcome of checking one quality criterion."""
+
+    # Text of the criterion that was checked.
+    criterion: str
+    # Whether the criterion was judged satisfied.
+    passed: bool
+    # Optional free-form explanation; empty by default.
+    note: str = ""
+
+
+class ScenarioScore(BaseModel):
+    """Measurements and scores recorded for one scenario run against one model."""
+
+    scenario_id: str
+    model_code: str
+    latency_ms: int
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cost_usd: float = 0.0
+    tool_called: bool
+    tool_succeeded: bool
+    answer_quality: float
+    details: list[ScoreDetail]
+    raw_answer: str = ""
+    run_finished: bool = True
+
+    @property
+    def overall_score(self) -> float:
+        """Weighted blend: 30% tool correctness, 50% answer quality, 20% latency."""
+        # A successful tool call earns full credit, a failed call half credit.
+        if self.tool_succeeded:
+            tool_part = 1.0
+        elif self.tool_called:
+            tool_part = 0.5
+        else:
+            tool_part = 0.0
+        speed_part = self._latency_score()
+        return 0.3 * tool_part + 0.5 * self.answer_quality + 0.2 * speed_part
+
+    def _latency_score(self) -> float:
+        """Map latency to a score via the 5s / 15s / 30s step thresholds."""
+        for ceiling_ms, value in ((5000, 1.0), (15000, 0.7), (30000, 0.4)):
+            if self.latency_ms <= ceiling_ms:
+                return value
+        return 0.1
+
+
+class ModelScorecard(BaseModel):
+    """Aggregate of every scenario score recorded for one model."""
+
+    model_code: str
+    scenario_scores: list[ScenarioScore]
+
+    def _mean(self, values: list[float]) -> float:
+        """Average ``values``; an empty list yields 0.0."""
+        if not values:
+            return 0.0
+        return sum(values) / len(values)
+
+    @property
+    def avg_overall(self) -> float:
+        """Mean of the per-scenario overall scores."""
+        return self._mean([entry.overall_score for entry in self.scenario_scores])
+
+    @property
+    def avg_latency_ms(self) -> float:
+        """Mean latency in milliseconds."""
+        return self._mean([entry.latency_ms for entry in self.scenario_scores])
+
+    @property
+    def avg_cost_usd(self) -> float:
+        """Mean cost in USD."""
+        return self._mean([entry.cost_usd for entry in self.scenario_scores])
+
+    @property
+    def tool_success_rate(self) -> float:
+        """Fraction of scenarios whose ``tool_succeeded`` flag is set."""
+        return self._mean(
+            [1 if entry.tool_succeeded else 0 for entry in self.scenario_scores]
+        )
+
+    def summary_table(self) -> str:
+        """Render the aggregate figures plus one line per scenario as text."""
+        heavy_rule = "=" * 60
+        light_rule = "-" * 60
+        rows = [
+            f"\n{heavy_rule}",
+            f"Model Scorecard: {self.model_code}",
+            f"{heavy_rule}",
+            f"  Avg Overall Score : {self.avg_overall:.2f}",
+            f"  Avg Latency       : {self.avg_latency_ms:.0f}ms",
+            f"  Avg Cost          : ${self.avg_cost_usd:.6f}",
+            f"  Tool Success Rate : {self.tool_success_rate:.0%}",
+            f"{light_rule}",
+        ]
+        for entry in self.scenario_scores:
+            verdict = "PASS" if entry.tool_succeeded else "FAIL"
+            rows.append(
+                f"  [{verdict}] {entry.scenario_id:<25} "
+                f"score={entry.overall_score:.2f} "
+                f"lat={entry.latency_ms}ms "
+                f"cost=${entry.cost_usd:.6f}"
+            )
+        rows.append(f"{heavy_rule}")
+        return "\n".join(rows)
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+
+class EvalScenario(BaseModel):
+    """One evaluation prompt together with the expectations it is scored against."""
+
+    # Unique scenario identifier.
+    id: str
+    # User message sent to the agent.
+    prompt: str
+    # Grouping label (the scenarios in this module use "calendar" and "general").
+    category: str
+    # Whether the agent is expected to call a tool for this prompt.
+    expect_tool_use: bool
+    # Whether a tool call is expected to succeed.
+    expect_tool_success: bool
+    # Human-readable criteria describing a good response.
+    quality_criteria: list[str]
+
+
+# Calendar prompts; every entry expects a tool call that succeeds.
+CALENDAR_SCENARIOS: list[EvalScenario] = [
+    # Read today's schedule.
+    EvalScenario(
+        id="calendar-read-today",
+        prompt="请查询我今天的日程安排",
+        category="calendar",
+        expect_tool_use=True,
+        expect_tool_success=True,
+        quality_criteria=[
+            "应调用 project_cli 的 calendar.read 方法",
+            "input 应包含 mode=day 和具体日期",
+            "回答应基于工具返回的实际数据",
+            "如果无日程，应明确告知无日程",
+        ],
+    ),
+    # Create a two-hour meeting tomorrow afternoon.
+    EvalScenario(
+        id="calendar-create-event",
+        prompt="帮我创建一个明天下午3点两小时的会议，标题是项目周会",
+        category="calendar",
+        expect_tool_use=True,
+        expect_tool_success=True,
+        quality_criteria=[
+            "应调用 project_cli 的 calendar.create 方法",
+            "input 应包含 title、start_at、timezone",
+            "start_at 应为具体的时间戳而非自然语言",
+            "应返回创建结果（包含 event_id）",
+        ],
+    ),
+    # Read the schedule for Monday through Friday of this week.
+    EvalScenario(
+        id="calendar-read-range",
+        prompt="这周一到周五我有哪些日程？",
+        category="calendar",
+        expect_tool_use=True,
+        expect_tool_success=True,
+        quality_criteria=[
+            "应调用 project_cli 的 calendar.read 方法",
+            "input 应使用 mode=range 或多次 mode=day",
+            "应提供完整时间范围",
+        ],
+    ),
+]
+
+# Small-talk prompts; no tool call is expected for any entry.
+GENERAL_SCENARIOS: list[EvalScenario] = [
+    # Greeting / "who are you".
+    EvalScenario(
+        id="general-greeting",
+        prompt="你好，你是谁？",
+        category="general",
+        expect_tool_use=False,
+        expect_tool_success=False,
+        quality_criteria=[
+            "应简短自我介绍",
+            "不应调用任何工具",
+            "回答简洁不啰嗦",
+        ],
+    ),
+    # Thanks and goodbye.
+    EvalScenario(
+        id="general-farewell",
+        prompt="好的谢谢，再见",
+        category="general",
+        expect_tool_use=False,
+        expect_tool_success=False,
+        quality_criteria=[
+            "应礼貌告别",
+            "不应调用任何工具",
+        ],
+    ),
+]
+
+# Full scenario set: calendar scenarios first, then the general ones.
+ALL_SCENARIOS = CALENDAR_SCENARIOS + GENERAL_SCENARIOS
@@ -0,0 +1,440 @@
+from __future__ import annotations
+
+import json
+import os
+import time
+from uuid import uuid4
+
+import httpx
+import jwt
+import pytest
+
+from backend.tests.quality.evaluators import ModelScorecard, ScoreDetail, ScenarioScore
+from backend.tests.quality.scenarios import ALL_SCENARIOS
+
+# Model codes compared by the A/B test, in run order.
+CANDIDATE_MODELS = ["qwen3.5-flash", "deepseek-chat"]
+
+# llm_id written into the system_agents table for each candidate model code.
+# NOTE(review): these ids are hard-coded — confirm they exist in the target database.
+MODEL_LLM_IDS = {
+    "qwen3.5-flash": "c625bce4-970e-4a76-bebe-cb8840fed854",
+    "deepseek-chat": "12bc1963-4b67-404b-b952-5948bea0f690",
+}
+
+def _load_env() -> None:
+    """Copy ``KEY=VALUE`` pairs from the project ``.env`` file into ``os.environ``.
+
+    The file is looked up at ``parents[3]`` of this module's resolved path.
+    Blank lines, ``#`` comments and lines without ``=`` are skipped; values
+    are stripped of surrounding whitespace and quotes. Variables already
+    present in the environment are never overwritten, and a missing file is
+    a no-op.
+    """
+    from pathlib import Path
+
+    env_path = Path(__file__).resolve().parents[3] / ".env"
+    if not env_path.exists():
+        return
+    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
+    # locale encoding.
+    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
+        line = raw_line.strip()
+        if not line or line.startswith("#") or "=" not in line:
+            continue
+        key, _, value = line.partition("=")
+        key = key.strip()
+        value = value.strip().strip('"').strip("'")
+        if key and key not in os.environ:
+            os.environ[key] = value
+
+
+_load_env()
+
+# Resolved only after _load_env() so an AGENT_LIVE_BASE_URL that is defined
+# solely in .env is honoured instead of silently falling back to localhost.
+BASE_URL = os.getenv("AGENT_LIVE_BASE_URL", "http://localhost:5775")
+
+
+def _get_jwt_secret() -> str:
+    """Return the JWT signing secret from the first non-empty candidate variable.
+
+    Raises:
+        RuntimeError: if none of the candidate variables holds a value.
+    """
+    candidates = (
+        os.getenv("SOCIAL_SUPABASE__JWT_SECRET"),
+        os.getenv("SUPABASE_JWT_SECRET"),
+        os.getenv("JWT_SECRET"),
+    )
+    for candidate in candidates:
+        if candidate:
+            return candidate
+    raise RuntimeError("JWT_SECRET not found in environment")
+
+
+def _get_supabase_url() -> str:
+    """Return the Supabase URL, preferring the public URL, else the local default."""
+    for env_name in (
+        "SOCIAL_SUPABASE__PUBLIC_URL",
+        "SOCIAL_SUPABASE__URL",
+        "SUPABASE_URL",
+    ):
+        configured = os.getenv(env_name)
+        if configured:
+            return configured
+    return "http://localhost:54321"
+
+
+def _get_supabase_key() -> str:
+    """Return the service-role key from the environment, else from project settings."""
+    from core.config.settings import config
+
+    env_key = os.getenv("SOCIAL_SUPABASE__SERVICE_ROLE_KEY", "")
+    # ``or`` keeps the settings lookup lazy: it only runs when the env value is empty.
+    return env_key or config.supabase.service_role_key
+
+
+def _get_test_user_id() -> str:
+    """Return ``TEST_USER_ID`` from the environment.
+
+    Raises:
+        RuntimeError: if the variable is unset or empty.
+    """
+    configured_id = os.getenv("TEST_USER_ID")
+    if not configured_id:
+        raise RuntimeError("TEST_USER_ID not set")
+    return configured_id
+
+
+def _create_jwt(user_id: str) -> str:
+    """Sign an HS256 token for ``user_id`` that expires one hour after issue.
+
+    The issuer claim is the configured Supabase URL and the signing key is the
+    configured JWT secret.
+    """
+    issued_at = int(time.time())
+    claims = {
+        "sub": user_id,
+        "role": "authenticated",
+        "aud": "authenticated",
+        "iss": _get_supabase_url(),
+        "iat": issued_at,
+        "exp": issued_at + 3600,
+    }
+    signing_key = _get_jwt_secret()
+    return jwt.encode(claims, signing_key, algorithm="HS256")
+
+
+async def _run_via_http(
+    *,
+    user_message: str,
+    token: str,
+    timeout: float = 120.0,
+) -> dict:
+    """Start one agent run over HTTP and summarise its server-sent events.
+
+    Args:
+        user_message: Content of the single user message sent with the run.
+        token: Bearer token placed in the ``Authorization`` header.
+        timeout: Timeout used for the client and for the event stream.
+
+    Returns:
+        A dict with ``final_answer``, ``tool_results``, ``tool_names``,
+        ``successful_tool_names``, ``run_finished``, ``latency_ms`` and
+        ``token_usage``. ``latency_ms`` is measured from just before the POST
+        that creates the run, so request time is included.
+
+    Raises:
+        httpx.HTTPStatusError: if either request returns an error status.
+    """
+    thread_id = str(uuid4())
+    run_id = f"q-{uuid4().hex[:12]}"
+
+    tool_results: list[dict] = []
+    final_answer = ""
+    run_finished = False
+    token_usage: dict = {}
+
+    async with httpx.AsyncClient(timeout=httpx.Timeout(timeout)) as client:
+        headers = {"Authorization": f"Bearer {token}"}
+
+        # Start the clock before the run is created so the measured latency
+        # covers the whole round trip.
+        t_start = time.monotonic()
+
+        run_resp = await client.post(
+            f"{BASE_URL}/api/v1/agent/runs",
+            headers=headers,
+            json={
+                "threadId": thread_id,
+                "runId": run_id,
+                "state": {},
+                "messages": [
+                    {"id": "u1", "role": "user", "content": user_message}
+                ],
+                "tools": [],
+                "context": [],
+                "forwardedProps": {"runtime_mode": "chat"},
+            },
+        )
+        # Surface auth/validation failures here instead of streaming events
+        # for a run that was never created.
+        run_resp.raise_for_status()
+
+        run_data = run_resp.json()
+        eff_thread = str(run_data.get("threadId", thread_id))
+        eff_run = run_data.get("runId", run_id)
+        events_url = (
+            f"{BASE_URL}/api/v1/agent/runs/{eff_thread}/events"
+            f"?runId={eff_run}"
+        )
+
+        async with client.stream(
+            "GET", events_url, headers=headers, timeout=timeout
+        ) as sse:
+            sse.raise_for_status()
+            # An SSE event may carry several ``data:`` lines; they are joined
+            # with newlines when the blank separator line arrives.
+            data_lines: list[str] = []
+            async for line in sse.aiter_lines():
+                if line.startswith("data:"):
+                    data_str = line.split(":", 1)[1].strip()
+                    if data_str:
+                        data_lines.append(data_str)
+                    continue
+                if line != "" or not data_lines:
+                    continue
+
+                payload = "\n".join(data_lines)
+                data_lines = []
+                try:
+                    ev = json.loads(payload)
+                except json.JSONDecodeError:
+                    # Non-JSON payloads (e.g. keep-alives) are ignored.
+                    continue
+                if not isinstance(ev, dict):
+                    # Only JSON objects are treated as events.
+                    continue
+
+                etype = ev.get("type")
+                if etype == "TOOL_CALL_RESULT":
+                    tool_results.append(ev)
+                elif etype == "TEXT_MESSAGE_END":
+                    final_answer = ev.get("answer", "") or ev.get("text", "")
+                    token_usage = {
+                        "totalTokens": ev.get("totalTokens", 0),
+                        "inputTokens": ev.get("inputTokens", 0),
+                        "outputTokens": ev.get("outputTokens", 0),
+                        "promptCacheMissTokens": ev.get(
+                            "promptCacheMissTokens", 0
+                        ),
+                        "promptCacheHitTokens": ev.get(
+                            "promptCacheHitTokens", 0
+                        ),
+                    }
+                elif etype in {"RUN_FINISHED", "RUN_ERROR"}:
+                    run_finished = True
+
+        t_end = time.monotonic()
+
+    tool_names = [
+        tr.get("tool_name", "") or tr.get("toolName", "")
+        for tr in tool_results
+    ]
+    successful_tool_names = [
+        tr.get("tool_name", "") or tr.get("toolName", "")
+        for tr in tool_results
+        if tr.get("status") in ("success", "partial")
+    ]
+
+    return {
+        "final_answer": final_answer,
+        "tool_results": tool_results,
+        "tool_names": tool_names,
+        "successful_tool_names": successful_tool_names,
+        "run_finished": run_finished,
+        "latency_ms": round((t_end - t_start) * 1000),
+        "token_usage": token_usage,
+    }
+
+
+def _switch_model(model_code: str) -> None:
+    """Point the router and worker system agents at the LLM mapped to ``model_code``.
+
+    Raises:
+        KeyError: if ``model_code`` has no entry in ``MODEL_LLM_IDS``.
+    """
+    from supabase import create_client
+
+    client = create_client(_get_supabase_url(), _get_supabase_key())
+    target_llm_id = MODEL_LLM_IDS[model_code]
+    for role in ("router", "worker"):
+        agents = client.table("system_agents")
+        update_query = agents.update({"llm_id": target_llm_id}).eq("agent_type", role)
+        update_query.execute()
+
+
+def _save_original_models() -> list[dict]:
+    """Fetch ``agent_type`` and ``llm_id`` for every row of ``system_agents``."""
+    from supabase import create_client
+
+    client = create_client(_get_supabase_url(), _get_supabase_key())
+    select_query = client.table("system_agents").select("agent_type, llm_id")
+    response = select_query.execute()
+    return response.data
+
+
+def _restore_models(original_rows: list[dict]) -> None:
+    """Write each saved ``llm_id`` back to the row with the matching ``agent_type``."""
+    from supabase import create_client
+
+    client = create_client(_get_supabase_url(), _get_supabase_key())
+    for saved in original_rows:
+        agents = client.table("system_agents")
+        update_query = agents.update({"llm_id": saved["llm_id"]})
+        update_query.eq("agent_type", saved["agent_type"]).execute()
+
+
+def _evaluate_answer_quality(
+    *,
+    answer: str,
+    run_finished: bool,
+    expect_tool_use: bool,
+    has_tool_success: bool,
+    tool_names: list[str],
+) -> float:
+    """Heuristically score an answer in ``[0.0, 1.0]``.
+
+    An unfinished run or a blank answer scores 0.0. Otherwise the score starts
+    at 0.6 and is adjusted for tool behaviour versus expectation, answer
+    length, and failure wording, then clamped to the unit interval.
+    """
+    if not run_finished or not answer or not answer.strip():
+        return 0.0
+
+    # Adjustment for how tool usage compares with what the scenario expects.
+    if expect_tool_use:
+        if has_tool_success:
+            tool_delta = 0.2
+        elif tool_names:
+            tool_delta = 0.1
+        else:
+            tool_delta = -0.3
+    elif tool_names:
+        tool_delta = -0.1
+    else:
+        tool_delta = 0.2
+
+    score = 0.6 + tool_delta
+
+    if len(answer) > 10:
+        score += 0.1
+
+    # Failure wording is only penalised when a tool result was expected.
+    mentions_failure = any(word in answer for word in ("无法", "失败", "错误"))
+    if mentions_failure and expect_tool_use:
+        score -= 0.1
+
+    return max(0.0, min(1.0, score))
+
+
+def _evaluate_criteria(
+    *,
+    answer: str,
+    run_finished: bool,
+    tool_names: list[str],
+    has_tool_success: bool,
+    tool_results: list[dict],
+    scenario: object,
+) -> list[ScoreDetail]:
+    """Score each of the scenario's quality criteria with keyword heuristics.
+
+    Args:
+        answer: Final answer text produced by the agent.
+        run_finished: Whether a terminal run event was observed.
+        tool_names: Names of every tool that produced a result.
+        has_tool_success: Whether any tool result succeeded.
+        tool_results: Raw tool-result events, used to inspect call arguments.
+        scenario: Object whose ``quality_criteria`` attribute lists the
+            criteria; a missing attribute yields an empty result.
+
+    Returns:
+        One ``ScoreDetail`` per criterion, in the scenario's order.
+    """
+    import re
+
+    details: list[ScoreDetail] = []
+    for criterion in getattr(scenario, "quality_criteria", []):
+        passed = False
+        note = ""
+
+        if "不应调用" in criterion:
+            # Negated criterion ("should not call any tool"). It must be
+            # checked before the generic "调用" branch, which would otherwise
+            # score it inverted.
+            passed = not tool_names
+            note = f"tools: {tool_names}" if not passed else ""
+        elif "调用" in criterion or "project_cli" in criterion:
+            passed = any("project_cli" in tn for tn in tool_names)
+            note = f"tools: {tool_names}" if not passed else ""
+        elif "mode" in criterion and "day" in criterion:
+            # Accept every mode the criterion names (e.g. "mode=range" as an
+            # alternative to "mode=day"); fall back to "day" when none is
+            # spelled out as mode=<name>.
+            accepted_modes = set(re.findall(r"mode=([A-Za-z_]+)", criterion)) or {"day"}
+            for tr in tool_results:
+                args = tr.get("tool_call_args") or tr.get("toolCallArgs") or {}
+                if not isinstance(args, dict):
+                    # Arguments that are not a mapping cannot be inspected.
+                    continue
+                inp = args.get("input", {})
+                if isinstance(inp, dict) and inp.get("mode") in accepted_modes:
+                    passed = True
+                    break
+        elif "具体" in criterion or "时间戳" in criterion:
+            passed = has_tool_success
+        elif "基于工具" in criterion or "返回" in criterion:
+            passed = has_tool_success
+        elif "无日程" in criterion:
+            # NOTE(review): this passes only when the answer reports no events,
+            # even though the criterion is conditional on there being none.
+            passed = "无" in answer or "没有" in answer
+        elif "简短" in criterion or "简洁" in criterion:
+            passed = 0 < len(answer) < 200
+        elif "自我介绍" in criterion:
+            passed = "Linksy" in answer or "助手" in answer
+        elif "礼貌" in criterion:
+            passed = len(answer) > 0
+        else:
+            passed = run_finished and len(answer) > 0
+
+        details.append(ScoreDetail(criterion=criterion, passed=passed, note=note))
+    return details
+
+
+async def _run_model_scenarios(model_code: str, user_id: str) -> ModelScorecard:
+    """Run every scenario in ``ALL_SCENARIOS`` sequentially and score the results.
+
+    Args:
+        model_code: Model code used for pricing and recorded on each score.
+        user_id: User id the bearer token is issued for.
+
+    Returns:
+        A ``ModelScorecard`` with one ``ScenarioScore`` per scenario.
+    """
+    from services.llm_pricing.service import LlmPricingService
+
+    pricing = LlmPricingService()
+    # One token is created up front and reused for every scenario.
+    token = _create_jwt(user_id)
+    scores: list[ScenarioScore] = []
+
+    for scenario in ALL_SCENARIOS:
+        result = await _run_via_http(
+            user_message=scenario.prompt,
+            token=token,
+        )
+
+        answer = result["final_answer"]
+        tool_names = result["tool_names"]
+        has_tool_success = len(result["successful_tool_names"]) > 0
+        tu = result["token_usage"]
+
+        # Fall back to cache-miss tokens / the total-minus-input remainder when
+        # the explicit counters are missing or zero.
+        total_tokens = tu.get("totalTokens", 0)
+        input_tokens = tu.get("inputTokens", 0) or tu.get("promptCacheMissTokens", 0)
+        output_tokens = tu.get("outputTokens", 0) or max(total_tokens - input_tokens, 0)
+
+        try:
+            cost_usd = pricing.calculate_cost(
+                model=model_code,
+                prompt_tokens=input_tokens,
+                completion_tokens=output_tokens,
+                cached_prompt_tokens=tu.get("promptCacheHitTokens", 0),
+            )
+        except ValueError:
+            # presumably raised when the model has no pricing entry — TODO confirm
+            cost_usd = 0.0
+        cost_usd = round(cost_usd, 8)
+
+        # Only project_cli counts as "the tool"; scenarios that expect no tool
+        # use are recorded as succeeded regardless of what ran.
+        tool_called = any("project_cli" in tn for tn in tool_names)
+        tool_succeeded = has_tool_success if scenario.expect_tool_use else True
+
+        answer_quality = _evaluate_answer_quality(
+            answer=answer,
+            run_finished=result["run_finished"],
+            expect_tool_use=scenario.expect_tool_use,
+            has_tool_success=has_tool_success,
+            tool_names=tool_names,
+        )
+
+        details = _evaluate_criteria(
+            answer=answer,
+            run_finished=result["run_finished"],
+            tool_names=tool_names,
+            has_tool_success=has_tool_success,
+            tool_results=result["tool_results"],
+            scenario=scenario,
+        )
+
+        # NOTE(review): the progress line prints has_tool_success, so no-tool
+        # scenarios show "FAIL" here even though tool_succeeded is True.
+        print(
+            f"  [{model_code}] {scenario.id:<25} "
+            f"lat={result['latency_ms']}ms "
+            f"tokens={total_tokens} "
+            f"cost=${cost_usd:.6f} "
+            f"tool={'OK' if has_tool_success else 'FAIL'} "
+            f"answer={answer[:60]}"
+        )
+
+        scores.append(
+            ScenarioScore(
+                scenario_id=scenario.id,
+                model_code=model_code,
+                latency_ms=result["latency_ms"],
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                cost_usd=cost_usd,
+                tool_called=tool_called,
+                tool_succeeded=tool_succeeded,
+                answer_quality=answer_quality,
+                details=details,
+                # Only the first 500 characters of the answer are stored.
+                raw_answer=answer[:500],
+                run_finished=result["run_finished"],
+            )
+        )
+
+    return ModelScorecard(model_code=model_code, scenario_scores=scores)
+
+
+@pytest.fixture(autouse=True)
+def _check_env():
+    """Skip every test in this module unless ``QUALITY_TEST=1`` is set."""
+    if os.getenv("QUALITY_TEST") != "1":
+        pytest.skip("set QUALITY_TEST=1 to run quality tests")
+
+
+@pytest.fixture(autouse=True)
+def _require_test_user_id(_check_env):
+    """Fail early when ``TEST_USER_ID`` is missing.
+
+    Requests ``_check_env`` so the ``QUALITY_TEST`` skip is always evaluated
+    first. Without the explicit dependency, an unset ``TEST_USER_ID`` could
+    raise ``RuntimeError`` in runs where these tests are meant to be skipped,
+    because the relative order of same-scope autouse fixtures is only
+    guaranteed through declared dependencies.
+    """
+    _get_test_user_id()
+
+
+@pytest.mark.asyncio
+@pytest.mark.quality
+@pytest.mark.live
+async def test_model_ab_comparison():
+    """Run all scenarios against each candidate model and print a comparison.
+
+    The test makes no assertions on the scores; it prints the scorecards and
+    always restores the saved ``system_agents`` rows afterwards.
+    """
+    user_id = _get_test_user_id()
+    # Snapshot the current agent/LLM assignment so it can be restored below.
+    original_rows = _save_original_models()
+
+    scorecards: list[ModelScorecard] = []
+    try:
+        for model_code in CANDIDATE_MODELS:
+            _switch_model(model_code)
+            card = await _run_model_scenarios(model_code, user_id)
+            scorecards.append(card)
+            print(card.summary_table())
+    finally:
+        # Restore even when a scenario run raises.
+        _restore_models(original_rows)
+
+    print("\n" + "=" * 60)
+    print("COMPARISON")
+    print("=" * 60)
+    for card in scorecards:
+        print(
+            f"  {card.model_code:<20} "
+            f"overall={card.avg_overall:.2f}  "
+            f"latency={card.avg_latency_ms:.0f}ms  "
+            f"cost=${card.avg_cost_usd:.6f}  "
+            f"tool_success={card.tool_success_rate:.0%}"
+        )
+
+    # A winner is only announced for a two-way comparison; ties go to the first model.
+    if len(scorecards) == 2:
+        a, b = scorecards
+        winner = a.model_code if a.avg_overall >= b.avg_overall else b.model_code
+        print(f"\n  Winner: {winner} (by overall score)")
@@ -7,6 +7,7 @@ from ag_ui.core import RunAgentInput
 import core.agentscope.runtime.runner as runner_module
 from core.agentscope.runtime.runner import AgentScopeRunner
 from schemas.agent.runtime_models import (
+    RunStatus,
    RouterAgentOutput,
    WorkerAgentOutputLite,
 )
@@ -60,6 +61,31 @@ def test_build_worker_input_messages_only_contains_router_contract() -> None:
    assert "[RouterAgentOutput]" in str(input_messages[0].content)


+def test_build_agent_sets_worker_max_iters(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Check that ``_build_agent`` passes ``max_iters=7`` when building the worker."""
+    captured: dict[str, object] = {}
+
+    # Stand-in agent class that only records the keyword arguments it receives.
+    class _FakeJsonReActAgent:
+        def __init__(self, **kwargs: object) -> None:
+            captured.update(kwargs)
+
+    monkeypatch.setattr(runner_module, "JsonReActAgent", _FakeJsonReActAgent)
+
+    runner = AgentScopeRunner()
+    model = runner_module.TrackingChatModel(object())
+
+    agent = runner._build_agent(
+        agent_name="worker",
+        system_prompt="test",
+        toolkit=object(),
+        model=model,
+    )
+
+    assert isinstance(agent, _FakeJsonReActAgent)
+    assert captured["max_iters"] == 7
+
+
 def test_build_router_messages_injects_user_input_when_context_last_not_user() -> None:
    runner = AgentScopeRunner()
    run_input = _run_input()
@@ -119,6 +145,45 @@ def test_build_router_messages_appends_user_input_to_context_tail() -> None:
    assert messages[0].content == "上一轮回复"


+def test_enforce_tool_evidence_contract_keeps_success_when_tool_succeeds() -> None:
+    """Check that a successful output is left untouched when a tool result succeeded."""
+    runner = AgentScopeRunner()
+
+    worker_output = runner._enforce_tool_evidence_contract(
+        worker_output=WorkerAgentOutputLite(
+            status=RunStatus.SUCCESS,
+            answer="今天没有日程",
+            suggested_actions=["查明天"],
+        ),
+        requires_tool_evidence=True,
+        has_successful_tool_result=True,
+    )
+
+    # Status, answer and suggestions are expected to pass through unchanged.
+    assert worker_output.status == RunStatus.SUCCESS
+    assert worker_output.answer == "今天没有日程"
+    assert worker_output.suggested_actions == ["查明天"]
+    assert worker_output.error is None
+
+
+def test_enforce_tool_evidence_contract_forces_failure_without_successful_tool() -> None:
+    """Check that a claimed success is downgraded when no tool result succeeded."""
+    runner = AgentScopeRunner()
+
+    worker_output = runner._enforce_tool_evidence_contract(
+        worker_output=WorkerAgentOutputLite(
+            status=RunStatus.SUCCESS,
+            answer="今天没有日程",
+            suggested_actions=["查明天"],
+        ),
+        requires_tool_evidence=True,
+        has_successful_tool_result=False,
+    )
+
+    # The output is expected to become a failure with a fixed answer, no
+    # suggestions, and a TOOL_EVIDENCE_MISSING error.
+    assert worker_output.status == RunStatus.FAILED
+    assert worker_output.answer == "无法确认结果：所需工具调用未成功完成。"
+    assert worker_output.suggested_actions == []
+    assert worker_output.error is not None
+    assert worker_output.error.code == "TOOL_EVIDENCE_MISSING"
+
+
 def test_build_model_omits_none_generate_kwargs(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
@@ -1,6 +1,10 @@
 from __future__ import annotations

-from core.agentscope.prompts.agent_prompt import build_agent_prompt
+from core.agentscope.prompts.agent_prompt import (
+    build_agent_prompt,
+    build_worker_contract_prompt,
+)
+from schemas.agent.runtime_models import RouterAgentOutput
 from schemas.agent.system_agent import AgentType, SystemAgentLLMConfig


@@ -18,9 +22,12 @@ def test_build_agent_prompt_for_worker_contains_runtime_config() -> None:

    assert "<!-- AGENT_START -->" in prompt
    assert "- type: worker" in prompt
-    assert "context_messages.mode=number" in prompt
-    assert "context_messages.count=20" in prompt
    assert "enabled_skills=calendar,contacts" in prompt
+    assert "Use objective plus context_summary as the primary execution guide from the router." in prompt
+    assert "When requires_tool_evidence=true, do not finalize an answer from failed tool calls; either recover with a corrected tool call or explicitly surface that execution failed." in prompt
+    assert "If all tool calls fail under requires_tool_evidence=true, set status=failed and populate error; do not present a factual answer as confirmed." in prompt
+    assert "context_messages.mode=number" not in prompt
+    assert "context_messages.count=20" not in prompt


 def test_build_agent_prompt_for_router_contains_identity_and_config() -> None:
@@ -35,5 +42,20 @@ def test_build_agent_prompt_for_router_contains_identity_and_config() -> None:

    assert "- type: router" in prompt
    assert "[Router Agent]" in prompt
+    assert "When the task will require project_cli, include canonical tool input defaults in context_summary using the exact shape `project_cli_defaults={\"module\":...,\"method\":...,\"input\":{...}}` whenever they can be determined safely." in prompt
+    assert "Standardize every time value mentioned in context_summary to the exact project_cli input format that would be required downstream: dates as `YYYY-MM-DD`, local datetimes as RFC3339 with timezone offset, and event ids as raw UUID strings." in prompt
+    assert "For relative time requests like today, tomorrow, or next Monday, resolve them using system_time_local and place the resolved standardized value into project_cli_defaults.input instead of leaving natural-language time phrases." in prompt
    assert "context_messages.mode=day" in prompt
    assert "context_messages.count=2" in prompt
+
+
+def test_build_worker_contract_prompt_prefers_resolved_dates_from_context_summary() -> None:
+    """Check that the worker contract prompt carries the project_cli_defaults rule.
+
+    The prompt is built from a RouterAgentOutput whose context_summary holds a
+    resolved date and whose requires_tool_evidence flag is set.
+    """
+    # NOTE(review): the test name talks about resolved dates, but the only
+    # assertion checks the project_cli_defaults guidance sentence; confirm
+    # which behavior this test is meant to pin.
+    prompt = build_worker_contract_prompt(
+        router_output=RouterAgentOutput(
+            objective="查询今天日程",
+            context_summary="目标日期: 2026-04-24",
+            requires_tool_evidence=True,
+        )
+    )
+
+    assert "If context_summary contains project_cli_defaults, prefer using those exact module/method/input values directly." in prompt
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from core.agentscope.tools.cli.adapter import invoke_cli_tool
+
+
+@pytest.mark.asyncio
+async def test_project_cli_requires_module_and_method() -> None:
+    """Check that a project_cli call without "method" is rejected.
+
+    The tool call args contain only "module" and "input". The JSON payload in
+    the first content block must report ok=False, echo the module, report an
+    empty method, and carry error code INVALID_ARGUMENT.
+    """
+    response = await invoke_cli_tool(
+        tool_name="project_cli",
+        tool_call_args={
+            "module": "calendar",
+            "input": {},
+        },
+        allowed_commands={"calendar"},
+    )
+
+    assert response.content
+    block = response.content[0]
+    # The content block may be a dict or an object exposing .text.
+    text = block["text"] if isinstance(block, dict) else block.text
+    payload = json.loads(text)
+    assert payload["ok"] is False
+    assert payload["module"] == "calendar"
+    assert payload["method"] == ""
+    assert payload["error"]["code"] == "INVALID_ARGUMENT"
+
+
+@pytest.mark.asyncio
+async def test_project_cli_failure_includes_method_contract_in_side_channel() -> None:
+    """Check that a failed calendar/read call stores input guidance for the call id.
+
+    A calendar/read call with an empty input must return INVALID_ACTION_INPUT in
+    the response payload. The output retrieved via peek_tool_agent_output for
+    the same tool call id must carry the same error code plus details with an
+    input schema, an example input, and a message about resolving the day.
+    """
+    from core.agentscope.tools.tool_call_context import (
+        peek_tool_agent_output,
+        reset_current_tool_call_id,
+        set_current_tool_call_id,
+    )
+    from core.auth.credential_issuer import create_credential_issuer
+    from core.auth.tool_credential_context import reset_tool_credential, set_tool_credential
+
+    # Set a known tool call id and an issued credential before invoking the
+    # tool; the same id is used below to look up the stored output.
+    token = set_current_tool_call_id("call-test-guidance")
+    credential_token = set_tool_credential(
+        create_credential_issuer().issue(
+            owner_id="00000000-0000-0000-0000-000000000001",
+            mode="chat",
+        )
+    )
+
+    try:
+        response = await invoke_cli_tool(
+            tool_name="project_cli",
+            tool_call_args={
+                "module": "calendar",
+                "method": "read",
+                "input": {},
+            },
+            allowed_commands={"calendar"},
+        )
+    finally:
+        # Reset both context values even when the call raises.
+        reset_tool_credential(credential_token)
+        reset_current_tool_call_id(token)
+
+    assert response.content
+    block = response.content[0]
+    # The content block may be a dict or an object exposing .text.
+    text = block["text"] if isinstance(block, dict) else block.text
+    payload = json.loads(text)
+    assert payload["ok"] is False
+    assert payload["module"] == "calendar"
+    assert payload["method"] == "read"
+    assert payload["data"] is None
+    assert payload["error"]["code"] == "INVALID_ACTION_INPUT"
+
+    # The stored output for this call id carries the detailed guidance.
+    stored = peek_tool_agent_output(tool_call_id="call-test-guidance")
+    assert stored is not None
+    error = stored.get("error")
+    assert isinstance(error, dict)
+    assert error["code"] == "INVALID_ACTION_INPUT"
+    assert error["details"]["input_schema"]["mode"] == "string enum(day|range|event)"
+    assert error["details"]["expected_input_examples"][0] == {
+        "mode": "day",
+        "date": "2026-04-24",
+        "timezone": "Asia/Shanghai",
+    }
+    assert "resolve the day to a concrete input.date value" in error["message"]
@@ -1,38 +1,96 @@
 from __future__ import annotations

+import pytest
+
 from core.agentscope.tools.cli.handler_calendar import (
-    _resolve_read_range,
+    _day_input_to_range_input,
+    _CalendarReadDayInput,
+    handle_calendar_create_event,
+    handle_calendar_list_day,
 )
 from core.agentscope.tools.cli.models import CliCommand


-def test_resolve_read_range_supports_date_timezone_fallback() -> None:
-    request = CliCommand(
-        command="calendar",
-        subcommand="read",
-        owner_id="u1",
-        args={"date": "2026-04-23", "timezone": "Asia/Shanghai"},
+def test_day_input_converts_to_tz_range() -> None:
+    """Check that a day-mode input converts into a range-mode input.
+
+    For date 2026-04-23 in Asia/Shanghai the expected result is a "range" dict
+    running from 2026-04-23T00:00:00+08:00 to 2026-04-24T00:00:00+08:00.
+    """
+    payload = _CalendarReadDayInput.model_validate(
+        {"mode": "day", "date": "2026-04-23", "timezone": "Asia/Shanghai"}
    )

-    start_at, end_at, error = _resolve_read_range(request)
+    result = _day_input_to_range_input(payload)

-    assert error is None
-    assert start_at is not None
-    assert end_at is not None
-    assert start_at.isoformat() == "2026-04-22T16:00:00+00:00"
-    assert end_at.isoformat() == "2026-04-23T16:00:00+00:00"
+    assert result == {
+        "mode": "range",
+        "start_at": "2026-04-23T00:00:00+08:00",
+        "end_at": "2026-04-24T00:00:00+08:00",
+    }


-def test_resolve_read_range_rejects_bad_date() -> None:
+@pytest.mark.asyncio
+async def test_calendar_read_rejects_bad_date_format() -> None:
+    """Check that a day-mode read with a slash-separated date is rejected.
+
+    The handler result must be a failure with code INVALID_ACTION_INPUT whose
+    details list "day.date" as the only invalid field and no missing fields.
+    """
    request = CliCommand(
-        command="calendar",
-        subcommand="read",
+        module="calendar",
+        method="read",
        owner_id="u1",
-        args={"date": "2026/04/23", "timezone": "Asia/Shanghai"},
+        input={"mode": "day", "date": "2026/04/23", "timezone": "Asia/Shanghai"},
    )

-    start_at, end_at, error = _resolve_read_range(request)
+    result = await handle_calendar_list_day(request)

-    assert start_at is None
-    assert end_at is None
-    assert error == "date must be YYYY-MM-DD"
+    assert result.ok is False
+    assert result.error is not None
+    assert result.error.code == "INVALID_ACTION_INPUT"
+    assert result.error.details == {
+        "missing_fields": [],
+        "invalid_fields": ["day.date"],
+    }
+
+
+@pytest.mark.asyncio
+async def test_calendar_read_range_requires_timezone_aware_datetimes() -> None:
+    """Check that a range-mode read with offset-less datetimes is rejected.
+
+    Both start_at and end_at are given without a timezone offset. The result
+    must fail with INVALID_ACTION_INPUT and report both range fields invalid.
+    """
+    request = CliCommand(
+        module="calendar",
+        method="read",
+        owner_id="u1",
+        input={
+            "mode": "range",
+            "start_at": "2026-04-23T00:00:00",
+            "end_at": "2026-04-24T00:00:00",
+        },
+    )
+
+    result = await handle_calendar_list_day(request)
+
+    assert result.ok is False
+    assert result.error is not None
+    assert result.error.code == "INVALID_ACTION_INPUT"
+    # Sorted so the assertion does not depend on the order of reported fields.
+    assert sorted(result.error.details["invalid_fields"]) == ["range.end_at", "range.start_at"]
+
+
+@pytest.mark.asyncio
+async def test_create_event_rejects_legacy_field_aliases_with_corrections() -> None:
+    """Check that calendar/create rejects start_time/end_time/event_timezone.
+
+    The result must fail with INVALID_ACTION_INPUT. Its details must list the
+    three submitted names as invalid, report start_at and timezone as missing,
+    and map each submitted name to its expected replacement under
+    alias_corrections.
+    """
+    request = CliCommand(
+        module="calendar",
+        method="create",
+        owner_id="u1",
+        input={
+            "title": "Project sync",
+            "start_time": "2026-04-23T10:00:00+08:00",
+            "end_time": "2026-04-23T11:00:00+08:00",
+            "event_timezone": "Asia/Shanghai",
+        },
+    )
+
+    result = await handle_calendar_create_event(request)
+
+    assert result.ok is False
+    assert result.error is not None
+    assert result.error.code == "INVALID_ACTION_INPUT"
+    assert result.error.details == {
+        "missing_fields": ["start_at", "timezone"],
+        "invalid_fields": ["end_time", "event_timezone", "start_time"],
+        "alias_corrections": {
+            "start_time": "start_at",
+            "end_time": "end_at",
+            "event_timezone": "timezone",
+        },
+    }
@@ -3,18 +3,21 @@ from __future__ import annotations
 from core.agentscope.tools.cli.handlers import build_router


-def test_router_registers_only_new_canonical_subcommands() -> None:
+def test_router_registers_only_new_canonical_actions() -> None:
+    """Check which (module, method) pairs build_router() registers.
+
+    The first group of assertions lists pairs that must be present in
+    router.module_methods; the second group lists pairs that must be absent.
+    """
    router = build_router()

-    assert ("calendar", "create") in router.command_pairs
-    assert ("calendar", "read") in router.command_pairs
-    assert ("calendar", "update") in router.command_pairs
-    assert ("calendar", "delete") in router.command_pairs
-    assert ("calendar", "share") in router.command_pairs
-    assert ("contacts", "read") in router.command_pairs
-    assert ("memory", "update") in router.command_pairs
+    assert ("calendar", "read") in router.module_methods
+    assert ("calendar", "create") in router.module_methods
+    assert ("calendar", "update") in router.module_methods
+    assert ("calendar", "delete") in router.module_methods
+    assert ("calendar", "share") in router.module_methods
+    assert ("calendar", "accept_invite") in router.module_methods
+    assert ("calendar", "reject_invite") in router.module_methods
+    assert ("contacts", "read") in router.module_methods
+    assert ("memory", "update") in router.module_methods

-    assert ("calendar", "write") not in router.command_pairs
-    assert ("contacts", "lookup") not in router.command_pairs
-    assert ("memory", "write") not in router.command_pairs
-    assert ("memory", "forget") not in router.command_pairs
+    assert ("calendar", "list_day") not in router.module_methods
+    assert ("calendar", "get_event") not in router.module_methods
+    assert ("contacts", "lookup") not in router.module_methods
+    assert ("memory", "write") not in router.module_methods
+    assert ("memory", "forget") not in router.module_methods
@@ -11,13 +11,13 @@ async def test_router_register_and_dispatch() -> None:
    router = CommandRouter()

    async def mock_handler(request: CliCommand) -> CliCommandResult:
-        return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand, data={"name": request.args["name"]})
+        return CliCommandResult(ok=True, module=request.module, method=request.method, data={"name": request.input["name"]})

-    router.register(command="test", subcommand="run", handler=mock_handler)
+    router.register(module="test", method="run", handler=mock_handler)

-    assert ("test", "run") in router.command_pairs
+    assert ("test", "run") in router.module_methods

-    result = await router.dispatch(CliCommand(command="test", subcommand="run", args={"name": "demo"}, owner_id="u1"))
+    result = await router.dispatch(CliCommand(module="test", method="run", input={"name": "demo"}, owner_id="u1"))
    assert result.ok is True
    assert result.data == {"name": "demo"}

@@ -25,10 +25,10 @@ async def test_router_register_and_dispatch() -> None:
@pytest.mark.asyncio
 async def test_router_unknown_command() -> None:
+    """Check that dispatching an unregistered pair yields UNKNOWN_METHOD."""
    router = CommandRouter()
-    result = await router.dispatch(CliCommand(command="unknown", subcommand="run", args={}, owner_id="u1"))
+    result = await router.dispatch(CliCommand(module="unknown", method="run", input={}, owner_id="u1"))
    assert result.ok is False
    assert result.error is not None
-    assert result.error.code == "UNKNOWN_COMMAND"
+    assert result.error.code == "UNKNOWN_METHOD"


@pytest.mark.asyncio
@@ -39,9 +39,9 @@ async def test_router_handler_exception() -> None:
        del request
        raise ValueError("intentional error")

-    router.register(command="fail", subcommand="run", handler=failing_handler)
+    router.register(module="fail", method="run", handler=failing_handler)

-    result = await router.dispatch(CliCommand(command="fail", subcommand="run", args={}, owner_id="u1"))
+    result = await router.dispatch(CliCommand(module="fail", method="run", input={}, owner_id="u1"))
    assert result.ok is False
    assert result.error is not None
    assert result.error.code == "HANDLER_ERROR"
@@ -51,12 +51,12 @@ def test_router_duplicate_register() -> None:
    router = CommandRouter()

    async def handler1(request: CliCommand) -> CliCommandResult:
-        return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand)
+        return CliCommandResult(ok=True, module=request.module, method=request.method)

    async def handler2(request: CliCommand) -> CliCommandResult:
-        return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand)
+        return CliCommandResult(ok=True, module=request.module, method=request.method)

-    router.register(command="cmd", subcommand="one", handler=handler1)
+    router.register(module="cmd", method="one", handler=handler1)

    with pytest.raises(ValueError, match="already registered"):
-        router.register(command="cmd", subcommand="one", handler=handler2)
+        router.register(module="cmd", method="one", handler=handler2)
@@ -6,31 +6,53 @@ from schemas.agent.runtime_models import ToolAgentOutput, ToolStatus

 def _make_tool_output(
    *,
-    command: str,
-    subcommand: str,
+    module: str,
+    method: str,
    status: ToolStatus,
    data: dict | None = None,
 ) -> ToolAgentOutput:
+    """Build a project_cli ToolAgentOutput fixture for the postprocess tests.
+
+    module and method are copied into both tool_call_args and result. A data
+    value of None is stored as an empty dict in result["data"]. error and
+    ui_hints are always None.
+    """
    return ToolAgentOutput(
        tool_name="project_cli",
        tool_call_id="test_call_id",
-        tool_call_args={"command": command, "subcommand": subcommand, "args": {}},
+        tool_call_args={"module": module, "method": method, "input": {}},
        status=status,
-        result={"command": command, "subcommand": subcommand, "data": data or {}},
+        result={"module": module, "method": method, "data": data or {}},
        error=None,
        ui_hints=None,
    )


 def test_postprocess_calendar_read_has_ui_hints() -> None:
+    """Check that a successful calendar/read list result gets "list" ui_hints."""
-    output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.SUCCESS, data={"total": 5, "items": []})
+    output = _make_tool_output(
+        module="calendar",
+        method="read",
+        status=ToolStatus.SUCCESS,
+        data={"total": 5, "items": []},
+    )
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is not None
    assert processed.ui_hints["intent"] == "list"


+def test_postprocess_calendar_read_event_detail_has_ui_hints() -> None:
+    """Check that a calendar/read result shaped like one event gets a detail title.
+
+    The data holds id/title/start_at rather than total/items, and the
+    resulting ui_hints title must be "日程详情".
+    """
+    output = _make_tool_output(
+        module="calendar",
+        method="read",
+        status=ToolStatus.SUCCESS,
+        data={"id": "evt_1", "title": "Project sync", "start_at": "2026-04-21T10:00:00+08:00"},
+    )
+    processed = postprocess_tool_output(output)
+    assert processed.ui_hints is not None
+    assert processed.ui_hints["title"] == "日程详情"
+
+
 def test_postprocess_calendar_create_partial() -> None:
-    output = _make_tool_output(command="calendar", subcommand="create", status=ToolStatus.PARTIAL, data={"status": "partial", "success": 1, "failed": 1, "results": []})
+    output = _make_tool_output(
+        module="calendar",
+        method="create",
+        status=ToolStatus.PARTIAL,
+        data={"status": "partial", "success": 1, "failed": 1, "results": []},
+    )
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is not None
    assert processed.ui_hints["intent"] == "status"
@@ -39,8 +61,8 @@ def test_postprocess_calendar_create_partial() -> None:

 def test_postprocess_calendar_share_has_ui_hints() -> None:
    output = _make_tool_output(
-        command="calendar",
-        subcommand="share",
+        module="calendar",
+        method="share",
        status=ToolStatus.SUCCESS,
        data={
            "status": "success",
@@ -60,7 +82,12 @@ def test_postprocess_calendar_share_has_ui_hints() -> None:


 def test_postprocess_contacts_read_has_ui_hints() -> None:
-    output = _make_tool_output(command="contacts", subcommand="read", status=ToolStatus.SUCCESS, data={"friends_count": 3, "friends": []})
+    output = _make_tool_output(
+        module="contacts",
+        method="read",
+        status=ToolStatus.SUCCESS,
+        data={"friends_count": 3, "friends": []},
+    )
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is not None
    assert processed.ui_hints["intent"] == "list"
@@ -69,8 +96,8 @@ def test_postprocess_contacts_read_has_ui_hints() -> None:

 def test_postprocess_memory_update_has_ui_hints() -> None:
    output = _make_tool_output(
-        command="memory",
-        subcommand="update",
+        module="memory",
+        method="update",
        status=ToolStatus.SUCCESS,
        data={
            "status": "success",
@@ -95,19 +122,19 @@ def test_postprocess_memory_update_has_ui_hints() -> None:


 def test_postprocess_failure_no_ui_hints() -> None:
+    """Check that a FAILURE calendar/read output gets no ui_hints."""
-    output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.FAILURE, data=None)
+    output = _make_tool_output(module="calendar", method="read", status=ToolStatus.FAILURE, data=None)
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is None


 def test_postprocess_unknown_command_no_ui_hints() -> None:
+    """Check that a successful output for module "unknown" gets no ui_hints."""
-    output = _make_tool_output(command="unknown", subcommand="run", status=ToolStatus.SUCCESS, data={"data": "test"})
+    output = _make_tool_output(module="unknown", method="run", status=ToolStatus.SUCCESS, data={"data": "test"})
    processed = postprocess_tool_output(output)
    assert processed.ui_hints is None


 def test_postprocess_preserves_existing_ui_hints() -> None:
-    output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.SUCCESS, data={"total": 5})
+    output = _make_tool_output(module="calendar", method="read", status=ToolStatus.SUCCESS, data={"total": 5})
    output = output.model_copy(update={"ui_hints": {"view": "custom_view", "custom": True}})
    processed = postprocess_tool_output(output)
    assert processed.ui_hints["view"] == "custom_view"
@@ -3,6 +3,7 @@ import asyncio
 from core.agentscope.tools.internal.project_cli import PROJECT_CLI_TOOL_NAME
 from core.agentscope.tools.internal.view_skill_file import VIEW_SKILL_FILE_TOOL_NAME
 from core.agentscope.tools.internal import make_view_skill_file_wrapper
+from core.agentscope.tools.skill_session import SkillSessionState
 from core.agentscope.tools.toolkit import build_toolkit
 from schemas.agent.skill_config import SkillName

@@ -48,8 +49,22 @@ def test_build_toolkit_registers_project_cli() -> None:
    }


+def test_build_toolkit_uses_custom_agent_skill_prompt_contract() -> None:
+    """Check the agent skill prompt produced with only "calendar" enabled.
+
+    The prompt must contain the skill-index disclaimer sentence, reference the
+    calendar skill as file_path="calendar/SKILL.md", and contain no "/home/"
+    substring.
+    """
+    toolkit = build_toolkit(enabled_skill_names={"calendar"})
+
+    prompt = toolkit.get_agent_skill_prompt()
+
+    assert prompt is not None
+    assert "The entries below are skill indexes, not full execution instructions." in prompt
+    assert 'file_path="calendar/SKILL.md"' in prompt
+    assert "/home/" not in prompt
+
+
 def test_view_skill_file_rejects_path_outside_enabled_skill_dirs() -> None:
-    wrapper = make_view_skill_file_wrapper(enabled_skill_names={"calendar"})
+    wrapper = make_view_skill_file_wrapper(
+        enabled_skill_names={"calendar"},
+        skill_session=SkillSessionState(),
+    )

    response = asyncio.run(
        wrapper(file_path="/tmp/not-allowed.txt", ranges=None),
@@ -62,10 +77,48 @@ def test_view_skill_file_rejects_path_outside_enabled_skill_dirs() -> None:


 def test_view_skill_file_reads_enabled_skill_file() -> None:
+    """Check that reading calendar/SKILL.md works and marks the skill as read."""
-    wrapper = make_view_skill_file_wrapper(enabled_skill_names={"calendar"})
+    skill_session = SkillSessionState()
+    wrapper = make_view_skill_file_wrapper(
+        enabled_skill_names={"calendar"},
+        skill_session=skill_session,
+    )
    response = asyncio.run(wrapper(file_path="calendar/SKILL.md", ranges=[1, 10]))

    assert response.content
    block = response.content[0]
    text = block["text"] if isinstance(block, dict) else block.text
    assert "Calendar Skill" in text or "name: calendar" in text
+    assert skill_session.has_read(skill_name="calendar") is True
+
+
+def test_view_skill_file_reads_calendar_action_card() -> None:
+    """Check that a per-action file under calendar/actions/ can be read.
+
+    The returned text must mention get_event, and reading the file must mark
+    the calendar skill as read on the shared SkillSessionState.
+    """
+    # NOTE(review): this asserts the text '"action": "get_event"', while the
+    # router test in this change expects ("calendar", "get_event") to be
+    # unregistered and the commit message says action cards still need the
+    # envelope fix; confirm the path and expected text once the cards are
+    # updated.
+    skill_session = SkillSessionState()
+    wrapper = make_view_skill_file_wrapper(
+        enabled_skill_names={"calendar"},
+        skill_session=skill_session,
+    )
+    response = asyncio.run(
+        wrapper(file_path="calendar/actions/get_event.md", ranges=[1, 20])
+    )
+
+    assert response.content
+    block = response.content[0]
+    text = block["text"] if isinstance(block, dict) else block.text
+    assert "get_event" in text
+    assert '"action": "get_event"' in text
+    assert skill_session.has_read(skill_name="calendar") is True
+
+
+def test_view_skill_file_rejects_action_card_for_disabled_skill() -> None:
+    """Check that a calendar action file is denied when only contacts is enabled.
+
+    The response text must contain ACCESS_DENIED.
+    """
+    wrapper = make_view_skill_file_wrapper(
+        enabled_skill_names={"contacts"},
+        skill_session=SkillSessionState(),
+    )
+    response = asyncio.run(
+        wrapper(file_path="calendar/actions/get_event.md", ranges=[1, 20])
+    )
+
+    assert response.content
+    block = response.content[0]
+    text = block["text"] if isinstance(block, dict) else block.text
+    assert "ACCESS_DENIED" in text