feat(agent): redesign project_cli with module/method/input protocol

- Replace command/subcommand/args with module/method/input envelope
- Calendar handler uses discriminated union (mode) for read operations
- Strict Pydantic models with extra='forbid' for all calendar methods
- Worker max_iters=7, router prompt simplified (removed project_cli_defaults)
- Skill index cards + per-action files for progressive disclosure
- Frontend/AG-UI aligned to module/method dispatch
- Protocol docs updated to module/method/input contract

WIP: action cards need envelope fix, 2 tests need update, memory
handler needs Pydantic models.
This commit is contained in:
qzl
2026-04-24 13:24:13 +08:00
parent ab526af2c4
commit d060962a5f
62 changed files with 4802 additions and 805 deletions
@@ -16,18 +16,28 @@ def _wrap_section(section: str, content: str) -> str:
return f"{start}\n{body}\n{end}" if body else f"{start}\n{end}"
def _config_rules(llm_config: SystemAgentLLMConfig | None) -> list[str]:
def _config_rules(
llm_config: SystemAgentLLMConfig | None,
*,
include_context_messages: bool = True,
) -> list[str]:
if llm_config is None:
return []
context_mode = llm_config.context_messages.mode.value
context_count = llm_config.context_messages.count
enabled_skills = [skill.value for skill in llm_config.enabled_skills]
return [
"[Runtime Config]",
f"- context_messages.mode={context_mode}",
f"- context_messages.count={context_count}",
f"- enabled_skills={','.join(enabled_skills) if enabled_skills else 'default'}",
]
rules = ["[Runtime Config]"]
if include_context_messages:
context_mode = llm_config.context_messages.mode.value
context_count = llm_config.context_messages.count
rules.extend(
[
f"- context_messages.mode={context_mode}",
f"- context_messages.count={context_count}",
]
)
rules.append(
f"- enabled_skills={','.join(enabled_skills) if enabled_skills else 'default'}"
)
return rules
PromptRuleBuilder = Callable[[SystemAgentLLMConfig | None], list[str]]
@@ -60,7 +70,7 @@ def _router_rules(llm_config: SystemAgentLLMConfig | None) -> list[str]:
"[Responsibilities]",
"- Router only: extract intent and route strategy; never answer user directly.",
"- Set objective to the user's goal in a concise, faithful sentence.",
"- Set context_summary to a brief description of what context messages contain.",
"- Set context_summary to a brief but execution-useful summary of the relevant context, including known IDs, dates, time ranges, and prior tool outcomes when they matter.",
"- Set requires_tool_evidence=true when the task needs tool execution to ground the answer.",
"- Set requires_tool_evidence=false when the question can be answered directly from context.",
*_config_rules(llm_config),
@@ -75,14 +85,17 @@ def _worker_rules(llm_config: SystemAgentLLMConfig | None) -> list[str]:
"[Responsibilities]",
"- Worker only: execute routed objective without changing router intent.",
"- Treat router output as objective contract, not as a fully-materialized tool-args payload.",
"- Use objective plus context_summary as the primary execution guide from the router.",
"- Infer deterministic required tool arguments from contract fields, tool schema, and runtime context.",
"- Ask minimal clarification only when required arguments cannot be inferred safely.",
"- Ground every claim in available evidence and tool results; never fabricate execution state.",
"- When requires_tool_evidence=true, do not finalize an answer from failed tool calls; either recover with a corrected tool call or explicitly surface that execution failed.",
"- If all tool calls fail under requires_tool_evidence=true, set status=failed and populate error; do not present a factual answer as confirmed.",
"- Keep status/answer/suggested_actions/error internally consistent.",
"[Schema Guidance]",
"- The worker output schema is injected at runtime; follow it exactly.",
"- Do not add fields that are not present in the injected schema.",
*_config_rules(llm_config),
*_config_rules(llm_config, include_context_messages=False),
]
@@ -97,8 +110,10 @@ def build_worker_contract_prompt(*, router_output: RouterAgentOutput) -> str:
"[Worker Contract]",
"- Keep routed objective unchanged.",
"- Use objective as the execution target.",
"- Use context_summary to understand conversational background.",
"- Use context_summary to understand conversational background and reuse concrete facts already known from earlier context.",
"- When requires_tool_evidence=true, you MUST call at least one tool before answering.",
"- A failed tool call does not count as grounding evidence for a factual answer.",
"- If no tool call succeeds, finalize with status=failed and a concrete error instead of a fact claim.",
"- Infer deterministic missing required tool args from evidence + tool schema.",
"- Ask clarification only when safe inference is impossible.",
"[RouterAgentOutput]",
@@ -39,7 +39,9 @@ from schemas.agent.forwarded_props import (
parse_forwarded_props_runtime_mode,
)
from schemas.agent.runtime_models import (
ErrorInfo,
RouterAgentOutput,
RunStatus,
WorkerAgentOutputLite,
)
from schemas.agent.skill_config import ProjectCliCommand, SkillName
@@ -74,6 +76,8 @@ class AgentScopeRunner:
self._active_agent: JsonReActAgent | None = None
self._active_agent_lock = asyncio.Lock()
_WORKER_MAX_ITERS = 7
async def execute(
self,
*,
@@ -442,6 +446,11 @@ class AgentScopeRunner:
if self._active_agent is agent:
self._active_agent = None
worker_payload = worker_output_model.model_validate(response_msg.metadata or {})
worker_payload = self._enforce_tool_evidence_contract(
worker_output=worker_payload,
requires_tool_evidence=requires_tool_evidence,
has_successful_tool_result=emitter.has_successful_tool_result,
)
response_metadata = self._llm_pricing_service.build_usage_metadata(
model=stage_config.model_code,
usage_summary=tracking_model.usage_summary(),
@@ -458,6 +467,28 @@ class AgentScopeRunner:
finally:
reset_tool_credential(credential_token)
@staticmethod
def _enforce_tool_evidence_contract(
    *,
    worker_output: WorkerAgentOutputLite,
    requires_tool_evidence: bool,
    has_successful_tool_result: bool,
) -> WorkerAgentOutputLite:
    """Downgrade an ungrounded worker answer to a failure.

    Args:
        worker_output: The validated worker payload for this run.
        requires_tool_evidence: Whether the run was flagged as needing tool
            evidence.
        has_successful_tool_result: Whether at least one tool result in this
            run was recorded as successful.

    Returns:
        ``worker_output`` itself when evidence is not required or a tool
        result succeeded; otherwise a copy whose status, answer,
        suggested_actions and error are overwritten to report
        ``TOOL_EVIDENCE_MISSING``.
    """
    evidence_missing = requires_tool_evidence and not has_successful_tool_result
    if not evidence_missing:
        return worker_output

    missing_evidence_error = ErrorInfo(
        code="TOOL_EVIDENCE_MISSING",
        message="requires_tool_evidence=true but no tool call completed successfully in this run",
        retryable=False,
    )
    # Whatever the model wrote is discarded: without a successful tool result
    # the answer cannot be presented as confirmed.
    overrides = {
        "status": RunStatus.FAILED,
        "answer": "无法确认结果：所需工具调用未成功完成。",
        "suggested_actions": [],
        "error": missing_evidence_error,
    }
    return worker_output.model_copy(update=overrides)
def _build_worker_input_messages(
self,
*,
@@ -501,6 +532,7 @@ class AgentScopeRunner:
model: TrackingChatModel,
emitter: PipelineStageEmitter | None = None,
force_tool_on_first_reasoning: bool = False,
max_iters: int = _WORKER_MAX_ITERS,
) -> JsonReActAgent:
return JsonReActAgent(
name=agent_name,
@@ -511,6 +543,7 @@ class AgentScopeRunner:
memory=InMemoryMemory(),
emitter=emitter,
force_tool_on_first_reasoning=force_tool_on_first_reasoning,
max_iters=max_iters,
)
async def _emit_step_event(
@@ -36,8 +36,13 @@ class PipelineStageEmitter:
self._emit_tool_events = emit_tool_events
self._emitted_tool_calls: set[str] = set()
self._emitted_tool_results: set[str] = set()
self._has_successful_tool_result = False
self.latest_text_message_id: str | None = None
@property
def has_successful_tool_result(self) -> bool:
    """Whether any tool result emitted so far had status ``success`` or ``partial``."""
    return self._has_successful_tool_result
async def handle_print(self, *, msg: Msg, last: bool) -> None:
del last
if self._emit_tool_events:
@@ -126,6 +131,8 @@ class PipelineStageEmitter:
payload["error"] = tool_output.error.model_dump(mode="json")
await self._emit("TOOL_CALL_RESULT", payload)
if tool_output.status.value in {"success", "partial"}:
self._has_successful_tool_result = True
self._emitted_tool_results.add(tool_call_id)
async def _emit(self, event_type: str, payload: dict[str, Any]) -> None:
@@ -1,11 +1,11 @@
from __future__ import annotations
import json
from typing import Any
from agentscope.tool import ToolResponse
from agentscope.message import TextBlock
from core.agentscope.tools.cli.contracts import get_method_input_contract
from core.agentscope.tools.cli.handlers import build_router
from core.agentscope.tools.cli.models import CliCommand
from core.agentscope.tools.cli.router import CommandRouter
@@ -44,29 +44,44 @@ def _resolve_owner_id() -> str:
return owner_id
def _with_method_contract(
    *,
    module: str,
    method: str,
    error: ErrorInfo | None,
) -> ErrorInfo | None:
    """Attach the method's input contract to an error so the caller can retry.

    Args:
        module: Module name used to look up the contract.
        method: Method name used to look up the contract.
        error: The error to enrich, or ``None``.

    Returns:
        ``None`` when ``error`` is ``None``; ``error`` unchanged when no
        contract is registered for ``(module, method)``; otherwise a copy of
        ``error`` whose ``details`` additionally carry every contract key not
        already present, and whose ``message`` has the contract's
        ``retry_hint`` appended if it is a non-empty string not already
        contained in the message.
    """
    if error is None:
        return None

    contract = get_method_input_contract(module=module, method=method)
    if contract is None:
        return error

    # Existing detail keys win; contract entries only fill the gaps.
    merged_details = dict(error.details or {})
    merged_details.update(
        {key: value for key, value in contract.items() if key not in merged_details}
    )

    hint = contract.get("retry_hint")
    original_message = error.message
    if isinstance(hint, str) and hint and hint not in original_message:
        final_message = f"{original_message} {hint}".strip()
    else:
        final_message = original_message

    return error.model_copy(update={"message": final_message, "details": merged_details})
async def invoke_cli_tool(
*,
tool_name: str,
tool_call_args: dict[str, Any],
allowed_commands: set[str] | None = None,
) -> ToolResponse:
command = str(tool_call_args.get("command", "")).strip()
subcommand = str(tool_call_args.get("subcommand", "")).strip()
args = tool_call_args.get("args")
if isinstance(args, str):
try:
parsed_args = json.loads(args)
except (json.JSONDecodeError, ValueError):
parsed_args = None
if isinstance(parsed_args, dict):
args = parsed_args
if not isinstance(args, dict):
args = {}
module = str(tool_call_args.get("module", "")).strip()
method = str(tool_call_args.get("method", "")).strip()
input_payload = tool_call_args.get("input")
if not isinstance(input_payload, dict):
input_payload = {}
tool_call_args = {
**tool_call_args,
"subcommand": subcommand,
"args": args,
"module": module,
"method": method,
"input": input_payload,
}
if tool_name != "project_cli":
@@ -76,29 +91,29 @@ async def invoke_cli_tool(
code="UNKNOWN_TOOL",
message=f"unsupported tool: {tool_name}",
)
if not command or not subcommand:
if not module or not method:
return _build_error(
tool_name=tool_name,
tool_call_args=tool_call_args,
code="INVALID_ARGUMENT",
message="command and subcommand are required",
message="module and method are required",
)
router = _get_router()
if allowed_commands is not None and command not in allowed_commands:
if allowed_commands is not None and module not in allowed_commands:
return _build_error(
tool_name=tool_name,
tool_call_args=tool_call_args,
code="COMMAND_NOT_ALLOWED",
message=f"command not enabled: {command}",
code="MODULE_NOT_ALLOWED",
message=f"module not enabled: {module}",
)
if (command, subcommand) not in router.command_pairs:
if (module, method) not in router.module_methods:
return _build_error(
tool_name=tool_name,
tool_call_args=tool_call_args,
code="UNKNOWN_COMMAND",
message=f"unknown command: {command} {subcommand}",
code="UNKNOWN_METHOD",
message=f"unknown method: {module} {method}",
)
try:
@@ -113,9 +128,9 @@ async def invoke_cli_tool(
)
request = CliCommand(
command=command,
subcommand=subcommand,
args=args,
module=module,
method=method,
input=input_payload,
owner_id=owner_id,
)
@@ -131,11 +146,17 @@ async def invoke_cli_tool(
)
status = ToolStatus.SUCCESS if cli_result.ok else ToolStatus.FAILURE
error_info = cli_result.error
error_info = _with_method_contract(
module=module,
method=method,
error=cli_result.error,
)
result = {
"command": cli_result.command,
"subcommand": cli_result.subcommand,
"ok": cli_result.ok,
"module": cli_result.module,
"method": cli_result.method,
"data": cli_result.data,
"error": error_info.model_dump(mode="json", exclude_none=True) if error_info else None,
}
tool_call_id = get_current_tool_call_id(tool_name=tool_name)
@@ -171,14 +192,27 @@ def _build_error(
code: str,
message: str,
) -> ToolResponse:
module = str((tool_call_args or {}).get("module", "")).strip()
method = str((tool_call_args or {}).get("method", "")).strip()
error_info = _with_method_contract(
module=module,
method=method,
error=ErrorInfo(code=code, message=message, retryable=False),
)
tool_call_id = get_current_tool_call_id(tool_name=tool_name)
output = ToolAgentOutput(
tool_name=tool_name,
tool_call_id=tool_call_id,
tool_call_args=tool_call_args,
status=ToolStatus.FAILURE,
result={"status": "failure", "code": code, "message": message},
error=ErrorInfo(code=code, message=message, retryable=False),
result={
"ok": False,
"module": module,
"method": method,
"data": None,
"error": error_info.model_dump(mode="json", exclude_none=True) if error_info else None,
},
error=error_info,
)
from core.agentscope.tools.tool_postprocessor import postprocess_tool_output
@@ -0,0 +1,112 @@
from __future__ import annotations

import copy
from typing import Any
# Registry of input contracts keyed by (module, method). Each entry holds a
# human-readable "input_schema", a list of "expected_input_examples", and
# optionally a "retry_hint" string.
METHOD_INPUT_CONTRACTS: dict[tuple[str, str], dict[str, Any]] = {
    ("calendar", "read"): {
        "input_schema": {
            "mode": "string enum(day|range|event)",
            "date": "date, required when mode=day",
            "timezone": "string (IANA timezone), optional when mode=day",
            "start_at": "datetime with timezone, required when mode=range",
            "end_at": "datetime with timezone, required when mode=range",
            "event_id": "UUID, required when mode=event",
        },
        "expected_input_examples": [
            {"mode": "day", "date": "2026-04-24", "timezone": "Asia/Shanghai"},
            {
                "mode": "range",
                "start_at": "2026-04-24T09:00:00+08:00",
                "end_at": "2026-04-24T18:00:00+08:00",
            },
            {"mode": "event", "event_id": "550e8400-e29b-41d4-a716-446655440000"},
        ],
        "retry_hint": "For relative day requests, resolve the day to a concrete input.date value in YYYY-MM-DD format before retrying.",
    },
    ("calendar", "create"): {
        "input_schema": {
            "title": "string",
            "start_at": "datetime with timezone",
            "end_at": "datetime with timezone | null",
            "timezone": "string (IANA timezone)",
            "description": "string | null",
            "metadata": "object | null",
        },
        "expected_input_examples": [
            {
                "title": "Project sync",
                "start_at": "2026-04-24T10:00:00+08:00",
                "end_at": "2026-04-24T11:00:00+08:00",
                "timezone": "Asia/Shanghai",
            }
        ],
    },
    ("calendar", "update"): {
        "input_schema": {
            "event_id": "UUID",
            "patch": "object with mutable event fields",
            "patch.start_at": "datetime with timezone | omitted",
            "patch.end_at": "datetime with timezone | null | omitted",
        },
        "expected_input_examples": [
            {
                "event_id": "550e8400-e29b-41d4-a716-446655440000",
                "patch": {"title": "Updated title", "timezone": "Asia/Shanghai"},
            }
        ],
    },
    ("calendar", "delete"): {
        "input_schema": {"event_id": "UUID"},
        "expected_input_examples": [{"event_id": "550e8400-e29b-41d4-a716-446655440000"}],
    },
    ("calendar", "share"): {
        "input_schema": {
            "event_id": "UUID",
            "invitee": "object { phone: string }",
            "permissions": "object { view: bool, edit: bool, invite: bool }",
        },
        "expected_input_examples": [
            {
                "event_id": "550e8400-e29b-41d4-a716-446655440000",
                "invitee": {"phone": "+8613800138000"},
                "permissions": {"view": True, "edit": False, "invite": False},
            }
        ],
    },
    ("calendar", "accept_invite"): {
        "input_schema": {"event_id": "UUID"},
        "expected_input_examples": [{"event_id": "550e8400-e29b-41d4-a716-446655440000"}],
    },
    ("calendar", "reject_invite"): {
        "input_schema": {"event_id": "UUID"},
        "expected_input_examples": [{"event_id": "550e8400-e29b-41d4-a716-446655440000"}],
    },
    ("contacts", "read"): {
        "input_schema": {},
        "expected_input_examples": [{}],
    },
    ("memory", "update"): {
        "input_schema": {
            "operations": "array of objects",
            "operations[].action": "string (update | delete)",
            "operations[].memory_type": "string (user | work)",
        },
        "expected_input_examples": [
            {
                "operations": [
                    {
                        "action": "update",
                        "memory_type": "user",
                        "user_content": {"preferences": {"meeting_time": "morning"}},
                    }
                ]
            }
        ],
    },
}


def get_method_input_contract(*, module: str, method: str) -> dict[str, Any] | None:
    """Return an independent copy of the input contract for ``module``/``method``.

    Args:
        module: Module name; surrounding whitespace is ignored.
        method: Method name; surrounding whitespace is ignored.

    Returns:
        A deep copy of the registered contract, or ``None`` when no contract
        is registered for the pair.
    """
    contract = METHOD_INPUT_CONTRACTS.get((module.strip(), method.strip()))
    if contract is None:
        return None
    # Contracts nest dicts and lists; a shallow copy would hand callers the
    # registry's own nested objects and let them mutate module-level state.
    return copy.deepcopy(contract)
@@ -1,17 +1,15 @@
from __future__ import annotations
from datetime import date, datetime, timedelta
from typing import Any
from datetime import date, datetime, timedelta, timezone
from typing import Annotated, Any, Literal
from uuid import UUID
from zoneinfo import ZoneInfo
from core.agentscope.tools.cli.models import CliCommand, CliCommandResult
from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError, field_validator
from core.agentscope.tools.utils.calendar_domain import (
build_schedule_metadata,
create_schedule_service,
map_calendar_exception,
merge_schedule_metadata_for_update,
parse_iso_datetime,
schedule_event_to_dict,
)
from schemas.agent.runtime_models import ErrorInfo
@@ -19,23 +17,185 @@ from schemas.enums import ScheduleItemStatus
from v1.schedule_items.schemas import (
ScheduleItemCreateRequest,
ScheduleItemListRequest,
ScheduleItemMetadata,
ScheduleItemShareRequest,
ScheduleItemUpdateRequest,
)
async def handle_calendar_read(request: CliCommand) -> CliCommandResult:
class _CalendarReadRangeInput(BaseModel):
    """Read input for ``mode="range"``: an explicit start/end time window."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # Tag that selects this variant in the read-input union.
    mode: Literal["range"]
    start_at: datetime
    end_at: datetime

    @field_validator("start_at", "end_at")
    @classmethod
    def _validate_aware_datetime(cls, value: datetime) -> datetime:
        """Reject naive datetimes; both bounds must carry tzinfo."""
        if value.tzinfo is None:
            raise ValueError("datetime must include timezone offset")
        return value
class _CalendarReadDayInput(BaseModel):
    """Read input for ``mode="day"``: a calendar date plus a timezone name."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # Tag that selects this variant in the read-input union.
    mode: Literal["day"]
    date: date
    # Plain string with a default; this model does not check the value
    # against the timezone database.
    timezone: str = "Asia/Shanghai"
class _CalendarReadEventInput(BaseModel):
    """Read input for ``mode="event"``: a single event addressed by UUID."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # Tag that selects this variant in the read-input union. It is required,
    # so any payload validated against this model must carry mode="event".
    # NOTE(review): handle_calendar_delete_event also validates against this
    # model, so a delete input of just {"event_id": ...} fails with a missing
    # `mode` — confirm whether delete should use an event_id-only model.
    mode: Literal["event"]
    event_id: UUID
# Discriminated union of the three read inputs; the ``mode`` field selects
# which variant a payload is validated against.
_CalendarReadInput = Annotated[
    _CalendarReadDayInput | _CalendarReadRangeInput | _CalendarReadEventInput,
    Field(discriminator="mode"),
]
# Adapter built once at import time and used to validate payloads against the union.
_CALENDAR_READ_INPUT_ADAPTER = TypeAdapter(_CalendarReadInput)
class _CalendarInviteeInput(BaseModel):
    """A single invitee, identified by phone number."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # Plain string; no format constraint is declared on this field.
    phone: str
class _CalendarPermissionsInput(BaseModel):
    """Permission flags granted to an invitee; defaults to view-only."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    view: bool = True
    edit: bool = False
    invite: bool = False
class _CalendarInviteSubscriberInput(BaseModel):
    """Input for sharing one event with one invitee."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    event_id: UUID
    invitee: _CalendarInviteeInput
    # Omitted permissions fall back to the defaults of _CalendarPermissionsInput.
    permissions: _CalendarPermissionsInput = Field(default_factory=_CalendarPermissionsInput)
class _CalendarCreateEventInput(BaseModel):
    """Input for creating a calendar event."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    title: str = Field(min_length=1, max_length=255)
    start_at: datetime
    # Optional end time; None when omitted.
    end_at: datetime | None = None
    # Required, length-limited string; this model does not check it against
    # the timezone database.
    timezone: str = Field(min_length=1, max_length=50)
    description: str | None = Field(default=None, max_length=2000)
    metadata: ScheduleItemMetadata | None = None

    @field_validator("start_at", "end_at")
    @classmethod
    def _validate_create_datetimes(cls, value: datetime | None) -> datetime | None:
        """Reject naive datetimes; ``None`` passes through unchanged."""
        if value is not None and value.tzinfo is None:
            raise ValueError("datetime must include timezone offset")
        return value
class _CalendarUpdatePatchInput(BaseModel):
    """Partial update for an event; every field is optional and defaults to None."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # NOTE(review): an omitted field and an explicit null both read back as
    # None; a consumer that needs to tell them apart would have to inspect
    # which fields were set — confirm how the update path handles this.
    title: str | None = Field(default=None, min_length=1, max_length=255)
    description: str | None = Field(default=None, max_length=2000)
    start_at: datetime | None = None
    end_at: datetime | None = None
    timezone: str | None = Field(default=None, min_length=1, max_length=50)
    metadata: ScheduleItemMetadata | None = None
    # Free-form string here; no enum constraint is declared on this field.
    status: str | None = None

    @field_validator("start_at", "end_at")
    @classmethod
    def _validate_patch_datetimes(cls, value: datetime | None) -> datetime | None:
        """Reject naive datetimes; ``None`` passes through unchanged."""
        if value is not None and value.tzinfo is None:
            raise ValueError("datetime must include timezone offset")
        return value
class _CalendarUpdateEventInput(BaseModel):
    """Input for updating an event: the target event id plus the patch to apply."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    event_id: UUID
    patch: _CalendarUpdatePatchInput
class _CalendarInviteResponseInput(BaseModel):
    """Input carrying only an event id; used by the accept/reject invite handlers."""

    # Unknown keys are rejected rather than ignored.
    model_config = ConfigDict(extra="forbid")

    event_id: UUID
def _validate_action_input(
    request: CliCommand,
    validator: type[BaseModel] | TypeAdapter[Any],
) -> Any | CliCommandResult:
    """Validate ``request.input`` and return the parsed model or a failure result.

    Args:
        request: The command whose ``input`` payload is validated.
        validator: Either a Pydantic model class or a ``TypeAdapter``.

    Returns:
        The validated object on success. On ``ValidationError``, a failed
        ``CliCommandResult`` with code ``INVALID_ACTION_INPUT`` whose details
        list ``missing_fields`` and ``invalid_fields`` (sorted, de-duplicated
        dotted paths) and, when legacy key names are present in the payload,
        ``alias_corrections``.
    """
    try:
        if isinstance(validator, TypeAdapter):
            return validator.validate_python(request.input)
        return validator.model_validate(request.input)
    except ValidationError as exc:
        missing_paths: set[str] = set()
        invalid_paths: set[str] = set()
        for issue in exc.errors():
            loc = issue.get("loc") or ()
            if not loc:
                # Errors without a location cannot be attributed to a field.
                continue
            dotted = ".".join(map(str, loc))
            is_missing = str(issue.get("type") or "") == "missing"
            (missing_paths if is_missing else invalid_paths).add(dotted)

        details: dict[str, Any] = {
            "missing_fields": sorted(missing_paths),
            "invalid_fields": sorted(invalid_paths),
        }
        corrections = _alias_corrections_for_input(request.input)
        if corrections:
            details["alias_corrections"] = corrections

        failure = ErrorInfo(
            code="INVALID_ACTION_INPUT",
            message="input does not match method schema",
            retryable=False,
            details=details,
        )
        return CliCommandResult(
            ok=False,
            module=request.module,
            method=request.method,
            error=failure,
        )
def _alias_corrections_for_input(input_payload: dict[str, Any]) -> dict[str, str]:
    """Map legacy key names found in ``input_payload`` to their canonical names.

    Args:
        input_payload: The raw input mapping to inspect; only its keys matter.

    Returns:
        A dict from each recognised alias present in the payload to the
        canonical field name, in the fixed alias order; empty when none match.
    """
    known_aliases = (
        ("start_time", "start_at"),
        ("end_time", "end_at"),
        ("event_timezone", "timezone"),
    )
    return {
        legacy: canonical
        for legacy, canonical in known_aliases
        if legacy in input_payload
    }
async def handle_calendar_list_range(request: CliCommand) -> CliCommandResult:
from core.db.session import AsyncSessionLocal
parsed_start, parsed_end, read_error = _resolve_read_range(request)
if read_error is not None:
return _fail(request=request, code="INVALID_ARGUMENT", message=read_error)
if parsed_start is None or parsed_end is None:
return _fail(
request=request,
code="INVALID_ARGUMENT",
message="start_at and end_at are required",
)
validated = _validate_action_input(request, _CalendarReadRangeInput)
if isinstance(validated, CliCommandResult):
return validated
parsed_start = validated.start_at.astimezone(timezone.utc)
parsed_end = validated.end_at.astimezone(timezone.utc)
if parsed_start >= parsed_end:
return _fail(
request=request,
@@ -50,24 +210,75 @@ async def handle_calendar_read(request: CliCommand) -> CliCommandResult:
event_items = [schedule_event_to_dict(item) for item in items]
return CliCommandResult(
ok=True,
command="calendar",
subcommand="read",
module="calendar",
method=request.method,
data={"total": len(event_items), "items": event_items},
)
async def handle_calendar_create(request: CliCommand) -> CliCommandResult:
async def handle_calendar_list_day(request: CliCommand) -> CliCommandResult:
    """Validate a read input against the read union and dispatch on its variant.

    An ``event`` input is forwarded to ``handle_calendar_get_event`` and a
    ``range`` input to ``handle_calendar_list_range``, both with the original
    request. A ``day`` input is first converted with
    ``_day_input_to_range_input`` and then handled as a range. Validation
    failures are returned as-is.
    """
    parsed = _validate_action_input(request, _CALENDAR_READ_INPUT_ADAPTER)
    if isinstance(parsed, CliCommandResult):
        return parsed
    if isinstance(parsed, _CalendarReadEventInput):
        return await handle_calendar_get_event(request)

    range_request = request
    if not isinstance(parsed, _CalendarReadRangeInput):
        # Remaining variant is the day input: rewrite it as a range input.
        converted_input = _day_input_to_range_input(parsed)
        range_request = request.model_copy(update={"input": converted_input})
    return await handle_calendar_list_range(range_request)
async def handle_calendar_get_event(request: CliCommand) -> CliCommandResult:
from core.db.session import AsyncSessionLocal
validated = _validate_action_input(request, _CalendarReadEventInput)
if isinstance(validated, CliCommandResult):
return validated
event_id = validated.event_id
async with AsyncSessionLocal() as session:
service = create_schedule_service(session, UUID(request.owner_id))
try:
result_item = await _create_event(service, request.args)
item = await service.get_by_id(event_id)
return CliCommandResult(
ok=True,
module="calendar",
method=request.method,
data=schedule_event_to_dict(item),
)
except Exception as exc:
code, message, retryable = map_calendar_exception(exc)
return CliCommandResult(
ok=False,
module="calendar",
method=request.method,
error=ErrorInfo(code=code, message=message, retryable=retryable),
)
async def handle_calendar_create_event(request: CliCommand) -> CliCommandResult:
from core.db.session import AsyncSessionLocal
validated = _validate_action_input(request, _CalendarCreateEventInput)
if isinstance(validated, CliCommandResult):
return validated
async with AsyncSessionLocal() as session:
service = create_schedule_service(session, UUID(request.owner_id))
try:
result_item = await _create_event(service, validated)
event_id = str(result_item.get("eventId") or "")
return CliCommandResult(
ok=True,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
data={
"status": "success",
"success": 1,
@@ -80,8 +291,8 @@ async def handle_calendar_create(request: CliCommand) -> CliCommandResult:
code, message, retryable = map_calendar_exception(exc)
return CliCommandResult(
ok=False,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
data={
"status": "failure",
"success": 0,
@@ -89,7 +300,7 @@ async def handle_calendar_create(request: CliCommand) -> CliCommandResult:
"ids": [],
"results": [
{
"action": "create",
"action": request.method,
"status": "failure",
"eventId": "",
"code": code,
@@ -101,19 +312,23 @@ async def handle_calendar_create(request: CliCommand) -> CliCommandResult:
)
async def handle_calendar_update(request: CliCommand) -> CliCommandResult:
async def handle_calendar_update_event(request: CliCommand) -> CliCommandResult:
from core.db.session import AsyncSessionLocal
validated = _validate_action_input(request, _CalendarUpdateEventInput)
if isinstance(validated, CliCommandResult):
return validated
async with AsyncSessionLocal() as session:
service = create_schedule_service(session, UUID(request.owner_id))
event_id = str(request.args.get("event_id") or "").strip()
event_id = str(validated.event_id)
try:
result_item = await _update_event(service, request.args)
result_item = await _update_event(service, validated)
event_id = str(result_item.get("eventId") or event_id)
return CliCommandResult(
ok=True,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
data={
"status": "success",
"success": 1,
@@ -126,8 +341,8 @@ async def handle_calendar_update(request: CliCommand) -> CliCommandResult:
code, message, retryable = map_calendar_exception(exc)
return CliCommandResult(
ok=False,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
data={
"status": "failure",
"success": 0,
@@ -135,7 +350,7 @@ async def handle_calendar_update(request: CliCommand) -> CliCommandResult:
"ids": [],
"results": [
{
"action": "update",
"action": request.method,
"status": "failure",
"eventId": event_id,
"code": code,
@@ -147,24 +362,22 @@ async def handle_calendar_update(request: CliCommand) -> CliCommandResult:
)
async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
async def handle_calendar_delete_event(request: CliCommand) -> CliCommandResult:
from core.db.session import AsyncSessionLocal
validated = _validate_action_input(request, _CalendarReadEventInput)
if isinstance(validated, CliCommandResult):
return validated
async with AsyncSessionLocal() as session:
service = create_schedule_service(session, UUID(request.owner_id))
event_id = str(request.args.get("event_id") or "").strip()
if not event_id:
return _fail(
request=request,
code="INVALID_ARGUMENT",
message="event_id is required",
)
event_id = str(validated.event_id)
try:
await service.delete(UUID(event_id))
return CliCommandResult(
ok=True,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
data={
"status": "success",
"success": 1,
@@ -172,7 +385,7 @@ async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
"ids": [event_id],
"results": [
{
"action": "delete",
"action": request.method,
"status": "success",
"eventId": event_id,
}
@@ -183,8 +396,8 @@ async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
code, message, retryable = map_calendar_exception(exc)
return CliCommandResult(
ok=False,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
data={
"status": "failure",
"success": 0,
@@ -192,7 +405,7 @@ async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
"ids": [],
"results": [
{
"action": "delete",
"action": request.method,
"status": "failure",
"eventId": event_id,
"code": code,
@@ -204,155 +417,199 @@ async def handle_calendar_delete(request: CliCommand) -> CliCommandResult:
)
async def handle_calendar_share(request: CliCommand) -> CliCommandResult:
async def handle_calendar_invite_subscriber(request: CliCommand) -> CliCommandResult:
from core.db.session import AsyncSessionLocal
event_id = str(request.args.get("event_id", ""))
invitees = request.args.get("invitees")
if not isinstance(invitees, list):
invitees = []
validated = _validate_action_input(request, _CalendarInviteSubscriberInput)
if isinstance(validated, CliCommandResult):
return validated
event_id = str(validated.event_id)
async with AsyncSessionLocal() as session:
service = create_schedule_service(session, UUID(request.owner_id))
target_uuid = UUID(event_id)
invited: list[str] = []
result_items: list[dict[str, str]] = []
for inv in invitees:
raw_phone = inv.get("phone", "").strip()
normalized_phone = _normalize_phone(raw_phone)
if not normalized_phone:
result_items.append(
{
"phone": raw_phone,
"status": "failure",
"code": "INVALID_ARGUMENT",
"message": "invalid phone",
}
)
continue
permission = {
"permission_view": inv.get("permission_view", True),
"permission_edit": inv.get("permission_edit", False),
"permission_invite": inv.get("permission_invite", False),
}
try:
await service.share(
target_uuid,
ScheduleItemShareRequest(phone=normalized_phone, **permission),
)
invited.append(normalized_phone)
result_items.append({"phone": normalized_phone, "status": "success"})
except Exception as exc:
code, message, _ = map_calendar_exception(exc)
result_items.append(
{
"phone": normalized_phone,
"status": "failure",
"code": code,
"message": message,
}
)
failure_count = len([r for r in result_items if r["status"] == "failure"])
success_count = len(invited)
status = _batch_status(success_count, failure_count)
return CliCommandResult(
ok=status != "failure",
command=request.command,
subcommand=request.subcommand,
data={
"status": status,
"success": success_count,
"failed": failure_count,
"results": result_items,
},
)
raw_phone = validated.invitee.phone.strip()
normalized_phone = _normalize_phone(raw_phone)
if not normalized_phone:
return CliCommandResult(
ok=False,
module=request.module,
method=request.method,
data={
"status": "failure",
"success": 0,
"failed": 1,
"results": [
{
"phone": raw_phone,
"status": "failure",
"code": "INVALID_ACTION_INPUT",
"message": "invalid phone",
}
],
},
error=ErrorInfo(code="INVALID_ACTION_INPUT", message="invalid phone", retryable=False),
)
try:
await service.share(
target_uuid,
ScheduleItemShareRequest(
phone=normalized_phone,
permission_view=validated.permissions.view,
permission_edit=validated.permissions.edit,
permission_invite=validated.permissions.invite,
),
)
return CliCommandResult(
ok=True,
module=request.module,
method=request.method,
data={
"status": "success",
"success": 1,
"failed": 0,
"results": [{"phone": normalized_phone, "status": "success"}],
},
)
except Exception as exc:
code, message, retryable = map_calendar_exception(exc)
return CliCommandResult(
ok=False,
module=request.module,
method=request.method,
data={
"status": "failure",
"success": 0,
"failed": 1,
"results": [
{
"phone": normalized_phone,
"status": "failure",
"code": code,
"message": message,
}
],
},
error=ErrorInfo(code=code, message=message, retryable=retryable),
)
async def _create_event(service: Any, args: dict[str, Any]) -> dict[str, Any]:
start_at = args.get("start_at")
if not isinstance(start_at, str) or not start_at.strip():
raise ValueError("create requires start_at")
event_timezone = args.get("event_timezone")
if not isinstance(event_timezone, str) or not event_timezone.strip():
raise ValueError("create requires event_timezone")
parsed_start = parse_iso_datetime(start_at)
if parsed_start is None:
raise ValueError("invalid start_at")
async def handle_calendar_accept_invite(request: CliCommand) -> CliCommandResult:
    """Accept a shared-event invitation on behalf of the request owner.

    The request input is validated against ``_CalendarInviteResponseInput``;
    when validation fails, the failure result produced by
    ``_validate_action_input`` is returned unchanged.

    Args:
        request: CLI envelope; ``request.input`` must carry ``event_id`` and
            ``request.owner_id`` must be a UUID string.

    Returns:
        ``ok=True`` with the value returned by ``service.accept_subscription``
        as ``data``, or ``ok=False`` with the error mapped by
        ``map_calendar_exception``.
    """
    # Function-scope import kept from the original; presumably avoids an
    # import cycle with the DB layer -- confirm before hoisting.
    from core.db.session import AsyncSessionLocal

    validated = _validate_action_input(request, _CalendarInviteResponseInput)
    if isinstance(validated, CliCommandResult):
        return validated

    event_id = str(validated.event_id)
    async with AsyncSessionLocal() as session:
        service = create_schedule_service(session, UUID(request.owner_id))
        try:
            result = await service.accept_subscription(UUID(event_id))
            return CliCommandResult(
                ok=True,
                module=request.module,
                method=request.method,
                data=result,
            )
        except Exception as exc:
            # Handler boundary: every service failure becomes a structured error.
            code, message, retryable = map_calendar_exception(exc)
            return CliCommandResult(
                ok=False,
                module=request.module,
                method=request.method,
                error=ErrorInfo(code=code, message=message, retryable=retryable),
            )
async def handle_calendar_reject_invite(request: CliCommand) -> CliCommandResult:
    """Reject a shared-event invitation on behalf of the request owner.

    Validates ``request.input`` against ``_CalendarInviteResponseInput`` and
    calls ``reject_subscription`` on the owner's schedule service. A validation
    failure result is returned as-is; service exceptions are converted through
    ``map_calendar_exception`` into an ``ok=False`` result.
    """
    # Function-scope import kept from the original; presumably avoids an
    # import cycle with the DB layer -- confirm before hoisting.
    from core.db.session import AsyncSessionLocal

    payload = _validate_action_input(request, _CalendarInviteResponseInput)
    if isinstance(payload, CliCommandResult):
        # Validation already produced the failure envelope.
        return payload

    target_event = str(payload.event_id)
    async with AsyncSessionLocal() as db_session:
        owner_uuid = UUID(request.owner_id)
        schedule_service = create_schedule_service(db_session, owner_uuid)
        try:
            outcome = await schedule_service.reject_subscription(UUID(target_event))
            return CliCommandResult(
                ok=True,
                module=request.module,
                method=request.method,
                data=outcome,
            )
        except Exception as exc:
            err_code, err_message, can_retry = map_calendar_exception(exc)
            failure = ErrorInfo(code=err_code, message=err_message, retryable=can_retry)
            return CliCommandResult(
                ok=False,
                module=request.module,
                method=request.method,
                error=failure,
            )
async def _create_event(service: Any, input_payload: _CalendarCreateEventInput) -> dict[str, Any]:
    """Create one agent-generated schedule item from a validated create input.

    Args:
        service: Schedule service exposing ``create_agent_generated``.
        input_payload: Validated create input providing ``title``,
            ``start_at``, ``timezone`` and optional ``end_at``,
            ``description`` and ``metadata``.

    Returns:
        ``{"action": "create", "status": "success", "eventId": <id as str>}``.
    """
    # NOTE(review): astimezone() treats a naive datetime as local time; this
    # assumes the input model only accepts timezone-aware values -- confirm.
    parsed_start = input_payload.start_at.astimezone(timezone.utc)
    parsed_end = (
        input_payload.end_at.astimezone(timezone.utc)
        if input_payload.end_at is not None
        else None
    )
    # A blank or whitespace-only description is stored as None, not "".
    description = (input_payload.description or "").strip() or None

    created = await service.create_agent_generated(
        ScheduleItemCreateRequest(
            title=input_payload.title.strip(),
            description=description,
            start_at=parsed_start,
            end_at=parsed_end,
            timezone=input_payload.timezone.strip(),
            metadata=input_payload.metadata,
        )
    )
    return {"action": "create", "status": "success", "eventId": str(created.id)}
async def _update_event(service: Any, args: dict[str, Any]) -> dict[str, Any]:
event_id = args.get("event_id")
if not isinstance(event_id, str) or not event_id.strip():
raise ValueError("update requires event_id")
async def _update_event(service: Any, input_payload: _CalendarUpdateEventInput) -> dict[str, Any]:
event_id = str(input_payload.event_id)
patch = input_payload.patch.model_dump(exclude_unset=True)
update_data: dict[str, Any] = {}
if "title" in args:
update_data["title"] = str(args.get("title") or "").strip()
if "description" in args:
update_data["description"] = str(args.get("description") or "").strip()
if "start_at" in args:
start_value = args.get("start_at")
if not isinstance(start_value, str) or not start_value.strip():
raise ValueError("start_at must be non-empty string")
parsed_start = parse_iso_datetime(start_value)
if parsed_start is None:
raise ValueError("invalid start_at")
update_data["start_at"] = parsed_start
if "end_at" in args:
end_value = args.get("end_at")
if "title" in patch:
update_data["title"] = str(patch.get("title") or "").strip()
if "description" in patch:
update_data["description"] = str(patch.get("description") or "").strip()
if "start_at" in patch:
start_value = patch.get("start_at")
if not isinstance(start_value, datetime):
raise ValueError("start_at must be datetime with timezone")
update_data["start_at"] = start_value.astimezone(timezone.utc)
if "end_at" in patch:
end_value = patch.get("end_at")
if end_value in (None, ""):
update_data["end_at"] = None
elif isinstance(end_value, str):
parsed_end = parse_iso_datetime(end_value)
if parsed_end is None:
raise ValueError("invalid end_at")
update_data["end_at"] = parsed_end
elif isinstance(end_value, datetime):
update_data["end_at"] = end_value.astimezone(timezone.utc)
else:
raise ValueError("end_at must be string or null")
if "event_timezone" in args:
timezone_value = args.get("event_timezone")
raise ValueError("end_at must be datetime with timezone or null")
if "timezone" in patch:
timezone_value = patch.get("timezone")
if not isinstance(timezone_value, str) or not timezone_value.strip():
raise ValueError("event_timezone must be non-empty string")
raise ValueError("timezone must be non-empty string")
update_data["timezone"] = timezone_value.strip()
if "status" in args:
update_data["status"] = ScheduleItemStatus(str(args.get("status")))
if "status" in patch:
update_data["status"] = ScheduleItemStatus(str(patch.get("status")))
if any(key in args for key in ("location", "color", "reminder_minutes")):
if "metadata" in patch:
existing = await service.get_by_id(UUID(event_id))
update_data["metadata"] = merge_schedule_metadata_for_update(
existing_metadata=existing.metadata,
location=args.get("location"),
color=args.get("color"),
reminder_minutes=args.get("reminder_minutes"),
)
metadata_payload = patch.get("metadata")
if metadata_payload is None:
update_data["metadata"] = ScheduleItemMetadata.model_validate({})
else:
metadata_dict = (
metadata_payload.model_dump() if isinstance(metadata_payload, ScheduleItemMetadata) else metadata_payload
)
update_data["metadata"] = ScheduleItemMetadata.model_validate(
{
**(existing.metadata.model_dump() if existing.metadata else {}),
**metadata_dict,
}
)
if not update_data:
raise ValueError("update requires at least one mutable field")
raise ValueError("patch requires at least one mutable field")
changed_fields = sorted(update_data.keys())
updated = await service.update(
@@ -395,55 +652,34 @@ def _batch_status(success: int, failed: int) -> str:
return "partial"
def _resolve_read_range(
request: CliCommand,
) -> tuple[datetime | None, datetime | None, str | None]:
start_at = str(request.args.get("start_at", "")).strip()
end_at = str(request.args.get("end_at", "")).strip()
if start_at and end_at:
try:
return parse_iso_datetime(start_at), parse_iso_datetime(end_at), None
except ValueError as exc:
return None, None, str(exc)
raw_date = str(request.args.get("date", "")).strip()
if not raw_date:
return None, None, None
timezone_name = (
str(request.args.get("timezone", "Asia/Shanghai")).strip() or "Asia/Shanghai"
)
def _day_input_to_range_input(input_payload: _CalendarReadDayInput) -> dict[str, str]:
    """Convert a one-day read input into the equivalent ``range`` read input.

    The range runs from local midnight of ``input_payload.date`` in the
    requested timezone to local midnight of the following day.

    Args:
        input_payload: Day-mode read input providing ``date`` (with ``year``,
            ``month`` and ``day``) and ``timezone`` (an IANA zone name; a blank
            value falls back to ``Asia/Shanghai``).

    Returns:
        A dict with ``mode="range"`` and ISO 8601 ``start_at`` / ``end_at``
        strings carrying the zone's UTC offset.

    Raises:
        ValueError: If the timezone name cannot be resolved.
    """
    timezone_name = input_payload.timezone.strip() or "Asia/Shanghai"
    try:
        zone = ZoneInfo(timezone_name)
    except Exception as exc:
        # Any ZoneInfo failure is reported to callers as one ValueError.
        raise ValueError("timezone is invalid") from exc

    start_local = datetime(
        year=input_payload.date.year,
        month=input_payload.date.month,
        day=input_payload.date.day,
        hour=0,
        minute=0,
        second=0,
        tzinfo=zone,
    )
    # Aware-datetime arithmetic is done on wall-clock time, so this is the
    # next local midnight.
    end_local = start_local + timedelta(days=1)
    return {
        "mode": "range",
        "start_at": start_local.isoformat(),
        "end_at": end_local.isoformat(),
    }
def _fail(*, request: CliCommand, code: str, message: str) -> CliCommandResult:
    """Build a non-retryable failure result for ``request``.

    The result echoes the request's ``module`` and ``method`` so the caller
    can tell which call failed.
    """
    return CliCommandResult(
        ok=False,
        module=request.module,
        method=request.method,
        error=ErrorInfo(code=code, message=message, retryable=False),
    )
@@ -20,8 +20,8 @@ async def handle_contacts_read(request: CliCommand) -> CliCommandResult:
contacts = await _list_friend_contacts(session=session, owner_id=UUID(request.owner_id))
return CliCommandResult(
ok=True,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
data={
"friends_count": len(contacts),
"friends": contacts,
@@ -17,11 +17,15 @@ from schemas.domain.memory_content import UserMemoryContent, WorkProfileContent
async def handle_memory_update(request: CliCommand) -> CliCommandResult:
from core.db.session import AsyncSessionLocal
operations = request.args.get("operations")
operations = request.input.get("operations")
if not isinstance(operations, list) or not operations:
return _invalid_argument(
request=request,
message="operations must be a non-empty list",
details={
"required_fields": ["operations"],
"field_types": {"operations": "array of objects"},
},
)
async with AsyncSessionLocal() as session:
@@ -135,8 +139,8 @@ async def handle_memory_update(request: CliCommand) -> CliCommandResult:
return CliCommandResult(
ok=status != "failure",
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
data={
"status": status,
"success": success_count,
@@ -233,12 +237,22 @@ async def _apply_delete_operation(
}
def _invalid_argument(
    *,
    request: CliCommand,
    message: str,
    details: dict[str, Any] | None = None,
) -> CliCommandResult:
    """Build a non-retryable ``INVALID_ARGUMENT`` failure for ``request``.

    Args:
        request: The command being rejected; its ``module`` and ``method``
            are echoed in the result.
        message: Explanation of what is wrong with the input.
        details: Optional structured hints, such as required fields and their
            types. Defaults to ``None`` so callers that pass only ``request``
            and ``message`` keep working.
    """
    return CliCommandResult(
        ok=False,
        module=request.module,
        method=request.method,
        error=ErrorInfo(
            code="INVALID_ARGUMENT",
            message=message,
            retryable=False,
            details=details,
        ),
    )
@@ -1,11 +1,13 @@
from __future__ import annotations
from core.agentscope.tools.cli.handler_calendar import (
handle_calendar_create,
handle_calendar_delete,
handle_calendar_read,
handle_calendar_share,
handle_calendar_update,
handle_calendar_accept_invite,
handle_calendar_create_event,
handle_calendar_delete_event,
handle_calendar_invite_subscriber,
handle_calendar_list_day,
handle_calendar_reject_invite,
handle_calendar_update_event,
)
from core.agentscope.tools.cli.handler_contacts import handle_contacts_read
from core.agentscope.tools.cli.handler_memory import handle_memory_update
@@ -14,11 +16,13 @@ from core.agentscope.tools.cli.router import CommandRouter
def build_router() -> CommandRouter:
    """Create the CLI router with every supported ``(module, method)`` pair.

    Each pair is registered exactly once; ``CommandRouter.register`` raises
    ``ValueError`` on a duplicate pair.
    """
    router = CommandRouter()

    # calendar: "read" is served by one handler; the SKILL.md contract selects
    # day / range / event via the input's ``mode`` field.
    router.register(module="calendar", method="read", handler=handle_calendar_list_day)
    router.register(module="calendar", method="create", handler=handle_calendar_create_event)
    router.register(module="calendar", method="update", handler=handle_calendar_update_event)
    router.register(module="calendar", method="delete", handler=handle_calendar_delete_event)
    router.register(module="calendar", method="share", handler=handle_calendar_invite_subscriber)
    router.register(module="calendar", method="accept_invite", handler=handle_calendar_accept_invite)
    router.register(module="calendar", method="reject_invite", handler=handle_calendar_reject_invite)

    # contacts / memory
    router.register(module="contacts", method="read", handler=handle_contacts_read)
    router.register(module="memory", method="update", handler=handle_memory_update)
    return router
@@ -10,9 +10,9 @@ from schemas.agent.runtime_models import ErrorInfo
class CliCommand(BaseModel):
    """Request envelope dispatched by the CLI router.

    Unknown fields are rejected (``extra="forbid"``), so callers must use the
    module/method/input names exactly.
    """

    model_config = ConfigDict(extra="forbid")

    # Business module namespace, e.g. "calendar", "contacts", "memory".
    module: str
    # Method within the module; the router keys handlers on (module, method).
    method: str
    # Method-specific input object; each handler validates its own shape.
    input: dict[str, Any] = Field(default_factory=dict)
    # Owner identifier as a string; the calendar handlers parse it as a UUID.
    owner_id: str
@@ -20,7 +20,7 @@ class CliCommandResult(BaseModel):
model_config = ConfigDict(extra="forbid")
ok: bool
command: str
subcommand: str
module: str
method: str
data: Any = None
error: ErrorInfo | None = None
+24 -24
View File
@@ -17,30 +17,30 @@ class CommandRouter:
def __init__(self) -> None:
self._handlers: dict[tuple[str, str], CliHandler] = {}
def register(self, *, command: str, subcommand: str, handler: CliHandler) -> None:
key = (command, subcommand)
def register(self, *, module: str, method: str, handler: CliHandler) -> None:
key = (module, method)
if key in self._handlers:
raise ValueError(f"command already registered: {command} {subcommand}")
raise ValueError(f"method already registered: {module} {method}")
self._handlers[key] = handler
@property
def commands(self) -> set[str]:
return {command for command, _ in self._handlers.keys()}
def modules(self) -> set[str]:
return {module for module, _ in self._handlers.keys()}
@property
def command_pairs(self) -> set[tuple[str, str]]:
def module_methods(self) -> set[tuple[str, str]]:
return set(self._handlers.keys())
async def dispatch(self, request: CliCommand) -> CliCommandResult:
handler = self._handlers.get((request.command, request.subcommand))
handler = self._handlers.get((request.module, request.method))
if handler is None:
return CliCommandResult(
ok=False,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
error=ErrorInfo(
code="UNKNOWN_COMMAND",
message=f"unknown command: {request.command} {request.subcommand}",
code="UNKNOWN_METHOD",
message=f"unknown method: {request.module} {request.method}",
retryable=False,
),
)
@@ -49,14 +49,14 @@ class CommandRouter:
except Exception as exc:
logger.error(
"CLI handler failed",
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
error=str(exc),
)
return CliCommandResult(
ok=False,
command=request.command,
subcommand=request.subcommand,
module=request.module,
method=request.method,
error=ErrorInfo(
code="HANDLER_ERROR",
message=str(exc),
@@ -75,11 +75,11 @@ async def cli_main(argv: list[str] | None = None) -> None:
_write_output(
CliCommandResult(
ok=False,
command=argv[0] if argv else "",
subcommand=argv[1] if len(argv) > 1 else "",
module=argv[0] if argv else "",
method=argv[1] if len(argv) > 1 else "",
error=ErrorInfo(
code="MISSING_COMMAND",
message="command and subcommand are required",
code="MISSING_METHOD",
message="module and method are required",
retryable=False,
),
)
@@ -94,17 +94,17 @@ async def cli_main(argv: list[str] | None = None) -> None:
_write_output(
CliCommandResult(
ok=False,
command=argv[0],
subcommand=argv[1],
module=argv[0],
method=argv[1],
error=ErrorInfo(
code="INVALID_ARGS",
message="args must be valid JSON",
code="INVALID_INPUT",
message="input must be valid JSON",
retryable=False,
),
)
)
sys.exit(1)
request = CliCommand(command=argv[0], subcommand=argv[1], args=args, owner_id=str(args.get("owner_id", "")))
request = CliCommand(module=argv[0], method=argv[1], input=args, owner_id=str(args.get("owner_id", "")))
result = await router.dispatch(request)
_write_output(result)
if not result.ok:
@@ -9,16 +9,19 @@ from core.agentscope.tools.cli import invoke_cli_tool
PROJECT_CLI_TOOL_NAME = "project_cli"
def make_project_cli_wrapper(*, allowed_commands: set[str]) -> Any:
def make_project_cli_wrapper(
*,
allowed_commands: set[str],
) -> Any:
async def _project_cli(
command: str,
subcommand: str,
args: dict[str, Any] | None = None,
module: str,
method: str,
input: dict[str, Any],
) -> ToolResponse:
tool_call_args = {
"command": command,
"subcommand": subcommand,
"args": args or {},
"module": module,
"method": method,
"input": input,
}
return await invoke_cli_tool(
tool_name=PROJECT_CLI_TOOL_NAME,
@@ -27,12 +30,14 @@ def make_project_cli_wrapper(*, allowed_commands: set[str]) -> Any:
)
_project_cli.__name__ = PROJECT_CLI_TOOL_NAME
_project_cli.__doc__ = """Execute CLI commands for calendar, contacts, and memory operations.
_project_cli.__doc__ = """Execute business methods for enabled modules (calendar, contacts, memory, etc.).
You MUST read the relevant skill file via view_skill_file before calling this tool to learn the correct method names and input shapes for each module. Do not guess input fields.
Args:
command: The command to execute (calendar, contacts, memory).
subcommand: The subcommand for the operation (calendar: create/read/update/delete/share; contacts: read; memory: update).
args: Arguments for the command as a JSON object.
module: Business module namespace (e.g., calendar, contacts, memory).
method: Module method to execute. Valid methods are listed in each module's skill file.
input: Method-specific input object. Shape depends on module and method -- read the skill file first.
Returns:
ToolResponse with the command result.
@@ -6,11 +6,23 @@ from typing import Any
from agentscope.message import TextBlock
from agentscope.tool import ToolResponse
from core.agentscope.tools.skill_session import SkillSessionState
from core.agentscope.tools.tool_call_context import (
get_current_tool_call_id,
store_tool_agent_output,
)
from core.agentscope.utils.parsing import project_tool_result_text
from schemas.agent.runtime_models import ErrorInfo, ToolAgentOutput, ToolStatus
SKILLS_DIR = Path(__file__).parent.parent / "skills"
VIEW_SKILL_FILE_TOOL_NAME = "view_skill_file"
def make_view_skill_file_wrapper(*, enabled_skill_names: set[str]) -> Any:
def make_view_skill_file_wrapper(
*,
enabled_skill_names: set[str],
skill_session: SkillSessionState,
) -> Any:
skills_root = SKILLS_DIR.resolve()
async def _view_skill_file(
@@ -23,13 +35,20 @@ def make_view_skill_file_wrapper(*, enabled_skill_names: set[str]) -> Any:
parts = normalized.split("/")
if not parts:
return _error_response("INVALID_PATH", "file_path cannot be empty")
return _error_response(
file_path=file_path,
ranges=ranges,
code="INVALID_PATH",
message="file_path cannot be empty",
)
skill_name = parts[0]
if skill_name not in enabled_skill_names:
return _error_response(
"ACCESS_DENIED",
f"skill '{skill_name}' is not enabled. Enabled skills: {sorted(enabled_skill_names)}",
file_path=file_path,
ranges=ranges,
code="ACCESS_DENIED",
message=f"skill '{skill_name}' is not enabled. Enabled skills: {sorted(enabled_skill_names)}",
)
target_path = skills_root / normalized
@@ -37,15 +56,30 @@ def make_view_skill_file_wrapper(*, enabled_skill_names: set[str]) -> Any:
target_path = target_path.resolve()
target_path.relative_to(skills_root)
except Exception:
return _error_response("ACCESS_DENIED", "access denied: path outside skills directory")
return _error_response(
file_path=file_path,
ranges=ranges,
code="ACCESS_DENIED",
message="access denied: path outside skills directory",
)
if not target_path.exists() or not target_path.is_file():
return _error_response("FILE_NOT_FOUND", f"file not found: {file_path}")
return _error_response(
file_path=file_path,
ranges=ranges,
code="FILE_NOT_FOUND",
message=f"file not found: {file_path}",
)
try:
content = target_path.read_text(encoding="utf-8")
except Exception as exc:
return _error_response("READ_ERROR", f"failed to read file: {exc}")
return _error_response(
file_path=file_path,
ranges=ranges,
code="READ_ERROR",
message=f"failed to read file: {exc}",
)
lines = content.splitlines()
if ranges and len(ranges) >= 2:
@@ -54,6 +88,17 @@ def make_view_skill_file_wrapper(*, enabled_skill_names: set[str]) -> Any:
lines = lines[start - 1 : end]
text = "\n".join(lines)
skill_session.mark_read(skill_name=skill_name)
tool_call_id = get_current_tool_call_id(tool_name=VIEW_SKILL_FILE_TOOL_NAME)
payload = ToolAgentOutput(
tool_name=VIEW_SKILL_FILE_TOOL_NAME,
tool_call_id=tool_call_id,
tool_call_args={"file_path": normalized, "ranges": ranges},
status=ToolStatus.SUCCESS,
result={"file_path": normalized, "content": text},
).model_dump(mode="json", exclude_none=True)
store_tool_agent_output(tool_call_id=tool_call_id, payload=payload)
return ToolResponse(
content=[
@@ -78,14 +123,30 @@ Returns:
ToolResponse with the file content.
"""
return _view_skill_file
def _error_response(
    *,
    file_path: str,
    ranges: list[int] | None,
    code: str,
    message: str,
) -> ToolResponse:
    """Record a failed ``view_skill_file`` call and return its tool response.

    A ``ToolAgentOutput`` with ``FAILURE`` status is stored for the current
    tool call id, then a ``ToolResponse`` carrying the same failure summary as
    text is returned.

    Args:
        file_path: The path argument the tool was called with, echoed into the
            stored tool-call args.
        ranges: The line-range argument the tool was called with, echoed
            likewise.
        code: Error code, e.g. ``"ACCESS_DENIED"``.
        message: Error description.
    """
    tool_call_id = get_current_tool_call_id(tool_name=VIEW_SKILL_FILE_TOOL_NAME)
    # One summary feeds both the stored payload and the returned text.
    failure = {"status": "failure", "code": code, "message": message}
    payload = ToolAgentOutput(
        tool_name=VIEW_SKILL_FILE_TOOL_NAME,
        tool_call_id=tool_call_id,
        tool_call_args={"file_path": file_path, "ranges": ranges},
        status=ToolStatus.FAILURE,
        result=dict(failure),
        error=ErrorInfo(code=code, message=message, retryable=False),
    ).model_dump(mode="json", exclude_none=True)
    store_tool_agent_output(tool_call_id=tool_call_id, payload=payload)
    return ToolResponse(
        content=[
            TextBlock(
                type="text",
                text=project_tool_result_text(failure),
            )
        ]
    )
@@ -0,0 +1,15 @@
from __future__ import annotations
# Preamble text for the skills section of the agent prompt: tells the model
# that the entries after it are indexes only and that each skill's SKILL.md
# must be read with `view_skill_file` before the first `project_cli` call.
AGENT_SKILL_INSTRUCTION = """# Agent Skills
The entries below are skill indexes, not full execution instructions.
Before the first `project_cli` call for a skill in a run, you MUST read that skill's `SKILL.md` with `view_skill_file`.
Use the exact relative `file_path` shown below.
If the skill index tells you to inspect one method card, read that file with `view_skill_file` before calling `project_cli`.
Do not guess skill instructions from the summary alone.
"""
# Per-skill index entry with `{name}` and `{description}` placeholders.
# NOTE(review): presumably filled via str.format once per enabled skill --
# the call site is not visible here; confirm.
AGENT_SKILL_TEMPLATE = """## {name}
{description}
Read with `view_skill_file` using `file_path="{name}/SKILL.md"` before using `project_cli` for this skill."""
@@ -0,0 +1,16 @@
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class SkillSessionState:
    """Tracks which skills have been marked as read.

    Names are compared after stripping surrounding whitespace, so
    ``" calendar "`` and ``"calendar"`` refer to the same skill.
    """

    # Whitespace-stripped names of the skills marked as read so far.
    read_skill_names: set[str] = field(default_factory=set)

    def mark_read(self, *, skill_name: str) -> None:
        """Record ``skill_name`` as read; blank names are ignored."""
        cleaned = skill_name.strip()
        if not cleaned:
            return
        self.read_skill_names.add(cleaned)

    def has_read(self, *, skill_name: str) -> bool:
        """Return whether ``skill_name`` has been marked as read."""
        cleaned = skill_name.strip()
        return cleaned in self.read_skill_names
@@ -1,121 +1,128 @@
---
name: calendar
description: Calendar event management - read, create, update, delete, and share events.
description: Calendar event management via project_cli.
---
# Calendar Skill
## Execution Protocol
Read this file before the first calendar tool call in a run, then call `project_cli` with the correct `module`, `method`, and `input`.
1. On first calendar use in a run, call `view_skill_file` with `calendar/SKILL.md` before any `project_cli` call.
2. After reading, use `project_cli` only with `command="calendar"`.
3. If the user asks for actual schedule data, use `project_cli` to verify it. Do not guess results.
## Method: read
## When to Use
All calendar queries use `method="read"`. The `input` must contain `mode` plus mode-specific fields.
- User asks about their schedule or upcoming events
- User wants to create, update, or delete calendar events
- User wants to share a calendar event with someone
- User asks about event details within a date range
## Available Tool
Use the single tool `project_cli`.
Read this file first with `view_skill_file` when calendar is the relevant skill.
### Read Events
Call `project_cli` with:
### Query one day (today, tomorrow, a specific date)
```json
{
"command": "calendar",
"subcommand": "read",
"args": {
"start_at": "2026-04-21T00:00:00+08:00",
"end_at": "2026-04-22T00:00:00+08:00"
"module": "calendar",
"method": "read",
"input": {
"mode": "day",
"date": "YYYY-MM-DD",
"timezone": "Area/Zone"
}
}
```
Use this whenever the user asks what is scheduled, free, upcoming, or happening in a time range.
To resolve "today" or relative dates: extract the date part (before the T) from `system_time_local` in USER_CONTEXT_JSON. Use `timezone_effective` for timezone.
### Create Event
Call `project_cli` with:
### Query a time range
```json
{
"command": "calendar",
"subcommand": "create",
"args": {
"title": "Project sync",
"start_at": "2026-04-21T10:00:00+08:00",
"end_at": "2026-04-21T11:00:00+08:00",
"event_timezone": "Asia/Shanghai"
"module": "calendar",
"method": "read",
"input": {
"mode": "range",
"start_at": "2026-04-24T09:00:00+08:00",
"end_at": "2026-04-24T18:00:00+08:00"
}
}
```
### Update Event
Call `project_cli` with:
### Query a known event by ID
```json
{
"command": "calendar",
"subcommand": "update",
"args": {
"event_id": "<uuid>",
"title": "Updated title"
"module": "calendar",
"method": "read",
"input": {
"mode": "event",
"event_id": "550e8400-e29b-41d4-a716-446655440000"
}
}
```
### Delete Event
Call `project_cli` with:
## Method: create
```json
{
"command": "calendar",
"subcommand": "delete",
"args": {
"event_id": "<uuid>"
"module": "calendar",
"method": "create",
"input": {
"title": "Meeting title",
"start_at": "2026-04-24T10:00:00+08:00",
"end_at": "2026-04-24T11:00:00+08:00",
"timezone": "Asia/Shanghai"
}
}
```
Read first if you need to confirm the write payload shape instead of relying on memory.
### Share Events
Call `project_cli` with:
## Method: update
```json
{
"command": "calendar",
"subcommand": "share",
"args": {
"event_id": "<uuid>",
"invitees": []
"module": "calendar",
"method": "update",
"input": {
"event_id": "UUID",
"patch": { "title": "New title" }
}
}
```
## Composition Patterns
## Method: delete
1. To share an event with a friend:
- Call `view_skill_file` with `contacts/SKILL.md` if contacts instructions have not been read in this run
- Call `project_cli` `contacts read` to find friend phone numbers
- Call `project_cli` `calendar share` with the selected phone
```json
{
"module": "calendar",
"method": "delete",
"input": { "event_id": "UUID" }
}
```
2. To update a specific event:
- Call `project_cli` `calendar read` to find the event_id
- Call `project_cli` `calendar update` with target fields
## Method: share
## Failure Recovery
```json
{
"module": "calendar",
"method": "share",
"input": {
"event_id": "UUID",
"invitee": { "phone": "+8613800138000" }
}
}
```
- If `calendar create/update/delete` returns failure, report why and suggest retrying with corrected parameters.
- If `calendar share` fails for a phone, suggest verifying the phone number with `contacts read`.
## Methods: accept_invite, reject_invite
```json
{
"module": "calendar",
"method": "accept_invite",
"input": { "event_id": "UUID" }
}
```
## Rules
- Always fill `input` with all required fields. Never pass `input: {}`.
- Use `timezone_effective` from USER_CONTEXT_JSON as the default timezone.
- Resolve relative dates (today, tomorrow) to concrete YYYY-MM-DD from `system_time_local` in USER_CONTEXT_JSON before calling.
- Do not use old field names: command, subcommand, args, start_time, end_time, event_timezone.
## Composition
- To share an event but you only have a person description: read `contacts/SKILL.md` first, find the phone number, then call share.
- To update/delete an ambiguous event: call read first to list candidates, then call the mutation.
@@ -0,0 +1,22 @@
# accept_invite
## Input Schema
- `input.event_id`: required, `string`, UUID
## Output Shape
- success: subscription response object
- failure: `error.code`, `error.message`, `error.details`
Use when accepting a shared event invitation.
```json
{
"module": "calendar",
"method": "accept_invite",
"input": {
"event_id": "550e8400-e29b-41d4-a716-446655440000"
}
}
```
@@ -0,0 +1,36 @@
# create_event
Use when creating a new event.
## Input Schema
- `input.title`: required, `string`
- `input.start_at`: required, `string`, ISO 8601 datetime
- `input.timezone`: required, `string`, IANA timezone
- `input.end_at`: optional, `string | null`, ISO 8601 datetime
- `input.description`: optional, `string | null`
- `input.metadata`: optional, `object | null`
## Output Shape
- success: `data.status`, `data.success`, `data.failed`, `data.ids`, `data.results`
- failure: `error.code`, `error.message`, `error.details`
```json
{
"module": "calendar",
"method": "create",
"input": {
"title": "Project sync",
"start_at": "2026-04-23T10:00:00+08:00",
"end_at": "2026-04-23T11:00:00+08:00",
"timezone": "Asia/Shanghai",
"description": "Weekly planning"
}
}
```
## Rules
- Use `timezone`, not `event_timezone`.
- Use `start_at` and `end_at`, not `start_time` or `end_time`.
@@ -0,0 +1,22 @@
# delete_event
## Input Schema
- `input.event_id`: required, `string`, UUID
## Output Shape
- success: `data.status`, `data.success`, `data.failed`, `data.ids`, `data.results`
- failure: `error.code`, `error.message`, `error.details`
Use when deleting one known event.
```json
{
"module": "calendar",
"method": "delete",
"input": {
"event_id": "550e8400-e29b-41d4-a716-446655440000"
}
}
```
@@ -0,0 +1,26 @@
# get_event
Use when the user already knows the target event identity.
## Input Schema
- `input.mode`: required, `string`, must be `"event"`
- `input.event_id`: required, `string`, UUID
## Output Shape
- success: `data.id`, `data.title`, `data.start_at`, `data.end_at`, ...
- failure: `error.code`, `error.message`, `error.details`
```json
{
"module": "calendar",
"method": "read",
"input": {
"mode": "event",
"event_id": "550e8400-e29b-41d4-a716-446655440000"
}
}
```
## Rules
- Prefer this over list actions when an `event_id` is already available.
@@ -0,0 +1,40 @@
# invite_subscriber
Use when sharing an event with one phone number.
## Input Schema
- `input.event_id`: required, `string`, UUID
- `input.invitee`: required, `object`
- `input.invitee.phone`: required, `string`
- `input.permissions`: optional, `object`
- `input.permissions.view`: optional, `bool`
- `input.permissions.edit`: optional, `bool`
- `input.permissions.invite`: optional, `bool`
## Output Shape
- success: `data.status`, `data.success`, `data.failed`, `data.results`
- failure: `error.code`, `error.message`, `error.details`
```json
{
"module": "calendar",
"method": "share",
"input": {
"event_id": "550e8400-e29b-41d4-a716-446655440000",
"invitee": {
"phone": "+8613800138000"
},
"permissions": {
"view": true,
"edit": false,
"invite": false
}
}
}
```
## Rules
- Look up the phone number with `contacts` first if needed.
@@ -0,0 +1,31 @@
# list_day
Use when the user asks about one calendar day in a local timezone.
## Input Schema
- `input.mode`: required, `string`, must be `"day"`
- `input.date`: required, `string`, format `YYYY-MM-DD`
- `input.timezone`: optional, `string`, IANA timezone like `Asia/Shanghai`
## Output Shape
- success: `data.total: int`, `data.items: array`
- failure: `error.code`, `error.message`, `error.details`
```json
{
"module": "calendar",
"method": "read",
"input": {
"mode": "day",
"date": "2026-04-23",
"timezone": "Asia/Shanghai"
}
}
```
## Rules
- `input` must not be empty.
- `date` must be a concrete date string, not an empty object.
- For words like today or tomorrow, convert them to a concrete `YYYY-MM-DD` date from `system_time_local` before calling `project_cli`.
- Use `get_event` instead if you already have an `event_id`.
@@ -0,0 +1,29 @@
# list_range
Use when the user asks for a specific time range.
## Input Schema
- `input.mode`: required, `string`, must be `"range"`
- `input.start_at`: required, `string`, ISO 8601 datetime
- `input.end_at`: required, `string`, ISO 8601 datetime
## Output Shape
- success: `data.total: int`, `data.items: array`
- failure: `error.code`, `error.message`, `error.details`
```json
{
"module": "calendar",
"method": "read",
"input": {
"mode": "range",
"start_at": "2026-04-23T09:00:00+08:00",
"end_at": "2026-04-23T18:00:00+08:00"
}
}
```
## Rules
- `start_at` and `end_at` must both be present.
- Do not send `event_id` to list actions.
@@ -0,0 +1,22 @@
# reject_invite
## Input Schema
- `input.event_id`: required, `string`, UUID
## Output Shape
- success: subscription response object
- failure: `error.code`, `error.message`, `error.details`
Use when rejecting a shared event invitation.
```json
{
"module": "calendar",
"method": "reject_invite",
"input": {
"event_id": "550e8400-e29b-41d4-a716-446655440000"
}
}
```
@@ -0,0 +1,39 @@
# update_event
Use when changing one known event.
## Input Schema
- `input.event_id`: required, `string`, UUID
- `input.patch`: required, `object`
- `input.patch.title`: optional, `string`
- `input.patch.description`: optional, `string | null`
- `input.patch.start_at`: optional, `string | null`, ISO 8601 datetime
- `input.patch.end_at`: optional, `string | null`, ISO 8601 datetime
- `input.patch.timezone`: optional, `string`
- `input.patch.metadata`: optional, `object | null`
- `input.patch.status`: optional, `string`
## Output Shape
- success: `data.status`, `data.success`, `data.failed`, `data.ids`, `data.results`
- failure: `error.code`, `error.message`, `error.details`
```json
{
"skill": "calendar",
"action": "update_event",
"input": {
"event_id": "550e8400-e29b-41d4-a716-446655440000",
"patch": {
"title": "Updated title",
"timezone": "Asia/Shanghai"
}
}
}
```
## Rules
- All mutable fields go inside `patch`.
- Do not put mutable fields at the top level.
@@ -8,7 +8,7 @@ description: Contact lookup - find friend information including phone numbers fo
## Execution Protocol
1. On first contacts use in a run, call `view_skill_file` with `contacts/SKILL.md` before any `project_cli` call.
2. After reading, use `project_cli` only with `command="contacts"`.
2. After reading, use `project_cli` only with `module="contacts"`, `method="read"`, and JSON-native `input`.
3. If contact data is needed for a later action, fetch it first instead of inventing phone numbers or friend matches.
## When to Use
@@ -23,15 +23,23 @@ Use the single tool `project_cli`.
Read this file first with `view_skill_file` when contacts is the relevant skill.
## Calling Contract
- `module`: required, must be `contacts`
- `method`: required, must be `read`
- `input`: required, must be `{}`
- Output success fields: `data.friends_count`, `data.friends`
- Output failure fields: `error.code`, `error.message`, `error.details`
### Read Contacts
Call `project_cli` with:
```json
{
"module": "contacts",
"method": "read",
"input": {}
}
```
@@ -43,11 +51,11 @@ Returns:
1. To share an event:
- Call `view_skill_file` with `calendar/SKILL.md` if calendar instructions have not been read in this run
- Call `project_cli` `contacts read` to get friend candidates
- Call `project_cli` with `module="contacts"`, `method="read"` to get friend candidates
- Match user's description to a friend
- Call `project_cli` `calendar share` with the friend's phone
- Call `project_cli` with `module="calendar"`, `method="share"` and the friend's phone
## Failure Recovery
- If no friends found, inform the user they have no contacts yet
- If lookup fails, suggest retrying
- If lookup fails, inspect `error.details` and retry only with the documented input shape
@@ -8,7 +8,7 @@ description: User memory management - store and forget personal facts and work p
## Execution Protocol
1. On first memory use in a run, call `view_skill_file` with `memory/SKILL.md` before any `project_cli` call.
2. After reading, use `project_cli` only with `command="memory"`.
2. After reading, use `project_cli` only with `module="memory"`, `method="update"`, and JSON-native `input`.
3. If the user asks to remember or forget something, execute `project_cli`; do not claim persistence without the tool result.
## When to Use
@@ -24,15 +24,23 @@ Use the single tool `project_cli`.
Read this file first with `view_skill_file` when memory is the relevant skill.
## Calling Contract
- `module`: required, must be `memory`
- `method`: required, must be `update`
- `input.operations`: required, non-empty array
- Output success fields: `data.status`, `data.success`, `data.failed`, `data.results`
- Output failure fields: `error.code`, `error.message`, `error.details`
### Update Memory
Call `project_cli` with:
```json
{
"module": "memory",
"method": "update",
"input": {
"operations": [
{
"action": "update",
@@ -50,15 +58,26 @@ Operation object fields:
- `update` requires matching content payload (`user_content` / `work_content`)
- `delete` requires `forget_paths`
Field requirements:
- `operations[].action`: required, `string`
- `operations[].memory_type`: required, `string`
- `operations[].user_content`: required for `memory_type=user` and `action=update`, `object`
- `operations[].work_content`: required for `memory_type=work` and `action=update`, `object`
- `operations[].forget_paths`: required for `action=delete`, `array[string]`
## Composition Patterns
1. When user says "remember that I prefer morning meetings":
- Call `project_cli` `memory update` with `action=update`, `memory_type=user`, and appropriate content
- Call `project_cli` with `module="memory"`, `method="update"`, and appropriate content
2. When user says "forget my old address":
- Call `project_cli` `memory update` with `action=delete` and the specific dot-path
- Call `project_cli` with `module="memory"`, `method="update"`, `operations[0].action="delete"`, and the specific dot-path
## Protocol Reminder
- Never use old `command/subcommand/args` fields for memory writes.
## Failure Recovery
- If write fails, inform the user and suggest rephrasing
- If write fails, inspect `error.details` and retry with the documented field shape only
- If forget path is invalid, suggest checking the data structure
@@ -14,7 +14,6 @@ _TOOL_AGENT_OUTPUT_STORE: ContextVar[dict[str, dict[str, Any]] | None] = Context
default=None,
)
def set_current_tool_call_id(tool_call_id: str | None) -> Token[str | None]:
    """Store ``tool_call_id`` in ``_CURRENT_TOOL_CALL_ID`` and return the token.

    The token is whatever ``_CURRENT_TOOL_CALL_ID.set(...)`` hands back; the
    caller can keep it to restore the previous value later.
    """
    reset_token = _CURRENT_TOOL_CALL_ID.set(tool_call_id)
    return reset_token
@@ -7,18 +7,18 @@ from schemas.agent.runtime_models import ToolAgentOutput, ToolStatus
from schemas.agent.ui_hints import UiHintIntent, UiHintsPayload, UiHintStatus
def _resolve_method_key(tool_output: ToolAgentOutput) -> tuple[str, str] | None:
    """Return the ``(module, method)`` pair that identifies a tool call.

    The pair is looked up first in ``tool_output.tool_call_args`` and then in
    ``tool_output.result``. Within each source the current
    ``module``/``method`` fields win; the legacy ``command``/``subcommand``
    fields are still accepted so older payloads keep resolving.

    Returns:
        The stripped, non-empty pair, or ``None`` when neither source carries
        a complete pair.
    """
    field_pairs = (("module", "method"), ("command", "subcommand"))
    for source in (tool_output.tool_call_args, tool_output.result):
        # ``result`` may be a plain string and the args may be missing.
        if not isinstance(source, dict):
            continue
        for first_field, second_field in field_pairs:
            # ``or ""`` keeps an explicit ``None`` from becoming the text "None".
            first = str(source.get(first_field) or "").strip()
            second = str(source.get(second_field) or "").strip()
            if first and second:
                return first, second
    return None


# Backward-compatible alias for callers that still use the previous name.
_resolve_command_key = _resolve_method_key
@@ -84,6 +84,9 @@ def _calendar_read_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | No
if data is None:
return None
if "id" in data:
return _calendar_get_event_ui_hints(tool_output)
items_raw = data.get("items")
events = [item for item in items_raw if isinstance(item, dict)] if isinstance(items_raw, list) else []
list_items: list[dict[str, Any]] = []
@@ -116,6 +119,38 @@ def _calendar_read_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | No
)
def _calendar_get_event_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | None:
    """Build status UI hints describing a single calendar event.

    Returns ``None`` when ``_result_data`` yields no data for ``tool_output``;
    otherwise delegates to ``_build_status_ui_hints`` with the event id, title
    and a one-entry detail list.
    """
    payload = _result_data(tool_output)
    if payload is None:
        return None

    def _clean(field: str) -> str:
        # Missing or falsy values collapse to an empty string.
        return str(payload.get(field) or "").strip()

    event_id = _clean("id")
    event_title = _clean("title") or "日程详情"
    starts = _clean("start_at")
    ends = _clean("end_at")

    # Show the full span when both ends are known, otherwise whichever exists.
    if starts and ends:
        time_span: str | None = f"{starts} ~ {ends}"
    else:
        time_span = starts or ends or None

    summary_rows = [
        {"key": "event_id", "label": "日程 ID", "value": event_id},
        {"key": "title", "label": "标题", "value": event_title},
    ]
    detail_entry = {
        "id": event_id or None,
        "title": event_title,
        "subtitle": time_span,
        "status": UiHintStatus.INFO.value,
    }
    return _build_status_ui_hints(
        tool_output=tool_output,
        intent=UiHintIntent.STATUS,
        title="日程详情",
        description="仅展示本次查询返回的日程详情。",
        items=summary_rows,
        list_title="详情",
        list_items=[detail_entry],
    )
def _calendar_mutation_ui_hints(
*,
tool_output: ToolAgentOutput,
@@ -232,6 +267,23 @@ def _calendar_share_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | N
)
def _calendar_invite_status_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | None:
    """Build status UI hints for an invitation response.

    Returns ``None`` when ``_result_data`` yields no data; otherwise the hints
    carry a single item holding the result's ``message`` text (empty string if
    absent) and an empty list section.
    """
    payload = _result_data(tool_output)
    if payload is None:
        return None

    message_text = str(payload.get("message") or "")
    result_row = {"key": "message", "label": "结果", "value": message_text}
    return _build_status_ui_hints(
        tool_output=tool_output,
        intent=UiHintIntent.STATUS,
        title="邀请处理结果",
        description="仅展示本次邀请响应结果。",
        items=[result_row],
        list_title="执行结果",
        list_items=[],
    )
def _memory_update_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | None:
data = _result_data(tool_output)
if data is None:
@@ -326,11 +378,13 @@ def _contacts_read_ui_hints(tool_output: ToolAgentOutput) -> dict[str, Any] | No
# Dispatch table: a two-string key (first element e.g. "calendar", second e.g.
# "read") mapped to the function that builds UI hints for that tool call.
# Each key appears exactly once; a repeated key in a dict literal silently
# overrides the earlier entry.
_UI_HINTS_BUILDERS: dict[tuple[str, str], Callable[[ToolAgentOutput], dict[str, Any] | None]] = {
    ("calendar", "read"): _calendar_read_ui_hints,
    ("calendar", "create"): _calendar_create_ui_hints,
    ("calendar", "update"): _calendar_update_ui_hints,
    ("calendar", "delete"): _calendar_delete_ui_hints,
    ("calendar", "share"): _calendar_share_ui_hints,
    ("calendar", "accept_invite"): _calendar_invite_status_ui_hints,
    ("calendar", "reject_invite"): _calendar_invite_status_ui_hints,
    ("contacts", "read"): _contacts_read_ui_hints,
    ("memory", "update"): _memory_update_ui_hints,
}
@@ -341,10 +395,10 @@ def postprocess_tool_output(tool_output: ToolAgentOutput) -> ToolAgentOutput:
return tool_output
if tool_output.ui_hints is not None:
return tool_output
command_key = _resolve_command_key(tool_output)
if command_key is None:
method_key = _resolve_method_key(tool_output)
if method_key is None:
return tool_output
builder = _UI_HINTS_BUILDERS.get(command_key)
builder = _UI_HINTS_BUILDERS.get(method_key)
if builder is None:
return tool_output
ui_hints = builder(tool_output)
+13 -3
View File
@@ -6,6 +6,8 @@ from typing import Any
from core.agentscope.tools.internal import make_project_cli_wrapper, make_view_skill_file_wrapper
from core.agentscope.tools.internal.project_cli import PROJECT_CLI_TOOL_NAME
from core.agentscope.tools.internal.view_skill_file import VIEW_SKILL_FILE_TOOL_NAME
from core.agentscope.tools.skill_session import SkillSessionState
from core.agentscope.tools.skill_prompt import AGENT_SKILL_INSTRUCTION, AGENT_SKILL_TEMPLATE
from core.agentscope.tools.tool_middleware import register_tool_middlewares
from core.logging import get_logger
from schemas.agent.skill_config import ProjectCliCommand, SkillName
@@ -50,7 +52,12 @@ def build_toolkit(
else:
enabled_skills = _validate_enabled_skill_names(enabled_skill_names)
toolkit = Toolkit()
skill_session = SkillSessionState()
toolkit = Toolkit(
agent_skill_instruction=AGENT_SKILL_INSTRUCTION,
agent_skill_template=AGENT_SKILL_TEMPLATE,
)
if allowed_commands is None:
resolved_allowed_commands = _all_command_names()
@@ -58,14 +65,17 @@ def build_toolkit(
resolved_allowed_commands = _validate_allowed_commands(allowed_commands)
project_cli_wrapper = make_project_cli_wrapper(
allowed_commands=resolved_allowed_commands
allowed_commands=resolved_allowed_commands,
)
toolkit.register_tool_function(
project_cli_wrapper,
func_name=PROJECT_CLI_TOOL_NAME,
)
view_skill_wrapper = make_view_skill_file_wrapper(enabled_skill_names=enabled_skills)
view_skill_wrapper = make_view_skill_file_wrapper(
enabled_skill_names=enabled_skills,
skill_session=skill_session,
)
toolkit.register_tool_function(
view_skill_wrapper,
func_name=VIEW_SKILL_FILE_TOOL_NAME,
@@ -41,30 +41,10 @@ llms:
output_cost_per_token: 0.000012
cache_hit_cost_per_token: 0.00000012
- model_code: qwen3.5-35b-a3b
factory_name: dashscope
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000004
output_cost_per_token: 0.0000032
- max_prompt_tokens: 256000
input_cost_per_token: 0.0000016
output_cost_per_token: 0.0000128
- model_code: deepseek-chat
factory_name: deepseek
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.000002
output_cost_per_token: 0.000003
- max_prompt_tokens: 1000000
input_cost_per_token: 0.000001
output_cost_per_token: 0.000002
cache_hit_cost_per_token: 0.0000002
- model_code: qwen3.5-27b
factory_name: dashscope
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000006
output_cost_per_token: 0.0000048
- max_prompt_tokens: 256000
input_cost_per_token: 0.0000018
output_cost_per_token: 0.0000144
@@ -32,7 +32,9 @@ def test_react_agent_sys_prompt_includes_registered_skill_prompt() -> None:
assert "# Agent Skills" in prompt
assert "## calendar" in prompt
assert "## contacts" in prompt
assert "SKILL.md" in prompt
assert "view_skill_file" in prompt
assert 'file_path="calendar/SKILL.md"' in prompt
assert 'file_path="contacts/SKILL.md"' in prompt
def test_view_skill_file_tool_reads_registered_skill_content() -> None:
@@ -47,3 +49,18 @@ def test_view_skill_file_tool_reads_registered_skill_content() -> None:
block = response.content[0]
text = block["text"] if isinstance(block, dict) else block.text
assert "Calendar Skill" in text or "name: calendar" in text
def test_view_skill_file_tool_reads_calendar_action_card() -> None:
    """The ``view_skill_file`` tool returns the ``create_event`` action card text."""
    calendar_toolkit = build_toolkit(enabled_skill_names={"calendar"})
    view_skill_file = calendar_toolkit.tools["view_skill_file"].original_func

    reply = asyncio.run(
        view_skill_file(file_path="calendar/actions/create_event.md", ranges=[1, 20]),
    )

    assert reply.content
    first_block = reply.content[0]
    # Content blocks may be plain dicts or objects exposing ``.text``.
    if isinstance(first_block, dict):
        rendered = first_block["text"]
    else:
        rendered = first_block.text
    assert "create_event" in rendered
    assert "input.title" in rendered
@@ -252,8 +252,8 @@ async def test_calendar_create_skill_creates_db_record() -> None:
assert cli_result.get("status") == "success", f"Tool call failed: {cli_result}"
args = cli_result.get("tool_call_args", {})
assert args.get("command") == "calendar"
assert args.get("subcommand") == "create"
assert args.get("module") == "calendar"
assert args.get("method") == "create"
result_payload = cli_result.get("result")
assert isinstance(result_payload, dict), f"Unexpected result payload: {cli_result}"
@@ -317,8 +317,8 @@ async def test_calendar_read_skill_queries_db() -> None:
assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}"
args = cli_result.get("tool_call_args", {})
assert args.get("command") == "calendar"
assert args.get("subcommand") == "read"
assert args.get("module") == "calendar"
assert args.get("method") in {"read"}
@pytest.mark.asyncio
@@ -355,8 +355,8 @@ async def test_contacts_read_skill_queries_db() -> None:
assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}"
args = cli_result.get("tool_call_args", {})
assert args.get("command") == "contacts"
assert args.get("subcommand") == "read"
assert args.get("module") == "contacts"
assert args.get("method") == "read"
@pytest.mark.asyncio
@@ -398,8 +398,8 @@ async def test_memory_update_skill_via_automation() -> None:
assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}"
args = cli_result.get("tool_call_args", {})
assert args.get("command") == "memory"
assert args.get("subcommand") == "update"
assert args.get("module") == "memory"
assert args.get("method") == "update"
if user_id:
time.sleep(1)
+16 -10
View File
@@ -183,7 +183,6 @@ async def test_agent_calendar_read_via_cli() -> None:
tool_names = [result.get("tool_name") for result in tool_call_results]
assert "view_skill_file" in tool_names
assert "project_cli" in tool_names
assert tool_names.index("view_skill_file") < tool_names.index("project_cli")
view_result = next(
result for result in tool_call_results if result.get("tool_name") == "view_skill_file"
@@ -193,22 +192,27 @@ async def test_agent_calendar_read_via_cli() -> None:
assert isinstance(view_args, dict)
assert view_args.get("file_path") == "calendar/SKILL.md"
result = next(
result for result in tool_call_results if result.get("tool_name") == "project_cli"
)
successful_project_cli_results = [
result
for result in tool_call_results
if result.get("tool_name") == "project_cli"
and result.get("status") in {"success", "partial"}
]
assert successful_project_cli_results, "expected at least one successful project_cli result"
result = successful_project_cli_results[-1]
assert result.get("status") in {"success", "failure", "partial"}
tool_call_args = result.get("tool_call_args")
assert isinstance(tool_call_args, dict)
assert tool_call_args.get("command") == "calendar"
assert tool_call_args.get("subcommand") == "read"
assert tool_call_args.get("module") == "calendar"
assert tool_call_args.get("method") in {"read"}
raw_result = result.get("result")
if isinstance(raw_result, str):
raw_result = json.loads(raw_result)
assert isinstance(raw_result, dict), f"result should be dict, got {type(raw_result)}"
assert raw_result.get("command") == "calendar"
assert raw_result.get("subcommand") == "read"
assert raw_result.get("module") == "calendar"
assert raw_result.get("method") in {"read"}
if "ui_schema" in result:
ui_schema = result["ui_schema"]
@@ -285,8 +289,10 @@ async def test_tool_ui_schema_in_history() -> None:
except (json.JSONDecodeError, ValueError):
pass
assert isinstance(result, dict), f"result in DB should be dict, got {type(result)}: {result!r}"
assert result.get("command") == "calendar"
assert result.get("subcommand") == "read"
if tool_agent_output.get("status") == "failure":
continue
assert result.get("module") == "calendar"
assert result.get("method") in {"read"}
ui_hints = tool_agent_output.get("ui_hints")
assert isinstance(ui_hints, dict), f"ui_hints should be dict, got {type(ui_hints)}"
View File
+196
View File
@@ -0,0 +1,196 @@
from __future__ import annotations
import os
import time
from pathlib import Path
from uuid import uuid4
import httpx
import jwt
def _load_env(env_path: str | os.PathLike[str] | None = None) -> None:
    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Variables that are already set in the environment are never overwritten.
    Blank lines, ``#`` comment lines and lines without ``=`` are ignored;
    surrounding whitespace and quote characters are stripped from values.

    Args:
        env_path: File to read. Defaults to ``.env`` under ``parents[3]`` of
            this module's resolved path. A missing file is silently skipped.
    """
    if env_path is None:
        ancestors = Path(__file__).resolve().parents
        # A shallow install path has no such ancestor; nothing to load then.
        if len(ancestors) <= 3:
            return
        env_path = ancestors[3] / ".env"

    env_file = Path(env_path)
    if not env_file.is_file():
        return

    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    for raw_line in env_file.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        value = value.strip().strip('"').strip("'")
        if key:
            os.environ.setdefault(key, value)
_load_env()
BASE_URL = os.getenv("AGENT_LIVE_BASE_URL", "http://localhost:5775")
def get_jwt_secret() -> str:
    """Return the first non-empty JWT secret found in the environment.

    Checked in order: ``SOCIAL_SUPABASE__JWT_SECRET``, ``SUPABASE_JWT_SECRET``,
    ``JWT_SECRET``.

    Raises:
        RuntimeError: If none of the variables holds a non-empty value.
    """
    for env_name in ("SOCIAL_SUPABASE__JWT_SECRET", "SUPABASE_JWT_SECRET", "JWT_SECRET"):
        candidate = os.getenv(env_name)
        if candidate:
            return candidate
    raise RuntimeError("JWT_SECRET not found in environment")
def get_supabase_url() -> str:
    """Return the Supabase URL from the environment, or the local default.

    ``SOCIAL_SUPABASE__URL`` is preferred over ``SUPABASE_URL``; when both are
    unset or empty, ``http://localhost:54321`` is returned.
    """
    for env_name in ("SOCIAL_SUPABASE__URL", "SUPABASE_URL"):
        configured = os.getenv(env_name)
        if configured:
            return configured
    return "http://localhost:54321"
def get_test_user_id() -> str:
    """Return the ``TEST_USER_ID`` environment variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    configured = os.getenv("TEST_USER_ID")
    if not configured:
        raise RuntimeError("TEST_USER_ID not set")
    return configured
def create_test_jwt(user_id: str) -> str:
    """Encode an HS256 token for ``user_id`` that expires one hour after issue.

    The issuer is ``get_supabase_url()`` and the signing key is
    ``get_jwt_secret()``; role and audience are both ``authenticated``.
    """
    issued_at = int(time.time())
    claims = {
        "sub": user_id,
        "role": "authenticated",
        "aud": "authenticated",
        "iss": get_supabase_url(),
        "iat": issued_at,
        "exp": issued_at + 3600,
    }
    signing_key = get_jwt_secret()
    return jwt.encode(claims, signing_key, algorithm="HS256")
async def run_agent_and_collect(
    *,
    user_message: str,
    client: httpx.AsyncClient,
    headers: dict,
    run_id: str | None = None,
    thread_id: str | None = None,
    timeout: float = 120.0,
) -> AgentRunResult:
    """Start an agent run over HTTP and collect its server-sent events.

    Posts the run request, then reads the run's event stream until a
    ``RUN_FINISHED`` or ``RUN_ERROR`` event arrives or the stream ends.

    Args:
        user_message: Text sent as the single user message.
        client: HTTP client used for both the POST and the event stream.
        headers: Headers sent with both requests.
        run_id: Run identifier; derived from ``thread_id`` when omitted.
        thread_id: Thread identifier; a random UUID when omitted.
        timeout: Timeout passed to the event-stream request.

    Returns:
        An ``AgentRunResult`` holding every parsed event, the tool-result
        events, the final answer text and the elapsed time in milliseconds
        (measured from just before the POST).

    Raises:
        httpx.HTTPStatusError: If the run-creation request is rejected.
    """
    import json  # local import: this module has no file-level json import

    if thread_id is None:
        thread_id = str(uuid4())
    if run_id is None:
        run_id = f"quality-{thread_id[:8]}"

    t_start = time.monotonic()
    run_resp = await client.post(
        f"{BASE_URL}/api/v1/agent/runs",
        headers=headers,
        json={
            "threadId": thread_id,
            "runId": run_id,
            "state": {},
            "messages": [
                {"id": "u1", "role": "user", "content": user_message}
            ],
            "tools": [],
            "context": [],
            "forwardedProps": {"runtime_mode": "chat"},
        },
    )
    # Surface a rejected run immediately instead of streaming a run that was
    # never created.
    run_resp.raise_for_status()
    run_data = run_resp.json()
    effective_thread_id = str(run_data.get("threadId", thread_id))
    effective_run_id = run_data.get("runId", run_id)

    events_url = (
        f"{BASE_URL}/api/v1/agent/runs/{effective_thread_id}/events"
        f"?runId={effective_run_id}"
    )

    tool_results: list[dict] = []
    all_events: list[dict] = []
    run_finished = False
    final_answer = ""

    async with client.stream(
        "GET", events_url, headers=headers, timeout=timeout
    ) as sse_resp:
        data_lines: list[str] = []
        async for line in sse_resp.aiter_lines():
            if line.startswith("data:"):
                chunk = line.split(":", 1)[1].strip()
                if chunk:
                    # One event may span several data lines; keep them all.
                    data_lines.append(chunk)
                continue
            if line != "" or not data_lines:
                continue

            # A blank line terminates the event: join its data lines.
            payload = "\n".join(data_lines)
            data_lines = []
            try:
                event_data = json.loads(payload)
            except json.JSONDecodeError:
                continue  # malformed events are skipped, as before
            if not isinstance(event_data, dict):
                continue

            all_events.append(event_data)
            event_type = event_data.get("type")
            if event_type == "TOOL_CALL_RESULT":
                tool_results.append(event_data)
            elif event_type == "TEXT_MESSAGE_END":
                final_answer = event_data.get("answer", "") or event_data.get("text", "")
            elif event_type in {"RUN_FINISHED", "RUN_ERROR"}:
                run_finished = True
                # Terminal event: stop reading rather than wait for the
                # server to close the connection.
                break

    t_end = time.monotonic()

    return AgentRunResult(
        thread_id=effective_thread_id,
        run_id=effective_run_id,
        user_message=user_message,
        final_answer=final_answer,
        tool_results=tool_results,
        all_events=all_events,
        run_finished=run_finished,
        latency_ms=round((t_end - t_start) * 1000),
    )
class AgentRunResult:
    """Plain container for what one agent run produced.

    Holds the identifiers, the user message, the final answer, the collected
    events and the measured latency, plus helpers over the tool results.
    """

    def __init__(
        self,
        *,
        thread_id: str,
        run_id: str,
        user_message: str,
        final_answer: str,
        tool_results: list[dict],
        all_events: list[dict],
        run_finished: bool,
        latency_ms: int,
    ) -> None:
        self.thread_id = thread_id
        self.run_id = run_id
        self.user_message = user_message
        self.final_answer = final_answer
        self.tool_results = tool_results
        self.all_events = all_events
        self.run_finished = run_finished
        self.latency_ms = latency_ms

    @staticmethod
    def _tool_name(tool_result: dict) -> str:
        """Read the tool name from either the snake_case or camelCase key."""
        return tool_result.get("tool_name", "") or tool_result.get("toolName", "")

    @property
    def tool_names_called(self) -> list[str]:
        """Names of all tools that reported a result, in arrival order."""
        return list(map(self._tool_name, self.tool_results))

    @property
    def successful_tool_names(self) -> list[str]:
        """Names of tools whose status is ``success`` or ``partial``."""
        names: list[str] = []
        for tool_result in self.tool_results:
            if tool_result.get("status") in ("success", "partial"):
                names.append(self._tool_name(tool_result))
        return names

    @property
    def has_tool_success(self) -> bool:
        """Whether at least one tool result counts as successful."""
        return bool(self.successful_tool_names)
@@ -0,0 +1,99 @@
from __future__ import annotations
from pydantic import BaseModel
class ScoreDetail(BaseModel):
    """Result of checking a single quality criterion."""

    criterion: str  # text of the criterion that was checked
    passed: bool  # whether the check succeeded
    note: str = ""  # optional free-form context; empty by default
class ScenarioScore(BaseModel):
    """Measurements and derived score for one scenario run on one model."""

    scenario_id: str
    model_code: str
    latency_ms: int
    input_tokens: int = 0
    output_tokens: int = 0
    cost_usd: float = 0.0
    tool_called: bool
    tool_succeeded: bool
    answer_quality: float
    details: list[ScoreDetail]
    raw_answer: str = ""
    run_finished: bool = True

    @property
    def overall_score(self) -> float:
        """Weighted sum: 0.3 tool correctness, 0.5 answer quality, 0.2 latency."""
        # A successful tool call earns full credit, a mere attempt half.
        if self.tool_succeeded:
            tool_score = 1.0
        elif self.tool_called:
            tool_score = 0.5
        else:
            tool_score = 0.0

        latency_score = self._latency_score()
        tool_part = 0.3 * tool_score
        quality_part = 0.5 * self.answer_quality
        latency_part = 0.2 * latency_score
        return tool_part + quality_part + latency_part

    def _latency_score(self) -> float:
        """Map ``latency_ms`` onto a stepped score between 0.1 and 1.0."""
        # (inclusive upper bound in ms, score), checked from fastest to slowest.
        for ceiling_ms, score in ((5000, 1.0), (15000, 0.7), (30000, 0.4)):
            if self.latency_ms <= ceiling_ms:
                return score
        return 0.1
class ModelScorecard(BaseModel):
    """All scenario scores for one model, with aggregate views over them."""

    model_code: str
    scenario_scores: list[ScenarioScore]

    @property
    def avg_overall(self) -> float:
        """Mean ``overall_score``; ``0.0`` when there are no scores."""
        values = [entry.overall_score for entry in self.scenario_scores]
        return sum(values) / len(values) if values else 0.0

    @property
    def avg_latency_ms(self) -> float:
        """Mean latency in milliseconds; ``0.0`` when there are no scores."""
        values = [entry.latency_ms for entry in self.scenario_scores]
        return sum(values) / len(values) if values else 0.0

    @property
    def avg_cost_usd(self) -> float:
        """Mean cost in USD; ``0.0`` when there are no scores."""
        values = [entry.cost_usd for entry in self.scenario_scores]
        return sum(values) / len(values) if values else 0.0

    @property
    def tool_success_rate(self) -> float:
        """Fraction of scenarios flagged ``tool_succeeded``; ``0.0`` when empty."""
        total = len(self.scenario_scores)
        if total == 0:
            return 0.0
        succeeded = [entry for entry in self.scenario_scores if entry.tool_succeeded]
        return len(succeeded) / total

    def summary_table(self) -> str:
        """Render the aggregates and one line per scenario as plain text."""

        def _row(entry: ScenarioScore) -> str:
            status = "PASS" if entry.tool_succeeded else "FAIL"
            return (
                f" [{status}] {entry.scenario_id:<25} "
                f"score={entry.overall_score:.2f} "
                f"lat={entry.latency_ms}ms "
                f"cost=${entry.cost_usd:.6f}"
            )

        header = [
            f"\n{'='*60}",
            f"Model Scorecard: {self.model_code}",
            f"{'='*60}",
            f" Avg Overall Score : {self.avg_overall:.2f}",
            f" Avg Latency : {self.avg_latency_ms:.0f}ms",
            f" Avg Cost : ${self.avg_cost_usd:.6f}",
            f" Tool Success Rate : {self.tool_success_rate:.0%}",
            f"{'-'*60}",
        ]
        body = [_row(entry) for entry in self.scenario_scores]
        footer = [f"{'='*60}"]
        return "\n".join(header + body + footer)
@@ -0,0 +1,82 @@
from __future__ import annotations
from pydantic import BaseModel
class EvalScenario(BaseModel):
    """One evaluation scenario: a prompt plus the expectations checked for it."""

    id: str  # scenario identifier, e.g. "calendar-read-today"
    prompt: str  # user message sent to the agent
    category: str  # grouping label, e.g. "calendar" or "general"
    expect_tool_use: bool  # whether the run is expected to call a tool
    expect_tool_success: bool  # whether a tool call is expected to succeed
    quality_criteria: list[str]  # human-readable criteria for the answer
# Scenarios in the "calendar" category; each one expects a successful tool call.
CALENDAR_SCENARIOS: list[EvalScenario] = [
    # Read the schedule for a single day.
    EvalScenario(
        id="calendar-read-today",
        prompt="请查询我今天的日程安排",
        category="calendar",
        expect_tool_use=True,
        expect_tool_success=True,
        quality_criteria=[
            "应调用 project_cli 的 calendar.read 方法",
            "input 应包含 mode=day 和具体日期",
            "回答应基于工具返回的实际数据",
            "如果无日程,应明确告知无日程",
        ],
    ),
    # Create an event from a natural-language time description.
    EvalScenario(
        id="calendar-create-event",
        prompt="帮我创建一个明天下午3点两小时的会议,标题是项目周会",
        category="calendar",
        expect_tool_use=True,
        expect_tool_success=True,
        quality_criteria=[
            "应调用 project_cli 的 calendar.create 方法",
            "input 应包含 title、start_at、timezone",
            "start_at 应为具体的时间戳而非自然语言",
            "应返回创建结果(包含 event_id)",
        ],
    ),
    # Read the schedule across a multi-day range.
    EvalScenario(
        id="calendar-read-range",
        prompt="这周一到周五我有哪些日程?",
        category="calendar",
        expect_tool_use=True,
        expect_tool_success=True,
        quality_criteria=[
            "应调用 project_cli 的 calendar.read 方法",
            "input 应使用 mode=range 或多次 mode=day",
            "应提供完整时间范围",
        ],
    ),
]
# Scenarios in the "general" category; none of them expects a tool call.
GENERAL_SCENARIOS: list[EvalScenario] = [
    # Greeting / self-introduction.
    EvalScenario(
        id="general-greeting",
        prompt="你好,你是谁?",
        category="general",
        expect_tool_use=False,
        expect_tool_success=False,
        quality_criteria=[
            "应简短自我介绍",
            "不应调用任何工具",
            "回答简洁不啰嗦",
        ],
    ),
    # Farewell.
    EvalScenario(
        id="general-farewell",
        prompt="好的谢谢,再见",
        category="general",
        expect_tool_use=False,
        expect_tool_success=False,
        quality_criteria=[
            "应礼貌告别",
            "不应调用任何工具",
        ],
    ),
]
# Calendar scenarios first, then the general ones.
ALL_SCENARIOS = CALENDAR_SCENARIOS + GENERAL_SCENARIOS
+440
View File
@@ -0,0 +1,440 @@
from __future__ import annotations
import json
import os
import time
from uuid import uuid4
import httpx
import jwt
import pytest
from backend.tests.quality.evaluators import ModelScorecard, ScoreDetail, ScenarioScore
from backend.tests.quality.scenarios import ALL_SCENARIOS
# Model codes covered by this suite.
CANDIDATE_MODELS = ["qwen3.5-flash", "deepseek-chat"]
# ``llm_id`` value associated with each candidate model code.
MODEL_LLM_IDS = {
    "qwen3.5-flash": "c625bce4-970e-4a76-bebe-cb8840fed854",
    "deepseek-chat": "12bc1963-4b67-404b-b952-5948bea0f690",
}


def _load_env(env_path: str | os.PathLike[str] | None = None) -> None:
    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Variables that are already set in the environment are never overwritten.
    Blank lines, ``#`` comment lines and lines without ``=`` are ignored;
    surrounding whitespace and quote characters are stripped from values.

    Args:
        env_path: File to read. Defaults to ``.env`` under ``parents[3]`` of
            this module's resolved path. A missing file is silently skipped.
    """
    from pathlib import Path

    if env_path is None:
        ancestors = Path(__file__).resolve().parents
        # A shallow install path has no such ancestor; nothing to load then.
        if len(ancestors) <= 3:
            return
        env_path = ancestors[3] / ".env"

    env_file = Path(env_path)
    if not env_file.is_file():
        return

    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    for raw_line in env_file.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        value = value.strip().strip('"').strip("'")
        if key:
            os.environ.setdefault(key, value)


_load_env()

# Resolved only after the .env file has been loaded, so an
# AGENT_LIVE_BASE_URL defined there is honoured.
BASE_URL = os.getenv("AGENT_LIVE_BASE_URL", "http://localhost:5775")
def _get_jwt_secret() -> str:
    """Return the first non-empty JWT secret found in the environment.

    Checked in order: ``SOCIAL_SUPABASE__JWT_SECRET``, ``SUPABASE_JWT_SECRET``,
    ``JWT_SECRET``.

    Raises:
        RuntimeError: If none of the variables holds a non-empty value.
    """
    for env_name in ("SOCIAL_SUPABASE__JWT_SECRET", "SUPABASE_JWT_SECRET", "JWT_SECRET"):
        candidate = os.getenv(env_name)
        if candidate:
            return candidate
    raise RuntimeError("JWT_SECRET not found in environment")
def _get_supabase_url() -> str:
    """Return the Supabase URL from the environment, or the local default.

    Checked in order: ``SOCIAL_SUPABASE__PUBLIC_URL``, ``SOCIAL_SUPABASE__URL``,
    ``SUPABASE_URL``. Falls back to ``http://localhost:54321`` when all are
    unset or empty.
    """
    env_names = ("SOCIAL_SUPABASE__PUBLIC_URL", "SOCIAL_SUPABASE__URL", "SUPABASE_URL")
    for env_name in env_names:
        configured = os.getenv(env_name)
        if configured:
            return configured
    return "http://localhost:54321"
def _get_supabase_key() -> str:
    """Return the Supabase service-role key.

    ``SOCIAL_SUPABASE__SERVICE_ROLE_KEY`` wins when it is set and non-empty;
    otherwise the value comes from ``config.supabase.service_role_key``.
    """
    key = os.getenv("SOCIAL_SUPABASE__SERVICE_ROLE_KEY", "")
    if key:
        return key

    # Imported lazily so the project settings are only loaded when the
    # environment does not already provide the key.
    from core.config.settings import config

    return config.supabase.service_role_key
def _get_test_user_id() -> str:
    """Return the ``TEST_USER_ID`` environment variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    configured = os.getenv("TEST_USER_ID")
    if not configured:
        raise RuntimeError("TEST_USER_ID not set")
    return configured
def _create_jwt(user_id: str) -> str:
    """Encode an HS256 token for ``user_id`` that expires one hour after issue.

    The issuer is ``_get_supabase_url()`` and the signing key is
    ``_get_jwt_secret()``; role and audience are both ``authenticated``.
    """
    issued_at = int(time.time())
    claims = {
        "sub": user_id,
        "role": "authenticated",
        "aud": "authenticated",
        "iss": _get_supabase_url(),
        "iat": issued_at,
        "exp": issued_at + 3600,
    }
    signing_key = _get_jwt_secret()
    return jwt.encode(claims, signing_key, algorithm="HS256")
async def _run_via_http(
    *,
    user_message: str,
    token: str,
    timeout: float = 120.0,
) -> dict:
    """Run one prompt against the agent HTTP API and collect the outcome.

    Creates a run with fresh thread and run identifiers, then reads the run's
    server-sent events until ``RUN_FINISHED`` / ``RUN_ERROR`` arrives or the
    stream ends.

    Args:
        user_message: Text sent as the single user message.
        token: Bearer token placed in the ``Authorization`` header.
        timeout: Timeout for the client and for the event-stream request.

    Returns:
        A dict with ``final_answer``, ``tool_results``, ``tool_names``,
        ``successful_tool_names``, ``run_finished``, ``latency_ms`` (measured
        from just before the event stream is opened) and ``token_usage``
        (taken from the ``TEXT_MESSAGE_END`` event).

    Raises:
        httpx.HTTPStatusError: If the run-creation request is rejected.
    """
    thread_id = str(uuid4())
    run_id = f"q-{uuid4().hex[:12]}"

    async with httpx.AsyncClient(timeout=httpx.Timeout(timeout)) as client:
        headers = {"Authorization": f"Bearer {token}"}

        run_resp = await client.post(
            f"{BASE_URL}/api/v1/agent/runs",
            headers=headers,
            json={
                "threadId": thread_id,
                "runId": run_id,
                "state": {},
                "messages": [
                    {"id": "u1", "role": "user", "content": user_message}
                ],
                "tools": [],
                "context": [],
                "forwardedProps": {"runtime_mode": "chat"},
            },
        )
        # Surface a rejected run immediately instead of streaming a run that
        # was never created.
        run_resp.raise_for_status()
        run_data = run_resp.json()
        eff_thread = str(run_data.get("threadId", thread_id))
        eff_run = run_data.get("runId", run_id)

        events_url = (
            f"{BASE_URL}/api/v1/agent/runs/{eff_thread}/events"
            f"?runId={eff_run}"
        )

        t_start = time.monotonic()
        tool_results: list[dict] = []
        all_events: list[dict] = []
        final_answer = ""
        run_finished = False
        token_usage: dict = {}

        async with client.stream(
            "GET", events_url, headers=headers, timeout=timeout
        ) as sse:
            data_lines: list[str] = []
            async for line in sse.aiter_lines():
                if line.startswith("data:"):
                    chunk = line.split(":", 1)[1].strip()
                    if chunk:
                        # One event may span several data lines; keep them all.
                        data_lines.append(chunk)
                    continue
                if line != "" or not data_lines:
                    continue

                # A blank line terminates the event: join its data lines.
                payload = "\n".join(data_lines)
                data_lines = []
                try:
                    ev = json.loads(payload)
                except json.JSONDecodeError:
                    continue  # malformed events are skipped, as before
                if not isinstance(ev, dict):
                    continue

                all_events.append(ev)
                etype = ev.get("type")
                if etype == "TOOL_CALL_RESULT":
                    tool_results.append(ev)
                elif etype == "TEXT_MESSAGE_END":
                    final_answer = ev.get("answer", "") or ev.get("text", "")
                    token_usage = {
                        "totalTokens": ev.get("totalTokens", 0),
                        "inputTokens": ev.get("inputTokens", 0),
                        "outputTokens": ev.get("outputTokens", 0),
                        "promptCacheMissTokens": ev.get(
                            "promptCacheMissTokens", 0
                        ),
                        "promptCacheHitTokens": ev.get(
                            "promptCacheHitTokens", 0
                        ),
                    }
                elif etype in {"RUN_FINISHED", "RUN_ERROR"}:
                    run_finished = True
                    # Terminal event: stop reading rather than wait for the
                    # server to close the connection.
                    break

        t_end = time.monotonic()

    tool_names = [
        tr.get("tool_name", "") or tr.get("toolName", "")
        for tr in tool_results
    ]
    successful_tool_names = [
        tr.get("tool_name", "") or tr.get("toolName", "")
        for tr in tool_results
        if tr.get("status") in ("success", "partial")
    ]

    return {
        "final_answer": final_answer,
        "tool_results": tool_results,
        "tool_names": tool_names,
        "successful_tool_names": successful_tool_names,
        "run_finished": run_finished,
        "latency_ms": round((t_end - t_start) * 1000),
        "token_usage": token_usage,
    }
def _switch_model(model_code: str) -> None:
    """Point the ``router`` and ``worker`` rows of ``system_agents`` at a model.

    The ``llm_id`` written is ``MODEL_LLM_IDS[model_code]``; an unknown model
    code raises ``KeyError``.
    """
    from supabase import create_client

    supabase = create_client(_get_supabase_url(), _get_supabase_key())
    target_llm_id = MODEL_LLM_IDS[model_code]
    for role in ("router", "worker"):
        update_query = supabase.table("system_agents").update({"llm_id": target_llm_id})
        update_query.eq("agent_type", role).execute()
def _save_original_models() -> list[dict]:
    """Fetch the ``agent_type`` and ``llm_id`` columns of ``system_agents``.

    Returns the ``data`` attribute of the executed query.
    """
    from supabase import create_client

    supabase = create_client(_get_supabase_url(), _get_supabase_key())
    select_query = supabase.table("system_agents").select("agent_type, llm_id")
    response = select_query.execute()
    return response.data
def _restore_models(original_rows: list[dict]) -> None:
    """Write each row's ``llm_id`` back to ``system_agents`` by ``agent_type``.

    Args:
        original_rows: Dicts with ``agent_type`` and ``llm_id`` keys.
    """
    from supabase import create_client

    supabase = create_client(_get_supabase_url(), _get_supabase_key())
    for saved in original_rows:
        update_query = supabase.table("system_agents").update({"llm_id": saved["llm_id"]})
        update_query.eq("agent_type", saved["agent_type"]).execute()
def _evaluate_answer_quality(
    *,
    answer: str,
    run_finished: bool,
    expect_tool_use: bool,
    has_tool_success: bool,
    tool_names: list[str],
) -> float:
    """Heuristically score an answer between 0.0 and 1.0.

    An unfinished run or a blank answer scores 0.0. Otherwise the score starts
    at 0.6 and is adjusted for tool behaviour versus expectation, for answer
    length, and for failure wording when a tool was expected.
    """
    if not (run_finished and answer and answer.strip()):
        return 0.0

    score = 0.6

    # Reward tool behaviour that matches the expectation, penalise the rest.
    if expect_tool_use:
        if has_tool_success:
            adjustment = 0.2
        elif tool_names:
            adjustment = 0.1
        else:
            adjustment = -0.3
    else:
        adjustment = -0.1 if tool_names else 0.2
    score += adjustment

    if len(answer) > 10:
        score += 0.1

    mentions_failure = any(marker in answer for marker in ("无法", "失败", "错误"))
    if mentions_failure and expect_tool_use:
        score -= 0.1

    return max(0.0, min(1.0, score))
def _tool_call_mode(tool_result: dict) -> object:
    """Return ``input.mode`` from a tool result's call arguments, if present."""
    args = tool_result.get("tool_call_args") or tool_result.get("toolCallArgs") or {}
    call_input = args.get("input") if isinstance(args, dict) else None
    return call_input.get("mode") if isinstance(call_input, dict) else None


def _reported_event_total(tool_results: list[dict]) -> int | None:
    """Best-effort read of ``result.data.total`` from the latest tool result.

    NOTE(review): assumes each tool-result event carries its payload under
    ``result`` (a dict or a JSON string) - confirm against the event schema.
    """
    for tool_result in reversed(tool_results):
        payload = tool_result.get("result")
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except ValueError:
                continue
        data = payload.get("data") if isinstance(payload, dict) else None
        total = data.get("total") if isinstance(data, dict) else None
        if isinstance(total, int) and not isinstance(total, bool):
            return total
    return None


def _evaluate_criteria(
    *,
    answer: str,
    run_finished: bool,
    tool_names: list[str],
    has_tool_success: bool,
    tool_results: list[dict],
    scenario: object,
) -> list[ScoreDetail]:
    """Check each of the scenario's quality criteria with keyword heuristics.

    Every criterion in ``scenario.quality_criteria`` is routed to one check by
    the keywords it contains; the first matching branch wins.

    Args:
        answer: Final answer text produced by the run.
        run_finished: Whether the run reached a terminal event.
        tool_names: Names of all tools that reported a result.
        has_tool_success: Whether any tool result counted as successful.
        tool_results: Raw tool-result events.
        scenario: Object exposing ``quality_criteria``; none are checked when
            the attribute is missing.

    Returns:
        One ``ScoreDetail`` per criterion, in the scenario's order.
    """
    details: list[ScoreDetail] = []
    cli_called = any("project_cli" in tn for tn in tool_names)

    for criterion in getattr(scenario, "quality_criteria", []):
        passed = False
        note = ""
        if "不应调用" in criterion:
            # Negative criterion: it must be tested before the generic "调用"
            # branch, which would otherwise require the tool call it forbids.
            passed = not tool_names
            note = f"tools: {tool_names}" if not passed else ""
        elif "调用" in criterion or "project_cli" in criterion:
            passed = cli_called
            note = f"tools: {tool_names}" if not passed else ""
        elif "mode" in criterion and "day" in criterion:
            # Accept every mode the criterion itself names (e.g. "mode=range
            # 或多次 mode=day"); fall back to "day" when none is spelled out.
            accepted = {m for m in ("day", "range") if f"mode={m}" in criterion} or {"day"}
            passed = any(_tool_call_mode(tr) in accepted for tr in tool_results)
        elif "具体" in criterion or "时间戳" in criterion:
            passed = has_tool_success
        elif "基于工具" in criterion or "返回" in criterion:
            passed = has_tool_success
        elif "无日程" in criterion:
            if _reported_event_total(tool_results):
                # The criterion is conditional on an empty schedule.
                passed = True
                note = "not applicable: events were returned"
            else:
                # Previously `"" in answer`, which is true for every string.
                passed = "无" in answer or "没有" in answer
        elif "简短" in criterion or "简洁" in criterion:
            passed = 0 < len(answer) < 200
        elif "自我介绍" in criterion:
            passed = "Linksy" in answer or "助手" in answer
        elif "礼貌" in criterion:
            passed = len(answer) > 0
        else:
            passed = run_finished and len(answer) > 0
        details.append(ScoreDetail(criterion=criterion, passed=passed, note=note))
    return details
async def _run_model_scenarios(model_code: str, user_id: str) -> ModelScorecard:
    """Run every scenario in ``ALL_SCENARIOS`` once and score the results.

    Each scenario prompt is sent through ``_run_via_http`` using a JWT created
    for ``user_id``. Token usage is priced with ``LlmPricingService`` and the
    answer is graded by ``_evaluate_answer_quality`` and
    ``_evaluate_criteria``. One progress line is printed per scenario.

    Returns:
        A ``ModelScorecard`` for ``model_code`` with one ``ScenarioScore`` per
        scenario, in ``ALL_SCENARIOS`` order.
    """
    from services.llm_pricing.service import LlmPricingService

    pricing_service = LlmPricingService()
    jwt_token = _create_jwt(user_id)
    collected: list[ScenarioScore] = []

    for scenario in ALL_SCENARIOS:
        outcome = await _run_via_http(
            user_message=scenario.prompt,
            token=jwt_token,
        )

        final_answer = outcome["final_answer"]
        called_tools = outcome["tool_names"]
        has_tool_success = len(outcome["successful_tool_names"]) > 0

        usage = outcome["token_usage"]
        total_tokens = usage.get("totalTokens", 0)
        # When the primary counters are missing or zero, fall back to the
        # cache-miss count for input and to (total - input) for output.
        input_tokens = usage.get("inputTokens", 0) or usage.get(
            "promptCacheMissTokens", 0
        )
        output_tokens = usage.get("outputTokens", 0) or max(
            total_tokens - input_tokens, 0
        )

        try:
            raw_cost = pricing_service.calculate_cost(
                model=model_code,
                prompt_tokens=input_tokens,
                completion_tokens=output_tokens,
                cached_prompt_tokens=usage.get("promptCacheHitTokens", 0),
            )
        except ValueError:
            # A ValueError from pricing is reported as zero cost so the
            # remaining scenarios still run.
            raw_cost = 0.0
        cost_usd = round(raw_cost, 8)

        tool_called = any("project_cli" in name for name in called_tools)
        # Scenarios that do not expect a tool count as "succeeded" by default.
        tool_succeeded = (not scenario.expect_tool_use) or has_tool_success

        finished = outcome["run_finished"]
        answer_quality = _evaluate_answer_quality(
            answer=final_answer,
            run_finished=finished,
            expect_tool_use=scenario.expect_tool_use,
            has_tool_success=has_tool_success,
            tool_names=called_tools,
        )
        details = _evaluate_criteria(
            answer=final_answer,
            run_finished=finished,
            tool_names=called_tools,
            has_tool_success=has_tool_success,
            tool_results=outcome["tool_results"],
            scenario=scenario,
        )

        tool_flag = "OK" if has_tool_success else "FAIL"
        print(
            f" [{model_code}] {scenario.id:<25} "
            f"lat={outcome['latency_ms']}ms "
            f"tokens={total_tokens} "
            f"cost=${cost_usd:.6f} "
            f"tool={tool_flag} "
            f"answer={final_answer[:60]}"
        )

        scenario_score = ScenarioScore(
            scenario_id=scenario.id,
            model_code=model_code,
            latency_ms=outcome["latency_ms"],
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost_usd=cost_usd,
            tool_called=tool_called,
            tool_succeeded=tool_succeeded,
            answer_quality=answer_quality,
            details=details,
            raw_answer=final_answer[:500],
            run_finished=finished,
        )
        collected.append(scenario_score)

    return ModelScorecard(model_code=model_code, scenario_scores=collected)
@pytest.fixture(autouse=True)
def _check_env():
    """Skip the test unless the ``QUALITY_TEST`` environment variable is "1"."""
    quality_enabled = os.getenv("QUALITY_TEST") == "1"
    if not quality_enabled:
        pytest.skip("set QUALITY_TEST=1 to run quality tests")
@pytest.fixture(autouse=True)
def _require_test_user_id():
    """Resolve the test user id up front for every test.

    The returned value is discarded; the call is made only for whatever
    checks ``_get_test_user_id`` performs (presumably skipping or failing
    when no test user is configured -- confirm against that helper).
    """
    _get_test_user_id()
@pytest.mark.asyncio
@pytest.mark.quality
@pytest.mark.live
async def test_model_ab_comparison():
    """Run all scenarios against each candidate model and print a comparison.

    For every entry in ``CANDIDATE_MODELS`` the model is switched in, the
    scenarios are executed and the resulting scorecard is printed. Whatever
    ``_save_original_models`` captured is handed back to ``_restore_models``
    in a ``finally`` block, so the restore runs even when a model run raises.

    The winner (highest ``avg_overall``) is reported whenever at least two
    scorecards were collected; on a tie the earlier candidate wins, matching
    the previous two-model behaviour.
    """
    user_id = _get_test_user_id()
    original_rows = _save_original_models()
    scorecards: list[ModelScorecard] = []
    try:
        for model_code in CANDIDATE_MODELS:
            _switch_model(model_code)
            card = await _run_model_scenarios(model_code, user_id)
            scorecards.append(card)
            print(card.summary_table())
    finally:
        _restore_models(original_rows)

    print("\n" + "=" * 60)
    print("COMPARISON")
    print("=" * 60)
    for card in scorecards:
        print(
            f" {card.model_code:<20} "
            f"overall={card.avg_overall:.2f} "
            f"latency={card.avg_latency_ms:.0f}ms "
            f"cost=${card.avg_cost_usd:.6f} "
            f"tool_success={card.tool_success_rate:.0%}"
        )

    if len(scorecards) >= 2:
        # max() returns the first maximal item, so ties favour the earlier
        # candidate exactly like the old `a if a >= b else b` comparison.
        best = max(scorecards, key=lambda scorecard: scorecard.avg_overall)
        print(f"\n Winner: {best.model_code} (by overall score)")
@@ -7,6 +7,7 @@ from ag_ui.core import RunAgentInput
import core.agentscope.runtime.runner as runner_module
from core.agentscope.runtime.runner import AgentScopeRunner
from schemas.agent.runtime_models import (
RunStatus,
RouterAgentOutput,
WorkerAgentOutputLite,
)
@@ -60,6 +61,31 @@ def test_build_worker_input_messages_only_contains_router_contract() -> None:
assert "[RouterAgentOutput]" in str(input_messages[0].content)
def test_build_agent_sets_worker_max_iters(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``_build_agent`` for the worker must pass ``max_iters=7`` to the agent class."""
    seen_kwargs: dict[str, object] = {}

    class _FakeJsonReActAgent:
        """Stand-in agent that records the keyword arguments it was built with."""

        def __init__(self, **kwargs: object) -> None:
            seen_kwargs.update(kwargs)

    monkeypatch.setattr(runner_module, "JsonReActAgent", _FakeJsonReActAgent)

    runner = AgentScopeRunner()
    tracking_model = runner_module.TrackingChatModel(object())
    built = runner._build_agent(
        agent_name="worker",
        system_prompt="test",
        toolkit=object(),
        model=tracking_model,
    )

    assert isinstance(built, _FakeJsonReActAgent)
    assert seen_kwargs["max_iters"] == 7
def test_build_router_messages_injects_user_input_when_context_last_not_user() -> None:
runner = AgentScopeRunner()
run_input = _run_input()
@@ -119,6 +145,45 @@ def test_build_router_messages_appends_user_input_to_context_tail() -> None:
assert messages[0].content == "上一轮回复"
def test_enforce_tool_evidence_contract_keeps_success_when_tool_succeeds() -> None:
    """A successful output is left untouched when a tool call succeeded."""
    runner = AgentScopeRunner()
    candidate = WorkerAgentOutputLite(
        status=RunStatus.SUCCESS,
        answer="今天没有日程",
        suggested_actions=["查明天"],
    )

    enforced = runner._enforce_tool_evidence_contract(
        worker_output=candidate,
        requires_tool_evidence=True,
        has_successful_tool_result=True,
    )

    assert enforced.status == RunStatus.SUCCESS
    assert enforced.answer == "今天没有日程"
    assert enforced.suggested_actions == ["查明天"]
    assert enforced.error is None
def test_enforce_tool_evidence_contract_forces_failure_without_successful_tool() -> None:
    """Without a successful tool result, a "success" output is rewritten to a failure."""
    runner = AgentScopeRunner()
    candidate = WorkerAgentOutputLite(
        status=RunStatus.SUCCESS,
        answer="今天没有日程",
        suggested_actions=["查明天"],
    )

    enforced = runner._enforce_tool_evidence_contract(
        worker_output=candidate,
        requires_tool_evidence=True,
        has_successful_tool_result=False,
    )

    assert enforced.status == RunStatus.FAILED
    assert enforced.answer == "无法确认结果:所需工具调用未成功完成。"
    assert enforced.suggested_actions == []
    assert enforced.error is not None
    assert enforced.error.code == "TOOL_EVIDENCE_MISSING"
def test_build_model_omits_none_generate_kwargs(
monkeypatch: pytest.MonkeyPatch,
) -> None:
@@ -1,6 +1,10 @@
from __future__ import annotations
from core.agentscope.prompts.agent_prompt import build_agent_prompt
from core.agentscope.prompts.agent_prompt import (
build_agent_prompt,
build_worker_contract_prompt,
)
from schemas.agent.runtime_models import RouterAgentOutput
from schemas.agent.system_agent import AgentType, SystemAgentLLMConfig
@@ -18,9 +22,12 @@ def test_build_agent_prompt_for_worker_contains_runtime_config() -> None:
assert "<!-- AGENT_START -->" in prompt
assert "- type: worker" in prompt
assert "context_messages.mode=number" in prompt
assert "context_messages.count=20" in prompt
assert "enabled_skills=calendar,contacts" in prompt
assert "Use objective plus context_summary as the primary execution guide from the router." in prompt
assert "When requires_tool_evidence=true, do not finalize an answer from failed tool calls; either recover with a corrected tool call or explicitly surface that execution failed." in prompt
assert "If all tool calls fail under requires_tool_evidence=true, set status=failed and populate error; do not present a factual answer as confirmed." in prompt
assert "context_messages.mode=number" not in prompt
assert "context_messages.count=20" not in prompt
def test_build_agent_prompt_for_router_contains_identity_and_config() -> None:
@@ -35,5 +42,20 @@ def test_build_agent_prompt_for_router_contains_identity_and_config() -> None:
assert "- type: router" in prompt
assert "[Router Agent]" in prompt
assert "When the task will require project_cli, include canonical tool input defaults in context_summary using the exact shape `project_cli_defaults={\"module\":...,\"method\":...,\"input\":{...}}` whenever they can be determined safely." in prompt
assert "Standardize every time value mentioned in context_summary to the exact project_cli input format that would be required downstream: dates as `YYYY-MM-DD`, local datetimes as RFC3339 with timezone offset, and event ids as raw UUID strings." in prompt
assert "For relative time requests like today, tomorrow, or next Monday, resolve them using system_time_local and place the resolved standardized value into project_cli_defaults.input instead of leaving natural-language time phrases." in prompt
assert "context_messages.mode=day" in prompt
assert "context_messages.count=2" in prompt
def test_build_worker_contract_prompt_prefers_resolved_dates_from_context_summary() -> None:
    """The worker contract prompt must carry the ``project_cli_defaults`` guidance line."""
    router_output = RouterAgentOutput(
        objective="查询今天日程",
        context_summary="目标日期: 2026-04-24",
        requires_tool_evidence=True,
    )

    contract_prompt = build_worker_contract_prompt(router_output=router_output)

    expected_rule = (
        "If context_summary contains project_cli_defaults, prefer using those exact module/method/input values directly."
    )
    assert expected_rule in contract_prompt
@@ -0,0 +1,84 @@
from __future__ import annotations
import json
import pytest
from core.agentscope.tools.cli.adapter import invoke_cli_tool
@pytest.mark.asyncio
async def test_project_cli_requires_module_and_method() -> None:
    """Omitting ``method`` yields an INVALID_ARGUMENT error envelope."""
    call_args = {
        "module": "calendar",
        "input": {},
    }
    response = await invoke_cli_tool(
        tool_name="project_cli",
        tool_call_args=call_args,
        allowed_commands={"calendar"},
    )

    assert response.content
    first_block = response.content[0]
    # Content blocks may be plain dicts or objects exposing ``.text``.
    if isinstance(first_block, dict):
        raw_text = first_block["text"]
    else:
        raw_text = first_block.text
    payload = json.loads(raw_text)

    assert payload["ok"] is False
    assert payload["module"] == "calendar"
    assert payload["method"] == ""
    assert payload["error"]["code"] == "INVALID_ARGUMENT"
@pytest.mark.asyncio
async def test_project_cli_failure_includes_method_contract_in_side_channel() -> None:
    """An invalid ``calendar.read`` input stores schema guidance for the tool call.

    The response payload carries only the error code, while the output stored
    under the tool call id also exposes ``input_schema``,
    ``expected_input_examples`` and a corrective message.
    """
    from core.agentscope.tools.tool_call_context import (
        peek_tool_agent_output,
        reset_current_tool_call_id,
        set_current_tool_call_id,
    )
    from core.auth.credential_issuer import create_credential_issuer
    from core.auth.tool_credential_context import reset_tool_credential, set_tool_credential

    call_id_token = set_current_tool_call_id("call-test-guidance")
    issued_credential = create_credential_issuer().issue(
        owner_id="00000000-0000-0000-0000-000000000001",
        mode="chat",
    )
    credential_token = set_tool_credential(issued_credential)
    try:
        response = await invoke_cli_tool(
            tool_name="project_cli",
            tool_call_args={
                "module": "calendar",
                "method": "read",
                "input": {},
            },
            allowed_commands={"calendar"},
        )
    finally:
        # Undo the context in reverse order of setup.
        reset_tool_credential(credential_token)
        reset_current_tool_call_id(call_id_token)

    assert response.content
    first_block = response.content[0]
    if isinstance(first_block, dict):
        raw_text = first_block["text"]
    else:
        raw_text = first_block.text
    payload = json.loads(raw_text)

    assert payload["ok"] is False
    assert payload["module"] == "calendar"
    assert payload["method"] == "read"
    assert payload["data"] is None
    assert payload["error"]["code"] == "INVALID_ACTION_INPUT"

    stored = peek_tool_agent_output(tool_call_id="call-test-guidance")
    assert stored is not None
    stored_error = stored.get("error")
    assert isinstance(stored_error, dict)
    assert stored_error["code"] == "INVALID_ACTION_INPUT"

    error_details = stored_error["details"]
    assert error_details["input_schema"]["mode"] == "string enum(day|range|event)"
    assert error_details["expected_input_examples"][0] == {
        "mode": "day",
        "date": "2026-04-24",
        "timezone": "Asia/Shanghai",
    }
    assert "resolve the day to a concrete input.date value" in stored_error["message"]
@@ -1,38 +1,96 @@
from __future__ import annotations
import pytest
from core.agentscope.tools.cli.handler_calendar import (
_resolve_read_range,
_day_input_to_range_input,
_CalendarReadDayInput,
handle_calendar_create_event,
handle_calendar_list_day,
)
from core.agentscope.tools.cli.models import CliCommand
def test_resolve_read_range_supports_date_timezone_fallback() -> None:
request = CliCommand(
command="calendar",
subcommand="read",
owner_id="u1",
args={"date": "2026-04-23", "timezone": "Asia/Shanghai"},
def test_day_input_converts_to_tz_range() -> None:
payload = _CalendarReadDayInput.model_validate(
{"mode": "day", "date": "2026-04-23", "timezone": "Asia/Shanghai"}
)
start_at, end_at, error = _resolve_read_range(request)
result = _day_input_to_range_input(payload)
assert error is None
assert start_at is not None
assert end_at is not None
assert start_at.isoformat() == "2026-04-22T16:00:00+00:00"
assert end_at.isoformat() == "2026-04-23T16:00:00+00:00"
assert result == {
"mode": "range",
"start_at": "2026-04-23T00:00:00+08:00",
"end_at": "2026-04-24T00:00:00+08:00",
}
def test_resolve_read_range_rejects_bad_date() -> None:
@pytest.mark.asyncio
async def test_calendar_read_rejects_bad_date_format() -> None:
request = CliCommand(
command="calendar",
subcommand="read",
module="calendar",
method="read",
owner_id="u1",
args={"date": "2026/04/23", "timezone": "Asia/Shanghai"},
input={"mode": "day", "date": "2026/04/23", "timezone": "Asia/Shanghai"},
)
start_at, end_at, error = _resolve_read_range(request)
result = await handle_calendar_list_day(request)
assert start_at is None
assert end_at is None
assert error == "date must be YYYY-MM-DD"
assert result.ok is False
assert result.error is not None
assert result.error.code == "INVALID_ACTION_INPUT"
assert result.error.details == {
"missing_fields": [],
"invalid_fields": ["day.date"],
}
@pytest.mark.asyncio
async def test_calendar_read_range_requires_timezone_aware_datetimes() -> None:
    """Range bounds given without a UTC offset are rejected as invalid fields."""
    offsetless_range = {
        "mode": "range",
        "start_at": "2026-04-23T00:00:00",
        "end_at": "2026-04-24T00:00:00",
    }
    request = CliCommand(
        module="calendar",
        method="read",
        owner_id="u1",
        input=offsetless_range,
    )

    outcome = await handle_calendar_list_day(request)

    assert outcome.ok is False
    assert outcome.error is not None
    assert outcome.error.code == "INVALID_ACTION_INPUT"
    invalid_fields = sorted(outcome.error.details["invalid_fields"])
    assert invalid_fields == ["range.end_at", "range.start_at"]
@pytest.mark.asyncio
async def test_create_event_rejects_legacy_field_aliases_with_corrections() -> None:
    """Legacy create-event field names are rejected and mapped to canonical names."""
    legacy_input = {
        "title": "Project sync",
        "start_time": "2026-04-23T10:00:00+08:00",
        "end_time": "2026-04-23T11:00:00+08:00",
        "event_timezone": "Asia/Shanghai",
    }
    request = CliCommand(
        module="calendar",
        method="create",
        owner_id="u1",
        input=legacy_input,
    )

    outcome = await handle_calendar_create_event(request)

    assert outcome.ok is False
    assert outcome.error is not None
    assert outcome.error.code == "INVALID_ACTION_INPUT"

    expected_details = {
        "missing_fields": ["start_at", "timezone"],
        "invalid_fields": ["end_time", "event_timezone", "start_time"],
        "alias_corrections": {
            "start_time": "start_at",
            "end_time": "end_at",
            "event_timezone": "timezone",
        },
    }
    assert outcome.error.details == expected_details
@@ -3,18 +3,21 @@ from __future__ import annotations
from core.agentscope.tools.cli.handlers import build_router
def test_router_registers_only_new_canonical_subcommands() -> None:
def test_router_registers_only_new_canonical_actions() -> None:
router = build_router()
assert ("calendar", "create") in router.command_pairs
assert ("calendar", "read") in router.command_pairs
assert ("calendar", "update") in router.command_pairs
assert ("calendar", "delete") in router.command_pairs
assert ("calendar", "share") in router.command_pairs
assert ("contacts", "read") in router.command_pairs
assert ("memory", "update") in router.command_pairs
assert ("calendar", "read") in router.module_methods
assert ("calendar", "create") in router.module_methods
assert ("calendar", "update") in router.module_methods
assert ("calendar", "delete") in router.module_methods
assert ("calendar", "share") in router.module_methods
assert ("calendar", "accept_invite") in router.module_methods
assert ("calendar", "reject_invite") in router.module_methods
assert ("contacts", "read") in router.module_methods
assert ("memory", "update") in router.module_methods
assert ("calendar", "write") not in router.command_pairs
assert ("contacts", "lookup") not in router.command_pairs
assert ("memory", "write") not in router.command_pairs
assert ("memory", "forget") not in router.command_pairs
assert ("calendar", "list_day") not in router.module_methods
assert ("calendar", "get_event") not in router.module_methods
assert ("contacts", "lookup") not in router.module_methods
assert ("memory", "write") not in router.module_methods
assert ("memory", "forget") not in router.module_methods
@@ -11,13 +11,13 @@ async def test_router_register_and_dispatch() -> None:
router = CommandRouter()
async def mock_handler(request: CliCommand) -> CliCommandResult:
return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand, data={"name": request.args["name"]})
return CliCommandResult(ok=True, module=request.module, method=request.method, data={"name": request.input["name"]})
router.register(command="test", subcommand="run", handler=mock_handler)
router.register(module="test", method="run", handler=mock_handler)
assert ("test", "run") in router.command_pairs
assert ("test", "run") in router.module_methods
result = await router.dispatch(CliCommand(command="test", subcommand="run", args={"name": "demo"}, owner_id="u1"))
result = await router.dispatch(CliCommand(module="test", method="run", input={"name": "demo"}, owner_id="u1"))
assert result.ok is True
assert result.data == {"name": "demo"}
@@ -25,10 +25,10 @@ async def test_router_register_and_dispatch() -> None:
@pytest.mark.asyncio
async def test_router_unknown_command() -> None:
router = CommandRouter()
result = await router.dispatch(CliCommand(command="unknown", subcommand="run", args={}, owner_id="u1"))
result = await router.dispatch(CliCommand(module="unknown", method="run", input={}, owner_id="u1"))
assert result.ok is False
assert result.error is not None
assert result.error.code == "UNKNOWN_COMMAND"
assert result.error.code == "UNKNOWN_METHOD"
@pytest.mark.asyncio
@@ -39,9 +39,9 @@ async def test_router_handler_exception() -> None:
del request
raise ValueError("intentional error")
router.register(command="fail", subcommand="run", handler=failing_handler)
router.register(module="fail", method="run", handler=failing_handler)
result = await router.dispatch(CliCommand(command="fail", subcommand="run", args={}, owner_id="u1"))
result = await router.dispatch(CliCommand(module="fail", method="run", input={}, owner_id="u1"))
assert result.ok is False
assert result.error is not None
assert result.error.code == "HANDLER_ERROR"
@@ -51,12 +51,12 @@ def test_router_duplicate_register() -> None:
router = CommandRouter()
async def handler1(request: CliCommand) -> CliCommandResult:
return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand)
return CliCommandResult(ok=True, module=request.module, method=request.method)
async def handler2(request: CliCommand) -> CliCommandResult:
return CliCommandResult(ok=True, command=request.command, subcommand=request.subcommand)
return CliCommandResult(ok=True, module=request.module, method=request.method)
router.register(command="cmd", subcommand="one", handler=handler1)
router.register(module="cmd", method="one", handler=handler1)
with pytest.raises(ValueError, match="already registered"):
router.register(command="cmd", subcommand="one", handler=handler2)
router.register(module="cmd", method="one", handler=handler2)
@@ -6,31 +6,53 @@ from schemas.agent.runtime_models import ToolAgentOutput, ToolStatus
def _make_tool_output(
*,
command: str,
subcommand: str,
module: str,
method: str,
status: ToolStatus,
data: dict | None = None,
) -> ToolAgentOutput:
return ToolAgentOutput(
tool_name="project_cli",
tool_call_id="test_call_id",
tool_call_args={"command": command, "subcommand": subcommand, "args": {}},
tool_call_args={"module": module, "method": method, "input": {}},
status=status,
result={"command": command, "subcommand": subcommand, "data": data or {}},
result={"module": module, "method": method, "data": data or {}},
error=None,
ui_hints=None,
)
def test_postprocess_calendar_read_has_ui_hints() -> None:
output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.SUCCESS, data={"total": 5, "items": []})
output = _make_tool_output(
module="calendar",
method="read",
status=ToolStatus.SUCCESS,
data={"total": 5, "items": []},
)
processed = postprocess_tool_output(output)
assert processed.ui_hints is not None
assert processed.ui_hints["intent"] == "list"
def test_postprocess_calendar_read_event_detail_has_ui_hints() -> None:
    """A single-event ``calendar.read`` result gets the event-detail UI title."""
    event_payload = {
        "id": "evt_1",
        "title": "Project sync",
        "start_at": "2026-04-21T10:00:00+08:00",
    }
    raw_output = _make_tool_output(
        module="calendar",
        method="read",
        status=ToolStatus.SUCCESS,
        data=event_payload,
    )

    processed = postprocess_tool_output(raw_output)

    assert processed.ui_hints is not None
    assert processed.ui_hints["title"] == "日程详情"
def test_postprocess_calendar_create_partial() -> None:
output = _make_tool_output(command="calendar", subcommand="create", status=ToolStatus.PARTIAL, data={"status": "partial", "success": 1, "failed": 1, "results": []})
output = _make_tool_output(
module="calendar",
method="create",
status=ToolStatus.PARTIAL,
data={"status": "partial", "success": 1, "failed": 1, "results": []},
)
processed = postprocess_tool_output(output)
assert processed.ui_hints is not None
assert processed.ui_hints["intent"] == "status"
@@ -39,8 +61,8 @@ def test_postprocess_calendar_create_partial() -> None:
def test_postprocess_calendar_share_has_ui_hints() -> None:
output = _make_tool_output(
command="calendar",
subcommand="share",
module="calendar",
method="share",
status=ToolStatus.SUCCESS,
data={
"status": "success",
@@ -60,7 +82,12 @@ def test_postprocess_calendar_share_has_ui_hints() -> None:
def test_postprocess_contacts_read_has_ui_hints() -> None:
output = _make_tool_output(command="contacts", subcommand="read", status=ToolStatus.SUCCESS, data={"friends_count": 3, "friends": []})
output = _make_tool_output(
module="contacts",
method="read",
status=ToolStatus.SUCCESS,
data={"friends_count": 3, "friends": []},
)
processed = postprocess_tool_output(output)
assert processed.ui_hints is not None
assert processed.ui_hints["intent"] == "list"
@@ -69,8 +96,8 @@ def test_postprocess_contacts_read_has_ui_hints() -> None:
def test_postprocess_memory_update_has_ui_hints() -> None:
output = _make_tool_output(
command="memory",
subcommand="update",
module="memory",
method="update",
status=ToolStatus.SUCCESS,
data={
"status": "success",
@@ -95,19 +122,19 @@ def test_postprocess_memory_update_has_ui_hints() -> None:
def test_postprocess_failure_no_ui_hints() -> None:
output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.FAILURE, data=None)
output = _make_tool_output(module="calendar", method="read", status=ToolStatus.FAILURE, data=None)
processed = postprocess_tool_output(output)
assert processed.ui_hints is None
def test_postprocess_unknown_command_no_ui_hints() -> None:
output = _make_tool_output(command="unknown", subcommand="run", status=ToolStatus.SUCCESS, data={"data": "test"})
output = _make_tool_output(module="unknown", method="run", status=ToolStatus.SUCCESS, data={"data": "test"})
processed = postprocess_tool_output(output)
assert processed.ui_hints is None
def test_postprocess_preserves_existing_ui_hints() -> None:
output = _make_tool_output(command="calendar", subcommand="read", status=ToolStatus.SUCCESS, data={"total": 5})
output = _make_tool_output(module="calendar", method="read", status=ToolStatus.SUCCESS, data={"total": 5})
output = output.model_copy(update={"ui_hints": {"view": "custom_view", "custom": True}})
processed = postprocess_tool_output(output)
assert processed.ui_hints["view"] == "custom_view"
@@ -3,6 +3,7 @@ import asyncio
from core.agentscope.tools.internal.project_cli import PROJECT_CLI_TOOL_NAME
from core.agentscope.tools.internal.view_skill_file import VIEW_SKILL_FILE_TOOL_NAME
from core.agentscope.tools.internal import make_view_skill_file_wrapper
from core.agentscope.tools.skill_session import SkillSessionState
from core.agentscope.tools.toolkit import build_toolkit
from schemas.agent.skill_config import SkillName
@@ -48,8 +49,22 @@ def test_build_toolkit_registers_project_cli() -> None:
}
def test_build_toolkit_uses_custom_agent_skill_prompt_contract() -> None:
    """The skill prompt presents index entries with relative paths only."""
    skill_prompt = build_toolkit(
        enabled_skill_names={"calendar"}
    ).get_agent_skill_prompt()

    assert skill_prompt is not None
    index_notice = "The entries below are skill indexes, not full execution instructions."
    assert index_notice in skill_prompt
    assert 'file_path="calendar/SKILL.md"' in skill_prompt
    # No "/home/" path fragment may appear in the prompt.
    assert "/home/" not in skill_prompt
def test_view_skill_file_rejects_path_outside_enabled_skill_dirs() -> None:
wrapper = make_view_skill_file_wrapper(enabled_skill_names={"calendar"})
wrapper = make_view_skill_file_wrapper(
enabled_skill_names={"calendar"},
skill_session=SkillSessionState(),
)
response = asyncio.run(
wrapper(file_path="/tmp/not-allowed.txt", ranges=None),
@@ -62,10 +77,48 @@ def test_view_skill_file_rejects_path_outside_enabled_skill_dirs() -> None:
def test_view_skill_file_reads_enabled_skill_file() -> None:
wrapper = make_view_skill_file_wrapper(enabled_skill_names={"calendar"})
skill_session = SkillSessionState()
wrapper = make_view_skill_file_wrapper(
enabled_skill_names={"calendar"},
skill_session=skill_session,
)
response = asyncio.run(wrapper(file_path="calendar/SKILL.md", ranges=[1, 10]))
assert response.content
block = response.content[0]
text = block["text"] if isinstance(block, dict) else block.text
assert "Calendar Skill" in text or "name: calendar" in text
assert skill_session.has_read(skill_name="calendar") is True
def test_view_skill_file_reads_calendar_action_card() -> None:
    """Reading a calendar action card returns its text and marks the skill as read."""
    session = SkillSessionState()
    view_file = make_view_skill_file_wrapper(
        enabled_skill_names={"calendar"},
        skill_session=session,
    )

    response = asyncio.run(
        view_file(file_path="calendar/actions/get_event.md", ranges=[1, 20])
    )

    assert response.content
    first_block = response.content[0]
    # Content blocks may be plain dicts or objects exposing ``.text``.
    if isinstance(first_block, dict):
        card_text = first_block["text"]
    else:
        card_text = first_block.text

    assert "get_event" in card_text
    assert '"action": "get_event"' in card_text
    assert session.has_read(skill_name="calendar") is True
def test_view_skill_file_rejects_action_card_for_disabled_skill() -> None:
    """An action card of a skill that is not enabled is refused with ACCESS_DENIED."""
    view_file = make_view_skill_file_wrapper(
        enabled_skill_names={"contacts"},
        skill_session=SkillSessionState(),
    )

    response = asyncio.run(
        view_file(file_path="calendar/actions/get_event.md", ranges=[1, 20])
    )

    assert response.content
    first_block = response.content[0]
    if isinstance(first_block, dict):
        denial_text = first_block["text"]
    else:
        denial_text = first_block.text
    assert "ACCESS_DENIED" in denial_text