"""Static evaluation scenarios, grouped by category."""

from __future__ import annotations

from pydantic import BaseModel


class EvalScenario(BaseModel):
    """A single evaluation case: one prompt plus the expectations attached to it."""

    # Identifier string; the entries defined in this module each use a distinct value.
    id: str
    # The user message for this scenario.
    prompt: str
    # Grouping label; this module uses "calendar" and "general".
    category: str
    # Presumably whether a tool call is expected for this prompt -- the
    # consumer of this flag is not visible here; confirm against the runner.
    expect_tool_use: bool
    # Presumably whether the tool call is expected to succeed. Set to False in
    # the scenarios below that expect no tool use -- confirm against the runner.
    expect_tool_success: bool
    # Free-text criteria describing what a good response looks like.
    quality_criteria: list[str]


# Scenarios whose criteria mention the project_cli calendar.read / calendar.create methods.
CALENDAR_SCENARIOS: list[EvalScenario] = [
    EvalScenario(
        category="calendar",
        id="calendar-read-today",
        prompt="请查询我今天的日程安排",
        quality_criteria=[
            "应调用 project_cli 的 calendar.read 方法",
            "input 应包含 mode=day 和具体日期",
            "回答应基于工具返回的实际数据",
            "如果无日程,应明确告知无日程",
        ],
        expect_tool_use=True,
        expect_tool_success=True,
    ),
    EvalScenario(
        category="calendar",
        id="calendar-create-event",
        prompt="帮我创建一个明天下午3点两小时的会议,标题是项目周会",
        quality_criteria=[
            "应调用 project_cli 的 calendar.create 方法",
            "input 应包含 title、start_at、timezone",
            "start_at 应为具体的时间戳而非自然语言",
            "应返回创建结果(包含 event_id)",
        ],
        expect_tool_use=True,
        expect_tool_success=True,
    ),
    EvalScenario(
        category="calendar",
        id="calendar-read-range",
        prompt="这周一到周五我有哪些日程?",
        quality_criteria=[
            "应调用 project_cli 的 calendar.read 方法",
            "input 应使用 mode=range 或多次 mode=day",
            "应提供完整时间范围",
        ],
        expect_tool_use=True,
        expect_tool_success=True,
    ),
]

# Small-talk scenarios; both set expect_tool_use=False.
GENERAL_SCENARIOS: list[EvalScenario] = [
    EvalScenario(
        category="general",
        id="general-greeting",
        prompt="你好,你是谁?",
        quality_criteria=[
            "应简短自我介绍",
            "不应调用任何工具",
            "回答简洁不啰嗦",
        ],
        expect_tool_use=False,
        expect_tool_success=False,
    ),
    EvalScenario(
        category="general",
        id="general-farewell",
        prompt="好的谢谢,再见",
        quality_criteria=[
            "应礼貌告别",
            "不应调用任何工具",
        ],
        expect_tool_use=False,
        expect_tool_success=False,
    ),
]

# Every scenario in one new list: calendar entries first, then general ones.
ALL_SCENARIOS: list[EvalScenario] = [*CALENDAR_SCENARIOS, *GENERAL_SCENARIOS]