Files
social-app/backend/tests/quality/scenarios/__init__.py
T

83 lines
2.4 KiB
Python
Raw Normal View History

from __future__ import annotations
from pydantic import BaseModel
class EvalScenario(BaseModel):
    """Declarative description of one evaluation scenario.

    Each instance pairs a user prompt with the expectations that the reply is
    checked against. The model only declares data; it carries no logic.
    """

    # Identifier string; the scenarios in this module use kebab-case slugs
    # such as "calendar-read-today". Nothing here enforces uniqueness.
    id: str

    # The user message for the scenario.
    prompt: str

    # Grouping label; this module uses "calendar" and "general".
    category: str

    # Presumably whether the reply is expected to invoke a tool -- confirm
    # against the code that consumes these scenarios.
    expect_tool_use: bool

    # Presumably whether the tool call is expected to succeed; how it is
    # interpreted when expect_tool_use is False is not visible here -- confirm.
    expect_tool_success: bool

    # Natural-language statements describing what a good reply should do.
    quality_criteria: list[str]
# Calendar scenarios: every entry shares category="calendar" and expects a
# successful tool call, so only the per-scenario fields are spelled out below.
CALENDAR_SCENARIOS: list[EvalScenario] = [
    EvalScenario(
        category="calendar",
        expect_tool_use=True,
        expect_tool_success=True,
        **spec,
    )
    for spec in (
        {
            "id": "calendar-read-today",
            # Prompt: "Please look up my schedule for today."
            "prompt": "请查询我今天的日程安排",
            # Criteria: call project_cli's calendar.read; input has mode=day
            # and a concrete date; answer is based on the data the tool
            # returned; say so explicitly when there are no events.
            "quality_criteria": [
                "应调用 project_cli 的 calendar.read 方法",
                "input 应包含 mode=day 和具体日期",
                "回答应基于工具返回的实际数据",
                "如果无日程,应明确告知无日程",
            ],
        },
        {
            "id": "calendar-create-event",
            # Prompt: "Create a two-hour meeting for me tomorrow at 3 pm,
            # titled 'project weekly meeting'."
            "prompt": "帮我创建一个明天下午3点两小时的会议,标题是项目周会",
            # Criteria: call project_cli's calendar.create; input has title,
            # start_at and timezone; start_at is a concrete timestamp rather
            # than natural language; the creation result (with event_id) is
            # returned.
            # NOTE(review): the prompt asks for a two-hour meeting, but no
            # criterion mentions an end time or duration -- confirm whether
            # that omission is intentional.
            "quality_criteria": [
                "应调用 project_cli 的 calendar.create 方法",
                "input 应包含 title、start_at、timezone",
                "start_at 应为具体的时间戳而非自然语言",
                "应返回创建结果(包含 event_id)",
            ],
        },
        {
            "id": "calendar-read-range",
            # Prompt: "What do I have scheduled Monday through Friday this
            # week?"
            "prompt": "这周一到周五我有哪些日程?",
            # Criteria: call project_cli's calendar.read; use mode=range or
            # several mode=day calls; cover the complete time range.
            "quality_criteria": [
                "应调用 project_cli 的 calendar.read 方法",
                "input 应使用 mode=range 或多次 mode=day",
                "应提供完整时间范围",
            ],
        },
    )
]
# Small-talk scenarios: every entry shares category="general" and expects no
# tool call.
# NOTE(review): expect_tool_success is False alongside expect_tool_use=False;
# presumably it is ignored when no tool call is expected -- confirm against
# the code that consumes these scenarios.
GENERAL_SCENARIOS: list[EvalScenario] = [
    EvalScenario(
        category="general",
        expect_tool_use=False,
        expect_tool_success=False,
        **spec,
    )
    for spec in (
        {
            "id": "general-greeting",
            # Prompt: "Hello, who are you?"
            "prompt": "你好,你是谁?",
            # Criteria: give a short self-introduction; call no tools; keep
            # the answer concise.
            "quality_criteria": [
                "应简短自我介绍",
                "不应调用任何工具",
                "回答简洁不啰嗦",
            ],
        },
        {
            "id": "general-farewell",
            # Prompt: "OK, thanks, goodbye."
            "prompt": "好的谢谢,再见",
            # Criteria: say goodbye politely; call no tools.
            "quality_criteria": [
                "应礼貌告别",
                "不应调用任何工具",
            ],
        },
    )
]
# Every scenario in one new list: calendar entries first, then general ones.
ALL_SCENARIOS: list[EvalScenario] = [*CALENDAR_SCENARIOS, *GENERAL_SCENARIOS]