from __future__ import annotations import json import os import subprocess import time from pathlib import Path from uuid import uuid4 import httpx import jwt import pytest BASE_URL = os.getenv("AGENT_LIVE_BASE_URL", "http://localhost:5775") def _load_env() -> None: env_path = Path(__file__).resolve().parents[3] / ".env" if env_path.exists(): for line in env_path.read_text().splitlines(): line = line.strip() if not line or line.startswith("#") or "=" not in line: continue key, _, value = line.partition("=") key = key.strip() value = value.strip().strip('"').strip("'") if key and key not in os.environ: os.environ[key] = value _load_env() def _get_jwt_secret() -> str | None: return ( os.getenv("SOCIAL_SUPABASE__JWT_SECRET") or os.getenv("SUPABASE_JWT_SECRET") or os.getenv("JWT_SECRET") ) def _get_supabase_url() -> str: return ( os.getenv("SOCIAL_SUPABASE__URL") or os.getenv("SUPABASE_URL") or "http://localhost:54321" ) def _get_test_user_id() -> str | None: return os.getenv("TEST_USER_ID") def _create_test_jwt(user_id: str) -> str: jwt_secret = _get_jwt_secret() if not jwt_secret: raise RuntimeError("JWT_SECRET not found in environment") supabase_url = _get_supabase_url() now = int(time.time()) payload = { "sub": user_id, "role": "authenticated", "aud": "authenticated", "iss": supabase_url, "iat": now, "exp": now + 3600, } return jwt.encode(payload, jwt_secret, algorithm="HS256") async def _get_test_user_token() -> str: user_id = _get_test_user_id() if user_id: return _create_test_jwt(user_id) result = subprocess.run( ["psql", "-t", "-A", "-c", "SELECT id FROM auth.users LIMIT 1;"], capture_output=True, text=True, env={ **os.environ, "PGHOST": "localhost", "PGPORT": "54322", "PGDATABASE": "postgres", "PGUSER": "postgres", "PGPASSWORD": "postgres", }, ) if result.returncode == 0 and result.stdout.strip(): user_id = result.stdout.strip() return _create_test_jwt(user_id) pytest.skip("Could not find test user. Set TEST_USER_ID or ensure database is accessible") async def _run_agent_and_collect_events( client: httpx.AsyncClient, headers: dict, thread_id: str, run_id: str, user_message: str, runtime_mode: str = "chat", ) -> tuple[list[dict], bool, str]: run_resp = await client.post( f"{BASE_URL}/api/v1/agent/runs", headers=headers, json={ "threadId": thread_id, "runId": run_id, "state": {}, "messages": [ { "id": "u1", "role": "user", "content": user_message, } ], "tools": [], "context": [], "forwardedProps": {"runtime_mode": runtime_mode}, }, ) if run_resp.status_code != 202: pytest.fail(f"Run request failed: {run_resp.status_code} - {run_resp.text}") assert run_resp.status_code == 202 run_data = run_resp.json() effective_thread_id = str(run_data.get("threadId", thread_id)) effective_run_id = run_data.get("runId", run_id) events_url = f"{BASE_URL}/api/v1/agent/runs/{effective_thread_id}/events?runId={effective_run_id}" tool_call_results: list[dict] = [] run_finished = False async with client.stream( "GET", events_url, headers=headers, timeout=120.0 ) as sse_resp: if sse_resp.status_code != 200: error_body = await sse_resp.aread() pytest.fail(f"SSE request failed: {sse_resp.status_code} - {error_body.decode()}") assert sse_resp.status_code == 200 buffer = "" async for line in sse_resp.aiter_lines(): if line.startswith("data:"): data_str = line.split(":", 1)[1].strip() if data_str: buffer = data_str elif line == "" and buffer: try: event_data = json.loads(buffer) event_type = event_data.get("type") if event_type == "TOOL_CALL_RESULT": tool_call_results.append(event_data) elif event_type == "RUN_ERROR": run_finished = True print(f"RUN_ERROR: {event_data}") break elif event_type == "RUN_FINISHED": run_finished = True break except json.JSONDecodeError: pass buffer = "" return tool_call_results, run_finished, effective_thread_id def _check_db_record(table: str, user_id: str, extra_condition: str = "") -> bool: result = subprocess.run( [ "psql", "-t", "-A", "-c", f"SELECT COUNT(*) FROM {table} WHERE owner_id = '{user_id}'{extra_condition};", ], capture_output=True, text=True, env={ **os.environ, "PGHOST": "localhost", "PGPORT": "54322", "PGDATABASE": "postgres", "PGUSER": "postgres", "PGPASSWORD": "postgres", }, ) if result.returncode == 0: count = int(result.stdout.strip() or "0") return count > 0 return False @pytest.mark.asyncio @pytest.mark.live @pytest.mark.skipif( os.getenv("CLI_SKILLS_LIVE_TEST") != "1", reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test", ) async def test_calendar_write_skill_creates_db_record() -> None: token = await _get_test_user_token() user_id = _get_test_user_id() async with httpx.AsyncClient(timeout=120.0) as client: headers = {"Authorization": f"Bearer {token}"} thread_id = str(uuid4()) tomorrow = time.strftime("%Y-%m-%d", time.localtime(time.time() + 86400)) user_message = ( f"请帮我创建一个日程测试事件,标题为'CLI集成测试-{thread_id[:8]}'," f"开始时间是明天{tomorrow}上午10点,持续1小时。" f"严格按技能说明执行,不要猜测结果。" ) tool_call_results, run_finished, _ = await _run_agent_and_collect_events( client=client, headers=headers, thread_id=thread_id, run_id="run-calendar-write-test", user_message=user_message, ) assert run_finished, "Run did not finish" project_cli_results = [ r for r in tool_call_results if r.get("tool_name") == "project_cli" ] assert project_cli_results, "No project_cli tool call found" cli_result = project_cli_results[0] assert cli_result.get("status") == "success", f"Tool call failed: {cli_result}" args = cli_result.get("tool_call_args", {}) assert args.get("command") == "calendar" assert args.get("subcommand") == "write" if user_id: time.sleep(1) has_record = _check_db_record( "schedule_items", user_id, f" AND title LIKE '%CLI集成测试-{thread_id[:8]}%'", ) assert has_record, f"No schedule_items record found for user {user_id}" @pytest.mark.asyncio @pytest.mark.live @pytest.mark.skipif( os.getenv("CLI_SKILLS_LIVE_TEST") != "1", reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test", ) async def test_calendar_read_skill_queries_db() -> None: token = await _get_test_user_token() async with httpx.AsyncClient(timeout=120.0) as client: headers = {"Authorization": f"Bearer {token}"} thread_id = str(uuid4()) user_message = "请查询我今天的日程安排,严格按技能说明执行,不要猜测结果。" tool_call_results, run_finished, _ = await _run_agent_and_collect_events( client=client, headers=headers, thread_id=thread_id, run_id="run-calendar-read-test", user_message=user_message, ) assert run_finished, "Run did not finish" print(f"\n=== Tool call results: {len(tool_call_results)} ===") for r in tool_call_results: print(f" - tool_name: {r.get('tool_name')}") print(f" status: {r.get('status')}") print(f" tool_call_args: {json.dumps(r.get('tool_call_args', {}), ensure_ascii=False)}") result = r.get("result") if isinstance(result, str) and len(result) < 200: print(f" result: {result}") elif isinstance(result, dict): print(f" result keys: {list(result.keys())}") project_cli_results = [ r for r in tool_call_results if r.get("tool_name") == "project_cli" ] assert project_cli_results, "No project_cli tool call found" cli_result = project_cli_results[0] assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}" args = cli_result.get("tool_call_args", {}) assert args.get("command") == "calendar" assert args.get("subcommand") == "read" @pytest.mark.asyncio @pytest.mark.live @pytest.mark.skipif( os.getenv("CLI_SKILLS_LIVE_TEST") != "1", reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test", ) async def test_contacts_lookup_skill_queries_db() -> None: token = await _get_test_user_token() async with httpx.AsyncClient(timeout=120.0) as client: headers = {"Authorization": f"Bearer {token}"} thread_id = str(uuid4()) user_message = "请帮我查找我的联系人列表,严格按技能说明执行,不要猜测结果。" tool_call_results, run_finished, _ = await _run_agent_and_collect_events( client=client, headers=headers, thread_id=thread_id, run_id="run-contacts-lookup-test", user_message=user_message, ) assert run_finished, "Run did not finish" project_cli_results = [ r for r in tool_call_results if r.get("tool_name") == "project_cli" ] assert project_cli_results, "No project_cli tool call found" cli_result = project_cli_results[0] assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}" args = cli_result.get("tool_call_args", {}) assert args.get("command") == "contacts" assert args.get("subcommand") == "lookup" @pytest.mark.asyncio @pytest.mark.live @pytest.mark.skipif( os.getenv("CLI_SKILLS_LIVE_TEST") != "1", reason="set CLI_SKILLS_LIVE_TEST=1 to run live CLI + skills integration test", ) async def test_memory_write_skill_via_automation() -> None: token = await _get_test_user_token() user_id = _get_test_user_id() async with httpx.AsyncClient(timeout=120.0) as client: headers = {"Authorization": f"Bearer {token}"} thread_id = str(uuid4()) user_message = ( f"请将以下信息写入我的记忆:用户偏好测试字段值为'test-value-{thread_id[:8]}'。" f"严格按技能说明执行,不要猜测结果。" ) tool_call_results, run_finished, _ = await _run_agent_and_collect_events( client=client, headers=headers, thread_id=thread_id, run_id="run-memory-write-test", user_message=user_message, runtime_mode="automation", ) assert run_finished, "Run did not finish" project_cli_results = [ r for r in tool_call_results if r.get("tool_name") == "project_cli" ] assert project_cli_results, "No project_cli tool call found" cli_result = project_cli_results[0] assert cli_result.get("status") in {"success", "partial"}, f"Tool call failed: {cli_result}" args = cli_result.get("tool_call_args", {}) assert args.get("command") == "memory" assert args.get("subcommand") in {"write", "update"} if user_id: time.sleep(1) result = subprocess.run( [ "psql", "-t", "-A", "-c", f"SELECT content FROM memories WHERE owner_id = '{user_id}' AND memory_type = 'user' ORDER BY updated_at DESC LIMIT 1;", ], capture_output=True, text=True, env={ **os.environ, "PGHOST": "localhost", "PGPORT": "54322", "PGDATABASE": "postgres", "PGUSER": "postgres", "PGPASSWORD": "postgres", }, ) if result.returncode == 0 and result.stdout.strip(): content = result.stdout.strip() assert f"test-value-{thread_id[:8]}" in content or "测试" in content