diff --git a/.env.example b/.env.example index a11eee7..0a9375a 100644 --- a/.env.example +++ b/.env.example @@ -13,7 +13,7 @@ SOCIAL_RUNTIME__SQL_LOG_QUERIES=false # Web 服务器配置(显式参数控制) ############ SOCIAL_WEB__HOST=0.0.0.0 -SOCIAL_WEB__PORT=8000 +SOCIAL_WEB__PORT=5775 SOCIAL_WEB__RELOAD=false SOCIAL_WEB__GUNICORN__WORKERS=2 SOCIAL_WEB__GUNICORN__WORKER_CLASS=uvicorn.workers.UvicornWorker diff --git a/Makefile b/Makefile deleted file mode 100644 index 5a0fdf2..0000000 --- a/Makefile +++ /dev/null @@ -1,180 +0,0 @@ -# ============================================ -# Social App Monorepo - 统一命令入口 -# ============================================ -# -# 使用方式: -# make -# -# 常用命令: -# make up 启动本地开发依赖 -# make down 停止并移除容器 -# make logs 查看依赖服务日志 -# make ps 查看运行状态 -# make clean 清理本地数据卷(警告:会丢失数据) -# make env 提示如何配置环境变量 -# make api-dev 启动 FastAPI 后端服务 -# make flutter-dev 启动 Flutter 移动应用 -# make milvus-init 初始化 Milvus 向量数据库 - -.PHONY: help up down logs ps clean env api-dev flutter-dev milvus-init - -# 默认目标:显示帮助 -help: - @echo "Social App Monorepo - 可用命令:" - @echo "" - @echo " 依赖服务管理:" - @echo " make up 启动本地开发依赖(Redis、Milvus、Postgres)" - @echo " make down 停止并移除所有容器" - @echo " make logs 查看依赖服务日志(可用 SERVICE=redis 等指定)" - @echo " make ps 查看容器运行状态" - @echo " make clean 清理本地数据卷(警告:会丢失数据)" - @echo "" - @echo " 应用启动:" - @echo " make api-dev 启动 FastAPI 后端服务" - @echo " make flutter-dev 启动 Flutter 移动应用" - @echo "" - @echo " 配置与初始化:" - @echo " make env 显示如何配置环境变量" - @echo " make milvus-init 初始化 Milvus 向量数据库" - @echo "" - @echo " 示例:" - @echo " make logs SERVICE=redis" - @echo " make logs SERVICE=milvus" - -# ============================================ -# 依赖服务管理 -# ============================================ - -# 启动本地开发依赖 -up: - @echo "🚀 启动本地开发依赖..." 
- docker compose -f infra/local/docker-compose.yml --env-file infra/local/env/.env up -d - @echo "" - @echo "✅ 依赖服务已启动" - @echo "" - @echo "服务端口:" - @echo " - Redis: 6379" - @echo " - Milvus: 19530 (gRPC) / 19111 (HTTP)" - @echo " - Postgres: 54322" - @echo "" - @echo "检查状态:make ps" - @echo "查看日志:make logs" - -# 停止并移除容器 -down: - @echo "🛑 停止并移除容器..." - docker compose -f infra/local/docker-compose.yml --env-file infra/local/env/.env down - @echo "✅ 容器已停止并移除" - -# 查看日志 -logs: -ifndef SERVICE - @echo "📋 查看所有服务日志(Ctrl+C 退出)..." - docker compose -f infra/local/docker-compose.yml --env-file infra/local/env/.env logs -f -else - @echo "📋 查看 $(SERVICE) 服务日志(Ctrl+C 退出)..." - docker compose -f infra/local/docker-compose.yml --env-file infra/local/env/.env logs -f $(SERVICE) -endif - -# 查看运行状态 -ps: - @echo "📊 容器运行状态:" - docker compose -f infra/local/docker-compose.yml --env-file infra/local/env/.env ps - -# 清理数据卷(警告:会丢失数据) -clean: - @echo "⚠️ 警告:此操作将删除所有本地数据(Redis、Milvus、Postgres)" - @read -p "确认继续?[y/N] " confirm; \ - if [ "$$confirm" = "y" ] || [ "$$confirm" = "Y" ]; then \ - docker compose -f infra/local/docker-compose.yml --env-file infra/local/env/.env down -v; \ - echo "✅ 数据卷已清理"; \ - else \ - echo "❌ 操作已取消"; \ - fi - -# ============================================ -# 配置管理 -# ============================================ - -# 显示环境变量配置说明 -env: - @echo "📝 环境变量配置说明:" - @echo "" - @echo "1. 查看配置规范:" - @echo " cat configs/env/.env.example" - @echo "" - @echo "2. 本地开发配置:" - @echo " mkdir -p infra/local/env" - @echo " cp configs/env/.env.example configs/env/.env" - @echo " cp infra/local/env/.env.example infra/local/env/.env" - @echo " # 编辑 configs/env/.env 与 infra/local/env/.env" - @echo "" - @echo "3. 确保以下变量已正确配置:" - @echo " - DATABASE_URL (连接到 localhost:54322)" - @echo " - REDIS_URL (连接到 localhost:6379)" - @echo " - MILVUS_URI (连接到 localhost:19530)" - @echo "" - @echo "4. 
严禁将包含真实密钥的配置文件提交到 Git" - -# ============================================ -# 应用启动 -# ============================================ - -# 启动 FastAPI 后端服务 -api-dev: - @echo "🔧 启动 FastAPI 后端服务..." - @echo "" - @echo "前置条件:" - @echo " 1. 确保依赖服务已启动:make up" - @echo " 2. 确保环境变量已配置:make env" - @echo "" - @echo "启动命令(示例):" - @echo " cd backend" - @echo " uv run uvicorn src.app:app --host 0.0.0.0 --port 8000 --reload" - @echo "" - @echo "或使用 Python 虚拟环境:" - @echo " python -m venv .venv" - @echo " source .venv/bin/activate" - @echo " pip install -r requirements.txt" - @echo " uvicorn src.main:app --host 0.0.0.0 --port 8000 --reload" - -# 启动 Flutter 移动应用 -flutter-dev: - @echo "📱 启动 Flutter 移动应用..." - @echo "" - @echo "前置条件:" - @echo " 1. 确保后端服务已启动:make api-dev" - @echo " 2. 确保 Flutter SDK 已安装" - @echo "" - @echo "启动命令(示例):" - @echo " cd apps/mobile" - @echo " flutter run --dart-define=PUBLIC_API_BASE_URL=http://localhost:8000" - @echo "" - @echo "或使用调试模式:" - @echo " flutter run --dart-define=PUBLIC_API_BASE_URL=http://localhost:8000 --debug" - @echo "" - @echo "构建版本:" - @echo " flutter build apk --dart-define=PUBLIC_API_BASE_URL=http://localhost:8000" - -# ============================================ -# 初始化脚本 -# ============================================ - -# 初始化 Milvus 向量数据库 -milvus-init: - @echo "🔧 初始化 Milvus 向量数据库..." - @echo "" - @echo "前置条件:" - @echo " 1. 确保依赖服务已启动:make up" - @echo " 2. 
确保 Milvus 服务已健康(检查:make ps)" - @echo "" - @echo "初始化脚本:" - @echo " bash tools/scripts/init_milvus.sh" - @echo "" - @echo "如脚本不存在,请创建:" - @echo " mkdir -p tools/scripts" - @echo " cat > tools/scripts/init_milvus.sh << 'EOF'" - @echo "#!/bin/bash" - @echo "# Milvus 初始化脚本" - @echo "# TODO: 创建集合、索引等" - @echo "EOF" diff --git a/apps/lib/features/contacts/ui/screens/add_contact_screen.dart b/apps/lib/features/contacts/ui/screens/add_contact_screen.dart index 443f2be..f119bc0 100644 --- a/apps/lib/features/contacts/ui/screens/add_contact_screen.dart +++ b/apps/lib/features/contacts/ui/screens/add_contact_screen.dart @@ -147,7 +147,6 @@ class _AddContactScreenState extends State { void _handleConfirm() { final name = _nameController.text.trim(); final email = _emailController.text.trim(); - final remark = _remarkController.text.trim(); if (name.isEmpty || email.isEmpty) { ScaffoldMessenger.of( diff --git a/backend/AGENTS.md b/backend/AGENTS.md index e3fe9c8..f171a34 100644 --- a/backend/AGENTS.md +++ b/backend/AGENTS.md @@ -6,67 +6,16 @@ - Add dependencies: `uv add ` - All dependencies declared in `pyproject.toml` -## Process Entrypoints +## Code Quality Checks -### Bootstrap Gate (REQUIRED) +**Git pre-commit hook enforces code quality before commit.** -**The bootstrap gate is the ONLY allowed entry point for deployment.** +Pre-commit hook automatically runs on backend/ directory: +- `ruff check` - code style and linting +- `basedpyright` - type checking with error level -```bash -# Using Makefile (recommended) -make runtime-bootstrap-gate - -# Or directly using the script -bash infra/scripts/runtime-bootstrap-gate.sh -``` - -This gate: -1. Runs `init-job bootstrap` (migrate + init-data) -2. Starts web and worker services -3. Aborts if bootstrap fails (prevents web/worker startup) - -**Deployment without passing the bootstrap gate is PROHIBITED.** - -### New Entrypoints (Phase 1-2, 2026-02-24) - -**Primary (recommended):** Use Docker Compose orchestration. 
- -```bash -# Bootstrap gate (required before web/worker) -docker compose --env-file .env -f infra/docker/docker-compose.yml run --rm init-job bootstrap - -# Web -docker compose --env-file .env -f infra/docker/docker-compose.yml up -d web - -# Worker (grouped) -docker compose --env-file .env -f infra/docker/docker-compose.yml up -d \ - worker-critical worker-default worker-bulk -``` - -**One-shot jobs:** -```bash -# Migrate only -docker compose --env-file .env -f infra/docker/docker-compose.yml run --rm init-job migrate - -# Init data only -docker compose --env-file .env -f infra/docker/docker-compose.yml run --rm init-job init-data - -# Full bootstrap (migrate + init-data) -docker compose --env-file .env -f infra/docker/docker-compose.yml run --rm init-job bootstrap -``` - -### One-shot CLI (local development) - -```bash -# Bootstrap (migrate + init-data) -PYTHONPATH=backend/src uv run python -m core.runtime.cli bootstrap - -# Migrate only -PYTHONPATH=backend/src uv run python -m core.runtime.cli migrate - -# Init data only -PYTHONPATH=backend/src uv run python -m core.runtime.cli init-data -``` +If any error detected, commit is rejected. Fix errors before committing. +Do not bypass or weaken checks (no ignores, disables, or config relaxations). Resolve the underlying issues. ## Logging @@ -94,17 +43,6 @@ PYTHONPATH=backend/src uv run python -m core.runtime.cli init-data - Tests can set env vars via `monkeypatch.setenv`, and should read values via `Settings()` unless the test is explicitly validating env plumbing - Canonical principle: one source of truth per setting; no duplicate/derived env vars in backend code -## Code Quality Checks - -**Git pre-commit hook enforces code quality before commit.** - -Pre-commit hook automatically runs on backend/ directory: -- `ruff check` - code style and linting -- `basedpyright` - type checking with error level - -If any error detected, commit is rejected. Fix errors before committing. 
-Do not bypass or weaken checks (no ignores, disables, or config relaxations). Resolve the underlying issues. - ## TDD First Policy **Principle: tests before implementation.** diff --git a/backend/src/app.py b/backend/src/app.py index db7517b..ffdf34d 100644 --- a/backend/src/app.py +++ b/backend/src/app.py @@ -9,12 +9,17 @@ from starlette.exceptions import HTTPException as StarletteHTTPException from core.config.settings import config from core.http.models import HealthResponse from core.http.response import build_problem_details -from core.logging import configure_logging, get_logger +from core.logging import configure_logging, get_logger, log_service_banner from v1.router import router as mobile_router configure_logging(config) +log_service_banner( + service_name=config.runtime.service_name, + environment=config.runtime.environment, +) + app = FastAPI() app.add_middleware( CORSMiddleware, @@ -26,6 +31,13 @@ app.add_middleware( app.include_router(mobile_router) logger = get_logger("api.app") +logger.info( + "Web application initialized", + environment=config.runtime.environment, + debug=config.runtime.debug, + log_level=config.runtime.log_level, +) + @app.get("/health", response_model=HealthResponse) async def health() -> HealthResponse: diff --git a/backend/src/core/celery/app.py b/backend/src/core/celery/app.py index 3207d05..0a02c88 100644 --- a/backend/src/core/celery/app.py +++ b/backend/src/core/celery/app.py @@ -43,8 +43,6 @@ def create_celery_app() -> Celery: worker_prefetch_multiplier=1, ) - app.autodiscover_tasks(["tasks"]) - configure_celery_app(app, settings=config) return app diff --git a/backend/src/core/logging/__init__.py b/backend/src/core/logging/__init__.py index 053f72a..5de63fd 100644 --- a/backend/src/core/logging/__init__.py +++ b/backend/src/core/logging/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations from core.logging import celery +from core.logging.banner import log_service_banner from core.logging.config import 
configure_logging from core.logging.context import bind_context, clear_context, get_context from core.logging.logger import get_logger @@ -12,4 +13,5 @@ __all__ = [ "configure_logging", "get_context", "get_logger", + "log_service_banner", ] diff --git a/backend/src/core/logging/banner.py b/backend/src/core/logging/banner.py new file mode 100644 index 0000000..63500cf --- /dev/null +++ b/backend/src/core/logging/banner.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import structlog + + +def build_service_banner(service_name: str, environment: str) -> str: + service_upper = service_name.upper() + border = "=" * 50 + lines = [ + border, + f" {service_upper}", + f" Environment: {environment}", + border, + ] + return "\n".join(lines) + + +def log_service_banner(service_name: str, environment: str) -> None: + logger = structlog.get_logger("banner") + banner = build_service_banner(service_name, environment) + logger.info(banner) diff --git a/backend/src/core/logging/celery.py b/backend/src/core/logging/celery.py index 1676851..4ceba4a 100644 --- a/backend/src/core/logging/celery.py +++ b/backend/src/core/logging/celery.py @@ -7,6 +7,7 @@ from typing import cast from celery import Celery, signals from core.config.settings import Settings +from core.logging.banner import log_service_banner from core.logging.config import configure_logging from core.logging.context import bind_context, clear_context @@ -22,8 +23,14 @@ class CelerySignalHandlers: def build_celery_signal_handlers( settings: Settings | None = None, ) -> CelerySignalHandlers: + active_settings = settings or Settings() + def on_setup_logging(*_args: object, **_kwargs: object) -> None: configure_logging(settings) + log_service_banner( + service_name=active_settings.runtime.service_name, + environment=active_settings.runtime.environment, + ) def on_after_setup_task_logger(*_args: object, **_kwargs: object) -> None: configure_logging(settings) diff --git a/backend/tests/unit/test_logging_banner.py 
b/backend/tests/unit/test_logging_banner.py new file mode 100644 index 0000000..52d420f --- /dev/null +++ b/backend/tests/unit/test_logging_banner.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from core.logging.banner import build_service_banner + + +def test_build_service_banner_contains_service_name() -> None: + banner = build_service_banner( + service_name="web", + environment="dev", + ) + + assert "WEB" in banner + assert "dev" in banner + + +def test_build_service_banner_uppercases_service_name() -> None: + banner = build_service_banner( + service_name="worker-critical", + environment="prod", + ) + + assert "WORKER-CRITICAL" in banner.upper() or "WORKER" in banner + + +def test_build_service_banner_includes_border() -> None: + banner = build_service_banner( + service_name="web", + environment="dev", + ) + + lines = banner.strip().split("\n") + assert len(lines) >= 3 + assert all(line.startswith("=") or "WEB" in line or "dev" in line for line in lines) diff --git a/docs/plans/2026-02-25-agent-chat-crewai-ag-ui-plan.md b/docs/plans/2026-02-25-agent-chat-crewai-ag-ui-plan.md new file mode 100644 index 0000000..d2f5753 --- /dev/null +++ b/docs/plans/2026-02-25-agent-chat-crewai-ag-ui-plan.md @@ -0,0 +1,528 @@ +# Backend Agent Chat Core (CrewAI + AG-UI) Implementation Plan + +> 实施建议:按任务顺序执行,每个任务先测试失败再实现通过。 + +**Goal:** 为后端新增基于 AG-UI 协议的 Agent Chat 核心能力,使用 CrewAI 三阶段模型链路,支持成本追踪、会话落库、多模态输入与 ASR。 + +**Architecture:** 采用分层架构:`v1/agent_chat` 作为 AG-UI 协议边界,`core/agent_chat` 承担编排与能力层,`models + repository` 承担持久化。所有调用以 `sessions.id` 作为链路标识,并在调用级与会话级记录 token/cost。 + +**Tech Stack:** FastAPI, Pydantic, SQLAlchemy, Alembic, CrewAI, AG-UI 官方 CrewAI 集成与 Python SDK, DashScope Python SDK, Supabase Storage, pytest。 + +**Replaces:** `docs/plans/PLAN-agent-chat-crewai-ag-ui-2026-02-25.md` + +--- + +## Scope + +- In Scope + - CrewAI 三阶段链路:Intent -> Execution -> Organization + - LLM 成本采集与会话级聚合 + - `llm_factory`、`llms`、`sessions`、`messages` 四张核心表 + - AG-UI 协议路由与事件映射 + - 
CrewAI 模板与静态配置 + - 图片/音频/文本文档输入支持 + - FunASR 工具接入 `fun-asr-realtime-2025-11-07` +- Out of Scope + - 前端 UI 改造 + - 新增向量数据库与长期记忆系统 + - 多租户账单结算系统 + +## Acceptance Criteria + +- AG-UI 路由可完成文本和附件会话,并输出标准事件流。 +- 三阶段模型可配置替换,并在会话中完整落库模型链路。 +- 每次调用记录 token/cost,会话聚合成本可审计。 +- `llm_factory` 初始包含 6 厂商,`llms` 初始包含 2 模型。 +- CrewAI agents/tasks/workflow 模板可按配置加载并执行。 +- ASR 工具可将音频转换文本并参与回答。 +- 单测 + 集成 + E2E 通过,覆盖率达到仓库要求。 + +## Requirement Traceability Matrix + +| Requirement | Description | Tasks | Tests | +|---|---|---|---| +| R1 | 三阶段模型(意图/执行/整理) | Task 5 | `backend/tests/unit/core/agent_chat/test_orchestrator_pipeline.py` | +| R2 | LLM 与工具调用成本采集(统一写入 messages) | Task 5, Task 8 | `backend/tests/unit/core/agent_chat/test_cost_tracker.py`, `backend/tests/integration/test_agent_chat_session_persistence.py` | +| R3 | `llm_factory` + `llms` 表 | Task 2, Task 3 | `backend/tests/integration/test_agent_chat_migration.py`, `backend/tests/integration/test_agent_chat_seed_data.py` | +| R4 | `sessions` + `messages` 会话与历史落库 | Task 2, Task 8 | `backend/tests/integration/test_agent_chat_session_persistence.py` | +| R5 | AG-UI 协议路由与事件 | Task 6 | `backend/tests/unit/core/agent_chat/test_agui_adapter.py`, `backend/tests/integration/test_agent_chat_routes.py` | +| R6 | 三阶段 prompt/llm/tools 静态配置 | Task 4 | `backend/tests/unit/core/agent_chat/test_crewai_template_loader.py` | +| R7 | CrewAI templates 加载并供编排执行 | Task 4, Task 5 | `backend/tests/unit/core/agent_chat/test_crewai_template_loader.py`, `backend/tests/unit/core/agent_chat/test_orchestrator_pipeline.py` | +| R8 | 图片/音频/文档输入支持 | Task 7 | `backend/tests/unit/core/agent_chat/test_multimodal.py`, `backend/tests/e2e/test_agent_chat_flow.py` | +| R9 | FunASR 工具接入 qwen 模型 | Task 7 | `backend/tests/unit/core/agent_chat/test_asr_fun_asr_tool.py`, `backend/tests/e2e/test_agent_chat_flow.py` | + +## Data Model Design + +### llm_factory (LLM 厂商表) + +继承 `TimestampMixin` + `SoftDeleteMixin`。 + +| Field | Type | Constraints | Description | 
+|-------|------|-------------|-------------| +| id | UUID | PK, auto | 主键 | +| name | String(50) | UNIQUE, NOT NULL | 厂商名称:qwen/minimax/kimi/deepseek/doubao/zai | +| request_url | String(255) | NOT NULL | API 请求 URL | +| avatar | String(255) | NULL | 厂商图标 CDN URL | +| created_at | DateTime | NOT NULL, DEFAULT now | 创建时间 | +| updated_at | DateTime | NOT NULL | 更新时间 | +| deleted_at | DateTime | NULL | 软删除时间 | + +### llms (大模型表) + +继承 `TimestampMixin` + `SoftDeleteMixin`。 + +| Field | Type | Constraints | Description | +|-------|------|-------------|-------------| +| id | UUID | PK, auto | 主键 | +| factory_id | UUID | FK -> llm_factory(id), NOT NULL | 关联厂商 | +| model_code | String(50) | UNIQUE, NOT NULL | 模型代码:qwen3.5-flash、deepseek-v3.2 | +| created_at | DateTime | NOT NULL, DEFAULT now | 创建时间 | +| updated_at | DateTime | NOT NULL | 更新时间 | +| deleted_at | DateTime | NULL | 软删除时间 | + +### sessions (会话表) + +继承 `TimestampMixin` + `SoftDeleteMixin`。表示一个完整的对话会话。 + +| Field | Type | Constraints | Description | +|-------|------|-------------|-------------| +| id | UUID | PK, auto | 主键 | +| user_id | UUID | FK -> users(id), NOT NULL | 关联用户 | +| title | String(255) | NULL | 会话标题(首条消息摘要) | +| status | Enum | DEFAULT 'pending' | 状态:pending/running/completed/failed | +| last_activity_at | DateTime | NOT NULL | 最近活跃时间(首页排序) | +| message_count | Integer | DEFAULT 0, CHECK >= 0 | 消息数量冗余字段 | +| total_tokens | Integer | DEFAULT 0, CHECK >= 0 | 会话总 token 冗余字段 | +| total_cost | Decimal(12,6) | DEFAULT 0, CHECK >= 0 | 会话总成本冗余字段 | +| created_at | DateTime | NOT NULL, DEFAULT now | 创建时间 | +| updated_at | DateTime | NOT NULL | 更新时间 | +| deleted_at | DateTime | NULL | 软删除时间 | + +**Indexes:** +- `idx_sessions_user_created`: (user_id, created_at DESC) +- `idx_sessions_user_last_activity`: (user_id, last_activity_at DESC) + +### messages (对话历史表) + +继承 `TimestampMixin` + `SoftDeleteMixin`。表示会话中的单条对话记录(一对多)。 + +| Field | Type | Constraints | Description | 
+|-------|------|-------------|-------------| +| id | UUID | PK, auto | 主键 | +| session_id | UUID | FK -> sessions(id), NOT NULL | 关联会话 | +| seq | Integer | NOT NULL | 会话内顺序号(唯一:session_id + seq) | +| role | Enum | NOT NULL | 角色:user/assistant/system/tool | +| content | Text | NOT NULL | 对话内容 | +| model_code | String(50) | NULL | 使用的模型代码 | +| tool_name | String(100) | NULL | 若为工具结果消息,记录工具名 | +| input_tokens | Integer | DEFAULT 0, CHECK >= 0 | 输入 token 数 | +| output_tokens | Integer | DEFAULT 0, CHECK >= 0 | 输出 token 数 | +| cost | Decimal(12,6) | DEFAULT 0, CHECK >= 0 | 本条成本 | +| currency | String(3) | DEFAULT 'USD' | 货币单位 | +| latency_ms | Integer | NULL | 本条耗时(毫秒) | +| metadata | JSONB | NULL | 扩展字段(工具调用、附件信息等) | +| created_at | DateTime | NOT NULL, DEFAULT now | 创建时间 | +| updated_at | DateTime | NOT NULL | 更新时间 | +| deleted_at | DateTime | NULL | 软删除时间 | + +**Indexes:** +- `idx_messages_session_created`: (session_id, created_at DESC) +- `idx_messages_session_role`: (session_id, role) +- `uq_messages_session_seq`: UNIQUE(session_id, seq) + +## LLM Seed Configuration + +- 配置文件位置:`backend/src/core/config/static/agent_chat/llm_catalog.yaml` +- 此文件作为 `init_data.py` 的唯一种子源,包含 `llm_factory` 与 `llms` 初始化数据。 + +```yaml +factories: + - name: qwen + request_url: https://dashscope.aliyuncs.com/compatible-mode/v1 + avatar: https://cdn.simpleicons.org/alibabacloud/FF6A00 + - name: minimax + request_url: https://api.minimax.chat/v1 + avatar: https://cdn.simpleicons.org/minimax/1A1A1A + - name: kimi + request_url: https://api.moonshot.cn/v1 + avatar: https://cdn.simpleicons.org/moonrepo/3B82F6 + - name: deepseek + request_url: https://api.deepseek.com/v1 + avatar: https://cdn.simpleicons.org/deepseek/4D6BFE + - name: doubao + request_url: https://ark.cn-beijing.volces.com/api/v3 + avatar: https://cdn.simpleicons.org/volkswagen/001E50 + - name: zai + request_url: https://api.z.ai/v1 + avatar: https://cdn.simpleicons.org/zotero/CC2936 + +llms: + - model_code: qwen3.5-flash + 
factory_id: qwen + - model_code: deepseek-v3.2 + factory_id: deepseek +``` + +- 说明:`llms.factory_id` 在配置中使用厂商 `name` 作为逻辑标识;`init_data.py` 先 upsert `llm_factory(name, request_url, avatar)`,再按 `name` 解析真实数据库 `llm_factory.id` 填充 `llms.factory_id`。 + +## Attachment Storage Policy + +- 用户上传附件与 AI 生成附件均保存到 **Supabase Storage**(对象存储),不存数据库二进制。 +- 建议 bucket:`agent-chat-attachments`(私有桶,签名 URL 短时访问)。 +- 路径规范:`agent-chat/{user_id}/{session_id}/{message_seq}/{sha256}.{ext}`。 +- 访问规范:后端仅保存 `object_path`,读取时签发短时 URL(默认 10 分钟)。 +- 生命周期: + - 原始附件保留 30 天(可配置)。 + - 超期由定时任务清理对象并同步标记 `messages.metadata.attachments[].expired=true`。 +- 安全规范: + - 仅后端 service_role 可写对象。 + - 前端不得直传 service_role;如需直传使用受限 upload token。 + - 所有附件写入前必须完成 MIME/大小校验并记录 `checksum_sha256`。 +- 数据库存储位置:`messages.metadata.attachments[]`,保存以下元数据: + - `object_path` + - `mime_type` + - `size` + - `checksum_sha256` + - `origin`(user_upload/assistant_output) + - `preview_text`(可选,截断) + +## Environment Variables Policy + +- `.env.example` 不新增任何厂商 API 变量(qwen/minimax/kimi/deepseek/doubao/zai 均不写入)。 +- `.env.example` 仅保留通用基础设施变量(本需求新增仅限存储相关): + - `SOCIAL_STORAGE__PROVIDER=supabase` + - `SOCIAL_STORAGE__BUCKET=agent-chat-attachments` + - `SOCIAL_STORAGE__SIGNED_URL_TTL_SECONDS=600` + - `SOCIAL_STORAGE__MAX_FILE_SIZE_MB=20` + - `SOCIAL_STORAGE__RETENTION_DAYS=30` +- 厂商密钥与 Base URL 通过部署平台密文配置注入,运行时统一由 `Settings()` 读取。 + +## CrewAI Template Configuration + +- 模板目录:`backend/src/core/config/static/agent_chat/crewai/` +- 约定文件: + - `agents.yaml`:intent/execution/organization 三个 agent 的 role/goal/backstory/default_model + - `tasks.yaml`:三阶段 task 描述、输入映射、输出 schema + - `workflow.yaml`:阶段顺序、短路规则、重试策略、超时策略 + - `../tools.yaml`:可调用工具白名单与参数 schema(位于 `agent_chat` 根配置) + - `prompts/*.md`:各阶段 prompt 模板 + +## CrewAI <-> AG-UI Event Bridge + +- 适配入口:`backend/src/core/agent_chat/agui_adapter.py`。 +- AG-UI 适配来源:优先使用 `ag-ui-protocol/ag-ui` 官方仓库中的 `integrations/crewai` 与 Python SDK(以官方发布版本为准,避免自定义协议漂移)。 +- 运行流程: + 1. 接收 AG-UI 请求并转为内部命令。 + 2. 
CrewAI 每个阶段通过回调产出内部事件(start/progress/tool/final/error)。 + 3. `agui_adapter.py` 将内部事件映射为 AG-UI 事件(`message.delta/tool.started/tool.completed/run.completed/run.failed`)。 + 4. `service.py` 在同一事务边界内写入 `messages`(含模型、token、cost、tool 信息),并增量更新 `sessions` 汇总字段。 + 5. 事件流输出与 DB 落库共用 `session.id + message.seq` 保证顺序一致。 + +## Session and Cost Rules + +- Title 生成 + - 在首条 `user` 消息写入后同步生成 `sessions.title`。 + - 策略:取首条用户消息前 24 个可见字符,去空白与换行,不调用额外 LLM。 + - 若首条消息为空或仅附件:兜底为 `新对话 YYYY-MM-DD HH:MM`。 + +- 工具调用记账 + - 工具调用不单独建表,统一写入 `messages`(`role=tool`,并在 `metadata.tool` 记录 name/status/args_digest)。 + - 若工具内部触发 LLM,请将 `input_tokens/output_tokens/cost` 写入该条 tool message。 + - 会话级成本采用增量聚合:`sessions.total_cost = sum(messages.cost)`。 + - 失败调用也可记成本(例如上游已计费但业务失败),通过 `metadata.tool.status` 区分。 + +- 首页“最近打开会话”选择 + - 定义:最近一次有消息或工具活动的会话(按 `sessions.last_activity_at DESC`)。 + - 查询:`WHERE user_id=:uid AND deleted_at IS NULL ORDER BY last_activity_at DESC LIMIT 1`。 + - 会话列表同样按 `last_activity_at DESC` 分页,默认高亮首条结果。 + +## Naming and File Convention + +- 文档命名统一:`docs/plans/YYYY-MM-DD-<topic>-plan.md` +- 本计划文件:`docs/plans/2026-02-25-agent-chat-crewai-ag-ui-plan.md` +- 配置目录统一:`backend/src/core/config/static/agent_chat/` +- 术语约定:代码实体统一使用 `AgentChatSession`、`AgentChatMessage`,避免与 SQLAlchemy `Session` 混淆。 + +## Milestones + +1. M1: Spike 完成,依赖与接口可用 +2. M2: 数据层落地(表、迁移、种子) +3. M3: 编排层与成本追踪可运行 +4. M4: AG-UI 事件流打通 +5. M5: 多模态 + ASR 打通 +6. 
M6: 全链路验证 + 文档门禁 + +### Task 1: Spike 与接口基线确认 + +**Files:** +- Modify: `docs/plans/2026-02-25-agent-chat-crewai-ag-ui-plan.md` +- Create: `docs/plans/2026-02-25-agent-chat-crewai-ag-ui-spike-notes.md` + +**Step 1: 写兼容性验证用例(文档化)** +- 验证点:`crewai`、AG-UI 官方 CrewAI 集成与 Python SDK、DashScope FunASR SDK 返回 usage 字段可得性。 + +**Step 2: 运行最小可用验证命令** +- Run: `uv run python -m pip show crewai` +- Expected: 能看到版本信息;若无则列入依赖安装任务。 + +**Step 3: 记录替代策略** +- 若 AG-UI 官方 CrewAI 集成不可用,保留最小自定义事件映射适配层(仅实现标准事件,不扩展私有字段)。 + +**Step 4: 提交 spike 结论到 notes** + +### Task 2: 数据模型与迁移(llm_factory, llms, sessions, messages) + +**Files:** +- Create: `backend/src/models/llm_factory.py` +- Create: `backend/src/models/llm.py` +- Create: `backend/src/models/agent_chat_session.py` +- Create: `backend/src/models/agent_chat_message.py` +- Modify: `backend/src/models/__init__.py` +- Create: `backend/alembic/versions/<revision>_create_agent_chat_core_tables.py` +- Modify: `backend/alembic/env.py` +- Test: `backend/tests/integration/test_agent_chat_migration.py` + +**Step 1: 先写失败的迁移测试** +- 覆盖:建表成功、索引存在、降级可回滚。 + +**Step 2: 运行测试确认失败** +- Run: `uv run pytest backend/tests/integration/test_agent_chat_migration.py -v` +- Expected: FAIL(缺少模型或迁移)。 + +**Step 3: 实现 ORM 与 Alembic 迁移** +- `llm_factory`:唯一 `name`,新增 `request_url`、`avatar`,并继承 `TimestampMixin` + `SoftDeleteMixin`。 +- `llms`:关联 `factory_id`,仅保留 `model_code`,并继承 `TimestampMixin` + `SoftDeleteMixin`。 +- `sessions`:使用 `id` 作为链路标识,状态为 `pending/running/completed/failed`,维护 `last_activity_at`、`message_count`、`total_tokens`、`total_cost`。 +- `messages`:按 `session_id + seq` 一对多存储对话历史,记录单条模型与 token/cost;工具调用统一写 `role=tool` + `metadata.tool`。 + +**Step 4: 重新运行测试确认通过** +- Run: `uv run pytest backend/tests/integration/test_agent_chat_migration.py -v` +- Expected: PASS。 + +### Task 3: 初始化种子数据 + +**Files:** +- Create: `backend/src/core/config/static/agent_chat/llm_catalog.yaml` +- Modify: `backend/src/core/initialization/init_data.py` +- Test: 
`backend/tests/integration/test_agent_chat_seed_data.py` + +**Step 1: 写失败测试** +- 断言 `llm_catalog.yaml` 可被加载,且 `factories` 包含 `name/request_url/avatar`。 +- 断言 `llms` 仅包含 `model_code/factory_id`(逻辑标识)。 +- 断言落库后 `llm_factory` 包含:`qwen,minimax,kimi,deepseek,doubao,zai`。 +- 断言落库后 `llms` 包含:`qwen3.5-flash,deepseek-v3.2` 且能正确关联到厂商。 + +**Step 2: 跑测试确认失败** +- Run: `uv run pytest backend/tests/integration/test_agent_chat_seed_data.py -v` +- Expected: FAIL。 + +**Step 3: 实现种子插入逻辑(幂等)** +- 从 `backend/src/core/config/static/agent_chat/llm_catalog.yaml` 读取初始化数据。 +- 先 upsert `llm_factory(name, request_url, avatar)`,再解析逻辑 `factory_id` 写入 `llms.factory_id`。 + +**Step 4: 再跑测试确认通过** +- Run: `uv run pytest backend/tests/integration/test_agent_chat_seed_data.py -v` +- Expected: PASS。 + +### Task 4: 静态配置与 CrewAI 模板加载器 + +**Files:** +- Create: `backend/src/core/config/static/agent_chat/crewai/agents.yaml` +- Create: `backend/src/core/config/static/agent_chat/crewai/tasks.yaml` +- Create: `backend/src/core/config/static/agent_chat/crewai/workflow.yaml` +- Create: `backend/src/core/config/static/agent_chat/crewai/prompts/intent.md` +- Create: `backend/src/core/config/static/agent_chat/crewai/prompts/execution.md` +- Create: `backend/src/core/config/static/agent_chat/crewai/prompts/organization.md` +- Create: `backend/src/core/config/static/agent_chat/tools.yaml` +- Create: `backend/src/core/agent_chat/crewai/template_loader.py` +- Modify: `backend/src/core/config/settings.py` +- Test: `backend/tests/unit/core/agent_chat/test_crewai_template_loader.py` + +**Step 1: 写模板加载器失败测试** +- 覆盖:模板缺失、workflow 非法阶段、非法工具、合法加载。 + +**Step 2: 运行测试确认失败** +- Run: `uv run pytest backend/tests/unit/core/agent_chat/test_crewai_template_loader.py -v` +- Expected: FAIL。 + +**Step 3: 实现配置文件与加载器** +- 支持按任务阶段加载 agent/task/workflow/prompt 与工具白名单。 + +**Step 4: 运行测试确认通过** +- Run: `uv run pytest backend/tests/unit/core/agent_chat/test_crewai_template_loader.py -v` +- Expected: PASS。 + +### Task 5: CrewAI 编排与成本追踪 + 
+**Files:** +- Create: `backend/src/core/agent_chat/orchestrator.py` +- Create: `backend/src/core/agent_chat/cost_tracker.py` +- Create: `backend/src/core/agent_chat/events.py` +- Test: `backend/tests/unit/core/agent_chat/test_cost_tracker.py` +- Test: `backend/tests/unit/core/agent_chat/test_orchestrator_pipeline.py` + +**Step 1: 写失败测试(成本计算与阶段顺序)** +- 验证调用级 usage 记录、会话级 total_cost 聚合。 + +**Step 2: 运行测试确认失败** +- Run: `uv run pytest backend/tests/unit/core/agent_chat/test_cost_tracker.py backend/tests/unit/core/agent_chat/test_orchestrator_pipeline.py -v` +- Expected: FAIL。 + +**Step 3: 实现最小可用编排** +- Intent/Execution/Organization 三阶段串行。 +- 每阶段记录 `model_id/factory_id/tokens/cost`。 + +**Step 4: 运行测试确认通过** +- Run: `uv run pytest backend/tests/unit/core/agent_chat/test_cost_tracker.py backend/tests/unit/core/agent_chat/test_orchestrator_pipeline.py -v` +- Expected: PASS。 + +### Task 6: AG-UI 路由与事件映射 + +**Files:** +- Create: `backend/src/core/agent_chat/agui_adapter.py` +- Create: `backend/src/core/agent_chat/event_bridge.py` +- Create: `backend/src/v1/agent_chat/schemas.py` +- Create: `backend/src/v1/agent_chat/service.py` +- Create: `backend/src/v1/agent_chat/dependencies.py` +- Create: `backend/src/v1/agent_chat/router.py` +- Modify: `backend/src/v1/router.py` +- Test: `backend/tests/unit/core/agent_chat/test_agui_adapter.py` +- Test: `backend/tests/unit/core/agent_chat/test_event_bridge.py` +- Test: `backend/tests/integration/test_agent_chat_routes.py` +- Test: `backend/tests/integration/test_agent_chat_event_persistence.py` + +**Step 1: 写失败测试(事件映射)** +- 覆盖 `message.delta/tool.started/tool.completed/run.completed/run.failed`。 + +**Step 2: 写失败测试(路由集成)** +- 覆盖鉴权、正常会话、错误返回、事件与 `messages.seq` 顺序一致。 + +**Step 3: 运行测试确认失败** +- Run: `uv run pytest backend/tests/unit/core/agent_chat/test_agui_adapter.py backend/tests/integration/test_agent_chat_routes.py -v` +- Expected: FAIL。 + +**Step 4: 实现适配与路由** +- AG-UI 请求转换为内部 command。 +- 使用 AG-UI 官方 CrewAI 集成 SDK 接收/发送标准事件,CrewAI 
回调事件通过 `event_bridge.py` 统一转换。 +- 内部事件转换为 AG-UI 事件流。 +- 在 `service.py` 中同事务写入 `messages` 并更新 `sessions.last_activity_at/message_count/total_tokens/total_cost`。 + +**Step 5: 运行测试确认通过** +- Run: `uv run pytest backend/tests/unit/core/agent_chat/test_agui_adapter.py backend/tests/unit/core/agent_chat/test_event_bridge.py backend/tests/integration/test_agent_chat_routes.py backend/tests/integration/test_agent_chat_event_persistence.py -v` +- Expected: PASS。 + +### Task 7: 多模态输入与 ASR 工具 + +**Files:** +- Create: `backend/src/core/agent_chat/multimodal.py` +- Create: `backend/src/core/agent_chat/storage_adapter.py` +- Create: `backend/src/core/agent_chat/tools/asr_fun_asr.py` +- Test: `backend/tests/unit/core/agent_chat/test_multimodal.py` +- Test: `backend/tests/unit/core/agent_chat/test_storage_adapter.py` +- Test: `backend/tests/unit/core/agent_chat/test_asr_fun_asr_tool.py` + +**Step 1: 写失败测试(文件校验与解析)** +- 覆盖图片/音频/文档 MIME、大小、异常分支。 +- 覆盖附件对象存储路径生成、签名 URL 获取与元数据回写 `messages.metadata.attachments`。 + +**Step 2: 写失败测试(ASR 工具)** +- 覆盖 DashScope SDK 请求构造、响应解析、超时降级。 + +**Step 3: 运行测试确认失败** +- Run: `uv run pytest backend/tests/unit/core/agent_chat/test_multimodal.py backend/tests/unit/core/agent_chat/test_storage_adapter.py backend/tests/unit/core/agent_chat/test_asr_fun_asr_tool.py -v` +- Expected: FAIL。 + +**Step 4: 实现最小可用功能** +- 音频走 DashScope Python SDK(`fun-asr-realtime-2025-11-07`,qwen)。 +- 输出统一 `AttachmentContext` 结构。 +- 附件二进制写入 Supabase Storage 私有桶 `agent-chat-attachments`,数据库仅写元数据。 + +**Step 5: 运行测试确认通过** +- Run: `uv run pytest backend/tests/unit/core/agent_chat/test_multimodal.py backend/tests/unit/core/agent_chat/test_storage_adapter.py backend/tests/unit/core/agent_chat/test_asr_fun_asr_tool.py -v` +- Expected: PASS。 + +### Task 8: 会话落库、可观测性与安全约束 + +**Files:** +- Modify: `backend/src/v1/agent_chat/service.py` +- Modify: `backend/src/core/agent_chat/orchestrator.py` +- Test: `backend/tests/unit/core/agent_chat/test_session_title_strategy.py` +- Test: 
`backend/tests/integration/test_agent_chat_session_recent_selection.py` +- Test: `backend/tests/integration/test_agent_chat_session_persistence.py` + +**Step 1: 写失败测试(sessions 写入)** +- 断言 `session.id` 链路可追踪、`title/status/last_activity_at`、消息与工具成本明细均入库。 +- 用例 A(title 生成):首条用户消息自动生成 `title`,超过长度截断,空文本回退 `新对话 YYYY-MM-DD HH:MM`。 +- 用例 B(工具成本聚合):工具调用写 `role=tool` 消息,`sessions.total_cost = sum(messages.cost)`,失败调用可记费。 +- 用例 C(最近会话选择):同一用户多个会话按 `last_activity_at DESC` 返回首页默认会话。 + +**Step 2: 运行测试确认失败** +- Run: `uv run pytest backend/tests/integration/test_agent_chat_session_persistence.py -v` +- Expected: FAIL。 + +**Step 3: 实现持久化与审计字段写入** +- 禁止明文存储密钥与敏感音频原始数据。 +- 新增路由级限流与滥用保护策略(按用户或 token)。 +- 附件审计日志记录拒绝原因(MIME、大小、扩展名、解析失败)。 + +**Step 4: 运行测试确认通过** +- Run: `uv run pytest backend/tests/integration/test_agent_chat_session_persistence.py -v` +- Expected: PASS。 + +### Task 9: 全链路验证与文档更新 + +**Files:** +- Create: `backend/tests/e2e/test_agent_chat_flow.py` +- Create: `backend/tests/e2e/test_agent_chat_recent_session_home.py` +- Modify: `.env.example` +- Modify: `docs/runtime/runtime-runbook.md` + +**Step 1: 写 E2E 失败测试** +- 场景:文本、图片+文本、音频+ASR、文档问答、最近会话首页默认选中。 + +**Step 2: 运行 E2E 确认失败** +- Run: `uv run pytest backend/tests/e2e/test_agent_chat_flow.py -v` +- Expected: FAIL(功能未完全联通)。 + +**Step 3: 修补缺口直至通过** + +**Step 3.1: 更新环境变量样例** +- 仅新增存储相关变量到 `.env.example`,不新增任何厂商 API 变量。 + +**Step 4: 全量回归验证** +- Run: `docker compose --env-file .env -f infra/docker/docker-compose.yml run --rm init-job bootstrap`(先完成 migrate + init-data),然后 `bash infra/scripts/app-up.sh`(该脚本仅启动 web/worker,不执行 bootstrap) +- Run: `uv run pytest backend/tests/unit/core/agent_chat -v` +- Run: `uv run pytest backend/tests/integration -k agent_chat -v` +- Run: `uv run pytest backend/tests/e2e/test_agent_chat_flow.py -v` +- Expected: 全部 PASS。 + +**Step 5: 供应链与安全检查** +- Run: `uv run pip check` +- Run: `uv run pytest backend/tests/integration -k security -v` +- Expected: 依赖冲突为 0,关键安全测试通过。 + +## Dependency Graph + +- Task 1 -> Task 2/4(先确认依赖与接口) +- Task 2 -> Task 3/5/8(数据层先行) +- Task 4 -> Task 5(编排依赖静态配置与 CrewAI
模板) +- Task 5 -> Task 6/7(路由与多模态依赖编排核心) +- Task 6 + Task 7 + Task 8 -> Task 9(全链路验证) + +## Risk Controls + +- AG-UI 官方 CrewAI 集成版本波动:固定版本并保留 `agui_adapter.py` 最小兜底映射。 +- usage 字段不一致:`cost_tracker.py` 保留 `raw_usage` 并标准化。 +- 附件安全:严格 MIME/大小限制,禁止敏感原文落库。 +- 路由滥用风险:对 `agent_chat` 接口增加限流与失败惩罚策略。 +- 供应链风险:新增依赖必须完成许可证和漏洞扫描。 +- 延迟风险:三阶段支持短路策略(简单意图可跳过整理阶段)。 + +## Verification Checklist + +- [ ] Alembic upgrade/downgrade 成功 +- [ ] init-data 幂等且种子完整 +- [ ] AG-UI 事件流字段符合标准 +- [ ] sessions 审计字段完整 +- [ ] 多模态与 ASR 路径可用 +- [ ] 覆盖率与关键测试通过 diff --git a/docs/runtime/runtime-runbook.md b/docs/runtime/runtime-runbook.md index 6c3341c..c5ba9b3 100644 --- a/docs/runtime/runtime-runbook.md +++ b/docs/runtime/runtime-runbook.md @@ -1,115 +1,77 @@ # Runtime Runbook -**Date:** 2026-02-24 +**Date:** 2026-02-25 **Status:** Active -## 启动方式 +## 开发环境启动 -### 一键启动 (推荐) +### 一键启动 ```bash -# 前提:基础设施已手动启动(redis + supabase) -# docker compose --env-file .env -f infra/docker/docker-compose.yml up -d +# 1. 首次或 schema 变更后,执行 bootstrap +docker compose --env-file .env -f infra/docker/docker-compose.yml run --rm init-job bootstrap -# 一键执行 bootstrap + 拉起 web/worker(tmux) -bash infra/scripts/dev-app-up.sh +# 2. 日常启动服务(tmux) +bash infra/scripts/app-up.sh -# 查看窗口 +# 查看 tmux 窗口 tmux list-windows -t social-dev # 进入会话观察日志 tmux attach -t social-dev ``` -或者手动执行: - -```bash -# 1. 启动基础设施(当前编排不包含 web/worker) -docker compose --env-file .env -f infra/docker/docker-compose.yml up -d - -# 2. 运行迁移和初始化 -docker compose --env-file .env -f infra/docker/docker-compose.yml --profile job run --rm --build init-job - -# 3. 
一键执行应用层启动(bootstrap + web + workers) -bash infra/scripts/dev-app-up.sh -``` - -### 生产环境迁移防遗漏(必读) - -- 生产发布前必须先通过 bootstrap gate,再启动业务进程;禁止绕过 gate 直接起服务。 -- 使用容器执行迁移时必须带 `--build`,确保最新 Alembic 迁移已进入镜像。 -- 建议在迁移后做一次版本核对,确认已到预期 head。 - -```bash -# 1) 先执行 bootstrap gate -make runtime-bootstrap-gate - -# 2) 如采用 init-job 单跑,必须带 --build -docker compose --env-file .env -f infra/docker/docker-compose.yml --profile job run --rm --build init-job - -# 3) 核对 Alembic 版本 -docker compose --env-file .env -f infra/docker/docker-compose.yml exec -T db \ - psql -U postgres -d postgres -c "SELECT version_num FROM public.alembic_version;" -``` - -### 本地 CLI (开发调试) - -> 适用于本地开发调试,不依赖 Docker。 -> 开发调试阶段推荐直接使用本地一次性迁移脚本,不通过 Docker 触发数据库迁移,避免反复重建镜像。 - -```bash -# 推荐:一次性迁移(开发调试) -PYTHONPATH=backend/src uv run python -m core.runtime.cli migrate - -# 需要初始化数据时再执行 -PYTHONPATH=backend/src uv run python -m core.runtime.cli init-data - -# 或一键执行(migrate + init-data) -PYTHONPATH=backend/src uv run python -m core.runtime.cli bootstrap - -# 启动 Web (gunicorn) -PYTHONPATH=backend/src uv run gunicorn app:app --bind 0.0.0.0:8000 --workers 2 --worker-class uvicorn.workers.UvicornWorker - -# 启动 Worker (按队列分组) -PYTHONPATH=backend/src uv run celery -A core.celery.app worker --loglevel=info --queues=critical --concurrency=2 -PYTHONPATH=backend/src uv run celery -A core.celery.app worker --loglevel=info --queues=default --concurrency=2 -PYTHONPATH=backend/src uv run celery -A core.celery.app worker --loglevel=info --queues=bulk --concurrency=1 -``` - ### tmux 会话管理 ```bash -# 进入会话 -tmux attach -t social-dev - # 杀掉会话(停止 web/workers) tmux kill-session -t social-dev ``` +### 日志文件 + +每个服务自动生成独立日志文件: + +| 服务 | 日志文件 | +|------|---------| +| Web | `logs/web.log`, `logs/web.error.log` | +| Worker Critical | `logs/worker-critical.log`, `logs/worker-critical.error.log` | +| Worker Default | `logs/worker-default.log`, `logs/worker-default.error.log` | +| Worker Bulk | `logs/worker-bulk.log`, `logs/worker-bulk.error.log` | + 
+--- + +## 生产环境启动 + +> TODO: 待补充 + +```bash +# TBD +``` + +--- + ## 服务说明 -| 服务 | 说明 | 备注 | -|------|------|------| -| redis | 缓存与 Celery broker | docker-compose 编排 | -| supabase-* | 认证与数据库相关服务 | docker-compose 编排 | -| init-job | 数据库迁移和初始化 | docker-compose 按需 run | -| web | Web 服务 (gunicorn) | 本地 CLI 启动 | -| worker-* | Celery worker | 本地 CLI 启动 | +| 服务 | 说明 | +|------|------| +| redis | 缓存与 Celery broker | +| supabase-* | 认证与数据库相关服务 | +| init-job | 数据库迁移和初始化(一次性) | +| web | Web 服务 (gunicorn) | +| worker-* | Celery worker (3 个队列) | ## 配置说明 ### Web 服务器配置 -| 环境变量 | 说明 | 默认值 | 有效范围 | -|----------|------|--------|----------| -| `SOCIAL_WEB__HOST` | 监听地址 | 0.0.0.0 | - | -| `SOCIAL_WEB__PORT` | 监听端口 | 8000 | 1-65535 | -| `SOCIAL_WEB__RELOAD` | 开发模式热重载 | false | true/false | -| `SOCIAL_WEB__GUNICORN__WORKERS` | Gunicorn 工作进程数 | 2 | 1-64 | -| `SOCIAL_WEB__GUNICORN__WORKER_CLASS` | Gunicorn worker 类 | uvicorn.workers.UvicornWorker | Python import path | -| `SOCIAL_WEB__GUNICORN__TIMEOUT` | 请求超时秒数 | 60 | 1-600 | -| `SOCIAL_WEB__GUNICORN__KEEPALIVE` | Keep-alive 秒数 | 5 | 1-120 | -| `SOCIAL_WEB__LOG_LEVEL` | 日志级别 | info | debug/info/warning/error/critical | +| 环境变量 | 说明 | 默认值 | +|----------|------|--------| +| `SOCIAL_WEB__HOST` | 监听地址 | 0.0.0.0 | +| `SOCIAL_WEB__PORT` | 监听端口 | 8000(未设置时的回退值;`.env.example` 示例值为 5775) | +| `SOCIAL_WEB__GUNICORN__WORKERS` | Gunicorn 工作进程数 | 2 | +| `SOCIAL_WEB__GUNICORN__WORKER_CLASS` | Gunicorn worker 类 | uvicorn.workers.UvicornWorker | +| `SOCIAL_WEB__GUNICORN__TIMEOUT` | 请求超时秒数 | 60 | ### Celery 队列路由 @@ -191,7 +153,6 @@ curl -sS -X POST http://127.0.0.1:8000/api/v1/agent-chat/run \ -H 'Content-Type: application/json' \ -d '{"message":"hello"}' ``` - --- ## 变更日志 @@ -206,3 +167,4 @@ curl -sS -X POST http://127.0.0.1:8000/api/v1/agent-chat/run \ | 2026-02-25 | Auth 注册切换为 OTP 三段式:signup/start、signup/verify、signup/resend;邮件模板改为纯验证码展示 | | 2026-02-25 | 清理未使用配置类:删除 WebSettings/GunicornSettings/WorkerSettings/WorkerGroupSettings(脚本仍使用环境变量启动服务) | | 2026-02-25 | 新增 Agent Chat 验证章节:bootstrap
gate、分层测试命令与 run 接口 smoke 示例 | +| 2026-02-25 | 简化启动方式:dev-app-up → app-up,分离 bootstrap 与服务启动 | diff --git a/infra/scripts/app-up.sh b/infra/scripts/app-up.sh new file mode 100755 index 0000000..1314c06 --- /dev/null +++ b/infra/scripts/app-up.sh @@ -0,0 +1,66 @@ +#!/bin/bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" +SESSION_NAME="${SESSION_NAME:-social-dev}" +COMPOSE_FILE="$ROOT_DIR/infra/docker/docker-compose.yml" +ENV_FILE="$ROOT_DIR/.env" + +echo "=== App Up ===" +echo "This script starts web + worker processes in tmux." +echo "NOTE: Bootstrap (migrate + init-data) must be run separately." +echo "" + +if ! command -v tmux >/dev/null 2>&1; then + echo "Error: tmux is required." >&2 + exit 1 +fi + +if [ ! -f "$ENV_FILE" ]; then + echo "Error: env file not found at $ENV_FILE" >&2 + exit 1 +fi + +if [ ! -f "$COMPOSE_FILE" ]; then + echo "Error: compose file not found at $COMPOSE_FILE" >&2 + exit 1 +fi + +set -a +# shellcheck disable=SC1090 +. "$ENV_FILE" +set +a + +if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then + echo "Error: tmux session '$SESSION_NAME' already exists." >&2 + echo "Hint: tmux kill-session -t $SESSION_NAME" >&2 + exit 1 +fi + +echo "Starting web + worker processes in tmux session '$SESSION_NAME'..." 
+ +WEB_CMD="cd '$ROOT_DIR' && PYTHONPATH=backend/src SOCIAL_RUNTIME__SERVICE_NAME=web uv run gunicorn app:app --bind \ +${SOCIAL_WEB__HOST:-0.0.0.0}:${SOCIAL_WEB__PORT:-8000} --workers \ +${SOCIAL_WEB__GUNICORN__WORKERS:-2} --worker-class \ +${SOCIAL_WEB__GUNICORN__WORKER_CLASS:-uvicorn.workers.UvicornWorker} --timeout \ +${SOCIAL_WEB__GUNICORN__TIMEOUT:-60}" + +WORKER_CRITICAL_CMD="cd '$ROOT_DIR' && PYTHONPATH=backend/src SOCIAL_RUNTIME__SERVICE_NAME=worker-critical uv run celery -A core.celery.app worker --loglevel=info --queues=critical --concurrency=${SOCIAL_WORKER__GROUPS__CRITICAL__CONCURRENCY:-2}" +WORKER_DEFAULT_CMD="cd '$ROOT_DIR' && PYTHONPATH=backend/src SOCIAL_RUNTIME__SERVICE_NAME=worker-default uv run celery -A core.celery.app worker --loglevel=info --queues=default --concurrency=${SOCIAL_WORKER__GROUPS__DEFAULT__CONCURRENCY:-2}" +WORKER_BULK_CMD="cd '$ROOT_DIR' && PYTHONPATH=backend/src SOCIAL_RUNTIME__SERVICE_NAME=worker-bulk uv run celery -A core.celery.app worker --loglevel=info --queues=bulk --concurrency=${SOCIAL_WORKER__GROUPS__BULK__CONCURRENCY:-1}" + +tmux new-session -d -s "$SESSION_NAME" -n web "bash -lc \"$WEB_CMD; echo '[web] exited'; exec bash\"" +tmux new-window -t "$SESSION_NAME" -n worker-critical "bash -lc \"$WORKER_CRITICAL_CMD; echo '[worker-critical] exited'; exec bash\"" +tmux new-window -t "$SESSION_NAME" -n worker-default "bash -lc \"$WORKER_DEFAULT_CMD; echo '[worker-default] exited'; exec bash\"" +tmux new-window -t "$SESSION_NAME" -n worker-bulk "bash -lc \"$WORKER_BULK_CMD; echo '[worker-bulk] exited'; exec bash\"" + +echo "" +echo "=== App Started ===" +echo "Log files will be created in logs/ directory:" +echo " - web.log, web.error.log" +echo " - worker-critical.log, worker-critical.error.log" +echo " - worker-default.log, worker-default.error.log" +echo " - worker-bulk.log, worker-bulk.error.log" +echo "" +echo "tmux attach -t $SESSION_NAME" +echo "tmux list-windows -t $SESSION_NAME" diff --git 
a/infra/scripts/dev-app-up.sh b/infra/scripts/dev-app-up.sh deleted file mode 100755 index 39ec2b6..0000000 --- a/infra/scripts/dev-app-up.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" -SESSION_NAME="${SESSION_NAME:-social-dev}" -COMPOSE_FILE="$ROOT_DIR/infra/docker/docker-compose.yml" -ENV_FILE="$ROOT_DIR/.env" - -echo "=== Dev App Up ===" -echo "This script assumes redis/supabase are already running via docker compose." -echo "" - -if ! command -v tmux >/dev/null 2>&1; then - echo "Error: tmux is required." >&2 - exit 1 -fi - -if ! command -v docker >/dev/null 2>&1; then - echo "Error: docker is required." >&2 - exit 1 -fi - -if [ ! -f "$ENV_FILE" ]; then - echo "Error: env file not found at $ENV_FILE" >&2 - exit 1 -fi - -if [ ! -f "$COMPOSE_FILE" ]; then - echo "Error: compose file not found at $COMPOSE_FILE" >&2 - exit 1 -fi - -set -a -# shellcheck disable=SC1090 -. "$ENV_FILE" -set +a - -if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then - echo "Error: tmux session '$SESSION_NAME' already exists." >&2 - echo "Hint: tmux kill-session -t $SESSION_NAME" >&2 - exit 1 -fi - -echo "[1/2] Running bootstrap once (migrate + init-data)..." -running_services="$(docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps --status running --services || true)" -if ! printf '%s\n' "$running_services" | grep -qx "db"; then - echo "Error: db service is not running. Start infra first." >&2 - echo "Hint: docker compose --env-file .env -f infra/docker/docker-compose.yml up -d" >&2 - exit 1 -fi -if ! printf '%s\n' "$running_services" | grep -qx "redis"; then - echo "Error: redis service is not running. Start infra first." >&2 - echo "Hint: docker compose --env-file .env -f infra/docker/docker-compose.yml up -d" >&2 - exit 1 -fi - -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" --profile job run --rm init-job - -echo "[2/2] Starting web + worker processes in tmux session '$SESSION_NAME'..." 
- -WEB_CMD="cd '$ROOT_DIR' && PYTHONPATH=backend/src uv run gunicorn app:app --bind \ -${SOCIAL_WEB__HOST:-0.0.0.0}:${SOCIAL_WEB__PORT:-8000} --workers \ -${SOCIAL_WEB__GUNICORN__WORKERS:-2} --worker-class \ -${SOCIAL_WEB__GUNICORN__WORKER_CLASS:-uvicorn.workers.UvicornWorker} --timeout \ -${SOCIAL_WEB__GUNICORN__TIMEOUT:-60}" - -WORKER_CRITICAL_CMD="cd '$ROOT_DIR' && PYTHONPATH=backend/src uv run celery -A core.celery.app worker --loglevel=info --queues=critical --concurrency=${SOCIAL_WORKER__GROUPS__CRITICAL__CONCURRENCY:-2}" -WORKER_DEFAULT_CMD="cd '$ROOT_DIR' && PYTHONPATH=backend/src uv run celery -A core.celery.app worker --loglevel=info --queues=default --concurrency=${SOCIAL_WORKER__GROUPS__DEFAULT__CONCURRENCY:-2}" -WORKER_BULK_CMD="cd '$ROOT_DIR' && PYTHONPATH=backend/src uv run celery -A core.celery.app worker --loglevel=info --queues=bulk --concurrency=${SOCIAL_WORKER__GROUPS__BULK__CONCURRENCY:-1}" - -tmux new-session -d -s "$SESSION_NAME" -n web "bash -lc \"$WEB_CMD; echo '[web] exited'; exec bash\"" -tmux new-window -t "$SESSION_NAME" -n worker-critical "bash -lc \"$WORKER_CRITICAL_CMD; echo '[worker-critical] exited'; exec bash\"" -tmux new-window -t "$SESSION_NAME" -n worker-default "bash -lc \"$WORKER_DEFAULT_CMD; echo '[worker-default] exited'; exec bash\"" -tmux new-window -t "$SESSION_NAME" -n worker-bulk "bash -lc \"$WORKER_BULK_CMD; echo '[worker-bulk] exited'; exec bash\"" - -echo "" -echo "=== Dev App Started ===" -echo "tmux attach -t $SESSION_NAME" -echo "tmux list-windows -t $SESSION_NAME" diff --git a/infra/scripts/runtime-bootstrap-gate.sh b/infra/scripts/runtime-bootstrap-gate.sh deleted file mode 100755 index 3701b07..0000000 --- a/infra/scripts/runtime-bootstrap-gate.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)" -COMPOSE_FILE="$ROOT_DIR/infra/docker/docker-compose.yml" -ENV_FILE="$ROOT_DIR/.env" - -if [ ! 
-f "$ENV_FILE" ]; then - echo "Error: env file not found at $ENV_FILE" >&2 - exit 1 -fi - -if [ ! -f "$COMPOSE_FILE" ]; then - echo "Error: compose file not found at $COMPOSE_FILE" >&2 - exit 1 -fi - -required_services=(init-job web worker-critical worker-default worker-bulk redis db) -available_services="$(docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" --profile job config --services)" - -missing_services=() -for service in "${required_services[@]}"; do - if ! printf '%s\n' "$available_services" | grep -qx "$service"; then - missing_services+=("$service") - fi -done - -if [ "${#missing_services[@]}" -gt 0 ]; then - echo "Error: runtime bootstrap gate requires services not found in compose:" >&2 - printf ' - %s\n' "${missing_services[@]}" >&2 - echo "Hint: this gate is for deployment compose that includes web/worker/init-job." >&2 - exit 1 -fi - -echo "=== Runtime Bootstrap Gate ===" -echo "This is the ONLY allowed entry point for deployment." -echo "" - -echo "[1/2] Running bootstrap (migrate + init-data)..." -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" --profile job run --rm init-job - -echo "[2/2] Starting web and worker services..." -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d web worker-critical worker-default worker-bulk redis db - -echo "" -echo "=== Bootstrap Gate Passed ===" -docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" ps