From 443977be9befcd5900cddf08ab399c65ce9b4b4a Mon Sep 17 00:00:00 2001 From: qzl Date: Wed, 25 Feb 2026 18:04:05 +0800 Subject: [PATCH] fix(security): enforce defensive RLS for agent chat tables Close Supabase advisor findings by enabling RLS and deny-by-default policies on new public agent-chat tables. Clarify backend RLS governance and incident runbook steps to prevent config-drift regressions. --- backend/AGENTS.md | 22 +++++-- ...260226_enable_rls_for_agent_chat_tables.py | 58 +++++++++++++++++++ docs/runtime/runtime-runbook.md | 37 ++++++++++-- 3 files changed, 107 insertions(+), 10 deletions(-) create mode 100644 backend/alembic/versions/20260226_enable_rls_for_agent_chat_tables.py diff --git a/backend/AGENTS.md b/backend/AGENTS.md index f171a34..8cf44cf 100644 --- a/backend/AGENTS.md +++ b/backend/AGENTS.md @@ -104,8 +104,20 @@ Use `schemas / repository / service` pattern: - Migrations must be reversible; no reliance on generated IDs ### RLS Guidance -- Backend does not rely on RLS for correctness (uses service_role) -- **Backend-only tables**: RLS optional (skip to reduce maintenance) -- **Client-direct tables**: must enable RLS with policies covering select/insert/update/delete -- `alembic_version` must not be exposed to anonymous clients (revoke anon access) -- Business tables that may be exposed to clients should enable defensive RLS even if the backend does not depend on it +- Backend does not rely on RLS for correctness (uses service_role), but RLS is mandatory as a defensive boundary for tables in PostgREST-exposed schemas. +- **Mandatory default**: any new business table in `public` must enable RLS in the same Alembic migration. +- The same migration must create policies covering `SELECT/INSERT/UPDATE/DELETE` (minimum requirement). +- Recommended default policy set for `anon, authenticated`: deny all operations first, then open explicit access only when required. +- `alembic_version` must not be exposed to `anon` or `authenticated`. + +#### Exemption Rule (strict) +- Exemptions are allowed only when a new `public` table is guaranteed not to be exposed to PostgREST clients. +- Exemptions must be explicit in the migration file with rationale and verification notes (why safe, how exposure is prevented). +- If exposure is uncertain, do not exempt: enable defensive RLS by default. + +#### Migration Acceptance Checklist (RLS) +- [ ] New `public` business table has `ALTER TABLE ... ENABLE ROW LEVEL SECURITY` in migration +- [ ] Policies for `SELECT/INSERT/UPDATE/DELETE` are present in migration +- [ ] Policy target roles are explicit (`anon`, `authenticated`, or both) +- [ ] Downgrade path is reversible and does not silently weaken intended production security +- [ ] Any exemption is documented with clear non-exposure evidence diff --git a/backend/alembic/versions/20260226_enable_rls_for_agent_chat_tables.py b/backend/alembic/versions/20260226_enable_rls_for_agent_chat_tables.py new file mode 100644 index 0000000..1d99fa3 --- /dev/null +++ b/backend/alembic/versions/20260226_enable_rls_for_agent_chat_tables.py @@ -0,0 +1,58 @@ +"""enable_rls_for_agent_chat_tables + +Revision ID: 20260226_agent_chat_rls +Revises: 20260226_agent_chat_core +Create Date: 2026-02-26 18:00:00.000000 + +""" + +from typing import Sequence, Union + +from alembic import op + + +revision: str = "20260226_agent_chat_rls" +down_revision: Union[str, Sequence[str], None] = "20260226_agent_chat_core" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +TABLES = ("llm_factory", "llms", "sessions", "messages") + + +def _enable_rls_and_deny_public(table: str) -> None: + op.execute(f"ALTER TABLE public.{table} ENABLE ROW LEVEL SECURITY") + op.execute( + f"CREATE POLICY {table}_deny_public_select ON public.{table} " + "FOR SELECT TO anon, authenticated USING (false)" + ) + op.execute( + f"CREATE POLICY {table}_deny_public_insert ON public.{table} " + "FOR INSERT TO anon, authenticated WITH CHECK (false)" + ) + op.execute( + f"CREATE POLICY {table}_deny_public_update ON public.{table} " + "FOR UPDATE TO anon, authenticated USING (false) WITH CHECK (false)" + ) + op.execute( + f"CREATE POLICY {table}_deny_public_delete ON public.{table} " + "FOR DELETE TO anon, authenticated USING (false)" + ) + + +def _disable_rls_and_drop_policies(table: str) -> None: + op.execute(f"DROP POLICY IF EXISTS {table}_deny_public_select ON public.{table}") + op.execute(f"DROP POLICY IF EXISTS {table}_deny_public_insert ON public.{table}") + op.execute(f"DROP POLICY IF EXISTS {table}_deny_public_update ON public.{table}") + op.execute(f"DROP POLICY IF EXISTS {table}_deny_public_delete ON public.{table}") + op.execute(f"ALTER TABLE public.{table} DISABLE ROW LEVEL SECURITY") + + +def upgrade() -> None: + for table in TABLES: + _enable_rls_and_deny_public(table) + + +def downgrade() -> None: + for table in reversed(TABLES): + _disable_rls_and_drop_policies(table) diff --git a/docs/runtime/runtime-runbook.md b/docs/runtime/runtime-runbook.md index fbd8ee6..af5a363 100644 --- a/docs/runtime/runtime-runbook.md +++ b/docs/runtime/runtime-runbook.md @@ -81,10 +81,10 @@ tmux kill-session -t social-dev | 服务 | 日志文件 | |------|---------| -| Web | `logs/web.log`, `logs/web.error.log` | -| Worker Critical | `logs/worker-critical.log`, `logs/worker-critical.error.log` | -| Worker Default | `logs/worker-default.log`, `logs/worker-default.error.log` | -| Worker Bulk | `logs/worker-bulk.log`, `logs/worker-bulk.error.log` | +| Web | `logs/web.log`, `logs/errors/web.error.log` | +| Worker Critical | `logs/worker-critical.log`, `logs/errors/worker-critical.error.log` | +| Worker Default | `logs/worker-default.log`, `logs/errors/worker-default.error.log` | +| Worker Bulk | `logs/worker-bulk.log`, `logs/errors/worker-bulk.error.log` | --- @@ -177,9 +177,35 @@ curl -sS -X POST "${WEB_BASE_URL}/api/v1/agent-chat/run" \ ### 4) Agent Chat 启动后异常 - 症状:`/api/v1/agent-chat/run` 返回 5xx 或事件不完整。 -- 定位:先跑 L3 测试,再看 `logs/web.error.log`。 +- 定位:先跑 L3 测试,再看 `logs/errors/web.error.log`。 - 修复:先恢复到可用版本,再排查迁移、配置与依赖差异。 +### 5) Auth 邮件模板未生效 / 注册返回超时但邮件已发送 + +- 症状: + - 收到默认英文模板(非 `infra/mail-templates`)。 + - `signup/start` 偶发 500 或超时,但邮箱仍收到验证码邮件。 +- 根因:容器配置漂移(旧容器未按最新 compose/.env 重建),导致: + - `supabase-auth` 缺少 `GOTRUE_MAILER_TEMPLATES_*` 环境变量。 + - `supabase-mail-templates` 仍挂载旧路径。 +- 定位: + +```bash +docker inspect supabase-auth --format '{{ range .Config.Env }}{{ println . }}{{ end }}' | grep GOTRUE_MAILER_TEMPLATES +docker inspect supabase-mail-templates --format '{{ range .Mounts }}{{ .Source }} -> {{ .Destination }}{{ println }}{{ end }}' +``` + +- 修复:强制重建 auth 和 mail-templates(不改其他服务): + +```bash +docker compose --env-file .env -f infra/docker/docker-compose.yml up -d --force-recreate --no-deps mail-templates auth +``` + +- 复核标准: + - `docker inspect supabase-auth` 能看到 `GOTRUE_MAILER_TEMPLATES_CONFIRMATION/RECOVERY`。 + - `supabase-mail-templates` 挂载源为 `infra/mail-templates`。 + - `POST /api/v1/auth/signup/start` 返回 `202` 且耗时恢复正常。 + --- ## Rollback Procedure @@ -217,3 +243,4 @@ curl -sS -X POST "${WEB_BASE_URL}/api/v1/agent-chat/run" \ | 2026-02-25 | 新增 Agent Chat 验证章节:bootstrap gate、分层测试命令与 run 接口 smoke 示例 | | 2026-02-25 | 简化启动方式:dev-app-up -> app-up,分离 bootstrap 与服务启动 | | 2026-02-25 | 重构为运维分层手册:Bootstrap Gate、分层验证、故障与回滚流程 | +| 2026-02-25 | 新增配置漂移故障条目:修复 Auth 邮件模板失效与 signup 超时场景 |