diff --git a/.trellis/tasks/04-15-session-deletion-anonymization/check.jsonl b/.trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/check.jsonl similarity index 100% rename from .trellis/tasks/04-15-session-deletion-anonymization/check.jsonl rename to .trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/check.jsonl diff --git a/.trellis/tasks/04-15-session-deletion-anonymization/debug.jsonl b/.trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/debug.jsonl similarity index 100% rename from .trellis/tasks/04-15-session-deletion-anonymization/debug.jsonl rename to .trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/debug.jsonl diff --git a/.trellis/tasks/04-15-session-deletion-anonymization/implement.jsonl b/.trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/implement.jsonl similarity index 100% rename from .trellis/tasks/04-15-session-deletion-anonymization/implement.jsonl rename to .trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/implement.jsonl diff --git a/.trellis/tasks/04-15-session-deletion-anonymization/prd.md b/.trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/prd.md similarity index 70% rename from .trellis/tasks/04-15-session-deletion-anonymization/prd.md rename to .trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/prd.md index 2c6878c..79c9a95 100644 --- a/.trellis/tasks/04-15-session-deletion-anonymization/prd.md +++ b/.trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/prd.md @@ -51,17 +51,29 @@ Replace the current soft-delete flow with an **anonymize-then-hard-delete** stra | Table | Column | Content | |-------|--------|---------| -| sessions | session_type | 'chat' / 'automation' | +| sessions | session_type | 'chat' | | sessions | status | pending/running/completed/failed | | sessions | total_tokens | Usage metric | | sessions | total_cost | Usage metric | -| sessions | message_count | Counter | +| 
sessions | message_count | Counter (used for follow-up ratio analysis) | | sessions | created_at / last_activity_at | Timestamps | -| messages | role | user/assistant/system/tool | | messages | model_code | LLM model identifier | -| messages | tool_name | Tool name (divination type) | -| messages | metadata->agent_output->divination_derived->questionType | Question category (career/love/wealth/health) | -| messages | input_tokens / output_tokens / cost / latency_ms | Usage and performance metrics | +| messages | tool_name | Divination tool name | +| messages | latency_ms | Response latency | +| messages->metadata | agent_output.sign_level | Sign level (上上签/中上签/中下签/下下签) | +| messages->metadata | agent_output.keywords | Key insights from reading | +| messages->metadata | agent_output.divination_derived.questionType | Question category (career/love/wealth/health) | +| messages->metadata | agent_output.divination_derived.guaName | Hexagram name | +| messages->metadata | agent_output.divination_derived.guaNameHant | Hexagram name (Traditional Chinese) | +| messages->metadata | agent_output.divination_derived.targetGuaName | Target hexagram name (if changing lines exist) | +| messages->metadata | agent_output.divination_derived.hasChangingYao | Whether session has changing lines | + +**Analytics Requirements:** + +1. **Question type distribution**: Count by `question_type` +2. **Follow-up ratio**: `message_count > 2` indicates follow-up questions +3. **LLM performance comparison**: Group by `model_code`, analyze `status`, `total_latency_ms`, `total_tokens` +4. 
**Hexagram accuracy analysis**: Distribution of `sign_level`, `gua_name`, `has_changing_yao` ## Technical Design @@ -71,18 +83,34 @@ Replace the current soft-delete flow with an **anonymize-then-hard-delete** stra CREATE TABLE anonymous_session_snapshots ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), anonymous_id UUID NOT NULL, -- Random UUID, no link to real user - session_type VARCHAR(20) NOT NULL, -- 'chat' / 'automation' - question_type VARCHAR(50), -- Career/love/wealth/health etc. (from metadata) - tool_name VARCHAR(100), -- Divination tool used + + -- Session metadata + session_type VARCHAR(20) NOT NULL, -- 'chat' + message_count INTEGER, -- Used for follow-up ratio analysis + status VARCHAR(20), -- Session final status + + -- Question & divination + question_type VARCHAR(50), -- Career/love/wealth/health etc. + tool_name VARCHAR(100), -- Divination tool name + + -- Hexagram details (for accuracy analysis) + gua_name VARCHAR(50), -- Hexagram name + gua_name_hant VARCHAR(50), -- Hexagram name (Traditional Chinese) + target_gua_name VARCHAR(50), -- Target hexagram (if changing lines exist) + has_changing_yao BOOLEAN, -- Whether session has changing lines + sign_level VARCHAR(20), -- 上上签/中上签/中下签/下下签 + keywords TEXT[], -- Key insights from reading + + -- Model & usage metrics model_code VARCHAR(50), -- LLM model used total_tokens INTEGER, -- Token usage total_cost NUMERIC, -- Cost metric - message_count INTEGER, -- Message count - status VARCHAR(20), -- Session final status total_latency_ms INTEGER, -- Aggregated latency - anonymized_at TIMESTAMPTZ NOT NULL DEFAULT now(), - created_at TIMESTAMPTZ NOT NULL, -- Original session creation time (date only precision) - last_activity_at TIMESTAMPTZ -- Original last activity (date only precision) + + -- Timestamps (day precision to prevent re-identification) + created_at TIMESTAMPTZ NOT NULL, -- Original session creation time + last_activity_at TIMESTAMPTZ, -- Original last activity + anonymized_at TIMESTAMPTZ NOT 
NULL DEFAULT now() ); -- RLS: service role only, no user access @@ -95,8 +123,9 @@ CREATE POLICY "Service role can manage anonymous snapshots" Design notes: - `anonymous_id` is a randomly generated UUID with **no mapping** back to the original user - Timestamps are stored with **date-only precision** (day granularity) to prevent re-identification via time correlation -- `question_type` is the only content-derived field retained - it's a category label (career/love/wealth/health), not the actual question text -- No `user_id`, no session content, no AI responses - only aggregate metrics +- `session_type` only supports 'chat' (AUTOMATION is legacy from reused database schema, not used in this project) +- All structured non-PII fields are retained for flexible future analysis (principle: "complete retention, filter on analysis") +- No `user_id`, no question text, no AI response text - only structured/aggregate metrics - RLS ensures no user (even authenticated) can access this table, only service_role ### 2. 
Anonymization Service @@ -120,9 +149,12 @@ class SessionAnonymizer: ``` Key anonymization rules: -- **Strip entirely**: `user_id`, `title`, `state_snapshot`, `content` (all message content), `user_message_attachments`, full `agent_output` / `tool_agent_output` -- **Retain as-is**: `session_type`, `status`, `total_tokens`, `total_cost`, `message_count`, `model_code`, `tool_name` -- **Transform**: timestamps truncated to day precision; `questionType` extracted from metadata as category label only +- **Strip entirely**: `user_id`, `title`, `state_snapshot`, `content` (all message content), `question` (user's original text), `answer` (AI response text), `user_message_attachments`, raw `agent_output` / `tool_agent_output` objects +- **Retain structured fields**: + - Session: `session_type`, `status`, `total_tokens`, `total_cost`, `message_count` + - Divination: `question_type`, `tool_name`, `gua_name`, `gua_name_hant`, `target_gua_name`, `has_changing_yao`, `sign_level`, `keywords` + - Model: `model_code` +- **Transform**: timestamps truncated to day precision - **Aggregate**: sum `latency_ms` across all messages into `total_latency_ms` ### 3. 
Modified Deletion Flow diff --git a/.trellis/tasks/04-15-session-deletion-anonymization/task.json b/.trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/task.json similarity index 94% rename from .trellis/tasks/04-15-session-deletion-anonymization/task.json rename to .trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/task.json index 001b3f0..16f3cbb 100644 --- a/.trellis/tasks/04-15-session-deletion-anonymization/task.json +++ b/.trellis/tasks/archive/2026-04/04-15-session-deletion-anonymization/task.json @@ -3,14 +3,14 @@ "name": "session-deletion-anonymization", "title": "Session deletion anonymization for iOS compliance", "description": "Implement iOS-compliant data anonymization for divination session deletion: desensitize PII, retain anonymized usage data, hard-delete original records", - "status": "planning", + "status": "completed", "dev_type": null, "scope": null, "priority": "P1", "creator": "zl-q", "assignee": "zl-q", "createdAt": "2026-04-15", - "completedAt": null, + "completedAt": "2026-04-15", "branch": null, "base_branch": "dev", "worktree_path": null,