chore(task): archive 04-15-session-deletion-anonymization
This commit is contained in:
+51
-19
@@ -51,17 +51,29 @@ Replace the current soft-delete flow with an **anonymize-then-hard-delete** stra
|
||||
|
||||
| Table | Column | Content |
|
||||
|-------|--------|---------|
|
||||
| sessions | session_type | 'chat' / 'automation' |
|
||||
| sessions | session_type | 'chat' |
|
||||
| sessions | status | pending/running/completed/failed |
|
||||
| sessions | total_tokens | Usage metric |
|
||||
| sessions | total_cost | Usage metric |
|
||||
| sessions | message_count | Counter |
|
||||
| sessions | message_count | Counter (used for follow-up ratio analysis) |
|
||||
| sessions | created_at / last_activity_at | Timestamps |
|
||||
| messages | role | user/assistant/system/tool |
|
||||
| messages | model_code | LLM model identifier |
|
||||
| messages | tool_name | Tool name (divination type) |
|
||||
| messages | metadata->agent_output->divination_derived->questionType | Question category (career/love/wealth/health) |
|
||||
| messages | input_tokens / output_tokens / cost / latency_ms | Usage and performance metrics |
|
||||
| messages | tool_name | Divination tool name |
|
||||
| messages | latency_ms | Response latency |
|
||||
| messages->metadata | agent_output.sign_level | Sign level (上上签/中上签/中下签/下下签) |
|
||||
| messages->metadata | agent_output.keywords | Key insights from reading |
|
||||
| messages->metadata | agent_output.divination_derived.questionType | Question category (career/love/wealth/health) |
|
||||
| messages->metadata | agent_output.divination_derived.guaName | Hexagram name |
|
||||
| messages->metadata | agent_output.divination_derived.guaNameHant | Hexagram name (Traditional Chinese) |
|
||||
| messages->metadata | agent_output.divination_derived.targetGuaName | Target hexagram name (if changing lines exist) |
|
||||
| messages->metadata | agent_output.divination_derived.hasChangingYao | Whether session has changing lines |
|
||||
|
||||
**Analytics Requirements:**
|
||||
|
||||
1. **Question type distribution**: Count by `question_type`
|
||||
2. **Follow-up ratio**: `message_count > 2` indicates follow-up questions
|
||||
3. **LLM performance comparison**: Group by `model_code`, analyze `status`, `total_latency_ms`, `total_tokens`
|
||||
4. **Hexagram accuracy analysis**: Distribution of `sign_level`, `gua_name`, `has_changing_yao`
|
||||
|
||||
## Technical Design
|
||||
|
||||
@@ -71,18 +83,34 @@ Replace the current soft-delete flow with an **anonymize-then-hard-delete** stra
|
||||
CREATE TABLE anonymous_session_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
anonymous_id UUID NOT NULL, -- Random UUID, no link to real user
|
||||
session_type VARCHAR(20) NOT NULL, -- 'chat' / 'automation'
|
||||
question_type VARCHAR(50), -- Career/love/wealth/health etc. (from metadata)
|
||||
tool_name VARCHAR(100), -- Divination tool used
|
||||
|
||||
-- Session metadata
|
||||
session_type VARCHAR(20) NOT NULL, -- 'chat'
|
||||
message_count INTEGER, -- Used for follow-up ratio analysis
|
||||
status VARCHAR(20), -- Session final status
|
||||
|
||||
-- Question & divination
|
||||
question_type VARCHAR(50), -- Career/love/wealth/health etc.
|
||||
tool_name VARCHAR(100), -- Divination tool name
|
||||
|
||||
-- Hexagram details (for accuracy analysis)
|
||||
gua_name VARCHAR(50), -- Hexagram name
|
||||
gua_name_hant VARCHAR(50), -- Hexagram name (Traditional Chinese)
|
||||
target_gua_name VARCHAR(50), -- Target hexagram (if changing lines exist)
|
||||
has_changing_yao BOOLEAN, -- Whether session has changing lines
|
||||
sign_level VARCHAR(20), -- 上上签/中上签/中下签/下下签
|
||||
keywords TEXT[], -- Key insights from reading
|
||||
|
||||
-- Model & usage metrics
|
||||
model_code VARCHAR(50), -- LLM model used
|
||||
total_tokens INTEGER, -- Token usage
|
||||
total_cost NUMERIC, -- Cost metric
|
||||
message_count INTEGER, -- Message count
|
||||
status VARCHAR(20), -- Session final status
|
||||
total_latency_ms INTEGER, -- Aggregated latency
|
||||
anonymized_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
created_at TIMESTAMPTZ NOT NULL, -- Original session creation time (date only precision)
|
||||
last_activity_at TIMESTAMPTZ -- Original last activity (date only precision)
|
||||
|
||||
-- Timestamps (day precision to prevent re-identification)
|
||||
created_at TIMESTAMPTZ NOT NULL, -- Original session creation time
|
||||
last_activity_at TIMESTAMPTZ, -- Original last activity
|
||||
anonymized_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
-- RLS: service role only, no user access
|
||||
@@ -95,8 +123,9 @@ CREATE POLICY "Service role can manage anonymous snapshots"
|
||||
Design notes:
|
||||
- `anonymous_id` is a randomly generated UUID with **no mapping** back to the original user
|
||||
- Timestamps are stored with **date-only precision** (day granularity) to prevent re-identification via time correlation
|
||||
- `question_type` is the only content-derived field retained - it's a category label (career/love/wealth/health), not the actual question text
|
||||
- No `user_id`, no session content, no AI responses - only aggregate metrics
|
||||
- `session_type` only supports 'chat' (AUTOMATION is a legacy value from the reused database schema and is not used in this project)
|
||||
- All structured non-PII fields are retained for flexible future analysis (principle: "complete retention, filter on analysis")
|
||||
- No `user_id`, no question text, no AI response text - only structured/aggregate metrics
|
||||
- RLS ensures no user (even authenticated) can access this table, only service_role
|
||||
|
||||
### 2. Anonymization Service
|
||||
@@ -120,9 +149,12 @@ class SessionAnonymizer:
|
||||
```
|
||||
|
||||
Key anonymization rules:
|
||||
- **Strip entirely**: `user_id`, `title`, `state_snapshot`, `content` (all message content), `user_message_attachments`, full `agent_output` / `tool_agent_output`
|
||||
- **Retain as-is**: `session_type`, `status`, `total_tokens`, `total_cost`, `message_count`, `model_code`, `tool_name`
|
||||
- **Transform**: timestamps truncated to day precision; `questionType` extracted from metadata as category label only
|
||||
- **Strip entirely**: `user_id`, `title`, `state_snapshot`, `content` (all message content), `question` (user's original text), `answer` (AI response text), `user_message_attachments`, raw `agent_output` / `tool_agent_output` objects
|
||||
- **Retain structured fields**:
|
||||
- Session: `session_type`, `status`, `total_tokens`, `total_cost`, `message_count`
|
||||
- Divination: `question_type`, `tool_name`, `gua_name`, `gua_name_hant`, `target_gua_name`, `has_changing_yao`, `sign_level`, `keywords`
|
||||
- Model: `model_code`
|
||||
- **Transform**: timestamps truncated to day precision
|
||||
- **Aggregate**: sum `latency_ms` across all messages into `total_latency_ms`
|
||||
|
||||
### 3. Modified Deletion Flow
|
||||
+2
-2
@@ -3,14 +3,14 @@
|
||||
"name": "session-deletion-anonymization",
|
||||
"title": "Session deletion anonymization for iOS compliance",
|
||||
"description": "Implement iOS-compliant data anonymization for divination session deletion: desensitize PII, retain anonymized usage data, hard-delete original records",
|
||||
"status": "planning",
|
||||
"status": "completed",
|
||||
"dev_type": null,
|
||||
"scope": null,
|
||||
"priority": "P1",
|
||||
"creator": "zl-q",
|
||||
"assignee": "zl-q",
|
||||
"createdAt": "2026-04-15",
|
||||
"completedAt": null,
|
||||
"completedAt": "2026-04-15",
|
||||
"branch": null,
|
||||
"base_branch": "dev",
|
||||
"worktree_path": null,
|
||||
Reference in New Issue
Block a user