feat(agent): session deletion anonymization for iOS compliance

Replace soft-delete with anonymize + hard-delete to meet iOS App Store
data retention requirements. Non-PII fields are preserved in
anonymous_session_snapshots for analytics.

- Add anonymous_session_snapshots table and ORM model
- Implement anonymizer to extract non-PII fields before deletion
- Remove points_ledger.biz_id FK constraint (snapshot-style reference)
- Preserve transaction history while allowing session deletion
- Add 14 unit tests + 1 integration test
This commit is contained in:
qzl
2026-04-15 18:18:39 +08:00
parent a244eaa666
commit c2b726e7bd
10 changed files with 829 additions and 7 deletions
+216
View File
@@ -0,0 +1,216 @@
from __future__ import annotations
from datetime import datetime, timezone
from decimal import Decimal
from uuid import uuid4
from schemas.enums import AgentChatMessageRole, AgentChatSessionStatus, SessionType
from models.agent_chat_message import AgentChatMessage
from models.agent_chat_session import AgentChatSession
from v1.agent.anonymizer import (
_aggregate_latency,
_extract_derived_fields,
_extract_keywords,
_extract_model_code,
_extract_question_type,
_extract_sign_level,
_extract_tool_name,
_truncate_to_day,
anonymize,
)
def _make_session(**overrides: object) -> AgentChatSession:
    """Build an ``AgentChatSession`` fixture with overridable default fields.

    Args:
        **overrides: Field values that replace (or extend) the defaults.

    Returns:
        A session constructed from the defaults merged with ``overrides``.
    """
    # The same instant is used for both the last activity and the last update.
    activity_time = datetime(2026, 4, 15, 14, 45, 0, tzinfo=timezone.utc)
    base_fields: dict[str, object] = {
        "id": uuid4(),
        "user_id": uuid4(),
        "session_type": SessionType.CHAT,
        "status": AgentChatSessionStatus.COMPLETED,
        "message_count": 3,
        "total_tokens": 1500,
        "total_cost": Decimal("0.05"),
        "created_at": datetime(2026, 4, 15, 14, 32, 0, tzinfo=timezone.utc),
        "last_activity_at": activity_time,
        "job_id": None,
        "title": "Will I get the job?",
        "state_snapshot": None,
        "updated_at": activity_time,
        "deleted_at": None,
    }
    # Later entries win, so caller-supplied overrides take precedence.
    return AgentChatSession(**{**base_fields, **overrides})
def _make_message(
    *,
    session_id: object | None = None,
    role: AgentChatMessageRole = AgentChatMessageRole.ASSISTANT,
    metadata_json: dict[str, object] | None = None,
    model_code: str | None = None,
    tool_name: str | None = None,
    latency_ms: int | None = None,
) -> AgentChatMessage:
    """Build an ``AgentChatMessage`` fixture with fixed content and token counts.

    Args:
        session_id: Owning session id; a fresh UUID is generated when falsy.
        role: Message role, assistant by default.
        metadata_json: Optional metadata payload stored on the message.
        model_code: Optional model identifier.
        tool_name: Optional tool identifier.
        latency_ms: Optional latency in milliseconds.

    Returns:
        The constructed message.
    """
    message_id = uuid4()
    owning_session = session_id or uuid4()
    # Creation and update share one timestamp.
    stamp = datetime(2026, 4, 15, 14, 33, 0, tzinfo=timezone.utc)
    fields = {
        "id": message_id,
        "session_id": owning_session,
        "seq": 1,
        "role": role,
        "content": "some content",
        "model_code": model_code,
        "tool_name": tool_name,
        "input_tokens": 100,
        "output_tokens": 200,
        "cost": Decimal("0.02"),
        "latency_ms": latency_ms,
        "visibility_mask": 0,
        "metadata_json": metadata_json,
        "created_at": stamp,
        "updated_at": stamp,
        "deleted_at": None,
    }
    return AgentChatMessage(**fields)
def test_truncate_to_day() -> None:
    """Check that ``_truncate_to_day`` yields midnight of the same UTC date."""
    moment = datetime(2026, 4, 15, 14, 32, 45, 123456, tzinfo=timezone.utc)
    expected_midnight = datetime(2026, 4, 15, 0, 0, 0, 0, tzinfo=timezone.utc)
    assert _truncate_to_day(moment) == expected_midnight
def test_extract_derived_fields_found() -> None:
    """Check that fields under ``agent_output.divination_derived`` are returned."""
    derived_payload = {
        "guaName": "",
        "questionType": "career",
        "hasChangingYao": True,
    }
    message = _make_message(
        metadata_json={"agent_output": {"divination_derived": derived_payload}}
    )
    extracted = _extract_derived_fields([message])
    assert extracted.get("guaName") == ""
    assert extracted.get("questionType") == "career"
def test_extract_derived_fields_missing() -> None:
    """Check that metadata without derived fields yields an empty dict."""
    message = _make_message(metadata_json={"run_id": "abc"})
    assert _extract_derived_fields([message]) == {}
def test_extract_sign_level() -> None:
    """Check that ``sign_level`` is read from the ``agent_output`` metadata."""
    metadata = {"agent_output": {"sign_level": "中上签"}}
    message = _make_message(metadata_json=metadata)
    sign_level = _extract_sign_level([message])
    assert sign_level == "中上签"
def test_extract_sign_level_none() -> None:
    """Check that an empty ``agent_output`` gives no sign level."""
    message = _make_message(metadata_json={"agent_output": {}})
    sign_level = _extract_sign_level([message])
    assert sign_level is None
def test_extract_keywords() -> None:
    """Check that the ``keywords`` list is read from ``agent_output``."""
    metadata = {"agent_output": {"keywords": ["事业", "贵人"]}}
    message = _make_message(metadata_json=metadata)
    keywords = _extract_keywords([message])
    assert keywords == ["事业", "贵人"]
def test_extract_question_type_from_derived() -> None:
    """Check that ``questionType`` is found inside ``divination_derived``."""
    agent_output = {"divination_derived": {"questionType": "career"}}
    message = _make_message(metadata_json={"agent_output": agent_output})
    question_type = _extract_question_type([message])
    assert question_type == "career"
def test_extract_question_type_from_agent_output() -> None:
    """Check that ``questionType`` is found directly on ``agent_output``."""
    metadata = {"agent_output": {"questionType": "love"}}
    message = _make_message(metadata_json=metadata)
    question_type = _extract_question_type([message])
    assert question_type == "love"
def test_extract_model_code() -> None:
    """Check that the message's ``model_code`` is returned."""
    message = _make_message(model_code="qwen3.5-flash")
    model_code = _extract_model_code([message])
    assert model_code == "qwen3.5-flash"
def test_extract_tool_name() -> None:
    """Check that the message's ``tool_name`` is returned."""
    message = _make_message(tool_name="liuyao")
    tool_name = _extract_tool_name([message])
    assert tool_name == "liuyao"
def test_aggregate_latency() -> None:
    """Check that latencies of 500 ms and 300 ms aggregate to 800."""
    messages = [_make_message(latency_ms=latency) for latency in (500, 300)]
    total = _aggregate_latency(messages)
    assert total == 800
def test_aggregate_latency_none() -> None:
    """Check that a single message without latency aggregates to ``None``."""
    message = _make_message(latency_ms=None)
    total = _aggregate_latency([message])
    assert total is None
def test_anonymize_full_snapshot() -> None:
    """Check every snapshot field for a session with full assistant metadata.

    One assistant message carries the metadata, model, tool and latency; one
    user message carries none. Session-level values (counts, tokens, cost)
    come from the ``_make_session`` defaults.
    """
    session = _make_session()
    derived_payload = {
        "questionType": "career",
        "guaName": "",
        "guaNameHant": "",
        "targetGuaName": "",
        "hasChangingYao": True,
    }
    agent_output = {
        "sign_level": "上上签",
        "keywords": ["事业", "贵人"],
        "divination_derived": derived_payload,
    }
    assistant_message = _make_message(
        session_id=session.id,
        role=AgentChatMessageRole.ASSISTANT,
        model_code="qwen3.5-flash",
        tool_name="liuyao",
        latency_ms=1200,
        metadata_json={"agent_output": agent_output},
    )
    user_message = _make_message(
        session_id=session.id,
        role=AgentChatMessageRole.USER,
        latency_ms=None,
    )

    snapshot = anonymize(
        session=session,
        messages=[assistant_message, user_message],
    )

    # Both session timestamps fall on this day; the snapshot keeps only the day.
    expected_day = datetime(2026, 4, 15, 0, 0, 0, tzinfo=timezone.utc)

    assert snapshot.session_type == "chat"
    assert snapshot.message_count == 3
    assert snapshot.status == "completed"
    assert snapshot.question_type == "career"
    assert snapshot.tool_name == "liuyao"
    assert snapshot.model_code == "qwen3.5-flash"
    assert snapshot.gua_name == ""
    assert snapshot.gua_name_hant == ""
    assert snapshot.target_gua_name == ""
    assert snapshot.has_changing_yao is True
    assert snapshot.sign_level == "上上签"
    assert snapshot.keywords == ["事业", "贵人"]
    assert snapshot.total_tokens == 1500
    assert snapshot.total_cost == Decimal("0.05")
    assert snapshot.total_latency_ms == 1200
    assert snapshot.created_at == expected_day
    assert snapshot.last_activity_at == expected_day
    assert snapshot.anonymous_id is not None
    assert snapshot.id is not None
def test_anonymize_no_metadata() -> None:
    """Check that metadata-derived snapshot fields are ``None`` without metadata."""
    session = _make_session()
    bare_message = _make_message(session_id=session.id, metadata_json=None)

    snapshot = anonymize(session=session, messages=[bare_message])

    assert snapshot.question_type is None
    assert snapshot.gua_name is None
    assert snapshot.sign_level is None
    assert snapshot.keywords is None
    assert snapshot.has_changing_yao is None