feat(agent): session deletion anonymization for iOS compliance

Replace soft-delete with anonymize + hard-delete to meet iOS App Store
data retention requirements. Non-PII fields are preserved in
anonymous_session_snapshots for analytics.

- Add anonymous_session_snapshots table and ORM model
- Implement anonymizer to extract non-PII fields before deletion
- Remove points_ledger.biz_id FK constraint (snapshot-style reference)
- Preserve transaction history while allowing session deletion
- Add 14 unit tests + 1 integration test
This commit is contained in:
qzl
2026-04-15 18:18:39 +08:00
parent a244eaa666
commit c2b726e7bd
10 changed files with 829 additions and 7 deletions
@@ -0,0 +1,183 @@
from __future__ import annotations
import json
import time
import uuid
from typing import TypedDict
import httpx
import pytest
from sqlalchemy import select
from core.db.session import AsyncSessionLocal
from models.agent_chat_session import AgentChatSession
from models.agent_chat_message import AgentChatMessage
from models.anonymous_session_snapshot import AnonymousSessionSnapshot
class IdentityData(TypedDict):
    """Shape of the ``test_identity`` fixture value used by the integration test."""

    # Address sent as the "email" field of the email-session request.
    email: str
    # Value sent as the "token" field of the email-session request.
    code: str
async def _create_email_session(
    client: httpx.AsyncClient,
    *,
    email: str,
    code: str,
) -> dict[str, object]:
    """Open an email session and return the decoded JSON response body.

    Posts ``email`` and ``code`` (sent under the ``token`` key) to the
    email-session endpoint. A non-success status is surfaced through
    ``raise_for_status`` before the body is decoded.
    """
    credentials = {"email": email, "token": code}
    response = await client.post("/api/v1/auth/email-session", json=credentials)
    response.raise_for_status()
    body = response.json()
    return body
async def _wait_terminal_event(
    client: httpx.AsyncClient,
    *,
    access_token: str,
    thread_id: str,
    run_id: str,
    timeout_s: int = 180,
) -> str:
    """Follow a run's SSE stream until a terminal event arrives.

    Args:
        client: HTTP client used to open the event stream.
        access_token: Bearer token sent in the ``Authorization`` header.
        thread_id: Thread whose run events are streamed.
        run_id: Run identifier, sent as the ``runId`` query parameter.
        timeout_s: Upper bound, in seconds, for the whole wait.

    Returns:
        ``"RUN_FINISHED"`` or ``"RUN_ERROR"``, whichever is seen first.

    Raises:
        TimeoutError: No terminal event arrived within ``timeout_s``.
        RuntimeError: The stream ended without a terminal event.
    """
    # Local import keeps this helper self-contained; the module's import
    # block does not pull in asyncio.
    import asyncio

    terminal_types = {"RUN_FINISHED", "RUN_ERROR"}

    async def _consume() -> str:
        # NOTE(review): idle_limit is forwarded to the server as-is;
        # presumably a server-side idle cutoff - confirm its unit.
        async with client.stream(
            "GET",
            f"/api/v1/agent/runs/{thread_id}/events",
            headers={"Authorization": f"Bearer {access_token}"},
            params={"runId": run_id, "idle_limit": 120},
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                # Only "data: ..." lines carry an event payload.
                if not line or not line.startswith("data: "):
                    continue
                event = json.loads(line[6:])
                event_type = event.get("type")
                if event_type in terminal_types:
                    return str(event_type)
        raise RuntimeError("No terminal SSE event")

    # Bound the whole read instead of checking the clock only when a line
    # arrives: a stream that goes silent would otherwise never time out.
    try:
        return await asyncio.wait_for(_consume(), timeout=timeout_s)
    except asyncio.TimeoutError:
        raise TimeoutError("SSE timed out") from None
def _build_run_payload(*, thread_id: str, run_id: str) -> dict[str, object]:
    """Assemble the JSON body for enqueueing one chat run.

    The payload holds a single user message, empty tool/context lists, and
    forwarded props with a fixed client timestamp plus the current epoch
    time in milliseconds.
    """
    question = "今天事业运如何?"
    fixed_iso = "2026-04-15T12:00:00Z"
    epoch_ms = int(time.time() * 1000)

    user_message = {
        "id": f"msg_{run_id}_user_0",
        "role": "user",
        "content": question,
    }
    client_time = {
        "device_timezone": "Asia/Shanghai",
        "client_now_iso": fixed_iso,
        "client_epoch_ms": epoch_ms,
    }
    divination = {
        "divinationMethod": "自动起卦",
        "questionType": "事业",
        "question": question,
        "divinationTimeIso": fixed_iso,
        "yaoLines": ["少阳", "少阴", "老阳", "少阳", "老阴", "少阴"],
    }
    forwarded = {
        "runtime_mode": "chat",
        "client_time": client_time,
        "divinationPayload": divination,
    }

    payload: dict[str, object] = {
        "threadId": thread_id,
        "runId": run_id,
        "state": {},
        "messages": [user_message],
        "tools": [],
        "context": [],
        "forwardedProps": forwarded,
    }
    return payload
@pytest.mark.asyncio
async def test_session_delete_anonymizes_and_hard_deletes(
    api_client: httpx.AsyncClient,
    test_identity: IdentityData,
    db_cleanup: list[str],
) -> None:
    """End-to-end check of the session delete endpoint.

    Flow: sign in, enqueue one chat run, wait for its terminal event,
    delete the session over the API, then verify in the database that the
    session row and its messages are gone and that a new anonymous
    snapshot row appeared.
    """
    email = str(test_identity["email"]).strip().lower()
    db_cleanup.append(email)

    auth_resp = await _create_email_session(
        api_client,
        email=email,
        code=str(test_identity["code"]),
    )
    user = auth_resp.get("user")
    assert isinstance(user, dict)
    access_token = str(auth_resp["access_token"])
    headers = {"Authorization": f"Bearer {access_token}"}

    thread_id = str(uuid.uuid4())
    session_uuid = uuid.UUID(thread_id)
    run_id = f"run_{int(time.time() * 1000)}"

    enqueue = await api_client.post(
        "/api/v1/agent/runs",
        headers=headers,
        json=_build_run_payload(thread_id=thread_id, run_id=run_id),
    )
    assert enqueue.status_code == 202

    terminal = await _wait_terminal_event(
        api_client,
        access_token=access_token,
        thread_id=thread_id,
        run_id=run_id,
    )
    assert terminal in {"RUN_FINISHED", "RUN_ERROR"}

    async with AsyncSessionLocal() as session:
        session_result = await session.execute(
            select(AgentChatSession).where(AgentChatSession.id == session_uuid)
        )
        session_obj = session_result.scalar_one_or_none()
        assert session_obj is not None, "Session should exist before deletion"

        # Record which snapshots already exist. Snapshots carry no link back
        # to the session, so without this baseline a stale row from an
        # earlier run would satisfy the post-delete assertions.
        baseline_result = await session.execute(select(AnonymousSessionSnapshot.id))
        preexisting_snapshot_ids = set(baseline_result.scalars().all())

    delete_resp = await api_client.delete(
        f"/api/v1/agent/sessions/{thread_id}",
        headers=headers,
    )
    assert delete_resp.status_code == 204

    async with AsyncSessionLocal() as session:
        session_result = await session.execute(
            select(AgentChatSession).where(AgentChatSession.id == session_uuid)
        )
        deleted_session = session_result.scalar_one_or_none()
        assert deleted_session is None, (
            "Session should be hard-deleted, not soft-deleted"
        )

        msg_result = await session.execute(
            select(AgentChatMessage).where(
                AgentChatMessage.session_id == session_uuid
            )
        )
        remaining_messages = msg_result.scalars().all()
        assert len(remaining_messages) == 0, (
            "Messages should be hard-deleted along with session"
        )

        snapshot_result = await session.execute(
            select(AnonymousSessionSnapshot).order_by(
                AnonymousSessionSnapshot.anonymized_at.desc()
            )
        )
        # Keep only rows created since the baseline, newest first.
        # NOTE(review): if other tests delete sessions concurrently against
        # the same database, a new row may belong to them - confirm isolation.
        snapshots = [
            row
            for row in snapshot_result.scalars().all()
            if row.id not in preexisting_snapshot_ids
        ]
        assert len(snapshots) >= 1, (
            "At least one new anonymous snapshot should exist after deletion"
        )

        snapshot = snapshots[0]
        assert snapshot.session_type == "chat"
        assert snapshot.anonymous_id is not None
        assert snapshot.id is not None
        assert snapshot.anonymized_at is not None
+216
View File
@@ -0,0 +1,216 @@
from __future__ import annotations
from datetime import datetime, timezone
from decimal import Decimal
from uuid import uuid4
from schemas.enums import AgentChatMessageRole, AgentChatSessionStatus, SessionType
from models.agent_chat_message import AgentChatMessage
from models.agent_chat_session import AgentChatSession
from v1.agent.anonymizer import (
_aggregate_latency,
_extract_derived_fields,
_extract_keywords,
_extract_model_code,
_extract_question_type,
_extract_sign_level,
_extract_tool_name,
_truncate_to_day,
anonymize,
)
def _make_session(**overrides: object) -> AgentChatSession:
    """Build an in-memory ``AgentChatSession`` for the anonymizer tests.

    Every field gets a fixed default (fresh UUIDs for ``id`` and
    ``user_id``). Any keyword argument replaces the default of the same
    name.
    """
    last_touch = datetime(2026, 4, 15, 14, 45, 0, tzinfo=timezone.utc)
    fields: dict[str, object] = {
        "id": uuid4(),
        "user_id": uuid4(),
        "session_type": SessionType.CHAT,
        "status": AgentChatSessionStatus.COMPLETED,
        "message_count": 3,
        "total_tokens": 1500,
        "total_cost": Decimal("0.05"),
        "created_at": datetime(2026, 4, 15, 14, 32, 0, tzinfo=timezone.utc),
        "last_activity_at": last_touch,
        "job_id": None,
        "title": "Will I get the job?",
        "state_snapshot": None,
        "updated_at": last_touch,
        "deleted_at": None,
        # Caller-supplied values win over the defaults above.
        **overrides,
    }
    return AgentChatSession(**fields)
def _make_message(
    *,
    session_id: object | None = None,
    role: AgentChatMessageRole = AgentChatMessageRole.ASSISTANT,
    metadata_json: dict[str, object] | None = None,
    model_code: str | None = None,
    tool_name: str | None = None,
    latency_ms: int | None = None,
) -> AgentChatMessage:
    """Build an in-memory ``AgentChatMessage`` with fixed filler values.

    Only the fields the anonymizer tests vary are parameters. Content,
    token counts, cost and timestamps are constants. A falsy
    ``session_id`` is replaced with a fresh UUID.
    """
    message_id = uuid4()
    owning_session = session_id if session_id else uuid4()
    stamp = datetime(2026, 4, 15, 14, 33, 0, tzinfo=timezone.utc)
    attrs = {
        "id": message_id,
        "session_id": owning_session,
        "seq": 1,
        "role": role,
        "content": "some content",
        "model_code": model_code,
        "tool_name": tool_name,
        "input_tokens": 100,
        "output_tokens": 200,
        "cost": Decimal("0.02"),
        "latency_ms": latency_ms,
        "visibility_mask": 0,
        "metadata_json": metadata_json,
        "created_at": stamp,
        "updated_at": stamp,
        "deleted_at": None,
    }
    return AgentChatMessage(**attrs)
def test_truncate_to_day() -> None:
    """A mid-afternoon UTC timestamp truncates to midnight of the same day."""
    moment = datetime(2026, 4, 15, 14, 32, 45, 123456, tzinfo=timezone.utc)
    expected_midnight = datetime(2026, 4, 15, 0, 0, 0, 0, tzinfo=timezone.utc)
    assert _truncate_to_day(moment) == expected_midnight
def test_extract_derived_fields_found() -> None:
    """Derived fields nested under ``agent_output.divination_derived`` are returned."""
    # NOTE(review): guaName is an empty string in this fixture - confirm
    # that is intentional and not a value lost in transit.
    derived_block = {
        "guaName": "",
        "questionType": "career",
        "hasChangingYao": True,
    }
    message = _make_message(
        metadata_json={"agent_output": {"divination_derived": derived_block}}
    )
    extracted = _extract_derived_fields([message])
    assert extracted.get("guaName") == ""
    assert extracted.get("questionType") == "career"
def test_extract_derived_fields_missing() -> None:
    """Metadata without an ``agent_output`` entry yields an empty mapping."""
    unrelated_metadata = {"run_id": "abc"}
    message = _make_message(metadata_json=unrelated_metadata)
    assert _extract_derived_fields([message]) == {}
def test_extract_sign_level() -> None:
    """The ``sign_level`` stored under ``agent_output`` is returned unchanged."""
    agent_output = {"sign_level": "中上签"}
    message = _make_message(metadata_json={"agent_output": agent_output})
    level = _extract_sign_level([message])
    assert level == "中上签"
def test_extract_sign_level_none() -> None:
    """An empty ``agent_output`` produces no sign level."""
    message = _make_message(metadata_json={"agent_output": {}})
    level = _extract_sign_level([message])
    assert level is None
def test_extract_keywords() -> None:
    """The keyword list under ``agent_output`` comes back with the same items in order."""
    message = _make_message(
        metadata_json={"agent_output": {"keywords": ["事业", "贵人"]}}
    )
    found = _extract_keywords([message])
    assert found == ["事业", "贵人"]
def test_extract_question_type_from_derived() -> None:
    """``questionType`` is picked up from the nested ``divination_derived`` block."""
    agent_output = {"divination_derived": {"questionType": "career"}}
    message = _make_message(metadata_json={"agent_output": agent_output})
    question_type = _extract_question_type([message])
    assert question_type == "career"
def test_extract_question_type_from_agent_output() -> None:
    """``questionType`` is also read when it sits directly under ``agent_output``."""
    agent_output = {"questionType": "love"}
    message = _make_message(metadata_json={"agent_output": agent_output})
    question_type = _extract_question_type([message])
    assert question_type == "love"
def test_extract_model_code() -> None:
    """The message's ``model_code`` is returned for a single-message list."""
    message = _make_message(model_code="qwen3.5-flash")
    extracted = _extract_model_code([message])
    assert extracted == "qwen3.5-flash"
def test_extract_tool_name() -> None:
    """The message's ``tool_name`` is returned for a single-message list."""
    message = _make_message(tool_name="liuyao")
    extracted = _extract_tool_name([message])
    assert extracted == "liuyao"
def test_aggregate_latency() -> None:
    """Latencies of 500 ms and 300 ms aggregate to 800 ms."""
    messages = [
        _make_message(latency_ms=500),
        _make_message(latency_ms=300),
    ]
    total = _aggregate_latency(messages)
    assert total == 800
def test_aggregate_latency_none() -> None:
    """A single message without latency aggregates to ``None``."""
    message = _make_message(latency_ms=None)
    total = _aggregate_latency([message])
    assert total is None
def test_anonymize_full_snapshot() -> None:
    """``anonymize`` fills every snapshot field from a fully populated session.

    The session supplies type, status, counters and timestamps. The
    assistant message supplies model, tool, latency and the divination
    metadata. The user message carries no latency or metadata.
    """
    session = _make_session()

    # NOTE(review): the three gua name values are empty strings in this
    # fixture - confirm that is intentional and not data lost in transit.
    agent_output = {
        "sign_level": "上上签",
        "keywords": ["事业", "贵人"],
        "divination_derived": {
            "questionType": "career",
            "guaName": "",
            "guaNameHant": "",
            "targetGuaName": "",
            "hasChangingYao": True,
        },
    }
    assistant_msg = _make_message(
        session_id=session.id,
        role=AgentChatMessageRole.ASSISTANT,
        model_code="qwen3.5-flash",
        tool_name="liuyao",
        latency_ms=1200,
        metadata_json={"agent_output": agent_output},
    )
    user_msg = _make_message(
        session_id=session.id,
        role=AgentChatMessageRole.USER,
        latency_ms=None,
    )

    snapshot = anonymize(session=session, messages=[assistant_msg, user_msg])

    # Values copied from the session row.
    assert snapshot.session_type == "chat"
    assert snapshot.message_count == 3
    assert snapshot.status == "completed"

    # Values taken from the assistant message and its metadata.
    assert snapshot.question_type == "career"
    assert snapshot.tool_name == "liuyao"
    assert snapshot.model_code == "qwen3.5-flash"
    assert snapshot.gua_name == ""
    assert snapshot.gua_name_hant == ""
    assert snapshot.target_gua_name == ""
    assert snapshot.has_changing_yao is True
    assert snapshot.sign_level == "上上签"
    assert snapshot.keywords == ["事业", "贵人"]

    # Usage totals.
    assert snapshot.total_tokens == 1500
    assert snapshot.total_cost == Decimal("0.05")
    assert snapshot.total_latency_ms == 1200

    # Both timestamps are expected at midnight of the session's day.
    day_start = datetime(2026, 4, 15, 0, 0, 0, tzinfo=timezone.utc)
    assert snapshot.created_at == day_start
    assert snapshot.last_activity_at == day_start

    assert snapshot.anonymous_id is not None
    assert snapshot.id is not None
def test_anonymize_no_metadata() -> None:
    """With no message metadata, every metadata-derived snapshot field is ``None``."""
    session = _make_session()
    bare_message = _make_message(session_id=session.id, metadata_json=None)

    snapshot = anonymize(session=session, messages=[bare_message])

    assert snapshot.question_type is None
    assert snapshot.gua_name is None
    assert snapshot.sign_level is None
    assert snapshot.keywords is None
    assert snapshot.has_changing_yao is None