feat(agent): session deletion anonymization for iOS compliance
Replace soft-delete with anonymize + hard-delete to meet iOS App Store data retention requirements. Non-PII fields are preserved in anonymous_session_snapshots for analytics.
- Add anonymous_session_snapshots table and ORM model
- Implement anonymizer to extract non-PII fields before deletion
- Remove points_ledger.biz_id FK constraint (snapshot-style reference)
- Preserve transaction history while allowing session deletion
- Add 14 unit tests + 1 integration test
This commit is contained in:
@@ -0,0 +1,183 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
from typing import TypedDict
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from sqlalchemy import select
|
||||
|
||||
from core.db.session import AsyncSessionLocal
|
||||
from models.agent_chat_session import AgentChatSession
|
||||
from models.agent_chat_message import AgentChatMessage
|
||||
from models.anonymous_session_snapshot import AnonymousSessionSnapshot
|
||||
|
||||
|
||||
class IdentityData(TypedDict, total=True):
    """Shape of the ``test_identity`` fixture value used by the integration test."""

    # Address used to open the email session (the test lower-cases and strips it).
    email: str
    # Verification code; sent as the "token" field of the email-session request.
    code: str
|
||||
|
||||
|
||||
async def _create_email_session(
    client: httpx.AsyncClient,
    *,
    email: str,
    code: str,
) -> dict[str, object]:
    """Open an email session through the auth endpoint and return its JSON body.

    Args:
        client: HTTP client used to issue the request.
        email: Address sent as the ``email`` field.
        code: Verification code sent as the ``token`` field.

    Returns:
        The decoded JSON response.

    Raises:
        Whatever ``raise_for_status()`` raises for an error status code.
    """
    credentials = {"email": email, "token": code}
    response = await client.post("/api/v1/auth/email-session", json=credentials)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
async def _wait_terminal_event(
|
||||
client: httpx.AsyncClient,
|
||||
*,
|
||||
access_token: str,
|
||||
thread_id: str,
|
||||
run_id: str,
|
||||
timeout_s: int = 180,
|
||||
) -> str:
|
||||
headers = {"Authorization": f"Bearer {access_token}"}
|
||||
params = {"runId": run_id, "idle_limit": 120}
|
||||
started = time.time()
|
||||
|
||||
async with client.stream(
|
||||
"GET",
|
||||
f"/api/v1/agent/runs/{thread_id}/events",
|
||||
headers=headers,
|
||||
params=params,
|
||||
) as resp:
|
||||
resp.raise_for_status()
|
||||
async for line in resp.aiter_lines():
|
||||
if time.time() - started > timeout_s:
|
||||
raise TimeoutError("SSE timed out")
|
||||
if not line or not line.startswith("data: "):
|
||||
continue
|
||||
event = json.loads(line[6:])
|
||||
event_type = event.get("type")
|
||||
if event_type in {"RUN_FINISHED", "RUN_ERROR"}:
|
||||
return str(event_type)
|
||||
|
||||
raise RuntimeError("No terminal SSE event")
|
||||
|
||||
|
||||
def _build_run_payload(*, thread_id: str, run_id: str) -> dict[str, object]:
|
||||
now = int(time.time() * 1000)
|
||||
return {
|
||||
"threadId": thread_id,
|
||||
"runId": run_id,
|
||||
"state": {},
|
||||
"messages": [
|
||||
{
|
||||
"id": f"msg_{run_id}_user_0",
|
||||
"role": "user",
|
||||
"content": "今天事业运如何?",
|
||||
}
|
||||
],
|
||||
"tools": [],
|
||||
"context": [],
|
||||
"forwardedProps": {
|
||||
"runtime_mode": "chat",
|
||||
"client_time": {
|
||||
"device_timezone": "Asia/Shanghai",
|
||||
"client_now_iso": "2026-04-15T12:00:00Z",
|
||||
"client_epoch_ms": now,
|
||||
},
|
||||
"divinationPayload": {
|
||||
"divinationMethod": "自动起卦",
|
||||
"questionType": "事业",
|
||||
"question": "今天事业运如何?",
|
||||
"divinationTimeIso": "2026-04-15T12:00:00Z",
|
||||
"yaoLines": ["少阳", "少阴", "老阳", "少阳", "老阴", "少阴"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_session_delete_anonymizes_and_hard_deletes(
    api_client: httpx.AsyncClient,
    test_identity: IdentityData,
    db_cleanup: list[str],
) -> None:
    """Deleting a session removes its rows and leaves an anonymous snapshot.

    Flow: sign in, enqueue one agent run, wait for its terminal SSE event,
    confirm the session row exists, delete the session through the API, then
    verify that the session and its messages are gone and that a *new*
    anonymous snapshot row was written by this deletion.
    """
    email = str(test_identity["email"]).strip().lower()
    # Registered for the db_cleanup fixture (defined outside this file;
    # presumably it removes rows for the listed emails -- confirm in conftest).
    db_cleanup.append(email)

    auth_resp = await _create_email_session(
        api_client,
        email=email,
        code=str(test_identity["code"]),
    )
    user = auth_resp.get("user")
    assert isinstance(user, dict)
    access_token = str(auth_resp["access_token"])
    headers = {"Authorization": f"Bearer {access_token}"}

    session_uuid = uuid.uuid4()
    thread_id = str(session_uuid)
    run_id = f"run_{int(time.time() * 1000)}"

    enqueue = await api_client.post(
        "/api/v1/agent/runs",
        headers=headers,
        json=_build_run_payload(thread_id=thread_id, run_id=run_id),
    )
    assert enqueue.status_code == 202

    terminal = await _wait_terminal_event(
        api_client,
        access_token=access_token,
        thread_id=thread_id,
        run_id=run_id,
    )
    # Either outcome is accepted: the test targets deletion, not run success.
    assert terminal in {"RUN_FINISHED", "RUN_ERROR"}

    async with AsyncSessionLocal() as session:
        session_result = await session.execute(
            select(AgentChatSession).where(AgentChatSession.id == session_uuid)
        )
        session_obj = session_result.scalar_one_or_none()
        assert session_obj is not None, "Session should exist before deletion"

        # Snapshots carry no link back to the session, so remember which ones
        # already exist; otherwise a leftover row from an earlier run would
        # satisfy the check below even if this deletion produced nothing.
        known_result = await session.execute(select(AnonymousSessionSnapshot.id))
        known_snapshot_ids = set(known_result.scalars().all())

    delete_resp = await api_client.delete(
        f"/api/v1/agent/sessions/{thread_id}",
        headers=headers,
    )
    assert delete_resp.status_code == 204

    async with AsyncSessionLocal() as session:
        session_result = await session.execute(
            select(AgentChatSession).where(AgentChatSession.id == session_uuid)
        )
        deleted_session = session_result.scalar_one_or_none()
        assert deleted_session is None, (
            "Session should be hard-deleted, not soft-deleted"
        )

        msg_result = await session.execute(
            select(AgentChatMessage).where(
                AgentChatMessage.session_id == session_uuid
            )
        )
        remaining_messages = msg_result.scalars().all()
        assert len(remaining_messages) == 0, (
            "Messages should be hard-deleted along with session"
        )

        snapshot_result = await session.execute(
            select(AnonymousSessionSnapshot).order_by(
                AnonymousSessionSnapshot.anonymized_at.desc()
            )
        )
        new_snapshots = [
            row
            for row in snapshot_result.scalars().all()
            if row.id not in known_snapshot_ids
        ]
        assert len(new_snapshots) >= 1, (
            "Deleting the session should create a new anonymous snapshot"
        )

        # Newest of the rows created since the pre-deletion check.
        snapshot = new_snapshots[0]
        assert snapshot.session_type == "chat"
        assert snapshot.anonymous_id is not None
        assert snapshot.id is not None
        assert snapshot.anonymized_at is not None
|
||||
@@ -0,0 +1,216 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from decimal import Decimal
|
||||
from uuid import uuid4
|
||||
|
||||
from schemas.enums import AgentChatMessageRole, AgentChatSessionStatus, SessionType
|
||||
|
||||
from models.agent_chat_message import AgentChatMessage
|
||||
from models.agent_chat_session import AgentChatSession
|
||||
from v1.agent.anonymizer import (
|
||||
_aggregate_latency,
|
||||
_extract_derived_fields,
|
||||
_extract_keywords,
|
||||
_extract_model_code,
|
||||
_extract_question_type,
|
||||
_extract_sign_level,
|
||||
_extract_tool_name,
|
||||
_truncate_to_day,
|
||||
anonymize,
|
||||
)
|
||||
|
||||
|
||||
def _make_session(**overrides: object) -> AgentChatSession:
    """Build an unsaved ``AgentChatSession`` populated with fixed test values.

    Args:
        **overrides: Field values that replace (or extend) the defaults below.

    Returns:
        A model instance constructed from the merged field values.
    """
    created = datetime(2026, 4, 15, 14, 32, 0, tzinfo=timezone.utc)
    last_touched = datetime(2026, 4, 15, 14, 45, 0, tzinfo=timezone.utc)
    fields: dict[str, object] = dict(
        id=uuid4(),
        user_id=uuid4(),
        session_type=SessionType.CHAT,
        status=AgentChatSessionStatus.COMPLETED,
        message_count=3,
        total_tokens=1500,
        total_cost=Decimal("0.05"),
        created_at=created,
        last_activity_at=last_touched,
        job_id=None,
        title="Will I get the job?",
        state_snapshot=None,
        updated_at=last_touched,
        deleted_at=None,
    )
    # Caller-supplied values win over the defaults.
    return AgentChatSession(**{**fields, **overrides})
|
||||
|
||||
|
||||
def _make_message(
    *,
    session_id: object | None = None,
    role: AgentChatMessageRole = AgentChatMessageRole.ASSISTANT,
    metadata_json: dict[str, object] | None = None,
    model_code: str | None = None,
    tool_name: str | None = None,
    latency_ms: int | None = None,
) -> AgentChatMessage:
    """Build an unsaved ``AgentChatMessage`` with fixed filler values.

    Args:
        session_id: Owning session id; a random UUID is used when falsy.
        role: Message role (assistant by default).
        metadata_json: Metadata payload stored on the message.
        model_code: Model identifier stored on the message.
        tool_name: Tool identifier stored on the message.
        latency_ms: Latency stored on the message.

    Returns:
        A model instance with the given values and constant remaining fields.
    """
    message_id = uuid4()
    owner_id = session_id or uuid4()
    stamp = datetime(2026, 4, 15, 14, 33, 0, tzinfo=timezone.utc)
    return AgentChatMessage(
        id=message_id,
        session_id=owner_id,
        seq=1,
        role=role,
        content="some content",
        model_code=model_code,
        tool_name=tool_name,
        input_tokens=100,
        output_tokens=200,
        cost=Decimal("0.02"),
        latency_ms=latency_ms,
        visibility_mask=0,
        metadata_json=metadata_json,
        created_at=stamp,
        updated_at=stamp,
        deleted_at=None,
    )
|
||||
|
||||
|
||||
def test_truncate_to_day() -> None:
    """A UTC timestamp with time-of-day parts maps to midnight of the same date."""
    timestamp = datetime(2026, 4, 15, 14, 32, 45, 123456, tzinfo=timezone.utc)
    midnight = datetime(2026, 4, 15, tzinfo=timezone.utc)
    assert _truncate_to_day(timestamp) == midnight
|
||||
|
||||
|
||||
def test_extract_derived_fields_found() -> None:
    """Values under ``agent_output.divination_derived`` are returned."""
    derived_block = {
        "guaName": "乾",
        "questionType": "career",
        "hasChangingYao": True,
    }
    message = _make_message(
        metadata_json={"agent_output": {"divination_derived": derived_block}}
    )

    extracted = _extract_derived_fields([message])

    assert extracted.get("guaName") == "乾"
    assert extracted.get("questionType") == "career"
|
||||
|
||||
|
||||
def test_extract_derived_fields_missing() -> None:
    """Metadata without an ``agent_output`` section yields an empty dict."""
    message = _make_message(metadata_json={"run_id": "abc"})
    extracted = _extract_derived_fields([message])
    assert extracted == {}
|
||||
|
||||
|
||||
def test_extract_sign_level() -> None:
    """``agent_output.sign_level`` is returned as-is."""
    metadata = {"agent_output": {"sign_level": "中上签"}}
    messages = [_make_message(metadata_json=metadata)]
    assert _extract_sign_level(messages) == "中上签"
|
||||
|
||||
|
||||
def test_extract_sign_level_none() -> None:
    """An empty ``agent_output`` gives no sign level."""
    messages = [_make_message(metadata_json={"agent_output": {}})]
    sign_level = _extract_sign_level(messages)
    assert sign_level is None
|
||||
|
||||
|
||||
def test_extract_keywords() -> None:
    """``agent_output.keywords`` is returned as a list in the same order."""
    metadata = {"agent_output": {"keywords": ["事业", "贵人"]}}
    messages = [_make_message(metadata_json=metadata)]
    assert _extract_keywords(messages) == ["事业", "贵人"]
|
||||
|
||||
|
||||
def test_extract_question_type_from_derived() -> None:
    """The question type is read from ``agent_output.divination_derived``."""
    metadata = {
        "agent_output": {
            "divination_derived": {"questionType": "career"},
        }
    }
    messages = [_make_message(metadata_json=metadata)]
    assert _extract_question_type(messages) == "career"
|
||||
|
||||
|
||||
def test_extract_question_type_from_agent_output() -> None:
    """A ``questionType`` placed directly on ``agent_output`` is also found."""
    metadata = {"agent_output": {"questionType": "love"}}
    messages = [_make_message(metadata_json=metadata)]
    assert _extract_question_type(messages) == "love"
|
||||
|
||||
|
||||
def test_extract_model_code() -> None:
    """The model code stored on a message is returned."""
    messages = [_make_message(model_code="qwen3.5-flash")]
    model_code = _extract_model_code(messages)
    assert model_code == "qwen3.5-flash"
|
||||
|
||||
|
||||
def test_extract_tool_name() -> None:
    """The tool name stored on a message is returned."""
    messages = [_make_message(tool_name="liuyao")]
    tool_name = _extract_tool_name(messages)
    assert tool_name == "liuyao"
|
||||
|
||||
|
||||
def test_aggregate_latency() -> None:
    """Latencies of 500 ms and 300 ms aggregate to 800 ms."""
    messages = [_make_message(latency_ms=ms) for ms in (500, 300)]
    assert _aggregate_latency(messages) == 800
|
||||
|
||||
|
||||
def test_aggregate_latency_none() -> None:
    """A single message without a latency aggregates to ``None``."""
    messages = [_make_message(latency_ms=None)]
    total = _aggregate_latency(messages)
    assert total is None
|
||||
|
||||
|
||||
def test_anonymize_full_snapshot() -> None:
    """``anonymize`` copies session totals and message-derived fields.

    Uses one assistant message carrying full divination metadata and one
    user message without latency, then checks every snapshot field.
    """
    session = _make_session()
    agent_output = {
        "sign_level": "上上签",
        "keywords": ["事业", "贵人"],
        "divination_derived": {
            "questionType": "career",
            "guaName": "乾",
            "guaNameHant": "乾",
            "targetGuaName": "姤",
            "hasChangingYao": True,
        },
    }
    assistant_msg = _make_message(
        session_id=session.id,
        role=AgentChatMessageRole.ASSISTANT,
        model_code="qwen3.5-flash",
        tool_name="liuyao",
        latency_ms=1200,
        metadata_json={"agent_output": agent_output},
    )
    user_msg = _make_message(
        session_id=session.id,
        role=AgentChatMessageRole.USER,
        latency_ms=None,
    )

    snapshot = anonymize(session=session, messages=[assistant_msg, user_msg])

    # Both session timestamps fall on 2026-04-15 and are expected at midnight UTC.
    day_start = datetime(2026, 4, 15, tzinfo=timezone.utc)
    expected = {
        "session_type": "chat",
        # Matches the session's stored count, not the two messages passed in.
        "message_count": 3,
        "status": "completed",
        "question_type": "career",
        "tool_name": "liuyao",
        "model_code": "qwen3.5-flash",
        "gua_name": "乾",
        "gua_name_hant": "乾",
        "target_gua_name": "姤",
        "sign_level": "上上签",
        "keywords": ["事业", "贵人"],
        "total_tokens": 1500,
        "total_cost": Decimal("0.05"),
        "total_latency_ms": 1200,
        "created_at": day_start,
        "last_activity_at": day_start,
    }
    for attr, want in expected.items():
        assert getattr(snapshot, attr) == want, attr

    assert snapshot.has_changing_yao is True
    assert snapshot.anonymous_id is not None
    assert snapshot.id is not None
|
||||
|
||||
|
||||
def test_anonymize_no_metadata() -> None:
    """Without message metadata, every metadata-derived field is ``None``."""
    session = _make_session()
    bare_msg = _make_message(session_id=session.id, metadata_json=None)

    snapshot = anonymize(session=session, messages=[bare_msg])

    derived_attrs = (
        "question_type",
        "gua_name",
        "sign_level",
        "keywords",
        "has_changing_yao",
    )
    for attr in derived_attrs:
        assert getattr(snapshot, attr) is None, attr
|
||||
Reference in New Issue
Block a user