feat(agent): session deletion anonymization for iOS compliance

Replace soft-delete with anonymize + hard-delete to meet iOS App Store
data retention requirements. Non-PII fields are preserved in
anonymous_session_snapshots for analytics.

- Add anonymous_session_snapshots table and ORM model
- Implement anonymizer to extract non-PII fields before deletion
- Remove points_ledger.biz_id FK constraint so biz_id becomes a snapshot-style reference (the stored value is kept but no longer enforced by the database)
- Preserve transaction history while allowing session deletion
- Add 14 unit tests + 1 integration test
This commit is contained in:
qzl
2026-04-15 18:18:39 +08:00
parent a244eaa666
commit c2b726e7bd
10 changed files with 829 additions and 7 deletions
+2
View File
@@ -2,6 +2,7 @@ from __future__ import annotations
from .agent_chat_message import AgentChatMessage
from .agent_chat_session import AgentChatSession
from .anonymous_session_snapshot import AnonymousSessionSnapshot
from .auth_user import AuthUser
from .invite_code import InviteCode
from .llm import Llm
@@ -18,6 +19,7 @@ from .user_points import UserPoints
__all__ = [
"AgentChatMessage",
"AgentChatSession",
"AnonymousSessionSnapshot",
"AuthUser",
"InviteCode",
"Llm",
@@ -0,0 +1,46 @@
from __future__ import annotations
from datetime import datetime
from decimal import Decimal
import uuid
from sqlalchemy import Boolean, DateTime, Integer, Numeric, String, Text
from sqlalchemy.dialects.postgresql import ARRAY, UUID
from sqlalchemy.orm import Mapped, mapped_column
from core.db.base import Base
__all__ = ["AnonymousSessionSnapshot"]
class AnonymousSessionSnapshot(Base):
    """ORM model for one row of the ``anonymous_session_snapshots`` table.

    Per the accompanying change description, a row is written just before a
    chat session and its messages are hard-deleted, and is intended to keep
    only non-PII fields for analytics. Only ``id``, ``anonymous_id``,
    ``session_type``, ``created_at`` and ``anonymized_at`` are NOT NULL;
    every other column is nullable.
    """

    __tablename__: str = "anonymous_session_snapshots"

    # Surrogate primary key; defaults to a fresh uuid4 when not supplied.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    # Required UUID identifying the snapshot; it is not a foreign key.
    # NOTE(review): the anonymizer assigns a new random uuid4 per snapshot,
    # so it does not link snapshots of the same user — confirm that is intended.
    anonymous_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), nullable=False)

    # --- Session-level descriptors -------------------------------------
    session_type: Mapped[str] = mapped_column(String(20), nullable=False)
    message_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
    status: Mapped[str | None] = mapped_column(String(20), nullable=True)

    # --- Fields the anonymizer extracts from message metadata ----------
    question_type: Mapped[str | None] = mapped_column(String(50), nullable=True)
    tool_name: Mapped[str | None] = mapped_column(String(100), nullable=True)
    gua_name: Mapped[str | None] = mapped_column(String(50), nullable=True)
    gua_name_hant: Mapped[str | None] = mapped_column(String(50), nullable=True)
    target_gua_name: Mapped[str | None] = mapped_column(String(50), nullable=True)
    has_changing_yao: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
    sign_level: Mapped[str | None] = mapped_column(String(20), nullable=True)
    # PostgreSQL text[] column (uses the postgresql dialect ARRAY type).
    keywords: Mapped[list[str] | None] = mapped_column(ARRAY(Text()), nullable=True)

    # --- Model usage and cost aggregates -------------------------------
    model_code: Mapped[str | None] = mapped_column(String(50), nullable=True)
    total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
    # Fixed-point value: 12 digits of precision, 6 of them after the point.
    total_cost: Mapped[Decimal | None] = mapped_column(Numeric(12, 6), nullable=True)
    total_latency_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # --- Timestamps (all timezone-aware) -------------------------------
    # No default is declared, so the writer must supply the value. The
    # anonymizer stores the session's creation time truncated to the day.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
    # The anonymizer also truncates this one to the day when present.
    last_activity_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
    # When the snapshot was produced; no default is declared here either.
    anonymized_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
+162
View File
@@ -0,0 +1,162 @@
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any
from uuid import uuid4
from models.agent_chat_message import AgentChatMessage
from models.agent_chat_session import AgentChatSession
from models.anonymous_session_snapshot import AnonymousSessionSnapshot
from core.logging import get_logger
logger = get_logger(__name__)
def _truncate_to_day(dt: datetime) -> datetime:
    """Return *dt* with its time-of-day zeroed out.

    The calendar date and ``tzinfo`` are kept as they are, so the result is
    midnight in whatever timezone *dt* already carries.
    """
    midnight_fields = {"hour": 0, "minute": 0, "second": 0, "microsecond": 0}
    return dt.replace(**midnight_fields)
def _extract_derived_fields(
    messages: list[AgentChatMessage],
) -> dict[str, Any]:
    """Return the first non-empty ``divination_derived`` dict among *messages*.

    Each message's ``metadata_json`` is inspected for
    ``agent_output -> divination_derived``. Messages whose metadata or
    ``agent_output`` is not a dict are skipped. An empty dict is returned
    when no message carries a usable value.
    """
    for msg in messages:
        meta = msg.metadata_json
        output = meta.get("agent_output") if isinstance(meta, dict) else None
        candidate = (
            output.get("divination_derived") if isinstance(output, dict) else None
        )
        # Empty dicts count as "absent" so a later message can still supply data.
        if isinstance(candidate, dict) and candidate:
            return candidate
    return {}
def _extract_sign_level(
    messages: list[AgentChatMessage],
) -> str | None:
    """Return the first non-empty string ``agent_output["sign_level"]``.

    Messages are scanned in the order given; ``None`` is returned when no
    message has a dict ``metadata_json`` with a dict ``agent_output`` that
    holds a non-empty string under ``sign_level``.
    """

    def _raw_levels():
        # Lazily yield whatever is stored under "sign_level" so the scan
        # stops at the first acceptable value.
        for msg in messages:
            meta = msg.metadata_json
            if isinstance(meta, dict):
                output = meta.get("agent_output")
                if isinstance(output, dict):
                    yield output.get("sign_level")

    return next(
        (level for level in _raw_levels() if isinstance(level, str) and level),
        None,
    )
def _extract_keywords(
    messages: list[AgentChatMessage],
) -> list[str] | None:
    """Return the first usable keyword list found in the messages' metadata.

    Each message's ``metadata_json`` is inspected for
    ``agent_output -> keywords``. Only ``str`` items are kept, so the result
    honours the declared ``list[str]`` type and can be stored in a text-array
    column. A list with no string items is treated like a missing one and the
    scan moves on to the next message.

    Returns:
        A new list of keyword strings, or ``None`` when no message provides
        at least one string keyword.
    """
    for message in messages:
        metadata_raw = message.metadata_json
        if not isinstance(metadata_raw, dict):
            continue
        agent_output = metadata_raw.get("agent_output")
        if not isinstance(agent_output, dict):
            continue
        raw_keywords = agent_output.get("keywords")
        if not isinstance(raw_keywords, list):
            continue
        # Metadata is free-form JSON: drop anything that is not a string
        # rather than letting a malformed entry break the snapshot insert.
        keywords = [item for item in raw_keywords if isinstance(item, str)]
        if keywords:
            return keywords
    return None
def _extract_question_type(
    messages: list[AgentChatMessage],
) -> str | None:
    """Return the session's question type from message metadata.

    Lookup order:

    1. ``questionType`` inside the first non-empty
       ``agent_output["divination_derived"]`` dict across all messages.
    2. Otherwise, the first non-empty string ``agent_output["questionType"]``
       across all messages.

    The second step also applies when a derived dict exists but carries no
    usable ``questionType``; previously that case returned ``None`` even if
    a top-level value was available.

    Returns:
        The question type string, or ``None`` when neither source has one.
    """
    derived_seen = False
    fallback: str | None = None
    for message in messages:
        metadata_raw = message.metadata_json
        if not isinstance(metadata_raw, dict):
            continue
        agent_output = metadata_raw.get("agent_output")
        if not isinstance(agent_output, dict):
            continue

        if not derived_seen:
            derived = agent_output.get("divination_derived")
            if isinstance(derived, dict) and derived:
                # Only the first non-empty derived dict is authoritative.
                derived_seen = True
                derived_type = derived.get("questionType")
                if isinstance(derived_type, str) and derived_type:
                    return derived_type

        if fallback is None:
            top_level_type = agent_output.get("questionType")
            if isinstance(top_level_type, str) and top_level_type:
                # Remember it, but keep scanning: a derived value found in a
                # later message still takes priority.
                fallback = top_level_type
    return fallback
def _extract_model_code(
    messages: list[AgentChatMessage],
) -> str | None:
    """Return the first truthy ``model_code`` among *messages*, else ``None``."""
    return next((msg.model_code for msg in messages if msg.model_code), None)
def _extract_tool_name(
    messages: list[AgentChatMessage],
) -> str | None:
    """Return the first truthy ``tool_name`` among *messages*, else ``None``."""
    for entry in messages:
        if name := entry.tool_name:
            return name
    return None
def _aggregate_latency(
    messages: list[AgentChatMessage],
) -> int | None:
    """Sum ``latency_ms`` over the messages that report one.

    Returns ``None`` (not ``0``) when every message has ``latency_ms`` set
    to ``None``, so "no data" stays distinguishable from a zero total.
    """
    reported = [msg.latency_ms for msg in messages if msg.latency_ms is not None]
    if not reported:
        return None
    return sum(reported)
def anonymize(
    session: AgentChatSession,
    messages: list[AgentChatMessage],
) -> AnonymousSessionSnapshot:
    """Build an ``AnonymousSessionSnapshot`` from a session and its messages.

    The snapshot copies session-level counters (message count, tokens, cost),
    adds values extracted from message metadata, and stores ``created_at`` /
    ``last_activity_at`` truncated to the day. Both ``id`` and
    ``anonymous_id`` are freshly generated UUIDs, and ``anonymized_at`` is
    the current UTC time. The returned object is not added to any database
    session here.
    """

    def _enum_text(member: Any) -> str:
        # Enum-like values expose ``.value``; anything else is stringified.
        return member.value if hasattr(member, "value") else str(member)

    def _typed(value: Any, expected: type) -> Any:
        # Keep a metadata value only when it has the expected type.
        return value if isinstance(value, expected) else None

    derived = _extract_derived_fields(messages)

    last_activity = session.last_activity_at
    snapshot_fields: dict[str, Any] = {
        "id": uuid4(),
        "anonymous_id": uuid4(),
        "session_type": _enum_text(session.session_type),
        "message_count": session.message_count,
        "status": _enum_text(session.status),
        "question_type": _extract_question_type(messages),
        "tool_name": _extract_tool_name(messages),
        "gua_name": _typed(derived.get("guaName"), str),
        "gua_name_hant": _typed(derived.get("guaNameHant"), str),
        "target_gua_name": _typed(derived.get("targetGuaName"), str),
        "has_changing_yao": _typed(derived.get("hasChangingYao"), bool),
        "sign_level": _extract_sign_level(messages),
        "keywords": _extract_keywords(messages),
        "model_code": _extract_model_code(messages),
        "total_tokens": session.total_tokens,
        "total_cost": session.total_cost,
        "total_latency_ms": _aggregate_latency(messages),
        "created_at": _truncate_to_day(session.created_at),
        "last_activity_at": _truncate_to_day(last_activity) if last_activity else None,
        "anonymized_at": datetime.now(timezone.utc),
    }
    return AnonymousSessionSnapshot(**snapshot_fields)
+45 -5
View File
@@ -5,7 +5,7 @@ from decimal import Decimal
from typing import Any, Protocol
from uuid import UUID, uuid4
from sqlalchemy import Select, func, select
from sqlalchemy import Select, delete, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from core.http.errors import ApiProblemError
@@ -17,6 +17,7 @@ from schemas.domain.chat_message import (
AgentChatMessage as AgentChatMessageSchema,
AgentChatMessageMetadata,
)
from v1.agent.anonymizer import anonymize
class ToolResultPayloadStorage(Protocol):
@@ -96,7 +97,7 @@ class AgentRepository:
async def rollback(self) -> None:
    """Roll back the pending transaction on the repository's database session."""
    await self._session.rollback()
async def delete_session(self, *, session_id: str) -> None:
async def delete_session(self, *, session_id: str) -> list[dict[str, str]]:
try:
session_uuid = UUID(session_id)
except ValueError as exc:
@@ -112,11 +113,50 @@ class AgentRepository:
)
session = (await self._session.execute(stmt)).scalar_one_or_none()
if session is None:
return
return []
if session.deleted_at is not None:
return
session.deleted_at = datetime.now(timezone.utc)
return []
messages_stmt = (
select(AgentChatMessage)
.where(AgentChatMessage.session_id == session_uuid)
.order_by(AgentChatMessage.seq)
)
messages = list((await self._session.execute(messages_stmt)).scalars().all())
attachment_paths = self._collect_attachment_paths(messages)
snapshot = anonymize(session=session, messages=messages)
self._session.add(snapshot)
await self._session.flush()
stmt_delete_messages = delete(AgentChatMessage).where(
AgentChatMessage.session_id == session_uuid
)
await self._session.execute(stmt_delete_messages)
stmt_delete_session = delete(AgentChatSession).where(
AgentChatSession.id == session_uuid
)
await self._session.execute(stmt_delete_session)
await self._session.flush()
return attachment_paths
@staticmethod
def _collect_attachment_paths(
    messages: list[AgentChatMessage],
) -> list[dict[str, str]]:
    """Collect the storage locations of attachments referenced by *messages*.

    Reads ``metadata_json["user_message_attachments"]`` of every message and
    returns one ``{"bucket": ..., "path": ...}`` dict per attachment entry
    whose ``bucket`` and ``path`` are both strings. Malformed metadata,
    non-list attachment containers and non-dict entries are skipped. Order
    follows the messages, and duplicates are not removed.
    """
    collected: list[dict[str, str]] = []
    for msg in messages:
        meta = msg.metadata_json
        entries = (
            meta.get("user_message_attachments") if isinstance(meta, dict) else None
        )
        if not isinstance(entries, list):
            continue
        collected.extend(
            {"bucket": item["bucket"], "path": item["path"]}
            for item in entries
            if isinstance(item, dict)
            and isinstance(item.get("bucket"), str)
            and isinstance(item.get("path"), str)
        )
    return collected
async def persist_user_message(
self,
+3 -1
View File
@@ -23,7 +23,7 @@ class AgentRepositoryLike(Protocol):
# Roll back the repository's pending transaction.
async def rollback(self) -> None: ...
async def delete_session(self, *, session_id: str) -> None: ...
# Delete the session and return its attachment locations as
# {"bucket": ..., "path": ...} dicts so the caller can clean up storage.
# AgentRepository returns an empty list when the session does not exist or
# is already marked deleted.
async def delete_session(self, *, session_id: str) -> list[dict[str, str]]: ...
async def get_history_day(
self,
@@ -126,6 +126,8 @@ class AttachmentStorageLike(Protocol):
expires_in_seconds: int,
) -> str: ...
# Delete stored objects in `bucket` whose key starts with `prefix`.
# NOTE(review): the int result is presumably the number of objects removed —
# confirm against the storage implementation.
async def delete_prefix(self, *, bucket: str, prefix: str) -> int: ...
# Split a signed URL into two strings.
# NOTE(review): presumably (bucket, path) — confirm against the implementation.
def parse_signed_url(self, url: str) -> tuple[str, str]: ...
+21 -1
View File
@@ -235,8 +235,28 @@ class AgentService:
return
raise
ensure_session_owner(owner_id=owner, current_user=current_user)
await self._repository.delete_session(session_id=thread_id)
attachment_paths = await self._repository.delete_session(session_id=thread_id)
await self._repository.commit()
await self._cleanup_attachments(attachment_paths)
async def _cleanup_attachments(
    self, attachment_paths: list[dict[str, str]]
) -> None:
    """Best-effort removal of stored attachments after a session is deleted.

    Args:
        attachment_paths: ``{"bucket": ..., "path": ...}`` dicts. Entries
            with a missing or empty bucket or path are skipped.

    Nothing happens when the list is empty or no attachment storage is
    configured. A failure to delete one attachment never propagates: it is
    logged together with the exception type and message, and the remaining
    attachments are still processed.
    """
    storage = self._attachment_storage
    if not attachment_paths or storage is None:
        return
    for attachment in attachment_paths:
        bucket = attachment.get("bucket")
        path = attachment.get("path")
        if not bucket or not path:
            continue
        try:
            # NOTE(review): the path is passed as a *prefix*, so any other
            # object whose key starts with the same string would match too —
            # confirm the storage layout makes that impossible.
            await storage.delete_prefix(bucket=bucket, prefix=path)
        except Exception as exc:
            # Deliberately swallowed so leftover files never fail the
            # request, but keep the cause so the failure can be diagnosed.
            logger.warning(
                "attachment_cleanup_failed",
                bucket=bucket,
                path=path,
                error_type=type(exc).__name__,
                error=str(exc),
            )
async def _append_context_cache_user_message(
self,