feat(agent): session deletion anonymization for iOS compliance
Replace soft-delete with anonymize + hard-delete to meet iOS App Store data retention requirements. Non-PII fields are preserved in anonymous_session_snapshots for analytics. - Add anonymous_session_snapshots table and ORM model - Implement anonymizer to extract non-PII fields before deletion - Remove points_ledger.biz_id FK constraint (snapshot-style reference) - Preserve transaction history while allowing session deletion - Add 14 unit tests + 1 integration test
This commit is contained in:
@@ -0,0 +1,111 @@
|
|||||||
|
"""add anonymous_session_snapshots table for iOS compliance
|
||||||
|
|
||||||
|
Revision ID: 20260415_0001
|
||||||
|
Revises: 20260413_0004
|
||||||
|
Create Date: 2026-04-15 00:10:00
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
revision: str = "20260415_0001"
|
||||||
|
down_revision: Union[str, Sequence[str], None] = "20260413_0004"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.create_table(
|
||||||
|
"anonymous_session_snapshots",
|
||||||
|
sa.Column("id", sa.UUID(), nullable=False),
|
||||||
|
sa.Column("anonymous_id", sa.UUID(), nullable=False),
|
||||||
|
sa.Column("session_type", sa.String(length=20), nullable=False),
|
||||||
|
sa.Column("message_count", sa.Integer(), nullable=True),
|
||||||
|
sa.Column("status", sa.String(length=20), nullable=True),
|
||||||
|
sa.Column("question_type", sa.String(length=50), nullable=True),
|
||||||
|
sa.Column("tool_name", sa.String(length=100), nullable=True),
|
||||||
|
sa.Column("gua_name", sa.String(length=50), nullable=True),
|
||||||
|
sa.Column("gua_name_hant", sa.String(length=50), nullable=True),
|
||||||
|
sa.Column("target_gua_name", sa.String(length=50), nullable=True),
|
||||||
|
sa.Column("has_changing_yao", sa.Boolean(), nullable=True),
|
||||||
|
sa.Column("sign_level", sa.String(length=20), nullable=True),
|
||||||
|
sa.Column("keywords", postgresql.ARRAY(sa.Text()), nullable=True),
|
||||||
|
sa.Column("model_code", sa.String(length=50), nullable=True),
|
||||||
|
sa.Column("total_tokens", sa.Integer(), nullable=True),
|
||||||
|
sa.Column("total_cost", sa.Numeric(12, 6), nullable=True),
|
||||||
|
sa.Column("total_latency_ms", sa.Integer(), nullable=True),
|
||||||
|
sa.Column(
|
||||||
|
"created_at",
|
||||||
|
sa.DateTime(timezone=True),
|
||||||
|
nullable=False,
|
||||||
|
),
|
||||||
|
sa.Column("last_activity_at", sa.DateTime(timezone=True), nullable=True),
|
||||||
|
sa.Column(
|
||||||
|
"anonymized_at",
|
||||||
|
sa.DateTime(timezone=True),
|
||||||
|
server_default=sa.text("now()"),
|
||||||
|
nullable=False,
|
||||||
|
),
|
||||||
|
sa.PrimaryKeyConstraint("id"),
|
||||||
|
)
|
||||||
|
op.create_index(
|
||||||
|
"ix_anonymous_session_snapshots_anonymous_id",
|
||||||
|
"anonymous_session_snapshots",
|
||||||
|
["anonymous_id"],
|
||||||
|
unique=False,
|
||||||
|
)
|
||||||
|
op.create_index(
|
||||||
|
"ix_anonymous_session_snapshots_created_at",
|
||||||
|
"anonymous_session_snapshots",
|
||||||
|
["created_at"],
|
||||||
|
unique=False,
|
||||||
|
)
|
||||||
|
op.create_index(
|
||||||
|
"ix_anonymous_session_snapshots_question_type",
|
||||||
|
"anonymous_session_snapshots",
|
||||||
|
["question_type"],
|
||||||
|
unique=False,
|
||||||
|
)
|
||||||
|
_enable_service_role_only_rls("anonymous_session_snapshots")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
_drop_rls("anonymous_session_snapshots")
|
||||||
|
op.drop_index(
|
||||||
|
"ix_anonymous_session_snapshots_question_type",
|
||||||
|
table_name="anonymous_session_snapshots",
|
||||||
|
)
|
||||||
|
op.drop_index(
|
||||||
|
"ix_anonymous_session_snapshots_created_at",
|
||||||
|
table_name="anonymous_session_snapshots",
|
||||||
|
)
|
||||||
|
op.drop_index(
|
||||||
|
"ix_anonymous_session_snapshots_anonymous_id",
|
||||||
|
table_name="anonymous_session_snapshots",
|
||||||
|
)
|
||||||
|
op.drop_table("anonymous_session_snapshots")
|
||||||
|
|
||||||
|
|
||||||
|
def _enable_service_role_only_rls(table_name: str) -> None:
|
||||||
|
for role in ["anon", "authenticated"]:
|
||||||
|
for action in ["select", "insert", "update", "delete"]:
|
||||||
|
op.execute(
|
||||||
|
f"DROP POLICY IF EXISTS {role}_{action}_{table_name} ON {table_name}"
|
||||||
|
)
|
||||||
|
op.execute(f"ALTER TABLE {table_name} ENABLE ROW LEVEL SECURITY")
|
||||||
|
op.execute(
|
||||||
|
f"CREATE POLICY service_role_all_{table_name} ON {table_name} FOR ALL TO service_role USING (true) WITH CHECK (true)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _drop_rls(table_name: str) -> None:
|
||||||
|
for role in ["anon", "authenticated"]:
|
||||||
|
for action in ["select", "insert", "update", "delete"]:
|
||||||
|
op.execute(
|
||||||
|
f"DROP POLICY IF EXISTS {role}_{action}_{table_name} ON {table_name}"
|
||||||
|
)
|
||||||
|
op.execute(f"DROP POLICY IF EXISTS service_role_all_{table_name} ON {table_name}")
|
||||||
|
op.execute(f"ALTER TABLE {table_name} DISABLE ROW LEVEL SECURITY")
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
"""drop points_ledger.biz_id foreign key for snapshot-style reference
|
||||||
|
|
||||||
|
Revision ID: 20260415_0002
|
||||||
|
Revises: 20260415_0001
|
||||||
|
Create Date: 2026-04-15 10:00:00
|
||||||
|
|
||||||
|
points_ledger.biz_id stores a snapshot reference to sessions.id for audit purposes.
|
||||||
|
This allows sessions to be deleted while preserving the biz_id value in points_ledger
|
||||||
|
for user-facing transaction history.
|
||||||
|
|
||||||
|
The FK constraint is removed because:
|
||||||
|
1. Users need to see their points transaction history even after session deletion
|
||||||
|
2. Session deletion (anonymization for iOS compliance) should not cascade delete
|
||||||
|
points_ledger records
|
||||||
|
3. biz_id becomes a "snapshot" reference - the value is kept but no FK enforcement
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision: str = "20260415_0002"
|
||||||
|
down_revision: Union[str, Sequence[str], None] = "20260415_0001"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
op.drop_constraint("points_ledger_biz_id_fkey", "points_ledger", type_="foreignkey")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
op.create_foreign_key(
|
||||||
|
"points_ledger_biz_id_fkey",
|
||||||
|
"points_ledger",
|
||||||
|
"sessions",
|
||||||
|
["biz_id"],
|
||||||
|
["id"],
|
||||||
|
ondelete="SET NULL",
|
||||||
|
)
|
||||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from .agent_chat_message import AgentChatMessage
|
from .agent_chat_message import AgentChatMessage
|
||||||
from .agent_chat_session import AgentChatSession
|
from .agent_chat_session import AgentChatSession
|
||||||
|
from .anonymous_session_snapshot import AnonymousSessionSnapshot
|
||||||
from .auth_user import AuthUser
|
from .auth_user import AuthUser
|
||||||
from .invite_code import InviteCode
|
from .invite_code import InviteCode
|
||||||
from .llm import Llm
|
from .llm import Llm
|
||||||
@@ -18,6 +19,7 @@ from .user_points import UserPoints
|
|||||||
__all__ = [
|
__all__ = [
|
||||||
"AgentChatMessage",
|
"AgentChatMessage",
|
||||||
"AgentChatSession",
|
"AgentChatSession",
|
||||||
|
"AnonymousSessionSnapshot",
|
||||||
"AuthUser",
|
"AuthUser",
|
||||||
"InviteCode",
|
"InviteCode",
|
||||||
"Llm",
|
"Llm",
|
||||||
|
|||||||
@@ -0,0 +1,46 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from decimal import Decimal
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from sqlalchemy import Boolean, DateTime, Integer, Numeric, String, Text
|
||||||
|
from sqlalchemy.dialects.postgresql import ARRAY, UUID
|
||||||
|
from sqlalchemy.orm import Mapped, mapped_column
|
||||||
|
|
||||||
|
from core.db.base import Base
|
||||||
|
|
||||||
|
__all__ = ["AnonymousSessionSnapshot"]
|
||||||
|
|
||||||
|
|
||||||
|
class AnonymousSessionSnapshot(Base):
|
||||||
|
__tablename__: str = "anonymous_session_snapshots"
|
||||||
|
|
||||||
|
id: Mapped[uuid.UUID] = mapped_column(
|
||||||
|
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
|
||||||
|
)
|
||||||
|
anonymous_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
|
||||||
|
session_type: Mapped[str] = mapped_column(String(20), nullable=False)
|
||||||
|
message_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||||
|
status: Mapped[str | None] = mapped_column(String(20), nullable=True)
|
||||||
|
question_type: Mapped[str | None] = mapped_column(String(50), nullable=True)
|
||||||
|
tool_name: Mapped[str | None] = mapped_column(String(100), nullable=True)
|
||||||
|
gua_name: Mapped[str | None] = mapped_column(String(50), nullable=True)
|
||||||
|
gua_name_hant: Mapped[str | None] = mapped_column(String(50), nullable=True)
|
||||||
|
target_gua_name: Mapped[str | None] = mapped_column(String(50), nullable=True)
|
||||||
|
has_changing_yao: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
|
||||||
|
sign_level: Mapped[str | None] = mapped_column(String(20), nullable=True)
|
||||||
|
keywords: Mapped[list[str] | None] = mapped_column(ARRAY(Text()), nullable=True)
|
||||||
|
model_code: Mapped[str | None] = mapped_column(String(50), nullable=True)
|
||||||
|
total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||||
|
total_cost: Mapped[Decimal | None] = mapped_column(Numeric(12, 6), nullable=True)
|
||||||
|
total_latency_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||||
|
created_at: Mapped[datetime] = mapped_column(
|
||||||
|
DateTime(timezone=True), nullable=False
|
||||||
|
)
|
||||||
|
last_activity_at: Mapped[datetime | None] = mapped_column(
|
||||||
|
DateTime(timezone=True), nullable=True
|
||||||
|
)
|
||||||
|
anonymized_at: Mapped[datetime] = mapped_column(
|
||||||
|
DateTime(timezone=True), nullable=False
|
||||||
|
)
|
||||||
@@ -0,0 +1,162 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from models.agent_chat_message import AgentChatMessage
|
||||||
|
from models.agent_chat_session import AgentChatSession
|
||||||
|
from models.anonymous_session_snapshot import AnonymousSessionSnapshot
|
||||||
|
from core.logging import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _truncate_to_day(dt: datetime) -> datetime:
|
||||||
|
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_derived_fields(
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
for message in messages:
|
||||||
|
metadata_raw = message.metadata_json
|
||||||
|
if not isinstance(metadata_raw, dict):
|
||||||
|
continue
|
||||||
|
agent_output = metadata_raw.get("agent_output")
|
||||||
|
if not isinstance(agent_output, dict):
|
||||||
|
continue
|
||||||
|
derived = agent_output.get("divination_derived")
|
||||||
|
if isinstance(derived, dict) and derived:
|
||||||
|
return derived
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_sign_level(
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> str | None:
|
||||||
|
for message in messages:
|
||||||
|
metadata_raw = message.metadata_json
|
||||||
|
if not isinstance(metadata_raw, dict):
|
||||||
|
continue
|
||||||
|
agent_output = metadata_raw.get("agent_output")
|
||||||
|
if not isinstance(agent_output, dict):
|
||||||
|
continue
|
||||||
|
sign_level = agent_output.get("sign_level")
|
||||||
|
if isinstance(sign_level, str) and sign_level:
|
||||||
|
return sign_level
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_keywords(
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> list[str] | None:
|
||||||
|
for message in messages:
|
||||||
|
metadata_raw = message.metadata_json
|
||||||
|
if not isinstance(metadata_raw, dict):
|
||||||
|
continue
|
||||||
|
agent_output = metadata_raw.get("agent_output")
|
||||||
|
if not isinstance(agent_output, dict):
|
||||||
|
continue
|
||||||
|
keywords = agent_output.get("keywords")
|
||||||
|
if isinstance(keywords, list) and keywords:
|
||||||
|
return keywords
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_question_type(
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> str | None:
|
||||||
|
derived = _extract_derived_fields(messages)
|
||||||
|
if not derived:
|
||||||
|
for message in messages:
|
||||||
|
metadata_raw = message.metadata_json
|
||||||
|
if not isinstance(metadata_raw, dict):
|
||||||
|
continue
|
||||||
|
agent_output = metadata_raw.get("agent_output")
|
||||||
|
if not isinstance(agent_output, dict):
|
||||||
|
continue
|
||||||
|
question_type = agent_output.get("questionType")
|
||||||
|
if isinstance(question_type, str) and question_type:
|
||||||
|
return question_type
|
||||||
|
return None
|
||||||
|
question_type = derived.get("questionType")
|
||||||
|
if isinstance(question_type, str) and question_type:
|
||||||
|
return question_type
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_model_code(
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> str | None:
|
||||||
|
for message in messages:
|
||||||
|
if message.model_code:
|
||||||
|
return message.model_code
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_tool_name(
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> str | None:
|
||||||
|
for message in messages:
|
||||||
|
if message.tool_name:
|
||||||
|
return message.tool_name
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _aggregate_latency(
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> int | None:
|
||||||
|
total = 0
|
||||||
|
found = False
|
||||||
|
for message in messages:
|
||||||
|
if message.latency_ms is not None:
|
||||||
|
total += message.latency_ms
|
||||||
|
found = True
|
||||||
|
return total if found else None
|
||||||
|
|
||||||
|
|
||||||
|
def anonymize(
|
||||||
|
session: AgentChatSession,
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> AnonymousSessionSnapshot:
|
||||||
|
derived = _extract_derived_fields(messages)
|
||||||
|
|
||||||
|
gua_name = derived.get("guaName") if derived else None
|
||||||
|
gua_name_hant = derived.get("guaNameHant") if derived else None
|
||||||
|
target_gua_name = derived.get("targetGuaName") if derived else None
|
||||||
|
has_changing_yao = derived.get("hasChangingYao") if derived else None
|
||||||
|
|
||||||
|
created_at = _truncate_to_day(session.created_at)
|
||||||
|
last_activity_at = (
|
||||||
|
_truncate_to_day(session.last_activity_at) if session.last_activity_at else None
|
||||||
|
)
|
||||||
|
|
||||||
|
return AnonymousSessionSnapshot(
|
||||||
|
id=uuid4(),
|
||||||
|
anonymous_id=uuid4(),
|
||||||
|
session_type=session.session_type.value
|
||||||
|
if hasattr(session.session_type, "value")
|
||||||
|
else str(session.session_type),
|
||||||
|
message_count=session.message_count,
|
||||||
|
status=session.status.value
|
||||||
|
if hasattr(session.status, "value")
|
||||||
|
else str(session.status),
|
||||||
|
question_type=_extract_question_type(messages),
|
||||||
|
tool_name=_extract_tool_name(messages),
|
||||||
|
gua_name=gua_name if isinstance(gua_name, str) else None,
|
||||||
|
gua_name_hant=gua_name_hant if isinstance(gua_name_hant, str) else None,
|
||||||
|
target_gua_name=target_gua_name if isinstance(target_gua_name, str) else None,
|
||||||
|
has_changing_yao=has_changing_yao
|
||||||
|
if isinstance(has_changing_yao, bool)
|
||||||
|
else None,
|
||||||
|
sign_level=_extract_sign_level(messages),
|
||||||
|
keywords=_extract_keywords(messages),
|
||||||
|
model_code=_extract_model_code(messages),
|
||||||
|
total_tokens=session.total_tokens,
|
||||||
|
total_cost=session.total_cost,
|
||||||
|
total_latency_ms=_aggregate_latency(messages),
|
||||||
|
created_at=created_at,
|
||||||
|
last_activity_at=last_activity_at,
|
||||||
|
anonymized_at=datetime.now(timezone.utc),
|
||||||
|
)
|
||||||
@@ -5,7 +5,7 @@ from decimal import Decimal
|
|||||||
from typing import Any, Protocol
|
from typing import Any, Protocol
|
||||||
from uuid import UUID, uuid4
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
from sqlalchemy import Select, func, select
|
from sqlalchemy import Select, delete, func, select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from core.http.errors import ApiProblemError
|
from core.http.errors import ApiProblemError
|
||||||
@@ -17,6 +17,7 @@ from schemas.domain.chat_message import (
|
|||||||
AgentChatMessage as AgentChatMessageSchema,
|
AgentChatMessage as AgentChatMessageSchema,
|
||||||
AgentChatMessageMetadata,
|
AgentChatMessageMetadata,
|
||||||
)
|
)
|
||||||
|
from v1.agent.anonymizer import anonymize
|
||||||
|
|
||||||
|
|
||||||
class ToolResultPayloadStorage(Protocol):
|
class ToolResultPayloadStorage(Protocol):
|
||||||
@@ -96,7 +97,7 @@ class AgentRepository:
|
|||||||
async def rollback(self) -> None:
|
async def rollback(self) -> None:
|
||||||
await self._session.rollback()
|
await self._session.rollback()
|
||||||
|
|
||||||
async def delete_session(self, *, session_id: str) -> None:
|
async def delete_session(self, *, session_id: str) -> list[dict[str, str]]:
|
||||||
try:
|
try:
|
||||||
session_uuid = UUID(session_id)
|
session_uuid = UUID(session_id)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
@@ -112,11 +113,50 @@ class AgentRepository:
|
|||||||
)
|
)
|
||||||
session = (await self._session.execute(stmt)).scalar_one_or_none()
|
session = (await self._session.execute(stmt)).scalar_one_or_none()
|
||||||
if session is None:
|
if session is None:
|
||||||
return
|
return []
|
||||||
if session.deleted_at is not None:
|
if session.deleted_at is not None:
|
||||||
return
|
return []
|
||||||
session.deleted_at = datetime.now(timezone.utc)
|
messages_stmt = (
|
||||||
|
select(AgentChatMessage)
|
||||||
|
.where(AgentChatMessage.session_id == session_uuid)
|
||||||
|
.order_by(AgentChatMessage.seq)
|
||||||
|
)
|
||||||
|
messages = list((await self._session.execute(messages_stmt)).scalars().all())
|
||||||
|
attachment_paths = self._collect_attachment_paths(messages)
|
||||||
|
snapshot = anonymize(session=session, messages=messages)
|
||||||
|
self._session.add(snapshot)
|
||||||
await self._session.flush()
|
await self._session.flush()
|
||||||
|
stmt_delete_messages = delete(AgentChatMessage).where(
|
||||||
|
AgentChatMessage.session_id == session_uuid
|
||||||
|
)
|
||||||
|
await self._session.execute(stmt_delete_messages)
|
||||||
|
stmt_delete_session = delete(AgentChatSession).where(
|
||||||
|
AgentChatSession.id == session_uuid
|
||||||
|
)
|
||||||
|
await self._session.execute(stmt_delete_session)
|
||||||
|
await self._session.flush()
|
||||||
|
return attachment_paths
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _collect_attachment_paths(
|
||||||
|
messages: list[AgentChatMessage],
|
||||||
|
) -> list[dict[str, str]]:
|
||||||
|
paths: list[dict[str, str]] = []
|
||||||
|
for message in messages:
|
||||||
|
metadata_raw = message.metadata_json
|
||||||
|
if not isinstance(metadata_raw, dict):
|
||||||
|
continue
|
||||||
|
attachments_raw = metadata_raw.get("user_message_attachments")
|
||||||
|
if not isinstance(attachments_raw, list):
|
||||||
|
continue
|
||||||
|
for attachment in attachments_raw:
|
||||||
|
if not isinstance(attachment, dict):
|
||||||
|
continue
|
||||||
|
bucket = attachment.get("bucket")
|
||||||
|
path = attachment.get("path")
|
||||||
|
if isinstance(bucket, str) and isinstance(path, str):
|
||||||
|
paths.append({"bucket": bucket, "path": path})
|
||||||
|
return paths
|
||||||
|
|
||||||
async def persist_user_message(
|
async def persist_user_message(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class AgentRepositoryLike(Protocol):
|
|||||||
|
|
||||||
async def rollback(self) -> None: ...
|
async def rollback(self) -> None: ...
|
||||||
|
|
||||||
async def delete_session(self, *, session_id: str) -> None: ...
|
async def delete_session(self, *, session_id: str) -> list[dict[str, str]]: ...
|
||||||
|
|
||||||
async def get_history_day(
|
async def get_history_day(
|
||||||
self,
|
self,
|
||||||
@@ -126,6 +126,8 @@ class AttachmentStorageLike(Protocol):
|
|||||||
expires_in_seconds: int,
|
expires_in_seconds: int,
|
||||||
) -> str: ...
|
) -> str: ...
|
||||||
|
|
||||||
|
async def delete_prefix(self, *, bucket: str, prefix: str) -> int: ...
|
||||||
|
|
||||||
def parse_signed_url(self, url: str) -> tuple[str, str]: ...
|
def parse_signed_url(self, url: str) -> tuple[str, str]: ...
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -235,8 +235,28 @@ class AgentService:
|
|||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
ensure_session_owner(owner_id=owner, current_user=current_user)
|
ensure_session_owner(owner_id=owner, current_user=current_user)
|
||||||
await self._repository.delete_session(session_id=thread_id)
|
attachment_paths = await self._repository.delete_session(session_id=thread_id)
|
||||||
await self._repository.commit()
|
await self._repository.commit()
|
||||||
|
await self._cleanup_attachments(attachment_paths)
|
||||||
|
|
||||||
|
async def _cleanup_attachments(
|
||||||
|
self, attachment_paths: list[dict[str, str]]
|
||||||
|
) -> None:
|
||||||
|
if not attachment_paths or self._attachment_storage is None:
|
||||||
|
return
|
||||||
|
for attachment in attachment_paths:
|
||||||
|
bucket = attachment.get("bucket")
|
||||||
|
path = attachment.get("path")
|
||||||
|
if not bucket or not path:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
await self._attachment_storage.delete_prefix(bucket=bucket, prefix=path)
|
||||||
|
except Exception:
|
||||||
|
logger.warning(
|
||||||
|
"attachment_cleanup_failed",
|
||||||
|
bucket=bucket,
|
||||||
|
path=path,
|
||||||
|
)
|
||||||
|
|
||||||
async def _append_context_cache_user_message(
|
async def _append_context_cache_user_message(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -0,0 +1,183 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from typing import TypedDict
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
from sqlalchemy import select
|
||||||
|
|
||||||
|
from core.db.session import AsyncSessionLocal
|
||||||
|
from models.agent_chat_session import AgentChatSession
|
||||||
|
from models.agent_chat_message import AgentChatMessage
|
||||||
|
from models.anonymous_session_snapshot import AnonymousSessionSnapshot
|
||||||
|
|
||||||
|
|
||||||
|
class IdentityData(TypedDict):
|
||||||
|
email: str
|
||||||
|
code: str
|
||||||
|
|
||||||
|
|
||||||
|
async def _create_email_session(
|
||||||
|
client: httpx.AsyncClient,
|
||||||
|
*,
|
||||||
|
email: str,
|
||||||
|
code: str,
|
||||||
|
) -> dict[str, object]:
|
||||||
|
resp = await client.post(
|
||||||
|
"/api/v1/auth/email-session",
|
||||||
|
json={"email": email, "token": code},
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
|
||||||
|
|
||||||
|
async def _wait_terminal_event(
|
||||||
|
client: httpx.AsyncClient,
|
||||||
|
*,
|
||||||
|
access_token: str,
|
||||||
|
thread_id: str,
|
||||||
|
run_id: str,
|
||||||
|
timeout_s: int = 180,
|
||||||
|
) -> str:
|
||||||
|
headers = {"Authorization": f"Bearer {access_token}"}
|
||||||
|
params = {"runId": run_id, "idle_limit": 120}
|
||||||
|
started = time.time()
|
||||||
|
|
||||||
|
async with client.stream(
|
||||||
|
"GET",
|
||||||
|
f"/api/v1/agent/runs/{thread_id}/events",
|
||||||
|
headers=headers,
|
||||||
|
params=params,
|
||||||
|
) as resp:
|
||||||
|
resp.raise_for_status()
|
||||||
|
async for line in resp.aiter_lines():
|
||||||
|
if time.time() - started > timeout_s:
|
||||||
|
raise TimeoutError("SSE timed out")
|
||||||
|
if not line or not line.startswith("data: "):
|
||||||
|
continue
|
||||||
|
event = json.loads(line[6:])
|
||||||
|
event_type = event.get("type")
|
||||||
|
if event_type in {"RUN_FINISHED", "RUN_ERROR"}:
|
||||||
|
return str(event_type)
|
||||||
|
|
||||||
|
raise RuntimeError("No terminal SSE event")
|
||||||
|
|
||||||
|
|
||||||
|
def _build_run_payload(*, thread_id: str, run_id: str) -> dict[str, object]:
|
||||||
|
now = int(time.time() * 1000)
|
||||||
|
return {
|
||||||
|
"threadId": thread_id,
|
||||||
|
"runId": run_id,
|
||||||
|
"state": {},
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"id": f"msg_{run_id}_user_0",
|
||||||
|
"role": "user",
|
||||||
|
"content": "今天事业运如何?",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tools": [],
|
||||||
|
"context": [],
|
||||||
|
"forwardedProps": {
|
||||||
|
"runtime_mode": "chat",
|
||||||
|
"client_time": {
|
||||||
|
"device_timezone": "Asia/Shanghai",
|
||||||
|
"client_now_iso": "2026-04-15T12:00:00Z",
|
||||||
|
"client_epoch_ms": now,
|
||||||
|
},
|
||||||
|
"divinationPayload": {
|
||||||
|
"divinationMethod": "自动起卦",
|
||||||
|
"questionType": "事业",
|
||||||
|
"question": "今天事业运如何?",
|
||||||
|
"divinationTimeIso": "2026-04-15T12:00:00Z",
|
||||||
|
"yaoLines": ["少阳", "少阴", "老阳", "少阳", "老阴", "少阴"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_session_delete_anonymizes_and_hard_deletes(
|
||||||
|
api_client: httpx.AsyncClient,
|
||||||
|
test_identity: IdentityData,
|
||||||
|
db_cleanup: list[str],
|
||||||
|
) -> None:
|
||||||
|
email = str(test_identity["email"]).strip().lower()
|
||||||
|
db_cleanup.append(email)
|
||||||
|
|
||||||
|
auth_resp = await _create_email_session(
|
||||||
|
api_client,
|
||||||
|
email=email,
|
||||||
|
code=str(test_identity["code"]),
|
||||||
|
)
|
||||||
|
user = auth_resp.get("user")
|
||||||
|
assert isinstance(user, dict)
|
||||||
|
access_token = str(auth_resp["access_token"])
|
||||||
|
headers = {"Authorization": f"Bearer {access_token}"}
|
||||||
|
|
||||||
|
thread_id = str(uuid.uuid4())
|
||||||
|
run_id = f"run_{int(time.time() * 1000)}"
|
||||||
|
|
||||||
|
enqueue = await api_client.post(
|
||||||
|
"/api/v1/agent/runs",
|
||||||
|
headers=headers,
|
||||||
|
json=_build_run_payload(thread_id=thread_id, run_id=run_id),
|
||||||
|
)
|
||||||
|
assert enqueue.status_code == 202
|
||||||
|
|
||||||
|
terminal = await _wait_terminal_event(
|
||||||
|
api_client,
|
||||||
|
access_token=access_token,
|
||||||
|
thread_id=thread_id,
|
||||||
|
run_id=run_id,
|
||||||
|
)
|
||||||
|
assert terminal in {"RUN_FINISHED", "RUN_ERROR"}
|
||||||
|
|
||||||
|
async with AsyncSessionLocal() as session:
|
||||||
|
session_result = await session.execute(
|
||||||
|
select(AgentChatSession).where(AgentChatSession.id == uuid.UUID(thread_id))
|
||||||
|
)
|
||||||
|
session_obj = session_result.scalar_one_or_none()
|
||||||
|
assert session_obj is not None, "Session should exist before deletion"
|
||||||
|
|
||||||
|
delete_resp = await api_client.delete(
|
||||||
|
f"/api/v1/agent/sessions/{thread_id}",
|
||||||
|
headers=headers,
|
||||||
|
)
|
||||||
|
assert delete_resp.status_code == 204
|
||||||
|
|
||||||
|
async with AsyncSessionLocal() as session:
|
||||||
|
session_result = await session.execute(
|
||||||
|
select(AgentChatSession).where(AgentChatSession.id == uuid.UUID(thread_id))
|
||||||
|
)
|
||||||
|
deleted_session = session_result.scalar_one_or_none()
|
||||||
|
assert deleted_session is None, (
|
||||||
|
"Session should be hard-deleted, not soft-deleted"
|
||||||
|
)
|
||||||
|
|
||||||
|
msg_result = await session.execute(
|
||||||
|
select(AgentChatMessage).where(
|
||||||
|
AgentChatMessage.session_id == uuid.UUID(thread_id)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
remaining_messages = msg_result.scalars().all()
|
||||||
|
assert len(remaining_messages) == 0, (
|
||||||
|
"Messages should be hard-deleted along with session"
|
||||||
|
)
|
||||||
|
|
||||||
|
snapshot_result = await session.execute(
|
||||||
|
select(AnonymousSessionSnapshot).order_by(
|
||||||
|
AnonymousSessionSnapshot.anonymized_at.desc()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
snapshots = snapshot_result.scalars().all()
|
||||||
|
assert len(snapshots) >= 1, "At least one anonymous snapshot should exist"
|
||||||
|
|
||||||
|
snapshot = snapshots[0]
|
||||||
|
assert snapshot.session_type == "chat"
|
||||||
|
assert snapshot.anonymous_id is not None
|
||||||
|
assert snapshot.id is not None
|
||||||
|
assert snapshot.anonymized_at is not None
|
||||||
@@ -0,0 +1,216 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from decimal import Decimal
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from schemas.enums import AgentChatMessageRole, AgentChatSessionStatus, SessionType
|
||||||
|
|
||||||
|
from models.agent_chat_message import AgentChatMessage
|
||||||
|
from models.agent_chat_session import AgentChatSession
|
||||||
|
from v1.agent.anonymizer import (
|
||||||
|
_aggregate_latency,
|
||||||
|
_extract_derived_fields,
|
||||||
|
_extract_keywords,
|
||||||
|
_extract_model_code,
|
||||||
|
_extract_question_type,
|
||||||
|
_extract_sign_level,
|
||||||
|
_extract_tool_name,
|
||||||
|
_truncate_to_day,
|
||||||
|
anonymize,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_session(**overrides: object) -> AgentChatSession:
|
||||||
|
defaults: dict[str, object] = {
|
||||||
|
"id": uuid4(),
|
||||||
|
"user_id": uuid4(),
|
||||||
|
"session_type": SessionType.CHAT,
|
||||||
|
"status": AgentChatSessionStatus.COMPLETED,
|
||||||
|
"message_count": 3,
|
||||||
|
"total_tokens": 1500,
|
||||||
|
"total_cost": Decimal("0.05"),
|
||||||
|
"created_at": datetime(2026, 4, 15, 14, 32, 0, tzinfo=timezone.utc),
|
||||||
|
"last_activity_at": datetime(2026, 4, 15, 14, 45, 0, tzinfo=timezone.utc),
|
||||||
|
"job_id": None,
|
||||||
|
"title": "Will I get the job?",
|
||||||
|
"state_snapshot": None,
|
||||||
|
"updated_at": datetime(2026, 4, 15, 14, 45, 0, tzinfo=timezone.utc),
|
||||||
|
"deleted_at": None,
|
||||||
|
}
|
||||||
|
defaults.update(overrides)
|
||||||
|
return AgentChatSession(**defaults)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_message(
|
||||||
|
*,
|
||||||
|
session_id: object | None = None,
|
||||||
|
role: AgentChatMessageRole = AgentChatMessageRole.ASSISTANT,
|
||||||
|
metadata_json: dict[str, object] | None = None,
|
||||||
|
model_code: str | None = None,
|
||||||
|
tool_name: str | None = None,
|
||||||
|
latency_ms: int | None = None,
|
||||||
|
) -> AgentChatMessage:
|
||||||
|
return AgentChatMessage(
|
||||||
|
id=uuid4(),
|
||||||
|
session_id=session_id or uuid4(),
|
||||||
|
seq=1,
|
||||||
|
role=role,
|
||||||
|
content="some content",
|
||||||
|
model_code=model_code,
|
||||||
|
tool_name=tool_name,
|
||||||
|
input_tokens=100,
|
||||||
|
output_tokens=200,
|
||||||
|
cost=Decimal("0.02"),
|
||||||
|
latency_ms=latency_ms,
|
||||||
|
visibility_mask=0,
|
||||||
|
metadata_json=metadata_json,
|
||||||
|
created_at=datetime(2026, 4, 15, 14, 33, 0, tzinfo=timezone.utc),
|
||||||
|
updated_at=datetime(2026, 4, 15, 14, 33, 0, tzinfo=timezone.utc),
|
||||||
|
deleted_at=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_to_day() -> None:
|
||||||
|
dt = datetime(2026, 4, 15, 14, 32, 45, 123456, tzinfo=timezone.utc)
|
||||||
|
result = _truncate_to_day(dt)
|
||||||
|
assert result == datetime(2026, 4, 15, 0, 0, 0, 0, tzinfo=timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_derived_fields_found() -> None:
|
||||||
|
msg = _make_message(
|
||||||
|
metadata_json={
|
||||||
|
"agent_output": {
|
||||||
|
"divination_derived": {
|
||||||
|
"guaName": "乾",
|
||||||
|
"questionType": "career",
|
||||||
|
"hasChangingYao": True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
derived = _extract_derived_fields([msg])
|
||||||
|
assert derived.get("guaName") == "乾"
|
||||||
|
assert derived.get("questionType") == "career"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_derived_fields_missing() -> None:
|
||||||
|
msg = _make_message(metadata_json={"run_id": "abc"})
|
||||||
|
derived = _extract_derived_fields([msg])
|
||||||
|
assert derived == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_sign_level() -> None:
|
||||||
|
msg = _make_message(metadata_json={"agent_output": {"sign_level": "中上签"}})
|
||||||
|
assert _extract_sign_level([msg]) == "中上签"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_sign_level_none() -> None:
|
||||||
|
msg = _make_message(metadata_json={"agent_output": {}})
|
||||||
|
assert _extract_sign_level([msg]) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_keywords() -> None:
|
||||||
|
msg = _make_message(metadata_json={"agent_output": {"keywords": ["事业", "贵人"]}})
|
||||||
|
assert _extract_keywords([msg]) == ["事业", "贵人"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_question_type_from_derived() -> None:
|
||||||
|
msg = _make_message(
|
||||||
|
metadata_json={
|
||||||
|
"agent_output": {
|
||||||
|
"divination_derived": {"questionType": "career"},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
assert _extract_question_type([msg]) == "career"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_question_type_from_agent_output() -> None:
|
||||||
|
msg = _make_message(metadata_json={"agent_output": {"questionType": "love"}})
|
||||||
|
assert _extract_question_type([msg]) == "love"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_model_code() -> None:
|
||||||
|
msg = _make_message(model_code="qwen3.5-flash")
|
||||||
|
assert _extract_model_code([msg]) == "qwen3.5-flash"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_tool_name() -> None:
|
||||||
|
msg = _make_message(tool_name="liuyao")
|
||||||
|
assert _extract_tool_name([msg]) == "liuyao"
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_latency() -> None:
|
||||||
|
msg1 = _make_message(latency_ms=500)
|
||||||
|
msg2 = _make_message(latency_ms=300)
|
||||||
|
assert _aggregate_latency([msg1, msg2]) == 800
|
||||||
|
|
||||||
|
|
||||||
|
def test_aggregate_latency_none() -> None:
|
||||||
|
msg = _make_message(latency_ms=None)
|
||||||
|
assert _aggregate_latency([msg]) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_anonymize_full_snapshot() -> None:
|
||||||
|
session = _make_session()
|
||||||
|
msg = _make_message(
|
||||||
|
session_id=session.id,
|
||||||
|
role=AgentChatMessageRole.ASSISTANT,
|
||||||
|
model_code="qwen3.5-flash",
|
||||||
|
tool_name="liuyao",
|
||||||
|
latency_ms=1200,
|
||||||
|
metadata_json={
|
||||||
|
"agent_output": {
|
||||||
|
"sign_level": "上上签",
|
||||||
|
"keywords": ["事业", "贵人"],
|
||||||
|
"divination_derived": {
|
||||||
|
"questionType": "career",
|
||||||
|
"guaName": "乾",
|
||||||
|
"guaNameHant": "乾",
|
||||||
|
"targetGuaName": "姤",
|
||||||
|
"hasChangingYao": True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
user_msg = _make_message(
|
||||||
|
session_id=session.id,
|
||||||
|
role=AgentChatMessageRole.USER,
|
||||||
|
latency_ms=None,
|
||||||
|
)
|
||||||
|
snapshot = anonymize(session=session, messages=[msg, user_msg])
|
||||||
|
|
||||||
|
assert snapshot.session_type == "chat"
|
||||||
|
assert snapshot.message_count == 3
|
||||||
|
assert snapshot.status == "completed"
|
||||||
|
assert snapshot.question_type == "career"
|
||||||
|
assert snapshot.tool_name == "liuyao"
|
||||||
|
assert snapshot.model_code == "qwen3.5-flash"
|
||||||
|
assert snapshot.gua_name == "乾"
|
||||||
|
assert snapshot.gua_name_hant == "乾"
|
||||||
|
assert snapshot.target_gua_name == "姤"
|
||||||
|
assert snapshot.has_changing_yao is True
|
||||||
|
assert snapshot.sign_level == "上上签"
|
||||||
|
assert snapshot.keywords == ["事业", "贵人"]
|
||||||
|
assert snapshot.total_tokens == 1500
|
||||||
|
assert snapshot.total_cost == Decimal("0.05")
|
||||||
|
assert snapshot.total_latency_ms == 1200
|
||||||
|
assert snapshot.created_at == datetime(2026, 4, 15, 0, 0, 0, tzinfo=timezone.utc)
|
||||||
|
assert snapshot.last_activity_at == datetime(
|
||||||
|
2026, 4, 15, 0, 0, 0, tzinfo=timezone.utc
|
||||||
|
)
|
||||||
|
assert snapshot.anonymous_id is not None
|
||||||
|
assert snapshot.id is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_anonymize_no_metadata() -> None:
|
||||||
|
session = _make_session()
|
||||||
|
msg = _make_message(session_id=session.id, metadata_json=None)
|
||||||
|
snapshot = anonymize(session=session, messages=[msg])
|
||||||
|
|
||||||
|
assert snapshot.question_type is None
|
||||||
|
assert snapshot.gua_name is None
|
||||||
|
assert snapshot.sign_level is None
|
||||||
|
assert snapshot.keywords is None
|
||||||
|
assert snapshot.has_changing_yao is None
|
||||||
Reference in New Issue
Block a user