2026-03-05 15:34:37 +08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
2026-03-08 17:34:28 +08:00
|
|
|
import asyncio
|
2026-03-05 15:34:37 +08:00
|
|
|
from dataclasses import dataclass
|
2026-03-07 17:30:20 +08:00
|
|
|
from datetime import date
|
2026-03-11 21:06:02 +08:00
|
|
|
import hashlib
|
2026-03-08 17:34:28 +08:00
|
|
|
from typing import Any, Protocol
|
2026-03-13 17:27:18 +08:00
|
|
|
from urllib.parse import urlparse
|
2026-03-05 15:34:37 +08:00
|
|
|
|
2026-03-08 17:34:28 +08:00
|
|
|
import dashscope
|
2026-03-16 09:01:01 +08:00
|
|
|
from ag_ui.core import RunAgentInput
|
2026-03-08 17:34:28 +08:00
|
|
|
from dashscope.audio.asr import Recognition, RecognitionCallback
|
2026-03-05 15:34:37 +08:00
|
|
|
from fastapi import HTTPException
|
2026-03-07 17:30:20 +08:00
|
|
|
from sqlalchemy.exc import IntegrityError
|
2026-03-05 15:34:37 +08:00
|
|
|
|
|
|
|
|
from core.auth.models import CurrentUser
|
2026-03-11 21:06:02 +08:00
|
|
|
from core.agentscope.schemas.agui_input import extract_latest_user_payload
|
2026-03-08 17:34:28 +08:00
|
|
|
from core.config.settings import config
|
|
|
|
|
from core.logging import get_logger
|
2026-03-15 17:14:15 +08:00
|
|
|
from schemas.messages.chat_message import (
|
|
|
|
|
AgentChatMessageMetadata,
|
|
|
|
|
UserMessageAttachments,
|
|
|
|
|
)
|
2026-03-16 09:01:01 +08:00
|
|
|
from v1.agent.schemas import HistorySnapshotResponse
|
2026-03-08 17:34:28 +08:00
|
|
|
|
|
|
|
|
logger = get_logger(__name__)
|
2026-03-12 09:29:57 +08:00
|
|
|
_ALLOWED_ATTACHMENT_MIME_TYPES = {"image/png", "image/jpeg", "image/webp"}
|
|
|
|
|
_MAX_ATTACHMENT_BYTES = 5 * 1024 * 1024
|
|
|
|
|
_MAX_TOTAL_ATTACHMENT_BYTES = 12 * 1024 * 1024
|
2026-03-05 15:34:37 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
|
|
|
class TaskAccepted:
|
|
|
|
|
task_id: str
|
2026-03-07 17:30:20 +08:00
|
|
|
thread_id: str
|
|
|
|
|
run_id: str
|
2026-03-05 15:34:37 +08:00
|
|
|
created: bool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AgentRepositoryLike(Protocol):
|
|
|
|
|
async def get_session_owner(self, *, session_id: str) -> str: ...
|
|
|
|
|
|
2026-03-07 17:30:20 +08:00
|
|
|
async def create_session_for_user(
|
|
|
|
|
self, *, user_id: str, session_id: str | None = None
|
|
|
|
|
) -> str: ...
|
2026-03-05 15:34:37 +08:00
|
|
|
|
|
|
|
|
async def commit(self) -> None: ...
|
|
|
|
|
|
|
|
|
|
async def rollback(self) -> None: ...
|
|
|
|
|
|
2026-03-07 17:30:20 +08:00
|
|
|
async def get_history_day(
|
|
|
|
|
self, *, session_id: str, before: date | None
|
|
|
|
|
) -> dict[str, object] | None: ...
|
|
|
|
|
|
|
|
|
|
async def get_latest_session_id_for_user(self, *, user_id: str) -> str | None: ...
|
|
|
|
|
|
2026-03-11 21:06:02 +08:00
|
|
|
async def persist_user_message(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
session_id: str,
|
2026-03-15 17:14:15 +08:00
|
|
|
content: str,
|
|
|
|
|
metadata: AgentChatMessageMetadata | None,
|
2026-03-11 21:06:02 +08:00
|
|
|
) -> None: ...
|
|
|
|
|
|
2026-03-05 15:34:37 +08:00
|
|
|
|
|
|
|
|
class QueueClientLike(Protocol):
|
|
|
|
|
async def enqueue(
|
|
|
|
|
self, *, command: dict[str, object], dedup_key: str | None
|
|
|
|
|
) -> str: ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EventStreamLike(Protocol):
|
|
|
|
|
async def read(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
session_id: str,
|
|
|
|
|
last_event_id: str | None,
|
|
|
|
|
) -> list[dict[str, object]]: ...
|
|
|
|
|
|
|
|
|
|
|
2026-03-11 21:06:02 +08:00
|
|
|
class AttachmentStorageLike(Protocol):
|
|
|
|
|
async def upload_bytes(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
bucket: str,
|
|
|
|
|
path: str,
|
|
|
|
|
content: bytes,
|
|
|
|
|
content_type: str,
|
|
|
|
|
) -> str: ...
|
|
|
|
|
|
2026-03-12 09:29:57 +08:00
|
|
|
async def download_bytes(self, *, bucket: str, path: str) -> bytes: ...
|
|
|
|
|
|
|
|
|
|
async def create_signed_url(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
bucket: str,
|
|
|
|
|
path: str,
|
|
|
|
|
expires_in_seconds: int,
|
|
|
|
|
) -> str: ...
|
|
|
|
|
|
2026-03-13 15:42:01 +08:00
|
|
|
def parse_signed_url(self, url: str) -> tuple[str, str]: ...
|
|
|
|
|
|
2026-03-11 21:06:02 +08:00
|
|
|
|
2026-03-05 15:34:37 +08:00
|
|
|
def ensure_session_owner(*, owner_id: str, current_user: CurrentUser) -> None:
|
|
|
|
|
if owner_id != str(current_user.id):
|
|
|
|
|
raise HTTPException(status_code=403, detail="Forbidden")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AgentService:
|
2026-03-11 17:16:11 +08:00
|
|
|
_repository: AgentRepositoryLike
|
|
|
|
|
_queue: QueueClientLike
|
|
|
|
|
_stream: EventStreamLike
|
2026-03-11 21:06:02 +08:00
|
|
|
_attachment_storage: AttachmentStorageLike | None
|
2026-03-11 17:16:11 +08:00
|
|
|
|
2026-03-12 09:29:57 +08:00
|
|
|
_SIGNED_URL_EXPIRES_IN_SECONDS = 3600
|
|
|
|
|
|
2026-03-05 15:34:37 +08:00
|
|
|
def __init__(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
repository: AgentRepositoryLike,
|
|
|
|
|
queue: QueueClientLike,
|
|
|
|
|
stream: EventStreamLike,
|
2026-03-11 21:06:02 +08:00
|
|
|
attachment_storage: AttachmentStorageLike | None = None,
|
2026-03-05 15:34:37 +08:00
|
|
|
) -> None:
|
|
|
|
|
self._repository = repository
|
|
|
|
|
self._queue = queue
|
|
|
|
|
self._stream = stream
|
2026-03-11 21:06:02 +08:00
|
|
|
self._attachment_storage = attachment_storage
|
2026-03-05 15:34:37 +08:00
|
|
|
|
|
|
|
|
async def enqueue_run(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
2026-03-07 17:30:20 +08:00
|
|
|
run_input: RunAgentInput,
|
2026-03-05 15:34:37 +08:00
|
|
|
current_user: CurrentUser,
|
|
|
|
|
) -> TaskAccepted:
|
|
|
|
|
created = False
|
2026-03-07 17:30:20 +08:00
|
|
|
thread_id = run_input.thread_id
|
|
|
|
|
run_id = run_input.run_id
|
|
|
|
|
try:
|
|
|
|
|
owner = await self._repository.get_session_owner(session_id=thread_id)
|
|
|
|
|
except HTTPException as exc:
|
|
|
|
|
if exc.status_code != 404:
|
|
|
|
|
raise
|
|
|
|
|
try:
|
|
|
|
|
await self._repository.create_session_for_user(
|
|
|
|
|
user_id=str(current_user.id),
|
|
|
|
|
session_id=thread_id,
|
|
|
|
|
)
|
|
|
|
|
await self._repository.commit()
|
|
|
|
|
created = True
|
|
|
|
|
except IntegrityError:
|
|
|
|
|
await self._repository.rollback()
|
|
|
|
|
owner = await self._repository.get_session_owner(session_id=thread_id)
|
|
|
|
|
ensure_session_owner(owner_id=owner, current_user=current_user)
|
2026-03-05 15:34:37 +08:00
|
|
|
else:
|
|
|
|
|
ensure_session_owner(owner_id=owner, current_user=current_user)
|
|
|
|
|
|
2026-03-11 21:06:02 +08:00
|
|
|
user_message_text, user_message_metadata = await self._prepare_user_message(
|
|
|
|
|
run_input=run_input,
|
|
|
|
|
current_user=current_user,
|
|
|
|
|
)
|
|
|
|
|
await self._repository.persist_user_message(
|
|
|
|
|
session_id=thread_id,
|
2026-03-15 17:14:15 +08:00
|
|
|
content=user_message_text,
|
2026-03-11 21:06:02 +08:00
|
|
|
metadata=user_message_metadata,
|
|
|
|
|
)
|
|
|
|
|
await self._repository.commit()
|
|
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
task_id = await self._queue.enqueue(
|
|
|
|
|
command={
|
|
|
|
|
"command": "run",
|
2026-03-11 17:16:11 +08:00
|
|
|
"owner_id": str(current_user.id),
|
2026-03-15 17:14:15 +08:00
|
|
|
"run_input": {
|
|
|
|
|
"messages": [
|
|
|
|
|
msg.model_dump(mode="json", exclude_none=True)
|
|
|
|
|
for msg in run_input.messages
|
|
|
|
|
],
|
|
|
|
|
},
|
2026-03-10 17:44:29 +08:00
|
|
|
},
|
|
|
|
|
dedup_key=None,
|
|
|
|
|
)
|
2026-03-05 15:34:37 +08:00
|
|
|
return TaskAccepted(
|
2026-03-07 17:30:20 +08:00
|
|
|
task_id=task_id,
|
|
|
|
|
thread_id=thread_id,
|
|
|
|
|
run_id=run_id,
|
|
|
|
|
created=created,
|
2026-03-05 15:34:37 +08:00
|
|
|
)
|
|
|
|
|
|
2026-03-15 17:14:15 +08:00
|
|
|
async def load_agent_input_messages(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
thread_id: str,
|
|
|
|
|
) -> dict[str, object] | None:
|
|
|
|
|
"""Load recent messages for runtime agent input.
|
|
|
|
|
|
|
|
|
|
Returns messages from today and yesterday (if exists).
|
|
|
|
|
"""
|
|
|
|
|
today = await self._repository.get_history_day(
|
|
|
|
|
session_id=thread_id,
|
|
|
|
|
before=None,
|
|
|
|
|
)
|
|
|
|
|
if not today:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
yesterday = await self._repository.get_history_day(
|
|
|
|
|
session_id=thread_id,
|
|
|
|
|
before=today.get("day"), # type: ignore
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
messages: list[dict[str, object]] = []
|
|
|
|
|
if yesterday and yesterday.get("messages"):
|
|
|
|
|
messages.extend(yesterday["messages"]) # type: ignore
|
|
|
|
|
if today.get("messages"):
|
|
|
|
|
messages.extend(today["messages"]) # type: ignore
|
|
|
|
|
|
|
|
|
|
return {"messages": messages}
|
|
|
|
|
|
2026-03-11 21:06:02 +08:00
|
|
|
async def _prepare_user_message(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
run_input: RunAgentInput,
|
|
|
|
|
current_user: CurrentUser,
|
2026-03-15 17:14:15 +08:00
|
|
|
) -> tuple[str, AgentChatMessageMetadata | None]:
|
2026-03-13 15:42:01 +08:00
|
|
|
text, content_blocks = extract_latest_user_payload(run_input)
|
|
|
|
|
|
|
|
|
|
user_attachments: UserMessageAttachments | None = None
|
|
|
|
|
for block in content_blocks:
|
|
|
|
|
if not isinstance(block, dict):
|
|
|
|
|
continue
|
|
|
|
|
block_type = block.get("type")
|
|
|
|
|
if block_type != "binary":
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
url = block.get("url")
|
|
|
|
|
mime_type = block.get("mimeType")
|
|
|
|
|
if not isinstance(url, str) or not url:
|
|
|
|
|
continue
|
|
|
|
|
if not isinstance(mime_type, str):
|
|
|
|
|
mime_type = "application/octet-stream"
|
|
|
|
|
|
2026-03-12 09:29:57 +08:00
|
|
|
if self._attachment_storage is None:
|
2026-03-13 17:27:18 +08:00
|
|
|
raise HTTPException(
|
|
|
|
|
status_code=503,
|
|
|
|
|
detail="Attachment storage unavailable",
|
|
|
|
|
)
|
2026-03-12 09:29:57 +08:00
|
|
|
|
2026-03-13 15:42:01 +08:00
|
|
|
try:
|
2026-03-13 17:27:18 +08:00
|
|
|
bucket, path = self._validate_binary_signed_url(
|
|
|
|
|
url=url,
|
|
|
|
|
thread_id=run_input.thread_id,
|
|
|
|
|
current_user=current_user,
|
|
|
|
|
)
|
2026-03-13 15:42:01 +08:00
|
|
|
user_attachments = UserMessageAttachments(
|
|
|
|
|
bucket=bucket,
|
|
|
|
|
path=path,
|
|
|
|
|
mime_type=mime_type,
|
2026-03-11 21:06:02 +08:00
|
|
|
)
|
2026-03-13 15:42:01 +08:00
|
|
|
break
|
2026-03-13 17:27:18 +08:00
|
|
|
except HTTPException:
|
|
|
|
|
raise
|
|
|
|
|
except Exception as exc: # noqa: BLE001
|
|
|
|
|
logger.warning("Failed to parse signed URL", url=url, error=str(exc))
|
|
|
|
|
raise HTTPException(status_code=422, detail="Invalid signed image url")
|
2026-03-13 15:42:01 +08:00
|
|
|
|
2026-03-15 17:14:15 +08:00
|
|
|
metadata: AgentChatMessageMetadata | None = None
|
2026-03-13 15:42:01 +08:00
|
|
|
if user_attachments is not None:
|
2026-03-15 17:14:15 +08:00
|
|
|
metadata = AgentChatMessageMetadata(
|
|
|
|
|
run_id=run_input.run_id,
|
|
|
|
|
user_message_attachments=user_attachments,
|
|
|
|
|
)
|
2026-03-13 15:42:01 +08:00
|
|
|
|
|
|
|
|
return text, metadata
|
2026-03-11 21:06:02 +08:00
|
|
|
|
2026-03-12 09:29:57 +08:00
|
|
|
async def upload_attachment(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
thread_id: str,
|
|
|
|
|
filename: str | None,
|
|
|
|
|
content_type: str | None,
|
|
|
|
|
payload: bytes,
|
|
|
|
|
current_user: CurrentUser,
|
|
|
|
|
) -> dict[str, str]:
|
|
|
|
|
try:
|
|
|
|
|
owner = await self._repository.get_session_owner(session_id=thread_id)
|
|
|
|
|
except HTTPException as exc:
|
|
|
|
|
if exc.status_code != 404:
|
|
|
|
|
raise
|
|
|
|
|
try:
|
|
|
|
|
await self._repository.create_session_for_user(
|
|
|
|
|
user_id=str(current_user.id),
|
|
|
|
|
session_id=thread_id,
|
|
|
|
|
)
|
|
|
|
|
await self._repository.commit()
|
|
|
|
|
except IntegrityError:
|
|
|
|
|
await self._repository.rollback()
|
|
|
|
|
owner = await self._repository.get_session_owner(session_id=thread_id)
|
|
|
|
|
ensure_session_owner(owner_id=owner, current_user=current_user)
|
|
|
|
|
else:
|
|
|
|
|
ensure_session_owner(owner_id=owner, current_user=current_user)
|
|
|
|
|
if self._attachment_storage is None:
|
|
|
|
|
raise HTTPException(
|
|
|
|
|
status_code=503, detail="Attachment storage unavailable"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if not isinstance(content_type, str):
|
|
|
|
|
raise HTTPException(status_code=422, detail="Unsupported attachment type")
|
|
|
|
|
mime_type = content_type.lower()
|
|
|
|
|
if mime_type not in _ALLOWED_ATTACHMENT_MIME_TYPES:
|
|
|
|
|
raise HTTPException(status_code=422, detail="Unsupported attachment type")
|
|
|
|
|
if not payload:
|
|
|
|
|
raise HTTPException(status_code=422, detail="Empty attachment")
|
|
|
|
|
if len(payload) > _MAX_ATTACHMENT_BYTES:
|
|
|
|
|
raise HTTPException(status_code=413, detail="Attachment too large")
|
|
|
|
|
|
|
|
|
|
suffix = _mime_to_suffix(mime_type)
|
|
|
|
|
checksum = hashlib.sha1(payload).hexdigest()[:16]
|
|
|
|
|
filename_seed = filename if isinstance(filename, str) and filename else "upload"
|
|
|
|
|
filename_hash = hashlib.sha1(filename_seed.encode("utf-8")).hexdigest()[:8]
|
|
|
|
|
path = (
|
|
|
|
|
f"agent-inputs/{current_user.id}/{thread_id}/uploads/"
|
|
|
|
|
f"{filename_hash}-{checksum}.{suffix}"
|
|
|
|
|
)
|
|
|
|
|
bucket_name = config.storage.bucket
|
|
|
|
|
try:
|
|
|
|
|
stored_path = await self._attachment_storage.upload_bytes(
|
|
|
|
|
bucket=bucket_name,
|
|
|
|
|
path=path,
|
|
|
|
|
content=payload,
|
|
|
|
|
content_type=mime_type,
|
|
|
|
|
)
|
|
|
|
|
signed_url = await self._attachment_storage.create_signed_url(
|
|
|
|
|
bucket=bucket_name,
|
|
|
|
|
path=stored_path,
|
|
|
|
|
expires_in_seconds=self._SIGNED_URL_EXPIRES_IN_SECONDS,
|
|
|
|
|
)
|
|
|
|
|
except Exception: # noqa: BLE001
|
|
|
|
|
logger.exception(
|
|
|
|
|
"Attachment upload failed",
|
|
|
|
|
extra={
|
|
|
|
|
"bucket": bucket_name,
|
|
|
|
|
"path": path,
|
|
|
|
|
"mime_type": mime_type,
|
|
|
|
|
"thread_id": thread_id,
|
|
|
|
|
},
|
|
|
|
|
)
|
|
|
|
|
raise HTTPException(status_code=502, detail="Failed to upload attachment")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"bucket": bucket_name,
|
|
|
|
|
"path": stored_path,
|
|
|
|
|
"mimeType": mime_type,
|
|
|
|
|
"url": signed_url,
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-13 17:27:18 +08:00
|
|
|
async def create_attachment_signed_url(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
bucket: str,
|
|
|
|
|
path: str,
|
|
|
|
|
current_user: CurrentUser,
|
|
|
|
|
) -> dict[str, str]:
|
|
|
|
|
if self._attachment_storage is None:
|
|
|
|
|
raise HTTPException(
|
|
|
|
|
status_code=503, detail="Attachment storage unavailable"
|
|
|
|
|
)
|
|
|
|
|
normalized_bucket = bucket.strip()
|
|
|
|
|
if normalized_bucket != config.storage.bucket:
|
|
|
|
|
raise HTTPException(status_code=422, detail="Invalid attachment bucket")
|
|
|
|
|
|
|
|
|
|
normalized_path = path.strip()
|
|
|
|
|
expected_prefix = f"agent-inputs/{current_user.id}/"
|
|
|
|
|
if not _is_safe_attachment_path(
|
|
|
|
|
normalized_path, expected_prefix=expected_prefix
|
|
|
|
|
):
|
|
|
|
|
raise HTTPException(status_code=422, detail="Invalid attachment path scope")
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
signed_url = await self._attachment_storage.create_signed_url(
|
|
|
|
|
bucket=normalized_bucket,
|
|
|
|
|
path=normalized_path,
|
|
|
|
|
expires_in_seconds=self._SIGNED_URL_EXPIRES_IN_SECONDS,
|
|
|
|
|
)
|
|
|
|
|
except Exception: # noqa: BLE001
|
|
|
|
|
logger.exception(
|
|
|
|
|
"Attachment signed URL generation failed",
|
|
|
|
|
extra={
|
|
|
|
|
"bucket": normalized_bucket,
|
|
|
|
|
"path": normalized_path,
|
|
|
|
|
"user_id": str(current_user.id),
|
|
|
|
|
},
|
|
|
|
|
)
|
|
|
|
|
raise HTTPException(status_code=502, detail="Failed to generate signed URL")
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"bucket": normalized_bucket,
|
|
|
|
|
"path": normalized_path,
|
|
|
|
|
"url": signed_url,
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-05 15:34:37 +08:00
|
|
|
async def stream_events(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
2026-03-07 17:30:20 +08:00
|
|
|
thread_id: str,
|
2026-03-05 15:34:37 +08:00
|
|
|
last_event_id: str | None,
|
|
|
|
|
current_user: CurrentUser,
|
|
|
|
|
) -> list[dict[str, object]]:
|
2026-03-07 17:30:20 +08:00
|
|
|
owner = await self._repository.get_session_owner(session_id=thread_id)
|
2026-03-05 15:34:37 +08:00
|
|
|
ensure_session_owner(owner_id=owner, current_user=current_user)
|
|
|
|
|
return await self._stream.read(
|
2026-03-07 17:30:20 +08:00
|
|
|
session_id=thread_id,
|
2026-03-05 15:34:37 +08:00
|
|
|
last_event_id=last_event_id,
|
|
|
|
|
)
|
2026-03-07 17:30:20 +08:00
|
|
|
|
|
|
|
|
async def get_history_snapshot(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
thread_id: str,
|
|
|
|
|
before: date | None,
|
|
|
|
|
current_user: CurrentUser,
|
2026-03-16 09:01:01 +08:00
|
|
|
) -> HistorySnapshotResponse:
|
|
|
|
|
from schemas.messages.chat_message import AgentChatMessage
|
|
|
|
|
from v1.agent.utils import convert_message_to_history
|
|
|
|
|
from v1.agent.schemas import HistoryMessage
|
|
|
|
|
|
2026-03-07 17:30:20 +08:00
|
|
|
owner = await self._repository.get_session_owner(session_id=thread_id)
|
|
|
|
|
ensure_session_owner(owner_id=owner, current_user=current_user)
|
|
|
|
|
day_payload = await self._repository.get_history_day(
|
|
|
|
|
session_id=thread_id,
|
|
|
|
|
before=before,
|
|
|
|
|
)
|
2026-03-16 09:01:01 +08:00
|
|
|
|
|
|
|
|
messages: list[HistoryMessage] = []
|
|
|
|
|
if day_payload:
|
|
|
|
|
raw_messages = day_payload.get("messages") or []
|
|
|
|
|
for msg_dict in raw_messages:
|
|
|
|
|
msg = AgentChatMessage.model_validate(msg_dict)
|
|
|
|
|
|
|
|
|
|
signed_url: str | None = None
|
|
|
|
|
if self._attachment_storage and msg.metadata:
|
|
|
|
|
att = msg.metadata.user_message_attachments
|
|
|
|
|
if att:
|
|
|
|
|
signed_url = await self._attachment_storage.create_signed_url(
|
|
|
|
|
bucket=att.bucket,
|
|
|
|
|
path=att.path,
|
|
|
|
|
expires_in_seconds=self._SIGNED_URL_EXPIRES_IN_SECONDS,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
converted = convert_message_to_history(msg, None)
|
|
|
|
|
if signed_url:
|
|
|
|
|
converted["url"] = signed_url
|
|
|
|
|
messages.append(HistoryMessage.model_validate(converted))
|
|
|
|
|
|
|
|
|
|
return HistorySnapshotResponse(
|
|
|
|
|
scope="history_day",
|
|
|
|
|
threadId=thread_id,
|
|
|
|
|
day=str(day_payload.get("day"))
|
|
|
|
|
if day_payload and day_payload.get("day")
|
|
|
|
|
else None,
|
|
|
|
|
hasMore=bool(day_payload.get("hasMore")) if day_payload else False,
|
|
|
|
|
messages=messages,
|
2026-03-07 17:30:20 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
async def get_user_history_snapshot(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
current_user: CurrentUser,
|
|
|
|
|
thread_id: str | None,
|
|
|
|
|
before: date | None,
|
2026-03-16 09:01:01 +08:00
|
|
|
) -> HistorySnapshotResponse:
|
2026-03-07 17:30:20 +08:00
|
|
|
target_thread_id = thread_id
|
|
|
|
|
if target_thread_id is None:
|
|
|
|
|
target_thread_id = await self._repository.get_latest_session_id_for_user(
|
|
|
|
|
user_id=str(current_user.id)
|
|
|
|
|
)
|
|
|
|
|
if target_thread_id is None:
|
2026-03-16 09:01:01 +08:00
|
|
|
return HistorySnapshotResponse(
|
|
|
|
|
scope="history_day",
|
|
|
|
|
threadId=None,
|
|
|
|
|
day=None,
|
|
|
|
|
hasMore=False,
|
|
|
|
|
messages=[],
|
|
|
|
|
)
|
2026-03-07 17:30:20 +08:00
|
|
|
return await self.get_history_snapshot(
|
|
|
|
|
thread_id=target_thread_id,
|
|
|
|
|
before=before,
|
|
|
|
|
current_user=current_user,
|
|
|
|
|
)
|
2026-03-08 17:34:28 +08:00
|
|
|
|
2026-03-13 17:27:18 +08:00
|
|
|
def _validate_binary_signed_url(
|
|
|
|
|
self,
|
|
|
|
|
*,
|
|
|
|
|
url: str,
|
|
|
|
|
thread_id: str,
|
|
|
|
|
current_user: CurrentUser,
|
|
|
|
|
) -> tuple[str, str]:
|
|
|
|
|
if self._attachment_storage is None:
|
|
|
|
|
raise HTTPException(
|
|
|
|
|
status_code=503, detail="Attachment storage unavailable"
|
|
|
|
|
)
|
|
|
|
|
parsed = urlparse(url)
|
|
|
|
|
expected_host = urlparse(config.supabase.url).netloc
|
|
|
|
|
if parsed.netloc != expected_host:
|
|
|
|
|
raise HTTPException(status_code=422, detail="INVALID_BINARY_URL_HOST")
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
bucket, path = self._attachment_storage.parse_signed_url(url)
|
|
|
|
|
except Exception as exc: # noqa: BLE001
|
|
|
|
|
raise HTTPException(
|
|
|
|
|
status_code=422, detail="Invalid signed image url"
|
|
|
|
|
) from exc
|
|
|
|
|
|
|
|
|
|
if bucket != config.storage.bucket:
|
|
|
|
|
raise HTTPException(status_code=422, detail="INVALID_BINARY_URL_BUCKET")
|
|
|
|
|
|
|
|
|
|
expected_prefix = f"agent-inputs/{current_user.id}/{thread_id}/uploads/"
|
|
|
|
|
if not _is_safe_attachment_path(path, expected_prefix=expected_prefix):
|
|
|
|
|
raise HTTPException(status_code=422, detail="INVALID_BINARY_URL_PATH_SCOPE")
|
|
|
|
|
return bucket, path
|
|
|
|
|
|
2026-03-08 17:34:28 +08:00
|
|
|
|
|
|
|
|
class AsrService:
|
|
|
|
|
def __init__(self) -> None:
|
|
|
|
|
self._api_key: str | None = None
|
|
|
|
|
|
|
|
|
|
def _get_api_key(self) -> str:
|
|
|
|
|
if self._api_key is None:
|
|
|
|
|
dashscope_key = config.llm.provider_keys.get("dashscope")
|
|
|
|
|
if not dashscope_key:
|
|
|
|
|
raise ValueError(
|
|
|
|
|
"DASHSCOPE_API_KEY not configured. Set SOCIAL_LLM__PROVIDER_KEYS__DASHSCOPE in environment."
|
|
|
|
|
)
|
|
|
|
|
self._api_key = dashscope_key
|
|
|
|
|
return self._api_key
|
|
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
async def transcribe_file(self, file_path: str, filename: str) -> str:
|
2026-03-08 17:34:28 +08:00
|
|
|
try:
|
|
|
|
|
dashscope.api_key = self._get_api_key()
|
|
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
loop = asyncio.get_event_loop()
|
2026-03-08 17:34:28 +08:00
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
class SyncCallback(RecognitionCallback):
|
|
|
|
|
error: str | None = None
|
2026-03-08 17:34:28 +08:00
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
def on_error(self, result: Any) -> None:
|
|
|
|
|
self.error = str(result)
|
2026-03-08 17:34:28 +08:00
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
callback = SyncCallback()
|
|
|
|
|
recognizer = Recognition(
|
|
|
|
|
model="fun-asr-realtime-2026-02-28",
|
|
|
|
|
callback=callback,
|
|
|
|
|
format="wav",
|
|
|
|
|
sample_rate=16000,
|
|
|
|
|
)
|
2026-03-08 17:34:28 +08:00
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
result: Any = await loop.run_in_executor(
|
|
|
|
|
None,
|
|
|
|
|
lambda: recognizer.call(file=file_path),
|
|
|
|
|
)
|
2026-03-08 17:34:28 +08:00
|
|
|
|
|
|
|
|
if callback.error:
|
|
|
|
|
raise RuntimeError(f"ASR error: {callback.error}")
|
2026-03-10 17:44:29 +08:00
|
|
|
status_code = self._extract_field(result, "status_code")
|
|
|
|
|
if status_code != 200:
|
|
|
|
|
message = self._extract_field(result, "message")
|
|
|
|
|
raise RuntimeError(f"ASR transcription failed: {message}")
|
|
|
|
|
|
|
|
|
|
sentence = self._extract_sentence_payload(result)
|
|
|
|
|
if sentence is None:
|
|
|
|
|
request_id = self._extract_field(result, "request_id")
|
2026-03-08 17:34:28 +08:00
|
|
|
logger.warning(
|
2026-03-10 17:44:29 +08:00
|
|
|
"ASR returned empty result", extra={"request_id": request_id}
|
2026-03-08 17:34:28 +08:00
|
|
|
)
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
if isinstance(sentence, dict):
|
|
|
|
|
transcription = sentence.get("text", "")
|
|
|
|
|
elif isinstance(sentence, list):
|
|
|
|
|
transcription = " ".join(
|
|
|
|
|
item.get("text", "") for item in sentence if isinstance(item, dict)
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
transcription = str(sentence) if sentence else ""
|
|
|
|
|
|
|
|
|
|
logger.info(
|
|
|
|
|
"ASR transcription completed",
|
|
|
|
|
extra={"filename": filename, "transcript_length": len(transcription)},
|
|
|
|
|
)
|
|
|
|
|
return transcription
|
|
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
raise
|
|
|
|
|
except RuntimeError:
|
|
|
|
|
raise
|
2026-03-08 17:34:28 +08:00
|
|
|
except Exception as exc:
|
|
|
|
|
logger.exception("ASR transcription error")
|
|
|
|
|
raise RuntimeError(f"ASR transcription failed: {exc}") from exc
|
|
|
|
|
|
2026-03-10 17:44:29 +08:00
|
|
|
def _extract_sentence_payload(self, result: Any) -> Any | None:
|
|
|
|
|
if isinstance(result, dict):
|
|
|
|
|
output = result.get("output")
|
|
|
|
|
if isinstance(output, dict):
|
|
|
|
|
return output.get("sentence")
|
|
|
|
|
if output is not None:
|
|
|
|
|
return getattr(output, "sentence", None)
|
|
|
|
|
return result.get("sentence")
|
|
|
|
|
|
|
|
|
|
get_sentence = getattr(result, "get_sentence", None)
|
|
|
|
|
if callable(get_sentence):
|
|
|
|
|
sentence = get_sentence()
|
|
|
|
|
if sentence is not None:
|
|
|
|
|
return sentence
|
|
|
|
|
|
|
|
|
|
output = getattr(result, "output", None)
|
|
|
|
|
if output is None:
|
|
|
|
|
return None
|
|
|
|
|
if isinstance(output, dict):
|
|
|
|
|
return output.get("sentence")
|
|
|
|
|
return getattr(output, "sentence", None)
|
|
|
|
|
|
|
|
|
|
def _extract_field(self, result: Any, field: str) -> Any | None:
|
|
|
|
|
if isinstance(result, dict):
|
|
|
|
|
return result.get(field)
|
|
|
|
|
return getattr(result, field, None)
|
|
|
|
|
|
2026-03-08 17:34:28 +08:00
|
|
|
|
|
|
|
|
asr_service = AsrService()
|
2026-03-11 21:06:02 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _mime_to_suffix(mime_type: str) -> str:
|
|
|
|
|
mapping = {
|
|
|
|
|
"image/png": "png",
|
|
|
|
|
"image/jpeg": "jpg",
|
|
|
|
|
"image/webp": "webp",
|
|
|
|
|
}
|
|
|
|
|
return mapping.get(mime_type.lower(), "bin")
|
2026-03-12 09:29:57 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_safe_attachment_path(path: str, *, expected_prefix: str) -> bool:
|
|
|
|
|
normalized = path.strip()
|
|
|
|
|
if not normalized:
|
|
|
|
|
return False
|
|
|
|
|
if normalized.startswith("/"):
|
|
|
|
|
return False
|
|
|
|
|
if ".." in normalized:
|
|
|
|
|
return False
|
|
|
|
|
return normalized.startswith(expected_prefix)
|