feat(agent): support multimodal intent input and ASR transcribe endpoint

This commit is contained in:
zl-q
2026-03-08 17:34:28 +08:00
parent 5ada60e834
commit 1060503a2d
11 changed files with 422 additions and 74 deletions
+37 -5
View File
@@ -5,12 +5,12 @@ import asyncio
from datetime import date
import re
import time
from typing import Annotated
from typing import Annotated, Union
from ag_ui.core import RunAgentInput
from fastapi import APIRouter, Depends, Header, Query, Request, status
from fastapi import APIRouter, Depends, Header, Query, Request, status, UploadFile
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from fastapi.responses import JSONResponse, StreamingResponse
from core.agent.infrastructure.agui.stream import to_sse_event
from core.agent.domain.agui_input import (
@@ -20,8 +20,8 @@ from core.agent.domain.agui_input import (
from core.auth.models import CurrentUser
from services.base.redis import get_or_init_redis_client
from v1.agent.dependencies import get_agent_service
from v1.agent.schemas import TaskAcceptedResponse
from v1.agent.service import AgentService
from v1.agent.schemas import AsrTranscribeResponse, TaskAcceptedResponse
from v1.agent.service import AgentService, asr_service
from v1.users.dependencies import get_current_user
router = APIRouter(prefix="/agent", tags=["agent"])
@@ -211,3 +211,35 @@ async def get_user_history_snapshot(
thread_id=thread_id,
before=before,
)
@router.post(
    "/transcribe",
    response_model=AsrTranscribeResponse,
    status_code=status.HTTP_200_OK,
)
async def transcribe(
    audio: UploadFile,
    current_user: Annotated[CurrentUser, Depends(get_current_user)],
) -> Union[AsrTranscribeResponse, JSONResponse]:
    """Transcribe an uploaded audio file to text.

    The uploaded bytes are handed to ``asr_service.transcribe`` together
    with the upload's filename (``"unknown"`` when the client sent none).

    Args:
        audio: The uploaded audio file.
        current_user: Resolved through ``get_current_user``; declared as a
            dependency and not otherwise used in the body.

    Returns:
        ``AsrTranscribeResponse`` carrying the transcript on success.
        On failure a ``JSONResponse`` of the form ``{"detail": <message>}``:
        status 400 for a ``ValueError`` (including an empty upload) and
        status 500 for a ``RuntimeError``.
    """
    try:
        payload = await audio.read()
        if not payload:
            # Reported to the client through the ValueError branch below.
            raise ValueError("Empty audio file")
        source_name = audio.filename or "unknown"
        text = await asr_service.transcribe(payload, source_name)
        return AsrTranscribeResponse(transcript=text)
    except (ValueError, RuntimeError) as exc:
        # ValueError is checked first so it always wins, mirroring the
        # order in which the two cases are meant to be matched.
        if isinstance(exc, ValueError):
            failure_status = status.HTTP_400_BAD_REQUEST
        else:
            failure_status = status.HTTP_500_INTERNAL_SERVER_ERROR
        return JSONResponse(
            status_code=failure_status,
            content={"detail": str(exc)},
        )