# eryao/backend/src/v1/agent/asr.py

from __future__ import annotations

import asyncio
from typing import Any

from core.config.settings import config
from core.logging import get_logger

# Logger for this module, obtained from the project's logging helper with this module's __name__.
logger = get_logger(__name__)


class AsrService:
    """Speech-to-text helper built on the DashScope ``Recognition`` SDK.

    The DashScope API key is read from ``config.llm.provider_keys`` the first
    time it is needed and then cached on the instance.
    """

    def __init__(self) -> None:
        # Filled in lazily by _get_api_key(); construction reads no configuration.
        self._api_key: str | None = None

    def _get_api_key(self) -> str:
        """Return the DashScope API key, loading and caching it on first call.

        Raises:
            ValueError: If ``config.llm.provider_keys`` has no non-empty
                ``"dashscope"`` entry.
        """
        if self._api_key is None:
            dashscope_key = config.llm.provider_keys.get("dashscope")
            if not dashscope_key:
                raise ValueError(
                    "DASHSCOPE_API_KEY not configured. Set SOCIAL_LLM__PROVIDER_KEYS__DASHSCOPE in environment."
                )
            self._api_key = dashscope_key
        return self._api_key

    async def transcribe_file(self, file_path: str, filename: str) -> str:
        """Transcribe the audio file at *file_path* and return its text.

        The blocking ``Recognition.call`` is run in the event loop's default
        executor so the loop is not blocked while it executes.

        Args:
            file_path: Path passed to ``Recognition.call(file=...)``.
            filename: Name of the audio file; used only as log context.

        Returns:
            The transcript, or ``""`` when the response carries no sentence
            payload.

        Raises:
            RuntimeError: If the callback reports an error, the response
                status code is not 200, or any other exception occurs (the
                original exception is chained as ``__cause__``).
            asyncio.CancelledError: Propagated unchanged.
        """
        try:
            # Imported lazily so the module can be imported without the SDK.
            import dashscope
            from dashscope.audio.asr import Recognition, RecognitionCallback

            dashscope.api_key = self._get_api_key()

            # We are inside a coroutine, so a loop is guaranteed to be running.
            loop = asyncio.get_running_loop()

            class SyncCallback(RecognitionCallback):
                # Holds the stringified error reported through on_error, if any.
                error: str | None = None

                def on_error(self, result: Any) -> None:
                    self.error = str(result)

            callback = SyncCallback()
            # NOTE(review): format and sample rate are fixed here regardless of
            # `filename` -- presumably callers always supply 16 kHz WAV; confirm.
            recognizer = Recognition(
                model="fun-asr-realtime-2026-02-28",
                callback=callback,
                format="wav",
                sample_rate=16000,
            )

            result: Any = await loop.run_in_executor(
                None,
                lambda: recognizer.call(file=file_path),
            )

            if callback.error:
                raise RuntimeError(f"ASR error: {callback.error}")
            status_code = self._extract_field(result, "status_code")
            if status_code != 200:
                message = self._extract_field(result, "message")
                raise RuntimeError(f"ASR transcription failed: {message}")

            sentence = self._extract_sentence_payload(result)
            if sentence is None:
                request_id = self._extract_field(result, "request_id")
                logger.warning(
                    "ASR returned empty result", extra={"request_id": request_id}
                )
                return ""

            transcription = self._sentence_to_text(sentence)

            # "filename" is a reserved LogRecord attribute: the standard
            # library logger raises KeyError when it appears in `extra`, which
            # would turn a successful transcription into a failure below.
            # Use a non-clashing key instead.
            logger.info(
                "ASR transcription completed",
                extra={
                    "audio_filename": filename,
                    "transcript_length": len(transcription),
                },
            )
            return transcription

        except asyncio.CancelledError:
            raise
        except RuntimeError:
            # Already in the shape callers expect; do not re-wrap.
            raise
        except Exception as exc:
            logger.exception("ASR transcription error")
            raise RuntimeError(f"ASR transcription failed: {exc}") from exc

    def _sentence_to_text(self, sentence: Any) -> str:
        """Flatten a sentence payload into a plain transcript string.

        A dict contributes its ``"text"`` value; a list contributes the
        ``"text"`` of each dict item joined by single spaces (non-dict items
        are skipped); any other truthy value is stringified. A missing or
        ``None`` text is treated as ``""`` so the result is always a ``str``.
        """

        def text_of(item: dict) -> str:
            value = item.get("text")
            return "" if value is None else str(value)

        if isinstance(sentence, dict):
            return text_of(sentence)
        if isinstance(sentence, list):
            return " ".join(
                text_of(item) for item in sentence if isinstance(item, dict)
            )
        return str(sentence) if sentence else ""

    def _extract_sentence_payload(self, result: Any) -> Any | None:
        """Locate the ``sentence`` payload in a recognition result.

        For a dict result, looks in ``result["output"]`` (as a dict or as an
        object attribute) and only falls back to ``result["sentence"]`` when
        there is no ``output``. For an object result, tries a callable
        ``get_sentence()`` first and then ``result.output``.

        Returns:
            The sentence payload, or ``None`` if none is found.
        """
        if isinstance(result, dict):
            output = result.get("output")
            if isinstance(output, dict):
                return output.get("sentence")
            if output is not None:
                return getattr(output, "sentence", None)
            return result.get("sentence")

        get_sentence = getattr(result, "get_sentence", None)
        if callable(get_sentence):
            sentence = get_sentence()
            if sentence is not None:
                return sentence

        output = getattr(result, "output", None)
        if output is None:
            return None
        if isinstance(output, dict):
            return output.get("sentence")
        return getattr(output, "sentence", None)

    def _extract_field(self, result: Any, field: str) -> Any | None:
        """Read *field* from a dict key or object attribute; ``None`` if absent."""
        if isinstance(result, dict):
            return result.get(field)
        return getattr(result, field, None)


# Module-level instance created at import time; AsrService.__init__ only
# initializes an empty API-key cache, so no configuration is read here.
asr_service = AsrService()