feat(agent): support multimodal intent input and ASR transcribe endpoint

2026-03-08 17:34:28 +08:00
parent 5ada60e834
commit 1060503a2d
11 changed files with 422 additions and 74 deletions
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from io import BytesIO
 from types import SimpleNamespace
 from uuid import uuid4

@@ -346,3 +347,36 @@ def test_resume_accepts_tool_message_without_user_message() -> None:
        assert response.json()["taskId"] == "task-resume-1"
    finally:
        app.dependency_overrides = {}
+
+
+def test_asr_transcribe_returns_sync_transcript(monkeypatch) -> None:  # POST a fake audio upload to the transcribe route and expect the stubbed transcript in the JSON body
+    app.dependency_overrides[get_current_user] = lambda: CurrentUser(  # replace the get_current_user dependency with a CurrentUser built here
+        id=uuid4(), email="user@example.com"
+    )
+
+    async def mock_transcribe(audio_data: bytes, filename: str) -> str:  # stub: ignores both arguments and returns a fixed string
+        return "这是测试转写结果"  # English: "This is the test transcription result"
+
+    monkeypatch.setattr(  # install the stub at the dotted path below
+        "v1.agent.service.asr_service.transcribe",
+        mock_transcribe,
+    )
+
+    client = TestClient(app)
+
+    wav_content = b"fake-wav-file-content"  # placeholder bytes, not real WAV data
+    wav_file = BytesIO(wav_content)
+    wav_file.name = "test.wav"  # NOTE(review): the upload tuple below already passes "test.wav" - confirm this attribute is still needed
+
+    try:
+        response = client.post(
+            "/api/v1/agent/transcribe",
+            files={"audio": ("test.wav", wav_file, "audio/wav")},  # upload field "audio": (filename, file object, content type)
+        )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert "transcript" in data
+        assert data["transcript"] == "这是测试转写结果"  # same literal the stub returns
+    finally:
+        app.dependency_overrides = {}  # drop the override installed at the top of this test, pass or fail