merge: integrate feature/tasks-8-9-multimodal-asr into dev

2026-03-08 17:35:53 +08:00
parent 44a0e15950 1060503a2d
commit 14508c52f6
11 changed files with 422 additions and 74 deletions
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from io import BytesIO
 from types import SimpleNamespace
 from uuid import uuid4

@@ -346,3 +347,36 @@ def test_resume_accepts_tool_message_without_user_message() -> None:
        assert response.json()["taskId"] == "task-resume-1"
    finally:
        app.dependency_overrides = {}
+
+
+def test_asr_transcribe_returns_sync_transcript(monkeypatch) -> None:
+    app.dependency_overrides[get_current_user] = lambda: CurrentUser(
+        id=uuid4(), email="user@example.com"
+    )
+
+    async def mock_transcribe(audio_data: bytes, filename: str) -> str:
+        return "这是测试转写结果"
+
+    monkeypatch.setattr(
+        "v1.agent.service.asr_service.transcribe",
+        mock_transcribe,
+    )
+
+    client = TestClient(app)
+
+    wav_content = b"fake-wav-file-content"
+    wav_file = BytesIO(wav_content)
+    wav_file.name = "test.wav"
+
+    try:
+        response = client.post(
+            "/api/v1/agent/transcribe",
+            files={"audio": ("test.wav", wav_file, "audio/wav")},
+        )
+
+        assert response.status_code == 200
+        data = response.json()
+        assert "transcript" in data
+        assert data["transcript"] == "这是测试转写结果"
+    finally:
+        app.dependency_overrides = {}
@@ -1,5 +1,9 @@
 from __future__ import annotations

+from types import SimpleNamespace
+from typing import Any
+from unittest.mock import AsyncMock, patch
+
 from core.agent.infrastructure.litellm.client import run_completion


@@ -53,3 +57,46 @@ def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
    assert "temperature" not in captured
    assert "max_tokens" not in captured
    assert "timeout" not in captured
+
+
+def test_image_content_block_is_preserved_for_llm(monkeypatch) -> None:
+    captured: dict[str, object] = {}
+
+    def _fake_completion(**kwargs):  # type: ignore[no-untyped-def]
+        captured.update(kwargs)
+        return SimpleNamespace(model_dump=lambda: {"choices": []})
+
+    monkeypatch.setattr(
+        "core.agent.infrastructure.litellm.client.completion",
+        _fake_completion,
+    )
+
+    messages_with_image = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "分析这个图片"},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": "https://example.com/image.png"},
+                },
+            ],
+        }
+    ]
+
+    run_completion(
+        model="dashscope/qwen3.5-flash",
+        api_key="key",
+        messages=messages_with_image,
+    )
+
+    assert "messages" in captured
+    result_messages = captured["messages"]
+    assert isinstance(result_messages, list)
+    assert len(result_messages) == 1
+    content = result_messages[0]["content"]
+    assert isinstance(content, list)
+    assert len(content) == 2
+    assert content[0]["type"] == "text"
+    assert content[1]["type"] == "image_url"
+    assert content[1]["image_url"]["url"] == "https://example.com/image.png"