merge: integrate feature/tasks-8-9-multimodal-asr into dev

This commit is contained in:
zl-q
2026-03-08 17:35:53 +08:00
11 changed files with 422 additions and 74 deletions
@@ -1,5 +1,6 @@
from __future__ import annotations
from io import BytesIO
from types import SimpleNamespace
from uuid import uuid4
@@ -346,3 +347,36 @@ def test_resume_accepts_tool_message_without_user_message() -> None:
assert response.json()["taskId"] == "task-resume-1"
finally:
app.dependency_overrides = {}
def test_asr_transcribe_returns_sync_transcript(monkeypatch) -> None:
    """POST /api/v1/agent/transcribe answers 200 with the transcript text.

    The ``get_current_user`` dependency is overridden with a stub
    ``CurrentUser`` and the name ``v1.agent.service.asr_service.transcribe``
    is replaced by an async fake that returns a fixed string, so the test
    only checks the status code and the ``transcript`` field of the JSON body.
    """
    app.dependency_overrides[get_current_user] = lambda: CurrentUser(
        id=uuid4(), email="user@example.com"
    )
    # Everything after installing the override runs inside ``try`` so that a
    # failure during setup (e.g. monkeypatch cannot resolve the dotted path)
    # still resets the overrides instead of leaking them into later tests.
    try:

        async def mock_transcribe(audio_data: bytes, filename: str) -> str:
            return "这是测试转写结果"

        monkeypatch.setattr(
            "v1.agent.service.asr_service.transcribe",
            mock_transcribe,
        )

        client = TestClient(app)

        # The payload only has to be uploadable; the fake never inspects it.
        wav_content = b"fake-wav-file-content"
        wav_file = BytesIO(wav_content)
        wav_file.name = "test.wav"

        response = client.post(
            "/api/v1/agent/transcribe",
            files={"audio": ("test.wav", wav_file, "audio/wav")},
        )

        assert response.status_code == 200
        data = response.json()
        assert "transcript" in data
        assert data["transcript"] == "这是测试转写结果"
    finally:
        app.dependency_overrides = {}
@@ -1,5 +1,9 @@
from __future__ import annotations
from types import SimpleNamespace
from typing import Any
from unittest.mock import AsyncMock, patch
from core.agent.infrastructure.litellm.client import run_completion
@@ -53,3 +57,46 @@ def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
assert "temperature" not in captured
assert "max_tokens" not in captured
assert "timeout" not in captured
def test_image_content_block_is_preserved_for_llm(monkeypatch) -> None:
    """A text + image_url user message is forwarded to ``completion`` intact.

    The ``completion`` name in the LiteLLM client module is swapped for a
    recorder that stores the keyword arguments it receives. One user message
    holding a text block and an ``image_url`` block is sent through
    ``run_completion``, and the recorded ``messages`` are checked block by
    block.
    """
    image_url = "https://example.com/image.png"
    seen_kwargs: dict[str, object] = {}

    def _record_completion(**kwargs):  # type: ignore[no-untyped-def]
        # Keep whatever run_completion passes down so it can be inspected.
        seen_kwargs.update(kwargs)
        return SimpleNamespace(model_dump=lambda: {"choices": []})

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _record_completion,
    )

    text_block = {"type": "text", "text": "分析这个图片"}
    image_block = {"type": "image_url", "image_url": {"url": image_url}}
    user_message = {"role": "user", "content": [text_block, image_block]}

    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=[user_message],
    )

    # Exactly one message must have been forwarded.
    assert "messages" in seen_kwargs
    forwarded = seen_kwargs["messages"]
    assert isinstance(forwarded, list)
    assert len(forwarded) == 1

    # Its content must still be the two-block list, in the original order.
    blocks = forwarded[0]["content"]
    assert isinstance(blocks, list)
    assert len(blocks) == 2
    forwarded_text, forwarded_image = blocks
    assert forwarded_text["type"] == "text"
    assert forwarded_image["type"] == "image_url"
    assert forwarded_image["image_url"]["url"] == image_url