feat(agent): support multimodal intent input and ASR transcribe endpoint
This commit is contained in:
@@ -1,5 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from core.agent.infrastructure.litellm.client import run_completion
|
||||
|
||||
|
||||
@@ -53,3 +57,46 @@ def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
|
||||
assert "temperature" not in captured
|
||||
assert "max_tokens" not in captured
|
||||
assert "timeout" not in captured
|
||||
|
||||
|
||||
def test_image_content_block_is_preserved_for_llm(monkeypatch) -> None:
    """Check that a multimodal user message reaches ``completion`` intact.

    The ``completion`` attribute of the litellm client module is replaced with
    a recording stub, ``run_completion`` is called with one user message made
    of a text block and an ``image_url`` block, and the keyword arguments the
    stub received are inspected.
    """
    image_url = "https://example.com/image.png"
    text_block = {"type": "text", "text": "分析这个图片"}
    image_block = {"type": "image_url", "image_url": {"url": image_url}}
    user_message = {"role": "user", "content": [text_block, image_block]}

    seen_kwargs: dict[str, object] = {}

    def _recording_completion(**call_kwargs: Any) -> SimpleNamespace:
        # Remember every keyword argument passed in, then return a minimal
        # stand-in response that only exposes ``model_dump``.
        seen_kwargs.update(call_kwargs)
        return SimpleNamespace(model_dump=lambda: {"choices": []})

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _recording_completion,
    )

    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=[user_message],
    )

    # The message list must be forwarded, still holding exactly one message.
    assert "messages" in seen_kwargs
    forwarded = seen_kwargs["messages"]
    assert isinstance(forwarded, list)
    assert len(forwarded) == 1

    # The content must remain a two-element list of blocks (not flattened).
    blocks = forwarded[0]["content"]
    assert isinstance(blocks, list)
    assert len(blocks) == 2

    first_block, second_block = blocks
    assert first_block["type"] == "text"
    assert second_block["type"] == "image_url"
    assert second_block["image_url"]["url"] == image_url
|
||||
|
||||
Reference in New Issue
Block a user