feat(agent): 实现 Agent Runtime LLM 配置与消息元数据结构化支持

This commit is contained in:
qzl
2026-03-05 18:25:51 +08:00
parent c07d339a5f
commit db158de39c
26 changed files with 1215 additions and 2914 deletions
@@ -1,22 +1,26 @@
from __future__ import annotations
from types import SimpleNamespace
from typing import cast
from core.agent.infrastructure.config.resolver import AgentConfigResolver
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
def test_runtime_emits_text_tool_reasoning_events() -> None:
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(
settings=SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
)
settings = cast(
SettingsLike,
SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
),
)
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(settings=settings),
model_code="gpt-4o-mini",
provider_name="dashscope",
)
@@ -46,11 +50,18 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
captured: dict[str, object] = {}
def _fake_completion(
*, model: str, api_key: str, messages: list[dict[str, object]]
*,
model: str,
api_key: str,
messages: list[dict[str, object]],
temperature: float | None = None,
max_tokens: int | None = None,
):
captured["model"] = model
captured["api_key"] = api_key
captured["messages"] = messages
captured["temperature"] = temperature
captured["max_tokens"] = max_tokens
return {
"choices": [
{
@@ -75,23 +86,28 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
cost=0.001,
),
)
settings = cast(
SettingsLike,
SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
),
)
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(
settings=SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
)
),
resolver=AgentConfigResolver(settings=settings),
model_code="qwen3.5-flash",
provider_name="dashscope",
llm_config=SystemAgentLLMConfig(temperature=0.3, max_tokens=256),
)
result = runtime.execute(user_input="hi")
assert captured["model"] == "dashscope/qwen3.5-flash"
assert captured["api_key"] == "env-api-key"
assert captured["temperature"] == 0.3
assert captured["max_tokens"] == 256
assert result["assistant_text"] == "hello"
@@ -0,0 +1,14 @@
from __future__ import annotations
from core.config.initial.init_data import load_system_agents
def test_load_system_agents_supports_nullable_max_tokens() -> None:
    """Check that every loaded system agent carries an explicit null ``max_tokens``.

    The loader result must contain at least one agent, and each agent's
    ``config`` mapping must include the ``max_tokens`` key set to ``None``.
    """
    agent_entries = load_system_agents()["agents"]

    # An empty list would make the per-agent checks below pass vacuously.
    assert len(agent_entries) > 0

    for entry in agent_entries:
        assert "config" in entry
        agent_config = entry["config"]
        # The key has to be present, not merely absent-and-defaulted.
        assert "max_tokens" in agent_config
        assert agent_config["max_tokens"] is None
@@ -0,0 +1,51 @@
from __future__ import annotations
from core.agent.infrastructure.litellm.client import run_completion
def test_run_completion_passes_optional_params_when_provided(monkeypatch) -> None:
    """Check that explicit ``temperature``/``max_tokens`` reach ``completion``.

    The ``completion`` name in the litellm client module is replaced by a
    recorder so the keyword arguments forwarded by ``run_completion`` can be
    inspected without any real LLM call.
    """
    seen_kwargs: dict[str, object] = {}

    def _record_call(**call_kwargs):  # type: ignore[no-untyped-def]
        # Remember everything the client forwarded, then return a dummy payload.
        seen_kwargs.update(call_kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _record_call,
    )

    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=[{"role": "user", "content": "hi"}],
        temperature=0.6,
        max_tokens=120,
    )

    expected_params = {"temperature": 0.6, "max_tokens": 120}
    for param_name, param_value in expected_params.items():
        assert seen_kwargs[param_name] == param_value
def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
    """Check that ``None``-valued optional params are not forwarded at all.

    When ``temperature`` and ``max_tokens`` are passed as ``None``, the
    patched ``completion`` must not receive those keyword arguments.
    """
    seen_kwargs: dict[str, object] = {}

    def _record_call(**call_kwargs):  # type: ignore[no-untyped-def]
        # Remember everything the client forwarded, then return a dummy payload.
        seen_kwargs.update(call_kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _record_call,
    )

    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=[{"role": "user", "content": "hi"}],
        temperature=None,
        max_tokens=None,
    )

    # The keys must be absent, not present with a ``None`` value.
    for param_name in ("temperature", "max_tokens"):
        assert param_name not in seen_kwargs
@@ -4,6 +4,23 @@ import pytest
from core.agent.application.resume_service import ResumeService
from core.agent.application.run_service import RunService
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
class _FakeResult:
def __init__(self, record: tuple[object, object, object] | None) -> None:
self._record = record
def one_or_none(self) -> tuple[object, object, object] | None:
return self._record
class _FakeSession:
    """Minimal async session double whose ``execute`` yields a canned row."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        """Store the row (or ``None``) that every ``execute`` call will return."""
        self._canned_row = record

    async def execute(self, _stmt: object) -> _FakeResult:
        """Ignore the statement and wrap the canned row in a ``_FakeResult``."""
        return _FakeResult(self._canned_row)
@pytest.mark.asyncio
@@ -20,3 +37,72 @@ async def test_resume_service_requires_pending_tool_call() -> None:
with pytest.raises(ValueError):
await resume_service.resume(session_id="session-1", tool_call_id="call-1")
@pytest.mark.asyncio
async def test_load_agent_model_selection_returns_validated_llm_config() -> None:
    """Check that a row with a dict config yields a ``SystemAgentLLMConfig``.

    The fake session returns a (model code, provider name, config dict) row;
    the first two values must come back unchanged and the dict must come back
    as a ``SystemAgentLLMConfig`` with the same field values.
    """
    run_service = RunService()
    stored_row = (
        "qwen3.5-flash",
        "dashscope",
        {"temperature": 0.5, "max_tokens": 512},
    )

    selection = await run_service._load_agent_model_selection(
        _FakeSession(stored_row)  # type: ignore[arg-type]
    )
    model_code, provider_name, llm_config = selection

    assert model_code == "qwen3.5-flash"
    assert provider_name == "dashscope"
    assert isinstance(llm_config, SystemAgentLLMConfig)
    assert llm_config.temperature == 0.5
    assert llm_config.max_tokens == 512
@pytest.mark.asyncio
async def test_load_agent_model_selection_rejects_invalid_config() -> None:
    """Check that a config the service deems invalid raises ``ValueError``.

    The row carries ``temperature=3.0``; the call is expected to fail with a
    message matching ``invalid system agent config``.
    """
    run_service = RunService()
    rejected_row = ("qwen3.5-flash", "dashscope", {"temperature": 3.0})
    session = _FakeSession(rejected_row)

    with pytest.raises(ValueError, match="invalid system agent config"):
        await run_service._load_agent_model_selection(session)  # type: ignore[arg-type]
@pytest.mark.asyncio
async def test_load_agent_model_selection_falls_back_when_config_not_dict() -> None:
    """Check that a non-dict config column yields an LLM config with unset fields.

    The row's third element is a plain string; the returned config must have
    both ``temperature`` and ``max_tokens`` equal to ``None``.
    """
    run_service = RunService()
    session = _FakeSession(("qwen3.5-flash", "dashscope", "not-a-dict"))

    selection = await run_service._load_agent_model_selection(
        session  # type: ignore[arg-type]
    )
    # Only the config part of the returned triple matters for this check.
    llm_config = selection[2]

    assert llm_config.temperature is None
    assert llm_config.max_tokens is None
@pytest.mark.asyncio
async def test_load_agent_model_selection_raises_when_no_active_agent() -> None:
    """Check that an empty query result raises ``ValueError``.

    The fake session returns ``None`` from ``one_or_none``; the call is
    expected to fail with a message matching
    ``active system agent model is required``.
    """
    run_service = RunService()
    empty_session = _FakeSession(None)

    with pytest.raises(ValueError, match="active system agent model is required"):
        await run_service._load_agent_model_selection(empty_session)  # type: ignore[arg-type]