feat(agent): 实现 Agent Runtime LLM 配置与消息元数据结构化支持

This commit is contained in:
qzl
2026-03-05 18:25:51 +08:00
parent c07d339a5f
commit db158de39c
26 changed files with 1215 additions and 2914 deletions
@@ -5,6 +5,10 @@ from uuid import UUID
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from core.agent.application.session_state_persistence import SessionStatePersistence
from core.agent.domain.message_metadata import (
MessageMetadataAssistantOutput,
MessageMetadataToolResult,
)
from core.agent.infrastructure.persistence.message_repository import MessageRepository
from core.agent.infrastructure.persistence.session_repository import SessionRepository
from core.db import AsyncSessionLocal
@@ -46,14 +50,16 @@ class ResumeService:
seq=next_seq,
role=AgentChatMessageRole.TOOL,
content='{"status":"ok"}',
metadata={"type": "tool_result", "tool_call_id": tool_call_id},
metadata=MessageMetadataToolResult(
tool_call_id=tool_call_id,
).model_dump(),
)
await message_repository.append_message(
session_id=session_uuid,
seq=next_seq + 1,
role=AgentChatMessageRole.ASSISTANT,
content="Tool result received",
metadata={"type": "assistant_output"},
metadata=MessageMetadataAssistantOutput().model_dump(),
)
snapshot = self._state_persistence.build_completed_snapshot()
@@ -3,10 +3,16 @@ from __future__ import annotations
from decimal import Decimal
from uuid import UUID, uuid4
from pydantic import ValidationError
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from core.agent.application.session_state_persistence import SessionStatePersistence
from core.agent.domain.message_metadata import (
MessageMetadataToolCall,
MessageMetadataUserInput,
)
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.crewai.factory import create_runtime
from core.agent.infrastructure.persistence.message_repository import MessageRepository
from core.agent.infrastructure.persistence.session_repository import SessionRepository
@@ -58,10 +64,16 @@ class RunService:
if chat_session is None:
raise ValueError("session not found")
model_code, provider_name = await self._load_agent_model_selection(
db_session
(
model_code,
provider_name,
llm_config,
) = await self._load_agent_model_selection(db_session)
runtime = create_runtime(
model_code=model_code,
provider_name=provider_name,
llm_config=llm_config,
)
runtime = create_runtime(model_code=model_code, provider_name=provider_name)
runtime_result = runtime.execute(user_input=user_input)
assistant_text = str(runtime_result.get("assistant_text", ""))
prompt_tokens = _to_int(runtime_result.get("prompt_tokens", 0))
@@ -79,7 +91,7 @@ class RunService:
role=AgentChatMessageRole.USER,
content=user_input,
model_code=model_code,
metadata={"type": "user_input"},
metadata=MessageMetadataUserInput().model_dump(),
)
await message_repository.append_message(
session_id=session_uuid,
@@ -87,10 +99,9 @@ class RunService:
role=AgentChatMessageRole.ASSISTANT,
content=assistant_text or "Tool call pending approval",
model_code=model_code,
metadata={
"type": "tool_call",
"tool_call_id": pending_tool_call_id,
},
metadata=MessageMetadataToolCall(
tool_call_id=pending_tool_call_id,
).model_dump(),
input_tokens=prompt_tokens,
output_tokens=completion_tokens,
cost=cost,
@@ -119,9 +130,9 @@ class RunService:
async def _load_agent_model_selection(
self, session: AsyncSession
) -> tuple[str, str]:
) -> tuple[str, str, SystemAgentLLMConfig]:
stmt = (
select(Llm.model_code, LlmFactory.name)
select(Llm.model_code, LlmFactory.name, SystemAgents.config)
.join(SystemAgents, SystemAgents.llm_id == Llm.id)
.join(LlmFactory, LlmFactory.id == Llm.factory_id)
.where(SystemAgents.status == "active")
@@ -131,4 +142,11 @@ class RunService:
record = (await session.execute(stmt)).one_or_none()
if record is None:
raise ValueError("active system agent model is required")
return str(record[0]), str(record[1])
raw_config = record[2] if isinstance(record[2], dict) else {}
try:
llm_config = SystemAgentLLMConfig.model_validate(raw_config)
except ValidationError as exc:
raise ValueError("invalid system agent config") from exc
return str(record[0]), str(record[1]), llm_config
@@ -0,0 +1,39 @@
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel
class MessageMetadataUserInput(BaseModel):
    """Metadata payload tagged ``user_input``.

    The model has no fields besides the ``type`` tag, so it can be built
    without arguments.
    """

    # Tag value; the Literal annotation admits no other string.
    type: Literal["user_input"] = "user_input"
class MessageMetadataToolCall(BaseModel):
    """Metadata payload tagged ``tool_call``.

    ``tool_call_id`` has no default and must be supplied on construction.
    """

    # Tag value; the Literal annotation admits no other string.
    type: Literal["tool_call"] = "tool_call"
    # Identifier of the tool call this metadata refers to (required).
    tool_call_id: str
class MessageMetadataToolResult(BaseModel):
    """Metadata payload tagged ``tool_result``.

    Only ``tool_call_id`` is required; every other field is optional and
    defaults to ``None``.

    NOTE(review): a plain ``model_dump()`` emits the unset optional fields
    as ``None`` values. If stored metadata should contain only populated
    keys, callers probably want ``model_dump(exclude_none=True)`` -- confirm
    against the persistence layer's expectations.
    """

    # Tag value; the Literal annotation admits no other string.
    type: Literal["tool_result"] = "tool_result"
    # Identifier of the tool call this result belongs to (required).
    tool_call_id: str

    # Optional correlation fields.
    run_id: str | None = None
    turn_id: str | None = None
    tool_name: str | None = None

    # Optional payload descriptors; presumably they locate and describe an
    # externally stored result payload -- TODO confirm with the writer side.
    storage_bucket: str | None = None
    storage_path: str | None = None
    payload_sha256: str | None = None
    payload_bytes: int | None = None
    payload_format: str | None = None
class MessageMetadataAssistantOutput(BaseModel):
    """Metadata payload tagged ``assistant_output``.

    The model has no fields besides the ``type`` tag, so it can be built
    without arguments.
    """

    # Tag value; the Literal annotation admits no other string.
    type: Literal["assistant_output"] = "assistant_output"
# Union of the four metadata models declared in this module; each member
# pins a distinct literal in its ``type`` field.
MessageMetadata = (
    MessageMetadataUserInput | MessageMetadataToolCall
    | MessageMetadataToolResult | MessageMetadataAssistantOutput
)
@@ -0,0 +1,8 @@
from __future__ import annotations
from pydantic import BaseModel, Field
class SystemAgentLLMConfig(BaseModel):
    """Validated LLM call settings carried in a system agent's config.

    Both settings are optional and default to ``None``.
    """

    # When provided, must lie within the closed range [0.0, 2.0].
    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
    # When provided, must be at least 1.
    max_tokens: int | None = Field(default=None, ge=1)
@@ -1,5 +1,7 @@
from __future__ import annotations
from core.agent.domain.message_metadata import MessageMetadataToolResult
def reconstruct_tool_call_result_event(
*,
@@ -26,15 +28,14 @@ def build_tool_result_metadata(
payload_bytes: int,
payload_format: str,
) -> dict[str, object]:
return {
"type": "tool_result",
"run_id": run_id,
"turn_id": turn_id,
"tool_call_id": tool_call_id,
"tool_name": tool_name,
"storage_bucket": storage_bucket,
"storage_path": storage_path,
"payload_sha256": payload_sha256,
"payload_bytes": payload_bytes,
"payload_format": payload_format,
}
return MessageMetadataToolResult(
run_id=run_id,
turn_id=turn_id,
tool_call_id=tool_call_id,
tool_name=tool_name,
storage_bucket=storage_bucket,
storage_path=storage_path,
payload_sha256=payload_sha256,
payload_bytes=payload_bytes,
payload_format=payload_format,
).model_dump()
@@ -1,15 +1,20 @@
from __future__ import annotations
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.config.resolver import AgentConfigResolver
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
def create_runtime(
    *,
    model_code: str | None,
    provider_name: str | None,
    llm_config: SystemAgentLLMConfig | None = None,
) -> CrewAIRuntime:
    """Build a ``CrewAIRuntime`` backed by a freshly created config resolver.

    All arguments are keyword-only and are forwarded to ``CrewAIRuntime``
    unchanged.

    Args:
        model_code: Model code handed to the runtime; may be ``None``.
        provider_name: Provider name handed to the runtime; may be ``None``.
        llm_config: Optional LLM settings. Defaults to ``None`` so existing
            callers that pass only the model and provider keep working.

    Returns:
        The constructed ``CrewAIRuntime`` instance.
    """
    # A new resolver is created per call, using its default construction.
    resolver = AgentConfigResolver()
    return CrewAIRuntime(
        resolver=resolver,
        model_code=model_code,
        provider_name=provider_name,
        llm_config=llm_config,
    )
@@ -2,6 +2,7 @@ from __future__ import annotations
from typing import Any
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.agui.bridge import to_agui_events
from core.agent.infrastructure.config.resolver import (
AgentConfigResolver,
@@ -47,11 +48,13 @@ class CrewAIRuntime:
resolver: AgentConfigResolver,
model_code: str | None,
provider_name: str | None,
llm_config: SystemAgentLLMConfig | None = None,
) -> None:
self._config: ResolvedAgentConfig = resolver.resolve(
model_code=model_code,
provider_name=provider_name,
)
self._llm_config = llm_config or SystemAgentLLMConfig()
def map_events(self, internal_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
return to_agui_events(internal_events)
@@ -65,6 +68,8 @@ class CrewAIRuntime:
model=litellm_model,
api_key=self._config.provider_api_key,
messages=[{"role": "user", "content": user_input}],
temperature=self._llm_config.temperature,
max_tokens=self._llm_config.max_tokens,
)
if not isinstance(response, dict):
raise ValueError("llm response must be a dict")
@@ -5,13 +5,26 @@ from typing import Any
from litellm import completion
def run_completion(*, model: str, api_key: str, messages: list[dict[str, Any]]) -> Any:
response = completion(
model=model,
api_key=api_key,
messages=messages,
stream=False,
)
def run_completion(
*,
model: str,
api_key: str,
messages: list[dict[str, Any]],
temperature: float | None = None,
max_tokens: int | None = None,
) -> Any:
kwargs: dict[str, Any] = {
"model": model,
"api_key": api_key,
"messages": messages,
"stream": False,
}
if temperature is not None:
kwargs["temperature"] = temperature
if max_tokens is not None:
kwargs["max_tokens"] = max_tokens
response = completion(**kwargs)
model_dump = getattr(response, "model_dump", None)
if callable(model_dump):
return model_dump()
+5 -2
View File
@@ -9,6 +9,7 @@ from pydantic import BaseModel, ValidationError
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.db.session import AsyncSessionLocal
from core.logging import get_logger
from models.llm import Llm
@@ -38,7 +39,7 @@ class SystemAgentsSeed(BaseModel):
agent_type: str
llm_model_code: str
status: str
config: dict[str, Any]
config: SystemAgentLLMConfig | None = None
class SystemAgentsYaml(BaseModel):
@@ -184,7 +185,9 @@ async def initialize_system_agents() -> None:
agent_type=agent["agent_type"],
llm_id=llm.id,
status=agent["status"],
config=agent["config"],
config=SystemAgentLLMConfig.model_validate(
agent.get("config") or {}
).model_dump(),
)
logger.info("Initialized system agents")
@@ -4,15 +4,18 @@ agents:
status: active
config:
temperature: 0.7
max_tokens: null
- agent_type: TASK_EXECUTION
llm_model_code: deepseek-v3.2
status: active
config:
temperature: 0.7
max_tokens: null
- agent_type: RESULT_REPORTING
llm_model_code: deepseek-v3.2
status: active
config:
temperature: 0.7
max_tokens: null
@@ -1,22 +1,26 @@
from __future__ import annotations
from types import SimpleNamespace
from typing import cast
from core.agent.infrastructure.config.resolver import AgentConfigResolver
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.config.resolver import AgentConfigResolver, SettingsLike
from core.agent.infrastructure.crewai.runtime import CrewAIRuntime
def test_runtime_emits_text_tool_reasoning_events() -> None:
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(
settings=SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
)
settings = cast(
SettingsLike,
SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
),
)
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(settings=settings),
model_code="gpt-4o-mini",
provider_name="dashscope",
)
@@ -46,11 +50,18 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
captured: dict[str, object] = {}
def _fake_completion(
*, model: str, api_key: str, messages: list[dict[str, object]]
*,
model: str,
api_key: str,
messages: list[dict[str, object]],
temperature: float | None = None,
max_tokens: int | None = None,
):
captured["model"] = model
captured["api_key"] = api_key
captured["messages"] = messages
captured["temperature"] = temperature
captured["max_tokens"] = max_tokens
return {
"choices": [
{
@@ -75,23 +86,28 @@ def test_runtime_execute_uses_provider_prefixed_litellm_model(
cost=0.001,
),
)
settings = cast(
SettingsLike,
SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
),
)
runtime = CrewAIRuntime(
resolver=AgentConfigResolver(
settings=SimpleNamespace(
agent_runtime=SimpleNamespace(
default_model_code="",
streaming_enabled=True,
),
llm=SimpleNamespace(provider_keys={"dashscope": "env-api-key"}),
)
),
resolver=AgentConfigResolver(settings=settings),
model_code="qwen3.5-flash",
provider_name="dashscope",
llm_config=SystemAgentLLMConfig(temperature=0.3, max_tokens=256),
)
result = runtime.execute(user_input="hi")
assert captured["model"] == "dashscope/qwen3.5-flash"
assert captured["api_key"] == "env-api-key"
assert captured["temperature"] == 0.3
assert captured["max_tokens"] == 256
assert result["assistant_text"] == "hello"
@@ -0,0 +1,14 @@
from __future__ import annotations
from core.config.initial.init_data import load_system_agents
def test_load_system_agents_supports_nullable_max_tokens() -> None:
    """Every loaded agent has a config whose ``max_tokens`` key is ``None``."""
    agents = load_system_agents()["agents"]

    # Guard against a vacuous pass when nothing was loaded.
    assert len(agents) > 0

    for entry in agents:
        assert "config" in entry
        config = entry["config"]
        assert "max_tokens" in config
        assert config["max_tokens"] is None
@@ -0,0 +1,51 @@
from __future__ import annotations
from core.agent.infrastructure.litellm.client import run_completion
def test_run_completion_passes_optional_params_when_provided(monkeypatch) -> None:
    """Explicit temperature/max_tokens values reach the completion call."""
    seen_kwargs: dict[str, object] = {}

    def _record_call(**kwargs):  # type: ignore[no-untyped-def]
        # Stand-in for the completion callable: remember what it was given.
        seen_kwargs.update(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _record_call,
    )

    chat = [{"role": "user", "content": "hi"}]
    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=chat,
        temperature=0.6,
        max_tokens=120,
    )

    assert seen_kwargs["temperature"] == 0.6
    assert seen_kwargs["max_tokens"] == 120
def test_run_completion_omits_optional_params_when_none(monkeypatch) -> None:
    """``None`` for temperature/max_tokens means the keys are not forwarded."""
    seen_kwargs: dict[str, object] = {}

    def _record_call(**kwargs):  # type: ignore[no-untyped-def]
        # Stand-in for the completion callable: remember what it was given.
        seen_kwargs.update(kwargs)
        return {"ok": True}

    monkeypatch.setattr(
        "core.agent.infrastructure.litellm.client.completion",
        _record_call,
    )

    chat = [{"role": "user", "content": "hi"}]
    run_completion(
        model="dashscope/qwen3.5-flash",
        api_key="key",
        messages=chat,
        temperature=None,
        max_tokens=None,
    )

    # The keys must be absent altogether, not present with a None value.
    assert "temperature" not in seen_kwargs
    assert "max_tokens" not in seen_kwargs
@@ -4,6 +4,23 @@ import pytest
from core.agent.application.resume_service import ResumeService
from core.agent.application.run_service import RunService
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
class _FakeResult:
    """Test double for a query result that holds one pre-set record."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        # The record (or None) is stored as-is and handed back on request.
        self._row = record

    def one_or_none(self) -> tuple[object, object, object] | None:
        """Return the record given at construction, which may be ``None``."""
        return self._row
class _FakeSession:
    """Test double for a session whose ``execute`` yields a canned record."""

    def __init__(self, record: tuple[object, object, object] | None) -> None:
        # Kept so every execute() call can wrap the same record.
        self._row = record

    async def execute(self, _stmt: object) -> _FakeResult:
        """Ignore the statement and return a ``_FakeResult`` over the record."""
        result = _FakeResult(self._row)
        return result
@pytest.mark.asyncio
@@ -20,3 +37,72 @@ async def test_resume_service_requires_pending_tool_call() -> None:
with pytest.raises(ValueError):
await resume_service.resume(session_id="session-1", tool_call_id="call-1")
@pytest.mark.asyncio
async def test_load_agent_model_selection_returns_validated_llm_config() -> None:
    """A well-formed config dict comes back as a ``SystemAgentLLMConfig``."""
    stored_record = (
        "qwen3.5-flash",
        "dashscope",
        {"temperature": 0.5, "max_tokens": 512},
    )
    service = RunService()
    session = _FakeSession(stored_record)

    selection = await service._load_agent_model_selection(
        session  # type: ignore[arg-type]
    )
    model_code, provider_name, llm_config = selection

    assert model_code == "qwen3.5-flash"
    assert provider_name == "dashscope"
    assert isinstance(llm_config, SystemAgentLLMConfig)
    assert llm_config.temperature == 0.5
    assert llm_config.max_tokens == 512
@pytest.mark.asyncio
async def test_load_agent_model_selection_rejects_invalid_config() -> None:
    """A config that fails validation surfaces as a ``ValueError``."""
    # Temperature 3.0 is the invalid value under test.
    invalid_record = ("qwen3.5-flash", "dashscope", {"temperature": 3.0})
    service = RunService()
    session = _FakeSession(invalid_record)

    with pytest.raises(ValueError, match="invalid system agent config"):
        await service._load_agent_model_selection(session)  # type: ignore[arg-type]
@pytest.mark.asyncio
async def test_load_agent_model_selection_falls_back_when_config_not_dict() -> None:
    """A non-dict config value yields a config with both settings unset."""
    non_dict_record = ("qwen3.5-flash", "dashscope", "not-a-dict")
    service = RunService()
    session = _FakeSession(non_dict_record)

    selection = await service._load_agent_model_selection(
        session  # type: ignore[arg-type]
    )
    llm_config = selection[2]

    assert llm_config.temperature is None
    assert llm_config.max_tokens is None
@pytest.mark.asyncio
async def test_load_agent_model_selection_raises_when_no_active_agent() -> None:
    """When the query yields no record, a ``ValueError`` is raised."""
    service = RunService()
    empty_session = _FakeSession(None)

    with pytest.raises(ValueError, match="active system agent model is required"):
        await service._load_agent_model_selection(empty_session)  # type: ignore[arg-type]