refactor: 移除 LiteLLM proxy 架构,后端直连 Provider API
- 移除 backend/scripts/build_litellm_proxy_config.py
- 简化 LiteLLMService,移除 run_completion_with_cost 方法
- AgentScopeRunner 改为从 LlmFactory 获取 api_base 和 api_key
- 部署配置移除 litellm/litellm-config-job 服务
- Flutter 新增 AuthBootScreen 引导页
- Android 添加通知权限 (POST_NOTIFICATIONS, RECEIVE_BOOT_COMPLETED, SCHEDULE_EXACT_ALARM)
- 优化 LocalNotificationService 调度失败 fallback
- 更新 manifest.json (version 3)
This commit is contained in:
@@ -21,9 +21,11 @@ from core.agentscope.utils import (
|
||||
finalize_json_response,
|
||||
patch_agentscope_json_repair_compat,
|
||||
)
|
||||
from core.config.settings import config
|
||||
from core.db.session import AsyncSessionLocal
|
||||
from core.logging import get_logger
|
||||
from models.llm import Llm
|
||||
from models.llm_factory import LlmFactory
|
||||
from models.system_agents import SystemAgents
|
||||
from schemas.agent.runtime_models import (
|
||||
RouterAgentOutput,
|
||||
@@ -50,6 +52,8 @@ logger = get_logger("core.agentscope.runtime.runner")
|
||||
class SystemAgentRuntimeConfig:
|
||||
agent_type: AgentType
|
||||
model_code: str
|
||||
api_base_url: str
|
||||
api_key: str
|
||||
llm_config: SystemAgentLLMConfig
|
||||
|
||||
|
||||
@@ -63,7 +67,7 @@ class StageExecutionResult:
|
||||
class AgentScopeRunner:
|
||||
def __init__(self, *, litellm_service: LiteLLMService | None = None) -> None:
    """Create the runner and bind the LiteLLM service it uses.

    Args:
        litellm_service: Service instance to reuse. When omitted (or falsy)
            a new ``LiteLLMService()`` is constructed.
    """
    # Invoke the compatibility patch helper first, as the original constructor does.
    patch_agentscope_json_repair_compat()
    # Assign exactly once: a second, identical assignment would build a
    # throwaway LiteLLMService whenever no instance is injected.
    self._litellm_service: LiteLLMService = litellm_service or LiteLLMService()
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
@@ -221,23 +225,42 @@ class AgentScopeRunner:
|
||||
agent_type: AgentType,
|
||||
) -> SystemAgentRuntimeConfig:
|
||||
stmt = (
|
||||
select(SystemAgents, Llm)
|
||||
select(SystemAgents, Llm, LlmFactory)
|
||||
.join(Llm, SystemAgents.llm_id == Llm.id)
|
||||
.join(LlmFactory, Llm.factory_id == LlmFactory.id)
|
||||
.where(SystemAgents.agent_type == agent_type.value)
|
||||
)
|
||||
row = (await session.execute(stmt)).one_or_none()
|
||||
if row is None:
|
||||
raise RuntimeError(f"system agent config not found: {agent_type.value}")
|
||||
system_agent, llm = row
|
||||
system_agent, llm, factory = row
|
||||
status = str(system_agent.status).strip().lower()
|
||||
if status != "active":
|
||||
raise RuntimeError(f"system agent is not active: {agent_type.value}")
|
||||
return SystemAgentRuntimeConfig(
|
||||
agent_type=agent_type,
|
||||
model_code=llm.model_code,
|
||||
api_base_url=factory.request_url,
|
||||
api_key=self._resolve_provider_api_key(factory_name=factory.name),
|
||||
llm_config=SystemAgentLLMConfig.model_validate(system_agent.config or {}),
|
||||
)
|
||||
|
||||
@staticmethod
def _resolve_provider_api_key(*, factory_name: str) -> str:
    """Return the configured provider API key for an LLM factory.

    The factory name is trimmed and upper-cased before the lookup, and
    ``VOLCENGINE`` is looked up under the ``ARK`` entry. Keys of
    ``config.llm.provider_keys`` are trimmed and upper-cased the same way,
    and their values are trimmed.

    Args:
        factory_name: Name of the factory whose key is wanted.

    Returns:
        The trimmed, non-empty API key.

    Raises:
        RuntimeError: If no non-empty key is configured for the factory.
    """
    lookup_name = factory_name.strip().upper()
    if lookup_name == "VOLCENGINE":
        lookup_name = "ARK"

    provider_keys = {
        str(key).strip().upper(): str(value).strip()
        for key, value in config.llm.provider_keys.items()
        # Skip unset values explicitly: str(None) is the truthy text "None",
        # which would otherwise be handed out as if it were a real key.
        if value is not None and str(value).strip()
    }

    api_key = provider_keys.get(lookup_name, "")
    if not api_key:
        raise RuntimeError(f"provider api key missing for factory: {factory_name}")
    return api_key
|
||||
|
||||
async def _run_router_stage(
|
||||
self,
|
||||
*,
|
||||
@@ -363,9 +386,9 @@ class AgentScopeRunner:
|
||||
|
||||
model = OpenAIChatModel(
|
||||
model_name=stage_config.model_code,
|
||||
api_key=self._litellm_service.proxy_api_key,
|
||||
api_key=stage_config.api_key,
|
||||
stream=False,
|
||||
client_kwargs={"base_url": self._litellm_service.proxy_base_url},
|
||||
client_kwargs={"base_url": stage_config.api_base_url},
|
||||
generate_kwargs=generate_kwargs,
|
||||
)
|
||||
return TrackingChatModel(model)
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from services.litellm.service import (
|
||||
LiteLLMResponseWithCost,
|
||||
LiteLLMService,
|
||||
LiteLLMUsage,
|
||||
)
|
||||
from services.litellm.service import LiteLLMService
|
||||
|
||||
__all__ = ["LiteLLMService", "LiteLLMUsage", "LiteLLMResponseWithCost"]
|
||||
__all__ = ["LiteLLMService"]
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable
|
||||
from typing import Any
|
||||
|
||||
from litellm import completion
|
||||
|
||||
from core.config.settings import config
|
||||
from core.config.initial.init_data import load_llm_catalog
|
||||
|
||||
|
||||
@@ -17,34 +14,10 @@ class PricingTier:
|
||||
cache_hit_cost_per_token: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LiteLLMUsage:
    """Immutable token counts and computed cost for one completion call."""

    # Token counts, stored as plain integers.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    # Portion of the prompt tokens reported as cached.
    cached_prompt_tokens: int
    # Cost computed for the call.
    cost: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LiteLLMResponseWithCost:
    """Immutable pairing of a completion response with its usage record."""

    # The completion response as a plain dictionary.
    response: dict[str, Any]
    # Token counts and cost belonging to ``response``.
    usage: LiteLLMUsage
|
||||
|
||||
|
||||
class LiteLLMService:
|
||||
proxy_base_url: str
|
||||
proxy_api_key: str
|
||||
_pricing_by_model: dict[str, tuple[PricingTier, ...]]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
proxy_base_url: str | None = None,
|
||||
proxy_api_key: str | None = None,
|
||||
) -> None:
|
||||
self.proxy_base_url = proxy_base_url or config.litellm.base_url
|
||||
self.proxy_api_key = proxy_api_key or config.litellm.api_key
|
||||
def __init__(self) -> None:
|
||||
self._pricing_by_model = self._build_pricing_map()
|
||||
|
||||
@staticmethod
|
||||
@@ -142,78 +115,3 @@ class LiteLLMService:
|
||||
"cost": cost,
|
||||
"latencyMs": latency_ms,
|
||||
}
|
||||
|
||||
def run_completion_with_cost(
    self,
    *,
    model: str,
    messages: list[dict[str, Any]],
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: float | None = None,
    response_format: dict[str, Any] | None = None,
    completion_fn: Callable[..., dict[str, Any]] | None = None,
) -> LiteLLMResponseWithCost:
    """Run one non-streaming completion and attach token usage and cost.

    Args:
        model: Model code; ``openai/`` is prepended for the request unless
            the value already starts with it.
        messages: Chat messages forwarded unchanged to the completion call.
        temperature: Forwarded as-is (``None`` included).
        max_tokens: Forwarded as-is (``None`` included).
        timeout: Forwarded as-is (``None`` included).
        response_format: Only added to the request when not ``None``.
        completion_fn: Callable used instead of ``litellm.completion`` when
            provided.

    Returns:
        The normalized response dictionary together with its usage record.

    Raises:
        ValueError: If the response cannot be normalized to a dict, or its
            ``usage`` entry is not a dict.
    """
    caller = completion_fn or completion
    request_model = model if model.startswith("openai/") else f"openai/{model}"

    request_kwargs: dict[str, Any] = {
        "model": request_model,
        "api_key": self.proxy_api_key,
        "api_base": self.proxy_base_url,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "timeout": timeout,
        "stream": False,
    }
    if response_format is not None:
        request_kwargs["response_format"] = response_format

    response = self._normalize_response(caller(**request_kwargs))

    usage_raw = response.get("usage")
    if not isinstance(usage_raw, dict):
        raise ValueError("missing usage in response")

    prompt_tokens = int(usage_raw.get("prompt_tokens") or 0)
    completion_tokens = int(usage_raw.get("completion_tokens") or 0)
    # Fall back to the sum when total_tokens is absent, null or zero; a
    # present-but-null value previously collapsed the total to 0.
    total_tokens = int(
        usage_raw.get("total_tokens") or (prompt_tokens + completion_tokens)
    )

    cached_prompt_tokens = 0
    prompt_tokens_details = usage_raw.get("prompt_tokens_details")
    if isinstance(prompt_tokens_details, dict):
        cached_prompt_tokens = int(prompt_tokens_details.get("cached_tokens") or 0)

    # Price against the model named in the response; fall back to the
    # requested model when that field is missing, null or empty, so a null
    # value is not priced as the literal string "None".
    resolved_model = str(response.get("model") or model).strip()
    cost = self.calculate_cost(
        model=resolved_model,
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        cached_prompt_tokens=cached_prompt_tokens,
    )

    return LiteLLMResponseWithCost(
        response=response,
        usage=LiteLLMUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            cached_prompt_tokens=cached_prompt_tokens,
            cost=cost,
        ),
    )
|
||||
|
||||
@staticmethod
|
||||
def _normalize_response(response_any: Any) -> dict[str, Any]:
|
||||
if isinstance(response_any, dict):
|
||||
return response_any
|
||||
model_dump = getattr(response_any, "model_dump", None)
|
||||
if callable(model_dump):
|
||||
dumped = model_dump()
|
||||
if isinstance(dumped, dict):
|
||||
return dumped
|
||||
raise ValueError("litellm response is not serializable")
|
||||
|
||||
Reference in New Issue
Block a user