refactor: 移除 LiteLLM proxy 架构,后端直连 Provider API
- 移除 backend/scripts/build_litellm_proxy_config.py
- 简化 LiteLLMService,移除 run_completion_with_cost 方法
- AgentScopeRunner 改为从 LlmFactory 获取 api_base 和 api_key
- 部署配置移除 litellm/litellm-config-job 服务
- Flutter 新增 AuthBootScreen 引导页
- Android 添加通知权限 (POST_NOTIFICATIONS, RECEIVE_BOOT_COMPLETED, SCHEDULE_EXACT_ALARM)
- 优化 LocalNotificationService 调度失败 fallback
- 更新 manifest.json (version 3)
This commit is contained in:
@@ -1,90 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from core.config.initial.init_data import load_llm_catalog
|
||||
|
||||
|
||||
def _provider_key_env_name(factory_name: str) -> str:
    """Return the environment variable name holding a provider's API key.

    The factory name is trimmed and upper-cased, ``VOLCENGINE`` is replaced
    by ``ARK``, and the result is appended to the
    ``SOCIAL_LLM__PROVIDER_KEYS__`` prefix.
    """
    suffix = factory_name.strip().upper()
    # Volcengine keys are looked up under the ARK name.
    env_suffix = "ARK" if suffix == "VOLCENGINE" else suffix
    return "SOCIAL_LLM__PROVIDER_KEYS__" + env_suffix
|
||||
|
||||
|
||||
def _catalog_text(entry: dict[str, Any], key: str) -> str:
    """Return ``entry[key]`` as a stripped string, treating ``None`` as empty."""
    value = entry.get(key)
    # A YAML ``null`` loads as None; str(None) would yield the truthy "None".
    return "" if value is None else str(value).strip()


def build_proxy_config() -> dict[str, Any]:
    """Build the LiteLLM proxy configuration from the LLM catalog.

    Each catalog model with a ``model_code``, ``factory_name`` and
    ``litellm_model`` becomes one ``model_list`` entry that routes through
    the ``openai/`` prefix to the owning factory's ``request_url``. The API
    key is referenced indirectly as ``os.environ/<ENV_NAME>``.

    Catalog entries that are not mappings, or whose required fields are
    missing, empty or null, are skipped.

    Returns:
        A mapping with a single ``model_list`` key.

    Raises:
        ValueError: If the catalog's ``factories`` or ``llms`` is not a list,
            if a model's factory has no ``request_url``, or if no model
            entry could be produced.
    """
    catalog = load_llm_catalog()

    factories = catalog.get("factories", [])
    llms = catalog.get("llms", [])
    if not isinstance(factories, list) or not isinstance(llms, list):
        raise ValueError("invalid llm catalog format")

    # Factory names are matched case-insensitively.
    factory_url_map: dict[str, str] = {}
    for factory in factories:
        if not isinstance(factory, dict):
            continue
        name = _catalog_text(factory, "name").lower()
        request_url = _catalog_text(factory, "request_url")
        if name and request_url:
            factory_url_map[name] = request_url

    model_list: list[dict[str, Any]] = []
    for llm in llms:
        if not isinstance(llm, dict):
            continue
        model_code = _catalog_text(llm, "model_code")
        factory_name = _catalog_text(llm, "factory_name")
        litellm_model = _catalog_text(llm, "litellm_model")
        if not model_code or not factory_name or not litellm_model:
            continue

        api_base = factory_url_map.get(factory_name.lower())
        if not api_base:
            raise ValueError(
                f"factory request_url missing for model {model_code}: {factory_name}"
            )

        env_key_name = _provider_key_env_name(factory_name)
        # Drop everything up to the first "/" so the model can be re-prefixed
        # with "openai/" below.
        provider_model = (
            litellm_model.split("/", 1)[1] if "/" in litellm_model else litellm_model
        )

        model_list.append(
            {
                "model_name": model_code,
                "litellm_params": {
                    "model": f"openai/{provider_model}",
                    "api_base": api_base,
                    "api_key": f"os.environ/{env_key_name}",
                },
            }
        )

    if not model_list:
        raise ValueError("no models found in llm catalog")

    return {"model_list": model_list}
|
||||
|
||||
|
||||
def main() -> int:
    """Write the generated LiteLLM proxy config to the ``--output`` path.

    Returns:
        ``0`` after the YAML file has been written.
    """
    arg_parser = argparse.ArgumentParser(description="Build LiteLLM proxy config")
    arg_parser.add_argument("--output", required=True, help="Output YAML file path")
    cli_args = arg_parser.parse_args()

    target = Path(cli_args.output).resolve()
    # Create missing parent directories before building and writing the config.
    target.parent.mkdir(parents=True, exist_ok=True)

    proxy_config = build_proxy_config()
    with target.open("w", encoding="utf-8") as stream:
        yaml.safe_dump(proxy_config, stream, sort_keys=False, allow_unicode=False)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -21,9 +21,11 @@ from core.agentscope.utils import (
|
||||
finalize_json_response,
|
||||
patch_agentscope_json_repair_compat,
|
||||
)
|
||||
from core.config.settings import config
|
||||
from core.db.session import AsyncSessionLocal
|
||||
from core.logging import get_logger
|
||||
from models.llm import Llm
|
||||
from models.llm_factory import LlmFactory
|
||||
from models.system_agents import SystemAgents
|
||||
from schemas.agent.runtime_models import (
|
||||
RouterAgentOutput,
|
||||
@@ -50,6 +52,8 @@ logger = get_logger("core.agentscope.runtime.runner")
|
||||
class SystemAgentRuntimeConfig:
|
||||
agent_type: AgentType
|
||||
model_code: str
|
||||
api_base_url: str
|
||||
api_key: str
|
||||
llm_config: SystemAgentLLMConfig
|
||||
|
||||
|
||||
@@ -63,7 +67,7 @@ class StageExecutionResult:
|
||||
class AgentScopeRunner:
|
||||
def __init__(self, *, litellm_service: LiteLLMService | None = None) -> None:
    """Initialise the runner.

    Args:
        litellm_service: Service instance to use. When omitted (or falsy),
            a new ``LiteLLMService`` is created.
    """
    patch_agentscope_json_repair_compat()
    # Assign exactly once; a second identical assignment could construct a
    # throwaway LiteLLMService instance.
    self._litellm_service: LiteLLMService = litellm_service or LiteLLMService()
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
@@ -221,23 +225,42 @@ class AgentScopeRunner:
|
||||
agent_type: AgentType,
|
||||
) -> SystemAgentRuntimeConfig:
|
||||
stmt = (
|
||||
select(SystemAgents, Llm)
|
||||
select(SystemAgents, Llm, LlmFactory)
|
||||
.join(Llm, SystemAgents.llm_id == Llm.id)
|
||||
.join(LlmFactory, Llm.factory_id == LlmFactory.id)
|
||||
.where(SystemAgents.agent_type == agent_type.value)
|
||||
)
|
||||
row = (await session.execute(stmt)).one_or_none()
|
||||
if row is None:
|
||||
raise RuntimeError(f"system agent config not found: {agent_type.value}")
|
||||
system_agent, llm = row
|
||||
system_agent, llm, factory = row
|
||||
status = str(system_agent.status).strip().lower()
|
||||
if status != "active":
|
||||
raise RuntimeError(f"system agent is not active: {agent_type.value}")
|
||||
return SystemAgentRuntimeConfig(
|
||||
agent_type=agent_type,
|
||||
model_code=llm.model_code,
|
||||
api_base_url=factory.request_url,
|
||||
api_key=self._resolve_provider_api_key(factory_name=factory.name),
|
||||
llm_config=SystemAgentLLMConfig.model_validate(system_agent.config or {}),
|
||||
)
|
||||
|
||||
@staticmethod
def _resolve_provider_api_key(*, factory_name: str) -> str:
    """Look up the configured API key for a provider factory.

    The factory name is trimmed and upper-cased, with ``VOLCENGINE`` mapped
    to ``ARK``. Keys in ``config.llm.provider_keys`` are matched after the
    same trim/upper-case normalisation, and blank values are ignored.

    Args:
        factory_name: Name of the provider factory.

    Returns:
        The stripped API key configured for the factory.

    Raises:
        RuntimeError: If no non-blank key is configured for the factory.
    """
    lookup_name = factory_name.strip().upper()
    if lookup_name == "VOLCENGINE":
        lookup_name = "ARK"

    # Index configured keys by normalised provider name, skipping blanks.
    available: dict[str, str] = {}
    for raw_name, raw_value in config.llm.provider_keys.items():
        cleaned = str(raw_value).strip()
        if cleaned:
            available[str(raw_name).strip().upper()] = cleaned

    resolved = available.get(lookup_name, "")
    if resolved:
        return resolved
    raise RuntimeError(f"provider api key missing for factory: {factory_name}")
|
||||
|
||||
async def _run_router_stage(
|
||||
self,
|
||||
*,
|
||||
@@ -363,9 +386,9 @@ class AgentScopeRunner:
|
||||
|
||||
model = OpenAIChatModel(
|
||||
model_name=stage_config.model_code,
|
||||
api_key=self._litellm_service.proxy_api_key,
|
||||
api_key=stage_config.api_key,
|
||||
stream=False,
|
||||
client_kwargs={"base_url": self._litellm_service.proxy_base_url},
|
||||
client_kwargs={"base_url": stage_config.api_base_url},
|
||||
generate_kwargs=generate_kwargs,
|
||||
)
|
||||
return TrackingChatModel(model)
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from services.litellm.service import (
|
||||
LiteLLMResponseWithCost,
|
||||
LiteLLMService,
|
||||
LiteLLMUsage,
|
||||
)
|
||||
from services.litellm.service import LiteLLMService
|
||||
|
||||
__all__ = ["LiteLLMService", "LiteLLMUsage", "LiteLLMResponseWithCost"]
|
||||
__all__ = ["LiteLLMService"]
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable
|
||||
from typing import Any
|
||||
|
||||
from litellm import completion
|
||||
|
||||
from core.config.settings import config
|
||||
from core.config.initial.init_data import load_llm_catalog
|
||||
|
||||
|
||||
@@ -17,34 +14,10 @@ class PricingTier:
|
||||
cache_hit_cost_per_token: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LiteLLMUsage:
    """Immutable token counts and computed cost for one completion call."""

    # Number of tokens in the prompt.
    prompt_tokens: int
    # Number of tokens in the generated completion.
    completion_tokens: int
    # Total token count for the call.
    total_tokens: int
    # Number of prompt tokens counted as cached.
    cached_prompt_tokens: int
    # Computed cost of the call.
    cost: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LiteLLMResponseWithCost:
    """Immutable pairing of a completion response with its usage summary."""

    # The completion response as a plain dictionary.
    response: dict[str, Any]
    # Token counts and cost associated with the response.
    usage: LiteLLMUsage
|
||||
|
||||
|
||||
class LiteLLMService:
|
||||
proxy_base_url: str
|
||||
proxy_api_key: str
|
||||
_pricing_by_model: dict[str, tuple[PricingTier, ...]]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
proxy_base_url: str | None = None,
|
||||
proxy_api_key: str | None = None,
|
||||
) -> None:
|
||||
self.proxy_base_url = proxy_base_url or config.litellm.base_url
|
||||
self.proxy_api_key = proxy_api_key or config.litellm.api_key
|
||||
def __init__(self) -> None:
|
||||
self._pricing_by_model = self._build_pricing_map()
|
||||
|
||||
@staticmethod
|
||||
@@ -142,78 +115,3 @@ class LiteLLMService:
|
||||
"cost": cost,
|
||||
"latencyMs": latency_ms,
|
||||
}
|
||||
|
||||
def run_completion_with_cost(
    self,
    *,
    model: str,
    messages: list[dict[str, Any]],
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: float | None = None,
    response_format: dict[str, Any] | None = None,
    completion_fn: Callable[..., dict[str, Any]] | None = None,
) -> LiteLLMResponseWithCost:
    """Run one non-streaming completion and attach token usage and cost.

    Args:
        model: Model identifier; ``openai/`` is prepended unless already
            present.
        messages: Chat messages forwarded to the completion call.
        temperature: Forwarded as-is (including ``None``).
        max_tokens: Forwarded as-is (including ``None``).
        timeout: Forwarded as-is (including ``None``).
        response_format: Forwarded only when not ``None``.
        completion_fn: Callable used instead of litellm's ``completion``
            when provided.

    Returns:
        The normalised response together with its usage summary.

    Raises:
        ValueError: If the response cannot be normalised to a dict or has
            no ``usage`` mapping.
    """
    caller = completion_fn or completion
    request_model = model if model.startswith("openai/") else f"openai/{model}"

    request_kwargs: dict[str, Any] = {
        "model": request_model,
        "api_key": self.proxy_api_key,
        "api_base": self.proxy_base_url,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "timeout": timeout,
        "stream": False,
    }
    if response_format is not None:
        request_kwargs["response_format"] = response_format

    response = self._normalize_response(caller(**request_kwargs))

    usage_raw = response.get("usage")
    if not isinstance(usage_raw, dict):
        raise ValueError("missing usage in response")

    prompt_tokens = int(usage_raw.get("prompt_tokens", 0) or 0)
    completion_tokens = int(usage_raw.get("completion_tokens", 0) or 0)
    # Fall back to the sum when total_tokens is absent, null or zero; a
    # present-but-null value must not collapse the total to 0.
    total_tokens = int(
        usage_raw.get("total_tokens") or (prompt_tokens + completion_tokens)
    )

    cached_prompt_tokens = 0
    prompt_tokens_details = usage_raw.get("prompt_tokens_details")
    if isinstance(prompt_tokens_details, dict):
        cached_prompt_tokens = int(
            prompt_tokens_details.get("cached_tokens", 0) or 0
        )

    # Use the requested model when the response carries no usable model
    # name, so a null value never becomes the literal string "None".
    resolved_model = str(response.get("model") or model).strip()
    cost = self.calculate_cost(
        model=resolved_model,
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        cached_prompt_tokens=cached_prompt_tokens,
    )
    return LiteLLMResponseWithCost(
        response=response,
        usage=LiteLLMUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            cached_prompt_tokens=cached_prompt_tokens,
            cost=cost,
        ),
    )
|
||||
|
||||
@staticmethod
|
||||
def _normalize_response(response_any: Any) -> dict[str, Any]:
|
||||
if isinstance(response_any, dict):
|
||||
return response_any
|
||||
model_dump = getattr(response_any, "model_dump", None)
|
||||
if callable(model_dump):
|
||||
dumped = model_dump()
|
||||
if isinstance(dumped, dict):
|
||||
return dumped
|
||||
raise ValueError("litellm response is not serializable")
|
||||
|
||||
@@ -133,6 +133,8 @@ async def test_execute_uses_router_ui_mode_to_select_worker_output_model(
|
||||
model_code="qwen3.5-flash"
|
||||
if kwargs["agent_type"] == AgentType.ROUTER
|
||||
else "deepseek-chat",
|
||||
api_base_url="https://example.com/v1",
|
||||
api_key="sk-test",
|
||||
llm_config=SystemAgentLLMConfig(
|
||||
temperature=0.1, max_tokens=256, timeout_seconds=30
|
||||
),
|
||||
@@ -233,6 +235,8 @@ async def test_execute_passes_runtime_client_time_to_router_and_worker(
|
||||
return SystemAgentRuntimeConfig(
|
||||
agent_type=kwargs["agent_type"],
|
||||
model_code="model-a",
|
||||
api_base_url="https://example.com/v1",
|
||||
api_key="sk-test",
|
||||
llm_config=SystemAgentLLMConfig(
|
||||
temperature=0.1, max_tokens=256, timeout_seconds=30
|
||||
),
|
||||
@@ -296,3 +300,29 @@ async def test_execute_passes_runtime_client_time_to_router_and_worker(
|
||||
|
||||
assert captured["router_timezone"] == "America/Los_Angeles"
|
||||
assert captured["worker_timezone"] == "America/Los_Angeles"
|
||||
|
||||
|
||||
def test_resolve_provider_api_key_maps_volcengine_to_ark(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The ``volcengine`` factory resolves to the key stored under ``ARK``."""
    configured_keys = {"ARK": "ark-key", "DASHSCOPE": "dash-key"}
    monkeypatch.setattr(
        "core.agentscope.runtime.runner.config.llm.provider_keys",
        configured_keys,
    )

    resolved = AgentScopeRunner._resolve_provider_api_key(factory_name="volcengine")

    assert resolved == "ark-key"
|
||||
|
||||
|
||||
def test_resolve_provider_api_key_raises_when_missing(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A factory with no configured key raises ``RuntimeError``."""
    configured_keys = {"DASHSCOPE": "dash-key"}
    monkeypatch.setattr(
        "core.agentscope.runtime.runner.config.llm.provider_keys",
        configured_keys,
    )

    expected_error = pytest.raises(RuntimeError, match="provider api key missing")
    with expected_error:
        AgentScopeRunner._resolve_provider_api_key(factory_name="deepseek")
|
||||
|
||||
@@ -31,37 +31,6 @@ def test_calculate_cost_uses_second_qwen_tier() -> None:
|
||||
assert cost == pytest.approx(0.1856)
|
||||
|
||||
|
||||
def test_run_completion_extracts_usage_and_cost() -> None:
    """Usage and cost are read from the response; ``response_format`` is forwarded."""
    seen_kwargs: dict[str, object] = {}
    canned_usage: dict[str, object] = {
        "prompt_tokens": 2000,
        "completion_tokens": 100,
        "total_tokens": 2100,
        "prompt_tokens_details": {"cached_tokens": 500},
    }
    canned_response: dict[str, object] = {
        "model": "dashscope/qwen3.5-flash",
        "usage": canned_usage,
        "choices": [{"message": {"content": "ok"}}],
    }

    def _fake_completion(**kwargs: object) -> dict[str, object]:
        # Record what the service sent, then answer with the canned payload.
        seen_kwargs.update(kwargs)
        return canned_response

    outcome = LiteLLMService().run_completion_with_cost(
        model="dashscope/qwen3.5-flash",
        messages=[{"role": "user", "content": "hello"}],
        response_format={"type": "json_object"},
        completion_fn=_fake_completion,
    )

    usage = outcome.usage
    assert usage.prompt_tokens == 2000
    assert usage.completion_tokens == 100
    assert usage.total_tokens == 2100
    assert usage.cost == pytest.approx(0.00051)
    assert seen_kwargs["response_format"] == {"type": "json_object"}
|
||||
|
||||
|
||||
def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
|
||||
service = LiteLLMService()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user