refactor: 移除 LiteLLM proxy 架构,后端直连 Provider API

- 移除 backend/scripts/build_litellm_proxy_config.py
- 简化 LiteLLMService,移除 run_completion_with_cost 方法
- AgentScopeRunner 改为从 LlmFactory 获取 api_base 和 api_key
- 部署配置移除 litellm/litellm-config-job 服务
- Flutter 新增 AuthBootScreen 引导页
- Android 添加通知权限 (POST_NOTIFICATIONS, RECEIVE_BOOT_COMPLETED, SCHEDULE_EXACT_ALARM)
- 优化 LocalNotificationService 调度失败 fallback
- 更新 manifest.json (version 3)
This commit is contained in:
qzl
2026-03-17 18:05:49 +08:00
parent cf56b358ad
commit 19981964fb
26 changed files with 417 additions and 1018 deletions
@@ -1,90 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
import yaml
from core.config.initial.init_data import load_llm_catalog
def _provider_key_env_name(factory_name: str) -> str:
normalized = factory_name.strip().upper()
if normalized == "VOLCENGINE":
normalized = "ARK"
return f"SOCIAL_LLM__PROVIDER_KEYS__{normalized}"
def build_proxy_config() -> dict[str, Any]:
    """Build the LiteLLM proxy configuration from the LLM catalog.

    Each catalog model that carries a model code, a factory name and a
    LiteLLM model id becomes one ``model_list`` entry. The entry points at
    the factory's ``request_url`` and refers to the provider key through an
    ``os.environ/<NAME>`` placeholder.

    Returns:
        A mapping with a single ``model_list`` key.

    Raises:
        ValueError: If ``factories``/``llms`` in the catalog are not lists,
            if a usable model's factory has no ``request_url``, or if no
            model qualifies at all.
    """
    catalog = load_llm_catalog()
    factories = catalog.get("factories", [])
    llms = catalog.get("llms", [])
    if not (isinstance(factories, list) and isinstance(llms, list)):
        raise ValueError("invalid llm catalog format")

    # Lower-cased factory name -> request URL; incomplete entries are dropped.
    url_by_factory: dict[str, str] = {}
    for entry in factories:
        if isinstance(entry, dict):
            lookup_name = str(entry.get("name", "")).strip().lower()
            url = str(entry.get("request_url", "")).strip()
            if lookup_name and url:
                url_by_factory[lookup_name] = url

    model_list: list[dict[str, Any]] = []
    for entry in llms:
        if not isinstance(entry, dict):
            continue
        code = str(entry.get("model_code", "")).strip()
        factory = str(entry.get("factory_name", "")).strip()
        litellm_id = str(entry.get("litellm_model", "")).strip()
        if not (code and factory and litellm_id):
            continue

        base_url = url_by_factory.get(factory.lower())
        if not base_url:
            raise ValueError(
                f"factory request_url missing for model {code}: {factory}"
            )

        key_env = _provider_key_env_name(factory)
        # Drop the provider prefix (text up to the first "/"), if there is one.
        _, slash, remainder = litellm_id.partition("/")
        provider_model = remainder if slash else litellm_id

        litellm_params = {
            "model": f"openai/{provider_model}",
            "api_base": base_url,
            "api_key": f"os.environ/{key_env}",
        }
        model_list.append({"model_name": code, "litellm_params": litellm_params})

    if not model_list:
        raise ValueError("no models found in llm catalog")
    return {"model_list": model_list}
def main() -> int:
    """Write the generated proxy config as YAML to the ``--output`` path.

    Missing parent directories of the output file are created first.

    Returns:
        ``0`` once the file has been written.
    """
    cli = argparse.ArgumentParser(description="Build LiteLLM proxy config")
    cli.add_argument("--output", required=True, help="Output YAML file path")
    target = Path(cli.parse_args().output).resolve()

    target.parent.mkdir(parents=True, exist_ok=True)
    proxy_config = build_proxy_config()
    with target.open("w", encoding="utf-8") as handle:
        yaml.safe_dump(proxy_config, handle, sort_keys=False, allow_unicode=False)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
+28 -5
View File
@@ -21,9 +21,11 @@ from core.agentscope.utils import (
finalize_json_response,
patch_agentscope_json_repair_compat,
)
from core.config.settings import config
from core.db.session import AsyncSessionLocal
from core.logging import get_logger
from models.llm import Llm
from models.llm_factory import LlmFactory
from models.system_agents import SystemAgents
from schemas.agent.runtime_models import (
RouterAgentOutput,
@@ -50,6 +52,8 @@ logger = get_logger("core.agentscope.runtime.runner")
class SystemAgentRuntimeConfig:
agent_type: AgentType
model_code: str
api_base_url: str
api_key: str
llm_config: SystemAgentLLMConfig
@@ -63,7 +67,7 @@ class StageExecutionResult:
class AgentScopeRunner:
def __init__(self, *, litellm_service: LiteLLMService | None = None) -> None:
patch_agentscope_json_repair_compat()
self._litellm_service = litellm_service or LiteLLMService()
self._litellm_service: LiteLLMService = litellm_service or LiteLLMService()
async def execute(
self,
@@ -221,23 +225,42 @@ class AgentScopeRunner:
agent_type: AgentType,
) -> SystemAgentRuntimeConfig:
stmt = (
select(SystemAgents, Llm)
select(SystemAgents, Llm, LlmFactory)
.join(Llm, SystemAgents.llm_id == Llm.id)
.join(LlmFactory, Llm.factory_id == LlmFactory.id)
.where(SystemAgents.agent_type == agent_type.value)
)
row = (await session.execute(stmt)).one_or_none()
if row is None:
raise RuntimeError(f"system agent config not found: {agent_type.value}")
system_agent, llm = row
system_agent, llm, factory = row
status = str(system_agent.status).strip().lower()
if status != "active":
raise RuntimeError(f"system agent is not active: {agent_type.value}")
return SystemAgentRuntimeConfig(
agent_type=agent_type,
model_code=llm.model_code,
api_base_url=factory.request_url,
api_key=self._resolve_provider_api_key(factory_name=factory.name),
llm_config=SystemAgentLLMConfig.model_validate(system_agent.config or {}),
)
@staticmethod
def _resolve_provider_api_key(*, factory_name: str) -> str:
    """Return the configured provider API key for a factory.

    The factory name is trimmed and upper-cased, and ``VOLCENGINE`` is looked
    up under ``ARK``. Configured key names are normalized the same way, so
    the lookup is case-insensitive.

    Args:
        factory_name: Name of the LLM factory whose key is wanted.

    Returns:
        The non-empty, whitespace-trimmed API key.

    Raises:
        RuntimeError: If no non-empty key is configured for the factory.
    """
    normalized_factory_name = factory_name.strip().upper()
    if normalized_factory_name == "VOLCENGINE":
        normalized_factory_name = "ARK"

    # Treat a missing mapping as empty so the caller gets the descriptive
    # RuntimeError below instead of an AttributeError on ``.items()``.
    configured_keys = config.llm.provider_keys or {}
    provider_keys = {
        str(key).strip().upper(): str(value).strip()
        for key, value in configured_keys.items()
        # Skip unset entries explicitly: str(None) is the truthy string
        # "None", which would otherwise be handed out as a real API key.
        if value is not None and str(value).strip()
    }

    api_key = provider_keys.get(normalized_factory_name, "")
    if not api_key:
        raise RuntimeError(f"provider api key missing for factory: {factory_name}")
    return api_key
async def _run_router_stage(
self,
*,
@@ -363,9 +386,9 @@ class AgentScopeRunner:
model = OpenAIChatModel(
model_name=stage_config.model_code,
api_key=self._litellm_service.proxy_api_key,
api_key=stage_config.api_key,
stream=False,
client_kwargs={"base_url": self._litellm_service.proxy_base_url},
client_kwargs={"base_url": stage_config.api_base_url},
generate_kwargs=generate_kwargs,
)
return TrackingChatModel(model)
+2 -6
View File
@@ -1,9 +1,5 @@
from __future__ import annotations
from services.litellm.service import (
LiteLLMResponseWithCost,
LiteLLMService,
LiteLLMUsage,
)
from services.litellm.service import LiteLLMService
__all__ = ["LiteLLMService", "LiteLLMUsage", "LiteLLMResponseWithCost"]
__all__ = ["LiteLLMService"]
+2 -104
View File
@@ -1,11 +1,8 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable
from typing import Any
from litellm import completion
from core.config.settings import config
from core.config.initial.init_data import load_llm_catalog
@@ -17,34 +14,10 @@ class PricingTier:
cache_hit_cost_per_token: float
@dataclass(frozen=True)
class LiteLLMUsage:
    """Immutable token counts and computed cost for one completion call."""

    # Read from the response's ``usage.prompt_tokens``.
    prompt_tokens: int
    # Read from the response's ``usage.completion_tokens``.
    completion_tokens: int
    # Read from the response's ``usage.total_tokens``.
    total_tokens: int
    # Read from ``usage.prompt_tokens_details.cached_tokens``; 0 when absent.
    cached_prompt_tokens: int
    # Value returned by ``LiteLLMService.calculate_cost`` for these counts.
    cost: float
@dataclass(frozen=True)
class LiteLLMResponseWithCost:
    """Immutable pairing of a completion response with its usage summary."""

    # The completion response in plain-dict form.
    response: dict[str, Any]
    # Token counts and cost extracted from ``response``.
    usage: LiteLLMUsage
class LiteLLMService:
proxy_base_url: str
proxy_api_key: str
_pricing_by_model: dict[str, tuple[PricingTier, ...]]
def __init__(
self,
*,
proxy_base_url: str | None = None,
proxy_api_key: str | None = None,
) -> None:
self.proxy_base_url = proxy_base_url or config.litellm.base_url
self.proxy_api_key = proxy_api_key or config.litellm.api_key
def __init__(self) -> None:
self._pricing_by_model = self._build_pricing_map()
@staticmethod
@@ -142,78 +115,3 @@ class LiteLLMService:
"cost": cost,
"latencyMs": latency_ms,
}
def run_completion_with_cost(
    self,
    *,
    model: str,
    messages: list[dict[str, Any]],
    temperature: float | None = None,
    max_tokens: int | None = None,
    timeout: float | None = None,
    response_format: dict[str, Any] | None = None,
    completion_fn: Callable[..., dict[str, Any]] | None = None,
) -> LiteLLMResponseWithCost:
    """Run a non-streaming completion and attach token usage and cost.

    The request goes to ``completion_fn`` when given, otherwise to
    ``litellm.completion``. It uses this service's proxy base URL and API
    key, and the model name is prefixed with ``openai/`` unless it already
    has that prefix.

    Args:
        model: Model name to request.
        messages: Chat messages forwarded unchanged.
        temperature: Forwarded as-is (``None`` included).
        max_tokens: Forwarded as-is (``None`` included).
        timeout: Forwarded as-is (``None`` included).
        response_format: Added to the request only when not ``None``.
        completion_fn: Replacement for ``litellm.completion``.

    Returns:
        The normalized response dict together with its usage summary.

    Raises:
        ValueError: If the response cannot be normalized to a dict or does
            not contain a ``usage`` dict.
    """
    caller = completion_fn or completion
    request_model = model if model.startswith("openai/") else f"openai/{model}"
    request_kwargs: dict[str, Any] = {
        "model": request_model,
        "api_key": self.proxy_api_key,
        "api_base": self.proxy_base_url,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "timeout": timeout,
        "stream": False,
    }
    if response_format is not None:
        request_kwargs["response_format"] = response_format

    response = self._normalize_response(caller(**request_kwargs))
    usage_raw = response.get("usage")
    if not isinstance(usage_raw, dict):
        raise ValueError("missing usage in response")

    prompt_tokens = int(usage_raw.get("prompt_tokens", 0) or 0)
    completion_tokens = int(usage_raw.get("completion_tokens", 0) or 0)
    # A missing, null or zero total falls back to the sum of the parts; a
    # ``dict.get`` default alone would not cover an explicit null/zero value.
    total_tokens = int(
        usage_raw.get("total_tokens") or (prompt_tokens + completion_tokens)
    )

    cached_prompt_tokens = 0
    prompt_tokens_details = usage_raw.get("prompt_tokens_details")
    if isinstance(prompt_tokens_details, dict):
        cached_prompt_tokens = int(
            prompt_tokens_details.get("cached_tokens", 0) or 0
        )

    # Price against the model the response reports, falling back to the
    # requested one when it is absent or null (str(None) would be "None").
    resolved_model = str(response.get("model") or model).strip()
    cost = self.calculate_cost(
        model=resolved_model,
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        cached_prompt_tokens=cached_prompt_tokens,
    )
    return LiteLLMResponseWithCost(
        response=response,
        usage=LiteLLMUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            cached_prompt_tokens=cached_prompt_tokens,
            cost=cost,
        ),
    )
@staticmethod
def _normalize_response(response_any: Any) -> dict[str, Any]:
if isinstance(response_any, dict):
return response_any
model_dump = getattr(response_any, "model_dump", None)
if callable(model_dump):
dumped = model_dump()
if isinstance(dumped, dict):
return dumped
raise ValueError("litellm response is not serializable")
@@ -133,6 +133,8 @@ async def test_execute_uses_router_ui_mode_to_select_worker_output_model(
model_code="qwen3.5-flash"
if kwargs["agent_type"] == AgentType.ROUTER
else "deepseek-chat",
api_base_url="https://example.com/v1",
api_key="sk-test",
llm_config=SystemAgentLLMConfig(
temperature=0.1, max_tokens=256, timeout_seconds=30
),
@@ -233,6 +235,8 @@ async def test_execute_passes_runtime_client_time_to_router_and_worker(
return SystemAgentRuntimeConfig(
agent_type=kwargs["agent_type"],
model_code="model-a",
api_base_url="https://example.com/v1",
api_key="sk-test",
llm_config=SystemAgentLLMConfig(
temperature=0.1, max_tokens=256, timeout_seconds=30
),
@@ -296,3 +300,29 @@ async def test_execute_passes_runtime_client_time_to_router_and_worker(
assert captured["router_timezone"] == "America/Los_Angeles"
assert captured["worker_timezone"] == "America/Los_Angeles"
def test_resolve_provider_api_key_maps_volcengine_to_ark(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A ``volcengine`` factory resolves to the key stored under ``ARK``."""
    configured_keys = {"ARK": "ark-key", "DASHSCOPE": "dash-key"}
    monkeypatch.setattr(
        "core.agentscope.runtime.runner.config.llm.provider_keys",
        configured_keys,
    )

    resolved = AgentScopeRunner._resolve_provider_api_key(factory_name="volcengine")

    assert resolved == "ark-key"
def test_resolve_provider_api_key_raises_when_missing(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A factory with no configured key makes the lookup raise."""
    configured_keys = {"DASHSCOPE": "dash-key"}
    monkeypatch.setattr(
        "core.agentscope.runtime.runner.config.llm.provider_keys",
        configured_keys,
    )

    expected_error = pytest.raises(RuntimeError, match="provider api key missing")
    with expected_error:
        AgentScopeRunner._resolve_provider_api_key(factory_name="deepseek")
@@ -31,37 +31,6 @@ def test_calculate_cost_uses_second_qwen_tier() -> None:
assert cost == pytest.approx(0.1856)
def test_run_completion_extracts_usage_and_cost() -> None:
    """Usage counts, cost and ``response_format`` pass-through are reported."""
    seen_kwargs: dict[str, object] = {}
    canned_usage = {
        "prompt_tokens": 2000,
        "completion_tokens": 100,
        "total_tokens": 2100,
        "prompt_tokens_details": {"cached_tokens": 500},
    }

    def _fake_completion(**kwargs: object) -> dict[str, object]:
        # Record what the service sent, then answer with a fixed payload.
        seen_kwargs.update(kwargs)
        return {
            "model": "dashscope/qwen3.5-flash",
            "usage": canned_usage,
            "choices": [{"message": {"content": "ok"}}],
        }

    service = LiteLLMService()
    outcome = service.run_completion_with_cost(
        model="dashscope/qwen3.5-flash",
        messages=[{"role": "user", "content": "hello"}],
        response_format={"type": "json_object"},
        completion_fn=_fake_completion,
    )

    usage = outcome.usage
    assert usage.prompt_tokens == 2000
    assert usage.completion_tokens == 100
    assert usage.total_tokens == 2100
    assert usage.cost == pytest.approx(0.00051)
    assert seen_kwargs["response_format"] == {"type": "json_object"}
def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
service = LiteLLMService()