refactor: 移除 LiteLLM proxy 架构,后端直连 Provider API

- 移除 backend/scripts/build_litellm_proxy_config.py
- 简化 LiteLLMService,移除 run_completion_with_cost 方法
- AgentScopeRunner 改为从 LlmFactory 获取 api_base 和 api_key
- 部署配置移除 litellm/litellm-config-job 服务
- Flutter 新增 AuthBootScreen 引导页
- Android 添加通知权限 (POST_NOTIFICATIONS, RECEIVE_BOOT_COMPLETED, SCHEDULE_EXACT_ALARM)
- 优化 LocalNotificationService 调度失败 fallback
- 更新 manifest.json (version 3)
This commit is contained in:
qzl
2026-03-17 18:05:49 +08:00
parent cf56b358ad
commit 19981964fb
26 changed files with 417 additions and 1018 deletions
+2 -6
View File
@@ -1,9 +1,5 @@
from __future__ import annotations
from services.litellm.service import (
LiteLLMResponseWithCost,
LiteLLMService,
LiteLLMUsage,
)
from services.litellm.service import LiteLLMService
__all__ = ["LiteLLMService", "LiteLLMUsage", "LiteLLMResponseWithCost"]
__all__ = ["LiteLLMService"]
+2 -104
View File
@@ -1,11 +1,8 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable
from typing import Any
from litellm import completion
from core.config.settings import config
from core.config.initial.init_data import load_llm_catalog
@@ -17,34 +14,10 @@ class PricingTier:
cache_hit_cost_per_token: float
@dataclass(frozen=True)
class LiteLLMUsage:
prompt_tokens: int
completion_tokens: int
total_tokens: int
cached_prompt_tokens: int
cost: float
@dataclass(frozen=True)
class LiteLLMResponseWithCost:
response: dict[str, Any]
usage: LiteLLMUsage
class LiteLLMService:
proxy_base_url: str
proxy_api_key: str
_pricing_by_model: dict[str, tuple[PricingTier, ...]]
def __init__(
self,
*,
proxy_base_url: str | None = None,
proxy_api_key: str | None = None,
) -> None:
self.proxy_base_url = proxy_base_url or config.litellm.base_url
self.proxy_api_key = proxy_api_key or config.litellm.api_key
def __init__(self) -> None:
self._pricing_by_model = self._build_pricing_map()
@staticmethod
@@ -142,78 +115,3 @@ class LiteLLMService:
"cost": cost,
"latencyMs": latency_ms,
}
def run_completion_with_cost(
self,
*,
model: str,
messages: list[dict[str, Any]],
temperature: float | None = None,
max_tokens: int | None = None,
timeout: float | None = None,
response_format: dict[str, Any] | None = None,
completion_fn: Callable[..., dict[str, Any]] | None = None,
) -> LiteLLMResponseWithCost:
caller = completion_fn or completion
request_model = model if model.startswith("openai/") else f"openai/{model}"
request_kwargs: dict[str, Any] = {
"model": request_model,
"api_key": self.proxy_api_key,
"api_base": self.proxy_base_url,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
"timeout": timeout,
"stream": False,
}
if response_format is not None:
request_kwargs["response_format"] = response_format
response_any = caller(**request_kwargs)
response = self._normalize_response(response_any)
usage_raw = response.get("usage")
if not isinstance(usage_raw, dict):
raise ValueError("missing usage in response")
prompt_tokens = int(usage_raw.get("prompt_tokens", 0) or 0)
completion_tokens = int(usage_raw.get("completion_tokens", 0) or 0)
total_tokens = int(
usage_raw.get("total_tokens", prompt_tokens + completion_tokens) or 0
)
cached_prompt_tokens = 0
prompt_tokens_details = usage_raw.get("prompt_tokens_details")
if isinstance(prompt_tokens_details, dict):
cached_prompt_tokens = int(
prompt_tokens_details.get("cached_tokens", 0) or 0
)
resolved_model = str(response.get("model", model)).strip()
cost = self.calculate_cost(
model=resolved_model,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
cached_prompt_tokens=cached_prompt_tokens,
)
return LiteLLMResponseWithCost(
response=response,
usage=LiteLLMUsage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
cached_prompt_tokens=cached_prompt_tokens,
cost=cost,
),
)
@staticmethod
def _normalize_response(response_any: Any) -> dict[str, Any]:
if isinstance(response_any, dict):
return response_any
model_dump = getattr(response_any, "model_dump", None)
if callable(model_dump):
dumped = model_dump()
if isinstance(dumped, dict):
return dumped
raise ValueError("litellm response is not serializable")