refactor: 清理 litellm 冗余代码并锁定依赖版本

This commit is contained in:
zl-q
2026-03-25 20:08:34 +08:00
parent 96fc4a1e77
commit b765b9e3e1
9 changed files with 54 additions and 71 deletions
@@ -47,7 +47,7 @@ from schemas.agent.system_agent import (
from schemas.domain.automation import RuntimeConfig
from schemas.domain.memory_content import UserMemoryContent, WorkProfileContent
from schemas.shared.user import UserContext
from services.litellm.service import LiteLLMService
from services.llm_pricing.service import LlmPricingService
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -63,9 +63,11 @@ class StageExecutionResult:
class AgentScopeRunner:
def __init__(self, *, litellm_service: LiteLLMService | None = None) -> None:
def __init__(self, *, llm_pricing_service: LlmPricingService | None = None) -> None:
patch_agentscope_json_repair_compat()
self._litellm_service: LiteLLMService = litellm_service or LiteLLMService()
self._llm_pricing_service: LlmPricingService = (
llm_pricing_service or LlmPricingService()
)
self._active_agent: JsonReActAgent | None = None
self._active_agent_lock = asyncio.Lock()
@@ -355,7 +357,7 @@ class AgentScopeRunner:
return StageExecutionResult(
message=response_msg,
payload=payload,
response_metadata=self._litellm_service.build_usage_metadata(
response_metadata=self._llm_pricing_service.build_usage_metadata(
model=stage_config.model_code,
usage_summary=tracking_model.usage_summary(),
),
@@ -436,7 +438,7 @@ class AgentScopeRunner:
if self._active_agent is agent:
self._active_agent = None
worker_payload = worker_output_model.model_validate(response_msg.metadata or {})
response_metadata = self._litellm_service.build_usage_metadata(
response_metadata = self._llm_pricing_service.build_usage_metadata(
model=stage_config.model_code,
usage_summary=tracking_model.usage_summary(),
)
@@ -28,7 +28,6 @@ class LlmFactorySeed(BaseModel):
class LlmSeed(BaseModel):
model_code: str
factory_name: str
litellm_model: str
pricing_tiers: list[dict[str, float | int]]
+3 -10
View File
@@ -182,15 +182,8 @@ class LlmSettings(BaseModel):
provider_keys: dict[str, str] = Field(default_factory=dict)
class LiteLLMSettings(BaseModel):
host: str = "127.0.0.1"
port: int = 3875
api_key: str = "sk-local"
@computed_field
@property
def base_url(self) -> str:
return f"http://{self.host}:{self.port}/v1"
# Settings section for the LLM pricing service; it currently declares no fields.
# NOTE(review): presumably kept only so Settings exposes an `llm_pricing`
# section after the LiteLLM settings were removed — confirm an empty model is
# still needed, otherwise drop it together with the Settings attribute.
class LlmPricingSettings(BaseModel):
pass
class DatabaseSettings(BaseModel):
@@ -264,7 +257,7 @@ class Settings(BaseSettings):
supabase: SupabaseSettings = Field()
storage: StorageSettings = StorageSettings()
llm: LlmSettings = LlmSettings()
litellm: LiteLLMSettings = LiteLLMSettings()
llm_pricing: LlmPricingSettings = LlmPricingSettings()
agent_runtime: AgentRuntimeSettings = AgentRuntimeSettings()
automation_scheduler: AutomationSchedulerSettings = AutomationSchedulerSettings()
taskiq: TaskiqSettings = TaskiqSettings()
@@ -27,7 +27,6 @@ llms:
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
- model_code: qwen3.5-flash
factory_name: dashscope
litellm_model: dashscope/qwen3.5-flash
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000002
@@ -44,7 +43,6 @@ llms:
- model_code: qwen3.5-35b-a3b
factory_name: dashscope
litellm_model: dashscope/qwen3.5-35b-a3b
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000004
@@ -55,7 +53,6 @@ llms:
- model_code: deepseek-chat
factory_name: deepseek
litellm_model: deepseek/deepseek-chat
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.000002
@@ -64,7 +61,6 @@ llms:
- model_code: qwen3.5-27b
factory_name: dashscope
litellm_model: dashscope/qwen3.5-27b
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000006
-5
View File
@@ -1,5 +0,0 @@
from __future__ import annotations
from services.litellm.service import LiteLLMService
__all__ = ["LiteLLMService"]
@@ -0,0 +1,5 @@
from __future__ import annotations
from services.llm_pricing.service import LlmPricingService
__all__ = ["LlmPricingService"]
@@ -14,7 +14,7 @@ class PricingTier:
cache_hit_cost_per_token: float
class LiteLLMService:
class LlmPricingService:
_pricing_by_model: dict[str, tuple[PricingTier, ...]]
def __init__(self) -> None:
@@ -28,7 +28,6 @@ class LiteLLMService:
if not isinstance(model, dict):
continue
model_code = str(model.get("model_code", "")).strip().lower()
litellm_model = str(model.get("litellm_model", "")).strip().lower()
raw_tiers = model.get("pricing_tiers")
if not isinstance(raw_tiers, list) or not raw_tiers:
continue
@@ -56,8 +55,6 @@ class LiteLLMService:
)
if model_code:
pricing_by_model[model_code] = ordered_tiers
if litellm_model:
pricing_by_model[litellm_model] = ordered_tiers
return pricing_by_model
def calculate_cost(
@@ -2,14 +2,14 @@ from __future__ import annotations
import pytest
from services.litellm.service import LiteLLMService
from services.llm_pricing.service import LlmPricingService
def test_calculate_cost_uses_first_qwen_tier() -> None:
service = LiteLLMService()
service = LlmPricingService()
cost = service.calculate_cost(
model="dashscope/qwen3.5-flash",
model="qwen3.5-flash",
prompt_tokens=100_000,
completion_tokens=1_000,
cached_prompt_tokens=10_000,
@@ -19,10 +19,10 @@ def test_calculate_cost_uses_first_qwen_tier() -> None:
def test_calculate_cost_uses_second_qwen_tier() -> None:
service = LiteLLMService()
service = LlmPricingService()
cost = service.calculate_cost(
model="dashscope/qwen3.5-flash",
model="qwen3.5-flash",
prompt_tokens=200_000,
completion_tokens=5_000,
cached_prompt_tokens=20_000,
@@ -32,10 +32,10 @@ def test_calculate_cost_uses_second_qwen_tier() -> None:
def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
service = LiteLLMService()
service = LlmPricingService()
metadata = service.build_usage_metadata(
model="dashscope/qwen3.5-flash",
model="qwen3.5-flash",
usage_summary={
"input_tokens": 2000,
"output_tokens": 100,
@@ -44,7 +44,7 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
},
)
assert metadata["model"] == "dashscope/qwen3.5-flash"
assert metadata["model"] == "qwen3.5-flash"
assert metadata["inputTokens"] == 2000
assert metadata["outputTokens"] == 100
assert metadata["totalTokens"] == 2100
@@ -59,7 +59,7 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
def test_build_usage_metadata_prefers_provider_direct_cost() -> None:
service = LiteLLMService()
service = LlmPricingService()
metadata = service.build_usage_metadata(
model="deepseek-chat",
@@ -80,7 +80,7 @@ def test_build_usage_metadata_prefers_provider_direct_cost() -> None:
def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None:
service = LiteLLMService()
service = LlmPricingService()
metadata = service.build_usage_metadata(
model="deepseek-chat",
@@ -100,7 +100,7 @@ def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None
def test_build_usage_metadata_marks_incomplete_usage_fallback() -> None:
service = LiteLLMService()
service = LlmPricingService()
metadata = service.build_usage_metadata(
model="deepseek-chat",