refactor: 清理 litellm 冗余代码并锁定依赖版本
This commit is contained in:
@@ -47,7 +47,7 @@ from schemas.agent.system_agent import (
|
||||
from schemas.domain.automation import RuntimeConfig
|
||||
from schemas.domain.memory_content import UserMemoryContent, WorkProfileContent
|
||||
from schemas.shared.user import UserContext
|
||||
from services.litellm.service import LiteLLMService
|
||||
from services.llm_pricing.service import LlmPricingService
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -63,9 +63,11 @@ class StageExecutionResult:
|
||||
|
||||
|
||||
class AgentScopeRunner:
|
||||
def __init__(self, *, litellm_service: LiteLLMService | None = None) -> None:
|
||||
def __init__(self, *, llm_pricing_service: LlmPricingService | None = None) -> None:
|
||||
patch_agentscope_json_repair_compat()
|
||||
self._litellm_service: LiteLLMService = litellm_service or LiteLLMService()
|
||||
self._llm_pricing_service: LlmPricingService = (
|
||||
llm_pricing_service or LlmPricingService()
|
||||
)
|
||||
self._active_agent: JsonReActAgent | None = None
|
||||
self._active_agent_lock = asyncio.Lock()
|
||||
|
||||
@@ -355,7 +357,7 @@ class AgentScopeRunner:
|
||||
return StageExecutionResult(
|
||||
message=response_msg,
|
||||
payload=payload,
|
||||
response_metadata=self._litellm_service.build_usage_metadata(
|
||||
response_metadata=self._llm_pricing_service.build_usage_metadata(
|
||||
model=stage_config.model_code,
|
||||
usage_summary=tracking_model.usage_summary(),
|
||||
),
|
||||
@@ -436,7 +438,7 @@ class AgentScopeRunner:
|
||||
if self._active_agent is agent:
|
||||
self._active_agent = None
|
||||
worker_payload = worker_output_model.model_validate(response_msg.metadata or {})
|
||||
response_metadata = self._litellm_service.build_usage_metadata(
|
||||
response_metadata = self._llm_pricing_service.build_usage_metadata(
|
||||
model=stage_config.model_code,
|
||||
usage_summary=tracking_model.usage_summary(),
|
||||
)
|
||||
|
||||
@@ -28,7 +28,6 @@ class LlmFactorySeed(BaseModel):
|
||||
class LlmSeed(BaseModel):
|
||||
model_code: str
|
||||
factory_name: str
|
||||
litellm_model: str
|
||||
pricing_tiers: list[dict[str, float | int]]
|
||||
|
||||
|
||||
|
||||
@@ -182,15 +182,8 @@ class LlmSettings(BaseModel):
|
||||
provider_keys: dict[str, str] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class LiteLLMSettings(BaseModel):
|
||||
host: str = "127.0.0.1"
|
||||
port: int = 3875
|
||||
api_key: str = "sk-local"
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def base_url(self) -> str:
|
||||
return f"http://{self.host}:{self.port}/v1"
|
||||
class LlmPricingSettings(BaseModel):
|
||||
pass
|
||||
|
||||
|
||||
class DatabaseSettings(BaseModel):
|
||||
@@ -264,7 +257,7 @@ class Settings(BaseSettings):
|
||||
supabase: SupabaseSettings = Field()
|
||||
storage: StorageSettings = StorageSettings()
|
||||
llm: LlmSettings = LlmSettings()
|
||||
litellm: LiteLLMSettings = LiteLLMSettings()
|
||||
llm_pricing: LlmPricingSettings = LlmPricingSettings()
|
||||
agent_runtime: AgentRuntimeSettings = AgentRuntimeSettings()
|
||||
automation_scheduler: AutomationSchedulerSettings = AutomationSchedulerSettings()
|
||||
taskiq: TaskiqSettings = TaskiqSettings()
|
||||
|
||||
@@ -27,7 +27,6 @@ llms:
|
||||
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
|
||||
- model_code: qwen3.5-flash
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-flash
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000002
|
||||
@@ -44,7 +43,6 @@ llms:
|
||||
|
||||
- model_code: qwen3.5-35b-a3b
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-35b-a3b
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000004
|
||||
@@ -55,7 +53,6 @@ llms:
|
||||
|
||||
- model_code: deepseek-chat
|
||||
factory_name: deepseek
|
||||
litellm_model: deepseek/deepseek-chat
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.000002
|
||||
@@ -64,7 +61,6 @@ llms:
|
||||
|
||||
- model_code: qwen3.5-27b
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-27b
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000006
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from services.litellm.service import LiteLLMService
|
||||
|
||||
__all__ = ["LiteLLMService"]
|
||||
@@ -0,0 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from services.llm_pricing.service import LlmPricingService
|
||||
|
||||
__all__ = ["LlmPricingService"]
|
||||
+1
-4
@@ -14,7 +14,7 @@ class PricingTier:
|
||||
cache_hit_cost_per_token: float
|
||||
|
||||
|
||||
class LiteLLMService:
|
||||
class LlmPricingService:
|
||||
_pricing_by_model: dict[str, tuple[PricingTier, ...]]
|
||||
|
||||
def __init__(self) -> None:
|
||||
@@ -28,7 +28,6 @@ class LiteLLMService:
|
||||
if not isinstance(model, dict):
|
||||
continue
|
||||
model_code = str(model.get("model_code", "")).strip().lower()
|
||||
litellm_model = str(model.get("litellm_model", "")).strip().lower()
|
||||
raw_tiers = model.get("pricing_tiers")
|
||||
if not isinstance(raw_tiers, list) or not raw_tiers:
|
||||
continue
|
||||
@@ -56,8 +55,6 @@ class LiteLLMService:
|
||||
)
|
||||
if model_code:
|
||||
pricing_by_model[model_code] = ordered_tiers
|
||||
if litellm_model:
|
||||
pricing_by_model[litellm_model] = ordered_tiers
|
||||
return pricing_by_model
|
||||
|
||||
def calculate_cost(
|
||||
+11
-11
@@ -2,14 +2,14 @@ from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from services.litellm.service import LiteLLMService
|
||||
from services.llm_pricing.service import LlmPricingService
|
||||
|
||||
|
||||
def test_calculate_cost_uses_first_qwen_tier() -> None:
|
||||
service = LiteLLMService()
|
||||
service = LlmPricingService()
|
||||
|
||||
cost = service.calculate_cost(
|
||||
model="dashscope/qwen3.5-flash",
|
||||
model="qwen3.5-flash",
|
||||
prompt_tokens=100_000,
|
||||
completion_tokens=1_000,
|
||||
cached_prompt_tokens=10_000,
|
||||
@@ -19,10 +19,10 @@ def test_calculate_cost_uses_first_qwen_tier() -> None:
|
||||
|
||||
|
||||
def test_calculate_cost_uses_second_qwen_tier() -> None:
|
||||
service = LiteLLMService()
|
||||
service = LlmPricingService()
|
||||
|
||||
cost = service.calculate_cost(
|
||||
model="dashscope/qwen3.5-flash",
|
||||
model="qwen3.5-flash",
|
||||
prompt_tokens=200_000,
|
||||
completion_tokens=5_000,
|
||||
cached_prompt_tokens=20_000,
|
||||
@@ -32,10 +32,10 @@ def test_calculate_cost_uses_second_qwen_tier() -> None:
|
||||
|
||||
|
||||
def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
|
||||
service = LiteLLMService()
|
||||
service = LlmPricingService()
|
||||
|
||||
metadata = service.build_usage_metadata(
|
||||
model="dashscope/qwen3.5-flash",
|
||||
model="qwen3.5-flash",
|
||||
usage_summary={
|
||||
"input_tokens": 2000,
|
||||
"output_tokens": 100,
|
||||
@@ -44,7 +44,7 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
|
||||
},
|
||||
)
|
||||
|
||||
assert metadata["model"] == "dashscope/qwen3.5-flash"
|
||||
assert metadata["model"] == "qwen3.5-flash"
|
||||
assert metadata["inputTokens"] == 2000
|
||||
assert metadata["outputTokens"] == 100
|
||||
assert metadata["totalTokens"] == 2100
|
||||
@@ -59,7 +59,7 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
|
||||
|
||||
|
||||
def test_build_usage_metadata_prefers_provider_direct_cost() -> None:
|
||||
service = LiteLLMService()
|
||||
service = LlmPricingService()
|
||||
|
||||
metadata = service.build_usage_metadata(
|
||||
model="deepseek-chat",
|
||||
@@ -80,7 +80,7 @@ def test_build_usage_metadata_prefers_provider_direct_cost() -> None:
|
||||
|
||||
|
||||
def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None:
|
||||
service = LiteLLMService()
|
||||
service = LlmPricingService()
|
||||
|
||||
metadata = service.build_usage_metadata(
|
||||
model="deepseek-chat",
|
||||
@@ -100,7 +100,7 @@ def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None
|
||||
|
||||
|
||||
def test_build_usage_metadata_marks_incomplete_usage_fallback() -> None:
|
||||
service = LiteLLMService()
|
||||
service = LlmPricingService()
|
||||
|
||||
metadata = service.build_usage_metadata(
|
||||
model="deepseek-chat",
|
||||
Reference in New Issue
Block a user