refactor: 清理 litellm 冗余代码并锁定依赖版本
This commit is contained in:
@@ -47,7 +47,7 @@ from schemas.agent.system_agent import (
|
||||
from schemas.domain.automation import RuntimeConfig
|
||||
from schemas.domain.memory_content import UserMemoryContent, WorkProfileContent
|
||||
from schemas.shared.user import UserContext
|
||||
from services.litellm.service import LiteLLMService
|
||||
from services.llm_pricing.service import LlmPricingService
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -63,9 +63,11 @@ class StageExecutionResult:
|
||||
|
||||
|
||||
class AgentScopeRunner:
|
||||
def __init__(self, *, litellm_service: LiteLLMService | None = None) -> None:
|
||||
def __init__(self, *, llm_pricing_service: LlmPricingService | None = None) -> None:
|
||||
patch_agentscope_json_repair_compat()
|
||||
self._litellm_service: LiteLLMService = litellm_service or LiteLLMService()
|
||||
self._llm_pricing_service: LlmPricingService = (
|
||||
llm_pricing_service or LlmPricingService()
|
||||
)
|
||||
self._active_agent: JsonReActAgent | None = None
|
||||
self._active_agent_lock = asyncio.Lock()
|
||||
|
||||
@@ -355,7 +357,7 @@ class AgentScopeRunner:
|
||||
return StageExecutionResult(
|
||||
message=response_msg,
|
||||
payload=payload,
|
||||
response_metadata=self._litellm_service.build_usage_metadata(
|
||||
response_metadata=self._llm_pricing_service.build_usage_metadata(
|
||||
model=stage_config.model_code,
|
||||
usage_summary=tracking_model.usage_summary(),
|
||||
),
|
||||
@@ -436,7 +438,7 @@ class AgentScopeRunner:
|
||||
if self._active_agent is agent:
|
||||
self._active_agent = None
|
||||
worker_payload = worker_output_model.model_validate(response_msg.metadata or {})
|
||||
response_metadata = self._litellm_service.build_usage_metadata(
|
||||
response_metadata = self._llm_pricing_service.build_usage_metadata(
|
||||
model=stage_config.model_code,
|
||||
usage_summary=tracking_model.usage_summary(),
|
||||
)
|
||||
|
||||
@@ -28,7 +28,6 @@ class LlmFactorySeed(BaseModel):
|
||||
class LlmSeed(BaseModel):
|
||||
model_code: str
|
||||
factory_name: str
|
||||
litellm_model: str
|
||||
pricing_tiers: list[dict[str, float | int]]
|
||||
|
||||
|
||||
|
||||
@@ -182,15 +182,8 @@ class LlmSettings(BaseModel):
|
||||
provider_keys: dict[str, str] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class LiteLLMSettings(BaseModel):
|
||||
host: str = "127.0.0.1"
|
||||
port: int = 3875
|
||||
api_key: str = "sk-local"
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def base_url(self) -> str:
|
||||
return f"http://{self.host}:{self.port}/v1"
|
||||
class LlmPricingSettings(BaseModel):
|
||||
pass
|
||||
|
||||
|
||||
class DatabaseSettings(BaseModel):
|
||||
@@ -264,7 +257,7 @@ class Settings(BaseSettings):
|
||||
supabase: SupabaseSettings = Field()
|
||||
storage: StorageSettings = StorageSettings()
|
||||
llm: LlmSettings = LlmSettings()
|
||||
litellm: LiteLLMSettings = LiteLLMSettings()
|
||||
llm_pricing: LlmPricingSettings = LlmPricingSettings()
|
||||
agent_runtime: AgentRuntimeSettings = AgentRuntimeSettings()
|
||||
automation_scheduler: AutomationSchedulerSettings = AutomationSchedulerSettings()
|
||||
taskiq: TaskiqSettings = TaskiqSettings()
|
||||
|
||||
@@ -27,7 +27,6 @@ llms:
|
||||
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
|
||||
- model_code: qwen3.5-flash
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-flash
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000002
|
||||
@@ -44,7 +43,6 @@ llms:
|
||||
|
||||
- model_code: qwen3.5-35b-a3b
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-35b-a3b
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000004
|
||||
@@ -55,7 +53,6 @@ llms:
|
||||
|
||||
- model_code: deepseek-chat
|
||||
factory_name: deepseek
|
||||
litellm_model: deepseek/deepseek-chat
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.000002
|
||||
@@ -64,7 +61,6 @@ llms:
|
||||
|
||||
- model_code: qwen3.5-27b
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-27b
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000006
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from services.litellm.service import LiteLLMService
|
||||
|
||||
__all__ = ["LiteLLMService"]
|
||||
@@ -0,0 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from services.llm_pricing.service import LlmPricingService
|
||||
|
||||
__all__ = ["LlmPricingService"]
|
||||
+1
-4
@@ -14,7 +14,7 @@ class PricingTier:
|
||||
cache_hit_cost_per_token: float
|
||||
|
||||
|
||||
class LiteLLMService:
|
||||
class LlmPricingService:
|
||||
_pricing_by_model: dict[str, tuple[PricingTier, ...]]
|
||||
|
||||
def __init__(self) -> None:
|
||||
@@ -28,7 +28,6 @@ class LiteLLMService:
|
||||
if not isinstance(model, dict):
|
||||
continue
|
||||
model_code = str(model.get("model_code", "")).strip().lower()
|
||||
litellm_model = str(model.get("litellm_model", "")).strip().lower()
|
||||
raw_tiers = model.get("pricing_tiers")
|
||||
if not isinstance(raw_tiers, list) or not raw_tiers:
|
||||
continue
|
||||
@@ -56,8 +55,6 @@ class LiteLLMService:
|
||||
)
|
||||
if model_code:
|
||||
pricing_by_model[model_code] = ordered_tiers
|
||||
if litellm_model:
|
||||
pricing_by_model[litellm_model] = ordered_tiers
|
||||
return pricing_by_model
|
||||
|
||||
def calculate_cost(
|
||||
Reference in New Issue
Block a user