refactor: 清理 litellm 冗余代码并锁定依赖版本

This commit is contained in:
zl-q
2026-03-25 20:08:34 +08:00
parent 96fc4a1e77
commit b765b9e3e1
9 changed files with 54 additions and 71 deletions
@@ -47,7 +47,7 @@ from schemas.agent.system_agent import (
from schemas.domain.automation import RuntimeConfig
from schemas.domain.memory_content import UserMemoryContent, WorkProfileContent
from schemas.shared.user import UserContext
-from services.litellm.service import LiteLLMService
+from services.llm_pricing.service import LlmPricingService
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -63,9 +63,11 @@ class StageExecutionResult:
class AgentScopeRunner:
-def __init__(self, *, litellm_service: LiteLLMService | None = None) -> None:
+def __init__(self, *, llm_pricing_service: LlmPricingService | None = None) -> None:
patch_agentscope_json_repair_compat()
-self._litellm_service: LiteLLMService = litellm_service or LiteLLMService()
+self._llm_pricing_service: LlmPricingService = (
+llm_pricing_service or LlmPricingService()
+)
self._active_agent: JsonReActAgent | None = None
self._active_agent_lock = asyncio.Lock()
@@ -355,7 +357,7 @@ class AgentScopeRunner:
return StageExecutionResult(
message=response_msg,
payload=payload,
-response_metadata=self._litellm_service.build_usage_metadata(
+response_metadata=self._llm_pricing_service.build_usage_metadata(
model=stage_config.model_code,
usage_summary=tracking_model.usage_summary(),
),
@@ -436,7 +438,7 @@ class AgentScopeRunner:
if self._active_agent is agent:
self._active_agent = None
worker_payload = worker_output_model.model_validate(response_msg.metadata or {})
-response_metadata = self._litellm_service.build_usage_metadata(
+response_metadata = self._llm_pricing_service.build_usage_metadata(
model=stage_config.model_code,
usage_summary=tracking_model.usage_summary(),
)
@@ -28,7 +28,6 @@ class LlmFactorySeed(BaseModel):
class LlmSeed(BaseModel):
model_code: str
factory_name: str
-litellm_model: str
pricing_tiers: list[dict[str, float | int]]
+3 -10
View File
@@ -182,15 +182,8 @@ class LlmSettings(BaseModel):
provider_keys: dict[str, str] = Field(default_factory=dict)
-class LiteLLMSettings(BaseModel):
-host: str = "127.0.0.1"
-port: int = 3875
-api_key: str = "sk-local"
-@computed_field
-@property
-def base_url(self) -> str:
-return f"http://{self.host}:{self.port}/v1"
+class LlmPricingSettings(BaseModel):
+pass
class DatabaseSettings(BaseModel):
@@ -264,7 +257,7 @@ class Settings(BaseSettings):
supabase: SupabaseSettings = Field()
storage: StorageSettings = StorageSettings()
llm: LlmSettings = LlmSettings()
-litellm: LiteLLMSettings = LiteLLMSettings()
+llm_pricing: LlmPricingSettings = LlmPricingSettings()
agent_runtime: AgentRuntimeSettings = AgentRuntimeSettings()
automation_scheduler: AutomationSchedulerSettings = AutomationSchedulerSettings()
taskiq: TaskiqSettings = TaskiqSettings()
@@ -27,7 +27,6 @@ llms:
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
- model_code: qwen3.5-flash
factory_name: dashscope
-litellm_model: dashscope/qwen3.5-flash
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000002
@@ -44,7 +43,6 @@ llms:
- model_code: qwen3.5-35b-a3b
factory_name: dashscope
-litellm_model: dashscope/qwen3.5-35b-a3b
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000004
@@ -55,7 +53,6 @@ llms:
- model_code: deepseek-chat
factory_name: deepseek
-litellm_model: deepseek/deepseek-chat
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.000002
@@ -64,7 +61,6 @@ llms:
- model_code: qwen3.5-27b
factory_name: dashscope
-litellm_model: dashscope/qwen3.5-27b
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000006
-5
View File
@@ -1,5 +0,0 @@
-from __future__ import annotations
-from services.litellm.service import LiteLLMService
-__all__ = ["LiteLLMService"]
@@ -0,0 +1,5 @@
+from __future__ import annotations
+from services.llm_pricing.service import LlmPricingService
+__all__ = ["LlmPricingService"]
@@ -14,7 +14,7 @@ class PricingTier:
cache_hit_cost_per_token: float
-class LiteLLMService:
+class LlmPricingService:
_pricing_by_model: dict[str, tuple[PricingTier, ...]]
def __init__(self) -> None:
@@ -28,7 +28,6 @@ class LiteLLMService:
if not isinstance(model, dict):
continue
model_code = str(model.get("model_code", "")).strip().lower()
-litellm_model = str(model.get("litellm_model", "")).strip().lower()
raw_tiers = model.get("pricing_tiers")
if not isinstance(raw_tiers, list) or not raw_tiers:
continue
@@ -56,8 +55,6 @@ class LiteLLMService:
)
if model_code:
pricing_by_model[model_code] = ordered_tiers
-if litellm_model:
-pricing_by_model[litellm_model] = ordered_tiers
return pricing_by_model
def calculate_cost(
@@ -2,14 +2,14 @@ from __future__ import annotations
import pytest
-from services.litellm.service import LiteLLMService
+from services.llm_pricing.service import LlmPricingService
def test_calculate_cost_uses_first_qwen_tier() -> None:
-service = LiteLLMService()
+service = LlmPricingService()
cost = service.calculate_cost(
-model="dashscope/qwen3.5-flash",
+model="qwen3.5-flash",
prompt_tokens=100_000,
completion_tokens=1_000,
cached_prompt_tokens=10_000,
@@ -19,10 +19,10 @@ def test_calculate_cost_uses_first_qwen_tier() -> None:
def test_calculate_cost_uses_second_qwen_tier() -> None:
-service = LiteLLMService()
+service = LlmPricingService()
cost = service.calculate_cost(
-model="dashscope/qwen3.5-flash",
+model="qwen3.5-flash",
prompt_tokens=200_000,
completion_tokens=5_000,
cached_prompt_tokens=20_000,
@@ -32,10 +32,10 @@ def test_calculate_cost_uses_second_qwen_tier() -> None:
def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
-service = LiteLLMService()
+service = LlmPricingService()
metadata = service.build_usage_metadata(
-model="dashscope/qwen3.5-flash",
+model="qwen3.5-flash",
usage_summary={
"input_tokens": 2000,
"output_tokens": 100,
@@ -44,7 +44,7 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
},
)
-assert metadata["model"] == "dashscope/qwen3.5-flash"
+assert metadata["model"] == "qwen3.5-flash"
assert metadata["inputTokens"] == 2000
assert metadata["outputTokens"] == 100
assert metadata["totalTokens"] == 2100
@@ -59,7 +59,7 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
def test_build_usage_metadata_prefers_provider_direct_cost() -> None:
-service = LiteLLMService()
+service = LlmPricingService()
metadata = service.build_usage_metadata(
model="deepseek-chat",
@@ -80,7 +80,7 @@ def test_build_usage_metadata_prefers_provider_direct_cost() -> None:
def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None:
-service = LiteLLMService()
+service = LlmPricingService()
metadata = service.build_usage_metadata(
model="deepseek-chat",
@@ -100,7 +100,7 @@ def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None
def test_build_usage_metadata_marks_incomplete_usage_fallback() -> None:
-service = LiteLLMService()
+service = LlmPricingService()
metadata = service.build_usage_metadata(
model="deepseek-chat",
+27 -31
View File
@@ -4,34 +4,34 @@ version = "0.1.0"
description = "Social application backend"
requires-python = ">=3.12"
dependencies = [
-"ag-ui-protocol>=0.1.13",
-"alembic>=1.18.3",
-"asyncpg>=0.31.0",
-"email-validator>=2.3.0",
-"fastapi>=0.128.0",
-"pydantic>=2.11.0",
-"pydantic-settings>=2.10.0",
-"pyjwt>=2.10.1",
-"pyyaml>=6.0.3",
-"redis>=7.1.0",
-"sqlalchemy[asyncio]>=2.0.46",
-"structlog>=24.4.0",
-"taskiq>=0.11.0",
-"taskiq-redis>=1.0.0",
-"supabase>=2.27.2",
-"uvicorn[standard]>=0.40.0",
-"dashscope>=1.25.13",
-"agentscope>=1.0.16",
-"apscheduler>=3.11.0",
+"ag-ui-protocol==0.1.13",
+"alembic==1.18.4",
+"asyncpg==0.31.0",
+"email-validator==2.3.0",
+"fastapi==0.135.1",
+"pydantic==2.12.5",
+"pydantic-settings==2.13.1",
+"pyjwt==2.11.0",
+"pyyaml==6.0.3",
+"redis==7.2.1",
+"sqlalchemy[asyncio]==2.0.48",
+"structlog==25.5.0",
+"taskiq==0.12.1",
+"taskiq-redis==1.2.2",
+"supabase==2.28.0",
+"uvicorn[standard]==0.41.0",
+"dashscope==1.25.13",
+"agentscope==1.0.16",
+"apscheduler==3.11.2",
]
[project.optional-dependencies]
dev = [
-"httpx>=0.28.0",
-"playwright>=1.49.0",
-"pytest>=8.3.0",
-"pytest-asyncio>=0.24.0",
-"pytest-cov>=5.0.0",
+"httpx==0.28.1",
+"playwright==1.58.0",
+"pytest==9.0.2",
+"pytest-asyncio==1.3.0",
+"pytest-cov==7.0.0",
]
[[tool.uv.index]]
@@ -48,11 +48,7 @@ markers = [
[dependency-groups]
dev = [
-"aiosqlite>=0.22.1",
-"basedpyright>=1.37.2",
-"playwright>=1.57.0",
-"pre-commit>=4.5.1",
-"pytest>=9.0.2",
-"pytest-asyncio>=1.3.0",
-"pytest-cov>=7.0.0",
+"aiosqlite==0.22.1",
+"basedpyright==1.38.2",
+"pre-commit==4.5.1",
]