From b765b9e3e1c4352fa6a4f012c78a8d353f66b663 Mon Sep 17 00:00:00 2001 From: zl-q Date: Wed, 25 Mar 2026 20:08:34 +0800 Subject: [PATCH] =?UTF-8?q?refactor:=20=E6=B8=85=E7=90=86=20litellm=20?= =?UTF-8?q?=E5=86=97=E4=BD=99=E4=BB=A3=E7=A0=81=E5=B9=B6=E9=94=81=E5=AE=9A?= =?UTF-8?q?=E4=BE=9D=E8=B5=96=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/src/core/agentscope/runtime/runner.py | 12 ++-- backend/src/core/config/initial/init_data.py | 1 - backend/src/core/config/settings.py | 13 +---- .../config/static/database/llm_catalog.yaml | 4 -- backend/src/services/litellm/__init__.py | 5 -- backend/src/services/llm_pricing/__init__.py | 5 ++ .../{litellm => llm_pricing}/service.py | 5 +- ...service.py => test_llm_pricing_service.py} | 22 +++---- pyproject.toml | 58 +++++++++---------- 9 files changed, 54 insertions(+), 71 deletions(-) delete mode 100644 backend/src/services/litellm/__init__.py create mode 100644 backend/src/services/llm_pricing/__init__.py rename backend/src/services/{litellm => llm_pricing}/service.py (97%) rename backend/tests/unit/services/{test_litellm_service.py => test_llm_pricing_service.py} (87%) diff --git a/backend/src/core/agentscope/runtime/runner.py b/backend/src/core/agentscope/runtime/runner.py index 5f7b117..576e49e 100644 --- a/backend/src/core/agentscope/runtime/runner.py +++ b/backend/src/core/agentscope/runtime/runner.py @@ -47,7 +47,7 @@ from schemas.agent.system_agent import ( from schemas.domain.automation import RuntimeConfig from schemas.domain.memory_content import UserMemoryContent, WorkProfileContent from schemas.shared.user import UserContext -from services.litellm.service import LiteLLMService +from services.llm_pricing.service import LlmPricingService from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -63,9 +63,11 @@ class StageExecutionResult: class AgentScopeRunner: - def __init__(self, *, litellm_service: LiteLLMService | None = None) -> None: + def __init__(self, *, llm_pricing_service: LlmPricingService | None = None) -> None: patch_agentscope_json_repair_compat() - self._litellm_service: LiteLLMService = litellm_service or LiteLLMService() + self._llm_pricing_service: LlmPricingService = ( + llm_pricing_service or LlmPricingService() + ) self._active_agent: JsonReActAgent | None = None self._active_agent_lock = asyncio.Lock() @@ -355,7 +357,7 @@ class AgentScopeRunner: return StageExecutionResult( message=response_msg, payload=payload, - response_metadata=self._litellm_service.build_usage_metadata( + response_metadata=self._llm_pricing_service.build_usage_metadata( model=stage_config.model_code, usage_summary=tracking_model.usage_summary(), ), @@ -436,7 +438,7 @@ class AgentScopeRunner: if self._active_agent is agent: self._active_agent = None worker_payload = worker_output_model.model_validate(response_msg.metadata or {}) - response_metadata = self._litellm_service.build_usage_metadata( + response_metadata = self._llm_pricing_service.build_usage_metadata( model=stage_config.model_code, usage_summary=tracking_model.usage_summary(), ) diff --git a/backend/src/core/config/initial/init_data.py b/backend/src/core/config/initial/init_data.py index a66b68b..4042328 100644 --- a/backend/src/core/config/initial/init_data.py +++ b/backend/src/core/config/initial/init_data.py @@ -28,7 +28,6 @@ class LlmFactorySeed(BaseModel): class LlmSeed(BaseModel): model_code: str factory_name: str - litellm_model: str pricing_tiers: list[dict[str, float | int]] diff --git a/backend/src/core/config/settings.py b/backend/src/core/config/settings.py index 46acb3a..86136dd 100644 --- a/backend/src/core/config/settings.py +++ b/backend/src/core/config/settings.py @@ -182,15 +182,8 @@ class LlmSettings(BaseModel): provider_keys: dict[str, str] = Field(default_factory=dict) -class LiteLLMSettings(BaseModel): - host: str = "127.0.0.1" - port: int = 3875 - api_key: str = "sk-local" - - @computed_field - @property - def base_url(self) -> str: - return f"http://{self.host}:{self.port}/v1" +class LlmPricingSettings(BaseModel): + pass class DatabaseSettings(BaseModel): @@ -264,7 +257,7 @@ class Settings(BaseSettings): supabase: SupabaseSettings = Field() storage: StorageSettings = StorageSettings() llm: LlmSettings = LlmSettings() - litellm: LiteLLMSettings = LiteLLMSettings() + llm_pricing: LlmPricingSettings = LlmPricingSettings() agent_runtime: AgentRuntimeSettings = AgentRuntimeSettings() automation_scheduler: AutomationSchedulerSettings = AutomationSchedulerSettings() taskiq: TaskiqSettings = TaskiqSettings() diff --git a/backend/src/core/config/static/database/llm_catalog.yaml b/backend/src/core/config/static/database/llm_catalog.yaml index a389d04..0ab2857 100644 --- a/backend/src/core/config/static/database/llm_catalog.yaml +++ b/backend/src/core/config/static/database/llm_catalog.yaml @@ -27,7 +27,6 @@ llms: # qwen3.5-flash (3 tiers: 128K, 256K, 1M) - model_code: qwen3.5-flash factory_name: dashscope - litellm_model: dashscope/qwen3.5-flash pricing_tiers: - max_prompt_tokens: 128000 input_cost_per_token: 0.0000002 @@ -44,7 +43,6 @@ llms: - model_code: qwen3.5-35b-a3b factory_name: dashscope - litellm_model: dashscope/qwen3.5-35b-a3b pricing_tiers: - max_prompt_tokens: 128000 input_cost_per_token: 0.0000004 @@ -55,7 +53,6 @@ llms: - model_code: deepseek-chat factory_name: deepseek - litellm_model: deepseek/deepseek-chat pricing_tiers: - max_prompt_tokens: 128000 input_cost_per_token: 0.000002 @@ -64,7 +61,6 @@ llms: - model_code: qwen3.5-27b factory_name: dashscope - litellm_model: dashscope/qwen3.5-27b pricing_tiers: - max_prompt_tokens: 128000 input_cost_per_token: 0.0000006 diff --git a/backend/src/services/litellm/__init__.py b/backend/src/services/litellm/__init__.py deleted file mode 100644 index 8ea2ef7..0000000 --- a/backend/src/services/litellm/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import annotations - -from services.litellm.service import LiteLLMService - -__all__ = ["LiteLLMService"] diff --git a/backend/src/services/llm_pricing/__init__.py b/backend/src/services/llm_pricing/__init__.py new file mode 100644 index 0000000..623a075 --- /dev/null +++ b/backend/src/services/llm_pricing/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from services.llm_pricing.service import LlmPricingService + +__all__ = ["LlmPricingService"] diff --git a/backend/src/services/litellm/service.py b/backend/src/services/llm_pricing/service.py similarity index 97% rename from backend/src/services/litellm/service.py rename to backend/src/services/llm_pricing/service.py index 92b3bb6..7c7e1dc 100644 --- a/backend/src/services/litellm/service.py +++ b/backend/src/services/llm_pricing/service.py @@ -14,7 +14,7 @@ class PricingTier: cache_hit_cost_per_token: float -class LiteLLMService: +class LlmPricingService: _pricing_by_model: dict[str, tuple[PricingTier, ...]] def __init__(self) -> None: @@ -28,7 +28,6 @@ class LiteLLMService: if not isinstance(model, dict): continue model_code = str(model.get("model_code", "")).strip().lower() - litellm_model = str(model.get("litellm_model", "")).strip().lower() raw_tiers = model.get("pricing_tiers") if not isinstance(raw_tiers, list) or not raw_tiers: continue @@ -56,8 +55,6 @@ class LiteLLMService: ) if model_code: pricing_by_model[model_code] = ordered_tiers - if litellm_model: - pricing_by_model[litellm_model] = ordered_tiers return pricing_by_model def calculate_cost( diff --git a/backend/tests/unit/services/test_litellm_service.py b/backend/tests/unit/services/test_llm_pricing_service.py similarity index 87% rename from backend/tests/unit/services/test_litellm_service.py rename to backend/tests/unit/services/test_llm_pricing_service.py index 4089a09..ad1e9ce 100644 --- a/backend/tests/unit/services/test_litellm_service.py +++ b/backend/tests/unit/services/test_llm_pricing_service.py @@ -2,14 +2,14 @@ from __future__ import annotations import pytest -from services.litellm.service import LiteLLMService +from services.llm_pricing.service import LlmPricingService def test_calculate_cost_uses_first_qwen_tier() -> None: - service = LiteLLMService() + service = LlmPricingService() cost = service.calculate_cost( - model="dashscope/qwen3.5-flash", + model="qwen3.5-flash", prompt_tokens=100_000, completion_tokens=1_000, cached_prompt_tokens=10_000, @@ -19,10 +19,10 @@ def test_calculate_cost_uses_first_qwen_tier() -> None: def test_calculate_cost_uses_second_qwen_tier() -> None: - service = LiteLLMService() + service = LlmPricingService() cost = service.calculate_cost( - model="dashscope/qwen3.5-flash", + model="qwen3.5-flash", prompt_tokens=200_000, completion_tokens=5_000, cached_prompt_tokens=20_000, @@ -32,10 +32,10 @@ def test_calculate_cost_uses_second_qwen_tier() -> None: def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None: - service = LiteLLMService() + service = LlmPricingService() metadata = service.build_usage_metadata( - model="dashscope/qwen3.5-flash", + model="qwen3.5-flash", usage_summary={ "input_tokens": 2000, "output_tokens": 100, @@ -44,7 +44,7 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None: }, ) - assert metadata["model"] == "dashscope/qwen3.5-flash" + assert metadata["model"] == "qwen3.5-flash" assert metadata["inputTokens"] == 2000 assert metadata["outputTokens"] == 100 assert metadata["totalTokens"] == 2100 @@ -59,7 +59,7 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None: def test_build_usage_metadata_prefers_provider_direct_cost() -> None: - service = LiteLLMService() + service = LlmPricingService() metadata = service.build_usage_metadata( model="deepseek-chat", @@ -80,7 +80,7 @@ def test_build_usage_metadata_prefers_provider_direct_cost() -> None: def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None: - service = LiteLLMService() + service = LlmPricingService() metadata = service.build_usage_metadata( model="deepseek-chat", @@ -100,7 +100,7 @@ def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None def test_build_usage_metadata_marks_incomplete_usage_fallback() -> None: - service = LiteLLMService() + service = LlmPricingService() metadata = service.build_usage_metadata( model="deepseek-chat", diff --git a/pyproject.toml b/pyproject.toml index a8a260b..df44cab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,34 +4,34 @@ version = "0.1.0" description = "Social application backend" requires-python = ">=3.12" dependencies = [ - "ag-ui-protocol>=0.1.13", - "alembic>=1.18.3", - "asyncpg>=0.31.0", - "email-validator>=2.3.0", - "fastapi>=0.128.0", - "pydantic>=2.11.0", - "pydantic-settings>=2.10.0", - "pyjwt>=2.10.1", - "pyyaml>=6.0.3", - "redis>=7.1.0", - "sqlalchemy[asyncio]>=2.0.46", - "structlog>=24.4.0", - "taskiq>=0.11.0", - "taskiq-redis>=1.0.0", - "supabase>=2.27.2", - "uvicorn[standard]>=0.40.0", - "dashscope>=1.25.13", - "agentscope>=1.0.16", - "apscheduler>=3.11.0", + "ag-ui-protocol==0.1.13", + "alembic==1.18.4", + "asyncpg==0.31.0", + "email-validator==2.3.0", + "fastapi==0.135.1", + "pydantic==2.12.5", + "pydantic-settings==2.13.1", + "pyjwt==2.11.0", + "pyyaml==6.0.3", + "redis==7.2.1", + "sqlalchemy[asyncio]==2.0.48", + "structlog==25.5.0", + "taskiq==0.12.1", + "taskiq-redis==1.2.2", + "supabase==2.28.0", + "uvicorn[standard]==0.41.0", + "dashscope==1.25.13", + "agentscope==1.0.16", + "apscheduler==3.11.2", ] [project.optional-dependencies] dev = [ - "httpx>=0.28.0", - "playwright>=1.49.0", - "pytest>=8.3.0", - "pytest-asyncio>=0.24.0", - "pytest-cov>=5.0.0", + "httpx==0.28.1", + "playwright==1.58.0", + "pytest==9.0.2", + "pytest-asyncio==1.3.0", + "pytest-cov==7.0.0", ] [[tool.uv.index]] @@ -48,11 +48,7 @@ markers = [ [dependency-groups] dev = [ - "aiosqlite>=0.22.1", - "basedpyright>=1.37.2", - "playwright>=1.57.0", - "pre-commit>=4.5.1", - "pytest>=9.0.2", - "pytest-asyncio>=1.3.0", - "pytest-cov>=7.0.0", + "aiosqlite==0.22.1", + "basedpyright==1.38.2", + "pre-commit==4.5.1", ]