chore: 迁移到 social-app 架构,集成 Supabase 和 taskiq worker
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from services.llm_pricing.service import LlmPricingService
|
||||
|
||||
# Names exported when this module is star-imported; only the pricing service
# imported above is listed.
__all__ = ["LlmPricingService"]
|
||||
@@ -0,0 +1,183 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from core.config.initial.init_data import load_llm_catalog
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PricingTier:
    """Immutable set of per-token prices for one prompt-size tier of a model."""

    # Largest prompt size (in tokens, inclusive) for which this tier is selected.
    max_prompt_tokens: int
    # Price of one prompt token that was not served from the cache.
    input_cost_per_token: float
    # Price of one completion token.
    output_cost_per_token: float
    # Price of one cached prompt token; the pricing service falls back to the
    # input price when this is not positive.
    cache_hit_cost_per_token: float
|
||||
|
||||
|
||||
class LlmPricingService:
    """Price LLM usage with per-model, tiered per-token rates from the catalog.

    The pricing table is built once, when the service is constructed, and is
    keyed by the stripped, lower-cased model code.
    """

    # model code (stripped, lower-cased) -> tiers sorted by ascending
    # max_prompt_tokens; never an empty tuple.
    _pricing_by_model: dict[str, tuple[PricingTier, ...]]

    def __init__(self) -> None:
        """Load the catalog and build the per-model pricing table."""
        self._pricing_by_model = self._build_pricing_map()

    @staticmethod
    def _build_pricing_map() -> dict[str, tuple[PricingTier, ...]]:
        """Build the model-code -> pricing-tiers table from the LLM catalog.

        Catalog entries that are not dicts, have no model code, or have no
        usable ``pricing_tiers`` list are skipped. Missing or falsy price
        fields default to zero. When two entries share a model code, the
        later one wins.

        Returns:
            Mapping from normalized model code to its tiers, sorted by
            ascending ``max_prompt_tokens``.
        """
        catalog = load_llm_catalog()
        pricing_by_model: dict[str, tuple[PricingTier, ...]] = {}

        # `or []` keeps an explicit null "llms" entry from breaking iteration.
        for entry in catalog.get("llms") or []:
            if not isinstance(entry, dict):
                continue

            # `or ""` prevents a null model_code from becoming the literal
            # key "none" through str(None).
            model_code = str(entry.get("model_code") or "").strip().lower()
            if not model_code:
                continue

            raw_tiers = entry.get("pricing_tiers")
            if not isinstance(raw_tiers, list) or not raw_tiers:
                continue

            tiers = [
                PricingTier(
                    max_prompt_tokens=int(item.get("max_prompt_tokens", 0) or 0),
                    input_cost_per_token=float(
                        item.get("input_cost_per_token", 0.0) or 0.0
                    ),
                    output_cost_per_token=float(
                        item.get("output_cost_per_token", 0.0) or 0.0
                    ),
                    cache_hit_cost_per_token=float(
                        item.get("cache_hit_cost_per_token", 0.0) or 0.0
                    ),
                )
                for item in raw_tiers
                if isinstance(item, dict)
            ]
            if not tiers:
                continue

            pricing_by_model[model_code] = tuple(
                sorted(tiers, key=lambda tier: tier.max_prompt_tokens)
            )

        return pricing_by_model

    def calculate_cost(
        self,
        *,
        model: str,
        prompt_tokens: int,
        completion_tokens: int,
        cached_prompt_tokens: int = 0,
    ) -> float:
        """Compute the catalog-based cost of a single model call.

        Args:
            model: Model code; matched after stripping and lower-casing.
            prompt_tokens: Total prompt tokens; negative values count as 0.
            completion_tokens: Completion tokens; negative values count as 0.
            cached_prompt_tokens: Portion of the prompt served from cache;
                clamped to the range ``[0, prompt_tokens]``.

        Returns:
            Sum of uncached-prompt, cached-prompt and completion costs, priced
            with the tier selected by the total prompt size.

        Raises:
            ValueError: If the model has no pricing entry.
        """
        tiers = self._pricing_by_model.get(model.strip().lower())
        if tiers is None:
            raise ValueError(f"unknown model pricing: {model}")

        prompt_count = max(int(prompt_tokens), 0)
        completion_count = max(int(completion_tokens), 0)
        cached_count = min(max(int(cached_prompt_tokens), 0), prompt_count)
        uncached_count = prompt_count - cached_count

        # First (smallest) tier whose limit covers the prompt; prompts larger
        # than every limit are priced with the last tier.
        tier = next(
            (
                candidate
                for candidate in tiers
                if prompt_count <= candidate.max_prompt_tokens
            ),
            tiers[-1],
        )

        # A non-positive cache price means no discount: charge the input rate.
        if tier.cache_hit_cost_per_token > 0:
            cached_rate = tier.cache_hit_cost_per_token
        else:
            cached_rate = tier.input_cost_per_token

        return float(
            uncached_count * tier.input_cost_per_token
            + cached_count * cached_rate
            + completion_count * tier.output_cost_per_token
        )

    def build_usage_metadata(
        self,
        *,
        model: str,
        usage_summary: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Normalize a usage summary and attach a cost with its provenance.

        The provider-reported ``direct_cost`` is used only when the usage is
        complete (no model-call records, or as many usage records as model
        calls), the cost was observed and flagged complete, and it parses as a
        finite non-negative number. Otherwise the cost is computed from the
        catalog via ``calculate_cost``.

        Args:
            model: Model code, echoed back and used for catalog pricing.
            usage_summary: Raw counters; ``None`` is treated as empty. A key
                whose value is ``None`` is treated as missing.

        Returns:
            Dict with camelCase keys: model, token counts, ``cost``,
            ``costSource``, ``usageComplete`` and ``latencyMs``.

        Raises:
            ValueError: If catalog pricing is needed and the model is unknown,
                or a counter cannot be converted to ``int``.
        """
        summary = usage_summary or {}
        count = self._read_count

        input_tokens = count(summary, "input_tokens")
        output_tokens = count(summary, "output_tokens")
        total_tokens = count(summary, "total_tokens", input_tokens + output_tokens)
        latency_ms = count(summary, "latency_ms")
        cached_prompt_tokens = count(summary, "cached_prompt_tokens")
        prompt_cache_hit_tokens = count(
            summary, "prompt_cache_hit_tokens", cached_prompt_tokens
        )
        prompt_cache_miss_tokens = count(
            summary,
            "prompt_cache_miss_tokens",
            max(input_tokens - prompt_cache_hit_tokens, 0),
        )
        reasoning_tokens = count(summary, "reasoning_tokens")

        direct_cost_observed = bool(int(summary.get("direct_cost_observed", 0) or 0))
        direct_cost_complete = bool(int(summary.get("direct_cost_complete", 0) or 0))
        model_call_records = count(summary, "model_call_records")
        usage_records = count(summary, "usage_records")
        usage_complete = model_call_records in (0, usage_records)
        direct_cost = self._coerce_non_negative_float(summary.get("direct_cost"))

        provider_cost_usable = (
            usage_complete
            and direct_cost_observed
            and direct_cost_complete
            and direct_cost is not None
        )

        if provider_cost_usable:
            cost = direct_cost
            cost_source = "provider"
        else:
            # NOTE(review): only cached_prompt_tokens gets the cache rate here;
            # tokens reported solely as prompt_cache_hit_tokens are priced as
            # uncached input — confirm that is intended.
            cost = self.calculate_cost(
                model=model,
                prompt_tokens=input_tokens,
                completion_tokens=output_tokens,
                cached_prompt_tokens=cached_prompt_tokens,
            )
            if not usage_complete:
                cost_source = "incomplete_usage_fallback"
            elif direct_cost_observed and not direct_cost_complete:
                cost_source = "catalog_fallback_incomplete_provider_cost"
            else:
                cost_source = "catalog_fallback"

        return {
            "model": model,
            "inputTokens": input_tokens,
            "outputTokens": output_tokens,
            "totalTokens": total_tokens,
            "cachedPromptTokens": cached_prompt_tokens,
            "promptCacheHitTokens": prompt_cache_hit_tokens,
            "promptCacheMissTokens": prompt_cache_miss_tokens,
            "reasoningTokens": reasoning_tokens,
            "cost": cost,
            "costSource": cost_source,
            "usageComplete": usage_complete,
            "latencyMs": latency_ms,
        }

    @staticmethod
    def _read_count(summary: dict[str, Any], key: str, default: int = 0) -> int:
        """Read ``summary[key]`` as an int clamped at zero.

        A missing key or a ``None`` value yields ``default`` (also clamped at
        zero); other falsy values yield 0.
        """
        value = summary.get(key)
        if value is None:
            return max(int(default), 0)
        return max(int(value or 0), 0)

    @staticmethod
    def _coerce_non_negative_float(value: Any) -> float | None:
        """Parse ``value`` as a finite, non-negative float.

        Returns:
            The parsed float, or ``None`` when ``value`` is ``None``, cannot be
            converted, is negative, or is NaN/infinite.
        """
        if value is None:
            return None
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: integers too large to be represented as a float.
            return None
        # NaN fails every comparison and infinity fails the upper bound, so
        # this single check rejects negatives and non-finite values alike.
        if not (0 <= parsed < float("inf")):
            return None
        return parsed
|
||||
Reference in New Issue
Block a user