chore: 迁移到 social-app 架构，集成 Supabase 和 taskiq worker

2026-04-02 16:36:35 +08:00
parent 695adb7d6f
commit 92cdfd9fca
132 changed files with 5802 additions and 759 deletions
@@ -0,0 +1,5 @@
+from __future__ import annotations
+
+from services.llm_pricing.service import LlmPricingService
+
+# Names re-exported as this module's public API.
+__all__ = ["LlmPricingService"]
@@ -0,0 +1,183 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from core.config.initial.init_data import load_llm_catalog
+
+
+@dataclass(frozen=True)
+class PricingTier:
+    """One pricing tier of a model: a prompt-size bound and its per-token rates."""
+
+    # Largest prompt token count this tier covers; LlmPricingService picks the
+    # first tier (ascending by this field) whose bound is >= the prompt size.
+    max_prompt_tokens: int
+    # Rate applied to each uncached prompt token.
+    input_cost_per_token: float
+    # Rate applied to each completion token.
+    output_cost_per_token: float
+    # Rate applied to each cached prompt token; when it is not positive,
+    # LlmPricingService charges cached tokens at input_cost_per_token instead.
+    cache_hit_cost_per_token: float
+
+
+class LlmPricingService:
+    _pricing_by_model: dict[str, tuple[PricingTier, ...]]
+
+    def __init__(self) -> None:
+        self._pricing_by_model = self._build_pricing_map()
+
+    @staticmethod
+    def _build_pricing_map() -> dict[str, tuple[PricingTier, ...]]:
+        catalog = load_llm_catalog()
+        pricing_by_model: dict[str, tuple[PricingTier, ...]] = {}
+        for model in catalog.get("llms", []):
+            if not isinstance(model, dict):
+                continue
+            model_code = str(model.get("model_code", "")).strip().lower()
+            raw_tiers = model.get("pricing_tiers")
+            if not isinstance(raw_tiers, list) or not raw_tiers:
+                continue
+
+            tiers = [
+                PricingTier(
+                    max_prompt_tokens=int(item.get("max_prompt_tokens", 0) or 0),
+                    input_cost_per_token=float(
+                        item.get("input_cost_per_token", 0.0) or 0.0
+                    ),
+                    output_cost_per_token=float(
+                        item.get("output_cost_per_token", 0.0) or 0.0
+                    ),
+                    cache_hit_cost_per_token=float(
+                        item.get("cache_hit_cost_per_token", 0.0) or 0.0
+                    ),
+                )
+                for item in raw_tiers
+                if isinstance(item, dict)
+            ]
+            if not tiers:
+                continue
+            ordered_tiers = tuple(
+                sorted(tiers, key=lambda item: item.max_prompt_tokens)
+            )
+            if model_code:
+                pricing_by_model[model_code] = ordered_tiers
+        return pricing_by_model
+
+    def calculate_cost(
+        self,
+        *,
+        model: str,
+        prompt_tokens: int,
+        completion_tokens: int,
+        cached_prompt_tokens: int = 0,
+    ) -> float:
+        tiers = self._pricing_by_model.get(model.strip().lower())
+        if tiers is None:
+            raise ValueError(f"unknown model pricing: {model}")
+
+        normalized_prompt_tokens = max(int(prompt_tokens), 0)
+        normalized_completion_tokens = max(int(completion_tokens), 0)
+        normalized_cached_tokens = min(
+            max(int(cached_prompt_tokens), 0), normalized_prompt_tokens
+        )
+        uncached_prompt_tokens = normalized_prompt_tokens - normalized_cached_tokens
+
+        selected_tier = tiers[-1]
+        for tier in tiers:
+            if normalized_prompt_tokens <= tier.max_prompt_tokens:
+                selected_tier = tier
+                break
+
+        cached_token_rate = (
+            selected_tier.cache_hit_cost_per_token
+            if selected_tier.cache_hit_cost_per_token > 0
+            else selected_tier.input_cost_per_token
+        )
+
+        return float(
+            uncached_prompt_tokens * selected_tier.input_cost_per_token
+            + normalized_cached_tokens * cached_token_rate
+            + normalized_completion_tokens * selected_tier.output_cost_per_token
+        )
+
+    def build_usage_metadata(
+        self,
+        *,
+        model: str,
+        usage_summary: dict[str, Any] | None,
+    ) -> dict[str, Any]:
+        summary = usage_summary or {}
+        input_tokens = max(int(summary.get("input_tokens", 0) or 0), 0)
+        output_tokens = max(int(summary.get("output_tokens", 0) or 0), 0)
+        total_tokens = max(
+            int(summary.get("total_tokens", input_tokens + output_tokens) or 0), 0
+        )
+        latency_ms = max(int(summary.get("latency_ms", 0) or 0), 0)
+        cached_prompt_tokens = max(int(summary.get("cached_prompt_tokens", 0) or 0), 0)
+        prompt_cache_hit_tokens = max(
+            int(summary.get("prompt_cache_hit_tokens", cached_prompt_tokens) or 0), 0
+        )
+        prompt_cache_miss_tokens = max(
+            int(
+                summary.get(
+                    "prompt_cache_miss_tokens",
+                    max(input_tokens - prompt_cache_hit_tokens, 0),
+                )
+                or 0
+            ),
+            0,
+        )
+        reasoning_tokens = max(int(summary.get("reasoning_tokens", 0) or 0), 0)
+        direct_cost_raw = summary.get("direct_cost")
+        direct_cost_observed = bool(int(summary.get("direct_cost_observed", 0) or 0))
+        direct_cost_complete = bool(int(summary.get("direct_cost_complete", 0) or 0))
+        model_call_records = max(int(summary.get("model_call_records", 0) or 0), 0)
+        usage_records = max(int(summary.get("usage_records", 0) or 0), 0)
+        usage_complete = model_call_records == 0 or model_call_records == usage_records
+        direct_cost = self._coerce_non_negative_float(direct_cost_raw)
+
+        if (
+            usage_complete
+            and direct_cost_observed
+            and direct_cost_complete
+            and direct_cost is not None
+        ):
+            cost = direct_cost
+            cost_source = "provider"
+        else:
+            cost = self.calculate_cost(
+                model=model,
+                prompt_tokens=input_tokens,
+                completion_tokens=output_tokens,
+                cached_prompt_tokens=cached_prompt_tokens,
+            )
+            cost_source = (
+                "incomplete_usage_fallback"
+                if not usage_complete
+                else (
+                    "catalog_fallback_incomplete_provider_cost"
+                    if direct_cost_observed and not direct_cost_complete
+                    else "catalog_fallback"
+                )
+            )
+
+        return {
+            "model": model,
+            "inputTokens": input_tokens,
+            "outputTokens": output_tokens,
+            "totalTokens": total_tokens,
+            "cachedPromptTokens": cached_prompt_tokens,
+            "promptCacheHitTokens": prompt_cache_hit_tokens,
+            "promptCacheMissTokens": prompt_cache_miss_tokens,
+            "reasoningTokens": reasoning_tokens,
+            "cost": cost,
+            "costSource": cost_source,
+            "usageComplete": usage_complete,
+            "latencyMs": latency_ms,
+        }
+
+    @staticmethod
+    def _coerce_non_negative_float(value: Any) -> float | None:
+        if value is None:
+            return None
+        try:
+            parsed = float(value)
+        except (TypeError, ValueError):
+            return None
+        if parsed < 0:
+            return None
+        return parsed