feat: 添加好友功能并集成 LiteLLM 代理服务

- 新增好友搜索、添加、好友列表功能
- 集成 LiteLLM 代理服务及多模型定价配置
- 更新 iOS CocoaPods 配置
- 更新 .gitignore 和环境变量配置
2026-03-11 09:14:51 +08:00
parent 487405aa5b
commit e55e445906
28 changed files with 1226 additions and 181 deletions
@@ -231,21 +231,23 @@ class AgentType(str, Enum):

 Agent loop functionality MUST follow the AG-UI protocol. **Use the `ag-ui` skill** for protocol reference and implementation guidance.

-## Multi-Agent Orchestration (CrewAI Framework)
+## Multi-Agent Orchestration (AgentScope Framework)

-Multi-agent orchestration MUST use the CrewAI framework. **Use the `crewai` skill** for framework reference and implementation guidance.
+Multi-agent orchestration MUST use the AgentScope framework. **Use the `agentscope-skill` skill** for framework reference and implementation guidance.
+
+For workflows involving routing, LiteLLM proxy cost audit, or frontend/backend human approval loops, **use the `agentscope-hitl-cost` skill**.

 ### Core Principles

- Use CrewAI for orchestrating multiple agents working together
- Define clear agent roles, tasks, and crews
- Leverage built-in collaboration and delegation mechanisms
- Follow CrewAI best practices for agent configuration
+- Use AgentScope for orchestrating multiple agents working together
+- Define clear agent roles, stage responsibilities, and pipeline boundaries
+- Leverage AgentScope built-in workflow and tool middleware mechanisms
+- Follow AgentScope best practices for agent configuration

 ### Key Components

 - **Agents**: Autonomous units with specific roles and goals
- **Tasks**: Assignments that agents complete
- **Crews**: Teams of agents working together
+- **Tasks**: Stage-specific prompts and execution goals
+- **Pipelines**: Ordered orchestration flow between agents
 - **Tools**: Capabilities available to agents
 - **Flows**: Workflow orchestration and state management
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from core.config.initial.init_data import load_llm_catalog
+
+
+def _provider_key_env_name(factory_name: str) -> str:
+    """Return the environment variable name that holds a provider's API key.
+
+    The factory name is trimmed and upper-cased to form the suffix; the
+    ``VOLCENGINE`` factory uses the ``ARK`` suffix instead of its own name.
+    """
+    suffix = factory_name.strip().upper()
+    suffix = "ARK" if suffix == "VOLCENGINE" else suffix
+    return f"SOCIAL_LLM__PROVIDER_KEYS__{suffix}"
+
+
+def build_proxy_config() -> dict[str, Any]:
+    """Build the LiteLLM proxy configuration from the LLM catalog.
+
+    Each catalog model that has a non-empty ``model_code``, ``factory_name``
+    and ``litellm_model`` becomes one ``model_list`` entry. The entry is
+    routed with the ``openai/`` prefix to its factory's ``request_url`` and
+    reads its API key from the provider-specific environment variable.
+
+    Returns:
+        A mapping with a single ``model_list`` key.
+
+    Raises:
+        ValueError: If the catalog sections are not lists, if a model's
+            factory has no ``request_url``, or if no entry could be built.
+    """
+    catalog = load_llm_catalog()
+    factories = catalog.get("factories", [])
+    llms = catalog.get("llms", [])
+    if not (isinstance(factories, list) and isinstance(llms, list)):
+        raise ValueError("invalid llm catalog format")
+
+    # Lower-cased factory name -> request URL; incomplete entries are skipped.
+    url_by_factory: dict[str, str] = {}
+    for entry in factories:
+        if isinstance(entry, dict):
+            key = str(entry.get("name", "")).strip().lower()
+            url = str(entry.get("request_url", "")).strip()
+            if key and url:
+                url_by_factory[key] = url
+
+    model_list: list[dict[str, Any]] = []
+    for entry in llms:
+        if not isinstance(entry, dict):
+            continue
+        model_code, factory_name, litellm_model = (
+            str(entry.get(field, "")).strip()
+            for field in ("model_code", "factory_name", "litellm_model")
+        )
+        if not (model_code and factory_name and litellm_model):
+            continue
+
+        api_base = url_by_factory.get(factory_name.lower())
+        if not api_base:
+            raise ValueError(
+                f"factory request_url missing for model {model_code}: {factory_name}"
+            )
+
+        # Drop the provider prefix ("provider/model" -> "model") when present.
+        _, separator, remainder = litellm_model.partition("/")
+        provider_model = remainder if separator else litellm_model
+
+        litellm_params = {
+            "model": f"openai/{provider_model}",
+            "api_base": api_base,
+            "api_key": f"os.environ/{_provider_key_env_name(factory_name)}",
+        }
+        model_list.append(
+            {"model_name": model_code, "litellm_params": litellm_params}
+        )
+
+    if not model_list:
+        raise ValueError("no models found in llm catalog")
+    return {"model_list": model_list}
+
+
+def main() -> int:
+    """Parse the command line and write the proxy config as YAML.
+
+    Returns:
+        ``0`` once the output file has been written.
+    """
+    parser = argparse.ArgumentParser(description="Build LiteLLM proxy config")
+    parser.add_argument("--output", required=True, help="Output YAML file path")
+    destination = Path(parser.parse_args().output).resolve()
+
+    # Create missing parent directories before building and writing the config.
+    destination.parent.mkdir(parents=True, exist_ok=True)
+
+    proxy_config = build_proxy_config()
+    with destination.open("w", encoding="utf-8") as stream:
+        yaml.safe_dump(proxy_config, stream, sort_keys=False, allow_unicode=False)
+
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -28,6 +28,8 @@ class LlmFactorySeed(BaseModel):
 class LlmSeed(BaseModel):
+    # Seed record for one LLM entry of the catalog.
     model_code: str
     factory_name: str
+    # Model identifier used for LiteLLM routing (written as "provider/model"
+    # in the seed data).
+    litellm_model: str
+    # Pricing tiers as mappings of numeric values; the seed data uses the
+    # keys max_prompt_tokens, input_cost_per_token, output_cost_per_token and
+    # cache_hit_cost_per_token.
+    pricing_tiers: list[dict[str, float | int]]


 class LlmCatalogSeed(BaseModel):
@@ -170,6 +170,17 @@ class LlmSettings(BaseModel):
    provider_keys: dict[str, str] = Field(default_factory=dict)


+class LiteLLMSettings(BaseModel):
+    """Connection settings for the LiteLLM proxy."""
+
+    # Host and port used to build ``base_url``.
+    host: str = "127.0.0.1"
+    port: int = 3875
+    # API key sent to the proxy.
+    api_key: str = "sk-local"
+
+    @computed_field
+    @property
+    def base_url(self) -> str:
+        """Return the ``http://<host>:<port>/v1`` base URL."""
+        return f"http://{self.host}:{self.port}/v1"
+
+
 class DatabaseSettings(BaseModel):
    host: str = "localhost"
    port: int = 5432
@@ -206,6 +217,7 @@ class Settings(BaseSettings):
    supabase: SupabaseSettings = Field()
    storage: StorageSettings = StorageSettings()
    llm: LlmSettings = LlmSettings()
+    litellm: LiteLLMSettings = LiteLLMSettings()
    agent_runtime: AgentRuntimeSettings = AgentRuntimeSettings()
    taskiq: TaskiqSettings = TaskiqSettings()
    database: DatabaseSettings = DatabaseSettings()
@@ -1,34 +1,52 @@
 factories:
-    - name: dashscope
-      request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
-      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
+  - name: dashscope
+    request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
+    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png

-    - name: minimax
-      request_url: https://api.minimaxi.com/v1
-      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
+  - name: minimax
+    request_url: https://api.minimaxi.com/v1
+    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png

-    - name: moonshot
-      request_url: https://api.moonshot.cn/v1
-      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
+  - name: moonshot
+    request_url: https://api.moonshot.cn/v1
+    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png

-    - name: deepseek
-      request_url: https://api.deepseek.com/v1
-      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
+  - name: deepseek
+    request_url: https://api.deepseek.com/v1
+    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png

-    - name: volcengine
-      request_url: https://ark.cn-beijing.volces.com/api/v3
-      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
+  - name: volcengine
+    request_url: https://ark.cn-beijing.volces.com/api/v3
+    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png

-    - name: zai
-      request_url: https://api.z.ai/api/paas/v4
-      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
+  - name: zai
+    request_url: https://api.z.ai/api/paas/v4
+    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png

 llms:
-    # 你原来的两个保留
-    - model_code: qwen3.5-flash
-      factory_name: dashscope
-      litellm_model: dashscope/qwen-turbo
+  # qwen3.5-flash (3 tiers: 128K, 256K, 1M)
+  - model_code: qwen3.5-flash
+    factory_name: dashscope
+    litellm_model: dashscope/qwen3.5-flash
+    pricing_tiers:
+      - max_prompt_tokens: 128000
+        input_cost_per_token: 0.0000002
+        output_cost_per_token: 0.000002
+        cache_hit_cost_per_token: 0.00000002
+      - max_prompt_tokens: 256000
+        input_cost_per_token: 0.0000008
+        output_cost_per_token: 0.000008
+        cache_hit_cost_per_token: 0.00000008
+      - max_prompt_tokens: 1000000
+        input_cost_per_token: 0.0000012
+        output_cost_per_token: 0.000012
+        cache_hit_cost_per_token: 0.00000012

-    - model_code: deepseek-chat
-      factory_name: deepseek
-      litellm_model: deepseek/deepseek-chat
+  - model_code: deepseek-chat
+    factory_name: deepseek
+    litellm_model: deepseek/deepseek-chat
+    pricing_tiers:
+      - max_prompt_tokens: 128000
+        input_cost_per_token: 0.000002
+        output_cost_per_token: 0.000003
+        cache_hit_cost_per_token: 0.0000002
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from services.litellm.service import (
+    LiteLLMResponseWithCost,
+    LiteLLMService,
+    LiteLLMUsage,
+)
+
+__all__ = ["LiteLLMService", "LiteLLMUsage", "LiteLLMResponseWithCost"]
@@ -0,0 +1,189 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Callable
+
+from litellm import completion
+
+from core.config.settings import config
+from core.config.initial.init_data import load_llm_catalog
+
+
+@dataclass(frozen=True)
+class PricingTier:
+    """Per-token prices that apply up to a given prompt size."""
+
+    # Largest prompt token count this tier is selected for.
+    max_prompt_tokens: int
+    # Price per prompt token that was not served from cache.
+    input_cost_per_token: float
+    # Price per completion token.
+    output_cost_per_token: float
+    # Price per cached prompt token.
+    cache_hit_cost_per_token: float
+
+
+@dataclass(frozen=True)
+class LiteLLMUsage:
+    """Token counts of one completion together with its computed cost."""
+
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+    # Prompt tokens reported as cached (``prompt_tokens_details.cached_tokens``).
+    cached_prompt_tokens: int
+    # Cost computed from the catalog pricing tiers.
+    cost: float
+
+
+@dataclass(frozen=True)
+class LiteLLMResponseWithCost:
+    """A completion response paired with its usage and cost."""
+
+    # The completion response as a plain dict.
+    response: dict[str, Any]
+    # Token usage and cost extracted from ``response``.
+    usage: LiteLLMUsage
+
+
+class LiteLLMService:
+    """Run completions through the LiteLLM proxy and price their usage.
+
+    Pricing tiers are loaded from the LLM catalog and indexed by the
+    lower-cased ``model_code`` and ``litellm_model`` of every catalog entry.
+    """
+
+    proxy_base_url: str
+    proxy_api_key: str
+    _pricing_by_model: dict[str, tuple[PricingTier, ...]]
+
+    def __init__(
+        self,
+        *,
+        proxy_base_url: str | None = None,
+        proxy_api_key: str | None = None,
+    ) -> None:
+        """Create the service.
+
+        Args:
+            proxy_base_url: Proxy base URL; falls back to
+                ``config.litellm.base_url`` when empty or ``None``.
+            proxy_api_key: Proxy API key; falls back to
+                ``config.litellm.api_key`` when empty or ``None``.
+        """
+        self.proxy_base_url = proxy_base_url or config.litellm.base_url
+        self.proxy_api_key = proxy_api_key or config.litellm.api_key
+        self._pricing_by_model = self._build_pricing_map()
+
+    @staticmethod
+    def _build_pricing_map() -> dict[str, tuple[PricingTier, ...]]:
+        """Index the catalog's pricing tiers by lower-cased model names.
+
+        Entries without a non-empty ``pricing_tiers`` list are skipped. The
+        tiers of each model are sorted by ascending ``max_prompt_tokens`` and
+        registered under both ``model_code`` and ``litellm_model``.
+        """
+        catalog = load_llm_catalog()
+        pricing_by_model: dict[str, tuple[PricingTier, ...]] = {}
+        for model in catalog.get("llms", []):
+            if not isinstance(model, dict):
+                continue
+            model_code = str(model.get("model_code", "")).strip().lower()
+            litellm_model = str(model.get("litellm_model", "")).strip().lower()
+            raw_tiers = model.get("pricing_tiers")
+            if not isinstance(raw_tiers, list) or not raw_tiers:
+                continue
+
+            # Missing or null tier values are treated as zero.
+            tiers = [
+                PricingTier(
+                    max_prompt_tokens=int(item.get("max_prompt_tokens", 0) or 0),
+                    input_cost_per_token=float(
+                        item.get("input_cost_per_token", 0.0) or 0.0
+                    ),
+                    output_cost_per_token=float(
+                        item.get("output_cost_per_token", 0.0) or 0.0
+                    ),
+                    cache_hit_cost_per_token=float(
+                        item.get("cache_hit_cost_per_token", 0.0) or 0.0
+                    ),
+                )
+                for item in raw_tiers
+                if isinstance(item, dict)
+            ]
+            if not tiers:
+                continue
+            ordered_tiers = tuple(
+                sorted(tiers, key=lambda item: item.max_prompt_tokens)
+            )
+            if model_code:
+                pricing_by_model[model_code] = ordered_tiers
+            if litellm_model:
+                pricing_by_model[litellm_model] = ordered_tiers
+        return pricing_by_model
+
+    def calculate_cost(
+        self,
+        *,
+        model: str,
+        prompt_tokens: int,
+        completion_tokens: int,
+        cached_prompt_tokens: int = 0,
+    ) -> float:
+        """Return the cost of one request under the model's pricing tiers.
+
+        The tier is the first one whose ``max_prompt_tokens`` is at least the
+        prompt size; prompts larger than every tier use the last tier.
+        Negative counts are clamped to zero and cached tokens are capped at
+        the prompt size.
+
+        Args:
+            model: Model name, matched case-insensitively after trimming.
+            prompt_tokens: Total prompt tokens, cached ones included.
+            completion_tokens: Generated tokens.
+            cached_prompt_tokens: Prompt tokens billed at the cache-hit rate.
+
+        Raises:
+            ValueError: If no pricing is registered for ``model``.
+        """
+        tiers = self._pricing_by_model.get(model.strip().lower())
+        if tiers is None:
+            raise ValueError(f"unknown model pricing: {model}")
+
+        normalized_prompt_tokens = max(int(prompt_tokens), 0)
+        normalized_completion_tokens = max(int(completion_tokens), 0)
+        normalized_cached_tokens = min(
+            max(int(cached_prompt_tokens), 0), normalized_prompt_tokens
+        )
+        uncached_prompt_tokens = normalized_prompt_tokens - normalized_cached_tokens
+
+        # Tiers are sorted ascending, so the first match is the smallest fit.
+        selected_tier = next(
+            (
+                tier
+                for tier in tiers
+                if normalized_prompt_tokens <= tier.max_prompt_tokens
+            ),
+            tiers[-1],
+        )
+
+        return float(
+            uncached_prompt_tokens * selected_tier.input_cost_per_token
+            + normalized_cached_tokens * selected_tier.cache_hit_cost_per_token
+            + normalized_completion_tokens * selected_tier.output_cost_per_token
+        )
+
+    def _resolve_pricing_model(
+        self, response_model: Any, requested_model: str
+    ) -> str:
+        """Pick the model name to price a response with.
+
+        The model reported by the response is preferred. If it has no known
+        pricing (an alias, or a null value), the requested model is tried
+        next, each also without a leading ``openai/`` prefix. When nothing
+        matches, the first candidate is returned so that ``calculate_cost``
+        raises its usual ``ValueError``.
+        """
+        names: list[str] = []
+        if isinstance(response_model, str) and response_model.strip():
+            names.append(response_model.strip())
+        elif response_model is not None and not isinstance(response_model, str):
+            names.append(str(response_model).strip())
+        names.append(requested_model.strip())
+
+        prefix = "openai/"
+        candidates: list[str] = []
+        for name in names:
+            candidates.append(name)
+            if name.startswith(prefix):
+                candidates.append(name[len(prefix):])
+
+        for candidate in candidates:
+            if candidate.lower() in self._pricing_by_model:
+                return candidate
+        return candidates[0]
+
+    def run_completion_with_cost(
+        self,
+        *,
+        model: str,
+        messages: list[dict[str, Any]],
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        timeout: float | None = None,
+        completion_fn: Callable[..., dict[str, Any]] | None = None,
+    ) -> LiteLLMResponseWithCost:
+        """Run a non-streaming completion and attach usage and cost.
+
+        Args:
+            model: Model to request; ``openai/`` is prepended unless the name
+                already starts with it.
+            messages: Chat messages passed through to the completion call.
+            temperature: Passed through unchanged.
+            max_tokens: Passed through unchanged.
+            timeout: Passed through unchanged.
+            completion_fn: Replacement for ``litellm.completion``, called
+                with the same keyword arguments.
+
+        Returns:
+            The response as a dict together with its usage and cost.
+
+        Raises:
+            ValueError: If the response cannot be converted to a dict, has no
+                ``usage`` mapping, or no pricing is known for either the
+                reported or the requested model.
+        """
+        caller = completion_fn or completion
+        request_model = model if model.startswith("openai/") else f"openai/{model}"
+
+        response_any = caller(
+            model=request_model,
+            api_key=self.proxy_api_key,
+            api_base=self.proxy_base_url,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            timeout=timeout,
+            stream=False,
+        )
+        response = self._normalize_response(response_any)
+
+        usage_raw = response.get("usage")
+        if not isinstance(usage_raw, dict):
+            raise ValueError("missing usage in response")
+
+        prompt_tokens = int(usage_raw.get("prompt_tokens", 0) or 0)
+        completion_tokens = int(usage_raw.get("completion_tokens", 0) or 0)
+        # A null or zero total falls back to the sum, not to 0.
+        reported_total = usage_raw.get("total_tokens")
+        total_tokens = (
+            int(reported_total)
+            if reported_total
+            else prompt_tokens + completion_tokens
+        )
+        cached_prompt_tokens = 0
+        prompt_tokens_details = usage_raw.get("prompt_tokens_details")
+        if isinstance(prompt_tokens_details, dict):
+            cached_prompt_tokens = int(
+                prompt_tokens_details.get("cached_tokens", 0) or 0
+            )
+
+        # Do not lose a completed response because the reported model name is
+        # an alias or null; fall back to the requested model's pricing.
+        resolved_model = self._resolve_pricing_model(response.get("model"), model)
+        cost = self.calculate_cost(
+            model=resolved_model,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            cached_prompt_tokens=cached_prompt_tokens,
+        )
+        return LiteLLMResponseWithCost(
+            response=response,
+            usage=LiteLLMUsage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+                cached_prompt_tokens=cached_prompt_tokens,
+                cost=cost,
+            ),
+        )
+
+    @staticmethod
+    def _normalize_response(response_any: Any) -> dict[str, Any]:
+        """Return the response as a dict.
+
+        Dicts are returned as-is; other objects are converted through their
+        ``model_dump()`` method when it exists and returns a dict.
+
+        Raises:
+            ValueError: If neither form yields a dict.
+        """
+        if isinstance(response_any, dict):
+            return response_any
+        model_dump = getattr(response_any, "model_dump", None)
+        if callable(model_dump):
+            dumped = model_dump()
+            if isinstance(dumped, dict):
+                return dumped
+        raise ValueError("litellm response is not serializable")
@@ -31,3 +31,26 @@ def test_seed_data_does_not_keep_legacy_deepseek_alias() -> None:
    catalog = load_llm_catalog()

    assert all(entry["model_code"] != "deepseek-v3.2" for entry in catalog["llms"])
+
+
+def test_llm_catalog_contains_litellm_routing_and_pricing_fields() -> None:
+    """Every catalog LLM carries exactly the routing and pricing fields."""
+    expected_keys = {
+        "model_code",
+        "factory_name",
+        "litellm_model",
+        "pricing_tiers",
+    }
+    cost_fields = (
+        "input_cost_per_token",
+        "output_cost_per_token",
+        "cache_hit_cost_per_token",
+    )
+
+    for entry in load_llm_catalog()["llms"]:
+        assert set(entry.keys()) == expected_keys
+
+        litellm_model = entry["litellm_model"]
+        assert isinstance(litellm_model, str)
+        assert "/" in litellm_model
+
+        tiers = entry["pricing_tiers"]
+        assert isinstance(tiers, list)
+        assert len(tiers) > 0
+        for tier in tiers:
+            assert isinstance(tier, dict)
+            assert int(tier["max_prompt_tokens"]) > 0
+            for field in cost_fields:
+                assert float(tier[field]) >= 0
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+import pytest
+
+from services.litellm.service import LiteLLMService
+
+
+def test_calculate_cost_uses_first_qwen_tier() -> None:
+    """A 100K-token prompt is priced with the first qwen3.5-flash tier."""
+    # 90_000 * 2e-7 + 10_000 * 2e-8 + 1_000 * 2e-6 == 0.0202
+    token_counts = {
+        "prompt_tokens": 100_000,
+        "completion_tokens": 1_000,
+        "cached_prompt_tokens": 10_000,
+    }
+
+    cost = LiteLLMService().calculate_cost(
+        model="dashscope/qwen3.5-flash", **token_counts
+    )
+
+    assert cost == pytest.approx(0.0202)
+
+
+def test_calculate_cost_uses_second_qwen_tier() -> None:
+    """A 200K-token prompt is priced with the second qwen3.5-flash tier."""
+    # 180_000 * 8e-7 + 20_000 * 8e-8 + 5_000 * 8e-6 == 0.1856
+    token_counts = {
+        "prompt_tokens": 200_000,
+        "completion_tokens": 5_000,
+        "cached_prompt_tokens": 20_000,
+    }
+
+    cost = LiteLLMService().calculate_cost(
+        model="dashscope/qwen3.5-flash", **token_counts
+    )
+
+    assert cost == pytest.approx(0.1856)
+
+
+def test_run_completion_extracts_usage_and_cost() -> None:
+    """Usage counts and cost are taken from the stubbed completion response."""
+
+    def fake_completion(**_: object) -> dict:
+        # Canned response standing in for the real completion call.
+        return {
+            "model": "dashscope/qwen3.5-flash",
+            "usage": {
+                "prompt_tokens": 2000,
+                "completion_tokens": 100,
+                "total_tokens": 2100,
+                "prompt_tokens_details": {"cached_tokens": 500},
+            },
+            "choices": [{"message": {"content": "ok"}}],
+        }
+
+    result = LiteLLMService().run_completion_with_cost(
+        model="dashscope/qwen3.5-flash",
+        messages=[{"role": "user", "content": "hello"}],
+        completion_fn=fake_completion,
+    )
+
+    usage = result.usage
+    assert usage.prompt_tokens == 2000
+    assert usage.completion_tokens == 100
+    assert usage.total_tokens == 2100
+    # 1_500 * 2e-7 + 500 * 2e-8 + 100 * 2e-6 == 0.00051
+    assert usage.cost == pytest.approx(0.00051)