feat: 添加好友功能并集成 LiteLLM 代理服务

- 新增好友搜索、添加、好友列表功能
- 集成 LiteLLM 代理服务及多模型定价配置
- 更新 iOS CocoaPods 配置
- 更新 .gitignore 和环境变量配置
This commit is contained in:
zl-q
2026-03-11 09:14:51 +08:00
parent 487405aa5b
commit e55e445906
28 changed files with 1226 additions and 181 deletions
BIN
View File
Binary file not shown.
+10 -8
View File
@@ -231,21 +231,23 @@ class AgentType(str, Enum):
Agent loop functionality MUST follow the AG-UI protocol. **Use the `ag-ui` skill** for protocol reference and implementation guidance.
## Multi-Agent Orchestration (CrewAI Framework)
## Multi-Agent Orchestration (AgentScope Framework)
Multi-agent orchestration MUST use the CrewAI framework. **Use the `crewai` skill** for framework reference and implementation guidance.
Multi-agent orchestration MUST use the AgentScope framework. **Use the `agentscope-skill`** for framework reference and implementation guidance.
For workflows involving routing, LiteLLM proxy cost audit, or frontend/backend human approval loops, **use the `agentscope-hitl-cost` skill**.
### Core Principles
- Use CrewAI for orchestrating multiple agents working together
- Define clear agent roles, tasks, and crews
- Leverage built-in collaboration and delegation mechanisms
- Follow CrewAI best practices for agent configuration
- Use AgentScope for orchestrating multiple agents working together
- Define clear agent roles, stage responsibilities, and pipeline boundaries
- Leverage AgentScope built-in workflow and tool middleware mechanisms
- Follow AgentScope best practices for agent configuration
### Key Components
- **Agents**: Autonomous units with specific roles and goals
- **Tasks**: Assignments that agents complete
- **Crews**: Teams of agents working together
- **Tasks**: Stage-specific prompts and execution goals
- **Pipelines**: Ordered orchestration flow between agents
- **Tools**: Capabilities available to agents
- **Flows**: Workflow orchestration and state management
@@ -0,0 +1,90 @@
from __future__ import annotations
import argparse
from pathlib import Path
from typing import Any
import yaml
from core.config.initial.init_data import load_llm_catalog
def _provider_key_env_name(factory_name: str) -> str:
normalized = factory_name.strip().upper()
if normalized == "VOLCENGINE":
normalized = "ARK"
return f"SOCIAL_LLM__PROVIDER_KEYS__{normalized}"
def build_proxy_config() -> dict[str, Any]:
    """Build the LiteLLM proxy configuration from the LLM catalog.

    Each usable catalog model becomes one ``model_list`` entry that routes
    ``model_code`` to its factory's ``request_url`` through the ``openai/``
    prefix, with the API key referenced via ``os.environ/<ENV_NAME>``.

    Returns:
        A mapping with a single ``model_list`` key.

    Raises:
        ValueError: If the catalog's ``factories`` or ``llms`` is not a list,
            if a model's factory has no ``request_url``, or if no model
            entry could be produced.
    """
    catalog = load_llm_catalog()
    factories = catalog.get("factories", [])
    llms = catalog.get("llms", [])
    if not (isinstance(factories, list) and isinstance(llms, list)):
        raise ValueError("invalid llm catalog format")

    # Lower-cased factory name -> request URL; incomplete entries are ignored.
    factory_url_map: dict[str, str] = {}
    for factory in factories:
        if isinstance(factory, dict):
            key = str(factory.get("name", "")).strip().lower()
            url = str(factory.get("request_url", "")).strip()
            if key and url:
                factory_url_map[key] = url

    model_list: list[dict[str, Any]] = []
    for llm in llms:
        if not isinstance(llm, dict):
            continue

        model_code = str(llm.get("model_code", "")).strip()
        factory_name = str(llm.get("factory_name", "")).strip()
        litellm_model = str(llm.get("litellm_model", "")).strip()
        # Entries lacking any of the three routing fields are skipped silently.
        if not (model_code and factory_name and litellm_model):
            continue

        api_base = factory_url_map.get(factory_name.lower())
        if not api_base:
            raise ValueError(
                f"factory request_url missing for model {model_code}: {factory_name}"
            )

        env_key_name = _provider_key_env_name(factory_name)
        # Drop the leading "<provider>/" segment, if any, from the model string.
        _, separator, remainder = litellm_model.partition("/")
        provider_model = remainder if separator else litellm_model

        litellm_params = {
            "model": f"openai/{provider_model}",
            "api_base": api_base,
            "api_key": f"os.environ/{env_key_name}",
        }
        model_list.append(
            {"model_name": model_code, "litellm_params": litellm_params}
        )

    if not model_list:
        raise ValueError("no models found in llm catalog")
    return {"model_list": model_list}
def main() -> int:
    """Write the generated LiteLLM proxy config to the ``--output`` path.

    The output's parent directory is created when missing. The config is
    dumped as YAML with the key order preserved and non-ASCII characters
    escaped.

    Returns:
        ``0`` after the file has been written.
    """
    cli = argparse.ArgumentParser(description="Build LiteLLM proxy config")
    cli.add_argument("--output", required=True, help="Output YAML file path")
    options = cli.parse_args()

    destination = Path(options.output).resolve()
    destination.parent.mkdir(parents=True, exist_ok=True)

    proxy_config = build_proxy_config()
    with destination.open("w", encoding="utf-8") as stream:
        yaml.safe_dump(proxy_config, stream, sort_keys=False, allow_unicode=False)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
Binary file not shown.
@@ -28,6 +28,8 @@ class LlmFactorySeed(BaseModel):
class LlmSeed(BaseModel):
    """Seed record describing one model of the LLM catalog."""

    # Identifier of the model within the catalog.
    model_code: str
    # Name of the factory (provider) entry this model belongs to.
    factory_name: str
    # Model string used for LiteLLM routing.
    litellm_model: str
    # Pricing tiers, each a mapping of numeric pricing fields.
    pricing_tiers: list[dict[str, float | int]]
class LlmCatalogSeed(BaseModel):
+12
View File
@@ -170,6 +170,17 @@ class LlmSettings(BaseModel):
provider_keys: dict[str, str] = Field(default_factory=dict)
class LiteLLMSettings(BaseModel):
    """Host, port and API key settings for the LiteLLM service."""

    host: str = "127.0.0.1"
    port: int = 3875
    api_key: str = "sk-local"

    @computed_field
    @property
    def base_url(self) -> str:
        """Return the base URL in the form ``http://<host>:<port>/v1``."""
        return f"http://{self.host}:{self.port}/v1"
class DatabaseSettings(BaseModel):
host: str = "localhost"
port: int = 5432
@@ -206,6 +217,7 @@ class Settings(BaseSettings):
supabase: SupabaseSettings = Field()
storage: StorageSettings = StorageSettings()
llm: LlmSettings = LlmSettings()
litellm: LiteLLMSettings = LiteLLMSettings()
agent_runtime: AgentRuntimeSettings = AgentRuntimeSettings()
taskiq: TaskiqSettings = TaskiqSettings()
database: DatabaseSettings = DatabaseSettings()
Binary file not shown.
@@ -1,34 +1,52 @@
factories:
- name: dashscope
request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
- name: dashscope
request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
- name: minimax
request_url: https://api.minimaxi.com/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
- name: minimax
request_url: https://api.minimaxi.com/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
- name: moonshot
request_url: https://api.moonshot.cn/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
- name: moonshot
request_url: https://api.moonshot.cn/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
- name: deepseek
request_url: https://api.deepseek.com/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
- name: deepseek
request_url: https://api.deepseek.com/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
- name: volcengine
request_url: https://ark.cn-beijing.volces.com/api/v3
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
- name: volcengine
request_url: https://ark.cn-beijing.volces.com/api/v3
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
- name: zai
request_url: https://api.z.ai/api/paas/v4
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
- name: zai
request_url: https://api.z.ai/api/paas/v4
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
llms:
# 你原来的两个保留
- model_code: qwen3.5-flash
factory_name: dashscope
litellm_model: dashscope/qwen-turbo
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
- model_code: qwen3.5-flash
factory_name: dashscope
litellm_model: dashscope/qwen3.5-flash
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000002
output_cost_per_token: 0.000002
cache_hit_cost_per_token: 0.00000002
- max_prompt_tokens: 256000
input_cost_per_token: 0.0000008
output_cost_per_token: 0.000008
cache_hit_cost_per_token: 0.00000008
- max_prompt_tokens: 1000000
input_cost_per_token: 0.0000012
output_cost_per_token: 0.000012
cache_hit_cost_per_token: 0.00000012
- model_code: deepseek-chat
factory_name: deepseek
litellm_model: deepseek/deepseek-chat
- model_code: deepseek-chat
factory_name: deepseek
litellm_model: deepseek/deepseek-chat
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.000002
output_cost_per_token: 0.000003
cache_hit_cost_per_token: 0.0000002
+9
View File
@@ -0,0 +1,9 @@
from __future__ import annotations
from services.litellm.service import (
LiteLLMResponseWithCost,
LiteLLMService,
LiteLLMUsage,
)
__all__ = ["LiteLLMService", "LiteLLMUsage", "LiteLLMResponseWithCost"]
+189
View File
@@ -0,0 +1,189 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable
from litellm import completion
from core.config.settings import config
from core.config.initial.init_data import load_llm_catalog
@dataclass(frozen=True)
class PricingTier:
    """Per-token prices that apply to prompts up to a given size."""

    # Largest prompt token count (inclusive) for which this tier is selected.
    max_prompt_tokens: int
    # Price per prompt token that was not counted as cached.
    input_cost_per_token: float
    # Price per completion token.
    output_cost_per_token: float
    # Price per prompt token counted as cached.
    cache_hit_cost_per_token: float
@dataclass(frozen=True)
class LiteLLMUsage:
    """Token counts and computed cost of a single completion."""

    # Token counts read from the response's ``usage`` mapping.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    # Read from ``usage["prompt_tokens_details"]["cached_tokens"]``; 0 when absent.
    cached_prompt_tokens: int
    # Cost computed from the catalog pricing tiers.
    cost: float
@dataclass(frozen=True)
class LiteLLMResponseWithCost:
    """A completion response paired with its usage and cost."""

    # The completion response as a plain dictionary.
    response: dict[str, Any]
    # Token usage and computed cost for that response.
    usage: LiteLLMUsage
class LiteLLMService:
    """Run completions through the LiteLLM proxy and price their token usage.

    Pricing tiers are loaded from the LLM catalog when the service is created
    and indexed, lower-cased, by both ``model_code`` and ``litellm_model``.
    """

    proxy_base_url: str
    proxy_api_key: str
    _pricing_by_model: dict[str, tuple[PricingTier, ...]]

    # Routing prefix that run_completion_with_cost puts on outgoing model names.
    _PROXY_MODEL_PREFIX = "openai/"

    def __init__(
        self,
        *,
        proxy_base_url: str | None = None,
        proxy_api_key: str | None = None,
    ) -> None:
        """Create the service.

        Args:
            proxy_base_url: Proxy base URL; falls back to
                ``config.litellm.base_url`` when omitted or empty.
            proxy_api_key: Proxy API key; falls back to
                ``config.litellm.api_key`` when omitted or empty.
        """
        self.proxy_base_url = proxy_base_url or config.litellm.base_url
        self.proxy_api_key = proxy_api_key or config.litellm.api_key
        self._pricing_by_model = self._build_pricing_map()

    @staticmethod
    def _build_pricing_map() -> dict[str, tuple[PricingTier, ...]]:
        """Index the catalog's pricing tiers by model name.

        Models without a non-empty ``pricing_tiers`` list are skipped. Tiers
        are sorted by ``max_prompt_tokens`` ascending; missing or falsy
        numeric fields are treated as zero.
        """
        catalog = load_llm_catalog()
        pricing_by_model: dict[str, tuple[PricingTier, ...]] = {}
        for model in catalog.get("llms", []):
            if not isinstance(model, dict):
                continue
            model_code = str(model.get("model_code", "")).strip().lower()
            litellm_model = str(model.get("litellm_model", "")).strip().lower()
            raw_tiers = model.get("pricing_tiers")
            if not isinstance(raw_tiers, list) or not raw_tiers:
                continue
            tiers = [
                PricingTier(
                    max_prompt_tokens=int(item.get("max_prompt_tokens", 0) or 0),
                    input_cost_per_token=float(
                        item.get("input_cost_per_token", 0.0) or 0.0
                    ),
                    output_cost_per_token=float(
                        item.get("output_cost_per_token", 0.0) or 0.0
                    ),
                    cache_hit_cost_per_token=float(
                        item.get("cache_hit_cost_per_token", 0.0) or 0.0
                    ),
                )
                for item in raw_tiers
                if isinstance(item, dict)
            ]
            if not tiers:
                continue
            ordered_tiers = tuple(
                sorted(tiers, key=lambda item: item.max_prompt_tokens)
            )
            # Register both spellings so either name can be priced.
            if model_code:
                pricing_by_model[model_code] = ordered_tiers
            if litellm_model:
                pricing_by_model[litellm_model] = ordered_tiers
        return pricing_by_model

    def _resolve_pricing_model(self, *candidates: Any) -> str:
        """Pick the model name to price from ``candidates``.

        Candidates are tried in order. Each is tried as given and, when it
        starts with the ``openai/`` routing prefix, also without that prefix.
        The first spelling with a pricing entry is returned. When nothing
        matches, the first non-empty candidate is returned unchanged so that
        ``calculate_cost`` names it in its ``ValueError``; an empty string is
        returned when every candidate is ``None`` or blank.
        """
        prefix = self._PROXY_MODEL_PREFIX
        fallback = ""
        for candidate in candidates:
            if candidate is None:
                continue
            name = str(candidate).strip()
            if not name:
                continue
            fallback = fallback or name
            spellings = [name]
            if name.lower().startswith(prefix):
                spellings.append(name[len(prefix):])
            for spelling in spellings:
                if spelling.strip().lower() in self._pricing_by_model:
                    return spelling
        return fallback

    def calculate_cost(
        self,
        *,
        model: str,
        prompt_tokens: int,
        completion_tokens: int,
        cached_prompt_tokens: int = 0,
    ) -> float:
        """Compute the cost of a completion from the model's pricing tiers.

        The tier is the first one whose ``max_prompt_tokens`` is not exceeded
        by the prompt size; prompts larger than every tier use the last tier.
        Negative counts are clamped to zero and cached tokens are capped at
        the prompt token count.

        Args:
            model: Model name; matched case-insensitively after trimming.
            prompt_tokens: Total prompt tokens, cached ones included.
            completion_tokens: Completion tokens.
            cached_prompt_tokens: Prompt tokens billed at the cache-hit price.

        Returns:
            The cost as a float.

        Raises:
            ValueError: If no pricing is known for ``model``.
        """
        tiers = self._pricing_by_model.get(model.strip().lower())
        if tiers is None:
            raise ValueError(f"unknown model pricing: {model}")
        normalized_prompt_tokens = max(int(prompt_tokens), 0)
        normalized_completion_tokens = max(int(completion_tokens), 0)
        normalized_cached_tokens = min(
            max(int(cached_prompt_tokens), 0), normalized_prompt_tokens
        )
        uncached_prompt_tokens = normalized_prompt_tokens - normalized_cached_tokens
        # Tiers are sorted ascending; fall back to the largest tier.
        selected_tier = next(
            (
                tier
                for tier in tiers
                if normalized_prompt_tokens <= tier.max_prompt_tokens
            ),
            tiers[-1],
        )
        return float(
            uncached_prompt_tokens * selected_tier.input_cost_per_token
            + normalized_cached_tokens * selected_tier.cache_hit_cost_per_token
            + normalized_completion_tokens * selected_tier.output_cost_per_token
        )

    def run_completion_with_cost(
        self,
        *,
        model: str,
        messages: list[dict[str, Any]],
        temperature: float | None = None,
        max_tokens: int | None = None,
        timeout: float | None = None,
        completion_fn: Callable[..., dict[str, Any]] | None = None,
    ) -> LiteLLMResponseWithCost:
        """Run a non-streaming completion via the proxy and price its usage.

        Args:
            model: Model name; ``openai/`` is prepended for routing unless
                already present.
            messages: Chat messages forwarded unchanged.
            temperature: Forwarded unchanged.
            max_tokens: Forwarded unchanged.
            timeout: Forwarded unchanged.
            completion_fn: Replacement for ``litellm.completion``.

        Returns:
            The normalized response together with its usage and cost.

        Raises:
            ValueError: If the response cannot be converted to a dict, has no
                ``usage`` mapping, or no pricing is known for either the
                response's model or the requested model.
        """
        caller = completion_fn or completion
        request_model = model if model.startswith("openai/") else f"openai/{model}"
        response_any = caller(
            model=request_model,
            api_key=self.proxy_api_key,
            api_base=self.proxy_base_url,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            timeout=timeout,
            stream=False,
        )
        response = self._normalize_response(response_any)
        usage_raw = response.get("usage")
        if not isinstance(usage_raw, dict):
            raise ValueError("missing usage in response")

        prompt_tokens = int(usage_raw.get("prompt_tokens", 0) or 0)
        completion_tokens = int(usage_raw.get("completion_tokens", 0) or 0)
        # A missing, null or zero total falls back to the sum of its parts.
        total_tokens = int(usage_raw.get("total_tokens") or 0) or (
            prompt_tokens + completion_tokens
        )

        cached_prompt_tokens = 0
        prompt_tokens_details = usage_raw.get("prompt_tokens_details")
        if isinstance(prompt_tokens_details, dict):
            cached_prompt_tokens = int(
                prompt_tokens_details.get("cached_tokens", 0) or 0
            )

        # Prefer the model named by the response, but fall back to the
        # requested model when the response's name is absent or unpriced.
        resolved_model = self._resolve_pricing_model(response.get("model"), model)
        cost = self.calculate_cost(
            model=resolved_model,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cached_prompt_tokens=cached_prompt_tokens,
        )
        return LiteLLMResponseWithCost(
            response=response,
            usage=LiteLLMUsage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
                cached_prompt_tokens=cached_prompt_tokens,
                cost=cost,
            ),
        )

    @staticmethod
    def _normalize_response(response_any: Any) -> dict[str, Any]:
        """Return ``response_any`` as a dict.

        Dicts are returned as-is; other objects are converted through their
        ``model_dump()`` method when they have one.

        Raises:
            ValueError: If no dict can be obtained.
        """
        if isinstance(response_any, dict):
            return response_any
        model_dump = getattr(response_any, "model_dump", None)
        if callable(model_dump):
            dumped = model_dump()
            if isinstance(dumped, dict):
                return dumped
        raise ValueError("litellm response is not serializable")
@@ -31,3 +31,26 @@ def test_seed_data_does_not_keep_legacy_deepseek_alias() -> None:
catalog = load_llm_catalog()
assert all(entry["model_code"] != "deepseek-v3.2" for entry in catalog["llms"])
def test_llm_catalog_contains_litellm_routing_and_pricing_fields() -> None:
    """Every catalog model has exactly the routing and pricing fields."""
    expected_keys = {
        "model_code",
        "factory_name",
        "litellm_model",
        "pricing_tiers",
    }
    cost_fields = (
        "input_cost_per_token",
        "output_cost_per_token",
        "cache_hit_cost_per_token",
    )

    for entry in load_llm_catalog()["llms"]:
        assert set(entry.keys()) == expected_keys

        routed_model = entry["litellm_model"]
        assert isinstance(routed_model, str)
        assert "/" in routed_model

        tiers = entry["pricing_tiers"]
        assert isinstance(tiers, list)
        assert len(tiers) > 0
        for tier in tiers:
            assert isinstance(tier, dict)
            assert int(tier["max_prompt_tokens"]) > 0
            for field_name in cost_fields:
                assert float(tier[field_name]) >= 0
@@ -0,0 +1,55 @@
from __future__ import annotations
import pytest
from services.litellm.service import LiteLLMService
def test_calculate_cost_uses_first_qwen_tier() -> None:
    """A 100K-token prompt on qwen3.5-flash costs 0.0202."""
    billed = LiteLLMService().calculate_cost(
        model="dashscope/qwen3.5-flash",
        prompt_tokens=100_000,
        completion_tokens=1_000,
        cached_prompt_tokens=10_000,
    )

    # 90_000 uncached * 2e-7 + 10_000 cached * 2e-8 + 1_000 output * 2e-6
    assert billed == pytest.approx(0.0202)
def test_calculate_cost_uses_second_qwen_tier() -> None:
    """A 200K-token prompt on qwen3.5-flash costs 0.1856."""
    billed = LiteLLMService().calculate_cost(
        model="dashscope/qwen3.5-flash",
        prompt_tokens=200_000,
        completion_tokens=5_000,
        cached_prompt_tokens=20_000,
    )

    # 180_000 uncached * 8e-7 + 20_000 cached * 8e-8 + 5_000 output * 8e-6
    assert billed == pytest.approx(0.1856)
def test_run_completion_extracts_usage_and_cost() -> None:
    """Usage counts and cost are taken from a stubbed completion response."""

    def fake_completion(**_: object) -> dict:
        # Canned response standing in for the real completion call.
        return {
            "model": "dashscope/qwen3.5-flash",
            "usage": {
                "prompt_tokens": 2000,
                "completion_tokens": 100,
                "total_tokens": 2100,
                "prompt_tokens_details": {"cached_tokens": 500},
            },
            "choices": [{"message": {"content": "ok"}}],
        }

    outcome = LiteLLMService().run_completion_with_cost(
        model="dashscope/qwen3.5-flash",
        messages=[{"role": "user", "content": "hello"}],
        completion_fn=fake_completion,
    )

    usage = outcome.usage
    assert usage.prompt_tokens == 2000
    assert usage.completion_tokens == 100
    assert usage.total_tokens == 2100
    assert usage.cost == pytest.approx(0.00051)