feat: 添加好友功能并集成 LiteLLM 代理服务
- 新增好友搜索、添加、好友列表功能 - 集成 LiteLLM 代理服务及多模型定价配置 - 更新 iOS CocoaPods 配置 - 更新 .gitignore 和环境变量配置
This commit is contained in:
Vendored
BIN
Binary file not shown.
+10
-8
@@ -231,21 +231,23 @@ class AgentType(str, Enum):
|
||||
|
||||
Agent loop functionality MUST follow the AG-UI protocol. **Use the `ag-ui` skill** for protocol reference and implementation guidance.
|
||||
|
||||
## Multi-Agent Orchestration (CrewAI Framework)
|
||||
## Multi-Agent Orchestration (AgentScope Framework)
|
||||
|
||||
Multi-agent orchestration MUST use the CrewAI framework. **Use the `crewai` skill** for framework reference and implementation guidance.
|
||||
Multi-agent orchestration MUST use the AgentScope framework. **Use the `agentscope-skill` skill** for framework reference and implementation guidance.
|
||||
|
||||
For workflows involving routing, LiteLLM proxy cost audit, or frontend/backend human approval loops, **use the `agentscope-hitl-cost` skill**.
|
||||
|
||||
### Core Principles
|
||||
|
||||
- Use CrewAI for orchestrating multiple agents working together
|
||||
- Define clear agent roles, tasks, and crews
|
||||
- Leverage built-in collaboration and delegation mechanisms
|
||||
- Follow CrewAI best practices for agent configuration
|
||||
- Use AgentScope for orchestrating multiple agents working together
|
||||
- Define clear agent roles, stage responsibilities, and pipeline boundaries
|
||||
- Leverage AgentScope built-in workflow and tool middleware mechanisms
|
||||
- Follow AgentScope best practices for agent configuration
|
||||
|
||||
### Key Components
|
||||
|
||||
- **Agents**: Autonomous units with specific roles and goals
|
||||
- **Tasks**: Assignments that agents complete
|
||||
- **Crews**: Teams of agents working together
|
||||
- **Tasks**: Stage-specific prompts and execution goals
|
||||
- **Pipelines**: Ordered orchestration flow between agents
|
||||
- **Tools**: Capabilities available to agents
|
||||
- **Flows**: Workflow orchestration and state management
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from core.config.initial.init_data import load_llm_catalog
|
||||
|
||||
|
||||
def _provider_key_env_name(factory_name: str) -> str:
    """Return the environment variable name that holds a factory's API key.

    The factory name is trimmed, upper-cased and appended to the
    ``SOCIAL_LLM__PROVIDER_KEYS__`` prefix.

    Args:
        factory_name: Factory name as written in the LLM catalog.

    Returns:
        The environment variable name for that factory's key.
    """
    # Factories whose key is stored under a different suffix than their name.
    suffix_overrides = {"VOLCENGINE": "ARK"}
    suffix = factory_name.strip().upper()
    return f"SOCIAL_LLM__PROVIDER_KEYS__{suffix_overrides.get(suffix, suffix)}"
|
||||
|
||||
|
||||
def build_proxy_config() -> dict[str, Any]:
    """Build the LiteLLM proxy configuration from the LLM catalog.

    Each catalog model that has a ``model_code``, ``factory_name`` and
    ``litellm_model`` becomes one ``model_list`` entry. The entry is named
    after ``model_code``, targets the factory's ``request_url`` through the
    ``openai/`` prefix, and references its API key as ``os.environ/<NAME>``.
    Non-dict entries and models missing any of the three fields are skipped.

    Returns:
        A mapping with a single ``model_list`` key.

    Raises:
        ValueError: If ``factories`` or ``llms`` is not a list, if a model
            names a factory that has no ``request_url``, or if no model entry
            could be built.
    """

    def _field(entry: dict[str, Any], key: str) -> str:
        # A key that is present with a null value must count as missing;
        # passing None through str() would yield the literal text "None".
        value = entry.get(key)
        return "" if value is None else str(value).strip()

    catalog = load_llm_catalog()

    factories = catalog.get("factories", [])
    llms = catalog.get("llms", [])
    if not isinstance(factories, list) or not isinstance(llms, list):
        raise ValueError("invalid llm catalog format")

    # Keys are lower-cased so models can reference factories in any casing.
    factory_url_map: dict[str, str] = {}
    for factory in factories:
        if not isinstance(factory, dict):
            continue
        name = _field(factory, "name").lower()
        request_url = _field(factory, "request_url")
        if name and request_url:
            factory_url_map[name] = request_url

    model_list: list[dict[str, Any]] = []
    for llm in llms:
        if not isinstance(llm, dict):
            continue
        model_code = _field(llm, "model_code")
        factory_name = _field(llm, "factory_name")
        litellm_model = _field(llm, "litellm_model")
        if not (model_code and factory_name and litellm_model):
            continue

        api_base = factory_url_map.get(factory_name.lower())
        if not api_base:
            raise ValueError(
                f"factory request_url missing for model {model_code}: {factory_name}"
            )

        env_key_name = _provider_key_env_name(factory_name)
        # Drop the leading "<provider>/" segment, if any; the entry is always
        # emitted with the "openai/" prefix instead.
        _, separator, remainder = litellm_model.partition("/")
        provider_model = remainder if separator else litellm_model

        model_list.append(
            {
                "model_name": model_code,
                "litellm_params": {
                    "model": f"openai/{provider_model}",
                    "api_base": api_base,
                    "api_key": f"os.environ/{env_key_name}",
                },
            }
        )

    if not model_list:
        raise ValueError("no models found in llm catalog")

    return {"model_list": model_list}
|
||||
|
||||
|
||||
def main() -> int:
    """Write the LiteLLM proxy configuration to the path given by ``--output``.

    Returns:
        ``0`` once the YAML file has been written.
    """
    parser = argparse.ArgumentParser(description="Build LiteLLM proxy config")
    parser.add_argument("--output", required=True, help="Output YAML file path")
    args = parser.parse_args()

    # Build before touching the filesystem so that a catalog error does not
    # leave freshly created directories behind.
    config = build_proxy_config()

    output_path = Path(args.output).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8") as file:
        yaml.safe_dump(config, file, sort_keys=False, allow_unicode=False)

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Vendored
BIN
Binary file not shown.
Vendored
BIN
Binary file not shown.
Vendored
BIN
Binary file not shown.
@@ -28,6 +28,8 @@ class LlmFactorySeed(BaseModel):
|
||||
class LlmSeed(BaseModel):
    """Seed record describing one LLM entry of the catalog."""

    # Code identifying the model within the catalog.
    model_code: str
    # Name of the factory entry this model belongs to.
    factory_name: str
    # Model identifier used for LiteLLM routing.
    litellm_model: str
    # Pricing tiers; each tier maps a field name to a numeric value.
    pricing_tiers: list[dict[str, float | int]]
|
||||
|
||||
|
||||
class LlmCatalogSeed(BaseModel):
|
||||
|
||||
@@ -170,6 +170,17 @@ class LlmSettings(BaseModel):
|
||||
provider_keys: dict[str, str] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class LiteLLMSettings(BaseModel):
    """Connection settings for the LiteLLM proxy."""

    host: str = "127.0.0.1"
    port: int = 3875
    api_key: str = "sk-local"

    @computed_field
    @property
    def base_url(self) -> str:
        """Return the proxy's ``/v1`` base URL built from ``host`` and ``port``."""
        host = self.host
        # A bare IPv6 literal has to be bracketed inside a URL, otherwise its
        # colons are indistinguishable from the port separator.
        if ":" in host and not host.startswith("["):
            host = f"[{host}]"
        return f"http://{host}:{self.port}/v1"
|
||||
|
||||
|
||||
class DatabaseSettings(BaseModel):
|
||||
host: str = "localhost"
|
||||
port: int = 5432
|
||||
@@ -206,6 +217,7 @@ class Settings(BaseSettings):
|
||||
supabase: SupabaseSettings = Field()
|
||||
storage: StorageSettings = StorageSettings()
|
||||
llm: LlmSettings = LlmSettings()
|
||||
litellm: LiteLLMSettings = LiteLLMSettings()
|
||||
agent_runtime: AgentRuntimeSettings = AgentRuntimeSettings()
|
||||
taskiq: TaskiqSettings = TaskiqSettings()
|
||||
database: DatabaseSettings = DatabaseSettings()
|
||||
|
||||
BIN
Binary file not shown.
@@ -1,34 +1,52 @@
|
||||
factories:
|
||||
- name: dashscope
|
||||
request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
|
||||
- name: dashscope
|
||||
request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
|
||||
|
||||
- name: minimax
|
||||
request_url: https://api.minimaxi.com/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
|
||||
- name: minimax
|
||||
request_url: https://api.minimaxi.com/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
|
||||
|
||||
- name: moonshot
|
||||
request_url: https://api.moonshot.cn/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
|
||||
- name: moonshot
|
||||
request_url: https://api.moonshot.cn/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
|
||||
|
||||
- name: deepseek
|
||||
request_url: https://api.deepseek.com/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
|
||||
- name: deepseek
|
||||
request_url: https://api.deepseek.com/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
|
||||
|
||||
- name: volcengine
|
||||
request_url: https://ark.cn-beijing.volces.com/api/v3
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
|
||||
- name: volcengine
|
||||
request_url: https://ark.cn-beijing.volces.com/api/v3
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
|
||||
|
||||
- name: zai
|
||||
request_url: https://api.z.ai/api/paas/v4
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
|
||||
- name: zai
|
||||
request_url: https://api.z.ai/api/paas/v4
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
|
||||
|
||||
llms:
|
||||
# 你原来的两个保留
|
||||
- model_code: qwen3.5-flash
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen-turbo
|
||||
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
|
||||
- model_code: qwen3.5-flash
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-flash
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000002
|
||||
output_cost_per_token: 0.000002
|
||||
cache_hit_cost_per_token: 0.00000002
|
||||
- max_prompt_tokens: 256000
|
||||
input_cost_per_token: 0.0000008
|
||||
output_cost_per_token: 0.000008
|
||||
cache_hit_cost_per_token: 0.00000008
|
||||
- max_prompt_tokens: 1000000
|
||||
input_cost_per_token: 0.0000012
|
||||
output_cost_per_token: 0.000012
|
||||
cache_hit_cost_per_token: 0.00000012
|
||||
|
||||
- model_code: deepseek-chat
|
||||
factory_name: deepseek
|
||||
litellm_model: deepseek/deepseek-chat
|
||||
- model_code: deepseek-chat
|
||||
factory_name: deepseek
|
||||
litellm_model: deepseek/deepseek-chat
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.000002
|
||||
output_cost_per_token: 0.000003
|
||||
cache_hit_cost_per_token: 0.0000002
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from services.litellm.service import (
|
||||
LiteLLMResponseWithCost,
|
||||
LiteLLMService,
|
||||
LiteLLMUsage,
|
||||
)
|
||||
|
||||
__all__ = ["LiteLLMService", "LiteLLMUsage", "LiteLLMResponseWithCost"]
|
||||
@@ -0,0 +1,189 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable
|
||||
|
||||
from litellm import completion
|
||||
|
||||
from core.config.settings import config
|
||||
from core.config.initial.init_data import load_llm_catalog
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PricingTier:
    """Per-token prices that apply up to a given prompt size."""

    # Largest prompt token count this tier covers.
    max_prompt_tokens: int
    # Price of one prompt token that was not served from cache.
    input_cost_per_token: float
    # Price of one completion token.
    output_cost_per_token: float
    # Price of one prompt token that was served from cache.
    cache_hit_cost_per_token: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LiteLLMUsage:
    """Token counts of one completion together with its computed cost."""

    # Tokens in the prompt, cached ones included.
    prompt_tokens: int
    # Tokens in the completion.
    completion_tokens: int
    # Total token count of the call.
    total_tokens: int
    # Portion of the prompt tokens that was served from cache.
    cached_prompt_tokens: int
    # Cost computed for the call from the catalog pricing tiers.
    cost: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LiteLLMResponseWithCost:
    """A completion response paired with its usage and cost figures."""

    # The completion response as a plain dictionary.
    response: dict[str, Any]
    # Token usage and cost extracted from that response.
    usage: LiteLLMUsage
|
||||
|
||||
|
||||
class LiteLLMService:
    """Run completions through the LiteLLM proxy and price each one.

    Pricing tiers are read from the LLM catalog once, at construction, and
    indexed by the lower-cased ``model_code`` and ``litellm_model`` of every
    catalog model that declares ``pricing_tiers``.
    """

    proxy_base_url: str
    proxy_api_key: str
    _pricing_by_model: dict[str, tuple[PricingTier, ...]]

    def __init__(
        self,
        *,
        proxy_base_url: str | None = None,
        proxy_api_key: str | None = None,
    ) -> None:
        """Create the service.

        Args:
            proxy_base_url: Proxy base URL; falls back to
                ``config.litellm.base_url`` when empty or omitted.
            proxy_api_key: Proxy API key; falls back to
                ``config.litellm.api_key`` when empty or omitted.
        """
        self.proxy_base_url = proxy_base_url or config.litellm.base_url
        self.proxy_api_key = proxy_api_key or config.litellm.api_key
        self._pricing_by_model = self._build_pricing_map()

    @staticmethod
    def _build_pricing_map() -> dict[str, tuple[PricingTier, ...]]:
        """Index the catalog's pricing tiers by model name.

        Models without a non-empty ``pricing_tiers`` list are skipped. Tiers
        are stored in ascending ``max_prompt_tokens`` order.
        """
        catalog = load_llm_catalog()
        pricing_by_model: dict[str, tuple[PricingTier, ...]] = {}
        for model in catalog.get("llms", []):
            if not isinstance(model, dict):
                continue
            raw_tiers = model.get("pricing_tiers")
            if not isinstance(raw_tiers, list) or not raw_tiers:
                continue

            tiers = [
                PricingTier(
                    max_prompt_tokens=int(item.get("max_prompt_tokens") or 0),
                    input_cost_per_token=float(
                        item.get("input_cost_per_token") or 0.0
                    ),
                    output_cost_per_token=float(
                        item.get("output_cost_per_token") or 0.0
                    ),
                    cache_hit_cost_per_token=float(
                        item.get("cache_hit_cost_per_token") or 0.0
                    ),
                )
                for item in raw_tiers
                if isinstance(item, dict)
            ]
            if not tiers:
                continue
            ordered_tiers = tuple(
                sorted(tiers, key=lambda tier: tier.max_prompt_tokens)
            )

            # ``or ""`` keeps a null name from being registered as "none".
            for key in ("model_code", "litellm_model"):
                name = str(model.get(key) or "").strip().lower()
                if name:
                    pricing_by_model[name] = ordered_tiers
        return pricing_by_model

    def calculate_cost(
        self,
        *,
        model: str,
        prompt_tokens: int,
        completion_tokens: int,
        cached_prompt_tokens: int = 0,
    ) -> float:
        """Compute the cost of one call from the model's pricing tiers.

        The tier is the first one, in ascending order, whose
        ``max_prompt_tokens`` is at least the prompt size; a prompt larger
        than every tier is priced with the last tier. Negative counts are
        treated as zero and the cached count is capped at the prompt count.

        Args:
            model: Model name; matched case-insensitively after trimming.
            prompt_tokens: Prompt tokens, cached ones included.
            completion_tokens: Completion tokens.
            cached_prompt_tokens: Prompt tokens billed at the cache-hit price.

        Returns:
            The total cost of the call.

        Raises:
            ValueError: If no pricing is known for ``model``.
        """
        tiers = self._pricing_by_model.get(model.strip().lower())
        if tiers is None:
            raise ValueError(f"unknown model pricing: {model}")

        prompt_count = max(int(prompt_tokens), 0)
        completion_count = max(int(completion_tokens), 0)
        cached_count = min(max(int(cached_prompt_tokens), 0), prompt_count)
        uncached_count = prompt_count - cached_count

        selected_tier = next(
            (tier for tier in tiers if prompt_count <= tier.max_prompt_tokens),
            tiers[-1],
        )

        return float(
            uncached_count * selected_tier.input_cost_per_token
            + cached_count * selected_tier.cache_hit_cost_per_token
            + completion_count * selected_tier.output_cost_per_token
        )

    def run_completion_with_cost(
        self,
        *,
        model: str,
        messages: list[dict[str, Any]],
        temperature: float | None = None,
        max_tokens: int | None = None,
        timeout: float | None = None,
        completion_fn: Callable[..., dict[str, Any]] | None = None,
    ) -> LiteLLMResponseWithCost:
        """Run a non-streaming completion and attach usage and cost.

        Args:
            model: Model name; sent with an ``openai/`` prefix unless it
                already has one.
            messages: Chat messages forwarded unchanged.
            temperature: Forwarded unchanged.
            max_tokens: Forwarded unchanged.
            timeout: Forwarded unchanged.
            completion_fn: Callable used instead of ``litellm.completion``.

        Returns:
            The response as a dictionary together with its usage figures.

        Raises:
            ValueError: If the response cannot be turned into a dictionary,
                carries no ``usage`` mapping, or names a model without
                pricing.
        """
        caller = completion_fn or completion
        request_model = model if model.startswith("openai/") else f"openai/{model}"

        response = self._normalize_response(
            caller(
                model=request_model,
                api_key=self.proxy_api_key,
                api_base=self.proxy_base_url,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                timeout=timeout,
                stream=False,
            )
        )

        usage_raw = response.get("usage")
        if not isinstance(usage_raw, dict):
            raise ValueError("missing usage in response")

        (
            prompt_tokens,
            completion_tokens,
            total_tokens,
            cached_prompt_tokens,
        ) = self._parse_usage(usage_raw)

        cost = self.calculate_cost(
            model=self._resolve_model(response, model),
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cached_prompt_tokens=cached_prompt_tokens,
        )
        return LiteLLMResponseWithCost(
            response=response,
            usage=LiteLLMUsage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
                cached_prompt_tokens=cached_prompt_tokens,
                cost=cost,
            ),
        )

    @staticmethod
    def _parse_usage(usage_raw: dict[str, Any]) -> tuple[int, int, int, int]:
        """Return ``(prompt, completion, total, cached)`` token counts.

        Missing or null counts are read as zero. A missing, null or zero
        ``total_tokens`` is replaced by prompt plus completion tokens.
        """
        prompt_tokens = int(usage_raw.get("prompt_tokens") or 0)
        completion_tokens = int(usage_raw.get("completion_tokens") or 0)
        # The sum is the fallback for a null total as well as an absent one.
        total_tokens = int(
            usage_raw.get("total_tokens") or (prompt_tokens + completion_tokens)
        )
        cached_prompt_tokens = 0
        details = usage_raw.get("prompt_tokens_details")
        if isinstance(details, dict):
            cached_prompt_tokens = int(details.get("cached_tokens") or 0)
        return prompt_tokens, completion_tokens, total_tokens, cached_prompt_tokens

    @staticmethod
    def _resolve_model(response: dict[str, Any], requested_model: str) -> str:
        """Return the model name to price the response with.

        The response's ``model`` is used when it is a non-blank value; a
        missing, null or blank one falls back to ``requested_model``.
        """
        reported = response.get("model")
        reported_name = "" if reported is None else str(reported).strip()
        return reported_name or requested_model.strip()

    @staticmethod
    def _normalize_response(response_any: Any) -> dict[str, Any]:
        """Return the response as a dictionary.

        A dictionary is returned as is; any other object is converted through
        its ``model_dump()`` method when it has one.

        Raises:
            ValueError: If neither path yields a dictionary.
        """
        if isinstance(response_any, dict):
            return response_any
        model_dump = getattr(response_any, "model_dump", None)
        if callable(model_dump):
            dumped = model_dump()
            if isinstance(dumped, dict):
                return dumped
        raise ValueError("litellm response is not serializable")
|
||||
@@ -31,3 +31,26 @@ def test_seed_data_does_not_keep_legacy_deepseek_alias() -> None:
|
||||
catalog = load_llm_catalog()
|
||||
|
||||
assert all(entry["model_code"] != "deepseek-v3.2" for entry in catalog["llms"])
|
||||
|
||||
|
||||
def test_llm_catalog_contains_litellm_routing_and_pricing_fields() -> None:
    """Every catalog model carries exactly the routing and pricing fields."""
    expected_fields = {
        "model_code",
        "factory_name",
        "litellm_model",
        "pricing_tiers",
    }
    cost_fields = (
        "input_cost_per_token",
        "output_cost_per_token",
        "cache_hit_cost_per_token",
    )

    catalog = load_llm_catalog()

    # Without this the per-entry checks below pass vacuously on an empty list.
    assert catalog["llms"]

    for entry in catalog["llms"]:
        assert set(entry.keys()) == expected_fields
        assert isinstance(entry["litellm_model"], str)
        assert "/" in entry["litellm_model"]

        pricing_tiers = entry["pricing_tiers"]
        assert isinstance(pricing_tiers, list)
        assert len(pricing_tiers) > 0
        for tier in pricing_tiers:
            assert isinstance(tier, dict)
            assert int(tier["max_prompt_tokens"]) > 0
            for cost_field in cost_fields:
                assert float(tier[cost_field]) >= 0
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from services.litellm.service import LiteLLMService
|
||||
|
||||
|
||||
def test_calculate_cost_uses_first_qwen_tier() -> None:
    """A 100K-token prompt is priced with the 128K tier."""
    service = LiteLLMService()

    cost = service.calculate_cost(
        model="dashscope/qwen3.5-flash",
        prompt_tokens=100_000,
        completion_tokens=1_000,
        cached_prompt_tokens=10_000,
    )

    # 90_000 * 2e-7 + 10_000 * 2e-8 + 1_000 * 2e-6
    assert cost == pytest.approx(0.0202)
|
||||
|
||||
|
||||
def test_calculate_cost_uses_second_qwen_tier() -> None:
    """A 200K-token prompt is priced with the 256K tier."""
    service = LiteLLMService()

    cost = service.calculate_cost(
        model="dashscope/qwen3.5-flash",
        prompt_tokens=200_000,
        completion_tokens=5_000,
        cached_prompt_tokens=20_000,
    )

    # 180_000 * 8e-7 + 20_000 * 8e-8 + 5_000 * 8e-6
    assert cost == pytest.approx(0.1856)
|
||||
|
||||
|
||||
def test_run_completion_extracts_usage_and_cost() -> None:
    """Usage figures and cost are taken from the completion response."""
    canned_response = {
        "model": "dashscope/qwen3.5-flash",
        "usage": {
            "prompt_tokens": 2000,
            "completion_tokens": 100,
            "total_tokens": 2100,
            "prompt_tokens_details": {"cached_tokens": 500},
        },
        "choices": [{"message": {"content": "ok"}}],
    }
    service = LiteLLMService()

    result = service.run_completion_with_cost(
        model="dashscope/qwen3.5-flash",
        messages=[{"role": "user", "content": "hello"}],
        # Stand-in for the real completion call, so nothing is sent out.
        completion_fn=lambda **_: canned_response,
    )

    assert result.usage.prompt_tokens == 2000
    assert result.usage.completion_tokens == 100
    assert result.usage.total_tokens == 2100
    assert result.usage.cached_prompt_tokens == 500
    # 1_500 * 2e-7 + 500 * 2e-8 + 100 * 2e-6
    assert result.usage.cost == pytest.approx(0.00051)
|
||||
Reference in New Issue
Block a user