feat: 应用名称更新为灵可析并增强 Chat 功能

- 更新 Android/iOS 应用名称和图标为灵可析
- Chat 支持取消正在运行的 Agent 对话
- 改进 ChatBloc 状态管理(区分发送/等待/流式/取消状态)
- HomeScreen 支持外部注入 ChatBloc 和显示等待指示器
- 后端 Agent 运行服务优化(消息处理、usage 追踪)
- 补充相关单元测试和 Widget 测试
This commit is contained in:
qzl
2026-03-10 18:39:53 +08:00
parent b48f7abf72
commit 487405aa5b
50 changed files with 768 additions and 284 deletions
@@ -13,8 +13,8 @@ from core.agent.domain.agui_input import (
)
from core.agent.application.runtime_loop_service import RuntimeLoopService
from core.agent.application.runtime_data_service import RuntimeDataService
from core.agent.application.session_state_persistence import SessionStatePersistence
from core.agent.application.session_state_persistence import (
SessionStatePersistence,
ToolResultStorage,
persist_tool_result_payload,
)
@@ -179,7 +179,6 @@ class RunService:
seq=next_seq,
role=AgentChatMessageRole.USER,
content=user_input,
model_code=model_code,
metadata=MessageMetadataUserInput().model_dump(),
)
pending_tool_call_id: str | None = None
@@ -4,7 +4,7 @@ from typing import Any, Callable
from crewai import Agent, Crew, LLM, Process, Task
from crewai.agents import parser as crew_parser
from litellm import completion, completion_cost
from litellm import completion
from core.agent.domain.system_agent_config import SystemAgentLLMConfig
from core.agent.infrastructure.config.resolver import ResolvedAgentConfig
@@ -17,7 +17,11 @@ from core.agent.infrastructure.crewai.runtime_tools import (
PendingFrontendToolCall,
resolve_stage_crewai_tools,
)
from core.agent.infrastructure.litellm.usage_tracker import UsageCost
from core.agent.infrastructure.litellm.pricing import calculate_tiered_model_cost
from core.agent.infrastructure.litellm.usage_tracker import (
UsageCost,
extract_usage_and_cost,
)
from core.agent.prompt import runtime_stage_prompts
from core.logging import get_logger
@@ -25,6 +29,31 @@ from core.logging import get_logger
logger = get_logger("core.agent.infrastructure.crewai.runtime_stage_runner")
class LiteLLMUsageCaptureCallback:
def __init__(self) -> None:
self.captured_usage: dict[str, Any] | None = None
@staticmethod
def _normalize_usage(usage_payload: object) -> dict[str, Any] | None:
if isinstance(usage_payload, dict):
return usage_payload
model_dump = getattr(usage_payload, "model_dump", None)
if callable(model_dump):
dumped = model_dump()
if isinstance(dumped, dict):
return dumped
return None
def log_success_event(self, **kwargs: Any) -> None:
response_obj = kwargs.get("response_obj")
if not isinstance(response_obj, dict):
return
normalized = self._normalize_usage(response_obj.get("usage"))
if normalized is None:
return
self.captured_usage = normalized
def _tool_names(tools_payload: list[dict[str, object]]) -> list[str]:
names: list[str] = []
for item in tools_payload:
@@ -69,24 +98,37 @@ def _output_diagnostics(*, text: str, tool_names: list[str]) -> dict[str, object
}
def extract_usage_from_captured_payload(
    *,
    captured_usage: dict[str, Any],
    model: str,
) -> UsageCost:
    """Turn a captured usage dict into a ``UsageCost`` for ``model``.

    The usage dict is wrapped in a minimal response-shaped mapping (``model``
    and ``usage`` keys) and handed to ``extract_usage_and_cost``, which does
    the token and cost extraction.

    Args:
        captured_usage: Usage payload recorded from an LLM call.
        model: Model identifier to attribute the usage to.

    Returns:
        Whatever ``extract_usage_and_cost`` produces for the wrapped payload.
        Any exception it raises propagates to the caller unchanged.
    """
    response_like = {"model": model, "usage": captured_usage}
    return extract_usage_and_cost(response_like)
def extract_usage_from_crew_output(*, output: object, model: str) -> UsageCost:
token_usage = getattr(output, "token_usage", None)
prompt_tokens = int(getattr(token_usage, "prompt_tokens", 0) or 0)
completion_tokens = int(getattr(token_usage, "completion_tokens", 0) or 0)
total_tokens = int(getattr(token_usage, "total_tokens", 0) or 0)
cached_prompt_tokens = int(getattr(token_usage, "cached_prompt_tokens", 0) or 0)
if total_tokens == 0:
total_tokens = prompt_tokens + completion_tokens
try:
cost = float(
completion_cost(
model=model,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
)
or 0.0
cost = float(
calculate_tiered_model_cost(
model_name=model,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
cached_prompt_tokens=cached_prompt_tokens,
)
except Exception:
cost = 0.0
or 0.0
)
return UsageCost(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
@@ -134,32 +176,32 @@ def run_stage_with_crewai(
content = getattr(message, "content", None)
if isinstance(content, str):
raw_text = content
usage_obj = getattr(response_any, "usage", None)
prompt_tokens = int(getattr(usage_obj, "prompt_tokens", 0) or 0)
completion_tokens = int(getattr(usage_obj, "completion_tokens", 0) or 0)
total_tokens = int(getattr(usage_obj, "total_tokens", 0) or 0)
if total_tokens == 0:
total_tokens = prompt_tokens + completion_tokens
try:
cost = float(
completion_cost(
model=litellm_model,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
)
or 0.0
response_dict = (
response_any.model_dump()
if hasattr(response_any, "model_dump")
else dict(response_any)
)
if "model" not in response_dict:
response_dict["model"] = litellm_model
usage = extract_usage_and_cost(response_dict)
except Exception:
cost = 0.0
usage = UsageCost(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
cost=cost,
)
usage_obj = getattr(response_any, "usage", None)
prompt_tokens = int(getattr(usage_obj, "prompt_tokens", 0) or 0)
completion_tokens = int(getattr(usage_obj, "completion_tokens", 0) or 0)
total_tokens = int(getattr(usage_obj, "total_tokens", 0) or 0)
if total_tokens == 0:
total_tokens = prompt_tokens + completion_tokens
usage = UsageCost(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
cost=0.0,
)
return raw_text, usage, [], None
calls: list[dict[str, Any]] = []
usage_callback = LiteLLMUsageCaptureCallback()
crew_tools = resolve_stage_crewai_tools(
tools_payload=tools_payload,
calls=calls,
@@ -173,6 +215,8 @@ def run_stage_with_crewai(
temperature=llm_config.temperature,
max_tokens=llm_config.max_tokens,
timeout=llm_config.timeout_seconds,
stream=True,
callbacks=[usage_callback],
)
agent = Agent(
role=agent_template.role,
@@ -218,7 +262,14 @@ def run_stage_with_crewai(
],
pending_tool=str(pending.payload.get("name")),
)
return "", UsageCost(0, 0, 0, 0.0), calls, pending.payload
if usage_callback.captured_usage is not None:
usage = extract_usage_from_captured_payload(
captured_usage=usage_callback.captured_usage,
model=litellm_model,
)
else:
usage = UsageCost(0, 0, 0, 0.0)
return "", usage, calls, pending.payload
output_text = extract_crew_output_text(output)
logger.info(
@@ -231,5 +282,11 @@ def run_stage_with_crewai(
],
diagnostics=_output_diagnostics(text=output_text, tool_names=stage_tool_names),
)
usage = extract_usage_from_crew_output(output=output, model=litellm_model)
if usage_callback.captured_usage is not None:
usage = extract_usage_from_captured_payload(
captured_usage=usage_callback.captured_usage,
model=litellm_model,
)
else:
usage = extract_usage_from_crew_output(output=output, model=litellm_model)
return output_text, usage, calls, None
@@ -36,9 +36,22 @@ QWEN35_FLASH_TIERED_PRICING: tuple[TieredModelPricing, ...] = (
),
)
# Pricing table for the DeepSeek chat model, declared as a single tier with
# max_prompt_tokens set to 10,000,000 (presumably the largest prompt the tier
# applies to; confirm against the tier lookup helper).
# Every rate is written as "price per million tokens / 1_000_000", i.e. a
# per-token cost.
# NOTE(review): the currency of these prices is not stated in view; confirm.
DEEPSEEK_CHAT_TIERED_PRICING: tuple[TieredModelPricing, ...] = (
TieredModelPricing(
max_prompt_tokens=10_000_000,
input_cost_per_token=2.0 / 1_000_000,
output_cost_per_token=3.0 / 1_000_000,
cache_create_cost_per_token=2.0 / 1_000_000,
# Cache hits are priced at one tenth of the regular input rate.
cache_hit_cost_per_token=0.2 / 1_000_000,
),
)
# Registry from model identifier to its tuple of pricing tiers.
# Each model is listed twice, with and without its provider prefix, and both
# spellings share the same tier tuple. All keys are lowercase.
# NOTE(review): presumably lookups use an already-lowercased model name;
# verify against the lookup helper and its callers.
_MODEL_TIERED_PRICING: dict[str, tuple[TieredModelPricing, ...]] = {
"dashscope/qwen3.5-flash": QWEN35_FLASH_TIERED_PRICING,
"qwen3.5-flash": QWEN35_FLASH_TIERED_PRICING,
"deepseek/deepseek-chat": DEEPSEEK_CHAT_TIERED_PRICING,
"deepseek-chat": DEEPSEEK_CHAT_TIERED_PRICING,
}
@@ -61,12 +74,21 @@ def calculate_tiered_model_cost(
model_name: str,
prompt_tokens: int,
completion_tokens: int,
cached_prompt_tokens: int = 0,
) -> float | None:
tier = get_tiered_pricing(model_name=model_name, prompt_tokens=prompt_tokens)
if tier is None:
return None
return (
prompt_tokens * tier.input_cost_per_token
+ completion_tokens * tier.output_cost_per_token
normalized_prompt_tokens = max(int(prompt_tokens), 0)
normalized_completion_tokens = max(int(completion_tokens), 0)
normalized_cached_tokens = min(
max(int(cached_prompt_tokens), 0), normalized_prompt_tokens
)
uncached_prompt_tokens = normalized_prompt_tokens - normalized_cached_tokens
return (
uncached_prompt_tokens * tier.input_cost_per_token
+ normalized_cached_tokens * tier.cache_hit_cost_per_token
+ normalized_completion_tokens * tier.output_cost_per_token
)
@@ -3,8 +3,6 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import Any
from litellm import completion_cost
from core.agent.infrastructure.litellm.pricing import calculate_tiered_model_cost
@@ -26,25 +24,19 @@ def extract_usage_and_cost(response: dict[str, Any]) -> UsageCost:
completion_tokens = int(usage.get("completion_tokens", 0))
total_tokens = int(usage.get("total_tokens", prompt_tokens + completion_tokens))
model_name = str(response.get("model", "")).strip().lower()
prompt_tokens_details = usage.get("prompt_tokens_details")
cached_prompt_tokens = 0
if isinstance(prompt_tokens_details, dict):
cached_prompt_tokens = int(prompt_tokens_details.get("cached_tokens", 0) or 0)
try:
cost = completion_cost(completion_response=response)
if cost is None:
raise ValueError("unable to calculate litellm completion cost")
return UsageCost(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
cost=float(cost),
)
except Exception as exc:
local_cost = calculate_tiered_model_cost(
model_name=model_name,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
)
if local_cost is None:
raise ValueError("unable to calculate litellm completion cost") from exc
local_cost = calculate_tiered_model_cost(
model_name=model_name,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
cached_prompt_tokens=cached_prompt_tokens,
)
if local_cost is None:
raise ValueError("unable to calculate custom completion cost")
return UsageCost(
prompt_tokens=prompt_tokens,