feat: 实现日历提醒完整功能(操作执行、通知服务重构、归档)

- 新增 ReminderActionExecutor 处理取消/稍后提醒操作
- 新增 ReminderOutboxStore 本地存储待处理操作
- 重构 LocalNotificationService 支持聚合提醒和交互操作
- 新增 event_color_resolver 工具类统一颜色解析
- 新增 CalendarService.archiveEvent 归档方法
- 增强 ModelTracking 支持缓存命中、推理token和成本追踪
- 添加 qwen3.5-35b-a3b 模型配置
- 更新 AndroidManifest 全屏intent权限
- 补充相关单元测试和文档
This commit is contained in:
qzl
2026-03-18 19:12:47 +08:00
parent 257cb0f5d5
commit 00f37d7e19
35 changed files with 2676 additions and 244 deletions
@@ -15,8 +15,17 @@ class TrackingChatModel:
self._inner = inner
self._total_input_tokens = 0
self._total_output_tokens = 0
self._total_tokens = 0
self._total_latency_ms = 0
self._cached_prompt_tokens = 0
self._prompt_cache_hit_tokens = 0
self._prompt_cache_miss_tokens = 0
self._reasoning_tokens = 0
self._direct_cost = 0.0
self._direct_cost_observed = False
self._model_call_records = 0
self._usage_records = 0
self._direct_cost_records = 0
@property
def stream(self) -> bool:
@@ -31,18 +40,37 @@ class TrackingChatModel:
async def __call__(self, *args: Any, **kwargs: Any) -> Any:
self._log_model_call(kwargs)
self._model_call_records += 1
response = await self._inner(*args, **kwargs)
if isinstance(response, AsyncGenerator):
return self._track_stream(response)
self._record_usage(getattr(response, "usage", None))
return response
def usage_summary(self) -> dict[str, int]:
def usage_summary(self) -> dict[str, int | float | str]:
direct_cost = self._direct_cost if self._direct_cost_observed else 0.0
direct_cost_complete = (
self._model_call_records > 0
and self._model_call_records == self._direct_cost_records
)
return {
"input_tokens": self._total_input_tokens,
"output_tokens": self._total_output_tokens,
"total_tokens": self._total_tokens,
"latency_ms": self._total_latency_ms,
"cached_prompt_tokens": self._cached_prompt_tokens,
"prompt_cache_hit_tokens": self._prompt_cache_hit_tokens,
"prompt_cache_miss_tokens": self._prompt_cache_miss_tokens,
"reasoning_tokens": self._reasoning_tokens,
"direct_cost": direct_cost,
"direct_cost_observed": int(self._direct_cost_observed),
"direct_cost_complete": int(direct_cost_complete),
"model_call_records": self._model_call_records,
"usage_records": self._usage_records,
"direct_cost_records": self._direct_cost_records,
"cost_source": "provider"
if self._direct_cost_observed
else "catalog_fallback",
}
def _log_model_call(self, kwargs: dict[str, Any]) -> None:
@@ -101,25 +129,167 @@ class TrackingChatModel:
def _record_usage(self, usage: Any) -> None:
if usage is None:
return
self._total_input_tokens += max(int(getattr(usage, "input_tokens", 0) or 0), 0)
self._total_output_tokens += max(
int(getattr(usage, "output_tokens", 0) or 0), 0
self._usage_records += 1
usage_mapping = self._to_mapping(usage)
metadata = self._safe_get(usage, "metadata")
metadata_mapping = self._to_mapping(metadata)
input_tokens = self._coerce_int(
self._first_non_null(
self._safe_get(usage, "input_tokens"),
usage_mapping.get("input_tokens"),
metadata_mapping.get("prompt_tokens"),
)
)
self._total_latency_ms += max(
int(round(float(getattr(usage, "time", 0) or 0) * 1000)), 0
output_tokens = self._coerce_int(
self._first_non_null(
self._safe_get(usage, "output_tokens"),
usage_mapping.get("output_tokens"),
metadata_mapping.get("completion_tokens"),
)
)
metadata = getattr(usage, "metadata", None)
if metadata is None:
return
self._cached_prompt_tokens += max(self._extract_cached_tokens(metadata), 0)
total_tokens = self._coerce_int(
self._first_non_null(
self._safe_get(usage, "total_tokens"),
usage_mapping.get("total_tokens"),
metadata_mapping.get("total_tokens"),
input_tokens + output_tokens,
)
)
latency_ms = max(
int(
round(
self._coerce_float(
self._first_non_null(
self._safe_get(usage, "time"),
usage_mapping.get("time"),
0.0,
)
)
* 1000
)
),
0,
)
prompt_tokens_details = self._to_mapping(
metadata_mapping.get("prompt_tokens_details")
)
completion_tokens_details = self._to_mapping(
metadata_mapping.get("completion_tokens_details")
)
cached_prompt_tokens = self._coerce_int(
self._first_non_null(
prompt_tokens_details.get("cached_tokens"),
metadata_mapping.get("prompt_cache_hit_tokens"),
0,
)
)
prompt_cache_hit_tokens = self._coerce_int(
self._first_non_null(
metadata_mapping.get("prompt_cache_hit_tokens"),
cached_prompt_tokens,
)
)
prompt_cache_miss_tokens = self._coerce_int(
self._first_non_null(
metadata_mapping.get("prompt_cache_miss_tokens"),
max(input_tokens - prompt_cache_hit_tokens, 0),
)
)
reasoning_tokens = self._coerce_int(
self._first_non_null(completion_tokens_details.get("reasoning_tokens"), 0)
)
direct_cost = self._coerce_optional_float(
self._first_non_null(
self._safe_get(usage, "cost"),
usage_mapping.get("cost"),
metadata_mapping.get("cost"),
metadata_mapping.get("total_cost"),
)
)
self._total_input_tokens += input_tokens
self._total_output_tokens += output_tokens
self._total_tokens += total_tokens
self._total_latency_ms += latency_ms
self._cached_prompt_tokens += cached_prompt_tokens
self._prompt_cache_hit_tokens += prompt_cache_hit_tokens
self._prompt_cache_miss_tokens += prompt_cache_miss_tokens
self._reasoning_tokens += reasoning_tokens
if direct_cost is not None:
self._direct_cost_observed = True
self._direct_cost_records += 1
self._direct_cost += max(direct_cost, 0.0)
@staticmethod
def _extract_cached_tokens(metadata: Any) -> int:
if isinstance(metadata, dict):
prompt_details = metadata.get("prompt_tokens_details")
if isinstance(prompt_details, dict):
return int(prompt_details.get("cached_tokens", 0) or 0)
def _safe_get(obj: Any, key: str) -> Any:
if obj is None:
return None
try:
if isinstance(obj, dict):
return obj.get(key)
return getattr(obj, key, None)
except Exception:
return None
@classmethod
def _to_mapping(cls, obj: Any) -> dict[str, Any]:
if isinstance(obj, dict):
return dict(obj)
if obj is None:
return {}
model_dump = cls._safe_get(obj, "model_dump")
if callable(model_dump):
try:
dumped = model_dump()
except Exception:
dumped = None
if isinstance(dumped, dict):
return dumped
data = cls._safe_get(obj, "__dict__")
if isinstance(data, dict):
return data
return {}
@staticmethod
def _first_non_null(*values: Any) -> Any:
for value in values:
if value is not None:
return value
return None
@staticmethod
def _coerce_int(value: Any) -> int:
if value is None:
return 0
if isinstance(value, bool):
return int(value)
if isinstance(value, int):
return max(value, 0)
try:
return max(int(float(value)), 0)
except Exception:
return 0
prompt_details = getattr(metadata, "prompt_tokens_details", None)
return int(getattr(prompt_details, "cached_tokens", 0) or 0)
@staticmethod
def _coerce_float(value: Any) -> float:
if value is None:
return 0.0
try:
return max(float(value), 0.0)
except Exception:
return 0.0
@staticmethod
def _coerce_optional_float(value: Any) -> float | None:
if value is None:
return None
try:
parsed = float(value)
except Exception:
return None
if parsed < 0:
return None
return parsed
@@ -1,52 +1,63 @@
factories:
- name: dashscope
request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
- name: dashscope
request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
- name: minimax
request_url: https://api.minimaxi.com/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
- name: minimax
request_url: https://api.minimaxi.com/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
- name: moonshot
request_url: https://api.moonshot.cn/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
- name: moonshot
request_url: https://api.moonshot.cn/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
- name: deepseek
request_url: https://api.deepseek.com/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
- name: deepseek
request_url: https://api.deepseek.com/v1
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
- name: volcengine
request_url: https://ark.cn-beijing.volces.com/api/v3
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
- name: volcengine
request_url: https://ark.cn-beijing.volces.com/api/v3
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
- name: zai
request_url: https://api.z.ai/api/paas/v4
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
- name: zai
request_url: https://api.z.ai/api/paas/v4
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
llms:
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
- model_code: qwen3.5-flash
factory_name: dashscope
litellm_model: dashscope/qwen3.5-flash
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000002
output_cost_per_token: 0.000002
cache_hit_cost_per_token: 0.00000002
- max_prompt_tokens: 256000
input_cost_per_token: 0.0000008
output_cost_per_token: 0.000008
cache_hit_cost_per_token: 0.00000008
- max_prompt_tokens: 1000000
input_cost_per_token: 0.0000012
output_cost_per_token: 0.000012
cache_hit_cost_per_token: 0.00000012
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
- model_code: qwen3.5-flash
factory_name: dashscope
litellm_model: dashscope/qwen3.5-flash
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000002
output_cost_per_token: 0.000002
cache_hit_cost_per_token: 0.00000002
- max_prompt_tokens: 256000
input_cost_per_token: 0.0000008
output_cost_per_token: 0.000008
cache_hit_cost_per_token: 0.00000008
- max_prompt_tokens: 1000000
input_cost_per_token: 0.0000012
output_cost_per_token: 0.000012
cache_hit_cost_per_token: 0.00000012
- model_code: deepseek-chat
factory_name: deepseek
litellm_model: deepseek/deepseek-chat
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.000002
output_cost_per_token: 0.000003
cache_hit_cost_per_token: 0.0000002
- model_code: qwen3.5-35b-a3b
factory_name: dashscope
litellm_model: dashscope/qwen3.5-35b-a3b
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.0000004
output_cost_per_token: 0.0000032
- max_prompt_tokens: 256000
input_cost_per_token: 0.0000016
output_cost_per_token: 0.0000128
- model_code: deepseek-chat
factory_name: deepseek
litellm_model: deepseek/deepseek-chat
pricing_tiers:
- max_prompt_tokens: 128000
input_cost_per_token: 0.000002
output_cost_per_token: 0.000003
cache_hit_cost_per_token: 0.0000002
+76 -7
View File
@@ -85,9 +85,15 @@ class LiteLLMService:
selected_tier = tier
break
cached_token_rate = (
selected_tier.cache_hit_cost_per_token
if selected_tier.cache_hit_cost_per_token > 0
else selected_tier.input_cost_per_token
)
return float(
uncached_prompt_tokens * selected_tier.input_cost_per_token
+ normalized_cached_tokens * selected_tier.cache_hit_cost_per_token
+ normalized_cached_tokens * cached_token_rate
+ normalized_completion_tokens * selected_tier.output_cost_per_token
)
@@ -95,23 +101,86 @@ class LiteLLMService:
self,
*,
model: str,
usage_summary: dict[str, int] | None,
usage_summary: dict[str, Any] | None,
) -> dict[str, Any]:
summary = usage_summary or {}
input_tokens = max(int(summary.get("input_tokens", 0) or 0), 0)
output_tokens = max(int(summary.get("output_tokens", 0) or 0), 0)
total_tokens = max(
int(summary.get("total_tokens", input_tokens + output_tokens) or 0), 0
)
latency_ms = max(int(summary.get("latency_ms", 0) or 0), 0)
cached_prompt_tokens = max(int(summary.get("cached_prompt_tokens", 0) or 0), 0)
cost = self.calculate_cost(
model=model,
prompt_tokens=input_tokens,
completion_tokens=output_tokens,
cached_prompt_tokens=cached_prompt_tokens,
prompt_cache_hit_tokens = max(
int(summary.get("prompt_cache_hit_tokens", cached_prompt_tokens) or 0), 0
)
prompt_cache_miss_tokens = max(
int(
summary.get(
"prompt_cache_miss_tokens",
max(input_tokens - prompt_cache_hit_tokens, 0),
)
or 0
),
0,
)
reasoning_tokens = max(int(summary.get("reasoning_tokens", 0) or 0), 0)
direct_cost_raw = summary.get("direct_cost")
direct_cost_observed = bool(int(summary.get("direct_cost_observed", 0) or 0))
direct_cost_complete = bool(int(summary.get("direct_cost_complete", 0) or 0))
model_call_records = max(int(summary.get("model_call_records", 0) or 0), 0)
usage_records = max(int(summary.get("usage_records", 0) or 0), 0)
usage_complete = model_call_records == 0 or model_call_records == usage_records
direct_cost = self._coerce_non_negative_float(direct_cost_raw)
if (
usage_complete
and direct_cost_observed
and direct_cost_complete
and direct_cost is not None
):
cost = direct_cost
cost_source = "provider"
else:
cost = self.calculate_cost(
model=model,
prompt_tokens=input_tokens,
completion_tokens=output_tokens,
cached_prompt_tokens=cached_prompt_tokens,
)
cost_source = (
"incomplete_usage_fallback"
if not usage_complete
else (
"catalog_fallback_incomplete_provider_cost"
if direct_cost_observed and not direct_cost_complete
else "catalog_fallback"
)
)
return {
"model": model,
"inputTokens": input_tokens,
"outputTokens": output_tokens,
"totalTokens": total_tokens,
"cachedPromptTokens": cached_prompt_tokens,
"promptCacheHitTokens": prompt_cache_hit_tokens,
"promptCacheMissTokens": prompt_cache_miss_tokens,
"reasoningTokens": reasoning_tokens,
"cost": cost,
"costSource": cost_source,
"usageComplete": usage_complete,
"latencyMs": latency_ms,
}
@staticmethod
def _coerce_non_negative_float(value: Any) -> float | None:
if value is None:
return None
try:
parsed = float(value)
except (TypeError, ValueError):
return None
if parsed < 0:
return None
return parsed
+53 -3
View File
@@ -9,7 +9,7 @@ from sqlalchemy.exc import SQLAlchemyError
from core.db.base_repository import BaseRepository
from core.logging import get_logger
from models.schedule_items import ScheduleItem
from models.schedule_items import ScheduleItem, ScheduleItemStatus
from models.schedule_subscriptions import ScheduleSubscription, SubscriptionStatus
if TYPE_CHECKING:
@@ -61,6 +61,11 @@ class ScheduleItemRepository(Protocol):
start_at: datetime,
end_at: datetime,
) -> Sequence[tuple[ScheduleItem, ScheduleSubscription]]: ...
async def archive_expired_subscribed_items(
self,
subscriber_id: UUID,
now_at: datetime,
) -> int: ...
class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
@@ -149,8 +154,13 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
select(ScheduleItem)
.where(ScheduleItem.owner_id == owner_id)
.where(ScheduleItem.deleted_at.is_(None))
.where(ScheduleItem.start_at >= start_at)
.where(ScheduleItem.start_at <= end_at)
.where(
or_(
ScheduleItem.end_at.is_(None),
ScheduleItem.end_at >= start_at,
)
)
.order_by(ScheduleItem.start_at.asc())
)
result = await self._session.execute(stmt)
@@ -308,8 +318,13 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
.where(ScheduleSubscription.subscriber_id == subscriber_id)
.where(ScheduleSubscription.status == SubscriptionStatus.ACTIVE)
.where(ScheduleItem.deleted_at.is_(None))
.where(ScheduleItem.start_at >= start_at)
.where(ScheduleItem.start_at <= end_at)
.where(
or_(
ScheduleItem.end_at.is_(None),
ScheduleItem.end_at >= start_at,
)
)
.order_by(ScheduleItem.start_at.asc())
)
result = await self._session.execute(stmt)
@@ -317,3 +332,38 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
except SQLAlchemyError:
logger.exception("Failed to list subscribed items")
raise
async def archive_expired_subscribed_items(
self,
subscriber_id: UUID,
now_at: datetime,
) -> int:
try:
item_ids_subquery = (
select(ScheduleItem.id)
.join(
ScheduleSubscription,
ScheduleSubscription.item_id == ScheduleItem.id,
)
.where(ScheduleSubscription.subscriber_id == subscriber_id)
.where(ScheduleSubscription.status == SubscriptionStatus.ACTIVE)
.where(ScheduleItem.deleted_at.is_(None))
.where(ScheduleItem.status == ScheduleItemStatus.ACTIVE)
.where(ScheduleItem.end_at.is_not(None))
.where(ScheduleItem.end_at <= now_at)
)
stmt = (
update(ScheduleItem)
.where(ScheduleItem.id.in_(item_ids_subquery))
.values(status=ScheduleItemStatus.ARCHIVED)
)
result = await self._session.execute(stmt)
await self._session.flush()
return int(getattr(result, "rowcount", 0) or 0)
except SQLAlchemyError:
logger.exception(
"Failed to archive expired subscribed items",
subscriber_id=str(subscriber_id),
)
raise
+8
View File
@@ -240,6 +240,11 @@ class ScheduleItemService(BaseService):
raise HTTPException(status_code=400, detail="end_at must be after start_at")
try:
archived_count = await self._repository.archive_expired_subscribed_items(
user_id,
datetime.now(timezone.utc),
)
subscribed_items = (
await self._repository.list_subscribed_items_by_date_range(
user_id, normalized_start_at, normalized_end_at
@@ -256,9 +261,12 @@ class ScheduleItemService(BaseService):
)
results.sort(key=lambda x: x.start_at)
if archived_count > 0:
await self._session.commit()
return results
except SQLAlchemyError:
await self._session.rollback()
logger.exception("Failed to list schedule items")
raise HTTPException(
status_code=503, detail="Schedule item store unavailable"