feat: 实现日历提醒完整功能（操作执行、通知服务重构、归档）

- 新增 ReminderActionExecutor 处理取消/稍后提醒操作 - 新增 ReminderOutboxStore 本地存储待处理操作 - 重构 LocalNotificationService 支持聚合提醒和交互操作 - 新增 event_color_resolver 工具类统一颜色解析 - 新增 CalendarService.archiveEvent 归档方法 - 增强 ModelTracking 支持缓存命中、推理token和成本追踪 - 添加 qwen3.5-35b-a3b 模型配置 - 更新 AndroidManifest 全屏intent权限 - 补充相关单元测试和文档
2026-03-18 19:12:47 +08:00
parent 257cb0f5d5
commit 00f37d7e19
35 changed files with 2676 additions and 244 deletions
@@ -15,8 +15,17 @@ class TrackingChatModel:
        self._inner = inner
        self._total_input_tokens = 0
        self._total_output_tokens = 0
+        self._total_tokens = 0
        self._total_latency_ms = 0
        self._cached_prompt_tokens = 0
+        self._prompt_cache_hit_tokens = 0
+        self._prompt_cache_miss_tokens = 0
+        self._reasoning_tokens = 0
+        self._direct_cost = 0.0
+        self._direct_cost_observed = False
+        self._model_call_records = 0
+        self._usage_records = 0
+        self._direct_cost_records = 0

    @property
    def stream(self) -> bool:
@@ -31,18 +40,37 @@ class TrackingChatModel:

    async def __call__(self, *args: Any, **kwargs: Any) -> Any:
        self._log_model_call(kwargs)
+        self._model_call_records += 1
        response = await self._inner(*args, **kwargs)
        if isinstance(response, AsyncGenerator):
            return self._track_stream(response)
        self._record_usage(getattr(response, "usage", None))
        return response

-    def usage_summary(self) -> dict[str, int]:
+    def usage_summary(self) -> dict[str, int | float | str]:
+        direct_cost = self._direct_cost if self._direct_cost_observed else 0.0
+        direct_cost_complete = (
+            self._model_call_records > 0
+            and self._model_call_records == self._direct_cost_records
+        )
        return {
            "input_tokens": self._total_input_tokens,
            "output_tokens": self._total_output_tokens,
+            "total_tokens": self._total_tokens,
            "latency_ms": self._total_latency_ms,
            "cached_prompt_tokens": self._cached_prompt_tokens,
+            "prompt_cache_hit_tokens": self._prompt_cache_hit_tokens,
+            "prompt_cache_miss_tokens": self._prompt_cache_miss_tokens,
+            "reasoning_tokens": self._reasoning_tokens,
+            "direct_cost": direct_cost,
+            "direct_cost_observed": int(self._direct_cost_observed),
+            "direct_cost_complete": int(direct_cost_complete),
+            "model_call_records": self._model_call_records,
+            "usage_records": self._usage_records,
+            "direct_cost_records": self._direct_cost_records,
+            "cost_source": "provider"
+            if self._direct_cost_observed
+            else "catalog_fallback",
        }

    def _log_model_call(self, kwargs: dict[str, Any]) -> None:
@@ -101,25 +129,167 @@ class TrackingChatModel:
    def _record_usage(self, usage: Any) -> None:
        if usage is None:
            return
-        self._total_input_tokens += max(int(getattr(usage, "input_tokens", 0) or 0), 0)
-        self._total_output_tokens += max(
-            int(getattr(usage, "output_tokens", 0) or 0), 0
+        self._usage_records += 1
+        usage_mapping = self._to_mapping(usage)
+        metadata = self._safe_get(usage, "metadata")
+        metadata_mapping = self._to_mapping(metadata)
+
+        input_tokens = self._coerce_int(
+            self._first_non_null(
+                self._safe_get(usage, "input_tokens"),
+                usage_mapping.get("input_tokens"),
+                metadata_mapping.get("prompt_tokens"),
+            )
        )
-        self._total_latency_ms += max(
-            int(round(float(getattr(usage, "time", 0) or 0) * 1000)), 0
+        output_tokens = self._coerce_int(
+            self._first_non_null(
+                self._safe_get(usage, "output_tokens"),
+                usage_mapping.get("output_tokens"),
+                metadata_mapping.get("completion_tokens"),
+            )
        )
-        metadata = getattr(usage, "metadata", None)
-        if metadata is None:
-            return
-        self._cached_prompt_tokens += max(self._extract_cached_tokens(metadata), 0)
+        total_tokens = self._coerce_int(
+            self._first_non_null(
+                self._safe_get(usage, "total_tokens"),
+                usage_mapping.get("total_tokens"),
+                metadata_mapping.get("total_tokens"),
+                input_tokens + output_tokens,
+            )
+        )
+        latency_ms = max(
+            int(
+                round(
+                    self._coerce_float(
+                        self._first_non_null(
+                            self._safe_get(usage, "time"),
+                            usage_mapping.get("time"),
+                            0.0,
+                        )
+                    )
+                    * 1000
+                )
+            ),
+            0,
+        )
+
+        prompt_tokens_details = self._to_mapping(
+            metadata_mapping.get("prompt_tokens_details")
+        )
+        completion_tokens_details = self._to_mapping(
+            metadata_mapping.get("completion_tokens_details")
+        )
+
+        cached_prompt_tokens = self._coerce_int(
+            self._first_non_null(
+                prompt_tokens_details.get("cached_tokens"),
+                metadata_mapping.get("prompt_cache_hit_tokens"),
+                0,
+            )
+        )
+        prompt_cache_hit_tokens = self._coerce_int(
+            self._first_non_null(
+                metadata_mapping.get("prompt_cache_hit_tokens"),
+                cached_prompt_tokens,
+            )
+        )
+        prompt_cache_miss_tokens = self._coerce_int(
+            self._first_non_null(
+                metadata_mapping.get("prompt_cache_miss_tokens"),
+                max(input_tokens - prompt_cache_hit_tokens, 0),
+            )
+        )
+        reasoning_tokens = self._coerce_int(
+            self._first_non_null(completion_tokens_details.get("reasoning_tokens"), 0)
+        )
+        direct_cost = self._coerce_optional_float(
+            self._first_non_null(
+                self._safe_get(usage, "cost"),
+                usage_mapping.get("cost"),
+                metadata_mapping.get("cost"),
+                metadata_mapping.get("total_cost"),
+            )
+        )
+
+        self._total_input_tokens += input_tokens
+        self._total_output_tokens += output_tokens
+        self._total_tokens += total_tokens
+        self._total_latency_ms += latency_ms
+        self._cached_prompt_tokens += cached_prompt_tokens
+        self._prompt_cache_hit_tokens += prompt_cache_hit_tokens
+        self._prompt_cache_miss_tokens += prompt_cache_miss_tokens
+        self._reasoning_tokens += reasoning_tokens
+        if direct_cost is not None:
+            self._direct_cost_observed = True
+            self._direct_cost_records += 1
+            self._direct_cost += max(direct_cost, 0.0)

    @staticmethod
-    def _extract_cached_tokens(metadata: Any) -> int:
-        if isinstance(metadata, dict):
-            prompt_details = metadata.get("prompt_tokens_details")
-            if isinstance(prompt_details, dict):
-                return int(prompt_details.get("cached_tokens", 0) or 0)
+    def _safe_get(obj: Any, key: str) -> Any:
+        if obj is None:
+            return None
+        try:
+            if isinstance(obj, dict):
+                return obj.get(key)
+            return getattr(obj, key, None)
+        except Exception:
+            return None
+
+    @classmethod
+    def _to_mapping(cls, obj: Any) -> dict[str, Any]:
+        if isinstance(obj, dict):
+            return dict(obj)
+        if obj is None:
+            return {}
+        model_dump = cls._safe_get(obj, "model_dump")
+        if callable(model_dump):
+            try:
+                dumped = model_dump()
+            except Exception:
+                dumped = None
+            if isinstance(dumped, dict):
+                return dumped
+        data = cls._safe_get(obj, "__dict__")
+        if isinstance(data, dict):
+            return data
+        return {}
+
+    @staticmethod
+    def _first_non_null(*values: Any) -> Any:
+        for value in values:
+            if value is not None:
+                return value
+        return None
+
+    @staticmethod
+    def _coerce_int(value: Any) -> int:
+        if value is None:
+            return 0
+        if isinstance(value, bool):
+            return int(value)
+        if isinstance(value, int):
+            return max(value, 0)
+        try:
+            return max(int(float(value)), 0)
+        except Exception:
            return 0

-        prompt_details = getattr(metadata, "prompt_tokens_details", None)
-        return int(getattr(prompt_details, "cached_tokens", 0) or 0)
+    @staticmethod
+    def _coerce_float(value: Any) -> float:
+        if value is None:
+            return 0.0
+        try:
+            return max(float(value), 0.0)
+        except Exception:
+            return 0.0
+
+    @staticmethod
+    def _coerce_optional_float(value: Any) -> float | None:
+        if value is None:
+            return None
+        try:
+            parsed = float(value)
+        except Exception:
+            return None
+        if parsed < 0:
+            return None
+        return parsed
@@ -1,52 +1,63 @@
 factories:
-  - name: dashscope
-    request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
-    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
+    - name: dashscope
+      request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
+      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png

-  - name: minimax
-    request_url: https://api.minimaxi.com/v1
-    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
+    - name: minimax
+      request_url: https://api.minimaxi.com/v1
+      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png

-  - name: moonshot
-    request_url: https://api.moonshot.cn/v1
-    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
+    - name: moonshot
+      request_url: https://api.moonshot.cn/v1
+      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png

-  - name: deepseek
-    request_url: https://api.deepseek.com/v1
-    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
+    - name: deepseek
+      request_url: https://api.deepseek.com/v1
+      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png

-  - name: volcengine
-    request_url: https://ark.cn-beijing.volces.com/api/v3
-    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
+    - name: volcengine
+      request_url: https://ark.cn-beijing.volces.com/api/v3
+      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png

-  - name: zai
-    request_url: https://api.z.ai/api/paas/v4
-    avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
+    - name: zai
+      request_url: https://api.z.ai/api/paas/v4
+      avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png

 llms:
-  # qwen3.5-flash (3 tiers: 128K, 256K, 1M)
-  - model_code: qwen3.5-flash
-    factory_name: dashscope
-    litellm_model: dashscope/qwen3.5-flash
-    pricing_tiers:
-      - max_prompt_tokens: 128000
-        input_cost_per_token: 0.0000002
-        output_cost_per_token: 0.000002
-        cache_hit_cost_per_token: 0.00000002
-      - max_prompt_tokens: 256000
-        input_cost_per_token: 0.0000008
-        output_cost_per_token: 0.000008
-        cache_hit_cost_per_token: 0.00000008
-      - max_prompt_tokens: 1000000
-        input_cost_per_token: 0.0000012
-        output_cost_per_token: 0.000012
-        cache_hit_cost_per_token: 0.00000012
+    # qwen3.5-flash (3 tiers: 128K, 256K, 1M)
+    - model_code: qwen3.5-flash
+      factory_name: dashscope
+      litellm_model: dashscope/qwen3.5-flash
+      pricing_tiers:
+          - max_prompt_tokens: 128000
+            input_cost_per_token: 0.0000002
+            output_cost_per_token: 0.000002
+            cache_hit_cost_per_token: 0.00000002
+          - max_prompt_tokens: 256000
+            input_cost_per_token: 0.0000008
+            output_cost_per_token: 0.000008
+            cache_hit_cost_per_token: 0.00000008
+          - max_prompt_tokens: 1000000
+            input_cost_per_token: 0.0000012
+            output_cost_per_token: 0.000012
+            cache_hit_cost_per_token: 0.00000012

-  - model_code: deepseek-chat
-    factory_name: deepseek
-    litellm_model: deepseek/deepseek-chat
-    pricing_tiers:
-      - max_prompt_tokens: 128000
-        input_cost_per_token: 0.000002
-        output_cost_per_token: 0.000003
-        cache_hit_cost_per_token: 0.0000002
+    - model_code: qwen3.5-35b-a3b
+      factory_name: dashscope
+      litellm_model: dashscope/qwen3.5-35b-a3b
+      pricing_tiers:
+          - max_prompt_tokens: 128000
+            input_cost_per_token: 0.0000004
+            output_cost_per_token: 0.0000032
+          - max_prompt_tokens: 256000
+            input_cost_per_token: 0.0000016
+            output_cost_per_token: 0.0000128
+
+    - model_code: deepseek-chat
+      factory_name: deepseek
+      litellm_model: deepseek/deepseek-chat
+      pricing_tiers:
+          - max_prompt_tokens: 128000
+            input_cost_per_token: 0.000002
+            output_cost_per_token: 0.000003
+            cache_hit_cost_per_token: 0.0000002
@@ -85,9 +85,15 @@ class LiteLLMService:
                selected_tier = tier
                break

+        cached_token_rate = (
+            selected_tier.cache_hit_cost_per_token
+            if selected_tier.cache_hit_cost_per_token > 0
+            else selected_tier.input_cost_per_token
+        )
+
        return float(
            uncached_prompt_tokens * selected_tier.input_cost_per_token
-            + normalized_cached_tokens * selected_tier.cache_hit_cost_per_token
+            + normalized_cached_tokens * cached_token_rate
            + normalized_completion_tokens * selected_tier.output_cost_per_token
        )

@@ -95,23 +101,86 @@ class LiteLLMService:
        self,
        *,
        model: str,
-        usage_summary: dict[str, int] | None,
+        usage_summary: dict[str, Any] | None,
    ) -> dict[str, Any]:
        summary = usage_summary or {}
        input_tokens = max(int(summary.get("input_tokens", 0) or 0), 0)
        output_tokens = max(int(summary.get("output_tokens", 0) or 0), 0)
+        total_tokens = max(
+            int(summary.get("total_tokens", input_tokens + output_tokens) or 0), 0
+        )
        latency_ms = max(int(summary.get("latency_ms", 0) or 0), 0)
        cached_prompt_tokens = max(int(summary.get("cached_prompt_tokens", 0) or 0), 0)
-        cost = self.calculate_cost(
-            model=model,
-            prompt_tokens=input_tokens,
-            completion_tokens=output_tokens,
-            cached_prompt_tokens=cached_prompt_tokens,
+        prompt_cache_hit_tokens = max(
+            int(summary.get("prompt_cache_hit_tokens", cached_prompt_tokens) or 0), 0
        )
+        prompt_cache_miss_tokens = max(
+            int(
+                summary.get(
+                    "prompt_cache_miss_tokens",
+                    max(input_tokens - prompt_cache_hit_tokens, 0),
+                )
+                or 0
+            ),
+            0,
+        )
+        reasoning_tokens = max(int(summary.get("reasoning_tokens", 0) or 0), 0)
+        direct_cost_raw = summary.get("direct_cost")
+        direct_cost_observed = bool(int(summary.get("direct_cost_observed", 0) or 0))
+        direct_cost_complete = bool(int(summary.get("direct_cost_complete", 0) or 0))
+        model_call_records = max(int(summary.get("model_call_records", 0) or 0), 0)
+        usage_records = max(int(summary.get("usage_records", 0) or 0), 0)
+        usage_complete = model_call_records == 0 or model_call_records == usage_records
+        direct_cost = self._coerce_non_negative_float(direct_cost_raw)
+
+        if (
+            usage_complete
+            and direct_cost_observed
+            and direct_cost_complete
+            and direct_cost is not None
+        ):
+            cost = direct_cost
+            cost_source = "provider"
+        else:
+            cost = self.calculate_cost(
+                model=model,
+                prompt_tokens=input_tokens,
+                completion_tokens=output_tokens,
+                cached_prompt_tokens=cached_prompt_tokens,
+            )
+            cost_source = (
+                "incomplete_usage_fallback"
+                if not usage_complete
+                else (
+                    "catalog_fallback_incomplete_provider_cost"
+                    if direct_cost_observed and not direct_cost_complete
+                    else "catalog_fallback"
+                )
+            )
+
        return {
            "model": model,
            "inputTokens": input_tokens,
            "outputTokens": output_tokens,
+            "totalTokens": total_tokens,
+            "cachedPromptTokens": cached_prompt_tokens,
+            "promptCacheHitTokens": prompt_cache_hit_tokens,
+            "promptCacheMissTokens": prompt_cache_miss_tokens,
+            "reasoningTokens": reasoning_tokens,
            "cost": cost,
+            "costSource": cost_source,
+            "usageComplete": usage_complete,
            "latencyMs": latency_ms,
        }
+
+    @staticmethod
+    def _coerce_non_negative_float(value: Any) -> float | None:
+        if value is None:
+            return None
+        try:
+            parsed = float(value)
+        except (TypeError, ValueError):
+            return None
+        if parsed < 0:
+            return None
+        return parsed
@@ -9,7 +9,7 @@ from sqlalchemy.exc import SQLAlchemyError

 from core.db.base_repository import BaseRepository
 from core.logging import get_logger
-from models.schedule_items import ScheduleItem
+from models.schedule_items import ScheduleItem, ScheduleItemStatus
 from models.schedule_subscriptions import ScheduleSubscription, SubscriptionStatus

 if TYPE_CHECKING:
@@ -61,6 +61,11 @@ class ScheduleItemRepository(Protocol):
        start_at: datetime,
        end_at: datetime,
    ) -> Sequence[tuple[ScheduleItem, ScheduleSubscription]]: ...
+    async def archive_expired_subscribed_items(
+        self,
+        subscriber_id: UUID,
+        now_at: datetime,
+    ) -> int: ...


 class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
@@ -149,8 +154,13 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
                select(ScheduleItem)
                .where(ScheduleItem.owner_id == owner_id)
                .where(ScheduleItem.deleted_at.is_(None))
-                .where(ScheduleItem.start_at >= start_at)
                .where(ScheduleItem.start_at <= end_at)
+                .where(
+                    or_(
+                        ScheduleItem.end_at.is_(None),
+                        ScheduleItem.end_at >= start_at,
+                    )
+                )
                .order_by(ScheduleItem.start_at.asc())
            )
            result = await self._session.execute(stmt)
@@ -308,8 +318,13 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
                .where(ScheduleSubscription.subscriber_id == subscriber_id)
                .where(ScheduleSubscription.status == SubscriptionStatus.ACTIVE)
                .where(ScheduleItem.deleted_at.is_(None))
-                .where(ScheduleItem.start_at >= start_at)
                .where(ScheduleItem.start_at <= end_at)
+                .where(
+                    or_(
+                        ScheduleItem.end_at.is_(None),
+                        ScheduleItem.end_at >= start_at,
+                    )
+                )
                .order_by(ScheduleItem.start_at.asc())
            )
            result = await self._session.execute(stmt)
@@ -317,3 +332,38 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
        except SQLAlchemyError:
            logger.exception("Failed to list subscribed items")
            raise
+
+    async def archive_expired_subscribed_items(
+        self,
+        subscriber_id: UUID,
+        now_at: datetime,
+    ) -> int:
+        try:
+            item_ids_subquery = (
+                select(ScheduleItem.id)
+                .join(
+                    ScheduleSubscription,
+                    ScheduleSubscription.item_id == ScheduleItem.id,
+                )
+                .where(ScheduleSubscription.subscriber_id == subscriber_id)
+                .where(ScheduleSubscription.status == SubscriptionStatus.ACTIVE)
+                .where(ScheduleItem.deleted_at.is_(None))
+                .where(ScheduleItem.status == ScheduleItemStatus.ACTIVE)
+                .where(ScheduleItem.end_at.is_not(None))
+                .where(ScheduleItem.end_at <= now_at)
+            )
+
+            stmt = (
+                update(ScheduleItem)
+                .where(ScheduleItem.id.in_(item_ids_subquery))
+                .values(status=ScheduleItemStatus.ARCHIVED)
+            )
+            result = await self._session.execute(stmt)
+            await self._session.flush()
+            return int(getattr(result, "rowcount", 0) or 0)
+        except SQLAlchemyError:
+            logger.exception(
+                "Failed to archive expired subscribed items",
+                subscriber_id=str(subscriber_id),
+            )
+            raise
@@ -240,6 +240,11 @@ class ScheduleItemService(BaseService):
            raise HTTPException(status_code=400, detail="end_at must be after start_at")

        try:
+            archived_count = await self._repository.archive_expired_subscribed_items(
+                user_id,
+                datetime.now(timezone.utc),
+            )
+
            subscribed_items = (
                await self._repository.list_subscribed_items_by_date_range(
                    user_id, normalized_start_at, normalized_end_at
@@ -256,9 +261,12 @@ class ScheduleItemService(BaseService):
                )

            results.sort(key=lambda x: x.start_at)
+            if archived_count > 0:
+                await self._session.commit()

            return results
        except SQLAlchemyError:
+            await self._session.rollback()
            logger.exception("Failed to list schedule items")
            raise HTTPException(
                status_code=503, detail="Schedule item store unavailable"