feat: 实现日历提醒完整功能(操作执行、通知服务重构、归档)
- 新增 ReminderActionExecutor 处理取消/稍后提醒操作 - 新增 ReminderOutboxStore 本地存储待处理操作 - 重构 LocalNotificationService 支持聚合提醒和交互操作 - 新增 event_color_resolver 工具类统一颜色解析 - 新增 CalendarService.archiveEvent 归档方法 - 增强 ModelTracking 支持缓存命中、推理token和成本追踪 - 添加 qwen3.5-35b-a3b 模型配置 - 更新 AndroidManifest 全屏intent权限 - 补充相关单元测试和文档
This commit is contained in:
@@ -15,8 +15,17 @@ class TrackingChatModel:
|
||||
self._inner = inner
|
||||
self._total_input_tokens = 0
|
||||
self._total_output_tokens = 0
|
||||
self._total_tokens = 0
|
||||
self._total_latency_ms = 0
|
||||
self._cached_prompt_tokens = 0
|
||||
self._prompt_cache_hit_tokens = 0
|
||||
self._prompt_cache_miss_tokens = 0
|
||||
self._reasoning_tokens = 0
|
||||
self._direct_cost = 0.0
|
||||
self._direct_cost_observed = False
|
||||
self._model_call_records = 0
|
||||
self._usage_records = 0
|
||||
self._direct_cost_records = 0
|
||||
|
||||
@property
|
||||
def stream(self) -> bool:
|
||||
@@ -31,18 +40,37 @@ class TrackingChatModel:
|
||||
|
||||
async def __call__(self, *args: Any, **kwargs: Any) -> Any:
    """Invoke the wrapped model and account for the call.

    The keyword arguments are logged and the call counter is bumped before
    the wrapped model is awaited. An ``AsyncGenerator`` result is handed to
    ``_track_stream`` and that wrapper is returned; any other response has
    its ``usage`` attribute (``None`` when missing) passed to
    ``_record_usage`` and is returned unchanged.
    """
    self._log_model_call(kwargs)
    # Counted before awaiting, so a call that raises is still counted.
    self._model_call_records = self._model_call_records + 1

    result = await self._inner(*args, **kwargs)
    if not isinstance(result, AsyncGenerator):
        self._record_usage(getattr(result, "usage", None))
        return result
    return self._track_stream(result)
|
||||
|
||||
def usage_summary(self) -> dict[str, int]:
|
||||
def usage_summary(self) -> dict[str, int | float | str]:
|
||||
direct_cost = self._direct_cost if self._direct_cost_observed else 0.0
|
||||
direct_cost_complete = (
|
||||
self._model_call_records > 0
|
||||
and self._model_call_records == self._direct_cost_records
|
||||
)
|
||||
return {
|
||||
"input_tokens": self._total_input_tokens,
|
||||
"output_tokens": self._total_output_tokens,
|
||||
"total_tokens": self._total_tokens,
|
||||
"latency_ms": self._total_latency_ms,
|
||||
"cached_prompt_tokens": self._cached_prompt_tokens,
|
||||
"prompt_cache_hit_tokens": self._prompt_cache_hit_tokens,
|
||||
"prompt_cache_miss_tokens": self._prompt_cache_miss_tokens,
|
||||
"reasoning_tokens": self._reasoning_tokens,
|
||||
"direct_cost": direct_cost,
|
||||
"direct_cost_observed": int(self._direct_cost_observed),
|
||||
"direct_cost_complete": int(direct_cost_complete),
|
||||
"model_call_records": self._model_call_records,
|
||||
"usage_records": self._usage_records,
|
||||
"direct_cost_records": self._direct_cost_records,
|
||||
"cost_source": "provider"
|
||||
if self._direct_cost_observed
|
||||
else "catalog_fallback",
|
||||
}
|
||||
|
||||
def _log_model_call(self, kwargs: dict[str, Any]) -> None:
|
||||
@@ -101,25 +129,167 @@ class TrackingChatModel:
|
||||
def _record_usage(self, usage: Any) -> None:
|
||||
if usage is None:
|
||||
return
|
||||
self._total_input_tokens += max(int(getattr(usage, "input_tokens", 0) or 0), 0)
|
||||
self._total_output_tokens += max(
|
||||
int(getattr(usage, "output_tokens", 0) or 0), 0
|
||||
self._usage_records += 1
|
||||
usage_mapping = self._to_mapping(usage)
|
||||
metadata = self._safe_get(usage, "metadata")
|
||||
metadata_mapping = self._to_mapping(metadata)
|
||||
|
||||
input_tokens = self._coerce_int(
|
||||
self._first_non_null(
|
||||
self._safe_get(usage, "input_tokens"),
|
||||
usage_mapping.get("input_tokens"),
|
||||
metadata_mapping.get("prompt_tokens"),
|
||||
)
|
||||
)
|
||||
self._total_latency_ms += max(
|
||||
int(round(float(getattr(usage, "time", 0) or 0) * 1000)), 0
|
||||
output_tokens = self._coerce_int(
|
||||
self._first_non_null(
|
||||
self._safe_get(usage, "output_tokens"),
|
||||
usage_mapping.get("output_tokens"),
|
||||
metadata_mapping.get("completion_tokens"),
|
||||
)
|
||||
)
|
||||
metadata = getattr(usage, "metadata", None)
|
||||
if metadata is None:
|
||||
return
|
||||
self._cached_prompt_tokens += max(self._extract_cached_tokens(metadata), 0)
|
||||
total_tokens = self._coerce_int(
|
||||
self._first_non_null(
|
||||
self._safe_get(usage, "total_tokens"),
|
||||
usage_mapping.get("total_tokens"),
|
||||
metadata_mapping.get("total_tokens"),
|
||||
input_tokens + output_tokens,
|
||||
)
|
||||
)
|
||||
latency_ms = max(
|
||||
int(
|
||||
round(
|
||||
self._coerce_float(
|
||||
self._first_non_null(
|
||||
self._safe_get(usage, "time"),
|
||||
usage_mapping.get("time"),
|
||||
0.0,
|
||||
)
|
||||
)
|
||||
* 1000
|
||||
)
|
||||
),
|
||||
0,
|
||||
)
|
||||
|
||||
prompt_tokens_details = self._to_mapping(
|
||||
metadata_mapping.get("prompt_tokens_details")
|
||||
)
|
||||
completion_tokens_details = self._to_mapping(
|
||||
metadata_mapping.get("completion_tokens_details")
|
||||
)
|
||||
|
||||
cached_prompt_tokens = self._coerce_int(
|
||||
self._first_non_null(
|
||||
prompt_tokens_details.get("cached_tokens"),
|
||||
metadata_mapping.get("prompt_cache_hit_tokens"),
|
||||
0,
|
||||
)
|
||||
)
|
||||
prompt_cache_hit_tokens = self._coerce_int(
|
||||
self._first_non_null(
|
||||
metadata_mapping.get("prompt_cache_hit_tokens"),
|
||||
cached_prompt_tokens,
|
||||
)
|
||||
)
|
||||
prompt_cache_miss_tokens = self._coerce_int(
|
||||
self._first_non_null(
|
||||
metadata_mapping.get("prompt_cache_miss_tokens"),
|
||||
max(input_tokens - prompt_cache_hit_tokens, 0),
|
||||
)
|
||||
)
|
||||
reasoning_tokens = self._coerce_int(
|
||||
self._first_non_null(completion_tokens_details.get("reasoning_tokens"), 0)
|
||||
)
|
||||
direct_cost = self._coerce_optional_float(
|
||||
self._first_non_null(
|
||||
self._safe_get(usage, "cost"),
|
||||
usage_mapping.get("cost"),
|
||||
metadata_mapping.get("cost"),
|
||||
metadata_mapping.get("total_cost"),
|
||||
)
|
||||
)
|
||||
|
||||
self._total_input_tokens += input_tokens
|
||||
self._total_output_tokens += output_tokens
|
||||
self._total_tokens += total_tokens
|
||||
self._total_latency_ms += latency_ms
|
||||
self._cached_prompt_tokens += cached_prompt_tokens
|
||||
self._prompt_cache_hit_tokens += prompt_cache_hit_tokens
|
||||
self._prompt_cache_miss_tokens += prompt_cache_miss_tokens
|
||||
self._reasoning_tokens += reasoning_tokens
|
||||
if direct_cost is not None:
|
||||
self._direct_cost_observed = True
|
||||
self._direct_cost_records += 1
|
||||
self._direct_cost += max(direct_cost, 0.0)
|
||||
|
||||
@staticmethod
|
||||
def _extract_cached_tokens(metadata: Any) -> int:
|
||||
if isinstance(metadata, dict):
|
||||
prompt_details = metadata.get("prompt_tokens_details")
|
||||
if isinstance(prompt_details, dict):
|
||||
return int(prompt_details.get("cached_tokens", 0) or 0)
|
||||
def _safe_get(obj: Any, key: str) -> Any:
    """Look up ``key`` on ``obj`` without ever raising.

    Dicts are read with ``dict.get``; anything else is read with
    ``getattr`` defaulting to ``None``. ``None`` input and any exception
    raised during the lookup both yield ``None``.
    """
    if obj is None:
        return None
    try:
        found = obj.get(key) if isinstance(obj, dict) else getattr(obj, key, None)
    except Exception:
        # Best-effort accessor: a failing lookup is treated as "absent".
        found = None
    return found
|
||||
|
||||
@classmethod
|
||||
def _to_mapping(cls, obj: Any) -> dict[str, Any]:
    """Convert ``obj`` to a dict view of its fields, or ``{}``.

    Resolution order: ``None`` gives ``{}``; a dict gives a shallow copy;
    an object with a callable ``model_dump`` gives that call's result when
    it is a dict (a raising ``model_dump`` is ignored); otherwise the
    object's ``__dict__`` is returned when it is a dict; else ``{}``.
    """
    if obj is None:
        return {}
    if isinstance(obj, dict):
        return dict(obj)

    dumper = cls._safe_get(obj, "model_dump")
    if callable(dumper):
        try:
            candidate = dumper()
        except Exception:
            candidate = None
        if isinstance(candidate, dict):
            return candidate

    attributes = cls._safe_get(obj, "__dict__")
    return attributes if isinstance(attributes, dict) else {}
|
||||
|
||||
@staticmethod
|
||||
def _first_non_null(*values: Any) -> Any:
    """Return the first argument that is not ``None``, else ``None``.

    Falsy values such as ``0`` or ``""`` count as present.
    """
    return next((candidate for candidate in values if candidate is not None), None)
|
||||
|
||||
@staticmethod
|
||||
def _coerce_int(value: Any) -> int:
    """Coerce ``value`` to a non-negative ``int``, defaulting to 0.

    ``None`` gives 0, booleans give 0/1, integers are clamped at 0, and
    everything else goes through ``int(float(value))`` (so ``"12.9"``
    gives 12). Any conversion failure gives 0.
    """
    if value is None:
        return 0
    if isinstance(value, bool):
        # Checked before ``int`` because ``bool`` is an ``int`` subclass.
        return int(value)
    if isinstance(value, int):
        return max(value, 0)
    try:
        truncated = int(float(value))
    except Exception:
        return 0
    return max(truncated, 0)
|
||||
|
||||
prompt_details = getattr(metadata, "prompt_tokens_details", None)
|
||||
return int(getattr(prompt_details, "cached_tokens", 0) or 0)
|
||||
@staticmethod
|
||||
def _coerce_float(value: Any) -> float:
    """Coerce ``value`` to a finite, non-negative ``float``, defaulting to 0.0.

    ``None``, unparsable input, negative numbers, NaN and +/-infinity all
    give ``0.0``. Non-finite values are rejected because the result is
    multiplied and rounded to an ``int`` by the usage recorder, which
    raises on NaN (``ValueError``) and infinity (``OverflowError``).
    """
    import math  # local import keeps the fix self-contained in this block

    if value is None:
        return 0.0
    try:
        parsed = float(value)
    except Exception:
        return 0.0
    if not math.isfinite(parsed):
        return 0.0
    return max(parsed, 0.0)
|
||||
|
||||
@staticmethod
|
||||
def _coerce_optional_float(value: Any) -> float | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
parsed = float(value)
|
||||
except Exception:
|
||||
return None
|
||||
if parsed < 0:
|
||||
return None
|
||||
return parsed
|
||||
|
||||
@@ -1,52 +1,63 @@
|
||||
factories:
|
||||
- name: dashscope
|
||||
request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
|
||||
- name: dashscope
|
||||
request_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/qwen-color.png
|
||||
|
||||
- name: minimax
|
||||
request_url: https://api.minimaxi.com/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
|
||||
- name: minimax
|
||||
request_url: https://api.minimaxi.com/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/minimax-color.png
|
||||
|
||||
- name: moonshot
|
||||
request_url: https://api.moonshot.cn/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
|
||||
- name: moonshot
|
||||
request_url: https://api.moonshot.cn/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/moonshot.png
|
||||
|
||||
- name: deepseek
|
||||
request_url: https://api.deepseek.com/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
|
||||
- name: deepseek
|
||||
request_url: https://api.deepseek.com/v1
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/deepseek-color.png
|
||||
|
||||
- name: volcengine
|
||||
request_url: https://ark.cn-beijing.volces.com/api/v3
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
|
||||
- name: volcengine
|
||||
request_url: https://ark.cn-beijing.volces.com/api/v3
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/doubao-color.png
|
||||
|
||||
- name: zai
|
||||
request_url: https://api.z.ai/api/paas/v4
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
|
||||
- name: zai
|
||||
request_url: https://api.z.ai/api/paas/v4
|
||||
avatar: https://registry.npmmirror.com/@lobehub/icons-static-png/latest/files/light/zai.png
|
||||
|
||||
llms:
|
||||
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
|
||||
- model_code: qwen3.5-flash
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-flash
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000002
|
||||
output_cost_per_token: 0.000002
|
||||
cache_hit_cost_per_token: 0.00000002
|
||||
- max_prompt_tokens: 256000
|
||||
input_cost_per_token: 0.0000008
|
||||
output_cost_per_token: 0.000008
|
||||
cache_hit_cost_per_token: 0.00000008
|
||||
- max_prompt_tokens: 1000000
|
||||
input_cost_per_token: 0.0000012
|
||||
output_cost_per_token: 0.000012
|
||||
cache_hit_cost_per_token: 0.00000012
|
||||
# qwen3.5-flash (3 tiers: 128K, 256K, 1M)
|
||||
- model_code: qwen3.5-flash
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-flash
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000002
|
||||
output_cost_per_token: 0.000002
|
||||
cache_hit_cost_per_token: 0.00000002
|
||||
- max_prompt_tokens: 256000
|
||||
input_cost_per_token: 0.0000008
|
||||
output_cost_per_token: 0.000008
|
||||
cache_hit_cost_per_token: 0.00000008
|
||||
- max_prompt_tokens: 1000000
|
||||
input_cost_per_token: 0.0000012
|
||||
output_cost_per_token: 0.000012
|
||||
cache_hit_cost_per_token: 0.00000012
|
||||
|
||||
- model_code: deepseek-chat
|
||||
factory_name: deepseek
|
||||
litellm_model: deepseek/deepseek-chat
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.000002
|
||||
output_cost_per_token: 0.000003
|
||||
cache_hit_cost_per_token: 0.0000002
|
||||
- model_code: qwen3.5-35b-a3b
|
||||
factory_name: dashscope
|
||||
litellm_model: dashscope/qwen3.5-35b-a3b
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.0000004
|
||||
output_cost_per_token: 0.0000032
|
||||
- max_prompt_tokens: 256000
|
||||
input_cost_per_token: 0.0000016
|
||||
output_cost_per_token: 0.0000128
|
||||
|
||||
- model_code: deepseek-chat
|
||||
factory_name: deepseek
|
||||
litellm_model: deepseek/deepseek-chat
|
||||
pricing_tiers:
|
||||
- max_prompt_tokens: 128000
|
||||
input_cost_per_token: 0.000002
|
||||
output_cost_per_token: 0.000003
|
||||
cache_hit_cost_per_token: 0.0000002
|
||||
|
||||
@@ -85,9 +85,15 @@ class LiteLLMService:
|
||||
selected_tier = tier
|
||||
break
|
||||
|
||||
cached_token_rate = (
|
||||
selected_tier.cache_hit_cost_per_token
|
||||
if selected_tier.cache_hit_cost_per_token > 0
|
||||
else selected_tier.input_cost_per_token
|
||||
)
|
||||
|
||||
return float(
|
||||
uncached_prompt_tokens * selected_tier.input_cost_per_token
|
||||
+ normalized_cached_tokens * selected_tier.cache_hit_cost_per_token
|
||||
+ normalized_cached_tokens * cached_token_rate
|
||||
+ normalized_completion_tokens * selected_tier.output_cost_per_token
|
||||
)
|
||||
|
||||
@@ -95,23 +101,86 @@ class LiteLLMService:
|
||||
self,
|
||||
*,
|
||||
model: str,
|
||||
usage_summary: dict[str, int] | None,
|
||||
usage_summary: dict[str, Any] | None,
|
||||
) -> dict[str, Any]:
|
||||
summary = usage_summary or {}
|
||||
input_tokens = max(int(summary.get("input_tokens", 0) or 0), 0)
|
||||
output_tokens = max(int(summary.get("output_tokens", 0) or 0), 0)
|
||||
total_tokens = max(
|
||||
int(summary.get("total_tokens", input_tokens + output_tokens) or 0), 0
|
||||
)
|
||||
latency_ms = max(int(summary.get("latency_ms", 0) or 0), 0)
|
||||
cached_prompt_tokens = max(int(summary.get("cached_prompt_tokens", 0) or 0), 0)
|
||||
cost = self.calculate_cost(
|
||||
model=model,
|
||||
prompt_tokens=input_tokens,
|
||||
completion_tokens=output_tokens,
|
||||
cached_prompt_tokens=cached_prompt_tokens,
|
||||
prompt_cache_hit_tokens = max(
|
||||
int(summary.get("prompt_cache_hit_tokens", cached_prompt_tokens) or 0), 0
|
||||
)
|
||||
prompt_cache_miss_tokens = max(
|
||||
int(
|
||||
summary.get(
|
||||
"prompt_cache_miss_tokens",
|
||||
max(input_tokens - prompt_cache_hit_tokens, 0),
|
||||
)
|
||||
or 0
|
||||
),
|
||||
0,
|
||||
)
|
||||
reasoning_tokens = max(int(summary.get("reasoning_tokens", 0) or 0), 0)
|
||||
direct_cost_raw = summary.get("direct_cost")
|
||||
direct_cost_observed = bool(int(summary.get("direct_cost_observed", 0) or 0))
|
||||
direct_cost_complete = bool(int(summary.get("direct_cost_complete", 0) or 0))
|
||||
model_call_records = max(int(summary.get("model_call_records", 0) or 0), 0)
|
||||
usage_records = max(int(summary.get("usage_records", 0) or 0), 0)
|
||||
usage_complete = model_call_records == 0 or model_call_records == usage_records
|
||||
direct_cost = self._coerce_non_negative_float(direct_cost_raw)
|
||||
|
||||
if (
|
||||
usage_complete
|
||||
and direct_cost_observed
|
||||
and direct_cost_complete
|
||||
and direct_cost is not None
|
||||
):
|
||||
cost = direct_cost
|
||||
cost_source = "provider"
|
||||
else:
|
||||
cost = self.calculate_cost(
|
||||
model=model,
|
||||
prompt_tokens=input_tokens,
|
||||
completion_tokens=output_tokens,
|
||||
cached_prompt_tokens=cached_prompt_tokens,
|
||||
)
|
||||
cost_source = (
|
||||
"incomplete_usage_fallback"
|
||||
if not usage_complete
|
||||
else (
|
||||
"catalog_fallback_incomplete_provider_cost"
|
||||
if direct_cost_observed and not direct_cost_complete
|
||||
else "catalog_fallback"
|
||||
)
|
||||
)
|
||||
|
||||
return {
|
||||
"model": model,
|
||||
"inputTokens": input_tokens,
|
||||
"outputTokens": output_tokens,
|
||||
"totalTokens": total_tokens,
|
||||
"cachedPromptTokens": cached_prompt_tokens,
|
||||
"promptCacheHitTokens": prompt_cache_hit_tokens,
|
||||
"promptCacheMissTokens": prompt_cache_miss_tokens,
|
||||
"reasoningTokens": reasoning_tokens,
|
||||
"cost": cost,
|
||||
"costSource": cost_source,
|
||||
"usageComplete": usage_complete,
|
||||
"latencyMs": latency_ms,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _coerce_non_negative_float(value: Any) -> float | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
parsed = float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if parsed < 0:
|
||||
return None
|
||||
return parsed
|
||||
|
||||
@@ -9,7 +9,7 @@ from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
from core.db.base_repository import BaseRepository
|
||||
from core.logging import get_logger
|
||||
from models.schedule_items import ScheduleItem
|
||||
from models.schedule_items import ScheduleItem, ScheduleItemStatus
|
||||
from models.schedule_subscriptions import ScheduleSubscription, SubscriptionStatus
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -61,6 +61,11 @@ class ScheduleItemRepository(Protocol):
|
||||
start_at: datetime,
|
||||
end_at: datetime,
|
||||
) -> Sequence[tuple[ScheduleItem, ScheduleSubscription]]: ...
|
||||
async def archive_expired_subscribed_items(
    self,
    subscriber_id: UUID,
    now_at: datetime,
) -> int:
    """Archive expired items for ``subscriber_id`` as of ``now_at``.

    Interface stub: implementations return an integer count.
    NOTE(review): the exact selection rules live in the implementation;
    confirm there before relying on them.
    """
|
||||
|
||||
|
||||
class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
|
||||
@@ -149,8 +154,13 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
|
||||
select(ScheduleItem)
|
||||
.where(ScheduleItem.owner_id == owner_id)
|
||||
.where(ScheduleItem.deleted_at.is_(None))
|
||||
.where(ScheduleItem.start_at >= start_at)
|
||||
.where(ScheduleItem.start_at <= end_at)
|
||||
.where(
|
||||
or_(
|
||||
ScheduleItem.end_at.is_(None),
|
||||
ScheduleItem.end_at >= start_at,
|
||||
)
|
||||
)
|
||||
.order_by(ScheduleItem.start_at.asc())
|
||||
)
|
||||
result = await self._session.execute(stmt)
|
||||
@@ -308,8 +318,13 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
|
||||
.where(ScheduleSubscription.subscriber_id == subscriber_id)
|
||||
.where(ScheduleSubscription.status == SubscriptionStatus.ACTIVE)
|
||||
.where(ScheduleItem.deleted_at.is_(None))
|
||||
.where(ScheduleItem.start_at >= start_at)
|
||||
.where(ScheduleItem.start_at <= end_at)
|
||||
.where(
|
||||
or_(
|
||||
ScheduleItem.end_at.is_(None),
|
||||
ScheduleItem.end_at >= start_at,
|
||||
)
|
||||
)
|
||||
.order_by(ScheduleItem.start_at.asc())
|
||||
)
|
||||
result = await self._session.execute(stmt)
|
||||
@@ -317,3 +332,38 @@ class SQLAlchemyScheduleItemRepository(BaseRepository[ScheduleItem]):
|
||||
except SQLAlchemyError:
|
||||
logger.exception("Failed to list subscribed items")
|
||||
raise
|
||||
|
||||
async def archive_expired_subscribed_items(
    self,
    subscriber_id: UUID,
    now_at: datetime,
) -> int:
    """Set expired items the subscriber follows to ``ARCHIVED``.

    An item is selected when it is joined to an ``ACTIVE`` subscription of
    ``subscriber_id``, its ``deleted_at`` is NULL, its status is
    ``ACTIVE`` and its ``end_at`` is set and not later than ``now_at``.
    The update is executed and the session flushed; no commit happens here.

    Returns:
        The statement's ``rowcount``, or 0 when it is missing or falsy.

    Raises:
        SQLAlchemyError: logged, then re-raised unchanged.
    """
    try:
        expiry_criteria = (
            ScheduleSubscription.subscriber_id == subscriber_id,
            ScheduleSubscription.status == SubscriptionStatus.ACTIVE,
            ScheduleItem.deleted_at.is_(None),
            ScheduleItem.status == ScheduleItemStatus.ACTIVE,
            ScheduleItem.end_at.is_not(None),
            ScheduleItem.end_at <= now_at,
        )
        expired_ids = (
            select(ScheduleItem.id)
            .join(
                ScheduleSubscription,
                ScheduleSubscription.item_id == ScheduleItem.id,
            )
            .where(*expiry_criteria)
        )
        archive_stmt = (
            update(ScheduleItem)
            .where(ScheduleItem.id.in_(expired_ids))
            .values(status=ScheduleItemStatus.ARCHIVED)
        )

        outcome = await self._session.execute(archive_stmt)
        await self._session.flush()
        affected = getattr(outcome, "rowcount", 0) or 0
        return int(affected)
    except SQLAlchemyError:
        logger.exception(
            "Failed to archive expired subscribed items",
            subscriber_id=str(subscriber_id),
        )
        raise
|
||||
|
||||
@@ -240,6 +240,11 @@ class ScheduleItemService(BaseService):
|
||||
raise HTTPException(status_code=400, detail="end_at must be after start_at")
|
||||
|
||||
try:
|
||||
archived_count = await self._repository.archive_expired_subscribed_items(
|
||||
user_id,
|
||||
datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
subscribed_items = (
|
||||
await self._repository.list_subscribed_items_by_date_range(
|
||||
user_id, normalized_start_at, normalized_end_at
|
||||
@@ -256,9 +261,12 @@ class ScheduleItemService(BaseService):
|
||||
)
|
||||
|
||||
results.sort(key=lambda x: x.start_at)
|
||||
if archived_count > 0:
|
||||
await self._session.commit()
|
||||
|
||||
return results
|
||||
except SQLAlchemyError:
|
||||
await self._session.rollback()
|
||||
logger.exception("Failed to list schedule items")
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Schedule item store unavailable"
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, cast
|
||||
|
||||
from core.agentscope.runtime.model_tracking import TrackingChatModel
|
||||
|
||||
|
||||
class _FakeMetadata:
    """Fake usage metadata that exposes its fields through ``model_dump``."""

    def model_dump(self) -> dict[str, object]:
        """Return a fixed payload with token totals, cache and reasoning details."""
        payload: dict[str, object] = dict(
            prompt_tokens=120,
            completion_tokens=30,
            total_tokens=150,
        )
        payload["prompt_tokens_details"] = {"cached_tokens": 80}
        payload["prompt_cache_hit_tokens"] = 80
        payload["prompt_cache_miss_tokens"] = 40
        payload["completion_tokens_details"] = {"reasoning_tokens": 5}
        return payload
|
||||
|
||||
|
||||
class _FakeUsage:
    """Fake usage record carrying token counts, a time and metadata, but no cost."""

    input_tokens, output_tokens = 120, 30
    time = 1.234
    metadata = _FakeMetadata()
|
||||
|
||||
|
||||
class _FakeResponse:
    """Fake model response whose ``usage`` is a shared ``_FakeUsage`` instance."""

    usage = _FakeUsage()
|
||||
|
||||
|
||||
class _FakeModel:
    """Fake non-streaming model that answers every call with ``_FakeResponse``."""

    stream = False

    async def __call__(self, *args: object, **kwargs: object) -> _FakeResponse:
        """Ignore all arguments and return a fresh ``_FakeResponse``."""
        reply = _FakeResponse()
        return reply
|
||||
|
||||
|
||||
class _FakeUsageWithProviderCost:
    """Fake usage record that additionally carries a ``cost`` attribute."""

    input_tokens, output_tokens = 50, 10
    time = 0.5
    cost = 0.0123
    metadata = _FakeMetadata()
|
||||
|
||||
|
||||
class _FakeResponseWithProviderCost:
    """Fake response whose ``usage`` is a ``_FakeUsageWithProviderCost`` instance."""

    usage = _FakeUsageWithProviderCost()
|
||||
|
||||
|
||||
class _FakeModelWithProviderCost:
    """Fake non-streaming model that answers with ``_FakeResponseWithProviderCost``."""

    stream = False

    async def __call__(
        self, *args: object, **kwargs: object
    ) -> _FakeResponseWithProviderCost:
        """Ignore all arguments and return a fresh priced response."""
        reply = _FakeResponseWithProviderCost()
        return reply
|
||||
|
||||
|
||||
class _FakeResponseWithoutUsage:
    """Fake response that reports no usage (``usage`` is ``None``)."""

    usage = None
|
||||
|
||||
|
||||
class _FakeModelWithoutUsage:
    """Fake non-streaming model that answers with ``_FakeResponseWithoutUsage``."""

    stream = False

    async def __call__(
        self, *args: object, **kwargs: object
    ) -> _FakeResponseWithoutUsage:
        """Ignore all arguments and return a fresh usage-less response."""
        reply = _FakeResponseWithoutUsage()
        return reply
|
||||
|
||||
|
||||
async def test_tracking_chat_model_collects_primary_usage_fields() -> None:
    """One call without a provider cost yields the expected usage summary."""
    tracked = TrackingChatModel(cast(Any, _FakeModel()))

    await tracked("prompt")

    expected = {
        "input_tokens": 120,
        "output_tokens": 30,
        "total_tokens": 150,
        "latency_ms": 1234,
        "cached_prompt_tokens": 80,
        "prompt_cache_hit_tokens": 80,
        "prompt_cache_miss_tokens": 40,
        "reasoning_tokens": 5,
        "direct_cost": 0.0,
        "direct_cost_observed": 0,
        "direct_cost_complete": 0,
        "model_call_records": 1,
        "usage_records": 1,
        "direct_cost_records": 0,
        "cost_source": "catalog_fallback",
    }
    actual = tracked.usage_summary()
    # Checked key by key, in order, so a failure points at one field.
    for field, wanted in expected.items():
        assert actual[field] == wanted
|
||||
|
||||
|
||||
async def test_tracking_chat_model_prefers_provider_cost_when_available() -> None:
    """A usage record carrying ``cost`` is reported as a complete provider cost."""
    tracked = TrackingChatModel(cast(Any, _FakeModelWithProviderCost()))

    await tracked("prompt")

    expected = {
        "direct_cost": 0.0123,
        "direct_cost_observed": 1,
        "direct_cost_complete": 1,
        "model_call_records": 1,
        "usage_records": 1,
        "direct_cost_records": 1,
        "cost_source": "provider",
    }
    actual = tracked.usage_summary()
    for field, wanted in expected.items():
        assert actual[field] == wanted
|
||||
|
||||
|
||||
async def test_tracking_chat_model_marks_direct_cost_incomplete_when_usage_missing() -> None:
    """A response with ``usage = None`` counts the call but records no usage or cost."""
    tracked = TrackingChatModel(cast(Any, _FakeModelWithoutUsage()))

    await tracked("prompt")

    expected = {
        "model_call_records": 1,
        "usage_records": 0,
        "direct_cost_records": 0,
        "direct_cost_complete": 0,
    }
    actual = tracked.usage_summary()
    for field, wanted in expected.items():
        assert actual[field] == wanted
|
||||
@@ -44,10 +44,75 @@ def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
|
||||
},
|
||||
)
|
||||
|
||||
assert metadata == {
|
||||
"model": "dashscope/qwen3.5-flash",
|
||||
"inputTokens": 2000,
|
||||
"outputTokens": 100,
|
||||
"cost": pytest.approx(0.00051),
|
||||
"latencyMs": 321,
|
||||
}
|
||||
assert metadata["model"] == "dashscope/qwen3.5-flash"
|
||||
assert metadata["inputTokens"] == 2000
|
||||
assert metadata["outputTokens"] == 100
|
||||
assert metadata["totalTokens"] == 2100
|
||||
assert metadata["cachedPromptTokens"] == 500
|
||||
assert metadata["promptCacheHitTokens"] == 500
|
||||
assert metadata["promptCacheMissTokens"] == 1500
|
||||
assert metadata["reasoningTokens"] == 0
|
||||
assert metadata["cost"] == pytest.approx(0.00051)
|
||||
assert metadata["costSource"] == "catalog_fallback"
|
||||
assert metadata["usageComplete"] is True
|
||||
assert metadata["latencyMs"] == 321
|
||||
|
||||
|
||||
def test_build_usage_metadata_prefers_provider_direct_cost() -> None:
    """An observed and complete provider cost is used verbatim."""
    provider_usage = {
        "input_tokens": 1000,
        "output_tokens": 100,
        "latency_ms": 100,
        "cached_prompt_tokens": 0,
        "direct_cost": 0.1234,
        "direct_cost_observed": 1,
        "direct_cost_complete": 1,
    }

    result = LiteLLMService().build_usage_metadata(
        model="deepseek-chat",
        usage_summary=provider_usage,
    )

    assert result["cost"] == pytest.approx(0.1234)
    assert result["costSource"] == "provider"
    assert result["usageComplete"] is True
|
||||
|
||||
|
||||
def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None:
    """An observed but incomplete provider cost is replaced by the catalog price."""
    partial_cost_usage = {
        "input_tokens": 1000,
        "output_tokens": 100,
        "latency_ms": 100,
        "cached_prompt_tokens": 0,
        "direct_cost": 0.1234,
        "direct_cost_observed": 1,
        "direct_cost_complete": 0,
    }

    result = LiteLLMService().build_usage_metadata(
        model="deepseek-chat",
        usage_summary=partial_cost_usage,
    )

    assert result["cost"] == pytest.approx(0.0023)
    assert result["costSource"] == "catalog_fallback_incomplete_provider_cost"
|
||||
|
||||
|
||||
def test_build_usage_metadata_marks_incomplete_usage_fallback() -> None:
    """More model calls than usage records is flagged as incomplete usage."""
    missing_usage = {
        "input_tokens": 0,
        "output_tokens": 0,
        "latency_ms": 0,
        "cached_prompt_tokens": 0,
        "model_call_records": 1,
        "usage_records": 0,
    }

    result = LiteLLMService().build_usage_metadata(
        model="deepseek-chat",
        usage_summary=missing_usage,
    )

    assert result["costSource"] == "incomplete_usage_fallback"
    assert result["usageComplete"] is False
|
||||
|
||||
@@ -4,6 +4,7 @@ from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
from core.auth.models import CurrentUser
|
||||
from models.schedule_items import (
|
||||
@@ -13,6 +14,7 @@ from models.schedule_items import (
|
||||
)
|
||||
from v1.schedule_items.schemas import (
|
||||
ScheduleItemCreateRequest,
|
||||
ScheduleItemListRequest,
|
||||
ScheduleItemMetadata,
|
||||
ScheduleItemUpdateRequest,
|
||||
)
|
||||
@@ -43,6 +45,7 @@ def _create_mock_schedule_item(
|
||||
class FakeRepo:
|
||||
def __init__(self, item: ScheduleItem | None) -> None:
|
||||
self._item = item
|
||||
self.archive_expired_called = 0
|
||||
|
||||
async def get_by_item_id(
|
||||
self, item_id: UUID, owner_id: UUID
|
||||
@@ -89,8 +92,9 @@ class FakeRepo:
|
||||
*,
|
||||
page: int,
|
||||
page_size: int,
|
||||
query: str | None = None,
|
||||
) -> tuple[list[ScheduleItem], int]:
|
||||
del owner_id, page, page_size
|
||||
del owner_id, page, page_size, query
|
||||
return ([self._item] if self._item else [], 1 if self._item else 0)
|
||||
|
||||
async def create_subscription(self, data: dict):
|
||||
@@ -104,7 +108,20 @@ class FakeRepo:
|
||||
end_at: datetime,
|
||||
):
|
||||
del subscriber_id, start_at, end_at
|
||||
return []
|
||||
if self._item is None:
|
||||
return []
|
||||
subscription = MagicMock()
|
||||
subscription.permission = 1
|
||||
return [(self._item, subscription)]
|
||||
|
||||
async def archive_expired_subscribed_items(
|
||||
self,
|
||||
subscriber_id: UUID,
|
||||
now_at: datetime,
|
||||
) -> int:
|
||||
del subscriber_id, now_at
|
||||
self.archive_expired_called += 1
|
||||
return 0
|
||||
|
||||
async def get_user_subscriptions(self, subscriber_id: UUID):
|
||||
del subscriber_id
|
||||
@@ -376,3 +393,110 @@ async def test_update_maps_null_metadata_to_extra_metadata_null(
|
||||
assert "extra_metadata" in captured
|
||||
assert captured["extra_metadata"] is None
|
||||
assert "metadata" not in captured
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_by_date_range_archives_expired_before_query(
    mock_session: AsyncMock,
    mock_inbox_repository: MagicMock,
) -> None:
    """Listing a date range invokes the archive step exactly once."""
    user_id = UUID("00000000-0000-0000-0000-000000000001")
    repo = FakeRepo(_create_mock_schedule_item())
    service = ScheduleItemService(
        repository=repo,
        session=mock_session,
        current_user=CurrentUser(id=user_id),
        inbox_repository=mock_inbox_repository,
    )
    window = ScheduleItemListRequest(
        start_at=datetime(2026, 2, 1, 0, 0, tzinfo=timezone.utc),
        end_at=datetime(2026, 3, 1, 0, 0, tzinfo=timezone.utc),
    )

    await service.list_by_date_range(request=window)

    assert repo.archive_expired_called == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_by_date_range_commits_when_archived_changed(
    mock_session: AsyncMock,
    mock_inbox_repository: MagicMock,
) -> None:
    """A non-zero archive count makes the service commit exactly once."""

    class ArchiveRepo(FakeRepo):
        """Repository double reporting that two items were archived."""

        async def archive_expired_subscribed_items(
            self,
            subscriber_id: UUID,
            now_at: datetime,
        ) -> int:
            del subscriber_id, now_at
            self.archive_expired_called += 1
            return 2

    user_id = UUID("00000000-0000-0000-0000-000000000001")
    repo = ArchiveRepo(_create_mock_schedule_item())
    service = ScheduleItemService(
        repository=repo,
        session=mock_session,
        current_user=CurrentUser(id=user_id),
        inbox_repository=mock_inbox_repository,
    )
    window = ScheduleItemListRequest(
        start_at=datetime(2026, 2, 1, 0, 0, tzinfo=timezone.utc),
        end_at=datetime(2026, 3, 1, 0, 0, tzinfo=timezone.utc),
    )

    await service.list_by_date_range(request=window)

    mock_session.commit.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_by_date_range_rolls_back_when_query_fails_after_archive(
    mock_session: AsyncMock,
    mock_inbox_repository: MagicMock,
) -> None:
    """A query failure after archiving gives a 503, one rollback and no commit."""

    class FailingRepo(FakeRepo):
        """Repository double: archiving succeeds, the subscribed-items query fails."""

        async def archive_expired_subscribed_items(
            self,
            subscriber_id: UUID,
            now_at: datetime,
        ) -> int:
            del subscriber_id, now_at
            return 1

        async def list_subscribed_items_by_date_range(
            self,
            subscriber_id: UUID,
            start_at: datetime,
            end_at: datetime,
        ):
            del subscriber_id, start_at, end_at
            raise SQLAlchemyError("db unavailable")

    user_id = UUID("00000000-0000-0000-0000-000000000001")
    service = ScheduleItemService(
        repository=FailingRepo(_create_mock_schedule_item()),
        session=mock_session,
        current_user=CurrentUser(id=user_id),
        inbox_repository=mock_inbox_repository,
    )
    window = ScheduleItemListRequest(
        start_at=datetime(2026, 2, 1, 0, 0, tzinfo=timezone.utc),
        end_at=datetime(2026, 3, 1, 0, 0, tzinfo=timezone.utc),
    )

    with pytest.raises(HTTPException) as exc_info:
        await service.list_by_date_range(request=window)

    assert exc_info.value.status_code == 503
    mock_session.rollback.assert_awaited_once()
    mock_session.commit.assert_not_awaited()
|
||||
|
||||
Reference in New Issue
Block a user