2026-03-11 09:14:51 +08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
2026-03-25 20:08:34 +08:00
|
|
|
from services.llm_pricing.service import LlmPricingService
|
2026-03-11 09:14:51 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_calculate_cost_uses_first_qwen_tier() -> None:
    """Check the qwen3.5-flash cost for a 100k-prompt-token request.

    Per the test name, a request of this size is expected to be priced with
    the first tier; the tier boundaries themselves are defined by the
    pricing service, not by this test.
    """
    pricing = LlmPricingService()
    token_counts = {
        "prompt_tokens": 100_000,
        "completion_tokens": 1_000,
        "cached_prompt_tokens": 10_000,
    }

    computed = pricing.calculate_cost(model="qwen3.5-flash", **token_counts)

    assert computed == pytest.approx(0.0202)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_calculate_cost_uses_second_qwen_tier() -> None:
    """Check the qwen3.5-flash cost for a 200k-prompt-token request.

    Per the test name, a request of this size is expected to be priced with
    the second tier; the tier boundaries themselves are defined by the
    pricing service, not by this test.
    """
    pricing = LlmPricingService()
    request = dict(
        model="qwen3.5-flash",
        prompt_tokens=200_000,
        completion_tokens=5_000,
        cached_prompt_tokens=20_000,
    )

    total = pricing.calculate_cost(**request)

    assert total == pytest.approx(0.1856)
|
|
|
|
|
|
|
|
|
|
|
2026-03-16 09:01:01 +08:00
|
|
|
def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
    """Check the metadata built from a usage summary without a direct cost.

    The summary carries only token counts and latency, and the test expects
    the resulting cost to be labelled ``catalog_fallback``.
    """
    summary = {
        "input_tokens": 2000,
        "output_tokens": 100,
        "latency_ms": 321,
        "cached_prompt_tokens": 500,
    }

    metadata = LlmPricingService().build_usage_metadata(
        model="qwen3.5-flash",
        usage_summary=summary,
    )

    # Fields compared with ``==``; the cost goes through pytest.approx
    # because it is a floating-point value.
    expected_fields = {
        "model": "qwen3.5-flash",
        "inputTokens": 2000,
        "outputTokens": 100,
        "totalTokens": 2100,
        "cachedPromptTokens": 500,
        "promptCacheHitTokens": 500,
        "promptCacheMissTokens": 1500,
        "reasoningTokens": 0,
        "cost": pytest.approx(0.00051),
        "costSource": "catalog_fallback",
        "latencyMs": 321,
    }
    for field, wanted in expected_fields.items():
        assert metadata[field] == wanted, field

    # Identity check kept out of the loop: a truthy non-bool must not pass.
    assert metadata["usageComplete"] is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_build_usage_metadata_prefers_provider_direct_cost() -> None:
    """Check that a complete provider-reported cost is used as-is.

    The usage summary carries ``direct_cost`` with both the observed and
    complete flags set to 1, and the test expects that exact cost back with
    the source labelled ``provider``.
    """
    provider_usage = {
        "input_tokens": 1000,
        "output_tokens": 100,
        "latency_ms": 100,
        "cached_prompt_tokens": 0,
        "direct_cost": 0.1234,
        "direct_cost_observed": 1,
        "direct_cost_complete": 1,
    }

    result = LlmPricingService().build_usage_metadata(
        model="deepseek-chat",
        usage_summary=provider_usage,
    )

    assert result["cost"] == pytest.approx(0.1234)
    assert result["costSource"] == "provider"
    assert result["usageComplete"] is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_build_usage_metadata_falls_back_when_provider_cost_incomplete() -> None:
    """Check the fallback when the provider cost is flagged incomplete.

    The usage summary carries ``direct_cost`` with the observed flag set to
    1 but the complete flag set to 0. The test expects a different cost
    than the provider-reported one and the source label
    ``catalog_fallback_incomplete_provider_cost``.
    """
    partial_provider_usage = {
        "input_tokens": 1000,
        "output_tokens": 100,
        "latency_ms": 100,
        "cached_prompt_tokens": 0,
        "direct_cost": 0.1234,
        "direct_cost_observed": 1,
        "direct_cost_complete": 0,
    }

    result = LlmPricingService().build_usage_metadata(
        model="deepseek-chat",
        usage_summary=partial_provider_usage,
    )

    assert result["cost"] == pytest.approx(0.0023)
    assert result["costSource"] == "catalog_fallback_incomplete_provider_cost"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_build_usage_metadata_marks_incomplete_usage_fallback() -> None:
    """Check that usage is marked incomplete when no usage was recorded.

    The usage summary reports one model call record but zero usage records,
    with all token counts at zero. The test expects the source label
    ``incomplete_usage_fallback`` and ``usageComplete`` to be ``False``.
    """
    empty_usage = {
        "input_tokens": 0,
        "output_tokens": 0,
        "latency_ms": 0,
        "cached_prompt_tokens": 0,
        "model_call_records": 1,
        "usage_records": 0,
    }

    result = LlmPricingService().build_usage_metadata(
        model="deepseek-chat",
        usage_summary=empty_usage,
    )

    assert result["costSource"] == "incomplete_usage_fallback"
    assert result["usageComplete"] is False
|