"""Tests for ``LiteLLMService`` cost calculation and usage-metadata building."""

from __future__ import annotations

import pytest

from services.litellm.service import LiteLLMService

# Model identifier shared by every test in this module.
_QWEN_FLASH_MODEL = "dashscope/qwen3.5-flash"


def test_calculate_cost_uses_first_qwen_tier() -> None:
    """A 100k-token prompt (10k of it cached) with 1k completion tokens costs ~0.0202."""
    expected_cost = 0.0202

    actual_cost = LiteLLMService().calculate_cost(
        model=_QWEN_FLASH_MODEL,
        prompt_tokens=100_000,
        completion_tokens=1_000,
        cached_prompt_tokens=10_000,
    )

    assert actual_cost == pytest.approx(expected_cost)


def test_calculate_cost_uses_second_qwen_tier() -> None:
    """A 200k-token prompt (20k of it cached) with 5k completion tokens costs ~0.1856."""
    expected_cost = 0.1856

    actual_cost = LiteLLMService().calculate_cost(
        model=_QWEN_FLASH_MODEL,
        prompt_tokens=200_000,
        completion_tokens=5_000,
        cached_prompt_tokens=20_000,
    )

    assert actual_cost == pytest.approx(expected_cost)


def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
    """``build_usage_metadata`` returns camelCase keys plus a cost computed from the summary."""
    summary = {
        "input_tokens": 2_000,
        "output_tokens": 100,
        "latency_ms": 321,
        "cached_prompt_tokens": 500,
    }
    # The cost is a float, so it is matched approximately; every other
    # entry must be exactly equal.
    expected_metadata = {
        "model": _QWEN_FLASH_MODEL,
        "inputTokens": 2_000,
        "outputTokens": 100,
        "cost": pytest.approx(0.00051),
        "latencyMs": 321,
    }

    actual_metadata = LiteLLMService().build_usage_metadata(
        model=_QWEN_FLASH_MODEL,
        usage_summary=summary,
    )

    assert actual_metadata == expected_metadata