"""Tests for ``LiteLLMService`` cost calculation and completion usage extraction."""

from __future__ import annotations

import pytest

from services.litellm.service import LiteLLMService


def test_calculate_cost_uses_first_qwen_tier() -> None:
    """Check the computed cost for a 100k-token prompt against a fixed value.

    The test name indicates this input is meant to exercise the first
    pricing tier; the tier boundaries themselves live in the service.
    """
    llm = LiteLLMService()

    actual = llm.calculate_cost(
        model="dashscope/qwen3.5-flash",
        prompt_tokens=100_000,
        completion_tokens=1_000,
        cached_prompt_tokens=10_000,
    )

    # Compare with a tolerance: the cost is a float.
    assert actual == pytest.approx(0.0202)


def test_calculate_cost_uses_second_qwen_tier() -> None:
    """Check the computed cost for a 200k-token prompt against a fixed value.

    The test name indicates this input is meant to exercise the second
    pricing tier; the tier boundaries themselves live in the service.
    """
    llm = LiteLLMService()

    actual = llm.calculate_cost(
        model="dashscope/qwen3.5-flash",
        prompt_tokens=200_000,
        completion_tokens=5_000,
        cached_prompt_tokens=20_000,
    )

    # Compare with a tolerance: the cost is a float.
    assert actual == pytest.approx(0.1856)


def test_run_completion_extracts_usage_and_cost() -> None:
    """Run a completion through a stub and check the reported usage and cost.

    A stand-in completion function records the keyword arguments it is
    called with and returns a canned response, so the test can verify both
    the usage figures on the result and that ``response_format`` reached
    the completion function.
    """
    llm = LiteLLMService()
    seen_kwargs: dict[str, object] = {}

    def _stub_completion(**kwargs: object) -> dict[str, object]:
        """Record the call's keyword arguments and return a canned response."""
        seen_kwargs.update(kwargs)
        usage_payload = {
            "prompt_tokens": 2000,
            "completion_tokens": 100,
            "total_tokens": 2100,
            "prompt_tokens_details": {"cached_tokens": 500},
        }
        return {
            "model": "dashscope/qwen3.5-flash",
            "usage": usage_payload,
            "choices": [{"message": {"content": "ok"}}],
        }

    outcome = llm.run_completion_with_cost(
        model="dashscope/qwen3.5-flash",
        messages=[{"role": "user", "content": "hello"}],
        response_format={"type": "json_object"},
        completion_fn=_stub_completion,
    )

    # Token counts should mirror the canned "usage" payload.
    assert outcome.usage.prompt_tokens == 2000
    assert outcome.usage.completion_tokens == 100
    assert outcome.usage.total_tokens == 2100
    assert outcome.usage.cost == pytest.approx(0.00051)

    # The requested response format must be forwarded to the completion call.
    assert seen_kwargs["response_format"] == {"type": "json_object"}