85 lines
2.3 KiB
Python
85 lines
2.3 KiB
Python
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from services.litellm.service import LiteLLMService
|
|
|
|
|
|
def test_calculate_cost_uses_first_qwen_tier() -> None:
    """Check the cost ``calculate_cost`` reports for a 100k-token prompt.

    Calls ``LiteLLMService.calculate_cost`` for ``dashscope/qwen3.5-flash``
    with 100,000 prompt tokens (10,000 of them cached) and 1,000 completion
    tokens, and expects roughly 0.0202 back. Per the test name, this input
    is meant to land in the first pricing tier.
    """
    expected_cost = 0.0202

    actual_cost = LiteLLMService().calculate_cost(
        model="dashscope/qwen3.5-flash",
        prompt_tokens=100_000,
        completion_tokens=1_000,
        cached_prompt_tokens=10_000,
    )

    assert actual_cost == pytest.approx(expected_cost)
|
|
|
|
|
|
def test_calculate_cost_uses_second_qwen_tier() -> None:
    """Check the cost ``calculate_cost`` reports for a 200k-token prompt.

    Calls ``LiteLLMService.calculate_cost`` for ``dashscope/qwen3.5-flash``
    with 200,000 prompt tokens (20,000 of them cached) and 5,000 completion
    tokens, and expects roughly 0.1856 back. Per the test name, this input
    is meant to land in the second pricing tier.
    """
    expected_cost = 0.1856

    actual_cost = LiteLLMService().calculate_cost(
        model="dashscope/qwen3.5-flash",
        prompt_tokens=200_000,
        completion_tokens=5_000,
        cached_prompt_tokens=20_000,
    )

    assert actual_cost == pytest.approx(expected_cost)
|
|
|
|
|
|
def test_run_completion_extracts_usage_and_cost() -> None:
    """Check usage/cost extraction in ``run_completion_with_cost``.

    A stub completion function stands in for the real backend: it records
    the keyword arguments it is called with and returns a canned response
    carrying a ``usage`` section. The test then verifies that the token
    counts and cost on the result match that canned usage, and that
    ``response_format`` reached the completion function unchanged.
    """
    seen_kwargs: dict[str, object] = {}

    def _stub_completion(**kwargs: object) -> dict[str, object]:
        # Record what the service forwarded so it can be inspected below.
        seen_kwargs.update(kwargs)
        # Build a fresh response on every call, mirroring a real backend.
        usage_section = {
            "prompt_tokens": 2000,
            "completion_tokens": 100,
            "total_tokens": 2100,
            "prompt_tokens_details": {"cached_tokens": 500},
        }
        return {
            "model": "dashscope/qwen3.5-flash",
            "usage": usage_section,
            "choices": [{"message": {"content": "ok"}}],
        }

    outcome = LiteLLMService().run_completion_with_cost(
        model="dashscope/qwen3.5-flash",
        messages=[{"role": "user", "content": "hello"}],
        response_format={"type": "json_object"},
        completion_fn=_stub_completion,
    )

    # Token counts must mirror the canned usage section.
    assert outcome.usage.prompt_tokens == 2000
    assert outcome.usage.completion_tokens == 100
    assert outcome.usage.total_tokens == 2100
    assert outcome.usage.cost == pytest.approx(0.00051)
    # The requested response format must be passed through to the backend.
    assert seen_kwargs["response_format"] == {"type": "json_object"}
|
|
|
|
|
|
def test_build_usage_metadata_calculates_cost_from_usage_summary() -> None:
    """Check the metadata dict ``build_usage_metadata`` produces.

    Feeds a usage summary (input/output tokens, latency, cached prompt
    tokens) for ``dashscope/qwen3.5-flash`` into
    ``LiteLLMService.build_usage_metadata`` and expects a dict with exactly
    the keys ``model``, ``inputTokens``, ``outputTokens``, ``cost`` and
    ``latencyMs``, where ``cost`` is approximately 0.00051.
    """
    model_name = "dashscope/qwen3.5-flash"
    summary = {
        "input_tokens": 2000,
        "output_tokens": 100,
        "latency_ms": 321,
        "cached_prompt_tokens": 500,
    }

    produced = LiteLLMService().build_usage_metadata(
        model=model_name,
        usage_summary=summary,
    )

    # Whole-dict equality: any extra or missing key fails the test, while
    # the cost entry is compared with a floating-point tolerance.
    expected = {
        "model": "dashscope/qwen3.5-flash",
        "inputTokens": 2000,
        "outputTokens": 100,
        "cost": pytest.approx(0.00051),
        "latencyMs": 321,
    }
    assert produced == expected
|