# social-app/backend/tests/unit/services/test_litellm_service.py

from __future__ import annotations

import pytest

from services.litellm.service import LiteLLMService


def test_calculate_cost_uses_first_qwen_tier() -> None:
    """Pin the cost reported for the smaller qwen3.5-flash request.

    Calls ``calculate_cost`` with 100,000 prompt tokens (10,000 of them
    cached) and 1,000 completion tokens, and expects roughly 0.0202.
    """
    expected_cost = pytest.approx(0.0202)
    pricing_inputs = {
        "model": "dashscope/qwen3.5-flash",
        "prompt_tokens": 100_000,
        "completion_tokens": 1_000,
        "cached_prompt_tokens": 10_000,
    }

    actual_cost = LiteLLMService().calculate_cost(**pricing_inputs)

    assert actual_cost == expected_cost


def test_calculate_cost_uses_second_qwen_tier() -> None:
    """Pin the cost reported for the larger qwen3.5-flash request.

    Calls ``calculate_cost`` with 200,000 prompt tokens (20,000 of them
    cached) and 5,000 completion tokens, and expects roughly 0.1856.
    """
    expected_cost = pytest.approx(0.1856)
    pricing_inputs = {
        "model": "dashscope/qwen3.5-flash",
        "prompt_tokens": 200_000,
        "completion_tokens": 5_000,
        "cached_prompt_tokens": 20_000,
    }

    actual_cost = LiteLLMService().calculate_cost(**pricing_inputs)

    assert actual_cost == expected_cost


def test_run_completion_extracts_usage_and_cost() -> None:
    """Check usage, cost, and argument forwarding of ``run_completion_with_cost``.

    A stub completion function records the keyword arguments it receives and
    answers with a canned response. The test then verifies that the token
    counts from that response show up on ``result.usage``, that the reported
    cost is roughly 0.00051, and that ``response_format`` reached the stub.
    """
    seen_kwargs: dict[str, object] = {}

    def _stub_completion(**call_kwargs: object) -> dict[str, object]:
        # Remember what the service passed so it can be inspected afterwards.
        seen_kwargs.update(call_kwargs)
        usage_block = {
            "prompt_tokens": 2000,
            "completion_tokens": 100,
            "total_tokens": 2100,
            "prompt_tokens_details": {"cached_tokens": 500},
        }
        return {
            "model": "dashscope/qwen3.5-flash",
            "usage": usage_block,
            "choices": [{"message": {"content": "ok"}}],
        }

    json_format = {"type": "json_object"}
    outcome = LiteLLMService().run_completion_with_cost(
        model="dashscope/qwen3.5-flash",
        messages=[{"role": "user", "content": "hello"}],
        response_format=json_format,
        completion_fn=_stub_completion,
    )

    usage = outcome.usage
    token_counts = (
        usage.prompt_tokens,
        usage.completion_tokens,
        usage.total_tokens,
    )
    assert token_counts == (2000, 100, 2100)
    assert usage.cost == pytest.approx(0.00051)
    assert seen_kwargs["response_format"] == {"type": "json_object"}