Mirror of https://github.com/theroyallab/tabbyAPI.git (synced 2026-05-11 16:30:16 +00:00)
- remove disconnect_task
- move disconnect logic to a per-request handler that wraps the cleanup operation and directly polls the request state with throttling (see the sketch below)
- exclusively signal disconnect with CancelledError
- rework the completions endpoint to follow the same approach as chat completions, sharing some code
- refactor the OAI endpoints a bit
- correct behavior for batched completion requests
- make sure logprobs work for completion and streaming completion requests
- more tests
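A rough sketch of the per-request disconnect handler described above, assuming FastAPI/Starlette's Request.is_disconnected() and asyncio; the names and poll interval here are illustrative, not taken from the actual commit:

import asyncio

from fastapi import Request

POLL_INTERVAL = 0.5  # throttle for the disconnect poll; illustrative value


async def run_until_disconnect(request: Request, coro):
    """Run a generation coroutine, cancelling it if the client disconnects.

    A per-request wrapper replaces the global disconnect_task: it polls the
    request state with throttling, and the inner task only ever observes
    asyncio.CancelledError, so cleanup needs exactly one exception path.
    """
    task = asyncio.create_task(coro)
    try:
        while not task.done():
            if await request.is_disconnected():
                task.cancel()  # disconnect signalled exclusively via CancelledError
                break
            await asyncio.sleep(POLL_INTERVAL)
        return await task
    except asyncio.CancelledError:
        # request-scoped cleanup (e.g. releasing the generator slot) goes here
        raise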
52 lines · 1.4 KiB · Python
import yaml

from _common import *


BASE_URL = "http://localhost:5000/v1"
MODEL = "/mnt/str/models/qwen3.5-35b-a3b/exl3/4.09bpw/"

# Chat completion request with thinking disabled: asks for top-7 logprobs per
# token and usage stats in the final streaming chunk
oai_request = {
    "model": MODEL,
    "template_vars": {
        "enable_thinking": False,
    },
    "messages": [
        {
            "role": "user",
            "content": "Write a Haiku about fish, and start each line with a fish-related emoji.",
        }
    ],
    "logprobs": True,
    "top_logprobs": 7,
    "stream_options": {"include_usage": True},
    # "max_tokens": 200,
}

# Same request shape with thinking enabled and top-5 logprobs
oai_request_2 = {
    "model": MODEL,
    "template_vars": {
        "enable_thinking": True,
    },
    "messages": [{"role": "user", "content": "What is the mass of a water molecule, in kg?"}],
    "logprobs": True,
    "top_logprobs": 5,
    "stream_options": {"include_usage": True},
    # "max_tokens": 200,
}


def main():
    # Authenticate with the admin key from the local token store
    with open("api_tokens.yml") as f:
        tokens = yaml.safe_load(f)
        api_key = tokens["admin_key"]

    # Exercise non-streaming and streaming chat completions, single and
    # batched (n = 2), plus a raw dump of the streamed chunks
    test_chat_request(api_key, BASE_URL, oai_request.copy(), n=1)
    test_chat_request(api_key, BASE_URL, oai_request.copy(), n=2)
    test_chat_streaming(api_key, BASE_URL, oai_request.copy(), n=1)
    test_chat_streaming(api_key, BASE_URL, oai_request.copy(), n=2)
    test_chat_streaming(api_key, BASE_URL, oai_request.copy(), n=1, rawdump=True)
    test_chat_request(api_key, BASE_URL, oai_request_2.copy(), n=1)


if __name__ == "__main__":
    main()
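The test_chat_request and test_chat_streaming helpers come from _common (star-imported above) and are not shown on this page. A minimal stand-in consistent with the call sites, assuming the requests library and OpenAI-style SSE streaming; everything here is inferred from usage, not the repo's actual implementation:

import json

import requests


def test_chat_request(api_key, base_url, req, n=1):
    # One POST to the chat completions endpoint; n > 1 requests a batch
    req["n"] = n
    resp = requests.post(
        f"{base_url}/chat/completions",
        headers={"Authorization": f"Bearer {api_key}"},
        json=req,
    )
    resp.raise_for_status()
    data = resp.json()
    assert len(data["choices"]) == n  # one choice (with logprobs) per completion
    print(json.dumps(data, indent=2))


def test_chat_streaming(api_key, base_url, req, n=1, rawdump=False):
    # Same endpoint with stream=True; chunks arrive as "data: {...}" SSE lines
    req.update({"n": n, "stream": True})
    resp = requests.post(
        f"{base_url}/chat/completions",
        headers={"Authorization": f"Bearer {api_key}"},
        json=req,
        stream=True,
    )
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line.startswith(b"data: "):
            continue
        payload = line[len(b"data: "):]
        if payload == b"[DONE]":
            break
        if rawdump:
            print(json.dumps(json.loads(payload), indent=2))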