# tabbyAPI/tests/req_long.py

import random
import string

import yaml
from _common import *

# Endpoint and model used by every request in this script.
BASE_URL = "http://localhost:5000/v1"
MODEL = "/mnt/str/models/qwen3.5-35b-a3b/exl3/4.09bpw/"

# Four random ASCII letters placed in front of the prompt so that each run
# sends a different prompt prefix.
# NOTE(review): presumably this defeats server-side prompt caching - confirm.
_random_prefix = "".join(random.choices(string.ascii_letters, k=4))

# The same short sentence repeated 5000 times to produce a very long prompt.
_repeated_text = "All work and no play. " * 5000

# Long-prompt chat request: one user message, capped at 500 generated tokens,
# with usage reporting requested through ``stream_options``.
oai_request = {
    "model": MODEL,
    "messages": [
        {"role": "user", "content": _random_prefix + _repeated_text},
    ],
    "max_tokens": 500,
    "stream_options": {"include_usage": True},
}

# Minimal chat request that passes ``enable_thinking: False`` as a template
# variable.
non_tool_request = {
    "model": MODEL,
    "template_vars": {"enable_thinking": False},
    "messages": [
        {"role": "user", "content": "Hello."},
    ],
}


def main():
    """Send the short request once, then the long-prompt request in batches.

    The admin key is read from ``api_tokens.yml`` (resolved relative to the
    current working directory). The short request is streamed once with
    ``n=1``; afterwards the long-prompt request is sent with ``n`` set to 1,
    2 and 4, first through ``test_chat_request`` and then through
    ``test_chat_streaming``. Each call receives its own shallow copy of the
    request dict.
    """
    with open("api_tokens.yml") as token_file:
        api_key = yaml.safe_load(token_file)["admin_key"]

    # Short request first, before the long prompts.
    test_chat_streaming(api_key, BASE_URL, non_tool_request.copy(), n=1)

    # All non-streaming runs complete before any streaming run starts.
    for run_test in (test_chat_request, test_chat_streaming):
        for n_value in (1, 2, 4):
            run_test(api_key, BASE_URL, oai_request.copy(), n=n_value)


# Run the request sequence only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()