Files
tabbyAPI/tests/req_logprob.py
turboderp 79d581e1f5 OAI endpoints: More rework
- remove disconnect_task
- move disconnect logic to a per-request handler that wraps cleanup operation and directly polls the request state with throttling
- exclusively signal disconnect with CancelledError
- rework completions endpoint to follow same approach as chat completions, share some code
- refactor OAI endpoints a bit
- correct behavior for batched completion requests
- make sure logprobs work for completion and streaming completion requests
- more tests
2026-04-02 01:26:44 +02:00

52 lines
1.4 KiB
Python

import yaml
from _common import *
BASE_URL = "http://localhost:5000/v1"
MODEL = "/mnt/str/models/qwen3.5-35b-a3b/exl3/4.09bpw/"


def _logprob_chat_request(prompt, enable_thinking, top_logprobs):
    """Build a chat-completion payload for MODEL that requests logprobs.

    Args:
        prompt: Text of the single user message.
        enable_thinking: Value stored under template_vars["enable_thinking"].
        top_logprobs: Value stored under "top_logprobs".

    Returns:
        A new dict; its nested dicts and list are created on every call, so
        two payloads never share nested objects.
    """
    return {
        "model": MODEL,
        "template_vars": {
            "enable_thinking": enable_thinking,
        },
        "messages": [
            {
                "role": "user",
                "content": prompt,
            }
        ],
        "logprobs": True,
        "top_logprobs": top_logprobs,
        "stream_options": {"include_usage": True},
        # "max_tokens": 200,
    }


# Payload with enable_thinking off, asking for the top 7 logprobs.
oai_request = _logprob_chat_request(
    "Write a Haiku about fish, and start each line with a fish-related emoji.",
    enable_thinking=False,
    top_logprobs=7,
)

# Payload with enable_thinking on, asking for the top 5 logprobs.
oai_request_2 = _logprob_chat_request(
    "What is the mass of a water molecule, in kg?",
    enable_thinking=True,
    top_logprobs=5,
)
def main():
    """Send the logprob test requests to the server at BASE_URL.

    Loads ``api_tokens.yml`` from the current working directory, takes its
    ``admin_key`` entry as the API key, and then calls ``test_chat_request``
    and ``test_chat_streaming`` (not defined in this file; presumably provided
    by the ``_common`` star import) with ``oai_request`` for n=1 and n=2, one
    extra streaming run with ``rawdump=True``, and finally
    ``test_chat_request`` with ``oai_request_2``.

    Raises:
        FileNotFoundError: If ``api_tokens.yml`` is not in the working
            directory.
        KeyError: If the loaded mapping has no ``admin_key`` entry.
    """
    # Function-scope import: only this entry point needs it.
    from copy import deepcopy

    # Explicit encoding so the key file is decoded the same way on every
    # platform instead of depending on the locale default.
    with open("api_tokens.yml", encoding="utf-8") as f:
        tokens = yaml.safe_load(f)
    api_key = tokens["admin_key"]

    # Each call receives a deep copy. dict.copy() is shallow, so the nested
    # "template_vars", "messages" and "stream_options" objects would be shared
    # between runs and any in-place change made by a helper would leak into
    # the following requests.
    test_chat_request(api_key, BASE_URL, deepcopy(oai_request), n=1)
    test_chat_request(api_key, BASE_URL, deepcopy(oai_request), n=2)
    test_chat_streaming(api_key, BASE_URL, deepcopy(oai_request), n=1)
    test_chat_streaming(api_key, BASE_URL, deepcopy(oai_request), n=2)
    test_chat_streaming(api_key, BASE_URL, deepcopy(oai_request), n=1, rawdump=True)
    test_chat_request(api_key, BASE_URL, deepcopy(oai_request_2), n=1)


if __name__ == "__main__":
    main()