mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-19 22:09:10 +00:00
[fix] improve SGLang kt-kernel detection timeout (#1887)
* Increase the timeout for the check that --kt-gpu-prefill-token-threshold appears in the help output from 30 to 90 seconds. In cloud environments, CUDA initialization and Python module loading can easily exceed 30 seconds. * Update kt-kernel/python/cli/utils/sglang_checker.py: add a comment about the change. Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -324,7 +324,7 @@ def check_sglang_kt_kernel_support(use_cache: bool = True, silent: bool = False)
|
||||
[sys.executable, "-m", "sglang.launch_server", "--help"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
timeout=90, # Increased for slow CUDA init and module loading in some environments
|
||||
)
|
||||
|
||||
help_output = result.stdout + result.stderr
|
||||
|
||||
Reference in New Issue
Block a user