# Mirror of https://github.com/kvcache-ai/sglang.git (synced 2026-06-30 03:37:51 +00:00)
# 95 lines, 3.7 KiB, Python
# Reference: https://github.com/flashinfer-ai/flashinfer/blob/v0.2.0/scripts/update_whl_index.py
|
|
|
|
import argparse
|
|
import hashlib
|
|
import pathlib
|
|
import re
|
|
|
|
# The CUDA version whose wheels carry no "+cu<version>" suffix in their file name
DEFAULT_CUDA_VERSION = "130"

# All the CUDA versions that the wheels will cover
SUPPORTED_CUDA_VERSIONS = ["129", DEFAULT_CUDA_VERSION]
|
|
|
|
|
|
def check_wheel_cuda_version(path_name, target_cuda_version):
    """Return True if the wheel file name belongs to ``target_cuda_version``.

    Args:
        path_name: File name of the wheel, e.g.
            ``sglang_kernel-0.4.0+cu129-cp310-abi3-manylinux2014_x86_64.whl``.
        target_cuda_version: CUDA version string as it appears after ``+cu``
            in the wheel name, e.g. ``"129"``.

    Returns:
        bool: whether the wheel should be indexed for ``target_cuda_version``.
    """
    # Skip non-CUDA backend wheels (rocm, musa, ...). Their +<backend><ver>
    # local-version tags don't match the CUDA wheel regex used for indexing,
    # and they are published by the dedicated release-rocm*/release-musa* jobs.
    if re.search(r"\+(rocm|musa)", path_name):
        return False

    def has_cuda_tag(version):
        # Match the "+cu<version>" local-version tag itself instead of a bare
        # substring, so digits that merely occur elsewhere in the file name
        # (e.g. a release numbered 0.4.129) are not mistaken for a CUDA tag.
        # The lookahead stops "+cu129" from matching inside "+cu1290".
        tag = rf"\+cu{re.escape(version)}(?![0-9])"
        return re.search(tag, path_name) is not None

    # For non-default CUDA versions, the wheel name carries the CUDA suffix,
    # e.g. sglang_kernel-0.4.0+cu129-cp310-abi3-manylinux2014_x86_64.whl
    if target_cuda_version != DEFAULT_CUDA_VERSION:
        return has_cuda_tag(target_cuda_version)

    # For the default CUDA version, the wheel name has no CUDA suffix, e.g.
    # sglang_kernel-0.4.0-cp310-abi3-manylinux2014_x86_64.whl
    # So accept the wheel unless it is tagged for another supported version.
    return not any(
        has_cuda_tag(cuda_version)
        for cuda_version in SUPPORTED_CUDA_VERSIONS
        if cuda_version != DEFAULT_CUDA_VERSION
    )
|
|
|
|
|
|
def update_wheel_index(cuda_version=DEFAULT_CUDA_VERSION, rocm_version=None):
    """Append index entries for the CUDA wheels found in ``sgl-kernel/dist``.

    For every ``*.whl`` in ``sgl-kernel/dist`` accepted by
    ``check_wheel_cuda_version`` for ``cuda_version``, one ``<a>`` line with
    the release download URL and the wheel's SHA-256 is appended to
    ``sgl-whl/cu<cuda_version>/sglang-kernel/index.html``.

    Args:
        cuda_version: CUDA version string used for filtering wheels and for
            the index directory name, e.g. ``"129"``.
        rocm_version: Not used by this function; kept so existing callers
            that pass it keep working.

    Raises:
        IndexError: if a selected wheel's file name has no parseable version.
    """
    index_dir = pathlib.Path(f"sgl-whl/cu{cuda_version}/sglang-kernel")
    index_dir.mkdir(exist_ok=True, parents=True)
    index_path = index_dir / "index.html"
    base_url = "https://github.com/sgl-project/whl/releases/download"
    version_pattern = re.compile(
        r"sglang_kernel-([0-9.]+(?:\.post[0-9]+)?)(?:\+cu[0-9]+)?-"
    )

    for path in sorted(pathlib.Path("sgl-kernel/dist").glob("*.whl")):
        # Skip the wheel if it mismatches the passed in cuda_version
        if not check_wheel_cuda_version(path.name, cuda_version):
            continue

        # Parse the version first: it is cheap, and a malformed name should
        # fail with the offending file name before the wheel is hashed.
        match = version_pattern.search(path.name)
        if match is None:
            raise IndexError(
                f"cannot parse version from wheel file name: {path.name}"
            )
        ver = match.group(1)

        # Hash in fixed-size chunks so a large wheel is never held in memory
        # all at once.
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        sha256 = digest.hexdigest()

        full_url = f"{base_url}/v{ver}/{path.name}#sha256={sha256}"
        # Append mode: existing index entries are kept.
        with index_path.open("a") as f:
            f.write(f'<a href="{full_url}">{path.name}</a><br>\n')
|
|
|
|
|
|
def _update_non_cuda_wheel_index(backend, version):
    """Append index entries for the wheels of a non-CUDA backend.

    For every ``*.whl`` in ``sgl-kernel/dist`` whose file name contains
    ``backend``, one ``<a>`` line with the release download URL and the
    wheel's SHA-256 is appended to
    ``sgl-whl/<backend><version>/sglang-kernel/index.html``.

    Args:
        backend: Backend tag searched for in wheel names, e.g. ``"rocm"``.
        version: Backend version, used only for the index directory name.

    Raises:
        IndexError: if a selected wheel's file name has no parseable version.
    """
    index_dir = pathlib.Path(f"sgl-whl/{backend}{version}/sglang-kernel")
    index_dir.mkdir(exist_ok=True, parents=True)
    index_path = index_dir / "index.html"
    base_url = "https://github.com/sgl-project/whl/releases/download"
    backend_pattern = re.compile(f"{backend}")
    version_pattern = re.compile(
        rf"sglang_kernel-([0-9.]+(?:\.post[0-9]+)?)(?:\+{backend}[0-9]+)?-"
    )

    for path in sorted(pathlib.Path("sgl-kernel/dist").glob("*.whl")):
        # Skip the wheel if not for this backend
        if backend_pattern.search(path.name) is None:
            continue

        # Parse the version first: it is cheap, and a malformed name should
        # fail with the offending file name before the wheel is hashed.
        match = version_pattern.search(path.name)
        if match is None:
            raise IndexError(
                f"cannot parse version from wheel file name: {path.name}"
            )
        ver = match.group(1)

        # Hash in fixed-size chunks so a large wheel is never held in memory
        # all at once.
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        sha256 = digest.hexdigest()

        full_url = f"{base_url}/v{ver}/{path.name}#sha256={sha256}"
        # Append mode: existing index entries are kept.
        with index_path.open("a") as f:
            f.write(f'<a href="{full_url}">{path.name}</a><br>\n')
|
|
|
|
|
|
def update_wheel_index_rocm(rocm_version):
    """Index the "rocm" wheels under ``sgl-whl/rocm<rocm_version>/``."""
    _update_non_cuda_wheel_index(backend="rocm", version=rocm_version)
|
|
|
|
|
|
def update_wheel_index_musa(musa_version):
    """Index the "musa" wheels under ``sgl-whl/musa<musa_version>/``."""
    _update_non_cuda_wheel_index(backend="musa", version=musa_version)
|
|
|
|
|
|
def main():
    """Parse the command line and update the index of exactly one backend.

    ``--musa`` takes precedence over ``--rocm``; when neither is given the
    CUDA index for ``--cuda`` (default: ``DEFAULT_CUDA_VERSION``) is updated.
    """
    cli = argparse.ArgumentParser()
    option_defaults = (
        ("--cuda", DEFAULT_CUDA_VERSION),
        ("--rocm", None),
        ("--musa", None),
    )
    for flag, fallback in option_defaults:
        cli.add_argument(flag, type=str, default=fallback)
    opts = cli.parse_args()

    # Guard clauses in priority order: musa, then rocm, then cuda.
    if opts.musa is not None:
        update_wheel_index_musa(opts.musa)
        return
    if opts.rocm is not None:
        update_wheel_index_rocm(opts.rocm)
        return
    update_wheel_index(opts.cuda)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|