Files
nvbench/ci/build_multi_cuda_wheel.sh
2026-01-29 13:25:17 -06:00

193 lines
5.9 KiB
Bash
Executable File

#!/bin/bash
# Apache 2.0 License
# Copyright 2024-2025 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 with the LLVM exception
# (the "License"); you may not use this file except in compliance with
# the License.
#
# You may obtain a copy of the License at
#
# http://llvm.org/foundation/relicensing/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build a multi-CUDA wheel for the given Python version
# This builds separate wheels for each supported CUDA major version,
# and then merges them into a single wheel containing extensions
# for all CUDA versions. At runtime, depending on the installed CUDA version,
# the correct extension will be chosen.
# Strict mode: stop on command failures, unset variables and failed pipeline stages.
set -e -u -o pipefail

# Absolute path of the directory that contains this script.
ci_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
usage="Usage: $0 -py-version <python_version> [additional options...]"

# Shared argument parser; parse_python_args and require_py_version are
# presumably defined there (verify against util/python/common_arg_parser.sh).
. "$ci_dir/util/python/common_arg_parser.sh"
parse_python_args "$@"

# A Python version is mandatory for this script; stop if it was not provided.
if ! require_py_version "$usage"; then
  exit 1
fi

# Diagnostic only: print the Docker socket path when it is visible from here.
echo "Docker socket: " $(ls /var/run/docker.sock)
# Local runs may not provide HOST_WORKSPACE; in that case default it to the
# repository root, i.e. the parent directory of ci_dir.
if [[ -n "${HOST_WORKSPACE:-}" ]]; then
  : # Already provided by the caller; keep it untouched.
else
  HOST_WORKSPACE="$(cd "${ci_dir}/.." && pwd)"
  echo "Setting HOST_WORKSPACE to: $HOST_WORKSPACE"
fi
# cuda-bench wheels have to be produced in a container that can emit manylinux
# wheels and ships the CUDA toolkit; the rapidsai/ci-wheel images are used for
# that. Each CUDA major version gets its own container and its own wheel, and
# the resulting wheels are merged into a single one afterwards.
readonly cuda12_version=12.9.1 cuda13_version=13.0.1
readonly devcontainer_version=25.12 devcontainer_distro=rockylinux8

# Image tags end in "-arm64" on aarch64 hosts and carry no suffix otherwise.
case "$(uname -m)" in
  aarch64) readonly host_arch_suffix="-arm64" ;;
  *)       readonly host_arch_suffix="" ;;
esac

# Fully qualified image names, one per CUDA major version.
readonly cuda12_image="rapidsai/ci-wheel:${devcontainer_version}-cuda${cuda12_version}-${devcontainer_distro}-py${py_version}${host_arch_suffix}"
readonly cuda13_image="rapidsai/ci-wheel:${devcontainer_version}-cuda${cuda13_version}-${devcontainer_distro}-py${py_version}${host_arch_suffix}"
# Build one wheel per CUDA major version, each inside its matching container.
# The repository (HOST_WORKSPACE) is bind-mounted at /workspace/ and the
# container runs /workspace/ci/build_cuda_bench_wheel_for_cuda.sh from
# /workspace/python, with py_version passed through the environment.
mkdir -p wheelhouse
for ctk in 12 13; do
  # Resolve cuda12_image / cuda13_image by name with indirect expansion
  # rather than eval, so the image value is never re-parsed as shell code.
  image_var="cuda${ctk}_image"
  image="${!image_var}"
  echo "::group::⚒️ Building CUDA ${ctk} wheel on ${image}"
  (
    # Trace the docker commands; the subshell keeps tracing local to this step.
    set -x
    docker pull "$image"
    # All expansions are quoted so a workspace path containing whitespace
    # still reaches docker as a single --mount argument.
    docker run --rm -i \
      --workdir /workspace/python \
      --mount "type=bind,source=${HOST_WORKSPACE},target=/workspace/" \
      --env "py_version=${py_version}" \
      "$image" \
      /workspace/ci/build_cuda_bench_wheel_for_cuda.sh
    # Prevent GHA runners from exhausting available storage with leftover images:
    if [[ -n "${GITHUB_ACTIONS:-}" ]]; then
      docker rmi -f "$image"
    fi
  )
  echo "::endgroup::"
done
echo "Merging CUDA wheels..."

# Choose the interpreter command: "python" is preferred, "python3" is the
# fallback, and the script stops when neither is on PATH.
PYTHON=""
for candidate in python python3; do
  if command -v "$candidate" &> /dev/null; then
    PYTHON="$candidate"
    break
  fi
done
if [[ -z "$PYTHON" ]]; then
  echo "Error: No python found"
  exit 1
fi
# The "wheel" package is needed for unpacking and repacking wheels.
$PYTHON -m pip install --break-system-packages wheel

# Print the given error line followed by a listing of wheelhouse/, then abort.
abort_with_wheelhouse_listing() {
  echo "$1"
  ls -la wheelhouse/
  exit 1
}

# Locate the per-CUDA wheels. Their names temporarily carry a cu12/cu13 marker
# so the two builds do not collide; only the first match of each is used.
cu12_wheel="$(find wheelhouse -name "*cu12*.whl" | head -n 1)"
cu13_wheel="$(find wheelhouse -name "*cu13*.whl" | head -n 1)"

# Both wheels must exist and must be two distinct files.
[[ -n "$cu12_wheel" ]] \
  || abort_with_wheelhouse_listing "Error: CUDA 12 wheel not found in wheelhouse/"
[[ -n "$cu13_wheel" ]] \
  || abort_with_wheelhouse_listing "Error: CUDA 13 wheel not found in wheelhouse/"
[[ "$cu12_wheel" != "$cu13_wheel" ]] \
  || abort_with_wheelhouse_listing "Error: Only one wheel found, expected two (CUDA 12 and CUDA 13)"

echo "Found CUDA 12 wheel: $cu12_wheel"
echo "Found CUDA 13 wheel: $cu13_wheel"

# Resolve to absolute paths now, because later steps change the working directory.
cu12_wheel="$(readlink -f "$cu12_wheel")"
cu13_wheel="$(readlink -f "$cu13_wheel")"
# Merge the wheels manually: the CUDA 12 wheel is the base, and the
# cuda/bench/cu13 directory of the CUDA 13 wheel is copied into it before
# the result is repacked as a single wheel inside wheelhouse_merged/.

# Start from an empty scratch directory. Leftovers from an interrupted run
# would make `mkdir cu13_tmp` fail or let a stale unpacked tree be picked up.
rm -rf wheelhouse_merged
mkdir -p wheelhouse_merged
cd wheelhouse_merged

# Unpack CUDA 12 wheel (this will be our base)
$PYTHON -m wheel unpack "$cu12_wheel"
base_dir=$(find . -maxdepth 1 -type d -name "cuda_bench-*" | head -1)
if [[ -z "$base_dir" ]]; then
  echo "Error: unpacking $cu12_wheel did not produce a cuda_bench-* directory" >&2
  exit 1
fi

# Unpack CUDA 13 wheel into a temporary subdirectory
mkdir cu13_tmp
cd cu13_tmp
$PYTHON -m wheel unpack "$cu13_wheel"
cu13_dir=$(find . -maxdepth 1 -type d -name "cuda_bench-*" | head -1)
if [[ -z "$cu13_dir" ]]; then
  echo "Error: unpacking $cu13_wheel did not produce a cuda_bench-* directory" >&2
  exit 1
fi
if [[ ! -d "$cu13_dir/cuda/bench/cu13" ]]; then
  echo "Error: $cu13_wheel does not contain cuda/bench/cu13" >&2
  exit 1
fi

# Copy the cu13/ directory from CUDA 13 wheel into the base wheel
cp -r "$cu13_dir"/cuda/bench/cu13 "../$base_dir/cuda/bench/"

# Go back and clean up
cd ..
rm -rf cu13_tmp

# Remove RECORD file to let wheel recreate it
rm -f "$base_dir"/*.dist-info/RECORD

# Repack the merged wheel (written into the current directory, wheelhouse_merged/)
$PYTHON -m wheel pack "$base_dir"

# Return to the directory the script was in before entering wheelhouse_merged/
cd ..
# auditwheel is required to repair the merged wheel.
$PYTHON -m pip install --break-system-packages auditwheel

# Libraries handed to `auditwheel repair` as --exclude options, in this order.
auditwheel_excludes=()
for lib in \
  'libcuda.so.1' \
  'libnvidia-ml.so.1' \
  'libcupti.so.12' \
  'libcupti.so.13' \
  'libnvperf_host.so' \
  'libnvperf_target.so'; do
  auditwheel_excludes+=(--exclude "$lib")
done

# Repair every merged wheel; repaired wheels are written to wheelhouse_final/.
for wheel in wheelhouse_merged/cuda_bench-*.whl; do
  echo "Repairing merged wheel: $wheel"
  $PYTHON -m auditwheel repair "${auditwheel_excludes[@]}" "$wheel" --wheel-dir wheelhouse_final
done
# Empty wheelhouse/ of the intermediate per-CUDA wheels so that it ends up
# holding only the final merged wheel.
rm -rf wheelhouse/*
mkdir -p wheelhouse

# Prefer the repaired wheel from wheelhouse_final/; when none exists, fall
# back to the unrepaired wheel from wheelhouse_merged/.
if ! ls wheelhouse_final/cuda_bench-*.whl &> /dev/null; then
  echo "No final repaired wheel found, moving unrepaired merged wheel"
  mv wheelhouse_merged/cuda_bench-*.whl wheelhouse/
else
  mv wheelhouse_final/cuda_bench-*.whl wheelhouse/
  echo "Final merged wheel moved to wheelhouse"
fi

# Remove the scratch directories used for merging and repairing.
for scratch_dir in wheelhouse_merged wheelhouse_final; do
  rm -rf "$scratch_dir"
done

echo "Final wheels in wheelhouse:"
ls -la wheelhouse/