diff --git a/.clangd b/.clangd
index 1649e16..0e4c84b 100644
--- a/.clangd
+++ b/.clangd
@@ -32,7 +32,6 @@ CompileFlags:
# report all errors
- "-ferror-limit=0"
- "-ftemplate-backtrace-limit=0"
- - "-stdlib=libc++"
- "-std=c++17"
Remove:
# strip CUDA fatbin args
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
new file mode 100644
index 0000000..e84b5f3
--- /dev/null
+++ b/.devcontainer/README.md
@@ -0,0 +1,198 @@
+> **Note**
+> The instructions in this README are specific to Linux development environments. Instructions for Windows are coming soon!
+
+[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json)
+
+# CCCL Dev Containers
+
+CCCL uses [Development Containers](https://containers.dev/) to provide consistent and convenient development environments for both local development and for CI. This guide covers setup in [Visual Studio Code](#quickstart-vscode-recommended) and [Docker](#quickstart-docker-manual-approach). The guide also provides additional instructions in case you want to use WSL.
+
+## Table of Contents
+1. [Quickstart: VSCode (Recommended)](#quickstart-vscode-recommended)
+2. [Quickstart: Docker (Manual Approach)](#quickstart-docker-manual-approach)
+3. [Quickstart: Using WSL](#quickstart-using-wsl)
+
+## Quickstart: VSCode (Recommended)
+
+### Prerequisites
+- [Visual Studio Code](https://code.visualstudio.com/)
+- [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) (formerly "Remote - Containers")
+- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+- [Docker](https://docs.docker.com/engine/install/) - This is only for completeness because it should already be implicitly installed by the Dev Containers extension
+
+### Steps
+
+1. Clone the Repository
+ ```bash
+ git clone https://github.com/nvidia/cccl.git
+ ```
+2. Open the cloned directory in VSCode
+
+3. Launch a Dev Container by clicking the prompt suggesting to "Reopen in Container"
+
+ 
+
+ - Alternatively, use the Command Palette to start a Dev Container. Press `Ctrl+Shift+P` to open the Command Palette. Type "Dev Containers: Reopen in Container" and select it.
+
+ 
+
+4. Select an environment with the desired CTK and host compiler from the list:
+
+ 
+
+5. VSCode will initialize the selected Dev Container. This can take a few minutes the first time.
+
+6. Once initialized, the local `cccl/` directory is mirrored into the container to ensure any changes are persistent.
+
+7. Done! See the [contributing guide](../CONTRIBUTING.md#building-and-testing) for instructions on how to build and run tests.
+
+### (Optional) Authenticate with GitHub for `sccache`
+
+After starting the container, there will be a prompt to authenticate with GitHub. This grants access to a [`sccache`](https://github.com/mozilla/sccache) server shared with CI and greatly accelerates local build times. This is currently limited to NVIDIA employees belonging to the `NVIDIA` or `rapidsai` GitHub organizations.
+
+Without authentication to the remote server, `sccache` will still accelerate local builds by using a filesystem cache.
+
+Follow the instructions in the prompt as below and enter the one-time code at https://github.com/login/device
+
+ 
+
+To manually trigger this authentication, execute the `devcontainer-utils-vault-s3-init` script within the container.
+
+For more information about the sccache configuration and authentication, see the documentation at [`rapidsai/devcontainers`](https://github.com/rapidsai/devcontainers/blob/branch-23.10/USAGE.md#build-caching-with-sccache).
+
+## Quickstart: Docker (Manual Approach)
+
+### Prerequisites
+- [Docker](https://docs.docker.com/desktop/install/linux-install/)
+
+### Steps
+1. Clone the repository and use the [`launch.sh`](./launch.sh) script to launch the default container environment
+ ```bash
+ git clone https://github.com/nvidia/cccl.git
+ cd cccl
+ ./.devcontainer/launch.sh --docker
+ ```
+ This script starts an interactive shell as the `coder` user inside the container with the local `cccl/` directory mirrored into `/home/coder/cccl`.
+
+ For specific environments, use the `--cuda` and `--host` options:
+ ```bash
+ ./.devcontainer/launch.sh --docker --cuda 12.2 --host gcc10
+ ```
+ See `./.devcontainer/launch.sh --help` for more information.
+
+2. Done. See the [contributing guide](../CONTRIBUTING.md#building-and-testing) for instructions on how to build and run tests.
+
+## Available Environments
+
+CCCL provides environments for both the oldest and newest supported CUDA versions with all compatible host compilers.
+
+Look in the [`.devcontainer/`](.) directory to see the available configurations. The top-level [`devcontainer.json`](./devcontainer.json) serves as the default environment. All `devcontainer.json` files in the `cuda<CTK_VERSION>-<HOST_COMPILER>` sub-directories (for example, `cuda12.4-gcc12`) are variations on this top-level file, with different base images for the different CUDA and host compiler versions.
+
+## VSCode Customization
+
+CCCL's Dev Containers come with certain VSCode settings and extensions configured by default, as can be seen in the [`devcontainer.json`](./devcontainer.json) file. This can be further customized by users without needing to modify the `devcontainer.json` file directly.
+
+For extensions, the [`dev.containers.defaultExtensions` setting](https://code.visualstudio.com/docs/devcontainers/containers#_always-installed-extensions) allows listing extensions that will always be installed.
+
+For more general customizations, VSCode allows using a dotfile repository. See the [VSCode documentation](https://code.visualstudio.com/docs/devcontainers/containers#_personalizing-with-dotfile-repositories) for more information.
+
+## GitHub Codespaces
+
+[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json)
+
+One of the benefits of Dev Containers is that they integrate natively with [GitHub Codespaces](https://github.com/features/codespaces). Codespaces provide a VSCode development environment right in your browser running on a machine in the cloud. This provides a truly one-click, turnkey development environment where you can develop, build, and test with no other setup required.
+
+Click the badge above or [click here](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json) to get started with CCCL's Dev Containers on Codespaces. This will start the default Dev Container environment. [Click here](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=296416761&skip_quickstart=true) to start a Codespace with a particular environment and hardware configuration as shown:
+
+ 
+
+## For Maintainers: The `make_devcontainers.sh` Script
+
+### Overview
+
+[`make_devcontainers.sh`](./make_devcontainers.sh) generates devcontainer configurations for the unique combinations of CUDA Toolkit (CTK) versions and host compilers in [`ci/matrix.yaml`](../ci/matrix.yaml).
+
+### How It Works:
+
+1. Parses the matrix from `ci/matrix.yaml`.
+2. Uses the top-level [`.devcontainer/devcontainer.json`](./devcontainer.json) as a template. For each unique combination of CTK version and host compiler, it generates a corresponding `devcontainer.json` configuration, adjusting only the base Docker image to match the desired environment.
+3. Places the generated configurations in the `.devcontainer` directory, organizing them into subdirectories following the naming convention `cuda<CTK_VERSION>-<HOST_COMPILER><COMPILER_VERSION>` (for example, `cuda12.4-gcc12`).
+
+For more information, see the `.devcontainer/make_devcontainers.sh --help` message.
+
+**Note**: When adding or updating supported environments, modify `matrix.yaml` and then rerun this script to synchronize the `devcontainer` configurations.
+
+## Quickstart: Using WSL
+
+> [!NOTE]
+> _Make sure you have the NVIDIA driver installed on your Windows host before proceeding_. Run `nvidia-smi` to verify.
+
+### Install WSL on your Windows host
+
+> [!WARNING]
+> Disclaimer: This guide was developed for WSL 2 on Windows 11.
+
+1. Launch a Windows terminal (_e.g. PowerShell_) as an administrator.
+
+2. Install WSL 2 by running:
+```bash
+wsl --install
+```
+By default, this should install the Ubuntu distribution.
+
+3. Restart your computer and run `wsl -l -v` on a Windows terminal to verify installation.
+
+### <a name="prereqs"></a>Install prerequisites and VS Code extensions
+
+4. Launch your WSL/Ubuntu terminal by running `wsl` in PowerShell.
+
+5. Install the [WSL extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-wsl) on VS Code.
+
+ - `Ctrl + Shift + P` and select `WSL: Connect to WSL` (it will prompt you to install the WSL extension).
+
+ - Make sure you are connected to WSL with VS Code by checking the bottom left corner of the VS Code window (should indicate "WSL: Ubuntu" in our case).
+
+6. Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) on VS Code.
+
+ - On a fresh system you should be prompted to install `Docker` at this point; accept the prompt. If the installation hangs, you might have to restart VS Code afterwards.
+
+7. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). **Make sure you install the WSL 2 version and not the native Linux one**. This builds on top of Docker so make sure you have Docker properly installed (run `docker --version`).
+
+8. Open `/etc/docker/daemon.json` from within your WSL system (if the file does not exist, create it) and add the following:
+
+```json
+{
+ "runtimes": {
+ "nvidia": {
+ "path": "nvidia-container-runtime",
+ "runtimeArgs": []
+ }
+ }
+}
+```
+
+then run `sudo systemctl restart docker.service`.
+
+---
+### Build CCCL in WSL using Dev Containers
+
+9. Still on your WSL terminal run `git clone https://github.com/NVIDIA/cccl.git`
+
+
+10. Open the cloned CCCL repo in VS Code (`Ctrl + Shift + P`, select `File: Open Folder...`, and select the path where your CCCL clone is located).
+
+11. If prompted, choose `Reopen in Container`.
+
+ - If you are not prompted just type `Ctrl + Shift + P` and `Dev Containers: Open Folder in Container ...`.
+
+12. Verify that Dev Container was configured properly by running `nvidia-smi` in your Dev Container terminal. For a proper configuration it is important for the steps in [Install prerequisites and VS Code extensions](#prereqs) to be followed in a precise order.
+
+From that point on, the guide aligns with our [existing Dev Containers native Linux guide](https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md) with just one minor potential alteration:
+
+13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message.
+
+> Failed opening a web browser at https://github.com/login/device
+ exec: "xdg-open,x-www-browser,www-browser,wslview": executable file not found in $PATH
+ Please try entering the URL in your browser manually
+
+In that case, manually enter the address https://github.com/login/device in your web browser and fill in the one-time code.
diff --git a/.devcontainer/cuda11.1-gcc6/devcontainer.json b/.devcontainer/cuda11.1-gcc6/devcontainer.json
new file mode 100644
index 0000000..9faa25a
--- /dev/null
+++ b/.devcontainer/cuda11.1-gcc6/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc6-cuda11.1-ubuntu18.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda11.1-gcc6",
+ "CCCL_CUDA_VERSION": "11.1",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "6",
+ "CCCL_BUILD_INFIX": "cuda11.1-gcc6"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda11.1-gcc6"
+}
diff --git a/.devcontainer/cuda11.1-gcc7/devcontainer.json b/.devcontainer/cuda11.1-gcc7/devcontainer.json
new file mode 100644
index 0000000..3e9e2f7
--- /dev/null
+++ b/.devcontainer/cuda11.1-gcc7/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc7-cuda11.1-ubuntu18.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda11.1-gcc7",
+ "CCCL_CUDA_VERSION": "11.1",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "7",
+ "CCCL_BUILD_INFIX": "cuda11.1-gcc7"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda11.1-gcc7"
+}
diff --git a/.devcontainer/cuda11.1-gcc8/devcontainer.json b/.devcontainer/cuda11.1-gcc8/devcontainer.json
new file mode 100644
index 0000000..3862680
--- /dev/null
+++ b/.devcontainer/cuda11.1-gcc8/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc8-cuda11.1-ubuntu18.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda11.1-gcc8",
+ "CCCL_CUDA_VERSION": "11.1",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "8",
+ "CCCL_BUILD_INFIX": "cuda11.1-gcc8"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda11.1-gcc8"
+}
diff --git a/.devcontainer/cuda11.1-gcc9/devcontainer.json b/.devcontainer/cuda11.1-gcc9/devcontainer.json
new file mode 100644
index 0000000..54f7b88
--- /dev/null
+++ b/.devcontainer/cuda11.1-gcc9/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc9-cuda11.1-ubuntu18.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda11.1-gcc9",
+ "CCCL_CUDA_VERSION": "11.1",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "9",
+ "CCCL_BUILD_INFIX": "cuda11.1-gcc9"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda11.1-gcc9"
+}
diff --git a/.devcontainer/cuda11.1-llvm9/devcontainer.json b/.devcontainer/cuda11.1-llvm9/devcontainer.json
new file mode 100644
index 0000000..d875c9b
--- /dev/null
+++ b/.devcontainer/cuda11.1-llvm9/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm9-cuda11.1-ubuntu18.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda11.1-llvm9",
+ "CCCL_CUDA_VERSION": "11.1",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "9",
+ "CCCL_BUILD_INFIX": "cuda11.1-llvm9"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda11.1-llvm9"
+}
diff --git a/.devcontainer/cuda11.8-gcc11/devcontainer.json b/.devcontainer/cuda11.8-gcc11/devcontainer.json
new file mode 100644
index 0000000..389d6c7
--- /dev/null
+++ b/.devcontainer/cuda11.8-gcc11/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda11.8-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda11.8-gcc11",
+ "CCCL_CUDA_VERSION": "11.8",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "11",
+ "CCCL_BUILD_INFIX": "cuda11.8-gcc11"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda11.8-gcc11"
+}
diff --git a/.devcontainer/cuda12.4-gcc10/devcontainer.json b/.devcontainer/cuda12.4-gcc10/devcontainer.json
new file mode 100644
index 0000000..9968a84
--- /dev/null
+++ b/.devcontainer/cuda12.4-gcc10/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc10-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-gcc10",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "10",
+ "CCCL_BUILD_INFIX": "cuda12.4-gcc10"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-gcc10"
+}
diff --git a/.devcontainer/cuda12.4-gcc11/devcontainer.json b/.devcontainer/cuda12.4-gcc11/devcontainer.json
new file mode 100644
index 0000000..d2c2662
--- /dev/null
+++ b/.devcontainer/cuda12.4-gcc11/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda12.4-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-gcc11",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "11",
+ "CCCL_BUILD_INFIX": "cuda12.4-gcc11"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-gcc11"
+}
diff --git a/.devcontainer/cuda12.4-gcc12/devcontainer.json b/.devcontainer/cuda12.4-gcc12/devcontainer.json
new file mode 100644
index 0000000..fa6e0c5
--- /dev/null
+++ b/.devcontainer/cuda12.4-gcc12/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc12-cuda12.4-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-gcc12",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "12",
+ "CCCL_BUILD_INFIX": "cuda12.4-gcc12"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-gcc12"
+}
diff --git a/.devcontainer/cuda12.4-gcc7/devcontainer.json b/.devcontainer/cuda12.4-gcc7/devcontainer.json
new file mode 100644
index 0000000..af6fdb1
--- /dev/null
+++ b/.devcontainer/cuda12.4-gcc7/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc7-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-gcc7",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "7",
+ "CCCL_BUILD_INFIX": "cuda12.4-gcc7"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-gcc7"
+}
diff --git a/.devcontainer/cuda12.4-gcc8/devcontainer.json b/.devcontainer/cuda12.4-gcc8/devcontainer.json
new file mode 100644
index 0000000..46670d4
--- /dev/null
+++ b/.devcontainer/cuda12.4-gcc8/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc8-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-gcc8",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "8",
+ "CCCL_BUILD_INFIX": "cuda12.4-gcc8"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-gcc8"
+}
diff --git a/.devcontainer/cuda12.4-gcc9/devcontainer.json b/.devcontainer/cuda12.4-gcc9/devcontainer.json
new file mode 100644
index 0000000..4005e72
--- /dev/null
+++ b/.devcontainer/cuda12.4-gcc9/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc9-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-gcc9",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "9",
+ "CCCL_BUILD_INFIX": "cuda12.4-gcc9"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-gcc9"
+}
diff --git a/.devcontainer/cuda12.4-llvm10/devcontainer.json b/.devcontainer/cuda12.4-llvm10/devcontainer.json
new file mode 100644
index 0000000..6ee5788
--- /dev/null
+++ b/.devcontainer/cuda12.4-llvm10/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm10-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-llvm10",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "10",
+ "CCCL_BUILD_INFIX": "cuda12.4-llvm10"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-llvm10"
+}
diff --git a/.devcontainer/cuda12.4-llvm11/devcontainer.json b/.devcontainer/cuda12.4-llvm11/devcontainer.json
new file mode 100644
index 0000000..66bd26c
--- /dev/null
+++ b/.devcontainer/cuda12.4-llvm11/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm11-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-llvm11",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "11",
+ "CCCL_BUILD_INFIX": "cuda12.4-llvm11"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-llvm11"
+}
diff --git a/.devcontainer/cuda12.4-llvm12/devcontainer.json b/.devcontainer/cuda12.4-llvm12/devcontainer.json
new file mode 100644
index 0000000..8889f14
--- /dev/null
+++ b/.devcontainer/cuda12.4-llvm12/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm12-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-llvm12",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "12",
+ "CCCL_BUILD_INFIX": "cuda12.4-llvm12"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-llvm12"
+}
diff --git a/.devcontainer/cuda12.4-llvm13/devcontainer.json b/.devcontainer/cuda12.4-llvm13/devcontainer.json
new file mode 100644
index 0000000..76faea9
--- /dev/null
+++ b/.devcontainer/cuda12.4-llvm13/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm13-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-llvm13",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "13",
+ "CCCL_BUILD_INFIX": "cuda12.4-llvm13"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-llvm13"
+}
diff --git a/.devcontainer/cuda12.4-llvm14/devcontainer.json b/.devcontainer/cuda12.4-llvm14/devcontainer.json
new file mode 100644
index 0000000..58b00cc
--- /dev/null
+++ b/.devcontainer/cuda12.4-llvm14/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm14-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-llvm14",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "14",
+ "CCCL_BUILD_INFIX": "cuda12.4-llvm14"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-llvm14"
+}
diff --git a/.devcontainer/cuda12.4-llvm15/devcontainer.json b/.devcontainer/cuda12.4-llvm15/devcontainer.json
new file mode 100644
index 0000000..9c92653
--- /dev/null
+++ b/.devcontainer/cuda12.4-llvm15/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm15-cuda12.4-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-llvm15",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "15",
+ "CCCL_BUILD_INFIX": "cuda12.4-llvm15"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-llvm15"
+}
diff --git a/.devcontainer/cuda12.4-llvm16/devcontainer.json b/.devcontainer/cuda12.4-llvm16/devcontainer.json
new file mode 100644
index 0000000..9f6fcad
--- /dev/null
+++ b/.devcontainer/cuda12.4-llvm16/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm16-cuda12.4-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-llvm16",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "16",
+ "CCCL_BUILD_INFIX": "cuda12.4-llvm16"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-llvm16"
+}
diff --git a/.devcontainer/cuda12.4-llvm9/devcontainer.json b/.devcontainer/cuda12.4-llvm9/devcontainer.json
new file mode 100644
index 0000000..d9910e2
--- /dev/null
+++ b/.devcontainer/cuda12.4-llvm9/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-llvm9-cuda12.4-ubuntu20.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-llvm9",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "llvm",
+ "CCCL_HOST_COMPILER_VERSION": "9",
+ "CCCL_BUILD_INFIX": "cuda12.4-llvm9"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-llvm9"
+}
diff --git a/.devcontainer/cuda12.4-oneapi2023.2.0/devcontainer.json b/.devcontainer/cuda12.4-oneapi2023.2.0/devcontainer.json
new file mode 100644
index 0000000..04d71c2
--- /dev/null
+++ b/.devcontainer/cuda12.4-oneapi2023.2.0/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-oneapi2023.2.0-cuda12.4-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-oneapi2023.2.0",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "oneapi",
+ "CCCL_HOST_COMPILER_VERSION": "2023.2.0",
+ "CCCL_BUILD_INFIX": "cuda12.4-oneapi2023.2.0"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-oneapi2023.2.0"
+}
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..fa6e0c5
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,46 @@
+{
+ "shutdownAction": "stopContainer",
+ "image": "rapidsai/devcontainers:24.06-cpp-gcc12-cuda12.4-ubuntu22.04",
+ "hostRequirements": {
+ "gpu": "optional"
+ },
+ "initializeCommand": [
+ "/bin/bash",
+ "-c",
+ "mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
+ ],
+ "containerEnv": {
+ "SCCACHE_REGION": "us-east-2",
+ "SCCACHE_BUCKET": "rapids-sccache-devs",
+ "VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
+ "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
+ "DEVCONTAINER_NAME": "cuda12.4-gcc12",
+ "CCCL_CUDA_VERSION": "12.4",
+ "CCCL_HOST_COMPILER": "gcc",
+ "CCCL_HOST_COMPILER_VERSION": "12",
+ "CCCL_BUILD_INFIX": "cuda12.4-gcc12"
+ },
+ "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "xaver.clang-format"
+ ],
+ "settings": {
+ "editor.defaultFormatter": "xaver.clang-format",
+ "clang-format.executable": "/usr/local/bin/clang-format",
+ "clangd.arguments": [
+ "--compile-commands-dir=${workspaceFolder}"
+ ]
+ }
+ }
+ },
+ "name": "cuda12.4-gcc12"
+}
diff --git a/.devcontainer/img/container_list.png b/.devcontainer/img/container_list.png
new file mode 100644
index 0000000..09c4510
Binary files /dev/null and b/.devcontainer/img/container_list.png differ
diff --git a/.devcontainer/img/github_auth.png b/.devcontainer/img/github_auth.png
new file mode 100644
index 0000000..3f52b3a
Binary files /dev/null and b/.devcontainer/img/github_auth.png differ
diff --git a/.devcontainer/img/open_in_container_manual.png b/.devcontainer/img/open_in_container_manual.png
new file mode 100644
index 0000000..e09435b
Binary files /dev/null and b/.devcontainer/img/open_in_container_manual.png differ
diff --git a/.devcontainer/img/reopen_in_container.png b/.devcontainer/img/reopen_in_container.png
new file mode 100644
index 0000000..0e1d82d
Binary files /dev/null and b/.devcontainer/img/reopen_in_container.png differ
diff --git a/.devcontainer/launch.sh b/.devcontainer/launch.sh
new file mode 100755
index 0000000..0299e0c
--- /dev/null
+++ b/.devcontainer/launch.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Ensure the script is being executed in the cccl/ root
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/..";
+
+print_help() {  # Print the command-line usage of this launcher to stdout.
+    echo "Usage: $0 [-c|--cuda <CUDA version>] [-H|--host <Host compiler>] [-d|--docker]"
+    echo "Launch a development container. If no CUDA version or Host compiler are specified,"
+    echo "the top-level devcontainer in .devcontainer/devcontainer.json will be used."
+    echo ""
+    echo "Options:"
+    echo " -c, --cuda Specify the CUDA version. E.g., 12.2"
+    echo " -H, --host Specify the host compiler. E.g., gcc12"
+    echo " -d, --docker Launch the development environment in Docker directly without using VSCode."
+    echo " -h, --help Display this help message and exit."
+}
+
+parse_options() {  # Parse the CLI flags into the globals cuda_version, host_compiler and docker_mode.
+    local OPTIONS=c:H:dh
+    local LONG_OPTIONS=cuda:,host:,docker,help
+    local PARSED_OPTIONS  # declared separately: `local var=$(cmd)` would hide getopt's exit status
+
+    if ! PARSED_OPTIONS=$(getopt -n "$0" -o "${OPTIONS}" --long "${LONG_OPTIONS}" -- "$@"); then
+        exit 1
+    fi
+
+    eval set -- "${PARSED_OPTIONS}"
+
+    while true; do
+        case "$1" in
+            -c|--cuda)
+                cuda_version="$2"
+                shift 2
+                ;;
+            -H|--host)
+                host_compiler="$2"
+                shift 2
+                ;;
+            -d|--docker)
+                docker_mode=true
+                shift
+                ;;
+            -h|--help)
+                print_help
+                exit 0
+                ;;
+            --)
+                shift
+                break
+                ;;
+            *)
+                echo "Invalid option: $1"
+                print_help
+                exit 1
+                ;;
+        esac
+    done
+}
+
+launch_docker() {  # Run an interactive shell in the image named by ${path}/devcontainer.json.
+    DOCKER_IMAGE=$(grep "image" "${path}/devcontainer.json" | sed 's/.*: "\(.*\)",/\1/')  # value of the "image" key
+    echo "Found image: ${DOCKER_IMAGE}"
+    docker pull "${DOCKER_IMAGE}"  # quoted so an unexpected value cannot be word-split or globbed
+    docker run \
+        -it --rm \
+        --user coder \
+        --workdir /home/coder/cccl \
+        --mount type=bind,src="$(pwd)",dst='/home/coder/cccl' \
+        "${DOCKER_IMAGE}" \
+        /bin/bash
+}
+
+launch_vscode() {
+    # VSCode allows only one instance per `devcontainer.json`, so each launch gets its own
+    # temporary directory, which lets several instances of the same environment run at once.
+    # The selected `devcontainer.json` is copied there and every literal `${localWorkspaceFolder}`
+    # in the copy is replaced with the real repository path, so the container mounts this
+    # checkout rather than the temporary directory. The sed script is double-quoted so that
+    # `$(pwd)` expands; `\$` keeps the placeholder itself literal. Finally a vscode:// URL
+    # embedding the hex-encoded temporary path is opened to start the container.
+    local workspace="$(basename "$(pwd)")"
+    local tmpdir="$(mktemp -d)/${workspace}"
+    mkdir -p "${tmpdir}"
+    mkdir -p "${tmpdir}/.devcontainer"
+    cp -arL "${path}/devcontainer.json" "${tmpdir}/.devcontainer"
+    sed -i "s@\${localWorkspaceFolder}@$(pwd)@g" "${tmpdir}/.devcontainer/devcontainer.json"
+    local path="${tmpdir}"
+    local hash="$(echo -n "${path}" | xxd -pu - | tr -d '[:space:]')"
+    local url="vscode://vscode-remote/dev-container+${hash}/home/coder/cccl"
+
+    local launch=""
+    if type open >/dev/null 2>&1; then
+        launch="open"
+    elif type xdg-open >/dev/null 2>&1; then
+        launch="xdg-open"
+    fi
+
+    if [ -n "${launch}" ]; then
+        echo "Launching VSCode Dev Container URL: ${url}"
+        code --new-window "${tmpdir}"
+        exec "${launch}" "${url}" >/dev/null 2>&1
+    fi
+}
+
+main() {  # Entry point: choose the devcontainer directory, then start it in Docker or VSCode.
+    parse_options "$@"
+
+    # If no CTK/Host compiler are provided, just use the default environment
+    if [[ -z ${cuda_version:-} ]] && [[ -z ${host_compiler:-} ]]; then
+        path=".devcontainer"
+    else
+        path=".devcontainer/cuda${cuda_version:-}-${host_compiler:-}"  # :- so `set -u` cannot abort when only one option was given
+        if [[ ! -f "${path}/devcontainer.json" ]]; then
+            echo "Unknown CUDA [${cuda_version:-}] compiler [${host_compiler:-}] combination"
+            echo "Requested devcontainer ${path}/devcontainer.json does not exist"
+            exit 1
+        fi
+    fi
+
+    if ${docker_mode:-'false'}; then
+        launch_docker
+    else
+        launch_vscode
+    fi
+}
+
+main "$@"
+
diff --git a/.devcontainer/make_devcontainers.sh b/.devcontainer/make_devcontainers.sh
new file mode 100755
index 0000000..64b92c0
--- /dev/null
+++ b/.devcontainer/make_devcontainers.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+
+# This script parses the CI matrix.yaml file and generates a devcontainer.json file for each unique combination of
+# CUDA version, compiler name/version, and Ubuntu version. The devcontainer.json files are written to the
+# .devcontainer directory to a subdirectory named after the CUDA version and compiler name/version.
+# GitHub docs on using multiple devcontainer.json files:
+# https://docs.github.com/en/codespaces/setting-up-your-project-for-codespaces/adding-a-dev-container-configuration/introduction-to-dev-containers#devcontainerjson
+
+set -euo pipefail
+
+# Ensure the script is being executed in its containing directory
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )";
+
+
+function usage {
+ echo "Usage: $0 [--clean] [-h/--help] [-v/--verbose]"
+ echo " --clean Remove stale devcontainer subdirectories"
+ echo " -h, --help Display this help message"
+ echo " -v, --verbose Enable verbose mode (set -x)"
+ exit 1
+}
+
+# Function to update the devcontainer.json file with the provided parameters
+update_devcontainer() {
+ local input_file="$1"
+ local output_file="$2"
+ local name="$3"
+ local cuda_version="$4"
+ local compiler_name="$5"
+ local compiler_exe="$6"
+ local compiler_version="$7"
+ local os="$8"
+ local devcontainer_version="$9"
+
+ local IMAGE_ROOT="rapidsai/devcontainers:${devcontainer_version}-cpp-"
+ local image="${IMAGE_ROOT}${compiler_name}${compiler_version}-cuda${cuda_version}-${os}"
+
+ jq --arg image "$image" --arg name "$name" \
+ --arg cuda_version "$cuda_version" --arg compiler_name "$compiler_name" \
+ --arg compiler_exe "$compiler_exe" --arg compiler_version "$compiler_version" --arg os "$os" \
+ '.image = $image | .name = $name | .containerEnv.DEVCONTAINER_NAME = $name |
+ .containerEnv.CCCL_BUILD_INFIX = $name |
+ .containerEnv.CCCL_CUDA_VERSION = $cuda_version | .containerEnv.CCCL_HOST_COMPILER = $compiler_name |
+ .containerEnv.CCCL_HOST_COMPILER_VERSION = $compiler_version '\
+ "$input_file" > "$output_file"
+}
+
+make_name() {
+    # Compose the devcontainer name "cuda<CUDA version>-<compiler name><compiler version>".
+    # $1: CUDA version, $2: compiler name, $3: compiler version
+    local cuda="$1" compiler="$2" version="$3"
+
+    printf 'cuda%s-%s%s\n' "$cuda" "$compiler" "$version"
+}
+
+CLEAN=false
+VERBOSE=false
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --clean)
+ CLEAN=true
+ ;;
+ -h|--help)
+ usage
+ ;;
+ -v|--verbose)
+ VERBOSE=true
+ ;;
+ *)
+ usage
+ ;;
+ esac
+ shift
+done
+
+MATRIX_FILE="../ci/matrix.yaml"
+
+# Enable verbose mode if requested
+if [ "$VERBOSE" = true ]; then
+ set -x
+ cat ${MATRIX_FILE}
+fi
+
+# Read matrix.yaml and convert it to json
+matrix_json=$(yq -o json ${MATRIX_FILE})
+
+# Exclude Windows environments
+readonly matrix_json=$(echo "$matrix_json" | jq 'del(.pull_request.nvcc[] | select(.os | contains("windows")))')
+
+# Get the devcontainer image version and define image tag root
+readonly DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version')
+
+# Get unique combinations of cuda version, compiler name/version, and Ubuntu version
+readonly combinations=$(echo "$matrix_json" | jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_exe: .compiler.exe, compiler_version: .compiler.version, os: .os}] | unique | .[]')
+
+# Update the base devcontainer with the default values
+# The root devcontainer.json file is used as the default container as well as a template for all
+# other devcontainer.json files by replacing the `image:` field with the appropriate image name
+readonly base_devcontainer_file="./devcontainer.json"
+readonly NEWEST_GCC_CUDA_ENTRY=$(echo "$combinations" | jq -rs '[.[] | select(.compiler_name == "gcc")] | sort_by((.cuda | tonumber), (.compiler_version | tonumber)) | .[-1]')
+readonly DEFAULT_CUDA=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.cuda')
+readonly DEFAULT_COMPILER_NAME=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_name')
+readonly DEFAULT_COMPILER_EXE=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_exe')
+readonly DEFAULT_COMPILER_VERSION=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_version')
+readonly DEFAULT_OS=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.os')
+readonly DEFAULT_NAME=$(make_name "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_VERSION")
+
+update_devcontainer ${base_devcontainer_file} "./temp_devcontainer.json" "$DEFAULT_NAME" "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_EXE" "$DEFAULT_COMPILER_VERSION" "$DEFAULT_OS" "$DEVCONTAINER_VERSION"
+mv "./temp_devcontainer.json" ${base_devcontainer_file}
+
+# Create an array to keep track of valid subdirectory names
+valid_subdirs=()
+
+# For each unique combination
+for combination in $combinations; do
+ cuda_version=$(echo "$combination" | jq -r '.cuda')
+ compiler_name=$(echo "$combination" | jq -r '.compiler_name')
+ compiler_exe=$(echo "$combination" | jq -r '.compiler_exe')
+ compiler_version=$(echo "$combination" | jq -r '.compiler_version')
+ os=$(echo "$combination" | jq -r '.os')
+
+ name=$(make_name "$cuda_version" "$compiler_name" "$compiler_version")
+ mkdir -p "$name"
+ new_devcontainer_file="$name/devcontainer.json"
+
+ update_devcontainer "$base_devcontainer_file" "$new_devcontainer_file" "$name" "$cuda_version" "$compiler_name" "$compiler_exe" "$compiler_version" "$os" "$DEVCONTAINER_VERSION"
+ echo "Created $new_devcontainer_file"
+
+ # Add the subdirectory name to the valid_subdirs array
+ valid_subdirs+=("$name")
+done
+
+# With --clean, remove stale generated dirs; requiring a devcontainer.json spares unrelated dirs such as ./img.
+if [ "$CLEAN" = true ]; then
+    for subdir in ./*; do
+        if [ -f "$subdir/devcontainer.json" ] && [[ ! " ${valid_subdirs[@]} " =~ " ${subdir#./} " ]]; then
+            echo "Removing stale subdirectory: $subdir"
+            rm -r "$subdir"
+        fi
+    done
+fi
diff --git a/.devcontainer/verify_devcontainer.sh b/.devcontainer/verify_devcontainer.sh
new file mode 100755
index 0000000..b5934ea
--- /dev/null
+++ b/.devcontainer/verify_devcontainer.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+function usage {
+ echo "Usage: $0"
+ echo
+ echo "This script is intended to be run within one of CCCL's Dev Containers."
+ echo "It verifies that the expected environment variables and binary versions match what is expected."
+}
+
+check_envvars() {  # Echo NAME=value for each variable named in "$@"; exit 1 at the first one that is unset or empty.
+    for var_name in "$@"; do
+        if [[ -n "${!var_name:-}" ]]; then
+            echo "$var_name=${!var_name}"
+        else
+            echo "::error:: ${var_name} variable is not set."
+            exit 1
+        fi
+    done
+}
+
+check_host_compiler_version() {  # Verify that $CXX matches CCCL_HOST_COMPILER and CCCL_HOST_COMPILER_VERSION.
+    local version_output actual_version expected_compiler  # declared apart from assignment so a failing command is not masked
+    version_output=$($CXX --version)
+
+    if [[ "$CXX" == "g++" ]]; then
+        actual_version=$(head -n 1 <<< "$version_output" | cut -d ' ' -f 4 | cut -d '.' -f 1)  # major version only
+        expected_compiler="gcc"
+    elif [[ "$CXX" == "clang++" ]]; then
+        if [[ ! $version_output =~ clang\ version\ ([0-9]+) ]]; then
+            echo "::error:: Unable to determine clang version."
+            exit 1
+        fi
+        actual_version=${BASH_REMATCH[1]}
+        expected_compiler="llvm"
+    elif [[ "$CXX" == "icpc" ]]; then
+        actual_version=$(head -n 1 <<< "$version_output" | cut -d ' ' -f 3)
+        # The icpc compiler version of oneAPI release 2023.2.0 is 2021.10.0
+        if [[ "$actual_version" == "2021.10.0" ]]; then
+            actual_version="2023.2.0"
+        fi
+        expected_compiler="oneapi"
+    else
+        echo "::error:: Unexpected CXX value ($CXX)."
+        exit 1
+    fi
+
+    if [[ "$expected_compiler" != "${CCCL_HOST_COMPILER}" || "$actual_version" != "$CCCL_HOST_COMPILER_VERSION" ]]; then
+        echo "::error:: CXX ($CXX) version ($actual_version) does not match the expected compiler (${CCCL_HOST_COMPILER}) and version (${CCCL_HOST_COMPILER_VERSION})."
+        exit 1
+    else
+        echo "Detected host compiler: $CXX version $actual_version"
+    fi
+}
+
+check_cuda_version() {
+    # Compare the "release X.Y" reported by nvcc with $CCCL_CUDA_VERSION; exit 1 on any mismatch.
+    local nvcc_banner=$(nvcc --version)
+    if [[ ! $nvcc_banner =~ release\ ([0-9]+\.[0-9]+) ]]; then
+        echo "::error:: Unable to determine CUDA version from nvcc."
+        exit 1
+    fi
+    local detected=${BASH_REMATCH[1]}
+
+    if [[ "$detected" == "$CCCL_CUDA_VERSION" ]]; then
+        echo "Detected CUDA version: $detected"
+    else
+        echo "::error:: CUDA version ($detected) does not match the expected CUDA version ($CCCL_CUDA_VERSION)."
+        exit 1
+    fi
+}
+
+main() {
+ if [[ "$1" == "-h" || "$1" == "--help" ]]; then
+ usage
+ exit 0
+ fi
+
+ set -euo pipefail
+
+ check_envvars DEVCONTAINER_NAME CXX CUDAHOSTCXX CCCL_BUILD_INFIX CCCL_HOST_COMPILER CCCL_CUDA_VERSION CCCL_HOST_COMPILER_VERSION
+
+ check_host_compiler_version
+
+ check_cuda_version
+
+ echo "Dev Container successfully verified!"
+}
+
+main "$@"
diff --git a/.github/actions/compute-matrix/action.yml b/.github/actions/compute-matrix/action.yml
new file mode 100644
index 0000000..b8155e7
--- /dev/null
+++ b/.github/actions/compute-matrix/action.yml
@@ -0,0 +1,25 @@
+
+name: Compute Matrix
+description: "Compute the matrix for a given matrix type from the specified matrix file"
+
+inputs:
+ matrix_query:
+ description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc"
+ required: true
+ matrix_file:
+ description: 'The file containing the matrix'
+ required: true
+outputs:
+ matrix:
+ description: 'The requested matrix'
+ value: ${{ steps.compute-matrix.outputs.MATRIX }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Compute matrix  # inputs are quoted so a path or query containing spaces stays one argument
+      id: compute-matrix
+      run: |
+        MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh "${{inputs.matrix_file}}" "${{inputs.matrix_query}}")
+        echo "matrix=$MATRIX" | tee -a "$GITHUB_OUTPUT"
+      shell: bash -euxo pipefail {0}
diff --git a/.github/actions/compute-matrix/compute-matrix.sh b/.github/actions/compute-matrix/compute-matrix.sh
new file mode 100755
index 0000000..8a6d635
--- /dev/null
+++ b/.github/actions/compute-matrix/compute-matrix.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+set -euo pipefail
+
+write_output() {
+ local key="$1"
+ local value="$2"
+ echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
+}
+
+explode_std_versions() {
+ jq -cr 'map(. as $o | {std: $o.std[]} + del($o.std))'
+}
+
+explode_libs() {
+ jq -cr 'map(. as $o | {lib: $o.lib[]} + del($o.lib))'
+}
+
+# Write DEVCONTAINER_VERSION, PER_CUDA_COMPILER_MATRIX and PER_CUDA_COMPILER_KEYS for matrix $2 of YAML file $1.
+extract_matrix() {
+  # Declared apart from the assignments: `local x=$(cmd)` would hide a failing yq/jq from `set -e`.
+  local file="$1" type="$2" matrix nvcc_full_matrix per_cuda_compiler_matrix
+  matrix=$(yq -o=json "$file" | jq -cr ".$type")
+  write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')"
+  nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc' | explode_std_versions )"
+  per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')"
+  write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix"
+  write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')"
+}
+
+main() {
+ if [ "$1" == "-v" ]; then
+ set -x
+ shift
+ fi
+
+ if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then
+ echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE"
+ echo " -v : Enable verbose output"
+ echo " MATRIX_FILE : The path to the matrix file."
+ echo " MATRIX_TYPE : The desired matrix. Supported values: 'pull_request'"
+ exit 1
+ fi
+
+ echo "Input matrix file:" >&2
+ cat "$1" >&2
+ echo "Matrix Type: $2" >&2
+
+ extract_matrix "$1" "$2"
+}
+
+main "$@"
diff --git a/.github/actions/configure_cccl_sccache/action.yml b/.github/actions/configure_cccl_sccache/action.yml
new file mode 100644
index 0000000..1b42fc9
--- /dev/null
+++ b/.github/actions/configure_cccl_sccache/action.yml
@@ -0,0 +1,19 @@
+name: Set up AWS credentials and environment variables for sccache
+description: "Set up AWS credentials and environment variables for sccache"
+runs:
+ using: "composite"
+ steps:
+ - name: Get AWS credentials for sccache bucket
+ uses: aws-actions/configure-aws-credentials@v2
+ with:
+ role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA
+ aws-region: us-east-2
+        role-duration-seconds: 43200 # 12 hours
+ - name: Set environment variables
+ run: |
+ echo "SCCACHE_BUCKET=rapids-sccache-devs" >> $GITHUB_ENV
+ echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV
+ echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV
+ echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV
+ echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV
+ shell: bash
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
new file mode 100644
index 0000000..895ba83
--- /dev/null
+++ b/.github/copy-pr-bot.yaml
@@ -0,0 +1,4 @@
+# Configuration file for `copy-pr-bot` GitHub App
+# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
+
+enabled: true
diff --git a/.github/problem-matchers/problem-matcher.json b/.github/problem-matchers/problem-matcher.json
new file mode 100644
index 0000000..f196a5c
--- /dev/null
+++ b/.github/problem-matchers/problem-matcher.json
@@ -0,0 +1,14 @@
+{
+ "problemMatcher": [
+ {
+ "owner": "nvcc",
+ "pattern": [
+ {
+ "regexp": "^\\/home\\/coder\\/(.+):(\\d+):(\\d+): (\\w+): \"(.+)\"$",
+ "severity": 4,
+ "message": 5
+ }
+ ]
+ }
+ ]
+}
diff --git a/.github/workflows/build-and-test-linux.yml b/.github/workflows/build-and-test-linux.yml
new file mode 100644
index 0000000..6c5ba40
--- /dev/null
+++ b/.github/workflows/build-and-test-linux.yml
@@ -0,0 +1,47 @@
+name: build and test
+
+defaults:
+ run:
+ shell: bash -exo pipefail {0}
+
+on:
+ workflow_call:
+ inputs:
+ cpu: {type: string, required: true}
+ test_name: {type: string, required: false}
+ build_script: {type: string, required: false}
+ test_script: {type: string, required: false}
+ container_image: {type: string, required: false}
+ run_tests: {type: boolean, required: false, default: true}
+
+permissions:
+ contents: read
+
+jobs:
+ build:
+ name: Build ${{inputs.test_name}}
+ permissions:
+ id-token: write
+ contents: read
+ uses: ./.github/workflows/run-as-coder.yml
+ with:
+ name: Build ${{inputs.test_name}}
+ runner: linux-${{inputs.cpu}}-cpu16
+ image: ${{ inputs.container_image }}
+ command: |
+ ${{ inputs.build_script }}
+
+ test:
+ needs: build
+ permissions:
+ id-token: write
+ contents: read
+ if: ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}}
+ name: Test ${{inputs.test_name}}
+ uses: ./.github/workflows/run-as-coder.yml
+ with:
+ name: Test ${{inputs.test_name}}
+ runner: linux-${{inputs.cpu}}-gpu-v100-latest-1
+ image: ${{inputs.container_image}}
+ command: |
+ ${{ inputs.test_script }}
diff --git a/.github/workflows/build-and-test-windows.yml b/.github/workflows/build-and-test-windows.yml
new file mode 100644
index 0000000..55b3100
--- /dev/null
+++ b/.github/workflows/build-and-test-windows.yml
@@ -0,0 +1,49 @@
+name: Build Windows
+
+on:
+ workflow_call:
+ inputs:
+ test_name: {type: string, required: false}
+ build_script: {type: string, required: false}
+ container_image: {type: string, required: false}
+
+jobs:
+ prepare:
+ name: Build ${{inputs.test_name}}
+ runs-on: windows-amd64-cpu16
+ permissions:
+ id-token: write
+ contents: read
+ env:
+ SCCACHE_BUCKET: rapids-sccache-devs
+ SCCACHE_REGION: us-east-2
+ SCCACHE_IDLE_TIMEOUT: 0
+ SCCACHE_S3_USE_SSL: true
+ SCCACHE_S3_NO_CREDENTIALS: false
+ steps:
+ - name: Get AWS credentials for sccache bucket
+ uses: aws-actions/configure-aws-credentials@v2
+ with:
+ role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA
+ aws-region: us-east-2
+ role-duration-seconds: 43200 # 12 hours
+ - name: Fetch ${{ inputs.container_image }}
+ shell: powershell
+ run: docker pull ${{ inputs.container_image }}
+ - name: Run the tests
+ shell: powershell
+ run: >-
+ docker run ${{ inputs.container_image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}')
+ [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}')
+ [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}')
+ [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}')
+ [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}')
+ [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}')
+ [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}')
+ [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}')
+ git clone https://github.com/NVIDIA/cccl.git;
+ cd cccl;
+ git fetch --all;
+ git checkout ${{github.ref_name}};
+ ${{inputs.build_script}};"
+
diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml
new file mode 100644
index 0000000..7b5ed4e
--- /dev/null
+++ b/.github/workflows/dispatch-build-and-test.yml
@@ -0,0 +1,51 @@
+name: Dispatch build and test
+
+on:
+ workflow_call:
+ inputs:
+ project_name: {type: string, required: true}
+ per_cuda_compiler_matrix: {type: string, required: true}
+ devcontainer_version: {type: string, required: true}
+ is_windows: {type: boolean, required: true}
+
+permissions:
+ contents: read
+
+jobs:
+ # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration
+ # ensures that the build/test steps can overlap across different configurations. For example,
+ # the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11.
+ build_and_test_linux:
+ name: build and test linux
+ permissions:
+ id-token: write
+ contents: read
+ if: ${{ !inputs.is_windows }}
+ uses: ./.github/workflows/build-and-test-linux.yml
+ strategy:
+ fail-fast: false
+ matrix:
+ include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
+ with:
+ cpu: ${{ matrix.cpu }}
+ test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} ${{matrix.extra_build_args}}
+ build_script: './ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"'
+ test_script: './ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"'
+ container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
+ run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') && matrix.os != 'windows-2022' }}
+
+ build_and_test_windows:
+ name: build and test windows
+ permissions:
+ id-token: write
+ contents: read
+ if: ${{ inputs.is_windows }}
+ uses: ./.github/workflows/build-and-test-windows.yml
+ strategy:
+ fail-fast: false
+ matrix:
+ include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
+ with:
+ test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
+ build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 -std ${{matrix.std}}"
+ container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}}
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
new file mode 100644
index 0000000..6ea22ab
--- /dev/null
+++ b/.github/workflows/pr.yml
@@ -0,0 +1,95 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is the main workflow; it runs on every push to a `pull-request/<number>` branch.
+name: pr
+
+defaults:
+ run:
+ shell: bash -euo pipefail {0}
+
+on:
+ push:
+ branches:
+ - "pull-request/[0-9]+"
+
+# Only runs one instance of this workflow at a time for a given PR and cancels any in-progress runs when a new one starts.
+concurrency:
+ group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }}
+ cancel-in-progress: true
+
+permissions:
+ contents: read
+ pull-requests: read
+
+jobs:
+ compute-matrix:
+ name: Compute matrix
+ runs-on: ubuntu-latest
+ outputs:
+ DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}}
+ PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}}
+ PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}}
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v3
+ - name: Compute matrix outputs
+ id: set-outputs
+ run: |
+ .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request
+
+ nvbench:
+ name: NVBench CUDA${{ matrix.cuda_host_combination }}
+ permissions:
+ id-token: write
+ contents: read
+ needs: compute-matrix
+ uses: ./.github/workflows/dispatch-build-and-test.yml
+ strategy:
+ fail-fast: false
+ matrix:
+ cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }}
+ with:
+ project_name: "nvbench"
+ per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }}
+ devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+ is_windows: ${{ contains(matrix.cuda_host_combination, 'cl') }}
+
+ verify-devcontainers:
+ name: Verify Dev Containers
+ permissions:
+ id-token: write
+ contents: read
+ uses: ./.github/workflows/verify-devcontainers.yml
+
+ # This job is the final job that runs after all other jobs and is used for branch protection status checks.
+ # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks
+ # https://github.com/orgs/community/discussions/26822#discussioncomment-5122101
+ ci:
+ runs-on: ubuntu-latest
+ name: CI
+ if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success
+ needs:
+ - nvbench
+ - verify-devcontainers
+ steps:
+ - name: Check status of all precursor jobs
+ if: >-
+ ${{
+ contains(needs.*.result, 'failure')
+ || contains(needs.*.result, 'cancelled')
+ || contains(needs.*.result, 'skipped')
+ }}
+ run: exit 1
diff --git a/.github/workflows/run-as-coder.yml b/.github/workflows/run-as-coder.yml
new file mode 100644
index 0000000..29399b6
--- /dev/null
+++ b/.github/workflows/run-as-coder.yml
@@ -0,0 +1,67 @@
+name: Run as coder user
+
+defaults:
+ run:
+ shell: bash -exo pipefail {0}
+
+on:
+ workflow_call:
+ inputs:
+ name: {type: string, required: true}
+ image: {type: string, required: true}
+ runner: {type: string, required: true}
+ command: {type: string, required: true}
+ env: { type: string, required: false, default: "" }
+
+permissions:
+ contents: read
+
+jobs:
+ run-as-coder:
+ name: ${{inputs.name}}
+ permissions:
+ id-token: write
+ contents: read
+ runs-on: ${{inputs.runner}}
+ container:
+ options: -u root
+ image: ${{inputs.image}}
+ env:
+ NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v3
+ with:
+ path: nvbench
+ persist-credentials: false
+ - name: Move files to coder user home directory
+ run: |
+ cp -R nvbench /home/coder/nvbench
+ chown -R coder:coder /home/coder/
+ - name: Add NVCC problem matcher
+ run: |
+ echo "::add-matcher::nvbench/.github/problem-matchers/problem-matcher.json"
+ - name: Configure credentials and environment variables for sccache
+ uses: ./nvbench/.github/actions/configure_cccl_sccache
+ - name: Run command
+ shell: su coder {0}
+ run: |
+ set -eo pipefail
+ cd ~/nvbench
+ echo -e "\e[1;34mRunning as 'coder' user in $(pwd):\e[0m"
+ echo -e "\e[1;34m${{inputs.command}}\e[0m"
+ eval "${{inputs.command}}" || exit_code=$?
+ if [ ! -z "$exit_code" ]; then
+ echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m"
+ echo "::error:: To replicate this failure locally, follow the steps below:"
+ echo "1. Clone the repository, and navigate to the correct branch and commit:"
+ echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA"
+ echo ""
+ echo "2. Run the failed command inside the same Docker container used by the CI:"
+ echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}"
+ echo ""
+ echo "For additional information, see:"
+ echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md"
+ echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md"
+ exit $exit_code
+ fi
diff --git a/.github/workflows/verify-devcontainers.yml b/.github/workflows/verify-devcontainers.yml
new file mode 100644
index 0000000..6fea8ae
--- /dev/null
+++ b/.github/workflows/verify-devcontainers.yml
@@ -0,0 +1,94 @@
+name: Verify devcontainers
+
+on:
+ workflow_call:
+
+defaults:
+ run:
+ shell: bash -euo pipefail {0}
+
+permissions:
+ contents: read
+
+jobs:
+ verify-make-devcontainers:
+ name: Verify devcontainer files are up-to-date
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v3
+ - name: Setup jq and yq
+ run: |
+ sudo apt-get update
+ sudo apt-get install jq -y
+ sudo wget -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.34.2/yq_linux_amd64
+ sudo chmod +x /usr/local/bin/yq
+ - name: Run the script to generate devcontainer files
+ run: |
+ ./.devcontainer/make_devcontainers.sh --verbose
+      - name: Check for changes  # fails when regenerating the devcontainer files modified or added anything
+        run: |
+          if [[ $(git diff --stat) != '' || $(git status --porcelain | grep '^??') != '' ]]; then
+            git diff --minimal
+            git status --porcelain
+            echo "::error:: Dev Container files are out of date or there are untracked files. Run the .devcontainer/make_devcontainers.sh script and commit the changes."
+            exit 1
+          else
+            echo "::notice::Dev Container files are up-to-date."
+          fi
+
+ get-devcontainer-list:
+ needs: verify-make-devcontainers
+ name: Get list of devcontainer.json files
+ runs-on: ubuntu-latest
+ outputs:
+ devcontainers: ${{ steps.get-list.outputs.devcontainers }}
+ steps:
+ - name: Check out the code
+ uses: actions/checkout@v3
+ - name: Get list of devcontainer.json paths and names
+ id: get-list
+ run: |
+ devcontainers=$(find .devcontainer/ -name 'devcontainer.json' | while read -r devcontainer; do
+ jq --arg path "$devcontainer" '{path: $path, name: .name}' "$devcontainer"
+ done | jq -s -c .)
+ echo "devcontainers=${devcontainers}" | tee --append "${GITHUB_OUTPUT}"
+
+ verify-devcontainers:
+ needs: get-devcontainer-list
+ name: ${{matrix.devcontainer.name}}
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ devcontainer: ${{fromJson(needs.get-devcontainer-list.outputs.devcontainers)}}
+ permissions:
+ id-token: write
+ contents: read
+ steps:
+ - name: Check out the code
+ uses: actions/checkout@v3
+      # devcontainer/ci doesn't support nested devcontainer.json files, so we need to copy the devcontainer.json
+      # file to the top level .devcontainer/ directory
+ - name: Copy devcontainer.json to .devcontainer/
+ run: |
+ src="${{ matrix.devcontainer.path }}"
+ dst=".devcontainer/devcontainer.json"
+ if [[ "$src" != "$dst" ]]; then
+ cp "$src" "$dst"
+ fi
+ # We don't really need sccache configured, but we need the AWS credentials envvars to be set
+ # in order to avoid the devcontainer hanging waiting for GitHub authentication
+ - name: Configure credentials and environment variables for sccache
+ uses: ./.github/actions/configure_cccl_sccache
+ - name: Run in devcontainer
+ uses: devcontainers/ci@v0.3
+ with:
+ push: never
+ env: |
+ SCCACHE_REGION=${{ env.SCCACHE_REGION }}
+ AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }}
+ AWS_SESSION_TOKEN=${{ env.AWS_SESSION_TOKEN }}
+ AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }}
+ runCmd: |
+ .devcontainer/verify_devcontainer.sh
diff --git a/.gitignore b/.gitignore
index 20d94d8..d41aa02 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
build*/
+.aws
+.vscode
.cache
+.config
.idea
cmake-build-*
*~
diff --git a/CMakePresets.json b/CMakePresets.json
new file mode 100644
index 0000000..9dc92cd
--- /dev/null
+++ b/CMakePresets.json
@@ -0,0 +1,76 @@
+{
+ "version": 3,
+ "cmakeMinimumRequired": {
+ "major": 3,
+ "minor": 23,
+ "patch": 1
+ },
+ "configurePresets": [
+ {
+ "name": "base",
+ "hidden": true,
+ "generator": "Ninja",
+ "binaryDir": "${sourceDir}/build/$env{CCCL_BUILD_INFIX}/${presetName}",
+ "cacheVariables": {
+ "CMAKE_BUILD_TYPE": "Release",
+ "CMAKE_CUDA_ARCHITECTURES": "60;70;80",
+ "NVBench_ENABLE_CUPTI": true,
+ "NVBench_ENABLE_DEVICE_TESTING": false,
+ "NVBench_ENABLE_EXAMPLES": true,
+ "NVBench_ENABLE_INSTALL_RULES": true,
+ "NVBench_ENABLE_NVML": true,
+ "NVBench_ENABLE_TESTING": true,
+ "NVBench_ENABLE_WERROR": true
+ }
+ },
+ {
+ "name": "all-dev",
+ "inherits": "base",
+ "cacheVariables": {
+ "NVBench_ENABLE_DEVICE_TESTING": true
+ }
+ },
+ {
+ "name": "nvbench-cpp17",
+ "displayName": "nvbench_c++17",
+ "inherits": "base",
+ "cacheVariables": {
+ "CMAKE_CXX_STANDARD": "17",
+ "CMAKE_CUDA_STANDARD": "17"
+ }
+ }
+ ],
+ "buildPresets": [
+ {
+ "name": "all-dev",
+ "configurePreset": "all-dev"
+ },
+ {
+ "name": "nvbench-cpp17",
+ "configurePreset": "nvbench-cpp17"
+ }
+ ],
+ "testPresets": [
+ {
+ "name": "base",
+ "hidden": true,
+ "output": {
+ "outputOnFailure": true
+ },
+ "execution": {
+ "noTestsAction": "error",
+ "stopOnFailure": false
+ }
+ },
+ {
+ "name": "all-dev",
+ "configurePreset": "all-dev",
+ "inherits": "base"
+ },
+ {
+ "name": "nvbench-cpp17",
+ "configurePreset": "nvbench-cpp17",
+ "inherits": "base"
+ }
+ ]
+}
diff --git a/ci/axis/cpu.yml b/ci/axis/cpu.yml
deleted file mode 100644
index 7230b66..0000000
--- a/ci/axis/cpu.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2018-2020 NVIDIA Corporation
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# Released under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-
-SDK_TYPE:
- - cuda
-
-SDK_VER:
- - 11.5.1-devel
-
-OS_TYPE:
- - ubuntu
-
-OS_VER:
- - 20.04
-
-CXX_TYPE:
- - clang
- - gcc
-
-CXX_VER:
- - 5
- - 6
- - 7
- - 8
- - 9
- - 10
- - 11
- - 12
-
-exclude:
- - CXX_TYPE: clang
- CXX_VER: 5
- - CXX_TYPE: clang
- CXX_VER: 6
- - CXX_TYPE: gcc
- CXX_VER: 12
diff --git a/ci/axis/gpu.yml b/ci/axis/gpu.yml
deleted file mode 100644
index 1531079..0000000
--- a/ci/axis/gpu.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2018-2020 NVIDIA Corporation
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# Released under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-
-SDK_TYPE:
- - cuda
-
-SDK_VER:
- - 11.5.1-devel
-
-OS_TYPE:
- - ubuntu
-
-OS_VER:
- - 20.04
-
-CXX_TYPE:
- - clang
- - gcc
-
-CXX_VER:
- - 11
- - 12
-
-exclude:
- - CXX_TYPE: clang
- CXX_VER: 11
- - CXX_TYPE: gcc
- CXX_VER: 12
diff --git a/ci/build_common.sh b/ci/build_common.sh
new file mode 100755
index 0000000..ee95b00
--- /dev/null
+++ b/ci/build_common.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+
+set -eo pipefail
+
+# Ensure the script is being executed in its containing directory
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )";
+
+# Script defaults
+HOST_COMPILER=${CXX:-g++} # $CXX if set, otherwise `g++`
+CXX_STANDARD=17
+CUDA_COMPILER=${CUDACXX:-nvcc} # $CUDACXX if set, otherwise `nvcc`
+CUDA_ARCHS= # Empty, use presets by default.
+GLOBAL_CMAKE_OPTIONS=()
+DISABLE_CUB_BENCHMARKS= # Enable to force-disable building CUB benchmarks.
+
+# Print usage information and exit with status 1.
+function usage {
+ echo "Usage: $0 [OPTIONS]"
+ echo
+ echo "The PARALLEL_LEVEL environment variable controls the amount of build parallelism. Default is the number of cores."
+ echo
+ echo "Options:"
+ echo " -v/--verbose: enable shell echo for debugging"
+ echo " -cuda: CUDA compiler (Defaults to \$CUDACXX if set, otherwise nvcc)"
+ echo " -cxx: Host compiler (Defaults to \$CXX if set, otherwise g++)"
+ echo " -std: CUDA/C++ standard (Defaults to 17)"
+ echo " -arch: Target CUDA arches, e.g. \"60-real;70;80-virtual\" (Defaults to value in presets file)"
+ echo " -cmake-options: Additional options to pass to CMake"
+ echo
+ echo "Examples:"
+ echo " $ PARALLEL_LEVEL=8 $0"
+ echo " $ PARALLEL_LEVEL=8 $0 -cxx g++-9"
+ echo " $ $0 -cxx clang++-8"
+ echo " $ $0 -cxx g++-8 -std 20 -arch 80-real -v -cuda /usr/local/bin/nvcc"
+ echo " $ $0 -cmake-options \"-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS=-Wfatal-errors\""
+ exit 1
+}
+
+# Parse options
+
+# Copy the args into a temporary array, since we will modify them and
+# the parent script may still need them.
+args=("$@")
+while [ "${#args[@]}" -ne 0 ]; do
+    case "${args[0]}" in
+    -v | --verbose) VERBOSE=1; args=("${args[@]:1}");;
+    -cxx) HOST_COMPILER="${args[1]}"; args=("${args[@]:2}");;
+    -std) CXX_STANDARD="${args[1]}"; args=("${args[@]:2}");;
+    -cuda) CUDA_COMPILER="${args[1]}"; args=("${args[@]:2}");;
+    -arch) CUDA_ARCHS="${args[1]}"; args=("${args[@]:2}");;
+    -disable-benchmarks) DISABLE_CUB_BENCHMARKS=1; args=("${args[@]:1}");;
+    -cmake-options)
+        if [ -n "${args[1]}" ]; then
+            IFS=' ' read -ra split_args <<< "${args[1]}"
+            GLOBAL_CMAKE_OPTIONS+=("${split_args[@]}")
+            args=("${args[@]:2}")
+        else
+            echo "Error: No arguments provided for -cmake-options"
+            # usage prints the help text and exits with status 1, so nothing may follow it.
+            usage
+        fi
+        ;;
+    -h | -help | --help) usage ;;
+    *) echo "Unrecognized option: ${args[0]}"; usage ;;
+    esac
+done
+
+# Convert to full paths:
+HOST_COMPILER=$(which ${HOST_COMPILER})
+CUDA_COMPILER=$(which ${CUDA_COMPILER})
+
+if [[ -n "${CUDA_ARCHS}" ]]; then
+ GLOBAL_CMAKE_OPTIONS+=("-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}")
+fi
+
+if [ $VERBOSE ]; then
+ set -x
+fi
+
+# From here on, treat unset variables as errors (the option parsing above relies on unset values).
+set -u
+
+readonly PARALLEL_LEVEL=${PARALLEL_LEVEL:=$(nproc)}
+
+if [ -z ${CCCL_BUILD_INFIX+x} ]; then
+ CCCL_BUILD_INFIX=""
+fi
+
+# Presets will be configured in this directory:
+BUILD_DIR="../build/${CCCL_BUILD_INFIX}"
+
+# The most recent build will always be symlinked to build/latest
+mkdir -p $BUILD_DIR
+rm -f ../build/latest
+ln -sf $BUILD_DIR ../build/latest
+
+# Now that BUILD_DIR exists, use readlink to canonicalize the path:
+BUILD_DIR=$(readlink -f "${BUILD_DIR}")
+
+# Prepare environment for CMake:
+export CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL}"
+export CTEST_PARALLEL_LEVEL="1"
+export CXX="${HOST_COMPILER}"
+export CUDACXX="${CUDA_COMPILER}"
+export CUDAHOSTCXX="${HOST_COMPILER}"
+export CXX_STANDARD
+
+source ./pretty_printing.sh
+
+# Print a log group with the working directory, build variables, current commit and nvidia-smi output.
+print_environment_details() {
+  begin_group "⚙️ Environment Details"
+  echo "pwd=$(pwd)"
+  # NOTE(review): NVCC_VERSION is not assigned anywhere in this file — confirm it comes from the environment.
+  print_var_values \
+      BUILD_DIR \
+      CXX_STANDARD \
+      CXX \
+      CUDACXX \
+      CUDAHOSTCXX \
+      NVCC_VERSION \
+      CMAKE_BUILD_PARALLEL_LEVEL \
+      CTEST_PARALLEL_LEVEL \
+      CCCL_BUILD_INFIX \
+      GLOBAL_CMAKE_OPTIONS
+
+  echo "Current commit is:"
+  git log -1 || echo "Not a repository"
+  # nvidia-smi is optional here: its output is shown only when the command is found.
+  if command -v nvidia-smi &> /dev/null; then
+    nvidia-smi
+  else
+    echo "nvidia-smi not found"
+  fi
+
+  end_group "⚙️ Environment Details"
+}
+
+fail_if_no_gpu() {
+ if ! nvidia-smi &> /dev/null; then
+ echo "Error: No NVIDIA GPU detected. Please ensure you have an NVIDIA GPU installed and the drivers are properly configured." >&2
+ exit 1
+ fi
+}
+
+# Print the longest test steps from ctest log $1 (first 15 lines only outside GitHub Actions); no-op if the log is missing.
+function print_test_time_summary()
+{
+    ctest_log=${1}
+    if [ ! -f ${ctest_log} ]; then
+        return 0
+    fi
+    begin_group "⏱️ Longest Test Steps"
+    if [ -z "${GITHUB_ACTIONS:-}" ]; then
+        cmake -DLOGFILE=${ctest_log} -P ../cmake/PrintCTestRunTimes.cmake | head -n 15
+    else
+        cmake -DLOGFILE=${ctest_log} -P ../cmake/PrintCTestRunTimes.cmake
+    fi
+    end_group "⏱️ Longest Test Steps"
+}
+
+# Run `cmake --preset=$2` from the parent directory inside a log group named after $1.
+# $3 is appended unquoted as extra CMake options; returns the status captured after run_command.
+function configure_preset()
+{
+    local BUILD_NAME=$1 PRESET=$2 CMAKE_OPTIONS=$3
+    local GROUP_NAME="🛠️ CMake Configure ${BUILD_NAME}"
+
+    pushd .. > /dev/null
+    run_command "$GROUP_NAME" cmake --preset=$PRESET --log-level=VERBOSE "${GLOBAL_CMAKE_OPTIONS[@]}" $CMAKE_OPTIONS
+    status=$?
+    popd > /dev/null
+    return $status
+}
+
+# Build preset $2 (log group named after $1) and report sccache stats; returns the status captured after the build.
+function build_preset() {
+    local BUILD_NAME=$1
+    local PRESET=$2
+    local GROUP_NAME="🏗️ Build ${BUILD_NAME}"
+
+    source "./sccache_stats.sh" "start"
+
+    pushd .. > /dev/null
+    run_command "$GROUP_NAME" cmake --build --preset=$PRESET -v
+    status=$?
+    popd > /dev/null
+
+    minimal_sccache_stats=$(source "./sccache_stats.sh" "end")
+
+    # Only print detailed stats in actions workflow
+    if [ -n "${GITHUB_ACTIONS:-}" ]; then
+        begin_group "💲 sccache stats"
+        echo "${minimal_sccache_stats}"
+        sccache -s
+        end_group
+
+        begin_group "🥷 ninja build times"
+        # The inner quotes are escaped so they are printed instead of ending the string.
+        echo "The \"weighted\" time is the elapsed time of each build step divided by the number
+ of tasks that were running in parallel. This makes it an excellent approximation
+ of how \"important\" a slow step was. A link that is entirely or mostly serialized
+ will have a weighted time that is the same or similar to its elapsed time. A
+ compile that runs in parallel with 999 other compiles will have a weighted time
+ that is tiny."
+        ./ninja_summary.py -C ${BUILD_DIR}/${PRESET}
+        end_group
+    else
+        echo $minimal_sccache_stats
+    fi
+
+    return $status
+}
+
+# Run the ctest preset $2 (log group named after $1), then print the test time summary.
+# Calls fail_if_no_gpu first; returns the status captured after run_command.
+function test_preset()
+{
+    local BUILD_NAME=$1 PRESET=$2
+    local GROUP_NAME="🚀 Test ${BUILD_NAME}"
+
+    fail_if_no_gpu
+
+    ctest_log_dir="${BUILD_DIR}/log/ctest"
+    ctest_log="${ctest_log_dir}/${PRESET}"
+    mkdir -p "${ctest_log_dir}"
+
+    pushd .. > /dev/null
+    run_command "$GROUP_NAME" ctest --output-log "${ctest_log}" --preset=$PRESET
+    status=$?
+    popd > /dev/null
+
+    print_test_time_summary ${ctest_log}
+
+    return $status
+}
+
+# Configure preset $2 with the extra CMake options in $3, then build it.
+# $1 is the human-readable name used for the log groups.
+function configure_and_build_preset()
+{
+    local BUILD_NAME=$1 PRESET=$2 CMAKE_OPTIONS=$3
+
+    configure_preset "$BUILD_NAME" "$PRESET" "$CMAKE_OPTIONS"
+    build_preset "$BUILD_NAME" "$PRESET"
+}
diff --git a/ci/build_nvbench.sh b/ci/build_nvbench.sh
new file mode 100755
index 0000000..ecd0628
--- /dev/null
+++ b/ci/build_nvbench.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+source "$(dirname "$0")/build_common.sh"
+
+print_environment_details
+
+PRESET="nvbench-cpp$CXX_STANDARD"
+
+CMAKE_OPTIONS=""
+
+configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS"
+
+print_time_summary
diff --git a/ci/common/build.bash b/ci/common/build.bash
deleted file mode 100755
index 61b3654..0000000
--- a/ci/common/build.bash
+++ /dev/null
@@ -1,231 +0,0 @@
-#! /usr/bin/env bash
-
-# Copyright (c) 2018-2020 NVIDIA Corporation
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# Released under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-
-################################################################################
-# NVBench build script for gpuCI
-################################################################################
-
-set -e
-
-# append variable value
-# Appends ${value} to ${variable}, adding a space before ${value} if
-# ${variable} is not empty.
-function append {
- tmp="${!1:+${!1} }${2}"
- eval "${1}=\${tmp}"
-}
-
-# log args...
-# Prints out ${args[*]} with a gpuCI log prefix and a newline before and after.
-function log() {
- printf "\n>>>> %s\n\n" "${*}"
-}
-
-# print_with_trailing_blank_line args...
-# Prints ${args[*]} with one blank line following, preserving newlines within
-# ${args[*]} but stripping any preceding ${args[*]}.
-function print_with_trailing_blank_line {
- printf "%s\n\n" "${*}"
-}
-
-# echo_and_run name args...
-# Echo ${args[@]}, then execute ${args[@]}
-function echo_and_run {
- echo "${1}: ${@:2}"
- ${@:2}
-}
-
-# echo_and_run_timed name args...
-# Echo ${args[@]}, then execute ${args[@]} and report how long it took,
-# including ${name} in the output of the time.
-function echo_and_run_timed {
- echo "${@:2}"
- TIMEFORMAT=$'\n'"${1} Time: %lR"
- time ${@:2}
-}
-
-# join_delimit [value [value [...]]]
-# Combine all values into a single string, separating each by a single character
-# delimiter. Eg:
-# foo=(bar baz kramble)
-# joined_foo=$(join_delimit "|" "${foo[@]}")
-# echo joined_foo # "bar|baz|kramble"
-function join_delimit {
- local IFS="${1}"
- shift
- echo "${*}"
-}
-
-################################################################################
-# VARIABLES - Set up bash and environmental variables.
-################################################################################
-
-# Get the variables the Docker container set up for us: ${CXX}, ${CUDACXX}, etc.
-source /etc/cccl.bashrc
-
-# Set path.
-export PATH=/usr/local/cuda/bin:${PATH}
-
-# Set home to the job's workspace.
-export HOME=${WORKSPACE}
-
-# Switch to the build directory.
-cd ${WORKSPACE}
-mkdir -p build
-cd build
-
-# Remove any old .ninja_log file so the PrintNinjaBuildTimes step is accurate:
-rm -f .ninja_log
-
-if [[ -z "${CMAKE_BUILD_TYPE}" ]]; then
- CMAKE_BUILD_TYPE="Release"
-fi
-
-CMAKE_BUILD_FLAGS="--"
-
-# The Docker image sets up `${CXX}` and `${CUDACXX}`.
-append CMAKE_FLAGS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
-append CMAKE_FLAGS "-DCMAKE_CUDA_COMPILER='${CUDACXX}'"
-
-if [[ "${CXX_TYPE}" == "nvcxx" ]]; then
- echo "nvc++ not supported."
- exit 1
-else
- if [[ "${CXX_TYPE}" == "icc" ]]; then
- echo "icc not supported."
- exit 1
- fi
- # We're using NVCC so we need to set the host compiler.
- append CMAKE_FLAGS "-DCMAKE_CXX_COMPILER='${CXX}'"
- append CMAKE_FLAGS "-DCMAKE_CUDA_HOST_COMPILER='${CXX}'"
- append CMAKE_FLAGS "-G Ninja"
- # Don't stop on build failures.
- append CMAKE_BUILD_FLAGS "-k0"
-fi
-
-if [[ -n "${PARALLEL_LEVEL}" ]]; then
- DETERMINE_PARALLELISM_FLAGS="-j ${PARALLEL_LEVEL}"
-fi
-
-WSL=0
-if [[ $(grep -i microsoft /proc/version) ]]; then
- echo "Windows Subsystem for Linux detected."
- WSL=1
-fi
-export WSL
-
-#append CMAKE_FLAGS "-DCMAKE_CUDA_ARCHITECTURES=all"
-
-append CMAKE_FLAGS "-DNVBench_ENABLE_EXAMPLES=ON"
-append CMAKE_FLAGS "-DNVBench_ENABLE_TESTING=ON"
-append CMAKE_FLAGS "-DNVBench_ENABLE_CUPTI=ON"
-append CMAKE_FLAGS "-DNVBench_ENABLE_WERROR=ON"
-
-# These consume a lot of time and don't currently have
-# any value as regression tests.
-append CMAKE_FLAGS "-DNVBench_ENABLE_DEVICE_TESTING=OFF"
-
-# NVML doesn't work under WSL
-if [[ ${WSL} -eq 0 ]]; then
- append CMAKE_FLAGS "-DNVBench_ENABLE_NVML=ON"
-else
- append CMAKE_FLAGS "-DNVBench_ENABLE_NVML=OFF"
-fi
-
-if [[ -n "${@}" ]]; then
- append CMAKE_BUILD_FLAGS "${@}"
-fi
-
-append CTEST_FLAGS "--output-on-failure"
-
-# Export variables so they'll show up in the logs when we report the environment.
-export CMAKE_FLAGS
-export CMAKE_BUILD_FLAGS
-export CTEST_FLAGS
-
-################################################################################
-# ENVIRONMENT - Configure and print out information about the environment.
-################################################################################
-
-log "Determine system topology..."
-
-# Set `${PARALLEL_LEVEL}` if it is unset; otherwise, this just reports the
-# system topology.
-source ${WORKSPACE}/ci/common/determine_build_parallelism.bash ${DETERMINE_PARALLELISM_FLAGS}
-
-log "Get environment..."
-
-env | sort
-
-log "Check versions..."
-
-# We use sed and echo below to ensure there is always one and only trailing
-# line following the output from each tool.
-
-${CXX} --version 2>&1 | sed -Ez '$ s/\n*$/\n/'
-
-echo
-
-${CUDACXX} --version 2>&1 | sed -Ez '$ s/\n*$/\n/'
-
-echo
-
-cmake --version 2>&1 | sed -Ez '$ s/\n*$/\n/'
-
-echo
-
-if [[ "${BUILD_TYPE}" == "gpu" ]]; then
- nvidia-smi 2>&1 | sed -Ez '$ s/\n*$/\n/'
-fi
-
-################################################################################
-# BUILD
-################################################################################
-
-log "Configure..."
-
-echo_and_run_timed "Configure" cmake .. --log-level=VERBOSE ${CMAKE_FLAGS}
-configure_status=$?
-
-log "Build..."
-
-# ${PARALLEL_LEVEL} needs to be passed after we run
-# determine_build_parallelism.bash, so it can't be part of ${CMAKE_BUILD_FLAGS}.
-set +e # Don't stop on build failures.
-echo_and_run_timed "Build" cmake --build . ${CMAKE_BUILD_FLAGS} -j ${PARALLEL_LEVEL}
-build_status=$?
-set -e
-
-################################################################################
-# TEST - Run examples and tests.
-################################################################################
-
-log "Test..."
-
-(
- # Make sure test_status captures ctest, not tee:
- # https://stackoverflow.com/a/999259/11130318
- set -o pipefail
- echo_and_run_timed "Test" ctest ${CTEST_FLAGS} -j ${PARALLEL_LEVEL} | tee ctest_log
-)
-
-test_status=$?
-
-################################################################################
-# SUMMARY - Print status of each step and exit with failure if needed.
-################################################################################
-
-log "Summary:"
-echo "- Configure Error Code: ${configure_status}"
-echo "- Build Error Code: ${build_status}"
-echo "- Test Error Code: ${test_status}"
-
-if [[ "${configure_status}" != "0" ]] || \
- [[ "${build_status}" != "0" ]] || \
- [[ "${test_status}" != "0" ]]; then
- exit 1
-fi
diff --git a/ci/common/determine_build_parallelism.bash b/ci/common/determine_build_parallelism.bash
deleted file mode 100755
index 1a1cf4c..0000000
--- a/ci/common/determine_build_parallelism.bash
+++ /dev/null
@@ -1,119 +0,0 @@
-#! /usr/bin/env bash
-
-# Copyright (c) 2018-2020 NVIDIA Corporation
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# Released under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-
-function usage {
- echo "Usage: ${0} [flags...]"
- echo
- echo "Examine the system topology to determine a reasonable amount of build"
- echo "parallelism."
- echo
- echo "Exported variables:"
- echo " \${LOGICAL_CPUS} : Logical processors (e.g. threads)."
- echo " \${PHYSICAL_CPUS} : Physical processors (e.g. cores)."
- echo " \${TOTAL_MEM} : Total system memory [GB]."
- echo " \${MAX_THREADS_PER_CORE} : Maximum threads per core allowed."
- echo " \${MIN_MEMORY_PER_THREAD} : Minimum memory [GB] per thread allowed."
- echo " \${CPU_BOUND_THREADS} : # of build threads constrained by processors."
- echo " \${MEM_BOUND_THREADS} : # of build threads constrained by memory [GB]."
- echo " \${PARALLEL_LEVEL} : Determined # of build threads."
- echo " \${MEM_PER_THREAD} : Memory [GB] per build thread."
- echo
- echo "-h, -help, --help"
- echo " Print this message."
- echo
- echo "-q, --quiet"
- echo " Print nothing and only export variables."
- echo
- echo "-j , --jobs "
- echo " Explicitly set the number of build threads to use."
- echo
- echo "--max-threads-per-core "
- echo " Specify the maximum threads per core allowed (default: ${MAX_THREADS_PER_CORE} [threads/core])."
- echo
- echo "--min-memory-per-thread "
- echo " Specify the minimum memory per thread allowed (default: ${MIN_MEMORY_PER_THREAD} [GBs/thread])."
-
- exit -3
-}
-
-QUIET=0
-
-export MAX_THREADS_PER_CORE=2
-export MIN_MEMORY_PER_THREAD=1 # [GB]
-
-while test ${#} != 0
-do
- case "${1}" in
- -h) ;&
- -help) ;&
- --help) usage ;;
- -q) ;&
- --quiet) QUIET=1 ;;
- -j) ;&
- --jobs)
- shift # The next argument is the number of threads.
- PARALLEL_LEVEL="${1}"
- ;;
- --max-threads-per-core)
- shift # The next argument is the number of threads per core.
- MAX_THREADS_PER_CORE="${1}"
- ;;
- --min-memory-per-thread)
- shift # The next argument is the amount of memory per thread.
- MIN_MEMORY_PER_THREAD="${1}"
- ;;
- esac
- shift
-done
-
-# https://stackoverflow.com/a/23378780
-if [ $(uname) == "Darwin" ]; then
- export LOGICAL_CPUS=$(sysctl -n hw.logicalcpu_max)
- export PHYSICAL_CPUS=$(sysctl -n hw.physicalcpu_max)
-else
- export LOGICAL_CPUS=$(lscpu -p | egrep -v '^#' | wc -l)
- export PHYSICAL_CPUS=$(lscpu -p | egrep -v '^#' | sort -u -t, -k 2,4 | wc -l)
-fi
-
-export TOTAL_MEM=$(awk "BEGIN { printf \"%0.4g\", $(grep MemTotal /proc/meminfo | awk '{ print $2 }') / (1024 * 1024) }")
-
-export CPU_BOUND_THREADS=$(awk "BEGIN { printf \"%.04g\", int(${PHYSICAL_CPUS} * ${MAX_THREADS_PER_CORE}) }")
-export MEM_BOUND_THREADS=$(awk "BEGIN { printf \"%.04g\", int(${TOTAL_MEM} / ${MIN_MEMORY_PER_THREAD}) }")
-
-if [[ -z "${PARALLEL_LEVEL}" ]]; then
- # Pick the smaller of the two as the default.
- if [[ "${MEM_BOUND_THREADS}" -lt "${CPU_BOUND_THREADS}" ]]; then
- export PARALLEL_LEVEL=${MEM_BOUND_THREADS}
- else
- export PARALLEL_LEVEL=${CPU_BOUND_THREADS}
- fi
-else
- EXPLICIT_PARALLEL_LEVEL=1
-fi
-
-# This can be a floating point number.
-export MEM_PER_THREAD=$(awk "BEGIN { printf \"%.04g\", ${TOTAL_MEM} / ${PARALLEL_LEVEL} }")
-
-if [[ "${QUIET}" == 0 ]]; then
- echo "Logical CPUs: ${LOGICAL_CPUS} [threads]"
- echo "Physical CPUs: ${PHYSICAL_CPUS} [cores]"
- echo "Total Mem: ${TOTAL_MEM} [GBs]"
- echo "Max Threads Per Core: ${MAX_THREADS_PER_CORE} [threads/core]"
- echo "Min Memory Per Threads: ${MIN_MEMORY_PER_THREAD} [GBs/thread]"
- echo "CPU Bound Threads: ${CPU_BOUND_THREADS} [threads]"
- echo "Mem Bound Threads: ${MEM_BOUND_THREADS} [threads]"
-
- echo -n "Parallel Level: ${PARALLEL_LEVEL} [threads]"
- if [[ -n "${EXPLICIT_PARALLEL_LEVEL}" ]]; then
- echo " (explicitly set)"
- else
- echo
- fi
-
- echo "Mem Per Thread: ${MEM_PER_THREAD} [GBs/thread]"
-fi
-
diff --git a/ci/cpu/build.bash b/ci/cpu/build.bash
deleted file mode 100755
index edf1ba3..0000000
--- a/ci/cpu/build.bash
+++ /dev/null
@@ -1,14 +0,0 @@
-#! /usr/bin/env bash
-
-# Copyright (c) 2018-2020 NVIDIA Corporation
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# Released under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-
-################################################################################
-# NVBench build script for gpuCI (CPU-only)
-################################################################################
-
-export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
-
-source ${WORKSPACE}/ci/common/build.bash
diff --git a/ci/gpu/build.bash b/ci/gpu/build.bash
deleted file mode 100755
index 9f6fc01..0000000
--- a/ci/gpu/build.bash
+++ /dev/null
@@ -1,14 +0,0 @@
-#! /usr/bin/env bash
-
-# Copyright (c) 2018-2020 NVIDIA Corporation
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# Released under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-
-################################################################################
-# NVBench build script for gpuCI (heterogeneous)
-################################################################################
-
-export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
-
-source ${WORKSPACE}/ci/common/build.bash
diff --git a/ci/local/build.bash b/ci/local/build.bash
deleted file mode 100755
index 60d22de..0000000
--- a/ci/local/build.bash
+++ /dev/null
@@ -1,215 +0,0 @@
-#! /usr/bin/env bash
-
-# Copyright (c) 2018-2020 NVIDIA Corporation
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# Released under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-
-################################################################################
-# NVBench local containerized build script
-################################################################################
-
-function usage {
- echo "Usage: ${0} [flags...] [cmake-targets...]"
- echo
- echo "Build and test your local repository using a gpuCI Docker image."
- echo "If CMake targets are specified, only those targets are built and tested."
- echo "Otherwise, everything is built and tested."
- echo
- echo "-h, -help, --help"
- echo " Print this message."
- echo
- echo "-r , --repository "
- echo " Path to the repository (default: ${REPOSITORY_PATH})."
- echo
- echo "-i , --image "
- echo " Docker image to use (default: ${IMAGE})"
- echo
- echo "-l, --local-image"
- echo " Use the local version of the image instead of pulling from Docker hub."
- echo
- echo "-s, --shell-only"
- echo " Skip building and testing and launch an interactive shell instead."
- echo
- echo "-d, --disable-gpus"
- echo " Don't start the container with the NVIDIA runtime and GPUs attached."
- echo
- echo "-c, --clean"
- echo " If the build directory already exists, delete it."
- echo
- echo "-j , --jobs "
- echo " Number of threads to use when building (default: inferred)."
- echo
- echo "-b , --cmake-build-type "
- echo " CMake build type to use, either Release, RelWithDebInfo, or Debug"
- echo " (default: ${CMAKE_BUILD_TYPE})."
- echo
-
- exit -3
-}
-
-SCRIPT_PATH=$(cd $(dirname ${0}); pwd -P)
-
-REPOSITORY_PATH=$(realpath ${SCRIPT_PATH}/../..)
-
-################################################################################
-# FLAGS - Process command line flags.
-################################################################################
-
-IMAGE="gpuci/cccl:cuda11.5.1-devel-ubuntu20.04-gcc9"
-
-LOCAL_IMAGE=0
-
-SHELL_ONLY=0
-
-BUILD_TYPE="gpu"
-
-CLEAN=0
-
-PARALLEL_LEVEL=""
-
-CMAKE_BUILD_TYPE="Release"
-
-TARGETS=""
-
-while test ${#} != 0
-do
- case "${1}" in
- -h) ;&
- -help) ;&
- --help) usage ;;
- -r) ;&
- --repository)
- shift # The next argument is the path.
- REPOSITORY_PATH="${1}"
- ;;
- -i) ;&
- --image)
- shift # The next argument is the image.
- IMAGE="${1}"
- ;;
- -l) ;&
- --local-image) LOCAL_IMAGE=1 ;;
- -s) ;&
- --shell-only) SHELL_ONLY=1 ;;
- -d) ;&
- --disable-gpus) BUILD_TYPE="cpu" ;;
- -c) ;&
- --clean) CLEAN=1 ;;
- -j) ;&
- --jobs)
- shift # The next argument is the number of threads.
- PARALLEL_LEVEL="${1}"
- ;;
- -b) ;&
- --cmake-build-type)
- shift # The next argument is the build type.
- CMAKE_BUILD_TYPE="${1}"
- ;;
- *)
- TARGETS="${TARGETS:+${TARGETS} }${1}"
- ;;
- esac
- shift
-done
-
-################################################################################
-# PATHS - Setup paths for the container.
-################################################################################
-
-# ${REPOSITORY_PATH} is the local filesystem path to the Git repository being
-# built and tested. It can be set with the --repository flag.
-#
-# ${BUILD_PATH} is the local filesystem path that will be used for the build. It
-# is named after the image name, allowing multiple image builds to coexist on
-# the local filesystem.
-#
-# ${REPOSITORY_PATH_IN_CONTAINER} is the location of ${REPOSITORY_PATH} inside
-# the container.
-#
-# ${BUILD_PATH_IN_CONTAINER} is the location of ${BUILD_PATH} inside the
-# container.
-
-BUILD_PATH=${REPOSITORY_PATH}/build_$(echo "$(basename "${IMAGE}")" | sed -e 's/:/_/g' | sed -e 's/-/_/g')
-
-if [[ "${CLEAN}" != 0 ]]; then
- rm -rf ${BUILD_PATH}
-fi
-
-mkdir -p ${BUILD_PATH}
-
-BASE_PATH_IN_CONTAINER="/cccl"
-
-REPOSITORY_PATH_IN_CONTAINER="${BASE_PATH_IN_CONTAINER}/$(basename "${REPOSITORY_PATH}")"
-
-BUILD_PATH_IN_CONTAINER="${BASE_PATH_IN_CONTAINER}/$(basename "${REPOSITORY_PATH}")/build"
-
-################################################################################
-# ENVIRONMENT - Setup the thunk build script that will be run by the container.
-################################################################################
-
-# We have to run `ldconfig` to rebuild `ld.so.cache` to work around this
-# failure on Debian: https://github.com/NVIDIA/nvidia-docker/issues/1399
-
-COMMAND="sudo ldconfig; sudo ldconfig"
-if [[ "${SHELL_ONLY}" != 0 ]]; then
- COMMAND="${COMMAND}; bash"
-else
- COMMAND="${COMMAND}; ${REPOSITORY_PATH_IN_CONTAINER}/ci/common/build.bash ${TARGETS} || bash"
-fi
-
-################################################################################
-# GPU - Setup GPUs.
-################################################################################
-
-# Note: We always start docker with --gpus, even for cpu builds. Otherwise
-# libcuda.so.1 is not present and no NVBench tests are able to run.
-
-# Limit GPUs available to the container based on ${CUDA_VISIBLE_DEVICES}.
-if [[ -z "${CUDA_VISIBLE_DEVICES}" ]]; then
- VISIBLE_DEVICES="all"
-else
- VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES}"
-fi
-
-DOCKER_MAJOR_VER=$(docker -v | sed 's/[^[0-9]*\([0-9]*\).*/\1/')
-GPU_OPTS="--gpus device=${VISIBLE_DEVICES}"
-if [[ "${DOCKER_MAJOR_VER}" -lt 19 ]]
-then
- GPU_OPTS="--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES='${VISIBLE_DEVICES}'"
-fi
-
-################################################################################
-# LAUNCH - Pull and launch the container.
-################################################################################
-
-#NVIDIA_DOCKER_INSTALLED=$(docker info 2>&1 | grep -i runtime | grep -c nvidia)
-NVIDIA_DOCKER_INSTALLED=1 # Broken on WSL
-if [[ "${NVIDIA_DOCKER_INSTALLED}" == 0 ]]; then
- echo "NVIDIA Docker not found, please install it: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-docker-ce"
- exit -4
-fi
-
-if [[ "${LOCAL_IMAGE}" == 0 ]]; then
- docker pull "${IMAGE}"
-fi
-
-docker run --rm -it ${GPU_OPTS} \
- --cap-add=SYS_PTRACE \
- --user "$(id -u)":"$(id -g)" \
- -v "${REPOSITORY_PATH}":"${REPOSITORY_PATH_IN_CONTAINER}" \
- -v "${BUILD_PATH}":"${BUILD_PATH_IN_CONTAINER}" \
- -v /etc/passwd:/etc/passwd:ro \
- -v /etc/group:/etc/group:ro \
- -v /etc/subuid:/etc/subuid:ro \
- -v /etc/subgid:/etc/subgid:ro \
- -v /etc/shadow:/etc/shadow:ro \
- -v /etc/gshadow:/etc/gshadow:ro \
- -e "WORKSPACE=${REPOSITORY_PATH_IN_CONTAINER}" \
- -e "BUILD_TYPE=${BUILD_TYPE}" \
- -e "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" \
- -e "COVERAGE_PLAN=${COVERAGE_PLAN}" \
- -e "PARALLEL_LEVEL=${PARALLEL_LEVEL}" \
- -w "${BUILD_PATH_IN_CONTAINER}" \
- "${IMAGE}" bash -c "${COMMAND}"
-
diff --git a/ci/matrix.yaml b/ci/matrix.yaml
new file mode 100644
index 0000000..e6054fd
--- /dev/null
+++ b/ci/matrix.yaml
@@ -0,0 +1,85 @@
+
+cuda_prev_min: &cuda_prev_min '11.1'
+cuda_prev_max: &cuda_prev_max '11.8'
+cuda_curr: &cuda_curr '12.4'
+
+# The GPUs to test on
+gpus:
+ - 'a100'
+ - 'v100'
+
+# The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers
+devcontainer_version: '24.06'
+
+# gcc compiler configurations
+gcc6: &gcc6 { name: 'gcc', version: '6', exe: 'g++' }
+gcc7: &gcc7 { name: 'gcc', version: '7', exe: 'g++' }
+gcc8: &gcc8 { name: 'gcc', version: '8', exe: 'g++' }
+gcc9: &gcc9 { name: 'gcc', version: '9', exe: 'g++' }
+gcc10: &gcc10 { name: 'gcc', version: '10', exe: 'g++' }
+gcc11: &gcc11 { name: 'gcc', version: '11', exe: 'g++' }
+gcc12: &gcc12 { name: 'gcc', version: '12', exe: 'g++' }
+gcc-oldest: &gcc-oldest { name: 'gcc', version: '6', exe: 'g++' }
+gcc-newest: &gcc-newest { name: 'gcc', version: '12', exe: 'g++' }
+
+# LLVM Compiler configurations
+llvm9: &llvm9 { name: 'llvm', version: '9', exe: 'clang++' }
+llvm10: &llvm10 { name: 'llvm', version: '10', exe: 'clang++' }
+llvm11: &llvm11 { name: 'llvm', version: '11', exe: 'clang++' }
+llvm12: &llvm12 { name: 'llvm', version: '12', exe: 'clang++' }
+llvm13: &llvm13 { name: 'llvm', version: '13', exe: 'clang++' }
+llvm14: &llvm14 { name: 'llvm', version: '14', exe: 'clang++' }
+llvm15: &llvm15 { name: 'llvm', version: '15', exe: 'clang++' }
+llvm16: &llvm16 { name: 'llvm', version: '16', exe: 'clang++' }
+llvm-oldest: &llvm-oldest { name: 'llvm', version: '9', exe: 'clang++' }
+llvm-newest: &llvm-newest { name: 'llvm', version: '16', exe: 'clang++' }
+
+# MSVC configs
+msvc2017: &msvc2017 { name: 'cl', version: '14.16', exe: 'cl++' }
+msvc2019: &msvc2019 { name: 'cl', version: '14.29', exe: 'cl++' }
+msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' }
+
+# oneAPI configs
+oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' }
+
+# Each environment below will generate a unique build/test job
+# See the "compute-matrix" job in the workflow for how this is parsed and used
+# cuda: The CUDA Toolkit version
+# os: The operating system used
+# cpu: The CPU architecture
+# compiler: The compiler to use
+# name: The compiler name
+# version: The compiler version
+# exe: The unversioned compiler binary name
+# std: The C++ standards to build for
+# This field is unique as it will generate an independent build/test job for each value
+
+# Configurations that will run for every PR
+pull_request:
+ nvcc:
+ - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [17], jobs: ['build']}
+ - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [17], jobs: ['build']}
+ - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [17], jobs: ['build']}
+ - {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [17], jobs: ['build']}
+ - {cuda: *cuda_prev_min, os: 'windows2022', cpu: 'amd64', compiler: *msvc2017, std: [17], jobs: ['build']}
+ - {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90'}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'}
+ - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [17], jobs: ['build', 'test']}
+ - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [17], jobs: ['build', 'test']}
+ - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *llvm16, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [17], jobs: ['build']}
+ - {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *oneapi, std: [17], jobs: ['build']}
diff --git a/ci/pretty_printing.sh b/ci/pretty_printing.sh
new file mode 100644
index 0000000..5bea1af
--- /dev/null
+++ b/ci/pretty_printing.sh
@@ -0,0 +1,105 @@
+# print_var_values NAME...: print "NAME=value" for each named variable,
+# or "NAME=(undefined)" when that variable is unset or empty.
+function print_var_values() {
+    for var_name; do
+        [ -n "$var_name" ] || {
+            echo "Usage: print_var_values ..."
+            return 1
+        }
+
+        # ${!var_name} is indirect expansion: the value of the variable named by var_name.
+        echo "$var_name=${!var_name:-(undefined)}"
+    done
+}
+
+# begin_group: open a named section of log output, possibly with color.
+# Usage: begin_group "Group Name" [Color]
+#   Group Name: title of the section.
+#   Color (optional): ANSI color code for the title. Default is blue (34).
+function begin_group() {
+    # Color options: https://gist.github.com/JBlond/2fea43a3049b38287e5e9cefc87b2124
+    local default_color="34" # blue
+    local title="${1:-}"
+    local sgr="${2:-$default_color}"
+    # Under GitHub Actions emit a ::group:: marker; elsewhere print a banner line.
+    if [ -z "${GITHUB_ACTIONS:-}" ]; then
+        echo -e "\e[${sgr}m================== ${title} ======================\e[0m"
+    else
+        echo -e "::group::\e[${sgr}m${title}\e[0m"
+    fi
+}
+
+# end_group: close a named section of log output and report its result.
+# Usage: end_group "Group Name" [Exit Status] [Duration]
+#   Group Name: name of the group being closed.
+#   Exit Status (optional): exit status of the command run in the group. Default is 0.
+#   Duration (optional): elapsed seconds; appended to the banner outside GitHub Actions.
+function end_group() {
+    local name="${1:-}"
+    local build_status="${2:-0}"
+    local duration="${3:-}"
+    local red="31" blue="34"
+    local timing="${duration:+ - Duration: ${duration}s}"
+
+    if [ -z "${GITHUB_ACTIONS:-}" ]; then
+        if ! [ "$build_status" -ne 0 ]; then
+            echo -e "\e[${blue}m================== End ${name} - Success${timing} ==================\n\e[0m"
+        else
+            echo -e "\e[${red}m================== End ${name} - Failed${timing} ==================\e[0m"
+        fi
+        return
+    fi
+    echo "::endgroup::"
+    if [ "$build_status" -ne 0 ]; then
+        echo -e "::error::\e[${red}m ${name} - Failed (⬆️ click above for full log ⬆️)\e[0m"
+    fi
+}
+
+declare -A command_durations # group name -> duration in whole seconds, filled by run_command
+
+# run_command: run a command inside a named log group, time it, and record the duration in command_durations.
+# Usage: run_command "Group Name" command [arguments...]   (returns the command's exit status)
+function run_command() {
+ local group_name="${1:-}"
+ shift
+ local command=("$@") # remaining arguments: the command and its arguments, kept as an array
+ local status
+
+ begin_group "$group_name"
+ set +e # let a failing command fall through so its status can be captured below
+ local start_time=$(date +%s)
+ "${command[@]}"
+ status=$?
+ local end_time=$(date +%s)
+ set -e # NOTE: turns errexit on unconditionally, whatever the caller's setting was
+ local duration=$((end_time - start_time))
+ end_group "$group_name" $status $duration
+ command_durations["$group_name"]=$duration # read back by print_time_summary
+ return $status
+}
+
+function string_width() {
+ local str="$1"
+ echo "$str" | awk '{print length}'
+}
+
+# print_time_summary: print every duration stored in command_durations, then clear the array.
+function print_time_summary() {
+    local group
+    local max_length=0
+
+    # First pass: find the widest group name so the name column lines up.
+    for group in "${!command_durations[@]}"; do
+        local group_length=$(string_width "$group")
+        if [ "$max_length" -lt "$group_length" ]; then
+            max_length=$group_length
+        fi
+    done
+
+    echo "Time Summary:"
+    for group in "${!command_durations[@]}"; do
+        printf "%-${max_length}s : %s seconds\n" "$group" "${command_durations[$group]}"
+    done
+
+    declare -gA command_durations=() # clear the array of timing info
+}
diff --git a/ci/sccache_hit_rate.sh b/ci/sccache_hit_rate.sh
new file mode 100755
index 0000000..de8ae46
--- /dev/null
+++ b/ci/sccache_hit_rate.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Ensure two arguments are provided
+if [ $# -ne 2 ]; then
+ echo "Usage: $0 " >&2
+ exit 1
+fi
+
+# Print the contents of the before file
+echo "=== Contents of $1 ===" >&2
+cat $1 >&2
+echo "=== End of $1 ===" >&2
+
+# Print the contents of the after file
+echo "=== Contents of $2 ===" >&2
+cat $2 >&2
+echo "=== End of $2 ===" >&2
+
+# Extract compile requests and cache hits from the before and after files
+requests_before=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$1")
+hits_before=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$1")
+requests_after=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$2")
+hits_after=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$2")
+
+# Calculate the differences to find out how many new requests and hits
+requests_diff=$((requests_after - requests_before))
+hits_diff=$((hits_after - hits_before))
+
+echo "New Compile Requests: $requests_diff" >&2
+echo "New Hits: $hits_diff" >&2
+
+# Calculate and print the hit rate
+if [ $requests_diff -eq 0 ]; then
+ echo "No new compile requests, hit rate is not applicable"
+else
+ hit_rate=$(awk -v hits=$hits_diff -v requests=$requests_diff 'BEGIN {printf "%.2f", hits/requests * 100}')
+ echo "sccache hit rate: $hit_rate%" >&2
+ echo "$hit_rate"
+fi
diff --git a/ci/sccache_stats.sh b/ci/sccache_stats.sh
new file mode 100755
index 0000000..3a3ebc4
--- /dev/null
+++ b/ci/sccache_stats.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# This script prints the sccache hit rate between two calls to sccache --show-stats.
+# It should be sourced in your script before and after the operations you want to profile,
+# with the 'start' or 'end' argument respectively.
+
+mode=$1
+
+if [[ "$mode" != "start" && "$mode" != "end" ]]; then
+ echo "Invalid mode: $mode"
+ echo "Usage: $0 {start|end}"
+ exit 1
+fi
+
+# Check if sccache is available
+if ! command -v sccache &> /dev/null; then
+ echo "Notice: sccache is not available. Skipping..."
+ exit 0
+fi
+
+case $mode in
+ start)
+ export SCCACHE_START_HITS=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}')
+ export SCCACHE_START_MISSES=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}')
+ ;;
+ end)
+ if [[ -z ${SCCACHE_START_HITS+x} || -z ${SCCACHE_START_MISSES+x} ]]; then
+ echo "Error: start stats not collected. Did you call this script with 'start' before your operations?"
+ exit 1
+ fi
+
+ final_hits=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}')
+ final_misses=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}')
+ hits=$((final_hits - SCCACHE_START_HITS))
+ misses=$((final_misses - SCCACHE_START_MISSES))
+ total=$((hits + misses))
+
+ prefix=""
+ if [ ${GITHUB_ACTIONS:-false} = "true" ]; then
+ prefix="::notice::"
+ fi
+
+ if (( total > 0 )); then
+ hit_rate=$(awk -v hits="$hits" -v total="$total" 'BEGIN { printf "%.2f", (hits / total) * 100 }')
+ echo ${prefix}"sccache hits: $hits | misses: $misses | hit rate: $hit_rate%"
+ else
+ echo ${prefix}"sccache stats: N/A No new compilation requests"
+ fi
+ unset SCCACHE_START_HITS
+ unset SCCACHE_START_MISSES
+ ;;
+esac
diff --git a/ci/test_nvbench.sh b/ci/test_nvbench.sh
new file mode 100755
index 0000000..f89c6fe
--- /dev/null
+++ b/ci/test_nvbench.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Build NVBench via build_nvbench.sh, then run its CTest preset.
+source "$(dirname "$0")/build_common.sh"
+
+# Run NVBench tests with high parallelism. If any need to be
+# serialized, define the `RUN_SERIAL` CMake property on the
+# test.
+export CTEST_PARALLEL_LEVEL=${PARALLEL_LEVEL}
+
+print_environment_details
+
+./build_nvbench.sh "$@"
+
+PRESET="nvbench-cpp$CXX_STANDARD"
+
+# Quote the preset so it always reaches test_preset as a single argument.
+test_preset "NVBench" "${PRESET}"
+print_time_summary
diff --git a/ci/windows/build_common.psm1 b/ci/windows/build_common.psm1
new file mode 100644
index 0000000..9d8e9cf
--- /dev/null
+++ b/ci/windows/build_common.psm1
@@ -0,0 +1,205 @@
+
+# Module initialization (runs on Import-Module): finds the host compiler, prepares the build directory and the CMake environment.
+Param(
+    # C++ standard (alias: -std); 17 is the only accepted value.
+    [Parameter(Mandatory = $true)][Alias("std")]
+    [ValidateNotNullOrEmpty()][ValidateSet(17)]
+    [int]$CXX_STANDARD = 17
+)
+
+# We need the full path to cl because otherwise cmake will replace CMAKE_CXX_COMPILER with the full path
+# and keep CMAKE_CUDA_HOST_COMPILER at "cl" which breaks our cmake script
+$script:HOST_COMPILER = (Get-Command "cl").source -replace '\\','/'
+# ProcessorCount is always a single integer; a per-processor WMI query yields one value per CPU package.
+$script:PARALLEL_LEVEL = [System.Environment]::ProcessorCount
+
+# Extract the CL version for export to build scripts:
+$script:CL_VERSION_STRING = & cl.exe /?
+if ($script:CL_VERSION_STRING -match "Version (\d+\.\d+)\.\d+") {
+    $CL_VERSION = [version]$matches[1]
+    Write-Host "Detected cl.exe version: $CL_VERSION"
+}
+
+if (-not $env:CCCL_BUILD_INFIX) { $env:CCCL_BUILD_INFIX = "" }
+
+# Presets will be configured in this directory:
+$BUILD_DIR = "../build/$env:CCCL_BUILD_INFIX"
+
+If(!(test-path -PathType container "../build")) {
+    New-Item -ItemType Directory -Path "../build"
+}
+
+# Create the preset build directory; -Force keeps this from failing when it already exists.
+New-Item -ItemType Directory -Path "$BUILD_DIR" -Force
+
+# Prepare environment for CMake:
+$env:CMAKE_BUILD_PARALLEL_LEVEL = $PARALLEL_LEVEL
+$env:CTEST_PARALLEL_LEVEL = 1
+# HOST_COMPILER is already a path string; strings have no .FullName, which would leave these variables empty.
+$env:CUDAHOSTCXX = $HOST_COMPILER
+$env:CXX = $HOST_COMPILER
+
+Write-Host "========================================"
+Write-Host "Begin build"
+Write-Host "pwd=$pwd"
+Write-Host "BUILD_DIR=$BUILD_DIR"
+Write-Host "CXX_STANDARD=$CXX_STANDARD"
+Write-Host "CXX=$env:CXX"
+Write-Host "CUDACXX=$env:CUDACXX"
+Write-Host "CUDAHOSTCXX=$env:CUDAHOSTCXX"
+Write-Host "NVCC_VERSION=$NVCC_VERSION"
+Write-Host "CMAKE_BUILD_PARALLEL_LEVEL=$env:CMAKE_BUILD_PARALLEL_LEVEL"
+Write-Host "CTEST_PARALLEL_LEVEL=$env:CTEST_PARALLEL_LEVEL"
+Write-Host "CCCL_BUILD_INFIX=$env:CCCL_BUILD_INFIX"
+Write-Host "Current commit is:"
+Write-Host "$(git log -1)"
+Write-Host "========================================"
+
+# Configure a CMake preset from the parent directory (where the presets file is expected).
+#   BUILD_NAME    - label used in the status and failure messages
+#   PRESET        - value passed to `cmake --preset`
+#   CMAKE_OPTIONS - additional cmake arguments as a single string (may be empty)
+# Throws "<BUILD_NAME> (configure) Failed" if cmake returns a non-zero exit code.
+function configure_preset {
+    Param(
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$BUILD_NAME,
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$PRESET,
+        [Parameter(Mandatory = $true)][AllowEmptyString()][string]$CMAKE_OPTIONS
+    )
+
+    $step = "$BUILD_NAME (configure)"
+
+    # CMake must be invoked in the same directory as the presets file:
+    pushd ".."
+
+    cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE
+    $test_result = $LastExitCode
+
+    # Restore the caller's location before reporting, so a failure does not leave it changed.
+    popd
+
+    If ($test_result -ne 0) {
+        throw "$step Failed"
+    }
+    Write-Host "$step complete."
+}
+
+# Build a CMake preset from the parent directory, printing sccache statistics for the build.
+#   BUILD_NAME - label used in the status and failure messages
+#   PRESET     - value passed to `cmake --build --preset`
+# Throws "<BUILD_NAME> (build) Failed" if cmake returns a non-zero exit code.
+function build_preset {
+    Param(
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$BUILD_NAME,
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$PRESET
+    )
+
+    $step = "$BUILD_NAME (build)"
+
+    # CMake must be invoked in the same directory as the presets file:
+    pushd ".."
+
+    sccache_stats('Start')
+
+    cmake --build --preset $PRESET -v
+    $test_result = $LastExitCode
+
+    sccache_stats('Stop')
+
+    # Restore the caller's location before a possible throw, so a failed build does not leave it changed.
+    popd
+    echo "$step complete"
+
+    If ($test_result -ne 0) {
+        throw "$step Failed"
+    }
+}
+
+# Run a CTest preset from the parent directory, printing sccache statistics around the run.
+#   BUILD_NAME - label used in the status and failure messages
+#   PRESET     - value passed to `ctest --preset`
+# Throws "<BUILD_NAME> (test) Failed" if ctest returns a non-zero exit code.
+function test_preset {
+    Param(
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$BUILD_NAME,
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$PRESET
+    )
+
+    $step = "$BUILD_NAME (test)"
+
+    # CTest must be invoked in the same directory as the presets file:
+    pushd ".."
+
+    sccache_stats('Start')
+
+    ctest --preset $PRESET
+    $test_result = $LastExitCode
+
+    sccache_stats('Stop')
+
+    # Restore the caller's location before a possible throw, so a failed run does not leave it changed.
+    popd
+    echo "$step complete"
+
+    If ($test_result -ne 0) {
+        throw "$step Failed"
+    }
+}
+
+# Convenience wrapper: run configure_preset, then build_preset, for the same preset.
+#   BUILD_NAME    - label forwarded to both steps
+#   PRESET        - preset name forwarded to both steps
+#   CMAKE_OPTIONS - forwarded to configure_preset only (may be empty)
+function configure_and_build_preset {
+    Param(
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$BUILD_NAME,
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][string]$PRESET,
+        [Parameter(Mandatory = $true)][AllowEmptyString()][string]$CMAKE_OPTIONS
+    )
+
+    # Arguments are bound by name; the values are passed through unchanged.
+    configure_preset -BUILD_NAME $BUILD_NAME -PRESET $PRESET -CMAKE_OPTIONS $CMAKE_OPTIONS
+
+    build_preset -BUILD_NAME $BUILD_NAME -PRESET $PRESET
+}
+
+# Report sccache activity between two calls: 'Start' stores the current counters in script scope,
+# 'Stop' prints the change since then (both hit rates are computed against all new compile requests).
+# NOTE(review): counters are picked by line position in `sccache -s` output - confirm the layout when upgrading sccache.
+function sccache_stats {
+    Param (
+        [Parameter(Mandatory = $true)][ValidateNotNullOrEmpty()][ValidateSet('Start','Stop')][string]$MODE
+    )
+    # Without sccache there is nothing to measure; skip instead of failing on the calls below.
+    If (-not (Get-Command sccache -ErrorAction SilentlyContinue)) { Write-Host "Notice: sccache is not available. Skipping..."; return }
+    $sccache_stats = sccache -s
+    If($MODE -eq 'Start') {
+        [int]$script:sccache_compile_requests = ($sccache_stats[0] -replace '[^\d]+')
+        [int]$script:sccache_cache_hits_cpp = ($sccache_stats[2] -replace '[^\d]+')
+        [int]$script:sccache_cache_hits_cuda = ($sccache_stats[3] -replace '[^\d]+')
+        [int]$script:sccache_cache_miss_cpp = ($sccache_stats[5] -replace '[^\d]+')
+        [int]$script:sccache_cache_miss_cuda = ($sccache_stats[6] -replace '[^\d]+')
+    } else {
+        [int]$final_sccache_compile_requests = ($sccache_stats[0] -replace '[^\d]+')
+        [int]$final_sccache_cache_hits_cpp = ($sccache_stats[2] -replace '[^\d]+')
+        [int]$final_sccache_cache_hits_cuda = ($sccache_stats[3] -replace '[^\d]+')
+        [int]$final_sccache_cache_miss_cpp = ($sccache_stats[5] -replace '[^\d]+')
+        [int]$final_sccache_cache_miss_cuda = ($sccache_stats[6] -replace '[^\d]+')
+
+        [int]$total_requests = $final_sccache_compile_requests - $script:sccache_compile_requests
+        [int]$total_hits_cpp = $final_sccache_cache_hits_cpp - $script:sccache_cache_hits_cpp
+        [int]$total_hits_cuda = $final_sccache_cache_hits_cuda - $script:sccache_cache_hits_cuda
+        [int]$total_miss_cpp = $final_sccache_cache_miss_cpp - $script:sccache_cache_miss_cpp
+        [int]$total_miss_cuda = $final_sccache_cache_miss_cuda - $script:sccache_cache_miss_cuda
+        If ( $total_requests -gt 0 ) {
+            [int]$hit_rate_cpp = $total_hits_cpp / $total_requests * 100;
+            [int]$hit_rate_cuda = $total_hits_cuda / $total_requests * 100;
+            echo "sccache hits cpp: $total_hits_cpp `t| misses: $total_miss_cpp `t| hit rate: $hit_rate_cpp%"
+            echo "sccache hits cuda: $total_hits_cuda `t| misses: $total_miss_cuda `t| hit rate: $hit_rate_cuda%"
+        } else {
+            echo "sccache stats: N/A No new compilation requests"
+        }
+    }
+}
+
+# Public surface of the module: the preset helpers plus the BUILD_DIR and CL_VERSION variables.
+Export-ModuleMember -Function configure_preset, build_preset, test_preset, configure_and_build_preset, sccache_stats -Variable BUILD_DIR, CL_VERSION
diff --git a/ci/windows/build_nvbench.ps1 b/ci/windows/build_nvbench.ps1
new file mode 100644
index 0000000..31b4f7c
--- /dev/null
+++ b/ci/windows/build_nvbench.ps1
@@ -0,0 +1,26 @@
+
+# Configure and build the NVBench preset ("nvbench-cpp<CXX_STANDARD>") using the helpers in build_common.psm1.
+Param(
+    [Parameter(Mandatory = $true)][Alias("std")][ValidateNotNullOrEmpty()][ValidateSet(17)]
+    [int]$CXX_STANDARD = 17
+)
+
+# Make sure we run from the ci folder (the parent of this script's directory).
+$CURRENT_PATH = Split-Path $pwd -leaf
+If($CURRENT_PATH -ne "ci") {
+    Write-Host "Moving to ci folder"
+    pushd "$PSScriptRoot/.."
+}
+
+# Re-import the module with this CXX_STANDARD; it may not be loaded yet, which must not be reported as an error.
+Remove-Module -Name build_common -ErrorAction SilentlyContinue
+Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
+
+$PRESET = "nvbench-cpp$CXX_STANDARD"
+$CMAKE_OPTIONS = ""
+
+configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS"
+
+If($CURRENT_PATH -ne "ci") {
+    popd
+}
diff --git a/cmake/PrintCTestRunTimes.cmake b/cmake/PrintCTestRunTimes.cmake
new file mode 100644
index 0000000..f4ac7d9
--- /dev/null
+++ b/cmake/PrintCTestRunTimes.cmake
@@ -0,0 +1,127 @@
+## This CMake script parses the output of ctest and prints a formatted list
+## of individual test runtimes, sorted longest first.
+##
+## ctest > ctest_log
+## cmake -DLOGFILE=ctest_log \
+## -DMINSEC=10 \
+## -P PrintCTestRunTimes.cmake
+## MINSEC is optional (default: 10); tests that ran for fewer whole seconds are only counted.
+################################################################################
+
+cmake_minimum_required(VERSION 3.15)
+
+# Left-pad the value of the variable named by `string_var` with "0" up to `width` characters; the result is stored back in the caller's scope.
+function(pad_string_with_zeros string_var width)
+  string(LENGTH "${${string_var}}" len)
+  set(zeros "")
+  if(len LESS width)
+    math(EXPR deficit "${width} - ${len}")
+    string(REPEAT "0" ${deficit} zeros)
+  endif()
+  set(${string_var} "${zeros}${${string_var}}" PARENT_SCOPE)
+endfunction()
+
+################################################################################
+
+# Validate inputs: LOGFILE is required and must exist; MINSEC is optional.
+if (NOT LOGFILE)
+  message(FATAL_ERROR "Missing -DLOGFILE= argument.")
+elseif (NOT EXISTS "${LOGFILE}")
+  message(FATAL_ERROR "LOGFILE does not exist ('${LOGFILE}').")
+endif()
+
+# Tests that ran for fewer than MINSEC whole seconds are counted, not listed.
+if (NOT DEFINED MINSEC)
+  set(MINSEC 10)
+endif()
+set(num_below_thresh 0)
+
+# Pattern for one ctest result line, assembled from its pieces. Capture groups:
+#   1 = test ID, 2 = test name, 3 = result, 4 = whole seconds of the runtime.
+string(CONCAT regex
+  "[0-9]+/[0-9]+[ ]+Test[ ]+#"
+  "([0-9]+)"
+  ":[ ]+"
+  "([^ ]+)"
+  "[ ]*\\.+[ ]*\\**[ ]*"
+  "([^ ]+)"
+  "[ ]+"
+  "([0-9]+)"
+  "\\.[0-9]+[ ]+sec"
+)
+
+message(DEBUG "LOGFILE: ${LOGFILE}")
+message(DEBUG "MINSEC: ${MINSEC}")
+message(DEBUG "regex: ${regex}")
+
+# Read the logfile and build the table: each matching line stores one row in
+# ENTRY_<key>, and `keys` lists the keys in the order they were produced.
+set(keys)
+file(STRINGS "${LOGFILE}" lines)
+foreach(line IN LISTS lines)
+  string(REGEX MATCH "${regex}" _DUMMY "${line}")
+  if (NOT CMAKE_MATCH_COUNT EQUAL 4)
+    # Not a per-test result line.
+    continue()
+  endif()
+
+  set(test_id "${CMAKE_MATCH_1}")
+  set(test_name "${CMAKE_MATCH_2}")
+  set(test_result "${CMAKE_MATCH_3}")
+  set(total_sec "${CMAKE_MATCH_4}") # floor(runtime_seconds)
+
+  # Tests below the threshold are only counted, not listed.
+  if (total_sec LESS MINSEC)
+    math(EXPR num_below_thresh "${num_below_thresh} + 1")
+    continue()
+  endif()
+
+  # Compute human readable time
+  math(EXPR days "${total_sec} / (60 * 60 * 24)")
+  math(EXPR hours "(${total_sec} / (60 * 60)) % 24")
+  math(EXPR minutes "(${total_sec} / 60) % 60")
+  math(EXPR seconds "${total_sec} % 60")
+
+  # Zero-pad each component to a fixed width so the rows can later be
+  # ordered by plain string sorting.
+  pad_string_with_zeros(days 3)
+  pad_string_with_zeros(hours 2)
+  pad_string_with_zeros(minutes 2)
+  pad_string_with_zeros(seconds 2)
+
+  # Construct table entry
+  # A later line with the same test ID overwrites the earlier entry.
+  string(MAKE_C_IDENTIFIER "${test_id}" key)
+  string(JOIN " | " ENTRY_${key}
+    "${days}d ${hours}h ${minutes}m ${seconds}s"
+    "${test_result}"
+    "${test_id}: ${test_name}"
+  )
+
+  # Record the key:
+  list(APPEND keys "${key}")
+endforeach()
+
+# Collapse repeated keys, then collect the stored rows in key order.
+list(REMOVE_DUPLICATES keys)
+set(entries)
+foreach(key IN LISTS keys)
+  list(APPEND entries "${ENTRY_${key}}")
+endforeach()
+
+if (entries)
+  # Every row starts with its zero-padded runtime, so a descending string
+  # sort lists the longest-running tests first.
+  list(SORT entries ORDER DESCENDING)
+  # Dump table:
+  foreach(entry IN LISTS entries)
+    message(STATUS "${entry}")
+  endforeach()
+else()
+  message(STATUS "LOGFILE contained no test times ('${LOGFILE}').")
+endif()
+
+# Report how many tests were left out because they ran for less than MINSEC seconds.
+if (num_below_thresh GREATER 0)
+  message(STATUS "${num_below_thresh} additional tests took < ${MINSEC}s each.")
+endif()
diff --git a/cmake/PrintNinjaBuildTimes.cmake b/cmake/PrintNinjaBuildTimes.cmake
new file mode 100644
index 0000000..65d243d
--- /dev/null
+++ b/cmake/PrintNinjaBuildTimes.cmake
@@ -0,0 +1,101 @@
+## This CMake script parses a .ninja_log file (LOGFILE) and prints a list of
+## build/link times, sorted longest first.
+##
+## cmake -DLOGFILE=<.ninja_log file> \
+## -P PrintNinjaBuildTimes.cmake
+##
+## If LOGFILE is omitted, the current directory's .ninja_log file is used.
+################################################################################
+
+cmake_minimum_required(VERSION 3.15)
+
+# Left-pad the value of the variable named by `string_var` with "0" up to `width` characters; the result is stored back in the caller's scope.
+function(pad_string_with_zeros string_var width)
+  string(LENGTH "${${string_var}}" len)
+  set(zeros "")
+  if(len LESS width)
+    math(EXPR deficit "${width} - ${len}")
+    string(REPEAT "0" ${deficit} zeros)
+  endif()
+  set(${string_var} "${zeros}${${string_var}}" PARENT_SCOPE)
+endfunction()
+
+################################################################################
+
+# Default to the .ninja_log in the current directory; either way the file must exist.
+if (NOT LOGFILE)
+  set(LOGFILE ".ninja_log")
+endif()
+if (NOT EXISTS "${LOGFILE}")
+  message(FATAL_ERROR "LOGFILE does not exist ('${LOGFILE}').")
+endif()
+
+# Read the logfile and build the table: each matching line stores one row in
+# ENTRY_<key>, and `keys` lists the keys in the order they were produced.
+set(keys)
+file(STRINGS "${LOGFILE}" lines)
+foreach(line IN LISTS lines)
+  # Tab-separated fields; captures: 1 = start (ms), 2 = end (ms), 3 = fourth field
+  # (stored as `command`). The third and fifth fields are matched but not used.
+  string(REGEX MATCH
+    "^([0-9]+)\t([0-9]+)\t[0-9]+\t([^\t]+)\t[0-9a-fA-F]+$" _DUMMY "${line}")
+  if (NOT CMAKE_MATCH_COUNT EQUAL 3)
+    continue()
+  endif()
+
+  set(start_ms ${CMAKE_MATCH_1})
+  set(end_ms ${CMAKE_MATCH_2})
+  set(command "${CMAKE_MATCH_3}")
+  math(EXPR runtime_ms "${end_ms} - ${start_ms}")
+
+  # Compute human readable time
+  math(EXPR days "${runtime_ms} / (1000 * 60 * 60 * 24)")
+  math(EXPR hours "(${runtime_ms} / (1000 * 60 * 60)) % 24")
+  math(EXPR minutes "(${runtime_ms} / (1000 * 60)) % 60")
+  math(EXPR seconds "(${runtime_ms} / 1000) % 60")
+  math(EXPR milliseconds "${runtime_ms} % 1000")
+
+  # Zero-pad so that string sorting of the rows matches numeric ordering.
+  pad_string_with_zeros(days 3)
+  pad_string_with_zeros(hours 2)
+  pad_string_with_zeros(minutes 2)
+  pad_string_with_zeros(seconds 2)
+  pad_string_with_zeros(milliseconds 3)
+
+  # Construct table entry
+  # Later values in the file for the same command overwrite earlier entries.
+  # The key is a hash of the command: MAKE_C_IDENTIFIER would map distinct
+  # commands such as "a-b.o" and "a_b.o" to the same key and drop one of them.
+  string(MD5 key "${command}")
+  set(ENTRY_${key}
+    "${days}d ${hours}h ${minutes}m ${seconds}s ${milliseconds}ms | ${command}"
+  )
+
+  # Record the key:
+  list(APPEND keys "${key}")
+endforeach()
+
+# Collapse repeated keys, then collect the stored rows in key order.
+list(REMOVE_DUPLICATES keys)
+set(entries)
+foreach(key IN LISTS keys)
+  list(APPEND entries "${ENTRY_${key}}")
+endforeach()
+
+# An empty table means no line of the log matched the expected format.
+if (NOT entries)
+  message(FATAL_ERROR "LOGFILE contained no build entries ('${LOGFILE}').")
+endif()
+
+# Every row starts with its zero-padded runtime, so a descending string sort
+# lists the longest-running commands first.
+list(SORT entries ORDER DESCENDING)
+
+# Dump table:
+message(STATUS "-----------------------+----------------------------")
+message(STATUS "Time | Command ")
+message(STATUS "-----------------------+----------------------------")
+
+foreach(entry IN LISTS entries)
+  message(STATUS "${entry}")
+endforeach()