mirror of
https://github.com/NVIDIA/nvbench.git
synced 2026-03-14 20:27:24 +00:00
Merge remote-tracking branch 'origin/main' into fea/axes_iteration_space
This commit is contained in:
@@ -36,9 +36,33 @@ BreakBeforeBinaryOperators: None
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakInheritanceList: BeforeComma
|
||||
ColumnLimit: 80
|
||||
ColumnLimit: 100
|
||||
CompactNamespaces: false
|
||||
ContinuationIndentWidth: 2
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^<nvbench'
|
||||
Priority: 1
|
||||
- Regex: '^<cub'
|
||||
Priority: 2
|
||||
- Regex: '^<thrust'
|
||||
Priority: 3
|
||||
- Regex: '^<cuda/'
|
||||
Priority: 4
|
||||
- Regex: '^<cuda'
|
||||
Priority: 5
|
||||
- Regex: '^<nvml'
|
||||
Priority: 6
|
||||
- Regex: '^<cupti'
|
||||
Priority: 7
|
||||
- Regex: '^<nvperf'
|
||||
Priority: 8
|
||||
- Regex: '^<nlohmann'
|
||||
Priority: 9
|
||||
- Regex: '^<fmt'
|
||||
Priority: 10
|
||||
- Regex: '^<[a-z_]*>$'
|
||||
Priority: 11
|
||||
IndentCaseLabels: true
|
||||
IndentPPDirectives: None
|
||||
IndentWidth: 2
|
||||
@@ -55,7 +79,7 @@ PenaltyExcessCharacter: 100
|
||||
PenaltyReturnTypeOnItsOwnLine: 90
|
||||
PointerAlignment: Right
|
||||
ReflowComments: true
|
||||
SortIncludes: true
|
||||
SortIncludes: CaseInsensitive
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: true
|
||||
|
||||
62
.clangd
Normal file
62
.clangd
Normal file
@@ -0,0 +1,62 @@
|
||||
# https://clangd.llvm.org/config
|
||||
|
||||
# Apply a config conditionally to all C files
|
||||
If:
|
||||
PathMatch: .*\.(c|h)$
|
||||
|
||||
---
|
||||
|
||||
# Apply a config conditionally to all C++ files
|
||||
If:
|
||||
PathMatch: .*\.(c|h)pp
|
||||
|
||||
---
|
||||
|
||||
# Apply a config conditionally to all CUDA files
|
||||
If:
|
||||
PathMatch: .*\.cuh?
|
||||
CompileFlags:
|
||||
Add:
|
||||
# Allow variadic CUDA functions
|
||||
- "-Xclang=-fcuda-allow-variadic-functions"
|
||||
|
||||
---
|
||||
|
||||
# Tweak the clangd parse settings for all files
|
||||
CompileFlags:
|
||||
Compiler: clang++
|
||||
CompilationDatabase: .
|
||||
Add:
|
||||
- -x
|
||||
- cuda
|
||||
# report all errors
|
||||
- "-ferror-limit=0"
|
||||
- "-ftemplate-backtrace-limit=0"
|
||||
- "-std=c++17"
|
||||
Remove:
|
||||
# strip CUDA fatbin args
|
||||
- "-Xfatbin*"
|
||||
- "-Xcompiler*"
|
||||
- "-Xcudafe*"
|
||||
- "-rdc=*"
|
||||
- "-gpu=*"
|
||||
- "--diag_suppress*"
|
||||
# strip CUDA arch flags
|
||||
- "-gencode*"
|
||||
- "--generate-code*"
|
||||
# strip gcc's -fcoroutines
|
||||
- -fcoroutines
|
||||
# strip CUDA flags unknown to clang
|
||||
- "-ccbin*"
|
||||
- "--compiler-options*"
|
||||
- "--expt-extended-lambda"
|
||||
- "--expt-relaxed-constexpr"
|
||||
- "-forward-unknown-to-host-compiler"
|
||||
- "-Werror=cross-execution-space-call"
|
||||
Diagnostics:
|
||||
Suppress:
|
||||
- "variadic_device_fn"
|
||||
- "attributes_not_allowed"
|
||||
# The NVHPC version of _NVCXX_EXPAND_PACK macro triggers this clang error.
|
||||
# Temporarily suppressing it, but should probably fix
|
||||
- "template_param_shadow"
|
||||
198
.devcontainer/README.md
Normal file
198
.devcontainer/README.md
Normal file
@@ -0,0 +1,198 @@
|
||||
> **Note**
|
||||
> The instructions in this README are specific to Linux development environments. Instructions for Windows are coming soon!
|
||||
|
||||
[](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json)
|
||||
|
||||
# CCCL Dev Containers
|
||||
|
||||
CCCL uses [Development Containers](https://containers.dev/) to provide consistent and convenient development environments for both local development and for CI. This guide covers setup in [Visual Studio Code](#quickstart-vscode-recommended) and [Docker](#quickstart-docker-manual-approach). The guide also provides additional instructions in case you want to use WSL.
|
||||
|
||||
## Table of Contents
|
||||
1. [Quickstart: VSCode (Recommended)](#vscode)
|
||||
2. [Quickstart: Docker (Manual Approach)](#docker)
|
||||
3. [Quickstart: Using WSL](#wsl)
|
||||
|
||||
## Quickstart: VSCode (Recommended) <a name="vscode"></a>
|
||||
|
||||
### Prerequisites
|
||||
- [Visual Studio Code](https://code.visualstudio.com/)
|
||||
- [Remote - Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
|
||||
- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
|
||||
- [Docker](https://docs.docker.com/engine/install/) - This is only for completeness because it should already be implicitly installed by the Dev Containers extension
|
||||
|
||||
### Steps
|
||||
|
||||
1. Clone the Repository
|
||||
```bash
|
||||
git clone https://github.com/nvidia/cccl.git
|
||||
```
|
||||
2. Open the cloned directory in VSCode
|
||||
|
||||
3. Launch a Dev Container by clicking the prompt suggesting to "Reopen in Container"
|
||||
|
||||

|
||||
|
||||
- Alternatively, use the Command Palette to start a Dev Container. Press `Ctrl+Shift+P` to open the Command Palette. Type "Remote-Containers: Reopen in Container" and select it.
|
||||
|
||||

|
||||
|
||||
4. Select an environment with the desired CTK and host compiler from the list:
|
||||
|
||||

|
||||
|
||||
5. VSCode will initialize the selected Dev Container. This can take a few minutes the first time.
|
||||
|
||||
6. Once initialized, the local `cccl/` directory is mirrored into the container to ensure any changes are persistent.
|
||||
|
||||
7. Done! See the [contributing guide](../CONTRIBUTING.md#building-and-testing) for instructions on how to build and run tests.
|
||||
|
||||
### (Optional) Authenticate with GitHub for `sccache`
|
||||
|
||||
After starting the container, there will be a prompt to authenticate with GitHub. This grants access to a [`sccache`](https://github.com/mozilla/sccache) server shared with CI and greatly accelerates local build times. This is currently limited to NVIDIA employees belonging to the `NVIDIA` or `rapidsai` GitHub organizations.
|
||||
|
||||
Without authentication to the remote server, `sccache` will still accelerate local builds by using a filesystem cache.
|
||||
|
||||
Follow the instructions in the prompt as below and enter the one-time code at https://github.com/login/device
|
||||
|
||||

|
||||
|
||||
To manually trigger this authentication, execute the `devcontainer-utils-vault-s3-init` script within the container.
|
||||
|
||||
For more information about the sccache configuration and authentication, see the documentation at [`rapidsai/devcontainers`](https://github.com/rapidsai/devcontainers/blob/branch-23.10/USAGE.md#build-caching-with-sccache).
|
||||
|
||||
## Quickstart: Docker (Manual Approach) <a name="docker"></a>
|
||||
|
||||
### Prerequisites
|
||||
- [Docker](https://docs.docker.com/desktop/install/linux-install/)
|
||||
|
||||
### Steps
|
||||
1. Clone the repository and use the [`launch.sh`](./launch.sh) script to launch the default container environment
|
||||
```bash
|
||||
git clone https://github.com/nvidia/cccl.git
|
||||
cd cccl
|
||||
./.devcontainer/launch.sh --docker
|
||||
```
|
||||
This script starts an interactive shell as the `coder` user inside the container with the local `cccl/` directory mirrored into `/home/coder/cccl`.
|
||||
|
||||
For specific environments, use the `--cuda` and `--host` options:
|
||||
```bash
|
||||
./.devcontainer/launch.sh --docker --cuda 12.2 --host gcc10
|
||||
```
|
||||
See `./.devcontainer/launch.sh --help` for more information.
|
||||
|
||||
2. Done. See the [contributing guide](../CONTRIBUTING.md#building-and-testing) for instructions on how to build and run tests.
|
||||
|
||||
## Available Environments
|
||||
|
||||
CCCL provides environments for both the oldest and newest supported CUDA versions with all compatible host compilers.
|
||||
|
||||
Look in the [`.devcontainer/`](.) directory to see the available configurations. The top-level [`devcontainer.json`](./devcontainer.json) serves as the default environment. All `devcontainer.json` files in the `cuda<CTK_VERSION>-<HOST-COMPILER>` sub-directories are variations on this top-level file, with different base images for the different CUDA and host compiler versions.
|
||||
|
||||
## VSCode Customization
|
||||
|
||||
By default, CCCL's Dev Containers come with certain VSCode settings and extensions configured by default, as can be seen in the [`devcontainer.json`](./devcontainer.json) file. This can be further customized by users without needing to modify the `devcontainer.json` file directly.
|
||||
|
||||
For extensions, the [`dev.containers.defaultExtensions` setting](https://code.visualstudio.com/docs/devcontainers/containers#_always-installed-extensions) allows listing extensions that will always be installed.
|
||||
|
||||
For more general customizations, VSCode allows using a dotfile repository. See the [VSCode documentation](https://code.visualstudio.com/docs/devcontainers/containers#_personalizing-with-dotfile-repositories) for more information.
|
||||
|
||||
## GitHub Codespaces
|
||||
|
||||
[](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json)
|
||||
|
||||
One of the benefits of Dev Containers is that they integrate natively with [GitHub Codespaces](https://github.com/features/codespaces). Codespaces provide a VSCode development environment right in your browser running on a machine in the cloud. This provides a truly one-click, turnkey development environment where you can develop, build, and test with no other setup required.
|
||||
|
||||
Click the badge above or [click here](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json) to get started with CCCL's Dev Containers on Codespaces. This will start the default Dev Container environment. [Click here](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=296416761&skip_quickstart=true) to start a Codespace with a particular environment and hardware configuration as shown:
|
||||
|
||||

|
||||
|
||||
## For Maintainers: The `make_devcontainers.sh` Script
|
||||
|
||||
### Overview
|
||||
|
||||
[`make_devcontainers.sh`](./make_devcontainers.sh) generates devcontainer configurations for the unique combinations of CUDA Toolkit (CTK) versions and host compilers in [`ci/matrix.yaml`](../ci/matrix.yaml).
|
||||
|
||||
### How It Works:
|
||||
|
||||
1. Parses the matrix from `ci/matrix.yaml`.
|
||||
2. Use the top-level [`.devcontainer/devcontainer.json`](./devcontainer.json) as a template. For each unique combination of CTK version and host compiler, generate a corresponding `devcontainer.json` configuration, adjusting only the base Docker image to match the desired environment.
|
||||
3. Place the generated configurations in the `.devcontainer` directory, organizing them into subdirectories following the naming convention `cuda<CTK_VERSION>-<COMPILER_VERSION>`.
|
||||
|
||||
For more information, see the `.devcontainer/make_devcontainers.sh --help` message.
|
||||
|
||||
**Note**: When adding or updating supported environments, modify `matrix.yaml` and then rerun this script to synchronize the `devcontainer` configurations.
|
||||
|
||||
## Quickstart: Using WSL <a name="wsl"></a>
|
||||
|
||||
> [!NOTE]
|
||||
> _Make sure you have the Nvidia driver installed on your Windows host before moving further_. Type in `nvidia-smi` for verification.
|
||||
|
||||
### Install WSL on your Windows host
|
||||
|
||||
> [!WARNING]
|
||||
> Disclaimer: This guide was developed for WSL 2 on Windows 11.
|
||||
|
||||
1. Launch a Windows terminal (_e.g. Powershell_) as an administrator.
|
||||
|
||||
2. Install WSL 2 by running:
|
||||
```bash
|
||||
wsl --install
|
||||
```
|
||||
This installs the Ubuntu distribution by default.
|
||||
|
||||
3. Restart your computer and run `wsl -l -v` on a Windows terminal to verify installation.
|
||||
|
||||
<h3 id="prereqs"> Install prerequisites and VS Code extensions</h3>
|
||||
|
||||
4. Launch your WSL/Ubuntu terminal by running `wsl` in Powershell.
|
||||
|
||||
5. Install the [WSL extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-wsl) on VS Code.
|
||||
|
||||
- `Ctrl + Shift + P` and select `WSL: Connect to WSL` (it will prompt you to install the WSL extension).
|
||||
|
||||
- Make sure you are connected to WSL with VS Code by checking the bottom left corner of the VS Code window (should indicate "WSL: Ubuntu" in our case).
|
||||
|
||||
6. Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) on VS Code.
|
||||
|
||||
- In a vanilla system you should be prompted to install `Docker` at this point, accept it. If it hangs you might have to restart VS Code after that.
|
||||
|
||||
7. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). **Make sure you install the WSL 2 version and not the native Linux one**. This builds on top of Docker so make sure you have Docker properly installed (run `docker --version`).
|
||||
|
||||
8. Open `/etc/docker/daemon.json` from within your WSL system (if the file does not exist, create it) and add the following:
|
||||
|
||||
```json
|
||||
{
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"path": "nvidia-container-runtime",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
then run `sudo systemctl restart docker.service`.
|
||||
|
||||
---
|
||||
### Build CCCL in WSL using Dev Containers
|
||||
|
||||
9. Still on your WSL terminal run `git clone https://github.com/NVIDIA/cccl.git`
|
||||
|
||||
|
||||
10. Open the CCCL cloned repo in VS Code ( `Ctrl + Shift + P `, select `File: Open Folder...` and select the path where your CCCL clone is located).
|
||||
|
||||
11. If prompted, choose `Reopen in Container`.
|
||||
|
||||
- If you are not prompted just type `Ctrl + Shift + P` and `Dev Containers: Open Folder in Container ...`.
|
||||
|
||||
12. Verify that Dev Container was configured properly by running `nvidia-smi` in your Dev Container terminal. For a proper configuration it is important for the steps in [Install prerequisites and VS Code extensions](#prereqs) to be followed in a precise order.
|
||||
|
||||
From that point on, the guide aligns with our [existing Dev Containers native Linux guide](https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md) with just one minor potential alteration:
|
||||
|
||||
13. If WSL was launched without the X-server enabled, when asked to "authenticate Git with your Github credentials", if you answer **Yes**, the browser might not open automatically, with the following error message.
|
||||
|
||||
> Failed opening a web browser at https://github.com/login/device
|
||||
exec: "xdg-open,x-www-browser,www-browser,wslview": executable file not found in $PATH
|
||||
Please try entering the URL in your browser manually
|
||||
|
||||
In that case type in the address manually in your web browser https://github.com/login/device and fill in the one-time code.
|
||||
46
.devcontainer/cuda12.0-gcc10/devcontainer.json
Normal file
46
.devcontainer/cuda12.0-gcc10/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc10-cuda12.0-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.0-gcc10",
|
||||
"CCCL_CUDA_VERSION": "12.0",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "10",
|
||||
"CCCL_BUILD_INFIX": "cuda12.0-gcc10"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.0-gcc10"
|
||||
}
|
||||
46
.devcontainer/cuda12.0-gcc11/devcontainer.json
Normal file
46
.devcontainer/cuda12.0-gcc11/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc11-cuda12.0-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.0-gcc11",
|
||||
"CCCL_CUDA_VERSION": "12.0",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "11",
|
||||
"CCCL_BUILD_INFIX": "cuda12.0-gcc11"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.0-gcc11"
|
||||
}
|
||||
46
.devcontainer/cuda12.0-gcc12/devcontainer.json
Normal file
46
.devcontainer/cuda12.0-gcc12/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc12-cuda12.0-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.0-gcc12",
|
||||
"CCCL_CUDA_VERSION": "12.0",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "12",
|
||||
"CCCL_BUILD_INFIX": "cuda12.0-gcc12"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.0-gcc12"
|
||||
}
|
||||
46
.devcontainer/cuda12.0-gcc7/devcontainer.json
Normal file
46
.devcontainer/cuda12.0-gcc7/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc7-cuda12.0-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.0-gcc7",
|
||||
"CCCL_CUDA_VERSION": "12.0",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "7",
|
||||
"CCCL_BUILD_INFIX": "cuda12.0-gcc7"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.0-gcc7"
|
||||
}
|
||||
46
.devcontainer/cuda12.0-gcc8/devcontainer.json
Normal file
46
.devcontainer/cuda12.0-gcc8/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc8-cuda12.0-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.0-gcc8",
|
||||
"CCCL_CUDA_VERSION": "12.0",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "8",
|
||||
"CCCL_BUILD_INFIX": "cuda12.0-gcc8"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.0-gcc8"
|
||||
}
|
||||
46
.devcontainer/cuda12.0-gcc9/devcontainer.json
Normal file
46
.devcontainer/cuda12.0-gcc9/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc9-cuda12.0-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.0-gcc9",
|
||||
"CCCL_CUDA_VERSION": "12.0",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "9",
|
||||
"CCCL_BUILD_INFIX": "cuda12.0-gcc9"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.0-gcc9"
|
||||
}
|
||||
46
.devcontainer/cuda12.0-llvm14/devcontainer.json
Normal file
46
.devcontainer/cuda12.0-llvm14/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-llvm14-cuda12.0-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.0-llvm14",
|
||||
"CCCL_CUDA_VERSION": "12.0",
|
||||
"CCCL_HOST_COMPILER": "llvm",
|
||||
"CCCL_HOST_COMPILER_VERSION": "14",
|
||||
"CCCL_BUILD_INFIX": "cuda12.0-llvm14"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.0-llvm14"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-gcc10/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-gcc10/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc10-cuda12.8-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc10",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "10",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc10"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc10"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-gcc11/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-gcc11/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc11-cuda12.8-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc11",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "11",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc11"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc11"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-gcc12/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-gcc12/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc12-cuda12.8-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc12",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "12",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc12"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc12"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-gcc13/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-gcc13/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc13-cuda12.8-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc13",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "13",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc13"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc13"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-gcc14/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-gcc14/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc14-cuda12.8-ubuntu24.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc14",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "14",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc14"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc14"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-gcc7/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-gcc7/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc7-cuda12.8-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc7",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "7",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc7"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc7"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-gcc8/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-gcc8/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc8-cuda12.8-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc8",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "8",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc8"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc8"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-gcc9/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-gcc9/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc9-cuda12.8-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc9",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "9",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc9"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc9"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-llvm14/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-llvm14/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-llvm14-cuda12.8-ubuntu20.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-llvm14",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "llvm",
|
||||
"CCCL_HOST_COMPILER_VERSION": "14",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-llvm14"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-llvm14"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-llvm15/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-llvm15/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-llvm15-cuda12.8-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-llvm15",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "llvm",
|
||||
"CCCL_HOST_COMPILER_VERSION": "15",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-llvm15"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-llvm15"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-llvm16/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-llvm16/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-llvm16-cuda12.8-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-llvm16",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "llvm",
|
||||
"CCCL_HOST_COMPILER_VERSION": "16",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-llvm16"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-llvm16"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-llvm17/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-llvm17/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-llvm17-cuda12.8-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-llvm17",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "llvm",
|
||||
"CCCL_HOST_COMPILER_VERSION": "17",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-llvm17"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-llvm17"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-llvm18/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-llvm18/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-llvm18-cuda12.8-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-llvm18",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "llvm",
|
||||
"CCCL_HOST_COMPILER_VERSION": "18",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-llvm18"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-llvm18"
|
||||
}
|
||||
46
.devcontainer/cuda12.8-llvm19/devcontainer.json
Normal file
46
.devcontainer/cuda12.8-llvm19/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-llvm19-cuda12.8-ubuntu22.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-llvm19",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "llvm",
|
||||
"CCCL_HOST_COMPILER_VERSION": "19",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-llvm19"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-llvm19"
|
||||
}
|
||||
46
.devcontainer/devcontainer.json
Normal file
46
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"shutdownAction": "stopContainer",
|
||||
"image": "rapidsai/devcontainers:25.06-cpp-gcc14-cuda12.8-ubuntu24.04",
|
||||
"hostRequirements": {
|
||||
"gpu": "optional"
|
||||
},
|
||||
"initializeCommand": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
|
||||
],
|
||||
"containerEnv": {
|
||||
"SCCACHE_REGION": "us-east-2",
|
||||
"SCCACHE_BUCKET": "rapids-sccache-devs",
|
||||
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
|
||||
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
|
||||
"DEVCONTAINER_NAME": "cuda12.8-gcc14",
|
||||
"CCCL_CUDA_VERSION": "12.8",
|
||||
"CCCL_HOST_COMPILER": "gcc",
|
||||
"CCCL_HOST_COMPILER_VERSION": "14",
|
||||
"CCCL_BUILD_INFIX": "cuda12.8-gcc14"
|
||||
},
|
||||
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
|
||||
"mounts": [
|
||||
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
|
||||
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"xaver.clang-format"
|
||||
],
|
||||
"settings": {
|
||||
"editor.defaultFormatter": "xaver.clang-format",
|
||||
"clang-format.executable": "/usr/local/bin/clang-format",
|
||||
"clangd.arguments": [
|
||||
"--compile-commands-dir=${workspaceFolder}"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": "cuda12.8-gcc14"
|
||||
}
|
||||
49
.devcontainer/docker-entrypoint.sh
Executable file
49
.devcontainer/docker-entrypoint.sh
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Maybe change the UID/GID of the container's non-root user to match the host's UID/GID
|
||||
|
||||
: "${REMOTE_USER:="coder"}";
|
||||
: "${OLD_UID:=}";
|
||||
: "${OLD_GID:=}";
|
||||
: "${NEW_UID:=}";
|
||||
: "${NEW_GID:=}";
|
||||
|
||||
eval "$(sed -n "s/${REMOTE_USER}:[^:]*:\([^:]*\):\([^:]*\):[^:]*:\([^:]*\).*/OLD_UID=\1;OLD_GID=\2;HOME_FOLDER=\3/p" /etc/passwd)";
|
||||
eval "$(sed -n "s/\([^:]*\):[^:]*:${NEW_UID}:.*/EXISTING_USER=\1/p" /etc/passwd)";
|
||||
eval "$(sed -n "s/\([^:]*\):[^:]*:${NEW_GID}:.*/EXISTING_GROUP=\1/p" /etc/group)";
|
||||
|
||||
if [ -z "$OLD_UID" ]; then
|
||||
echo "Remote user not found in /etc/passwd ($REMOTE_USER).";
|
||||
exec "$(pwd)/.devcontainer/nvbench-entrypoint.sh" "$@";
|
||||
elif [ "$OLD_UID" = "$NEW_UID" ] && [ "$OLD_GID" = "$NEW_GID" ]; then
|
||||
echo "UIDs and GIDs are the same ($NEW_UID:$NEW_GID).";
|
||||
exec "$(pwd)/.devcontainer/nvbench-entrypoint.sh" "$@";
|
||||
elif [ "$OLD_UID" != "$NEW_UID" ] && [ -n "$EXISTING_USER" ]; then
|
||||
echo "User with UID exists ($EXISTING_USER=$NEW_UID).";
|
||||
exec "$(pwd)/.devcontainer/nvbench-entrypoint.sh" "$@";
|
||||
else
|
||||
if [ "$OLD_GID" != "$NEW_GID" ] && [ -n "$EXISTING_GROUP" ]; then
|
||||
echo "Group with GID exists ($EXISTING_GROUP=$NEW_GID).";
|
||||
NEW_GID="$OLD_GID";
|
||||
fi
|
||||
echo "Updating UID:GID from $OLD_UID:$OLD_GID to $NEW_UID:$NEW_GID.";
|
||||
sed -i -e "s/\(${REMOTE_USER}:[^:]*:\)[^:]*:[^:]*/\1${NEW_UID}:${NEW_GID}/" /etc/passwd;
|
||||
if [ "$OLD_GID" != "$NEW_GID" ]; then
|
||||
sed -i -e "s/\([^:]*:[^:]*:\)${OLD_GID}:/\1${NEW_GID}:/" /etc/group;
|
||||
fi
|
||||
|
||||
# Fast parallel `chown -R`
|
||||
find "$HOME_FOLDER/" -not -user "$REMOTE_USER" -print0 \
|
||||
| xargs -0 -r -n1 -P"$(nproc --all)" chown "$NEW_UID:$NEW_GID"
|
||||
|
||||
# Run the container command as $REMOTE_USER, preserving the container startup environment.
|
||||
#
|
||||
# We cannot use `su -w` because that's not supported by the `su` in Ubuntu18.04, so we reset the following
|
||||
# environment variables to the expected values, then pass through everything else from the startup environment.
|
||||
export HOME="$HOME_FOLDER";
|
||||
export XDG_CACHE_HOME="$HOME_FOLDER/.cache";
|
||||
export XDG_CONFIG_HOME="$HOME_FOLDER/.config";
|
||||
export XDG_STATE_HOME="$HOME_FOLDER/.local/state";
|
||||
export PYTHONHISTFILE="$HOME_FOLDER/.local/state/.python_history";
|
||||
exec su -p "$REMOTE_USER" -- "$(pwd)/.devcontainer/nvbench-entrypoint.sh" "$@";
|
||||
fi
|
||||
BIN
.devcontainer/img/container_list.png
Normal file
BIN
.devcontainer/img/container_list.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 156 KiB |
BIN
.devcontainer/img/github_auth.png
Normal file
BIN
.devcontainer/img/github_auth.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 53 KiB |
BIN
.devcontainer/img/open_in_container_manual.png
Normal file
BIN
.devcontainer/img/open_in_container_manual.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
BIN
.devcontainer/img/reopen_in_container.png
Normal file
BIN
.devcontainer/img/reopen_in_container.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 27 KiB |
306
.devcontainer/launch.sh
Executable file
306
.devcontainer/launch.sh
Executable file
@@ -0,0 +1,306 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure the script is being executed in the nvbench/ root
|
||||
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/..";
|
||||
|
||||
print_help() {
|
||||
echo "Usage: $0 [-c|--cuda <CUDA version>] [-H|--host <Host compiler>] [-d|--docker]"
|
||||
echo "Launch a development container. If no CUDA version or Host compiler are specified,"
|
||||
echo "the top-level devcontainer in .devcontainer/devcontainer.json will be used."
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -c, --cuda Specify the CUDA version. E.g., 12.2"
|
||||
echo " -H, --host Specify the host compiler. E.g., gcc12"
|
||||
echo " -d, --docker Launch the development environment in Docker directly without using VSCode."
|
||||
echo " --gpus gpu-request GPU devices to add to the container ('all' to pass all GPUs)."
|
||||
echo " -e, --env list Set additional container environment variables."
|
||||
echo " -v, --volume list Bind mount a volume."
|
||||
echo " -h, --help Display this help message and exit."
|
||||
}
|
||||
|
||||
# Assign variable one scope above the caller
|
||||
# Usage: local "$1" && _upvar $1 "value(s)"
|
||||
# Param: $1 Variable name to assign value to
|
||||
# Param: $* Value(s) to assign. If multiple values, an array is
|
||||
# assigned, otherwise a single value is assigned.
|
||||
# See: http://fvue.nl/wiki/Bash:_Passing_variables_by_reference
|
||||
_upvar() {
|
||||
if unset -v "$1"; then
|
||||
if (( $# == 2 )); then
|
||||
eval $1=\"\$2\";
|
||||
else
|
||||
eval $1=\(\"\${@:2}\"\);
|
||||
fi;
|
||||
fi
|
||||
}
|
||||
|
||||
parse_options() {
|
||||
local -;
|
||||
set -euo pipefail;
|
||||
|
||||
# Read the name of the variable in which to return unparsed arguments
|
||||
local UNPARSED="${!#}";
|
||||
# Splice the unparsed arguments variable name from the arguments list
|
||||
set -- "${@:1:$#-1}";
|
||||
|
||||
local OPTIONS=c:e:H:dhv
|
||||
local LONG_OPTIONS=cuda:,env:,host:,gpus:,volume:,docker,help
|
||||
# shellcheck disable=SC2155
|
||||
local PARSED_OPTIONS=$(getopt -n "$0" -o "${OPTIONS}" --long "${LONG_OPTIONS}" -- "$@")
|
||||
|
||||
# shellcheck disable=SC2181
|
||||
if [[ $? -ne 0 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
eval set -- "${PARSED_OPTIONS}"
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-c|--cuda)
|
||||
cuda_version="$2"
|
||||
shift 2
|
||||
;;
|
||||
-e|--env)
|
||||
env_vars+=("$1" "$2")
|
||||
shift 2
|
||||
;;
|
||||
-H|--host)
|
||||
host_compiler="$2"
|
||||
shift 2
|
||||
;;
|
||||
--gpus)
|
||||
gpu_request="$2"
|
||||
shift 2
|
||||
;;
|
||||
-d|--docker)
|
||||
docker_mode=true
|
||||
shift
|
||||
;;
|
||||
-h|--help)
|
||||
print_help
|
||||
exit 0
|
||||
;;
|
||||
-v|--volume)
|
||||
volumes+=("$1" "$2")
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
_upvar "${UNPARSED}" "${@}"
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Invalid option: $1"
|
||||
print_help
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
# shellcheck disable=SC2155
|
||||
launch_docker() {
|
||||
local -;
|
||||
set -euo pipefail
|
||||
|
||||
inline_vars() {
|
||||
cat - \
|
||||
`# inline local workspace folder` \
|
||||
| sed "s@\${localWorkspaceFolder}@$(pwd)@g" \
|
||||
`# inline local workspace folder basename` \
|
||||
| sed "s@\${localWorkspaceFolderBasename}@$(basename "$(pwd)")@g" \
|
||||
`# inline container workspace folder` \
|
||||
| sed "s@\${containerWorkspaceFolder}@${WORKSPACE_FOLDER:-}@g" \
|
||||
`# inline container workspace folder basename` \
|
||||
| sed "s@\${containerWorkspaceFolderBasename}@$(basename "${WORKSPACE_FOLDER:-}")@g" \
|
||||
`# translate local envvars to shell syntax` \
|
||||
| sed -r 's/\$\{localEnv:([^\:]*):?(.*)\}/${\1:-\2}/g'
|
||||
}
|
||||
|
||||
args_to_path() {
|
||||
local -a keys=("${@}")
|
||||
keys=("${keys[@]/#/[}")
|
||||
keys=("${keys[@]/%/]}")
|
||||
echo "$(IFS=; echo "${keys[*]}")"
|
||||
}
|
||||
|
||||
json_string() {
|
||||
python3 -c "import json,sys; print(json.load(sys.stdin)$(args_to_path "${@}"))" 2>/dev/null | inline_vars
|
||||
}
|
||||
|
||||
json_array() {
|
||||
python3 -c "import json,sys; [print(f'\"{x}\"') for x in json.load(sys.stdin)$(args_to_path "${@}")]" 2>/dev/null | inline_vars
|
||||
}
|
||||
|
||||
json_map() {
|
||||
python3 -c "import json,sys; [print(f'{k}=\"{v}\"') for k,v in json.load(sys.stdin)$(args_to_path "${@}").items()]" 2>/dev/null | inline_vars
|
||||
}
|
||||
|
||||
devcontainer_metadata_json() {
|
||||
docker inspect --type image --format '{{json .Config.Labels}}' "$DOCKER_IMAGE" \
|
||||
| json_string '"devcontainer.metadata"'
|
||||
}
|
||||
|
||||
###
|
||||
# Read relevant values from devcontainer.json
|
||||
###
|
||||
|
||||
local devcontainer_json="${path}/devcontainer.json";
|
||||
|
||||
# Read image
|
||||
local DOCKER_IMAGE="$(json_string '"image"' < "${devcontainer_json}")"
|
||||
# Always pull the latest copy of the image
|
||||
docker pull "$DOCKER_IMAGE"
|
||||
|
||||
# Read workspaceFolder
|
||||
local WORKSPACE_FOLDER="$(json_string '"workspaceFolder"' < "${devcontainer_json}")"
|
||||
# Read remoteUser
|
||||
local REMOTE_USER="$(json_string '"remoteUser"' < "${devcontainer_json}")"
|
||||
# If remoteUser isn't in our devcontainer.json, read it from the image's "devcontainer.metadata" label
|
||||
if test -z "${REMOTE_USER:-}"; then
|
||||
REMOTE_USER="$(devcontainer_metadata_json | json_string "-1" '"remoteUser"')"
|
||||
fi
|
||||
# Read runArgs
|
||||
local -a RUN_ARGS="($(json_array '"runArgs"' < "${devcontainer_json}"))"
|
||||
# Read initializeCommand
|
||||
local -a INITIALIZE_COMMAND="($(json_array '"initializeCommand"' < "${devcontainer_json}"))"
|
||||
# Read containerEnv
|
||||
local -a ENV_VARS="($(json_map '"containerEnv"' < "${devcontainer_json}" | sed -r 's/(.*)=(.*)/--env \1=\2/'))"
|
||||
# Read mounts
|
||||
local -a MOUNTS="($(
|
||||
tee < "${devcontainer_json}" \
|
||||
1>/dev/null \
|
||||
>(json_array '"mounts"') \
|
||||
>(json_string '"workspaceMount"') \
|
||||
| xargs -r -I% echo --mount '%'
|
||||
))"
|
||||
|
||||
###
|
||||
# Update run arguments and container environment variables
|
||||
###
|
||||
|
||||
# Only pass `-it` if the shell is a tty
|
||||
if ! ${CI:-'false'} && tty >/dev/null 2>&1 && (exec </dev/tty); then
|
||||
RUN_ARGS+=("-it")
|
||||
fi
|
||||
|
||||
for flag in rm init; do
|
||||
if [[ " ${RUN_ARGS[*]} " != *" --${flag} "* ]]; then
|
||||
RUN_ARGS+=("--${flag}")
|
||||
fi
|
||||
done
|
||||
|
||||
# Prefer the user-provided --gpus argument
|
||||
if test -n "${gpu_request:-}"; then
|
||||
RUN_ARGS+=(--gpus "${gpu_request}")
|
||||
else
|
||||
# Otherwise read and infer from hostRequirements.gpu
|
||||
local GPU_REQUEST="$(json_string '"hostRequirements"' '"gpu"' < "${devcontainer_json}")"
|
||||
if test "${GPU_REQUEST:-false}" = true; then
|
||||
RUN_ARGS+=(--gpus all)
|
||||
elif test "${GPU_REQUEST:-false}" = optional && \
|
||||
command -v nvidia-container-runtime >/dev/null 2>&1; then
|
||||
RUN_ARGS+=(--gpus all)
|
||||
fi
|
||||
fi
|
||||
|
||||
RUN_ARGS+=(--workdir "${WORKSPACE_FOLDER:-/home/coder/nvbench}")
|
||||
|
||||
if test -n "${REMOTE_USER:-}"; then
|
||||
ENV_VARS+=(--env NEW_UID="$(id -u)")
|
||||
ENV_VARS+=(--env NEW_GID="$(id -g)")
|
||||
ENV_VARS+=(--env REMOTE_USER="$REMOTE_USER")
|
||||
RUN_ARGS+=(-u root:root)
|
||||
RUN_ARGS+=(--entrypoint "${WORKSPACE_FOLDER:-/home/coder/nvbench}/.devcontainer/docker-entrypoint.sh")
|
||||
fi
|
||||
|
||||
if test -n "${SSH_AUTH_SOCK:-}"; then
|
||||
ENV_VARS+=(--env "SSH_AUTH_SOCK=/tmp/ssh-auth-sock")
|
||||
MOUNTS+=(--mount "source=${SSH_AUTH_SOCK},target=/tmp/ssh-auth-sock,type=bind")
|
||||
fi
|
||||
|
||||
# Append user-provided volumes
|
||||
if test -v volumes && test ${#volumes[@]} -gt 0; then
|
||||
MOUNTS+=("${volumes[@]}")
|
||||
fi
|
||||
|
||||
# Append user-provided envvars
|
||||
if test -v env_vars && test ${#env_vars[@]} -gt 0; then
|
||||
ENV_VARS+=("${env_vars[@]}")
|
||||
fi
|
||||
|
||||
# Run the initialize command before starting the container
|
||||
if test "${#INITIALIZE_COMMAND[@]}" -gt 0; then
|
||||
eval "${INITIALIZE_COMMAND[*]@Q}"
|
||||
fi
|
||||
|
||||
exec docker run \
|
||||
"${RUN_ARGS[@]}" \
|
||||
"${ENV_VARS[@]}" \
|
||||
"${MOUNTS[@]}" \
|
||||
"${DOCKER_IMAGE}" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
launch_vscode() {
|
||||
local -;
|
||||
set -euo pipefail;
|
||||
# Since Visual Studio Code allows only one instance per `devcontainer.json`,
|
||||
# this code prepares a unique temporary directory structure for each launch of a devcontainer.
|
||||
# By doing so, it ensures that multiple instances of the same environment can be run
|
||||
# simultaneously. The script replicates the `devcontainer.json` from the desired CUDA
|
||||
# and compiler environment into this temporary directory, adjusting paths to ensure the
|
||||
# correct workspace is loaded. A special URL is then generated to instruct VSCode to
|
||||
# launch the development container using this temporary configuration.
|
||||
local workspace="$(basename "$(pwd)")"
|
||||
local tmpdir="$(mktemp -d)/${workspace}"
|
||||
mkdir -p "${tmpdir}"
|
||||
mkdir -p "${tmpdir}/.devcontainer"
|
||||
cp -arL "${path}/devcontainer.json" "${tmpdir}/.devcontainer"
|
||||
sed -i "s@\\${localWorkspaceFolder}@$(pwd)@g" "${tmpdir}/.devcontainer/devcontainer.json"
|
||||
local path="${tmpdir}"
|
||||
local hash="$(echo -n "${path}" | xxd -pu - | tr -d '[:space:]')"
|
||||
local url="vscode://vscode-remote/dev-container+${hash}/home/coder/nvbench"
|
||||
|
||||
local launch=""
|
||||
if type open >/dev/null 2>&1; then
|
||||
launch="open"
|
||||
elif type xdg-open >/dev/null 2>&1; then
|
||||
launch="xdg-open"
|
||||
fi
|
||||
|
||||
if [ -n "${launch}" ]; then
|
||||
echo "Launching VSCode Dev Container URL: ${url}"
|
||||
code --new-window "${tmpdir}"
|
||||
exec "${launch}" "${url}" >/dev/null 2>&1
|
||||
fi
|
||||
}
|
||||
|
||||
main() {
|
||||
local -a unparsed;
|
||||
parse_options "$@" unparsed;
|
||||
set -- "${unparsed[@]}";
|
||||
|
||||
# If no CTK/Host compiler are provided, just use the default environment
|
||||
if [[ -z ${cuda_version:-} ]] && [[ -z ${host_compiler:-} ]]; then
|
||||
path=".devcontainer"
|
||||
else
|
||||
path=".devcontainer/cuda${cuda_version}-${host_compiler}"
|
||||
if [[ ! -f "${path}/devcontainer.json" ]]; then
|
||||
echo "Unknown CUDA [${cuda_version}] compiler [${host_compiler}] combination"
|
||||
echo "Requested devcontainer ${path}/devcontainer.json does not exist"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if ${docker_mode:-'false'}; then
|
||||
launch_docker "$@"
|
||||
else
|
||||
launch_vscode
|
||||
fi
|
||||
}
|
||||
|
||||
main "$@"
|
||||
144
.devcontainer/make_devcontainers.sh
Executable file
144
.devcontainer/make_devcontainers.sh
Executable file
@@ -0,0 +1,144 @@
|
||||
#!/bin/bash
|
||||
|
||||
# This script parses the CI matrix.yaml file and generates a devcontainer.json file for each unique combination of
|
||||
# CUDA version, compiler name/version, and Ubuntu version. The devcontainer.json files are written to the
|
||||
# .devcontainer directory to a subdirectory named after the CUDA version and compiler name/version.
|
||||
# GitHub docs on using multiple devcontainer.json files:
|
||||
# https://docs.github.com/en/codespaces/setting-up-your-project-for-codespaces/adding-a-dev-container-configuration/introduction-to-dev-containers#devcontainerjson
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure the script is being executed in its containing directory
|
||||
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )";
|
||||
|
||||
|
||||
function usage {
|
||||
echo "Usage: $0 [--clean] [-h/--help] [-v/--verbose]"
|
||||
echo " --clean Remove stale devcontainer subdirectories"
|
||||
echo " -h, --help Display this help message"
|
||||
echo " -v, --verbose Enable verbose mode (set -x)"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Function to update the devcontainer.json file with the provided parameters
|
||||
update_devcontainer() {
|
||||
local input_file="$1"
|
||||
local output_file="$2"
|
||||
local name="$3"
|
||||
local cuda_version="$4"
|
||||
local compiler_name="$5"
|
||||
local compiler_exe="$6"
|
||||
local compiler_version="$7"
|
||||
local os="$8"
|
||||
local devcontainer_version="$9"
|
||||
|
||||
local IMAGE_ROOT="rapidsai/devcontainers:${devcontainer_version}-cpp-"
|
||||
local image="${IMAGE_ROOT}${compiler_name}${compiler_version}-cuda${cuda_version}-${os}"
|
||||
|
||||
jq --arg image "$image" --arg name "$name" \
|
||||
--arg cuda_version "$cuda_version" --arg compiler_name "$compiler_name" \
|
||||
--arg compiler_exe "$compiler_exe" --arg compiler_version "$compiler_version" --arg os "$os" \
|
||||
'.image = $image | .name = $name | .containerEnv.DEVCONTAINER_NAME = $name |
|
||||
.containerEnv.CCCL_BUILD_INFIX = $name |
|
||||
.containerEnv.CCCL_CUDA_VERSION = $cuda_version | .containerEnv.CCCL_HOST_COMPILER = $compiler_name |
|
||||
.containerEnv.CCCL_HOST_COMPILER_VERSION = $compiler_version '\
|
||||
"$input_file" > "$output_file"
|
||||
}
|
||||
|
||||
make_name() {
|
||||
local cuda_version="$1"
|
||||
local compiler_name="$2"
|
||||
local compiler_version="$3"
|
||||
|
||||
echo "cuda$cuda_version-$compiler_name$compiler_version"
|
||||
}
|
||||
|
||||
CLEAN=false
|
||||
VERBOSE=false
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--clean)
|
||||
CLEAN=true
|
||||
;;
|
||||
-h|--help)
|
||||
usage
|
||||
;;
|
||||
-v|--verbose)
|
||||
VERBOSE=true
|
||||
;;
|
||||
*)
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
MATRIX_FILE="../ci/matrix.yaml"
|
||||
|
||||
# Enable verbose mode if requested
|
||||
if [ "$VERBOSE" = true ]; then
|
||||
set -x
|
||||
cat ${MATRIX_FILE}
|
||||
fi
|
||||
|
||||
# Read matrix.yaml and convert it to json
|
||||
matrix_json=$(yq -o json ${MATRIX_FILE})
|
||||
|
||||
# Exclude Windows environments
|
||||
readonly matrix_json=$(echo "$matrix_json" | jq 'del(.pull_request.nvcc[] | select(.os | contains("windows")))')
|
||||
|
||||
# Get the devcontainer image version and define image tag root
|
||||
readonly DEVCONTAINER_VERSION=$(echo "$matrix_json" | jq -r '.devcontainer_version')
|
||||
|
||||
# Get unique combinations of cuda version, compiler name/version, and Ubuntu version
|
||||
readonly combinations=$(echo "$matrix_json" | jq -c '[.pull_request.nvcc[] | {cuda: .cuda, compiler_name: .compiler.name, compiler_exe: .compiler.exe, compiler_version: .compiler.version, os: .os}] | unique | .[]')
|
||||
|
||||
# Update the base devcontainer with the default values
|
||||
# The root devcontainer.json file is used as the default container as well as a template for all
|
||||
# other devcontainer.json files by replacing the `image:` field with the appropriate image name
|
||||
readonly base_devcontainer_file="./devcontainer.json"
|
||||
readonly NEWEST_GCC_CUDA_ENTRY=$(echo "$combinations" | jq -rs '[.[] | select(.compiler_name == "gcc")] | sort_by((.cuda | tonumber), (.compiler_version | tonumber)) | .[-1]')
|
||||
readonly DEFAULT_CUDA=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.cuda')
|
||||
readonly DEFAULT_COMPILER_NAME=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_name')
|
||||
readonly DEFAULT_COMPILER_EXE=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_exe')
|
||||
readonly DEFAULT_COMPILER_VERSION=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.compiler_version')
|
||||
readonly DEFAULT_OS=$(echo "$NEWEST_GCC_CUDA_ENTRY" | jq -r '.os')
|
||||
readonly DEFAULT_NAME=$(make_name "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_VERSION")
|
||||
|
||||
update_devcontainer ${base_devcontainer_file} "./temp_devcontainer.json" "$DEFAULT_NAME" "$DEFAULT_CUDA" "$DEFAULT_COMPILER_NAME" "$DEFAULT_COMPILER_EXE" "$DEFAULT_COMPILER_VERSION" "$DEFAULT_OS" "$DEVCONTAINER_VERSION"
|
||||
mv "./temp_devcontainer.json" ${base_devcontainer_file}
|
||||
|
||||
# Create an array to keep track of valid subdirectory names
|
||||
valid_subdirs=()
|
||||
|
||||
# The img folder should not be removed:
|
||||
valid_subdirs+=("img")
|
||||
|
||||
# For each unique combination
|
||||
for combination in $combinations; do
|
||||
cuda_version=$(echo "$combination" | jq -r '.cuda')
|
||||
compiler_name=$(echo "$combination" | jq -r '.compiler_name')
|
||||
compiler_exe=$(echo "$combination" | jq -r '.compiler_exe')
|
||||
compiler_version=$(echo "$combination" | jq -r '.compiler_version')
|
||||
os=$(echo "$combination" | jq -r '.os')
|
||||
|
||||
name=$(make_name "$cuda_version" "$compiler_name" "$compiler_version")
|
||||
mkdir -p "$name"
|
||||
new_devcontainer_file="$name/devcontainer.json"
|
||||
|
||||
update_devcontainer "$base_devcontainer_file" "$new_devcontainer_file" "$name" "$cuda_version" "$compiler_name" "$compiler_exe" "$compiler_version" "$os" "$DEVCONTAINER_VERSION"
|
||||
echo "Created $new_devcontainer_file"
|
||||
|
||||
# Add the subdirectory name to the valid_subdirs array
|
||||
valid_subdirs+=("$name")
|
||||
done
|
||||
|
||||
# Clean up stale subdirectories and devcontainer.json files
|
||||
if [ "$CLEAN" = true ]; then
|
||||
for subdir in ./*; do
|
||||
if [ -d "$subdir" ] && [[ ! " ${valid_subdirs[@]} " =~ " ${subdir#./} " ]]; then
|
||||
echo "Removing stale subdirectory: $subdir"
|
||||
rm -r "$subdir"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
17
.devcontainer/nvbench-entrypoint.sh
Executable file
17
.devcontainer/nvbench-entrypoint.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
|
||||
set -e;
|
||||
|
||||
devcontainer-utils-post-create-command;
|
||||
devcontainer-utils-init-git;
|
||||
devcontainer-utils-post-attach-command;
|
||||
|
||||
cd /home/coder/nvbench/
|
||||
|
||||
if test $# -gt 0; then
|
||||
exec "$@";
|
||||
else
|
||||
exec /bin/bash -li;
|
||||
fi
|
||||
89
.devcontainer/verify_devcontainer.sh
Executable file
89
.devcontainer/verify_devcontainer.sh
Executable file
@@ -0,0 +1,89 @@
|
||||
#!/bin/bash
|
||||
|
||||
function usage {
|
||||
echo "Usage: $0"
|
||||
echo
|
||||
echo "This script is intended to be run within one of CCCL's Dev Containers."
|
||||
echo "It verifies that the expected environment variables and binary versions match what is expected."
|
||||
}
|
||||
|
||||
check_envvars() {
|
||||
for var_name in "$@"; do
|
||||
if [[ -z "${!var_name:-}" ]]; then
|
||||
echo "::error:: ${var_name} variable is not set."
|
||||
exit 1
|
||||
else
|
||||
echo "$var_name=${!var_name}"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
check_host_compiler_version() {
|
||||
local version_output=$($CXX --version)
|
||||
|
||||
if [[ "$CXX" == "g++" ]]; then
|
||||
local actual_version=$(echo "$version_output" | head -n 1 | cut -d ' ' -f 4 | cut -d '.' -f 1)
|
||||
local expected_compiler="gcc"
|
||||
elif [[ "$CXX" == "clang++" ]]; then
|
||||
if [[ $version_output =~ clang\ version\ ([0-9]+) ]]; then
|
||||
actual_version=${BASH_REMATCH[1]}
|
||||
else
|
||||
echo "::error:: Unable to determine clang version."
|
||||
exit 1
|
||||
fi
|
||||
expected_compiler="llvm"
|
||||
elif [[ "$CXX" == "icpc" ]]; then
|
||||
local actual_version=$(echo "$version_output" | head -n 1 | cut -d ' ' -f 3 )
|
||||
# The icpc compiler version of oneAPI release 2023.2.0 is 2021.10.0
|
||||
if [[ "$actual_version" == "2021.10.0" ]]; then
|
||||
actual_version="2023.2.0"
|
||||
fi
|
||||
expected_compiler="oneapi"
|
||||
else
|
||||
echo "::error:: Unexpected CXX value ($CXX)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$expected_compiler" != "${CCCL_HOST_COMPILER}" || "$actual_version" != "$CCCL_HOST_COMPILER_VERSION" ]]; then
|
||||
echo "::error:: CXX ($CXX) version ($actual_version) does not match the expected compiler (${CCCL_HOST_COMPILER}) and version (${CCCL_HOST_COMPILER_VERSION})."
|
||||
exit 1
|
||||
else
|
||||
echo "Detected host compiler: $CXX version $actual_version"
|
||||
fi
|
||||
}
|
||||
|
||||
check_cuda_version() {
|
||||
local cuda_version_output=$(nvcc --version)
|
||||
if [[ $cuda_version_output =~ release\ ([0-9]+\.[0-9]+) ]]; then
|
||||
local actual_cuda_version=${BASH_REMATCH[1]}
|
||||
else
|
||||
echo "::error:: Unable to determine CUDA version from nvcc."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$actual_cuda_version" != "$CCCL_CUDA_VERSION" ]]; then
|
||||
echo "::error:: CUDA version ($actual_cuda_version) does not match the expected CUDA version ($CCCL_CUDA_VERSION)."
|
||||
exit 1
|
||||
else
|
||||
echo "Detected CUDA version: $actual_cuda_version"
|
||||
fi
|
||||
}
|
||||
|
||||
main() {
|
||||
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
|
||||
usage
|
||||
exit 0
|
||||
fi
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
check_envvars DEVCONTAINER_NAME CXX CUDAHOSTCXX CCCL_BUILD_INFIX CCCL_HOST_COMPILER CCCL_CUDA_VERSION CCCL_HOST_COMPILER_VERSION
|
||||
|
||||
check_host_compiler_version
|
||||
|
||||
check_cuda_version
|
||||
|
||||
echo "Dev Container successfully verified!"
|
||||
}
|
||||
|
||||
main "$@"
|
||||
17
.git-blame-ignore-revs
Normal file
17
.git-blame-ignore-revs
Normal file
@@ -0,0 +1,17 @@
|
||||
# Exclude these commits from git-blame and similar tools.
|
||||
#
|
||||
# To use this file, run the following command from the repo root:
|
||||
#
|
||||
# ```
|
||||
# $ git config blame.ignoreRevsFile .git-blame-ignore-revs
|
||||
# ```
|
||||
#
|
||||
# Include a brief comment with each commit added, for example:
|
||||
#
|
||||
# ```
|
||||
# 8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100
|
||||
# ```
|
||||
#
|
||||
# Only add commits that are pure formatting changes (e.g. clang-format version changes, etc).
|
||||
8f1152d4a22287a35be2dde596e3cf86ace8054a # Increase column limit to 100
|
||||
3440855dbd405db614861885ad1577fffd882867 # Initial addition of pre-commit.ci formatting.
|
||||
25
.github/actions/compute-matrix/action.yml
vendored
Normal file
25
.github/actions/compute-matrix/action.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
name: Compute Matrix
|
||||
description: "Compute the matrix for a given matrix type from the specified matrix file"
|
||||
|
||||
inputs:
|
||||
matrix_query:
|
||||
description: "The jq query used to specify the desired matrix. e.g., .pull_request.nvcc"
|
||||
required: true
|
||||
matrix_file:
|
||||
description: 'The file containing the matrix'
|
||||
required: true
|
||||
outputs:
|
||||
matrix:
|
||||
description: 'The requested matrix'
|
||||
value: ${{ steps.compute-matrix.outputs.MATRIX }}
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Compute matrix
|
||||
id: compute-matrix
|
||||
run: |
|
||||
MATRIX=$(./.github/actions/compute-matrix/compute-matrix.sh ${{inputs.matrix_file}} ${{inputs.matrix_query}} )
|
||||
echo "matrix=$MATRIX" | tee -a $GITHUB_OUTPUT
|
||||
shell: bash -euxo pipefail {0}
|
||||
44
.github/actions/compute-matrix/compute-matrix.sh
vendored
Executable file
44
.github/actions/compute-matrix/compute-matrix.sh
vendored
Executable file
@@ -0,0 +1,44 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
write_output() {
|
||||
local key="$1"
|
||||
local value="$2"
|
||||
echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
|
||||
}
|
||||
|
||||
extract_matrix() {
|
||||
local file="$1"
|
||||
local type="$2"
|
||||
local matrix=$(yq -o=json "$file" | jq -cr ".$type")
|
||||
write_output "DEVCONTAINER_VERSION" "$(yq -o json "$file" | jq -cr '.devcontainer_version')"
|
||||
|
||||
local nvcc_full_matrix="$(echo "$matrix" | jq -cr '.nvcc')"
|
||||
local per_cuda_compiler_matrix="$(echo "$nvcc_full_matrix" | jq -cr ' group_by(.cuda + .compiler.name) | map({(.[0].cuda + "-" + .[0].compiler.name): .}) | add')"
|
||||
write_output "PER_CUDA_COMPILER_MATRIX" "$per_cuda_compiler_matrix"
|
||||
write_output "PER_CUDA_COMPILER_KEYS" "$(echo "$per_cuda_compiler_matrix" | jq -r 'keys | @json')"
|
||||
}
|
||||
|
||||
main() {
|
||||
if [ "$1" == "-v" ]; then
|
||||
set -x
|
||||
shift
|
||||
fi
|
||||
|
||||
if [ $# -ne 2 ] || [ "$2" != "pull_request" ]; then
|
||||
echo "Usage: $0 [-v] MATRIX_FILE MATRIX_TYPE"
|
||||
echo " -v : Enable verbose output"
|
||||
echo " MATRIX_FILE : The path to the matrix file."
|
||||
echo " MATRIX_TYPE : The desired matrix. Supported values: 'pull_request'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Input matrix file:" >&2
|
||||
cat "$1" >&2
|
||||
echo "Matrix Type: $2" >&2
|
||||
|
||||
extract_matrix "$1" "$2"
|
||||
}
|
||||
|
||||
main "$@"
|
||||
13
.github/actions/configure_cccl_sccache/action.yml
vendored
Normal file
13
.github/actions/configure_cccl_sccache/action.yml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
name: Set up AWS credentials and environment variables for sccache
|
||||
description: "Set up AWS credentials and environment variables for sccache"
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Set environment variables
|
||||
run: |
|
||||
echo "SCCACHE_BUCKET=rapids-sccache-devs" >> $GITHUB_ENV
|
||||
echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV
|
||||
echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV
|
||||
echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV
|
||||
echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
4
.github/copy-pr-bot.yaml
vendored
Normal file
4
.github/copy-pr-bot.yaml
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
# Configuration file for `copy-pr-bot` GitHub App
|
||||
# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
|
||||
|
||||
enabled: true
|
||||
14
.github/problem-matchers/problem-matcher.json
vendored
Normal file
14
.github/problem-matchers/problem-matcher.json
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"problemMatcher": [
|
||||
{
|
||||
"owner": "nvcc",
|
||||
"pattern": [
|
||||
{
|
||||
"regexp": "^\\/home\\/coder\\/(.+):(\\d+):(\\d+): (\\w+): \"(.+)\"$",
|
||||
"severity": 4,
|
||||
"message": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
36
.github/workflows/build-and-test-linux.yml
vendored
Normal file
36
.github/workflows/build-and-test-linux.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
name: build and test
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -exo pipefail {0}
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
cuda: {type: string, required: true}
|
||||
host: {type: string, required: true}
|
||||
cpu: {type: string, required: true}
|
||||
test_name: {type: string, required: false}
|
||||
build_script: {type: string, required: false}
|
||||
test_script: {type: string, required: false}
|
||||
container_image: {type: string, required: false}
|
||||
run_tests: {type: boolean, required: false, default: true}
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build-and-test:
|
||||
name: Build/Test ${{inputs.test_name}}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
uses: ./.github/workflows/run-as-coder.yml
|
||||
with:
|
||||
cuda: ${{ inputs.cuda }}
|
||||
host: ${{ inputs.host }}
|
||||
name: Build/Test ${{inputs.test_name}}
|
||||
runner: linux-${{inputs.cpu}}-gpu-l4-latest-1
|
||||
image: ${{ inputs.container_image }}
|
||||
command: |
|
||||
${{ inputs.test_script }}
|
||||
34
.github/workflows/dispatch-build-and-test.yml
vendored
Normal file
34
.github/workflows/dispatch-build-and-test.yml
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
name: Dispatch build and test
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
project_name: {type: string, required: true}
|
||||
per_cuda_compiler_matrix: {type: string, required: true}
|
||||
devcontainer_version: {type: string, required: true}
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
# Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration
|
||||
# ensures that the build/test steps can overlap across different configurations. For example,
|
||||
# the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11.
|
||||
build_and_test_linux:
|
||||
name: build and test linux
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
uses: ./.github/workflows/build-and-test-linux.yml
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
|
||||
with:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
host: ${{matrix.compiler.name}}${{matrix.compiler.version}}
|
||||
cpu: ${{ matrix.cpu }}
|
||||
test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}} ${{matrix.extra_build_args}}
|
||||
build_script: "./ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} ${{matrix.extra_build_args}}"
|
||||
test_script: "./ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} ${{matrix.extra_build_args}}"
|
||||
container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
|
||||
107
.github/workflows/pr.yml
vendored
Normal file
107
.github/workflows/pr.yml
vendored
Normal file
@@ -0,0 +1,107 @@
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This is the main workflow that runs on every PR and push to main
|
||||
name: pr
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euo pipefail {0}
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- "pull-request/[0-9]+"
|
||||
|
||||
# Only runs one instance of this workflow at a time for a given PR and cancels any in-progress runs when a new one starts.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-on-${{ github.event_name }}-from-${{ github.ref_name }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
|
||||
jobs:
|
||||
compute-matrix:
|
||||
name: Compute matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}}
|
||||
PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}}
|
||||
PER_CUDA_COMPILER_KEYS: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_KEYS}}
|
||||
base_sha: ${{ steps.export-pr-info.outputs.base_sha }}
|
||||
pr_number: ${{ steps.export-pr-info.outputs.pr_number }}
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Lookup PR info
|
||||
id: get-pr-info
|
||||
uses: nv-gha-runners/get-pr-info@main
|
||||
- name: Export PR info
|
||||
id: export-pr-info
|
||||
run: |
|
||||
echo "base_sha=${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}" | tee -a "${GITHUB_OUTPUT}"
|
||||
echo "pr_number=${{ fromJSON(steps.get-pr-info.outputs.pr-info).number }}" | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Compute matrix outputs
|
||||
id: set-outputs
|
||||
run: |
|
||||
.github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request
|
||||
|
||||
nvbench:
|
||||
name: NVBench CUDA${{ matrix.cuda_host_combination }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: compute-matrix
|
||||
uses: ./.github/workflows/dispatch-build-and-test.yml
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
cuda_host_combination: ${{ fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_KEYS) }}
|
||||
with:
|
||||
project_name: "nvbench"
|
||||
per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ matrix.cuda_host_combination ]) }}
|
||||
devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
|
||||
|
||||
verify-devcontainers:
|
||||
name: Verify Dev Containers
|
||||
if: ${{ !contains(github.event.head_commit.message, '[skip-vdc]') }}
|
||||
needs: compute-matrix
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
uses: ./.github/workflows/verify-devcontainers.yml
|
||||
with:
|
||||
base_sha: ${{ needs.compute-matrix.outputs.base_sha }}
|
||||
|
||||
# This job is the final job that runs after all other jobs and is used for branch protection status checks.
|
||||
# See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks
|
||||
# https://github.com/orgs/community/discussions/26822#discussioncomment-5122101
|
||||
ci:
|
||||
runs-on: ubuntu-latest
|
||||
name: CI
|
||||
if: ${{ always() }} # need to use always() instead of !cancelled() because skipped jobs count as success
|
||||
needs:
|
||||
- nvbench
|
||||
- verify-devcontainers
|
||||
steps:
|
||||
- name: Check status of all precursor jobs
|
||||
if: >-
|
||||
${{
|
||||
contains(needs.*.result, 'failure')
|
||||
|| contains(needs.*.result, 'cancelled')
|
||||
}}
|
||||
run: exit 1
|
||||
156
.github/workflows/run-as-coder.yml
vendored
Normal file
156
.github/workflows/run-as-coder.yml
vendored
Normal file
@@ -0,0 +1,156 @@
|
||||
name: Run as coder user
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -exo pipefail {0}
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
cuda: {type: string, required: true}
|
||||
host: {type: string, required: true}
|
||||
name: {type: string, required: true}
|
||||
image: {type: string, required: true}
|
||||
runner: {type: string, required: true}
|
||||
command: {type: string, required: true}
|
||||
env: { type: string, required: false, default: "" }
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
run-as-coder:
|
||||
name: ${{inputs.name}}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: ${{inputs.runner}}
|
||||
container:
|
||||
# This job now uses a docker-outside-of-docker (DOOD) strategy.
|
||||
#
|
||||
# The GitHub Actions runner application mounts the host's docker socket `/var/run/docker.sock` into the
|
||||
# container. By using a container with the `docker` CLI, this container can launch docker containers
|
||||
# using the host's docker daemon.
|
||||
#
|
||||
# This allows us to run actions that require node v20 in the `cruizba/ubuntu-dind:jammy-26.1.3` container, and
|
||||
# then launch our Ubuntu18.04-based GCC 6/7 containers to build and test CCCL.
|
||||
#
|
||||
# The main inconvenience to this approach is that any container mounts have to match the paths of the runner host,
|
||||
# not the paths as seen in the intermediate (`cruizba/ubuntu-dind`) container.
|
||||
#
|
||||
# Note: I am using `cruizba/ubuntu-dind:jammy-26.1.3` instead of `docker:latest`, because GitHub doesn't support
|
||||
# JS actions in alpine aarch64 containers, instead failing actions with this error:
|
||||
# ```
|
||||
# Error: JavaScript Actions in Alpine containers are only supported on x64 Linux runners. Detected Linux Arm64
|
||||
# ```
|
||||
image: cruizba/ubuntu-dind:jammy-26.1.3
|
||||
env:
|
||||
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: nvbench
|
||||
persist-credentials: false
|
||||
- name: Add NVCC problem matcher
|
||||
run: |
|
||||
echo "::add-matcher::nvbench/.github/problem-matchers/problem-matcher.json"
|
||||
- name: Configure credentials and environment variables for sccache
|
||||
uses: ./nvbench/.github/actions/configure_cccl_sccache
|
||||
- name: Run command
|
||||
env:
|
||||
CI: true
|
||||
RUNNER: "${{inputs.runner}}"
|
||||
COMMAND: "${{inputs.command}}"
|
||||
AWS_ACCESS_KEY_ID: "${{env.AWS_ACCESS_KEY_ID}}"
|
||||
AWS_SESSION_TOKEN: "${{env.AWS_SESSION_TOKEN}}"
|
||||
AWS_SECRET_ACCESS_KEY: "${{env.AWS_SECRET_ACCESS_KEY}}"
|
||||
run: |
|
||||
echo "[host] github.workspace: ${{github.workspace}}"
|
||||
echo "[container] GITHUB_WORKSPACE: ${GITHUB_WORKSPACE:-}"
|
||||
echo "[container] PWD: $(pwd)"
|
||||
|
||||
# Necessary because we're doing docker-outside-of-docker:
|
||||
# Make a symlink in the container that matches the host's ${{github.workspace}}, so that way `$(pwd)`
|
||||
# in `.devcontainer/launch.sh` constructs volume paths relative to the hosts's ${{github.workspace}}.
|
||||
mkdir -p "$(dirname "${{github.workspace}}")"
|
||||
ln -s "$(pwd)" "${{github.workspace}}"
|
||||
|
||||
cd "${{github.workspace}}"
|
||||
|
||||
cat <<"EOF" > ci.sh
|
||||
|
||||
#! /usr/bin/env bash
|
||||
set -eo pipefail
|
||||
echo -e "\e[1;34mRunning as '$(whoami)' user in $(pwd):\e[0m"
|
||||
echo -e "\e[1;34m${{inputs.command}}\e[0m"
|
||||
eval "${{inputs.command}}" || exit_code=$?
|
||||
if [ ! -z "$exit_code" ]; then
|
||||
echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m"
|
||||
echo "::error:: To replicate this failure locally, follow the steps below:"
|
||||
echo "1. Clone the repository, and navigate to the correct branch and commit:"
|
||||
echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA"
|
||||
echo ""
|
||||
echo "2. Run the failed command inside the same Docker container used by the CI:"
|
||||
echo " docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}"
|
||||
echo ""
|
||||
echo "For additional information, see:"
|
||||
echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md"
|
||||
echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md"
|
||||
exit $exit_code
|
||||
fi
|
||||
EOF
|
||||
|
||||
chmod +x ci.sh
|
||||
|
||||
mkdir "$RUNNER_TEMP/.aws";
|
||||
|
||||
cat <<EOF > "$RUNNER_TEMP/.aws/config"
|
||||
[default]
|
||||
bucket=rapids-sccache-devs
|
||||
region=us-east-2
|
||||
EOF
|
||||
|
||||
cat <<EOF > "$RUNNER_TEMP/.aws/credentials"
|
||||
[default]
|
||||
aws_access_key_id=$AWS_ACCESS_KEY_ID
|
||||
aws_session_token=$AWS_SESSION_TOKEN
|
||||
aws_secret_access_key=$AWS_SECRET_ACCESS_KEY
|
||||
EOF
|
||||
|
||||
chmod 0600 "$RUNNER_TEMP/.aws/credentials"
|
||||
chmod 0664 "$RUNNER_TEMP/.aws/config"
|
||||
|
||||
declare -a gpu_request=()
|
||||
|
||||
# Explicitly pass which GPU to use if on a GPU runner
|
||||
if [[ "${RUNNER}" = *"-gpu-"* ]]; then
|
||||
gpu_request+=(--gpus "device=${NVIDIA_VISIBLE_DEVICES}")
|
||||
fi
|
||||
|
||||
host_path() {
|
||||
sed "s@/__w@$(dirname "$(dirname "${{github.workspace}}")")@" <<< "$1"
|
||||
}
|
||||
|
||||
# Launch this container using the host's docker daemon
|
||||
${{github.event.repository.name}}/.devcontainer/launch.sh \
|
||||
--docker \
|
||||
--cuda ${{inputs.cuda}} \
|
||||
--host ${{inputs.host}} \
|
||||
"${gpu_request[@]}" \
|
||||
--env "CI=$CI" \
|
||||
--env "AWS_ROLE_ARN=" \
|
||||
--env "COMMAND=$COMMAND" \
|
||||
--env "GITHUB_ENV=$GITHUB_ENV" \
|
||||
--env "GITHUB_SHA=$GITHUB_SHA" \
|
||||
--env "GITHUB_PATH=$GITHUB_PATH" \
|
||||
--env "GITHUB_OUTPUT=$GITHUB_OUTPUT" \
|
||||
--env "GITHUB_ACTIONS=$GITHUB_ACTIONS" \
|
||||
--env "GITHUB_REF_NAME=$GITHUB_REF_NAME" \
|
||||
--env "GITHUB_WORKSPACE=$GITHUB_WORKSPACE" \
|
||||
--env "GITHUB_REPOSITORY=$GITHUB_REPOSITORY" \
|
||||
--env "GITHUB_STEP_SUMMARY=$GITHUB_STEP_SUMMARY" \
|
||||
--volume "${{github.workspace}}/ci.sh:/ci.sh" \
|
||||
--volume "$(host_path "$RUNNER_TEMP")/.aws:/root/.aws" \
|
||||
--volume "$(dirname "$(dirname "${{github.workspace}}")"):/__w" \
|
||||
-- /ci.sh
|
||||
150
.github/workflows/verify-devcontainers.yml
vendored
Normal file
150
.github/workflows/verify-devcontainers.yml
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
name: Verify devcontainers
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
base_sha:
|
||||
type: string
|
||||
description: 'For PRs, set the base SHA to conditionally run this workflow only when relevant files are modified.'
|
||||
required: false
|
||||
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euo pipefail {0}
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
get-devcontainer-list:
|
||||
name: Verify devcontainer files are up-to-date
|
||||
outputs:
|
||||
skip: ${{ steps.inspect-changes.outputs.skip }}
|
||||
devcontainers: ${{ steps.get-list.outputs.devcontainers }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Setup jq and yq
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install jq -y
|
||||
sudo wget -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.34.2/yq_linux_amd64
|
||||
sudo chmod +x /usr/local/bin/yq
|
||||
- name: Run the script to generate devcontainer files
|
||||
run: |
|
||||
./.devcontainer/make_devcontainers.sh --verbose --clean
|
||||
- name: Check for changes
|
||||
run: |
|
||||
if [[ $(git diff --stat) != '' || $(git status --porcelain | grep '^??') != '' ]]; then
|
||||
git diff --minimal
|
||||
git status --porcelain
|
||||
echo "::error:: Dev Container files are out of date or there are untracked files. Run the .devcontainer/make_devcontainers.sh script and commit the changes."
|
||||
exit 1
|
||||
else
|
||||
echo "::note::Dev Container files are up-to-date."
|
||||
fi
|
||||
- name: Inspect changes
|
||||
if: ${{ inputs.base_sha != '' }}
|
||||
id: inspect-changes
|
||||
env:
|
||||
BASE_SHA: ${{ inputs.base_sha }}
|
||||
run: |
|
||||
echo "Fetch history and determine merge base..."
|
||||
git fetch origin --unshallow -q
|
||||
git fetch origin $BASE_SHA -q
|
||||
merge_base_sha=$(git merge-base $GITHUB_SHA $BASE_SHA)
|
||||
|
||||
echo "Head SHA: $GITHUB_SHA"
|
||||
echo "PR Base SHA: $BASE_SHA"
|
||||
echo "Merge Base SHA: $merge_base_sha"
|
||||
|
||||
echo "Checking for changes to devcontainer/matrix files..."
|
||||
|
||||
all_dirty_files=$(git diff --name-only "${merge_base_sha}" "${GITHUB_SHA}")
|
||||
echo "::group::All dirty files"
|
||||
echo "${all_dirty_files}"
|
||||
echo "::endgroup::"
|
||||
|
||||
file_regex="^(.devcontainer|ci/matrix.yaml|.github/actions/workflow-build/build-workflow.py)"
|
||||
echo "Regex: ${file_regex}"
|
||||
|
||||
relevant_dirty_files=$(echo "${all_dirty_files}" | grep -E "${file_regex}" || true)
|
||||
echo "::group::Relevant dirty files"
|
||||
echo "${relevant_dirty_files}"
|
||||
echo "::endgroup::"
|
||||
|
||||
if [[ -z "${relevant_dirty_files}" ]]; then
|
||||
echo "No relevant changes detected. Skipping devcontainer testing."
|
||||
echo "skip=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "Detected relevant changes. Continuing."
|
||||
echo "skip=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Get list of devcontainer.json paths and names
|
||||
if: ${{ steps.inspect-changes.outputs.skip != 'true' }}
|
||||
id: get-list
|
||||
run: |
|
||||
devcontainers=$(find .devcontainer/ -name 'devcontainer.json' | while read -r devcontainer; do
|
||||
jq --arg path "$devcontainer" '{path: $path, name: .name}' "$devcontainer"
|
||||
done | jq -s -c .)
|
||||
echo "devcontainers=${devcontainers}" | tee --append "${GITHUB_OUTPUT}"
|
||||
|
||||
verify-devcontainers:
|
||||
name: ${{matrix.devcontainer.name}}
|
||||
needs: get-devcontainer-list
|
||||
if: ${{ needs.get-devcontainer-list.outputs.skip != 'true' }}
|
||||
runs-on: linux-amd64-cpu4
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
devcontainer: ${{fromJson(needs.get-devcontainer-list.outputs.devcontainers)}}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Check out the code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
# Add PPA for nodejs, devcontainer CLI requires a newer version:
|
||||
curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh
|
||||
sudo bash /tmp/nodesource_setup.sh
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y nodejs
|
||||
sudo npm install -g @devcontainers/cli
|
||||
|
||||
# We don't really need sccache configured, but we need the AWS credentials envvars to be set
|
||||
# in order to avoid the devcontainer hanging waiting for GitHub authentication
|
||||
- name: Get AWS credentials for sccache bucket
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA
|
||||
aws-region: us-east-2
|
||||
role-duration-seconds: 43200 # 12 hours
|
||||
- name: Set environment variables
|
||||
run: |
|
||||
echo "SCCACHE_BUCKET=rapids-sccache-devs" >> $GITHUB_ENV
|
||||
echo "SCCACHE_REGION=us-east-2" >> $GITHUB_ENV
|
||||
echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV
|
||||
echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV
|
||||
echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV
|
||||
|
||||
- name: Run in devcontainer
|
||||
uses: devcontainers/ci@v0.3
|
||||
with:
|
||||
push: never
|
||||
configFile: ${{ matrix.devcontainer.path }}
|
||||
env: |
|
||||
SCCACHE_REGION=${{ env.SCCACHE_REGION }}
|
||||
AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SESSION_TOKEN=${{ env.AWS_SESSION_TOKEN }}
|
||||
AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }}
|
||||
runCmd: |
|
||||
.devcontainer/verify_devcontainer.sh
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -1,4 +1,10 @@
|
||||
build*/
|
||||
.aws
|
||||
.vscode
|
||||
.cache
|
||||
.config
|
||||
.idea
|
||||
cmake-build-*
|
||||
*~
|
||||
compile_commands.json
|
||||
CMakeUserPresets.json
|
||||
|
||||
70
.pre-commit-config.yaml
Normal file
70
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,70 @@
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION.
|
||||
ci:
|
||||
autofix_commit_msg: |
|
||||
[pre-commit.ci] auto code formatting
|
||||
autofix_prs: false
|
||||
autoupdate_branch: ''
|
||||
autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
|
||||
autoupdate_schedule: quarterly
|
||||
skip: []
|
||||
submodules: false
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: end-of-file-fixer
|
||||
- id: mixed-line-ending
|
||||
- id: trailing-whitespace
|
||||
- repo: https://github.com/pre-commit/mirrors-clang-format
|
||||
rev: v19.1.6
|
||||
hooks:
|
||||
- id: clang-format
|
||||
types_or: [file]
|
||||
files: |
|
||||
(?x)^(
|
||||
^.*\.c$|
|
||||
^.*\.cpp$|
|
||||
^.*\.cu$|
|
||||
^.*\.cuh$|
|
||||
^.*\.cxx$|
|
||||
^.*\.h$|
|
||||
^.*\.hpp$|
|
||||
^.*\.inl$|
|
||||
^.*\.mm$
|
||||
)
|
||||
args: ["-fallback-style=none", "-style=file", "-i"]
|
||||
|
||||
# TODO/REMINDER: add the Ruff vscode extension to the devcontainers
|
||||
# Ruff, the Python auto-correcting linter/formatter written in Rust
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.8.6
|
||||
hooks:
|
||||
- id: ruff # linter
|
||||
- id: ruff-format # formatter
|
||||
|
||||
# TOML lint & format
|
||||
- repo: https://github.com/ComPWA/taplo-pre-commit
|
||||
rev: v0.9.3
|
||||
hooks:
|
||||
# See https://github.com/NVIDIA/cccl/issues/3426
|
||||
# - id: taplo-lint
|
||||
# exclude: "^docs/"
|
||||
- id: taplo-format
|
||||
exclude: "^docs/"
|
||||
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.3.0
|
||||
hooks:
|
||||
- id: codespell
|
||||
additional_dependencies: [tomli]
|
||||
args: ["--toml", "pyproject.toml"]
|
||||
exclude: |
|
||||
(?x)^(
|
||||
build|
|
||||
CITATION.md
|
||||
)
|
||||
|
||||
|
||||
default_language_version:
|
||||
python: python3
|
||||
@@ -1,6 +1,5 @@
|
||||
# 3.20.1 required for rapids-cmake
|
||||
# 3.21.0 required for NVBench_ADD_DEPENDENT_DLLS_TO_* (MSVC only)
|
||||
cmake_minimum_required(VERSION 3.20.1)
|
||||
# 3.30.4 required for rapids-cmake
|
||||
cmake_minimum_required(VERSION 3.30.4)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
@@ -22,6 +21,11 @@ project(NVBench
|
||||
|
||||
nvbench_init_rapids_cmake()
|
||||
|
||||
# Define NVBench_DETECTED_${LANG}_STANDARDS
|
||||
include(cmake/DetectSupportedStandards.cmake)
|
||||
detect_supported_standards(NVBench CXX 17 20)
|
||||
detect_supported_standards(NVBench CUDA 17 20)
|
||||
|
||||
# See NVIDIA/NVBench#52
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
set(cupti_default ON)
|
||||
@@ -29,29 +33,37 @@ if (${CUDAToolkit_VERSION} VERSION_LESS 11.3)
|
||||
set(cupti_default OFF)
|
||||
endif()
|
||||
|
||||
option(BUILD_SHARED_LIBS "Build NVBench as a shared library" ON)
|
||||
|
||||
option(NVBench_ENABLE_NVML "Build with NVML support from the Cuda Toolkit." ON)
|
||||
option(NVBench_ENABLE_CUPTI "Build NVBench with CUPTI." ${cupti_default})
|
||||
|
||||
option(NVBench_ENABLE_TESTING "Build NVBench testing suite." OFF)
|
||||
option(NVBench_ENABLE_HEADER_TESTING "Build NVBench testing suite." OFF)
|
||||
option(NVBench_ENABLE_DEVICE_TESTING
|
||||
"Include tests that require a GPU (with locked clocks)."
|
||||
OFF
|
||||
)
|
||||
option(NVBench_ENABLE_EXAMPLES "Build NVBench examples." OFF)
|
||||
option(NVBench_ENABLE_INSTALL_RULES "Install NVBench." ${NVBench_TOPLEVEL_PROJECT})
|
||||
|
||||
include(cmake/NVBenchUtilities.cmake) # Must be first
|
||||
include(cmake/NVBenchClangdCompileInfo.cmake) # Must be before any targets are created
|
||||
|
||||
include(cmake/NVBenchConfigTarget.cmake)
|
||||
include(cmake/NVBenchDependentDlls.cmake)
|
||||
include(cmake/NVBenchExports.cmake)
|
||||
include(cmake/NVBenchWriteConfigHeader.cmake)
|
||||
include(cmake/NVBenchDependencies.cmake)
|
||||
include(cmake/NVBenchInstallRules.cmake)
|
||||
include(cmake/NVBenchUtilities.cmake)
|
||||
|
||||
message(STATUS "NVBench CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||
|
||||
add_subdirectory(nvbench)
|
||||
|
||||
if (NVBench_ENABLE_EXAMPLES OR NVBench_ENABLE_TESTING)
|
||||
if (NVBench_ENABLE_EXAMPLES OR
|
||||
NVBench_ENABLE_TESTING OR
|
||||
NVBench_ENABLE_HEADER_TESTING)
|
||||
include(CTest)
|
||||
enable_testing()
|
||||
endif()
|
||||
|
||||
@@ -65,4 +77,8 @@ if (NVBench_ENABLE_TESTING)
|
||||
add_subdirectory(testing)
|
||||
endif()
|
||||
|
||||
if (NVBench_ENABLE_HEADER_TESTING)
|
||||
include(cmake/NVBenchHeaderTesting.cmake)
|
||||
endif()
|
||||
|
||||
nvbench_generate_exports()
|
||||
|
||||
74
CMakePresets.json
Normal file
74
CMakePresets.json
Normal file
@@ -0,0 +1,74 @@
|
||||
{
|
||||
"version": 3,
|
||||
"cmakeMinimumRequired": {
|
||||
"major": 3,
|
||||
"minor": 23,
|
||||
"patch": 1
|
||||
},
|
||||
"configurePresets": [
|
||||
{
|
||||
"name": "base",
|
||||
"hidden": true,
|
||||
"generator": "Ninja",
|
||||
"binaryDir": "${sourceDir}/build/$env{CCCL_BUILD_INFIX}/${presetName}",
|
||||
"cacheVariables": {
|
||||
"CMAKE_BUILD_TYPE": "Release",
|
||||
"CMAKE_CUDA_ARCHITECTURES": "all-major",
|
||||
"NVBench_ENABLE_CUPTI": true,
|
||||
"NVBench_ENABLE_DEVICE_TESTING": false,
|
||||
"NVBench_ENABLE_EXAMPLES": true,
|
||||
"NVBench_ENABLE_HEADER_TESTING": true,
|
||||
"NVBench_ENABLE_INSTALL_RULES": true,
|
||||
"NVBench_ENABLE_NVML": true,
|
||||
"NVBench_ENABLE_TESTING": true,
|
||||
"NVBench_ENABLE_WERROR": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "nvbench-dev",
|
||||
"displayName": "Developer Build",
|
||||
"inherits": "base",
|
||||
"cacheVariables": {
|
||||
"NVBench_ENABLE_DEVICE_TESTING": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "nvbench-ci",
|
||||
"displayName": "NVBench CI",
|
||||
"inherits": "base"
|
||||
}
|
||||
],
|
||||
"buildPresets": [
|
||||
{
|
||||
"name": "nvbench-dev",
|
||||
"configurePreset": "nvbench-dev"
|
||||
},
|
||||
{
|
||||
"name": "nvbench-ci",
|
||||
"configurePreset": "nvbench-ci"
|
||||
}
|
||||
],
|
||||
"testPresets": [
|
||||
{
|
||||
"name": "base",
|
||||
"hidden": true,
|
||||
"output": {
|
||||
"outputOnFailure": true
|
||||
},
|
||||
"execution": {
|
||||
"noTestsAction": "error",
|
||||
"stopOnFailure": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "nvbench-dev",
|
||||
"configurePreset": "nvbench-dev",
|
||||
"inherits": "base"
|
||||
},
|
||||
{
|
||||
"name": "nvbench-ci",
|
||||
"configurePreset": "nvbench-ci",
|
||||
"inherits": "base"
|
||||
}
|
||||
]
|
||||
}
|
||||
30
README.md
30
README.md
@@ -25,6 +25,17 @@ features:
|
||||
* Batch Measurements:
|
||||
* Executes the benchmark multiple times back-to-back and records total time.
|
||||
* Reports the average execution time (total time / number of executions).
|
||||
* [CPU-only Measurements](docs/benchmarks.md#cpu-only-benchmarks)
|
||||
* Measures the host-side execution time of a non-GPU benchmark.
|
||||
* Not suitable for microbenchmarking.
|
||||
|
||||
# Supported Compilers and Tools
|
||||
|
||||
- CMake > 3.30.4
|
||||
- CUDA Toolkit + nvcc: 12.0 and above
|
||||
- g++: 7 -> 14
|
||||
- clang++: 14 -> 19
|
||||
- Headers are tested with C++17 -> C++20.
|
||||
|
||||
# Getting Started
|
||||
|
||||
@@ -34,7 +45,7 @@ A basic kernel benchmark can be created with just a few lines of CUDA C++:
|
||||
|
||||
```cpp
|
||||
void my_benchmark(nvbench::state& state) {
|
||||
state.exec([](nvbench::launch& launch) {
|
||||
state.exec([](nvbench::launch& launch) {
|
||||
my_kernel<<<num_blocks, 256, 0, launch.get_stream()>>>();
|
||||
});
|
||||
}
|
||||
@@ -57,10 +68,12 @@ This repository provides a number of [examples](examples/) that demonstrate
|
||||
various NVBench features and usecases:
|
||||
|
||||
- [Runtime and compile-time parameter sweeps](examples/axes.cu)
|
||||
- [CPU-only benchmarking](examples/cpu_only.cu)
|
||||
- [Enums and compile-time-constant-integral parameter axes](examples/enums.cu)
|
||||
- [Reporting item/sec and byte/sec throughput statistics](examples/throughput.cu)
|
||||
- [Skipping benchmark configurations](examples/skip.cu)
|
||||
- [Benchmarking on a specific stream](examples/stream.cu)
|
||||
- [Adding / hiding columns (summaries) in markdown output](examples/summaries.cu)
|
||||
- [Benchmarks that sync CUDA devices: `nvbench::exec_tag::sync`](examples/exec_tag_sync.cu)
|
||||
- [Manual timing: `nvbench::exec_tag::timer`](examples/exec_tag_timer.cu)
|
||||
|
||||
@@ -70,9 +83,9 @@ To build the examples:
|
||||
```
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -DNVBench_ENABLE_EXAMPLES=ON -DCMAKE_CUDA_ARCHITECTURE=70 .. && make
|
||||
cmake -DNVBench_ENABLE_EXAMPLES=ON -DCMAKE_CUDA_ARCHITECTURES=70 .. && make
|
||||
```
|
||||
Be sure to set `CMAKE_CUDA_ARCHITECTURE` based on the GPU you are running on.
|
||||
Be sure to set `CMAKE_CUDA_ARCHITECTURE` based on the GPU you are running on.
|
||||
|
||||
Examples are built by default into `build/bin` and are prefixed with `nvbench.example`.
|
||||
|
||||
@@ -119,7 +132,7 @@ Pass: Batch: 0.261963ms GPU, 7.18s total GPU, 27394x
|
||||
## Demo Project
|
||||
|
||||
To get started using NVBench with your own kernels, consider trying out
|
||||
the [NVBench Demo Project](https://github.com/allisonvacanti/nvbench_demo).
|
||||
the [NVBench Demo Project](https://github.com/allisonvacanti/nvbench_demo).
|
||||
|
||||
`nvbench_demo` provides a simple CMake project that uses NVBench to build an
|
||||
example benchmark. It's a great way to experiment with the library without a lot
|
||||
@@ -129,7 +142,7 @@ of investment.
|
||||
|
||||
Contributions are welcome!
|
||||
|
||||
For current issues, see the [issue board](https://github.com/NVIDIA/nvbench/issues). Issues labeled with [](https://github.com/NVIDIA/nvbench/labels/good%20first%20issue) are good for first time contributors.
|
||||
For current issues, see the [issue board](https://github.com/NVIDIA/nvbench/issues). Issues labeled with [](https://github.com/NVIDIA/nvbench/labels/good%20first%20issue) are good for first time contributors.
|
||||
|
||||
## Tests
|
||||
|
||||
@@ -146,7 +159,7 @@ To run all tests:
|
||||
```
|
||||
make test
|
||||
```
|
||||
or
|
||||
or
|
||||
```
|
||||
ctest
|
||||
```
|
||||
@@ -163,6 +176,7 @@ testing and parameter tuning of individual kernels. For in-depth analysis of
|
||||
end-to-end performance of multiple applications, the NVIDIA Nsight tools are
|
||||
more appropriate.
|
||||
|
||||
NVBench is focused on evaluating the performance of CUDA kernels and is not
|
||||
optimized for CPU microbenchmarks. This may change in the future, but for now,
|
||||
NVBench is focused on evaluating the performance of CUDA kernels. It also provides
|
||||
CPU-only benchmarking facilities intended for non-trivial CPU workloads, but is
|
||||
not optimized for CPU microbenchmarks. This may change in the future, but for now,
|
||||
consider using Google Benchmark for high resolution CPU benchmarks.
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
# Copyright (c) 2018-2020 NVIDIA Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Released under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
SDK_TYPE:
|
||||
- cuda
|
||||
|
||||
SDK_VER:
|
||||
- 11.5.1-devel
|
||||
|
||||
OS_TYPE:
|
||||
- ubuntu
|
||||
|
||||
OS_VER:
|
||||
- 20.04
|
||||
|
||||
CXX_TYPE:
|
||||
- clang
|
||||
- gcc
|
||||
|
||||
CXX_VER:
|
||||
- 5
|
||||
- 6
|
||||
- 7
|
||||
- 8
|
||||
- 9
|
||||
- 10
|
||||
- 11
|
||||
- 12
|
||||
|
||||
exclude:
|
||||
- CXX_TYPE: clang
|
||||
CXX_VER: 5
|
||||
- CXX_TYPE: clang
|
||||
CXX_VER: 6
|
||||
- CXX_TYPE: gcc
|
||||
CXX_VER: 12
|
||||
@@ -1,30 +0,0 @@
|
||||
# Copyright (c) 2018-2020 NVIDIA Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Released under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
SDK_TYPE:
|
||||
- cuda
|
||||
|
||||
SDK_VER:
|
||||
- 11.5.1-devel
|
||||
|
||||
OS_TYPE:
|
||||
- ubuntu
|
||||
|
||||
OS_VER:
|
||||
- 20.04
|
||||
|
||||
CXX_TYPE:
|
||||
- clang
|
||||
- gcc
|
||||
|
||||
CXX_VER:
|
||||
- 11
|
||||
- 12
|
||||
|
||||
exclude:
|
||||
- CXX_TYPE: clang
|
||||
CXX_VER: 11
|
||||
- CXX_TYPE: gcc
|
||||
CXX_VER: 12
|
||||
246
ci/build_common.sh
Executable file
246
ci/build_common.sh
Executable file
@@ -0,0 +1,246 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
# Ensure the script is being executed in its containing directory
|
||||
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )";
|
||||
|
||||
# Script defaults
|
||||
HOST_COMPILER=${CXX:-g++} # $CXX if set, otherwise `g++`
|
||||
CXX_STANDARD=17
|
||||
CUDA_COMPILER=${CUDACXX:-nvcc} # $CUDACXX if set, otherwise `nvcc`
|
||||
CUDA_ARCHS= # Empty, use presets by default.
|
||||
GLOBAL_CMAKE_OPTIONS=()
|
||||
DISABLE_CUB_BENCHMARKS= # Enable to force-disable building CUB benchmarks.
|
||||
|
||||
# Check if the correct number of arguments has been provided
|
||||
function usage {
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo
|
||||
echo "The PARALLEL_LEVEL environment variable controls the amount of build parallelism. Default is the number of cores."
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -v/--verbose: enable shell echo for debugging"
|
||||
echo " -cuda: CUDA compiler (Defaults to \$CUDACXX if set, otherwise nvcc)"
|
||||
echo " -cxx: Host compiler (Defaults to \$CXX if set, otherwise g++)"
|
||||
echo " -std: CUDA/C++ standard (Defaults to 17)"
|
||||
echo " -arch: Target CUDA arches, e.g. \"60-real;70;80-virtual\" (Defaults to value in presets file)"
|
||||
echo " -cmake-options: Additional options to pass to CMake"
|
||||
echo
|
||||
echo "Examples:"
|
||||
echo " $ PARALLEL_LEVEL=8 $0"
|
||||
echo " $ PARALLEL_LEVEL=8 $0 -cxx g++-9"
|
||||
echo " $ $0 -cxx clang++-8"
|
||||
echo " $ $0 -cxx g++-8 -std 20 -arch 80-real -v -cuda /usr/local/bin/nvcc"
|
||||
echo " $ $0 -cmake-options \"-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS=-Wfatal-errors\""
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Parse options
|
||||
|
||||
# Copy the args into a temporary array, since we will modify them and
|
||||
# the parent script may still need them.
|
||||
args=("$@")
|
||||
while [ "${#args[@]}" -ne 0 ]; do
|
||||
case "${args[0]}" in
|
||||
-v | --verbose) VERBOSE=1; args=("${args[@]:1}");;
|
||||
-cxx) HOST_COMPILER="${args[1]}"; args=("${args[@]:2}");;
|
||||
-std) CXX_STANDARD="${args[1]}"; args=("${args[@]:2}");;
|
||||
-cuda) CUDA_COMPILER="${args[1]}"; args=("${args[@]:2}");;
|
||||
-arch) CUDA_ARCHS="${args[1]}"; args=("${args[@]:2}");;
|
||||
-disable-benchmarks) DISABLE_CUB_BENCHMARKS=1; args=("${args[@]:1}");;
|
||||
-cmake-options)
|
||||
if [ -n "${args[1]}" ]; then
|
||||
IFS=' ' read -ra split_args <<< "${args[1]}"
|
||||
GLOBAL_CMAKE_OPTIONS+=("${split_args[@]}")
|
||||
args=("${args[@]:2}")
|
||||
else
|
||||
echo "Error: No arguments provided for -cmake-options"
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
-h | -help | --help) usage ;;
|
||||
*) echo "Unrecognized option: ${args[0]}"; usage ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Convert to full paths:
|
||||
HOST_COMPILER=$(which ${HOST_COMPILER})
|
||||
CUDA_COMPILER=$(which ${CUDA_COMPILER})
|
||||
|
||||
if [[ -n "${CUDA_ARCHS}" ]]; then
|
||||
GLOBAL_CMAKE_OPTIONS+=("-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}")
|
||||
fi
|
||||
|
||||
if [ $VERBOSE ]; then
|
||||
set -x
|
||||
fi
|
||||
|
||||
# Begin processing unsets after option parsing
|
||||
set -u
|
||||
|
||||
readonly PARALLEL_LEVEL=${PARALLEL_LEVEL:=$(nproc)}
|
||||
|
||||
if [ -z ${CCCL_BUILD_INFIX+x} ]; then
|
||||
CCCL_BUILD_INFIX=""
|
||||
fi
|
||||
|
||||
# Presets will be configured in this directory:
|
||||
BUILD_DIR="../build/${CCCL_BUILD_INFIX}"
|
||||
|
||||
# The most recent build will always be symlinked to cccl/build/latest
|
||||
mkdir -p $BUILD_DIR
|
||||
rm -f ../build/latest
|
||||
ln -sf $BUILD_DIR ../build/latest
|
||||
|
||||
# Now that BUILD_DIR exists, use readlink to canonicalize the path:
|
||||
BUILD_DIR=$(readlink -f "${BUILD_DIR}")
|
||||
|
||||
# Prepare environment for CMake:
|
||||
export CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL}"
|
||||
export CTEST_PARALLEL_LEVEL="1"
|
||||
export CXX="${HOST_COMPILER}"
|
||||
export CUDACXX="${CUDA_COMPILER}"
|
||||
export CUDAHOSTCXX="${HOST_COMPILER}"
|
||||
export CXX_STANDARD
|
||||
|
||||
source ./pretty_printing.sh
|
||||
|
||||
print_environment_details() {
|
||||
begin_group "⚙️ Environment Details"
|
||||
|
||||
echo "pwd=$(pwd)"
|
||||
|
||||
print_var_values \
|
||||
BUILD_DIR \
|
||||
CXX_STANDARD \
|
||||
CXX \
|
||||
CUDACXX \
|
||||
CUDAHOSTCXX \
|
||||
NVCC_VERSION \
|
||||
CMAKE_BUILD_PARALLEL_LEVEL \
|
||||
CTEST_PARALLEL_LEVEL \
|
||||
CCCL_BUILD_INFIX \
|
||||
GLOBAL_CMAKE_OPTIONS
|
||||
|
||||
echo "Current commit is:"
|
||||
git log -1 || echo "Not a repository"
|
||||
|
||||
if command -v nvidia-smi &> /dev/null; then
|
||||
nvidia-smi
|
||||
else
|
||||
echo "nvidia-smi not found"
|
||||
fi
|
||||
|
||||
end_group "⚙️ Environment Details"
|
||||
}
|
||||
|
||||
fail_if_no_gpu() {
|
||||
if ! nvidia-smi &> /dev/null; then
|
||||
echo "Error: No NVIDIA GPU detected. Please ensure you have an NVIDIA GPU installed and the drivers are properly configured." >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
function print_test_time_summary()
|
||||
{
|
||||
ctest_log=${1}
|
||||
|
||||
if [ -f ${ctest_log} ]; then
|
||||
begin_group "⏱️ Longest Test Steps"
|
||||
# Only print the full output in CI:
|
||||
if [ -n "${GITHUB_ACTIONS:-}" ]; then
|
||||
cmake -DLOGFILE=${ctest_log} -P ../cmake/PrintCTestRunTimes.cmake
|
||||
else
|
||||
cmake -DLOGFILE=${ctest_log} -P ../cmake/PrintCTestRunTimes.cmake | head -n 15
|
||||
fi
|
||||
end_group "⏱️ Longest Test Steps"
|
||||
fi
|
||||
}
|
||||
|
||||
function configure_preset()
|
||||
{
|
||||
local BUILD_NAME=$1
|
||||
local PRESET=$2
|
||||
local CMAKE_OPTIONS=$3
|
||||
local GROUP_NAME="🛠️ CMake Configure ${BUILD_NAME}"
|
||||
|
||||
pushd .. > /dev/null
|
||||
run_command "$GROUP_NAME" cmake --preset=$PRESET --log-level=VERBOSE "${GLOBAL_CMAKE_OPTIONS[@]}" $CMAKE_OPTIONS
|
||||
status=$?
|
||||
popd > /dev/null
|
||||
return $status
|
||||
}
|
||||
|
||||
function build_preset() {
|
||||
local BUILD_NAME=$1
|
||||
local PRESET=$2
|
||||
local green="1;32"
|
||||
local red="1;31"
|
||||
local GROUP_NAME="🏗️ Build ${BUILD_NAME}"
|
||||
|
||||
source "./sccache_stats.sh" "start"
|
||||
|
||||
pushd .. > /dev/null
|
||||
run_command "$GROUP_NAME" cmake --build --preset=$PRESET -v
|
||||
status=$?
|
||||
popd > /dev/null
|
||||
|
||||
minimal_sccache_stats=$(source "./sccache_stats.sh" "end")
|
||||
|
||||
# Only print detailed stats in actions workflow
|
||||
if [ -n "${GITHUB_ACTIONS:-}" ]; then
|
||||
begin_group "💲 sccache stats"
|
||||
echo "${minimal_sccache_stats}"
|
||||
sccache -s
|
||||
end_group
|
||||
|
||||
begin_group "🥷 ninja build times"
|
||||
echo "The "weighted" time is the elapsed time of each build step divided by the number
|
||||
of tasks that were running in parallel. This makes it an excellent approximation
|
||||
of how "important" a slow step was. A link that is entirely or mostly serialized
|
||||
will have a weighted time that is the same or similar to its elapsed time. A
|
||||
compile that runs in parallel with 999 other compiles will have a weighted time
|
||||
that is tiny."
|
||||
./ninja_summary.py -C ${BUILD_DIR}/${PRESET} || echo "ninja_summary.py failed"
|
||||
end_group
|
||||
else
|
||||
echo $minimal_sccache_stats
|
||||
fi
|
||||
|
||||
return $status
|
||||
}
|
||||
|
||||
function test_preset()
|
||||
{
|
||||
local BUILD_NAME=$1
|
||||
local PRESET=$2
|
||||
local GROUP_NAME="🚀 Test ${BUILD_NAME}"
|
||||
|
||||
fail_if_no_gpu
|
||||
|
||||
|
||||
ctest_log_dir="${BUILD_DIR}/log/ctest"
|
||||
ctest_log="${ctest_log_dir}/${PRESET}"
|
||||
mkdir -p "${ctest_log_dir}"
|
||||
|
||||
pushd .. > /dev/null
|
||||
run_command "$GROUP_NAME" ctest --output-log "${ctest_log}" --preset=$PRESET
|
||||
status=$?
|
||||
popd > /dev/null
|
||||
|
||||
print_test_time_summary ${ctest_log}
|
||||
|
||||
return $status
|
||||
}
|
||||
|
||||
function configure_and_build_preset()
|
||||
{
|
||||
local BUILD_NAME=$1
|
||||
local PRESET=$2
|
||||
local CMAKE_OPTIONS=$3
|
||||
|
||||
configure_preset "$BUILD_NAME" "$PRESET" "$CMAKE_OPTIONS"
|
||||
build_preset "$BUILD_NAME" "$PRESET"
|
||||
}
|
||||
30
ci/build_nvbench.sh
Executable file
30
ci/build_nvbench.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash
|
||||
|
||||
source "$(dirname "$0")/build_common.sh"
|
||||
|
||||
print_environment_details
|
||||
|
||||
PRESET="nvbench-ci"
|
||||
|
||||
CMAKE_OPTIONS=""
|
||||
|
||||
function version_lt() {
|
||||
local lhs="${1//v/}"
|
||||
local rhs="${2//v/}"
|
||||
# If the versions are equal, return false
|
||||
[ "$lhs" = "$rhs" ] && return 1
|
||||
# If the left-hand side is less than the right-hand side, return true
|
||||
[ "$lhs" = `echo -e "$lhs\n$rhs" | sort -V | head -n1` ]
|
||||
}
|
||||
|
||||
# If CUDA_COMPILER is nvcc and the version < 11.3, disable CUPTI
|
||||
if [[ "$CUDA_COMPILER" == *"nvcc"* ]]; then
|
||||
CUDA_VERSION=$(nvcc --version | grep release | sed -r 's/.*release ([0-9.]+).*/\1/')
|
||||
if version_lt "$CUDA_VERSION" "11.3"; then
|
||||
CMAKE_OPTIONS+=" -DNVBench_ENABLE_CUPTI=OFF "
|
||||
fi
|
||||
fi
|
||||
|
||||
configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS"
|
||||
|
||||
print_time_summary
|
||||
@@ -1,231 +0,0 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2018-2020 NVIDIA Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Released under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
################################################################################
|
||||
# NVBench build script for gpuCI
|
||||
################################################################################
|
||||
|
||||
set -e
|
||||
|
||||
# append variable value
|
||||
# Appends ${value} to ${variable}, adding a space before ${value} if
|
||||
# ${variable} is not empty.
|
||||
function append {
|
||||
tmp="${!1:+${!1} }${2}"
|
||||
eval "${1}=\${tmp}"
|
||||
}
|
||||
|
||||
# log args...
|
||||
# Prints out ${args[*]} with a gpuCI log prefix and a newline before and after.
|
||||
function log() {
|
||||
printf "\n>>>> %s\n\n" "${*}"
|
||||
}
|
||||
|
||||
# print_with_trailing_blank_line args...
|
||||
# Prints ${args[*]} with one blank line following, preserving newlines within
|
||||
# ${args[*]} but stripping any preceding ${args[*]}.
|
||||
function print_with_trailing_blank_line {
|
||||
printf "%s\n\n" "${*}"
|
||||
}
|
||||
|
||||
# echo_and_run name args...
|
||||
# Echo ${args[@]}, then execute ${args[@]}
|
||||
function echo_and_run {
|
||||
echo "${1}: ${@:2}"
|
||||
${@:2}
|
||||
}
|
||||
|
||||
# echo_and_run_timed name args...
|
||||
# Echo ${args[@]}, then execute ${args[@]} and report how long it took,
|
||||
# including ${name} in the output of the time.
|
||||
function echo_and_run_timed {
|
||||
echo "${@:2}"
|
||||
TIMEFORMAT=$'\n'"${1} Time: %lR"
|
||||
time ${@:2}
|
||||
}
|
||||
|
||||
# join_delimit <delimiter> [value [value [...]]]
|
||||
# Combine all values into a single string, separating each by a single character
|
||||
# delimiter. Eg:
|
||||
# foo=(bar baz kramble)
|
||||
# joined_foo=$(join_delimit "|" "${foo[@]}")
|
||||
# echo joined_foo # "bar|baz|kramble"
|
||||
function join_delimit {
|
||||
local IFS="${1}"
|
||||
shift
|
||||
echo "${*}"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# VARIABLES - Set up bash and environmental variables.
|
||||
################################################################################
|
||||
|
||||
# Get the variables the Docker container set up for us: ${CXX}, ${CUDACXX}, etc.
|
||||
source /etc/cccl.bashrc
|
||||
|
||||
# Set path.
|
||||
export PATH=/usr/local/cuda/bin:${PATH}
|
||||
|
||||
# Set home to the job's workspace.
|
||||
export HOME=${WORKSPACE}
|
||||
|
||||
# Switch to the build directory.
|
||||
cd ${WORKSPACE}
|
||||
mkdir -p build
|
||||
cd build
|
||||
|
||||
# Remove any old .ninja_log file so the PrintNinjaBuildTimes step is accurate:
|
||||
rm -f .ninja_log
|
||||
|
||||
if [[ -z "${CMAKE_BUILD_TYPE}" ]]; then
|
||||
CMAKE_BUILD_TYPE="Release"
|
||||
fi
|
||||
|
||||
CMAKE_BUILD_FLAGS="--"
|
||||
|
||||
# The Docker image sets up `${CXX}` and `${CUDACXX}`.
|
||||
append CMAKE_FLAGS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
|
||||
append CMAKE_FLAGS "-DCMAKE_CUDA_COMPILER='${CUDACXX}'"
|
||||
|
||||
if [[ "${CXX_TYPE}" == "nvcxx" ]]; then
|
||||
echo "nvc++ not supported."
|
||||
exit 1
|
||||
else
|
||||
if [[ "${CXX_TYPE}" == "icc" ]]; then
|
||||
echo "icc not supported."
|
||||
exit 1
|
||||
fi
|
||||
# We're using NVCC so we need to set the host compiler.
|
||||
append CMAKE_FLAGS "-DCMAKE_CXX_COMPILER='${CXX}'"
|
||||
append CMAKE_FLAGS "-DCMAKE_CUDA_HOST_COMPILER='${CXX}'"
|
||||
append CMAKE_FLAGS "-G Ninja"
|
||||
# Don't stop on build failures.
|
||||
append CMAKE_BUILD_FLAGS "-k0"
|
||||
fi
|
||||
|
||||
if [[ -n "${PARALLEL_LEVEL}" ]]; then
|
||||
DETERMINE_PARALLELISM_FLAGS="-j ${PARALLEL_LEVEL}"
|
||||
fi
|
||||
|
||||
WSL=0
|
||||
if [[ $(grep -i microsoft /proc/version) ]]; then
|
||||
echo "Windows Subsystem for Linux detected."
|
||||
WSL=1
|
||||
fi
|
||||
export WSL
|
||||
|
||||
#append CMAKE_FLAGS "-DCMAKE_CUDA_ARCHITECTURES=all"
|
||||
|
||||
append CMAKE_FLAGS "-DNVBench_ENABLE_EXAMPLES=ON"
|
||||
append CMAKE_FLAGS "-DNVBench_ENABLE_TESTING=ON"
|
||||
append CMAKE_FLAGS "-DNVBench_ENABLE_CUPTI=ON"
|
||||
append CMAKE_FLAGS "-DNVBench_ENABLE_WERROR=ON"
|
||||
|
||||
# These consume a lot of time and don't currently have
|
||||
# any value as regression tests.
|
||||
append CMAKE_FLAGS "-DNVBench_ENABLE_DEVICE_TESTING=OFF"
|
||||
|
||||
# NVML doesn't work under WSL
|
||||
if [[ ${WSL} -eq 0 ]]; then
|
||||
append CMAKE_FLAGS "-DNVBench_ENABLE_NVML=ON"
|
||||
else
|
||||
append CMAKE_FLAGS "-DNVBench_ENABLE_NVML=OFF"
|
||||
fi
|
||||
|
||||
if [[ -n "${@}" ]]; then
|
||||
append CMAKE_BUILD_FLAGS "${@}"
|
||||
fi
|
||||
|
||||
append CTEST_FLAGS "--output-on-failure"
|
||||
|
||||
# Export variables so they'll show up in the logs when we report the environment.
|
||||
export CMAKE_FLAGS
|
||||
export CMAKE_BUILD_FLAGS
|
||||
export CTEST_FLAGS
|
||||
|
||||
################################################################################
|
||||
# ENVIRONMENT - Configure and print out information about the environment.
|
||||
################################################################################
|
||||
|
||||
log "Determine system topology..."
|
||||
|
||||
# Set `${PARALLEL_LEVEL}` if it is unset; otherwise, this just reports the
|
||||
# system topology.
|
||||
source ${WORKSPACE}/ci/common/determine_build_parallelism.bash ${DETERMINE_PARALLELISM_FLAGS}
|
||||
|
||||
log "Get environment..."
|
||||
|
||||
env | sort
|
||||
|
||||
log "Check versions..."
|
||||
|
||||
# We use sed and echo below to ensure there is always one and only trailing
|
||||
# line following the output from each tool.
|
||||
|
||||
${CXX} --version 2>&1 | sed -Ez '$ s/\n*$/\n/'
|
||||
|
||||
echo
|
||||
|
||||
${CUDACXX} --version 2>&1 | sed -Ez '$ s/\n*$/\n/'
|
||||
|
||||
echo
|
||||
|
||||
cmake --version 2>&1 | sed -Ez '$ s/\n*$/\n/'
|
||||
|
||||
echo
|
||||
|
||||
if [[ "${BUILD_TYPE}" == "gpu" ]]; then
|
||||
nvidia-smi 2>&1 | sed -Ez '$ s/\n*$/\n/'
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
# BUILD
|
||||
################################################################################
|
||||
|
||||
log "Configure..."
|
||||
|
||||
echo_and_run_timed "Configure" cmake .. --log-level=VERBOSE ${CMAKE_FLAGS}
|
||||
configure_status=$?
|
||||
|
||||
log "Build..."
|
||||
|
||||
# ${PARALLEL_LEVEL} needs to be passed after we run
|
||||
# determine_build_parallelism.bash, so it can't be part of ${CMAKE_BUILD_FLAGS}.
|
||||
set +e # Don't stop on build failures.
|
||||
echo_and_run_timed "Build" cmake --build . ${CMAKE_BUILD_FLAGS} -j ${PARALLEL_LEVEL}
|
||||
build_status=$?
|
||||
set -e
|
||||
|
||||
################################################################################
|
||||
# TEST - Run examples and tests.
|
||||
################################################################################
|
||||
|
||||
log "Test..."
|
||||
|
||||
(
|
||||
# Make sure test_status captures ctest, not tee:
|
||||
# https://stackoverflow.com/a/999259/11130318
|
||||
set -o pipefail
|
||||
echo_and_run_timed "Test" ctest ${CTEST_FLAGS} -j ${PARALLEL_LEVEL} | tee ctest_log
|
||||
)
|
||||
|
||||
test_status=$?
|
||||
|
||||
################################################################################
|
||||
# SUMMARY - Print status of each step and exit with failure if needed.
|
||||
################################################################################
|
||||
|
||||
log "Summary:"
|
||||
echo "- Configure Error Code: ${configure_status}"
|
||||
echo "- Build Error Code: ${build_status}"
|
||||
echo "- Test Error Code: ${test_status}"
|
||||
|
||||
if [[ "${configure_status}" != "0" ]] || \
|
||||
[[ "${build_status}" != "0" ]] || \
|
||||
[[ "${test_status}" != "0" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
@@ -1,119 +0,0 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2018-2020 NVIDIA Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Released under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
function usage {
|
||||
echo "Usage: ${0} [flags...]"
|
||||
echo
|
||||
echo "Examine the system topology to determine a reasonable amount of build"
|
||||
echo "parallelism."
|
||||
echo
|
||||
echo "Exported variables:"
|
||||
echo " \${LOGICAL_CPUS} : Logical processors (e.g. threads)."
|
||||
echo " \${PHYSICAL_CPUS} : Physical processors (e.g. cores)."
|
||||
echo " \${TOTAL_MEM} : Total system memory [GB]."
|
||||
echo " \${MAX_THREADS_PER_CORE} : Maximum threads per core allowed."
|
||||
echo " \${MIN_MEMORY_PER_THREAD} : Minimum memory [GB] per thread allowed."
|
||||
echo " \${CPU_BOUND_THREADS} : # of build threads constrained by processors."
|
||||
echo " \${MEM_BOUND_THREADS} : # of build threads constrained by memory [GB]."
|
||||
echo " \${PARALLEL_LEVEL} : Determined # of build threads."
|
||||
echo " \${MEM_PER_THREAD} : Memory [GB] per build thread."
|
||||
echo
|
||||
echo "-h, -help, --help"
|
||||
echo " Print this message."
|
||||
echo
|
||||
echo "-q, --quiet"
|
||||
echo " Print nothing and only export variables."
|
||||
echo
|
||||
echo "-j <threads>, --jobs <threads>"
|
||||
echo " Explicitly set the number of build threads to use."
|
||||
echo
|
||||
echo "--max-threads-per-core <threads>"
|
||||
echo " Specify the maximum threads per core allowed (default: ${MAX_THREADS_PER_CORE} [threads/core])."
|
||||
echo
|
||||
echo "--min-memory-per-thread <gigabytes>"
|
||||
echo " Specify the minimum memory per thread allowed (default: ${MIN_MEMORY_PER_THREAD} [GBs/thread])."
|
||||
|
||||
exit -3
|
||||
}
|
||||
|
||||
QUIET=0
|
||||
|
||||
export MAX_THREADS_PER_CORE=2
|
||||
export MIN_MEMORY_PER_THREAD=1 # [GB]
|
||||
|
||||
while test ${#} != 0
|
||||
do
|
||||
case "${1}" in
|
||||
-h) ;&
|
||||
-help) ;&
|
||||
--help) usage ;;
|
||||
-q) ;&
|
||||
--quiet) QUIET=1 ;;
|
||||
-j) ;&
|
||||
--jobs)
|
||||
shift # The next argument is the number of threads.
|
||||
PARALLEL_LEVEL="${1}"
|
||||
;;
|
||||
--max-threads-per-core)
|
||||
shift # The next argument is the number of threads per core.
|
||||
MAX_THREADS_PER_CORE="${1}"
|
||||
;;
|
||||
--min-memory-per-thread)
|
||||
shift # The next argument is the amount of memory per thread.
|
||||
MIN_MEMORY_PER_THREAD="${1}"
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# https://stackoverflow.com/a/23378780
|
||||
if [ $(uname) == "Darwin" ]; then
|
||||
export LOGICAL_CPUS=$(sysctl -n hw.logicalcpu_max)
|
||||
export PHYSICAL_CPUS=$(sysctl -n hw.physicalcpu_max)
|
||||
else
|
||||
export LOGICAL_CPUS=$(lscpu -p | egrep -v '^#' | wc -l)
|
||||
export PHYSICAL_CPUS=$(lscpu -p | egrep -v '^#' | sort -u -t, -k 2,4 | wc -l)
|
||||
fi
|
||||
|
||||
export TOTAL_MEM=$(awk "BEGIN { printf \"%0.4g\", $(grep MemTotal /proc/meminfo | awk '{ print $2 }') / (1024 * 1024) }")
|
||||
|
||||
export CPU_BOUND_THREADS=$(awk "BEGIN { printf \"%.04g\", int(${PHYSICAL_CPUS} * ${MAX_THREADS_PER_CORE}) }")
|
||||
export MEM_BOUND_THREADS=$(awk "BEGIN { printf \"%.04g\", int(${TOTAL_MEM} / ${MIN_MEMORY_PER_THREAD}) }")
|
||||
|
||||
if [[ -z "${PARALLEL_LEVEL}" ]]; then
|
||||
# Pick the smaller of the two as the default.
|
||||
if [[ "${MEM_BOUND_THREADS}" -lt "${CPU_BOUND_THREADS}" ]]; then
|
||||
export PARALLEL_LEVEL=${MEM_BOUND_THREADS}
|
||||
else
|
||||
export PARALLEL_LEVEL=${CPU_BOUND_THREADS}
|
||||
fi
|
||||
else
|
||||
EXPLICIT_PARALLEL_LEVEL=1
|
||||
fi
|
||||
|
||||
# This can be a floating point number.
|
||||
export MEM_PER_THREAD=$(awk "BEGIN { printf \"%.04g\", ${TOTAL_MEM} / ${PARALLEL_LEVEL} }")
|
||||
|
||||
if [[ "${QUIET}" == 0 ]]; then
|
||||
echo "Logical CPUs: ${LOGICAL_CPUS} [threads]"
|
||||
echo "Physical CPUs: ${PHYSICAL_CPUS} [cores]"
|
||||
echo "Total Mem: ${TOTAL_MEM} [GBs]"
|
||||
echo "Max Threads Per Core: ${MAX_THREADS_PER_CORE} [threads/core]"
|
||||
echo "Min Memory Per Threads: ${MIN_MEMORY_PER_THREAD} [GBs/thread]"
|
||||
echo "CPU Bound Threads: ${CPU_BOUND_THREADS} [threads]"
|
||||
echo "Mem Bound Threads: ${MEM_BOUND_THREADS} [threads]"
|
||||
|
||||
echo -n "Parallel Level: ${PARALLEL_LEVEL} [threads]"
|
||||
if [[ -n "${EXPLICIT_PARALLEL_LEVEL}" ]]; then
|
||||
echo " (explicitly set)"
|
||||
else
|
||||
echo
|
||||
fi
|
||||
|
||||
echo "Mem Per Thread: ${MEM_PER_THREAD} [GBs/thread]"
|
||||
fi
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2018-2020 NVIDIA Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Released under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
################################################################################
|
||||
# NVBench build script for gpuCI (CPU-only)
|
||||
################################################################################
|
||||
|
||||
export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
|
||||
|
||||
source ${WORKSPACE}/ci/common/build.bash
|
||||
@@ -1,14 +0,0 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2018-2020 NVIDIA Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Released under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
################################################################################
|
||||
# NVBench build script for gpuCI (heterogeneous)
|
||||
################################################################################
|
||||
|
||||
export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
|
||||
|
||||
source ${WORKSPACE}/ci/common/build.bash
|
||||
@@ -1,215 +0,0 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# Copyright (c) 2018-2020 NVIDIA Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Released under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
|
||||
################################################################################
|
||||
# NVBench local containerized build script
|
||||
################################################################################
|
||||
|
||||
function usage {
|
||||
echo "Usage: ${0} [flags...] [cmake-targets...]"
|
||||
echo
|
||||
echo "Build and test your local repository using a gpuCI Docker image."
|
||||
echo "If CMake targets are specified, only those targets are built and tested."
|
||||
echo "Otherwise, everything is built and tested."
|
||||
echo
|
||||
echo "-h, -help, --help"
|
||||
echo " Print this message."
|
||||
echo
|
||||
echo "-r <path>, --repository <path>"
|
||||
echo " Path to the repository (default: ${REPOSITORY_PATH})."
|
||||
echo
|
||||
echo "-i <image>, --image <image>"
|
||||
echo " Docker image to use (default: ${IMAGE})"
|
||||
echo
|
||||
echo "-l, --local-image"
|
||||
echo " Use the local version of the image instead of pulling from Docker hub."
|
||||
echo
|
||||
echo "-s, --shell-only"
|
||||
echo " Skip building and testing and launch an interactive shell instead."
|
||||
echo
|
||||
echo "-d, --disable-gpus"
|
||||
echo " Don't start the container with the NVIDIA runtime and GPUs attached."
|
||||
echo
|
||||
echo "-c, --clean"
|
||||
echo " If the build directory already exists, delete it."
|
||||
echo
|
||||
echo "-j <threads>, --jobs <threads>"
|
||||
echo " Number of threads to use when building (default: inferred)."
|
||||
echo
|
||||
echo "-b <type>, --cmake-build-type <plan>"
|
||||
echo " CMake build type to use, either Release, RelWithDebInfo, or Debug"
|
||||
echo " (default: ${CMAKE_BUILD_TYPE})."
|
||||
echo
|
||||
|
||||
exit -3
|
||||
}
|
||||
|
||||
SCRIPT_PATH=$(cd $(dirname ${0}); pwd -P)
|
||||
|
||||
REPOSITORY_PATH=$(realpath ${SCRIPT_PATH}/../..)
|
||||
|
||||
################################################################################
|
||||
# FLAGS - Process command line flags.
|
||||
################################################################################
|
||||
|
||||
IMAGE="gpuci/cccl:cuda11.5.1-devel-ubuntu20.04-gcc9"
|
||||
|
||||
LOCAL_IMAGE=0
|
||||
|
||||
SHELL_ONLY=0
|
||||
|
||||
BUILD_TYPE="gpu"
|
||||
|
||||
CLEAN=0
|
||||
|
||||
PARALLEL_LEVEL=""
|
||||
|
||||
CMAKE_BUILD_TYPE="Release"
|
||||
|
||||
TARGETS=""
|
||||
|
||||
while test ${#} != 0
|
||||
do
|
||||
case "${1}" in
|
||||
-h) ;&
|
||||
-help) ;&
|
||||
--help) usage ;;
|
||||
-r) ;&
|
||||
--repository)
|
||||
shift # The next argument is the path.
|
||||
REPOSITORY_PATH="${1}"
|
||||
;;
|
||||
-i) ;&
|
||||
--image)
|
||||
shift # The next argument is the image.
|
||||
IMAGE="${1}"
|
||||
;;
|
||||
-l) ;&
|
||||
--local-image) LOCAL_IMAGE=1 ;;
|
||||
-s) ;&
|
||||
--shell-only) SHELL_ONLY=1 ;;
|
||||
-d) ;&
|
||||
--disable-gpus) BUILD_TYPE="cpu" ;;
|
||||
-c) ;&
|
||||
--clean) CLEAN=1 ;;
|
||||
-j) ;&
|
||||
--jobs)
|
||||
shift # The next argument is the number of threads.
|
||||
PARALLEL_LEVEL="${1}"
|
||||
;;
|
||||
-b) ;&
|
||||
--cmake-build-type)
|
||||
shift # The next argument is the build type.
|
||||
CMAKE_BUILD_TYPE="${1}"
|
||||
;;
|
||||
*)
|
||||
TARGETS="${TARGETS:+${TARGETS} }${1}"
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
################################################################################
|
||||
# PATHS - Setup paths for the container.
|
||||
################################################################################
|
||||
|
||||
# ${REPOSITORY_PATH} is the local filesystem path to the Git repository being
|
||||
# built and tested. It can be set with the --repository flag.
|
||||
#
|
||||
# ${BUILD_PATH} is the local filesystem path that will be used for the build. It
|
||||
# is named after the image name, allowing multiple image builds to coexist on
|
||||
# the local filesystem.
|
||||
#
|
||||
# ${REPOSITORY_PATH_IN_CONTAINER} is the location of ${REPOSITORY_PATH} inside
|
||||
# the container.
|
||||
#
|
||||
# ${BUILD_PATH_IN_CONTAINER} is the location of ${BUILD_PATH} inside the
|
||||
# container.
|
||||
|
||||
BUILD_PATH=${REPOSITORY_PATH}/build_$(echo "$(basename "${IMAGE}")" | sed -e 's/:/_/g' | sed -e 's/-/_/g')
|
||||
|
||||
if [[ "${CLEAN}" != 0 ]]; then
|
||||
rm -rf ${BUILD_PATH}
|
||||
fi
|
||||
|
||||
mkdir -p ${BUILD_PATH}
|
||||
|
||||
BASE_PATH_IN_CONTAINER="/cccl"
|
||||
|
||||
REPOSITORY_PATH_IN_CONTAINER="${BASE_PATH_IN_CONTAINER}/$(basename "${REPOSITORY_PATH}")"
|
||||
|
||||
BUILD_PATH_IN_CONTAINER="${BASE_PATH_IN_CONTAINER}/$(basename "${REPOSITORY_PATH}")/build"
|
||||
|
||||
################################################################################
|
||||
# ENVIRONMENT - Setup the thunk build script that will be run by the container.
|
||||
################################################################################
|
||||
|
||||
# We have to run `ldconfig` to rebuild `ld.so.cache` to work around this
|
||||
# failure on Debian: https://github.com/NVIDIA/nvidia-docker/issues/1399
|
||||
|
||||
COMMAND="sudo ldconfig; sudo ldconfig"
|
||||
if [[ "${SHELL_ONLY}" != 0 ]]; then
|
||||
COMMAND="${COMMAND}; bash"
|
||||
else
|
||||
COMMAND="${COMMAND}; ${REPOSITORY_PATH_IN_CONTAINER}/ci/common/build.bash ${TARGETS} || bash"
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
# GPU - Setup GPUs.
|
||||
################################################################################
|
||||
|
||||
# Note: We always start docker with --gpus, even for cpu builds. Otherwise
|
||||
# libcuda.so.1 is not present and no NVBench tests are able to run.
|
||||
|
||||
# Limit GPUs available to the container based on ${CUDA_VISIBLE_DEVICES}.
|
||||
if [[ -z "${CUDA_VISIBLE_DEVICES}" ]]; then
|
||||
VISIBLE_DEVICES="all"
|
||||
else
|
||||
VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES}"
|
||||
fi
|
||||
|
||||
DOCKER_MAJOR_VER=$(docker -v | sed 's/[^[0-9]*\([0-9]*\).*/\1/')
|
||||
GPU_OPTS="--gpus device=${VISIBLE_DEVICES}"
|
||||
if [[ "${DOCKER_MAJOR_VER}" -lt 19 ]]
|
||||
then
|
||||
GPU_OPTS="--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES='${VISIBLE_DEVICES}'"
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
# LAUNCH - Pull and launch the container.
|
||||
################################################################################
|
||||
|
||||
#NVIDIA_DOCKER_INSTALLED=$(docker info 2>&1 | grep -i runtime | grep -c nvidia)
|
||||
NVIDIA_DOCKER_INSTALLED=1 # Broken on WSL
|
||||
if [[ "${NVIDIA_DOCKER_INSTALLED}" == 0 ]]; then
|
||||
echo "NVIDIA Docker not found, please install it: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-docker-ce"
|
||||
exit -4
|
||||
fi
|
||||
|
||||
if [[ "${LOCAL_IMAGE}" == 0 ]]; then
|
||||
docker pull "${IMAGE}"
|
||||
fi
|
||||
|
||||
docker run --rm -it ${GPU_OPTS} \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--user "$(id -u)":"$(id -g)" \
|
||||
-v "${REPOSITORY_PATH}":"${REPOSITORY_PATH_IN_CONTAINER}" \
|
||||
-v "${BUILD_PATH}":"${BUILD_PATH_IN_CONTAINER}" \
|
||||
-v /etc/passwd:/etc/passwd:ro \
|
||||
-v /etc/group:/etc/group:ro \
|
||||
-v /etc/subuid:/etc/subuid:ro \
|
||||
-v /etc/subgid:/etc/subgid:ro \
|
||||
-v /etc/shadow:/etc/shadow:ro \
|
||||
-v /etc/gshadow:/etc/gshadow:ro \
|
||||
-e "WORKSPACE=${REPOSITORY_PATH_IN_CONTAINER}" \
|
||||
-e "BUILD_TYPE=${BUILD_TYPE}" \
|
||||
-e "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" \
|
||||
-e "COVERAGE_PLAN=${COVERAGE_PLAN}" \
|
||||
-e "PARALLEL_LEVEL=${PARALLEL_LEVEL}" \
|
||||
-w "${BUILD_PATH_IN_CONTAINER}" \
|
||||
"${IMAGE}" bash -c "${COMMAND}"
|
||||
|
||||
61
ci/matrix.yaml
Normal file
61
ci/matrix.yaml
Normal file
@@ -0,0 +1,61 @@
|
||||
|
||||
cuda_prev_min: &cuda_prev_min '11.1' # Unsupported: No cupti support, issues compiling newer fmt.
|
||||
cuda_prev_max: &cuda_prev_max '11.8'
|
||||
cuda_curr_min: &cuda_curr_min '12.0'
|
||||
cuda_curr_max: &cuda_curr_max '12.8'
|
||||
|
||||
# The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers
|
||||
devcontainer_version: '25.06'
|
||||
|
||||
# gcc compiler configurations
|
||||
gcc7: &gcc7 { name: 'gcc', version: '7', exe: 'g++' }
|
||||
gcc8: &gcc8 { name: 'gcc', version: '8', exe: 'g++' }
|
||||
gcc9: &gcc9 { name: 'gcc', version: '9', exe: 'g++' }
|
||||
gcc10: &gcc10 { name: 'gcc', version: '10', exe: 'g++' }
|
||||
gcc11: &gcc11 { name: 'gcc', version: '11', exe: 'g++' }
|
||||
gcc12: &gcc12 { name: 'gcc', version: '12', exe: 'g++' }
|
||||
gcc13: &gcc13 { name: 'gcc', version: '13', exe: 'g++' }
|
||||
gcc14: &gcc14 { name: 'gcc', version: '14', exe: 'g++' }
|
||||
|
||||
# LLVM Compiler configurations
|
||||
llvm14: &llvm14 { name: 'llvm', version: '14', exe: 'clang++' }
|
||||
llvm15: &llvm15 { name: 'llvm', version: '15', exe: 'clang++' }
|
||||
llvm16: &llvm16 { name: 'llvm', version: '16', exe: 'clang++' }
|
||||
llvm17: &llvm17 { name: 'llvm', version: '17', exe: 'clang++' }
|
||||
llvm18: &llvm18 { name: 'llvm', version: '18', exe: 'clang++' }
|
||||
llvm19: &llvm19 { name: 'llvm', version: '19', exe: 'clang++' }
|
||||
|
||||
# Each environment below will generate a unique build/test job
|
||||
# See the "compute-matrix" job in the workflow for how this is parsed and used
|
||||
# cuda: The CUDA Toolkit version
|
||||
# os: The operating system used
|
||||
# cpu: The CPU architecture
|
||||
# compiler: The compiler to use
|
||||
# name: The compiler name
|
||||
# version: The compiler version
|
||||
# exe: The unverionsed compiler binary name
|
||||
|
||||
# Configurations that will run for every PR
|
||||
pull_request:
|
||||
nvcc:
|
||||
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7 }
|
||||
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8 }
|
||||
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9 }
|
||||
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10 }
|
||||
- {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11 }
|
||||
- {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12 }
|
||||
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc13 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu24.04', cpu: 'amd64', compiler: *gcc14 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm17 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm18 }
|
||||
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm19 }
|
||||
390
ci/ninja_summary.py
Executable file
390
ci/ninja_summary.py
Executable file
@@ -0,0 +1,390 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2018 The Chromium Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
r"""Summarize the last ninja build, invoked with ninja's -C syntax.
|
||||
|
||||
This script is designed to be automatically run after each ninja build in
|
||||
order to summarize the build's performance. Making build performance information
|
||||
more visible should make it easier to notice anomalies and opportunities. To use
|
||||
this script on Windows just set NINJA_SUMMARIZE_BUILD=1 and run autoninja.bat.
|
||||
|
||||
On Linux you can get autoninja to invoke this script using this syntax:
|
||||
|
||||
$ NINJA_SUMMARIZE_BUILD=1 autoninja -C out/Default/ chrome
|
||||
|
||||
You can also call this script directly using ninja's syntax to specify the
|
||||
output directory of interest:
|
||||
|
||||
> python3 post_build_ninja_summary.py -C out/Default
|
||||
|
||||
Typical output looks like this:
|
||||
|
||||
>ninja -C out\debug_component base
|
||||
ninja.exe -C out\debug_component base -j 960 -l 48 -d keeprsp
|
||||
ninja: Entering directory `out\debug_component'
|
||||
[1 processes, 1/1 @ 0.3/s : 3.092s ] Regenerating ninja files
|
||||
Longest build steps:
|
||||
0.1 weighted s to build obj/base/base/trace_log.obj (6.7 s elapsed time)
|
||||
0.2 weighted s to build nasm.exe, nasm.exe.pdb (0.2 s elapsed time)
|
||||
0.3 weighted s to build obj/base/base/win_util.obj (12.4 s elapsed time)
|
||||
1.2 weighted s to build base.dll, base.dll.lib (1.2 s elapsed time)
|
||||
Time by build-step type:
|
||||
0.0 s weighted time to generate 6 .lib files (0.3 s elapsed time sum)
|
||||
0.1 s weighted time to generate 25 .stamp files (1.2 s elapsed time sum)
|
||||
0.2 s weighted time to generate 20 .o files (2.8 s elapsed time sum)
|
||||
1.7 s weighted time to generate 4 PEFile (linking) files (2.0 s elapsed
|
||||
time sum)
|
||||
23.9 s weighted time to generate 770 .obj files (974.8 s elapsed time sum)
|
||||
26.1 s weighted time (982.9 s elapsed time sum, 37.7x parallelism)
|
||||
839 build steps completed, average of 32.17/s
|
||||
|
||||
If no gn clean has been done then results will be for the last non-NULL
|
||||
invocation of ninja. Ideas for future statistics, and implementations are
|
||||
appreciated.
|
||||
|
||||
The "weighted" time is the elapsed time of each build step divided by the number
|
||||
of tasks that were running in parallel. This makes it an excellent approximation
|
||||
of how "important" a slow step was. A link that is entirely or mostly serialized
|
||||
will have a weighted time that is the same or similar to its elapsed time. A
|
||||
compile that runs in parallel with 999 other compiles will have a weighted time
|
||||
that is tiny."""
|
||||
|
||||
import argparse
|
||||
import errno
|
||||
import fnmatch
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# The number of long build times to report:
|
||||
long_count = 10
|
||||
# The number of long times by extension to report
|
||||
long_ext_count = 10
|
||||
|
||||
|
||||
class Target:
    """A single build step parsed from a .ninja_log file."""

    def __init__(self, start, end):
        """Create a target spanning [start, end], given in float seconds."""
        self.start = start
        self.end = end
        # Output names produced by this step; filled in by the log reader.
        self.targets = []
        self.weighted_duration = 0.0

    def Duration(self):
        """Return the elapsed wall-clock time of this step in seconds (float)."""
        return self.end - self.start

    def SetWeightedDuration(self, weighted_duration):
        """Record the parallelism-weighted duration, in seconds (float)."""
        self.weighted_duration = weighted_duration

    def WeightedDuration(self):
        """Return the task's parallelism-weighted duration in seconds (float).

        The weighted duration divides each slice of elapsed time by the
        number of tasks running concurrently, so it approximates this
        step's real impact on total build time: serialized steps keep a
        weighted time close to their elapsed time, while steps that ran
        alongside many others get a tiny one. It should never exceed the
        raw duration (modulo floating-point noise).
        """
        # Tolerate modest floating-point accumulation errors.
        epsilon = 0.000002
        limit = self.Duration() + epsilon
        if self.weighted_duration > limit:
            print("%s > %s?" % (self.weighted_duration, self.Duration()))
        assert self.weighted_duration <= limit
        return self.weighted_duration

    def DescribeTargets(self):
        """Return a short printable summary of this step's output names."""
        # Build steps can emit dozens of outputs; cap the description so it
        # fits most long single-target names while minimizing word wrapping.
        max_length = 65
        summary = ", ".join(self.targets)
        if len(summary) <= max_length:
            return summary
        return summary[:max_length] + "..."
|
||||
|
||||
|
||||
# Copied with some modifications from ninjatracing
def ReadTargets(log, show_all):
    """Read all targets from the .ninja_log file object |log|.

    Returns a list of Target objects. An empty log yields an empty list.
    When show_all is False, only the most recent build in the log is kept.
    """
    header = log.readline()
    # An empty .ninja_log is not an error; silently report no targets.
    if not header:
        return []
    assert header == "# ninja log v5\n", "unrecognized ninja log version %r" % header
    targets_dict = {}
    last_end_seen = 0.0
    for line in log:
        fields = line.strip().split("\t")
        # A rudely-halted ninja.exe can leave a corrupt .ninja_log behind;
        # silently skip malformed records.
        if len(fields) != 5:
            continue
        start, end, _restat, name, cmdhash = fields
        # Timestamps are integral milliseconds; convert to float seconds.
        start = int(start) / 1000.0
        end = int(end) / 1000.0
        if not show_all and end < last_end_seen:
            # Records are appended as commands complete, so end times are
            # monotonically increasing within one build (start times are
            # not ordered and cannot be used for this). A step ending
            # earlier than the previous one therefore marks the beginning
            # of a new, possibly incremental, build: throw away the old
            # data so only the newest build is reported.
            targets_dict = {}
        target = targets_dict.get(cmdhash)
        if target is not None and not show_all and (
            target.start != start or target.end != end
        ):
            # Several small builds in a row may never trip the end-time
            # check above. But a repeated build step - identified by its
            # command hash - reappearing with different start/stop times
            # must belong to a newer build, so reset here as well.
            targets_dict = {}
            target = None
        if target is None:
            target = Target(start, end)
            targets_dict[cmdhash] = target
            last_end_seen = end
        target.targets.append(name)
    return list(targets_dict.values())
|
||||
|
||||
|
||||
def GetExtension(target, extra_patterns):
    """Return the file extension (or grouping label) that best represents
    a target.

    Targets that generate multiple outputs need a consistent 'canonical'
    extension so that build steps can ultimately be grouped by type.
    extra_patterns is an optional semicolon-separated list of fnmatch
    fragments; the first fragment matching an output wins outright.
    """
    for output in target.targets:
        if extra_patterns:
            # User-supplied groupings take priority over extensions.
            for fn_pattern in extra_patterns.split(";"):
                if fnmatch.fnmatch(output, "*" + fn_pattern + "*"):
                    return fn_pattern
        # Not a true extension, but a good grouping.
        if output.endswith("type_mappings"):
            extension = "type_mappings"
            break

        # Capture two extensions when present so that, e.g.,
        # file.javac.jar is distinguished from file.interface.jar.
        root, ext1 = os.path.splitext(output)
        ext2 = os.path.splitext(root)[1]
        extension = ext2 + ext1  # Preserve the order in the file name.

        if not extension:
            extension = "(no extension found)"

        if ext1 in (".pdb", ".dll", ".exe"):
            # Group .dll and .exe together as linking, and keep the
            # .dll.lib outputs from being listed as libraries.
            extension = "PEFile (linking)"
            break
        if ext1 in (".so", ".TOC"):
            # Attempt to identify linking; avoid classifying as '.TOC'.
            extension = ".so (linking)"
            break
        if ext1 in (".obj", ".o"):
            # Make sure .obj files don't get categorized as mojo files.
            break
        if ext1 == ".jar":
            # Jars are the canonical output of java targets.
            break
        if ".mojom" in output:
            # Normalize all mojo-related outputs to 'mojo'.
            extension = "mojo"
            break
    return extension
|
||||
|
||||
|
||||
def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
    """Print a summary of the passed in list of Target objects.

    Prints the slowest individual build steps and per-extension totals,
    sorted by weighted time (or elapsed time if elapsed_time_sorting).
    NOTE: sorts |entries| in place as a side effect.
    """

    # Create a list that is in order by time stamp and has entries for the
    # beginning and ending of each build step (one time stamp may have multiple
    # entries due to multiple steps starting/stopping at exactly the same time).
    # Iterate through this list, keeping track of which tasks are running at all
    # times. At each time step calculate a running total for weighted time so
    # that when each task ends its own weighted time can easily be calculated.
    task_start_stop_times = []

    earliest = -1
    latest = 0
    total_cpu_time = 0
    for target in entries:
        if earliest < 0 or target.start < earliest:
            earliest = target.start
        if target.end > latest:
            latest = target.end
        total_cpu_time += target.Duration()
        task_start_stop_times.append((target.start, "start", target))
        task_start_stop_times.append((target.end, "stop", target))
    length = latest - earliest
    weighted_total = 0.0

    # Sort by the time/type records and ignore |target|
    task_start_stop_times.sort(key=lambda times: times[:2])
    # Now we have all task start/stop times sorted by when they happen. If a
    # task starts and stops on the same time stamp then the start will come
    # first because of the alphabet ("start" < "stop"), which is important
    # for making this work correctly.
    # Track the tasks which are currently running.
    running_tasks = {}
    # Record the time we have processed up to so we know how to calculate time
    # deltas.
    last_time = task_start_stop_times[0][0]
    # Track the accumulated weighted time so that it can efficiently be added
    # to individual tasks.
    last_weighted_time = 0.0
    # Scan all start/stop events.
    for event in task_start_stop_times:
        time, action_name, target = event
        # Accumulate weighted time up to now.
        num_running = len(running_tasks)
        if num_running > 0:
            # Update the total weighted time up to this moment.
            last_weighted_time += (time - last_time) / float(num_running)
        if action_name == "start":
            # Record the total weighted task time when this task starts.
            running_tasks[target] = last_weighted_time
        if action_name == "stop":
            # Record the change in the total weighted task time while this task
            # ran.
            weighted_duration = last_weighted_time - running_tasks[target]
            target.SetWeightedDuration(weighted_duration)
            weighted_total += weighted_duration
            del running_tasks[target]
        last_time = time
    assert len(running_tasks) == 0

    # Warn if the sum of weighted times is off by more than half a second.
    # All times are in seconds at this point (converted in ReadTargets);
    # the previous threshold of 500 dated from millisecond timestamps and
    # effectively disabled this sanity check.
    if abs(length - weighted_total) > 0.5:
        print(
            "Warning: Possible corrupt ninja log, results may be "
            "untrustworthy. Length = %.3f, weighted total = %.3f"
            % (length, weighted_total)
        )

    # Print the slowest build steps:
    print(" Longest build steps:")
    if elapsed_time_sorting:
        entries.sort(key=lambda x: x.Duration())
    else:
        entries.sort(key=lambda x: x.WeightedDuration())
    for target in entries[-long_count:]:
        print(
            " %8.1f weighted s to build %s (%.1f s elapsed time)"
            % (target.WeightedDuration(), target.DescribeTargets(), target.Duration())
        )

    # Sum up the time by file extension/type of the output file
    count_by_ext = {}
    time_by_ext = {}
    weighted_time_by_ext = {}
    # Scan through all of the targets to build up per-extension statistics.
    for target in entries:
        extension = GetExtension(target, extra_step_types)
        time_by_ext[extension] = time_by_ext.get(extension, 0) + target.Duration()
        weighted_time_by_ext[extension] = (
            weighted_time_by_ext.get(extension, 0) + target.WeightedDuration()
        )
        count_by_ext[extension] = count_by_ext.get(extension, 0) + 1

    print(" Time by build-step type:")
    # Copy to a list with extension name and total time swapped, to (time, ext)
    if elapsed_time_sorting:
        weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in time_by_ext.items())
    else:
        weighted_time_by_ext_sorted = sorted(
            (y, x) for (x, y) in weighted_time_by_ext.items()
        )
    # Print the slowest build target types:
    for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
        print(
            " %8.1f s weighted time to generate %d %s files "
            "(%1.1f s elapsed time sum)"
            % (time, count_by_ext[extension], extension, time_by_ext[extension])
        )

    # NOTE(review): these divisions raise ZeroDivisionError when length == 0
    # (all steps share one timestamp); callers only pass non-empty real logs.
    print(
        " %.1f s weighted time (%.1f s elapsed time sum, %1.1fx "
        "parallelism)" % (length, total_cpu_time, total_cpu_time * 1.0 / length)
    )
    print(
        " %d build steps completed, average of %1.2f/s"
        % (len(entries), len(entries) / (length))
    )
|
||||
|
||||
|
||||
def main():
    """Entry point: locate the ninja or siso log and print a build summary.

    Returns errno.ENOENT when the ninja log cannot be opened; otherwise
    returns None (which sys.exit maps to exit status 0).
    """
    log_file = ".ninja_log"
    metrics_file = "siso_metrics.json"
    parser = argparse.ArgumentParser()
    parser.add_argument("-C", dest="build_directory", help="Build directory.")
    parser.add_argument(
        "-s",
        "--step-types",
        help="semicolon separated fnmatch patterns for build-step grouping",
    )
    parser.add_argument(
        "-e",
        "--elapsed_time_sorting",
        default=False,
        action="store_true",
        help="Sort output by elapsed time instead of weighted time",
    )
    parser.add_argument("--log-file", help="specific ninja log file to analyze.")
    # parse_known_args: tolerate (and discard) unrecognized flags so this can
    # be chained after other tools' argument lists.
    args, _extra_args = parser.parse_known_args()
    if args.build_directory:
        log_file = os.path.join(args.build_directory, log_file)
        metrics_file = os.path.join(args.build_directory, metrics_file)
    if args.log_file:
        # An explicit --log-file overrides the -C-derived default path.
        log_file = args.log_file
    if not args.step_types:
        # Offer a convenient way to add extra step types automatically,
        # including when this script is run by autoninja. get() returns None if
        # the variable isn't set.
        args.step_types = os.environ.get("chromium_step_types")
    if args.step_types:
        # Make room for the extra build types.
        global long_ext_count
        long_ext_count += len(args.step_types.split(";"))

    if os.path.exists(metrics_file):
        # Automatically handle summarizing siso builds: delegate to the
        # `siso metrics summary` subcommand instead of parsing ourselves.
        cmd = ["siso.bat" if "win32" in sys.platform else "siso"]
        cmd.extend(["metrics", "summary"])
        if args.build_directory:
            cmd.extend(["-C", args.build_directory])
        if args.step_types:
            cmd.extend(["--step_types", args.step_types])
        if args.elapsed_time_sorting:
            cmd.append("--elapsed_time_sorting")
        subprocess.run(cmd)
    else:
        try:
            with open(log_file, "r") as log:
                entries = ReadTargets(log, False)
                if entries:
                    SummarizeEntries(
                        entries, args.step_types, args.elapsed_time_sorting
                    )
        except IOError:
            print("Log file %r not found, no build summary created." % log_file)
            return errno.ENOENT
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value (None -> 0) as the process exit code.
    sys.exit(main())
|
||||
105
ci/pretty_printing.sh
Normal file
105
ci/pretty_printing.sh
Normal file
@@ -0,0 +1,105 @@
|
||||
# Print "ARG=${ARG}" for every variable name passed as an argument.
# An empty-string argument triggers a usage message and a non-zero return;
# calling with no arguments prints nothing.
function print_var_values() {
  # Iterate through the arguments
  for var_name in "$@"; do
    if [[ -z "$var_name" ]]; then
      echo "Usage: print_var_values <variable_name1> <variable_name2> ..."
      return 1
    fi
    # Indirect expansion (${!name}): print the named variable's value, or
    # a placeholder when it is unset or empty.
    echo "$var_name=${!var_name:-(undefined)}"
  done
}
|
||||
|
||||
# begin_group: Start a named section of log output, possibly with color.
# Usage: begin_group "Group Name" [Color]
# Group Name: A string specifying the name of the group.
# Color (optional): ANSI color code to set text color. Default is blue (34).
function begin_group() {
  # See options for colors here: https://gist.github.com/JBlond/2fea43a3049b38287e5e9cefc87b2124
  local blue="34"
  local name="${1:-}"
  local color="${2:-$blue}"

  if [ -n "${GITHUB_ACTIONS:-}" ]; then
    # GitHub Actions collapses everything until ::endgroup:: under this title.
    echo -e "::group::\e[${color}m${name}\e[0m"
  else
    # Plain colored banner for local/terminal runs.
    echo -e "\e[${color}m================== ${name} ======================\e[0m"
  fi
}
|
||||
|
||||
# end_group: End a named section of log output and print status based on exit status.
# Usage: end_group "Group Name" [Exit Status] [Duration]
# Group Name: A string specifying the name of the group.
# Exit Status (optional): The exit status of the command run within the group. Default is 0.
# Duration (optional): elapsed seconds; shown in the closing banner outside CI.
function end_group() {
  local name="${1:-}"
  local build_status="${2:-0}"
  local duration="${3:-}"
  local red="31"
  local blue="34"

  if [ -n "${GITHUB_ACTIONS:-}" ]; then
    echo "::endgroup::"

    # On failure, surface a clickable ::error:: annotation in the CI UI.
    if [ "$build_status" -ne 0 ]; then
      echo -e "::error::\e[${red}m ${name} - Failed (⬆️ click above for full log ⬆️)\e[0m"
    fi
  else
    # Outside CI: red banner on failure, blue on success.
    # ${duration:+...} appends the duration suffix only when provided.
    if [ "$build_status" -ne 0 ]; then
      echo -e "\e[${red}m================== End ${name} - Failed${duration:+ - Duration: ${duration}s} ==================\e[0m"
    else
      echo -e "\e[${blue}m================== End ${name} - Success${duration:+ - Duration: ${duration}s} ==================\n\e[0m"
    fi
  fi
}
|
||||
|
||||
# Global map of group name -> wall-clock duration in seconds; filled in by
# run_command and reported/cleared by print_time_summary.
declare -A command_durations

# Runs a command within a named group, handles the exit status, and prints appropriate messages based on the result.
# Usage: run_command "Group Name" command [arguments...]
# Records the command's wall time in command_durations and returns the
# command's exit status.
function run_command() {
  local group_name="${1:-}"
  shift
  local command=("$@")
  local status

  begin_group "$group_name"
  # Temporarily disable errexit so a failing command doesn't abort the
  # script before we can report its status and duration.
  set +e
  local start_time=$(date +%s)
  "${command[@]}"
  status=$?
  local end_time=$(date +%s)
  set -e
  local duration=$((end_time - start_time))
  end_group "$group_name" $status $duration
  command_durations["$group_name"]=$duration
  return $status
}
|
||||
|
||||
# string_width: print the character count of the given string.
# Multi-line input yields one count per line (awk's per-record length).
function string_width() {
  echo "$1" | awk '{print length}'
}
|
||||
|
||||
# print_time_summary: print the duration of every command group recorded by
# run_command, aligned on the longest group name, then reset the timing table
# so subsequent runs start fresh.
function print_time_summary() {
  local max_length=0
  local group
  local group_length

  # Find the longest group name for formatting.
  # Use the file's string_width helper instead of duplicating its awk logic.
  for group in "${!command_durations[@]}"; do
    group_length=$(string_width "$group")
    if [ "$group_length" -gt "$max_length" ]; then
      max_length=$group_length
    fi
  done

  echo "Time Summary:"
  for group in "${!command_durations[@]}"; do
    printf "%-${max_length}s : %s seconds\n" "$group" "${command_durations[$group]}"
  done

  # Clear the array of timing info
  declare -gA command_durations=()
}
|
||||
41
ci/sccache_hit_rate.sh
Executable file
41
ci/sccache_hit_rate.sh
Executable file
@@ -0,0 +1,41 @@
|
||||
#!/bin/bash

set -euo pipefail

# Computes the sccache hit rate between two `sccache --show-stats` dumps.
# Usage: sccache_hit_rate.sh <before-file> <after-file>
# Diagnostics go to stderr; the hit-rate percentage alone goes to stdout so
# callers can capture it.

# Ensure two arguments are provided
if [ $# -ne 2 ]; then
  echo "Usage: $0 <before-file> <after-file>" >&2
  exit 1
fi

# Print the contents of the before file.
# Quote "$1"/"$2" so paths containing spaces or glob characters work.
echo "=== Contents of $1 ===" >&2
cat "$1" >&2
echo "=== End of $1 ===" >&2

# Print the contents of the after file
echo "=== Contents of $2 ===" >&2
cat "$2" >&2
echo "=== End of $2 ===" >&2

# Extract compile requests and cache hits from the before and after files
requests_before=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$1")
hits_before=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$1")
requests_after=$(awk '/^[ \t]*Compile requests[ \t]+[0-9]+/ {print $3}' "$2")
hits_after=$(awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}' "$2")

# Calculate the differences to find out how many new requests and hits
requests_diff=$((requests_after - requests_before))
hits_diff=$((hits_after - hits_before))

echo "New Compile Requests: $requests_diff" >&2
echo "New Hits: $hits_diff" >&2

# Calculate and print the hit rate
if [ "$requests_diff" -eq 0 ]; then
  # NOTE(review): this message goes to stdout, not stderr, unlike the other
  # diagnostics -- confirm whether callers rely on that before changing it.
  echo "No new compile requests, hit rate is not applicable"
else
  hit_rate=$(awk -v hits="$hits_diff" -v requests="$requests_diff" 'BEGIN {printf "%.2f", hits/requests * 100}')
  echo "sccache hit rate: $hit_rate%" >&2
  echo "$hit_rate"
fi
|
||||
52
ci/sccache_stats.sh
Executable file
52
ci/sccache_stats.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash

# This script prints the sccache hit rate between two calls to sccache --show-stats.
# It should be sourced in your script before and after the operations you want to profile,
# with the 'start' or 'end' argument respectively.
#
# 'start' stashes the current hit/miss counters in exported variables;
# 'end' diffs against them and prints the resulting hit rate.

mode=$1

if [[ "$mode" != "start" && "$mode" != "end" ]]; then
  echo "Invalid mode: $mode"
  echo "Usage: $0 {start|end}"
  exit 1
fi

# Check if sccache is available
if ! command -v sccache &> /dev/null; then
  echo "Notice: sccache is not available. Skipping..."
  exit 0
fi

case $mode in
  start)
    # Exported so that a later sourced 'end' invocation can see the baseline.
    export SCCACHE_START_HITS=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}')
    export SCCACHE_START_MISSES=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}')
    ;;
  end)
    if [[ -z ${SCCACHE_START_HITS+x} || -z ${SCCACHE_START_MISSES+x} ]]; then
      echo "Error: start stats not collected. Did you call this script with 'start' before your operations?"
      exit 1
    fi

    final_hits=$(sccache --show-stats | awk '/^[ \t]*Cache hits[ \t]+[0-9]+/ {print $3}')
    final_misses=$(sccache --show-stats | awk '/^[ \t]*Cache misses[ \t]+[0-9]+/ {print $3}')
    hits=$((final_hits - SCCACHE_START_HITS))
    misses=$((final_misses - SCCACHE_START_MISSES))
    total=$((hits + misses))

    # Prefix output as a GitHub Actions notice annotation when running in CI.
    # Quote the expansion: the unquoted form breaks `[` if the variable ever
    # contains whitespace.
    prefix=""
    if [ "${GITHUB_ACTIONS:-false}" = "true" ]; then
      prefix="::notice::"
    fi

    if (( total > 0 )); then
      hit_rate=$(awk -v hits="$hits" -v total="$total" 'BEGIN { printf "%.2f", (hits / total) * 100 }')
      echo "${prefix}sccache hits: $hits | misses: $misses | hit rate: $hit_rate%"
    else
      echo "${prefix}sccache stats: N/A No new compilation requests"
    fi
    # Drop the baseline so stale values don't leak into the next measurement.
    unset SCCACHE_START_HITS
    unset SCCACHE_START_MISSES
    ;;
esac
|
||||
18
ci/test_nvbench.sh
Executable file
18
ci/test_nvbench.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash

# Build and run the NVBench test suite via the shared CI helpers.
# Assumes build_common.sh defines print_environment_details, test_preset,
# and print_time_summary (used below).
source "$(dirname "$0")/build_common.sh"

# Run NVBench tests with high parallelism. If any need to be
# serialized, define the `RUN_SERIAL` CMake property on the
# test.
export CTEST_PARALLEL_LEVEL=${PARALLEL_LEVEL}

print_environment_details

# Build first; all CLI arguments are forwarded to the build script.
./build_nvbench.sh "$@"

PRESET="nvbench-ci"

test_preset "NVBench" ${PRESET}

print_time_summary
|
||||
65
cmake/DetectSupportedStandards.cmake
Normal file
65
cmake/DetectSupportedStandards.cmake
Normal file
@@ -0,0 +1,65 @@
|
||||
# Detect the language standards supported by the current compilers.
|
||||
#
|
||||
# Usage: detect_supported_cxx_standards(<var_prefix> <lang> <standards>)
|
||||
#
|
||||
# - var_prefix: Used to name result variables,
|
||||
# e.g. ${var_prefix}_${lang}_XX_SUPPORTED will be TRUE or FALSE. Defined for
|
||||
# each XX in ${standards}.
|
||||
# - lang: The language to test: C, CXX, or CUDA.
|
||||
# - standards: List of any standard versions.
|
||||
#
|
||||
# Example: detect_supported_standards(PROJ CXX 11 14 17)
|
||||
# - Sets the following variables in the parent scope to TRUE or FALSE:
|
||||
# - PROJ_CXX_11_SUPPORTED
|
||||
# - PROJ_CXX_14_SUPPORTED
|
||||
# - PROJ_CXX_17_SUPPORTED
|
||||
# - Sets `PROJ_DETECTED_CXX_STANDARDS` to a list of supported standards (e.g. "11;14;17").
|
||||
function(detect_supported_standards prefix lang)
  # CMake compile-feature names look like "cxx_std_17" / "cuda_std_14".
  string(TOLOWER "${lang}_std" feature_prefix)
  set(all_stds)
  foreach(standard IN LISTS ARGN)
    set(var_name "${prefix}_${lang}_${standard}_SUPPORTED")
    if ("${feature_prefix}_${standard}" IN_LIST CMAKE_${lang}_COMPILE_FEATURES)
      set(${var_name} TRUE)
    else()
      set(${var_name} FALSE)
    endif()

    # Special cases:
    if (standard EQUAL 17 AND
        (lang STREQUAL "CXX" OR lang STREQUAL "CUDA") AND
        ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
          CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7) OR
         (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND
          CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8)))
      # gcc < 7 and clang < 8 don't fully support C++17.
      # They accept the flag and have partial support, but nvcc will refuse
      # to enable it and falls back to the default dialect for the current
      # CXX compiler version. This breaks our CI.
      # CMake's COMPILE_FEATURES var reports that these compilers support C++17,
      # but we can't rely on it, so manually disable the dialect in these cases.
      set(${var_name} FALSE)
    endif()

    if (standard EQUAL 20 AND
        (lang STREQUAL "CXX" OR lang STREQUAL "CUDA") AND
        ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
          CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10) OR
         (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND
          CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10) OR
         (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND
          CMAKE_CXX_COMPILER_VERSION VERSION_LESS 1930)))
      # Similar to the above, but for C++20
      # (gcc < 10, clang < 10, MSVC < 19.30).
      set(${var_name} FALSE)
    endif()

    if (${var_name})
      list(APPEND all_stds ${standard})
    endif()

    message(STATUS "Testing ${lang}${standard} Support: ${${var_name}}")
    # Functions get their own scope; export each result to the caller.
    set(${var_name} ${${var_name}} PARENT_SCOPE)
  endforeach()

  # e.g. ${prefix}_DETECTED_CXX_STANDARDS = "11;14;17"
  set(${prefix}_DETECTED_${lang}_STANDARDS "${all_stds}" PARENT_SCOPE)
endfunction()
|
||||
@@ -22,47 +22,15 @@ function(nvbench_add_cupti_dep dep_name)
|
||||
|
||||
add_library(nvbench::${dep_name_lower} SHARED IMPORTED)
|
||||
|
||||
if (WIN32)
|
||||
# Attempt to locate the dll in the expected location. This is necessary
|
||||
# because the CUPTI dll has a versioned suffix, so we can't directly search
|
||||
# for it with find_file.
|
||||
file(GLOB dep_dll_path "${nvbench_cupti_root}/lib64/${dep_name_lower}*dll")
|
||||
cmake_path(GET dep_dll_path FILENAME dep_dll_filename)
|
||||
find_library(NVBench_${dep_name_upper}_LIBRARY ${dep_name_lower} REQUIRED
|
||||
DOC "The full path to lib${dep_name_lower}.so from the CUDA Toolkit."
|
||||
HINTS "${nvbench_cupti_root}/lib64"
|
||||
)
|
||||
mark_as_advanced(NVBench_${dep_name_upper}_LIBRARY)
|
||||
|
||||
# If the dll was not found in the expected location, use a default filename as a user hint.
|
||||
if (NOT dep_dll_filename)
|
||||
set(dep_dll_filename ${dep_name_lower}.dll)
|
||||
endif()
|
||||
|
||||
# Use find_file to create a cache variable and mark the file as REQUIRED.
|
||||
find_file(NVBench_${dep_name_upper}_DLL ${dep_dll_filename} REQUIRED
|
||||
DOC "The full path to ${dep_name_lower}.dll from the CUDA Toolkit."
|
||||
HINTS "${nvbench_cupti_root}/lib64/"
|
||||
)
|
||||
mark_as_advanced(NVBench_${dep_name_upper}_DLL)
|
||||
|
||||
# The .libs don't have suffixes, so we can just directly search for them.
|
||||
find_library(NVBench_${dep_name_upper}_LIBRARY ${dep_name_lower}.lib REQUIRED
|
||||
DOC "The full path to ${dep_name_lower}.lib from the CUDA Toolkit."
|
||||
HINTS "${nvbench_cupti_root}/lib64/"
|
||||
)
|
||||
mark_as_advanced(NVBench_${dep_name_upper}_LIBRARY)
|
||||
|
||||
set_target_properties(nvbench::${dep_name_lower} PROPERTIES
|
||||
IMPORTED_LOCATION "${NVBench_${dep_name_upper}_DLL}"
|
||||
IMPORTED_IMPLIB "${NVBench_${dep_name_upper}_LIBRARY}"
|
||||
)
|
||||
else()
|
||||
find_library(NVBench_${dep_name_upper}_LIBRARY ${dep_name_lower} REQUIRED
|
||||
DOC "The full path to lib${dep_name_lower}.so from the CUDA Toolkit."
|
||||
HINTS "${nvbench_cupti_root}/lib64"
|
||||
)
|
||||
mark_as_advanced(NVBench_${dep_name_upper}_LIBRARY)
|
||||
|
||||
set_target_properties(nvbench::${dep_name_lower} PROPERTIES
|
||||
IMPORTED_LOCATION "${NVBench_${dep_name_upper}_LIBRARY}"
|
||||
)
|
||||
endif()
|
||||
set_target_properties(nvbench::${dep_name_lower} PROPERTIES
|
||||
IMPORTED_LOCATION "${NVBench_${dep_name_upper}_LIBRARY}"
|
||||
)
|
||||
endfunction()
|
||||
|
||||
nvbench_add_cupti_dep(nvperf_target)
|
||||
|
||||
28
cmake/NVBenchClangdCompileInfo.cmake
Normal file
28
cmake/NVBenchClangdCompileInfo.cmake
Normal file
@@ -0,0 +1,28 @@
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Tell cmake to generate a json file of compile commands for clangd:
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
# Symlink the compile command output to the source dir, where clangd will find it.
|
||||
set(compile_commands_file "${CMAKE_BINARY_DIR}/compile_commands.json")
|
||||
set(compile_commands_link "${CMAKE_SOURCE_DIR}/compile_commands.json")
|
||||
message(STATUS "Creating symlink from ${compile_commands_link} to ${compile_commands_file}...")
|
||||
nvbench_execute_non_fatal_process(COMMAND
|
||||
"${CMAKE_COMMAND}" -E rm -f "${compile_commands_link}")
|
||||
nvbench_execute_non_fatal_process(COMMAND
|
||||
"${CMAKE_COMMAND}" -E touch "${compile_commands_file}")
|
||||
nvbench_execute_non_fatal_process(COMMAND
|
||||
"${CMAKE_COMMAND}" -E create_symlink "${compile_commands_file}" "${compile_commands_link}")
|
||||
@@ -29,46 +29,37 @@ function(nvbench_add_cxx_flag target_name type flag)
|
||||
target_compile_options(${target_name} ${type}
|
||||
$<$<COMPILE_LANGUAGE:CXX>:${flag}>
|
||||
$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcompiler=${flag}>
|
||||
# FIXME nvc++ case
|
||||
)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "/W4")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wall")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wextra")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wconversion")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Woverloaded-virtual")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wcast-qual")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wpointer-arith")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wunused-local-typedef")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wunused-parameter")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wvla")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wgnu")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wno-gnu-line-marker") # WAR 3916341
|
||||
|
||||
if (NVBench_ENABLE_WERROR)
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "/WX")
|
||||
endif()
|
||||
|
||||
# Suppress overly-pedantic/unavoidable warnings brought in with /W4:
|
||||
# C4505: unreferenced local function has been removed
|
||||
# The CUDA `host_runtime.h` header emits this for
|
||||
# `__cudaUnregisterBinaryUtil`.
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "/wd4505")
|
||||
else()
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wall")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wextra")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wconversion")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Woverloaded-virtual")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wcast-qual")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wpointer-arith")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wunused-local-typedef")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wunused-parameter")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wvla")
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Wgnu")
|
||||
|
||||
if (NVBench_ENABLE_WERROR)
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Werror")
|
||||
endif()
|
||||
if (NVBench_ENABLE_WERROR)
|
||||
nvbench_add_cxx_flag(nvbench.build_interface INTERFACE "-Werror")
|
||||
endif()
|
||||
|
||||
# GCC-specific flags
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
|
||||
# Experimental filesystem library
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL GNU OR CMAKE_CXX_COMPILER_ID STREQUAL Clang)
|
||||
target_link_libraries(nvbench.build_interface INTERFACE stdc++fs)
|
||||
endif()
|
||||
|
||||
# CUDA-specific flags
|
||||
if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
# fmtlib uses llvm's _BitInt internally, which is not available when compiling through nvcc:
|
||||
target_compile_definitions(nvbench.build_interface INTERFACE "FMT_USE_BITINT=0")
|
||||
endif()
|
||||
|
||||
target_compile_options(nvbench.build_interface INTERFACE
|
||||
$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcudafe=--display_error_number>
|
||||
$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Wno-deprecated-gpu-targets>
|
||||
@@ -85,6 +76,5 @@ function(nvbench_config_target target_name)
|
||||
ARCHIVE_OUTPUT_DIRECTORY "${NVBench_LIBRARY_OUTPUT_DIR}"
|
||||
LIBRARY_OUTPUT_DIRECTORY "${NVBench_LIBRARY_OUTPUT_DIR}"
|
||||
RUNTIME_OUTPUT_DIRECTORY "${NVBench_EXECUTABLE_OUTPUT_DIR}"
|
||||
WINDOWS_EXPORT_ALL_SYMBOLS ON # oooo pretty hammer...
|
||||
)
|
||||
endfunction()
|
||||
|
||||
@@ -1,52 +1,61 @@
|
||||
################################################################################
|
||||
# fmtlib/fmt
|
||||
rapids_cpm_find(fmt 7.1.3
|
||||
set(export_set_details)
|
||||
set(install_fmt OFF)
|
||||
if(NOT BUILD_SHARED_LIBS AND NVBench_ENABLE_INSTALL_RULES)
|
||||
set(export_set_details BUILD_EXPORT_SET nvbench-targets
|
||||
INSTALL_EXPORT_SET nvbench-targets)
|
||||
set(install_fmt ON)
|
||||
endif()
|
||||
|
||||
rapids_cpm_find(fmt 11.1.4 ${export_set_details}
|
||||
GLOBAL_TARGETS fmt::fmt fmt::fmt-header-only
|
||||
CPM_ARGS
|
||||
GITHUB_REPOSITORY fmtlib/fmt
|
||||
GIT_TAG 7.1.3
|
||||
GIT_SHALLOW TRUE
|
||||
GIT_REPOSITORY "https://github.com/fmtlib/fmt.git"
|
||||
GIT_TAG "11.1.4"
|
||||
OPTIONS
|
||||
# Force static to keep fmt internal.
|
||||
"BUILD_SHARED_LIBS OFF"
|
||||
# Suppress warnings from fmt headers by marking them as system.
|
||||
"FMT_SYSTEM_HEADERS ON"
|
||||
# Disable install rules since we're linking statically.
|
||||
"FMT_INSTALL ${install_fmt}"
|
||||
"CMAKE_POSITION_INDEPENDENT_CODE ON"
|
||||
)
|
||||
|
||||
if(NOT fmt_ADDED)
|
||||
set(fmt_is_external TRUE)
|
||||
endif()
|
||||
|
||||
################################################################################
|
||||
# nlohmann/json
|
||||
#
|
||||
# Following recipe from
|
||||
# http://github.com/cpm-cmake/CPM.cmake/blob/master/examples/json/CMakeLists.txt
|
||||
# Download the zips because the repo takes an excessively long time to clone.
|
||||
rapids_cpm_find(nlohmann_json 3.9.1
|
||||
# Release:
|
||||
rapids_cpm_find(nlohmann_json 3.11.3
|
||||
CPM_ARGS
|
||||
URL https://github.com/nlohmann/json/releases/download/v3.9.1/include.zip
|
||||
URL_HASH SHA256=6bea5877b1541d353bd77bdfbdb2696333ae5ed8f9e8cc22df657192218cad91
|
||||
PATCH_COMMAND
|
||||
# Work around compiler bug in nvcc 11.0, see NVIDIA/NVBench#18
|
||||
${CMAKE_COMMAND} -E copy
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/nlohmann_json.hpp"
|
||||
"./include/nlohmann/json.hpp"
|
||||
|
||||
# Development version:
|
||||
# I'm waiting for https://github.com/nlohmann/json/issues/2676 to be fixed,
|
||||
# leave this in to simplify testing patches as they come out. Update the
|
||||
# `nvbench_json` target too when switching branches.
|
||||
# CPM_ARGS
|
||||
# VERSION develop
|
||||
# URL https://github.com/nlohmann/json/archive/refs/heads/develop.zip
|
||||
# OPTIONS JSON_MultipleHeaders ON
|
||||
URL https://github.com/nlohmann/json/releases/download/v3.11.3/include.zip
|
||||
URL_HASH SHA256=a22461d13119ac5c78f205d3df1db13403e58ce1bb1794edc9313677313f4a9d
|
||||
PATCH_COMMAND
|
||||
${CMAKE_COMMAND}
|
||||
-D "CUDA_VERSION=${CMAKE_CUDA_COMPILER_VERSION}"
|
||||
-D "CXX_VERSION=${CMAKE_CXX_COMPILER_VERSION}"
|
||||
-D "CXX_ID=${CMAKE_CXX_COMPILER_ID}"
|
||||
-P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/json_unordered_map_ice.cmake"
|
||||
)
|
||||
|
||||
# nlohmann_json release headers
|
||||
add_library(nvbench_json INTERFACE IMPORTED)
|
||||
target_include_directories(nvbench_json SYSTEM INTERFACE
|
||||
"${nlohmann_json_SOURCE_DIR}/include"
|
||||
)
|
||||
|
||||
# nlohmann_json development branch:
|
||||
#add_library(nvbench_json INTERFACE)
|
||||
#target_link_libraries(nvbench_json INTERFACE nlohmann_json)
|
||||
if (TARGET nlohmann_json::nlohmann_json)
|
||||
# If we have a target, just use it. Cannot be an ALIAS library because
|
||||
# nlohmann_json::nlohmann_json itself might be one.
|
||||
target_link_libraries(nvbench_json INTERFACE nlohmann_json::nlohmann_json)
|
||||
else()
|
||||
# Otherwise we only downloaded the headers.
|
||||
target_include_directories(nvbench_json SYSTEM INTERFACE
|
||||
"${nlohmann_json_SOURCE_DIR}/include"
|
||||
)
|
||||
endif()
|
||||
|
||||
################################################################################
|
||||
# CUDAToolkit
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
# By default, add dependent DLLs to the build dir on MSVC. This avoids
|
||||
# a variety of runtime issues when using NVML, etc.
|
||||
# This behavior can be disabled using the following options:
|
||||
if (WIN32)
|
||||
option(NVBench_ADD_DEPENDENT_DLLS_TO_BUILD
|
||||
"Copy dependent dlls to NVBench library build location (MSVC only)."
|
||||
ON
|
||||
)
|
||||
else()
|
||||
# These are forced off for non-MSVC builds, as $<TARGET_RUNTIME_DLLS:...>
|
||||
# will always be empty on non-dll platforms.
|
||||
set(NVBench_ADD_DEPENDENT_DLLS_TO_BUILD OFF)
|
||||
endif()
|
||||
|
||||
if (NVBench_ADD_DEPENDENT_DLLS_TO_BUILD)
|
||||
message(STATUS
|
||||
"CMake 3.21.0 is required when NVBench_ADD_DEPENDENT_DLLS_TO_BUILD "
|
||||
"is enabled."
|
||||
)
|
||||
cmake_minimum_required(VERSION 3.21.0)
|
||||
endif()
|
||||
|
||||
function(nvbench_setup_dep_dlls target_name)
|
||||
# The custom command below fails when there aren't any runtime DLLs to copy,
|
||||
# so only enable it when a relevant dependency is enabled:
|
||||
if (NVBench_ADD_DEPENDENT_DLLS_TO_BUILD AND
|
||||
(NVBench_ENABLE_NVML OR
|
||||
NVBench_ENABLE_CUPTI))
|
||||
add_custom_command(TARGET ${target_name}
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
"${CMAKE_COMMAND}" -E copy
|
||||
"$<TARGET_RUNTIME_DLLS:${target_name}>"
|
||||
"$<TARGET_FILE_DIR:${target_name}>"
|
||||
COMMAND_EXPAND_LISTS
|
||||
)
|
||||
endif()
|
||||
endfunction()
|
||||
@@ -1,37 +1,51 @@
|
||||
macro(nvbench_generate_exports)
|
||||
set(nvbench_build_export_code_block "")
|
||||
set(nvbench_install_export_code_block "")
|
||||
if(NVBench_ENABLE_INSTALL_RULES)
|
||||
set(nvbench_build_export_code_block "")
|
||||
set(nvbench_install_export_code_block "")
|
||||
|
||||
if (NVBench_ENABLE_NVML)
|
||||
string(APPEND nvbench_build_export_code_block
|
||||
"include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake\")\n"
|
||||
if (NVBench_ENABLE_NVML)
|
||||
string(APPEND nvbench_build_export_code_block
|
||||
"include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake\")\n"
|
||||
)
|
||||
string(APPEND nvbench_install_export_code_block
|
||||
"include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchNVML.cmake\")\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
if (NVBench_ENABLE_CUPTI)
|
||||
string(APPEND nvbench_build_export_code_block
|
||||
"include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchCUPTI.cmake\")\n"
|
||||
)
|
||||
string(APPEND nvbench_install_export_code_block
|
||||
"include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchCUPTI.cmake\")\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
if (TARGET nvbench_json)
|
||||
set(nvbench_json_code_block
|
||||
[=[
|
||||
add_library(nvbench_json INTERFACE IMPORTED)
|
||||
if (TARGET nlohmann_json::nlohmann_json)
|
||||
target_link_libraries(nvbench_json INTERFACE nlohmann_json::nlohmann_json)
|
||||
endif()
|
||||
]=])
|
||||
string(APPEND nvbench_build_export_code_block ${nvbench_json_code_block})
|
||||
string(APPEND nvbench_install_export_code_block ${nvbench_json_code_block})
|
||||
endif()
|
||||
|
||||
rapids_export(BUILD NVBench
|
||||
EXPORT_SET nvbench-targets
|
||||
NAMESPACE "nvbench::"
|
||||
GLOBAL_TARGETS nvbench main ctl internal_build_interface
|
||||
LANGUAGES CUDA CXX
|
||||
FINAL_CODE_BLOCK nvbench_build_export_code_block
|
||||
)
|
||||
string(APPEND nvbench_install_export_code_block
|
||||
"include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchNVML.cmake\")\n"
|
||||
rapids_export(INSTALL NVBench
|
||||
EXPORT_SET nvbench-targets
|
||||
NAMESPACE "nvbench::"
|
||||
GLOBAL_TARGETS nvbench main ctl internal_build_interface
|
||||
LANGUAGES CUDA CXX
|
||||
FINAL_CODE_BLOCK nvbench_install_export_code_block
|
||||
)
|
||||
endif()
|
||||
|
||||
if (NVBench_ENABLE_CUPTI)
|
||||
string(APPEND nvbench_build_export_code_block
|
||||
"include(\"${NVBench_SOURCE_DIR}/cmake/NVBenchCUPTI.cmake\")\n"
|
||||
)
|
||||
string(APPEND nvbench_install_export_code_block
|
||||
"include(\"\${CMAKE_CURRENT_LIST_DIR}/NVBenchCUPTI.cmake\")\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
rapids_export(BUILD NVBench
|
||||
EXPORT_SET nvbench-targets
|
||||
NAMESPACE "nvbench::"
|
||||
GLOBAL_TARGETS nvbench main ctl internal_build_interface
|
||||
LANGUAGES CUDA CXX
|
||||
FINAL_CODE_BLOCK nvbench_build_export_code_block
|
||||
)
|
||||
rapids_export(INSTALL NVBench
|
||||
EXPORT_SET nvbench-targets
|
||||
NAMESPACE "nvbench::"
|
||||
GLOBAL_TARGETS nvbench main ctl internal_build_interface
|
||||
LANGUAGES CUDA CXX
|
||||
FINAL_CODE_BLOCK nvbench_install_export_code_block
|
||||
)
|
||||
endmacro()
|
||||
|
||||
40
cmake/NVBenchHeaderTesting.cmake
Normal file
40
cmake/NVBenchHeaderTesting.cmake
Normal file
@@ -0,0 +1,40 @@
|
||||
# For every public header, build a translation unit containing `#include <header>`
|
||||
# with some various checks.
|
||||
|
||||
set(excluded_headers_regexes
|
||||
# Should never be used externally.
|
||||
"^detail"
|
||||
"^internal"
|
||||
)
|
||||
|
||||
# Meta target for all configs' header builds:
|
||||
add_custom_target(nvbench.headers.all)
|
||||
add_dependencies(nvbench.all nvbench.headers.all)
|
||||
|
||||
file(GLOB_RECURSE header_files
|
||||
RELATIVE "${NVBench_SOURCE_DIR}/nvbench/"
|
||||
CONFIGURE_DEPENDS
|
||||
"${NVBench_SOURCE_DIR}/nvbench/*.cuh"
|
||||
)
|
||||
|
||||
foreach (exclusion IN LISTS excluded_headers_regexes)
|
||||
list(FILTER header_files EXCLUDE REGEX "${exclusion}")
|
||||
endforeach()
|
||||
|
||||
function (nvbench_add_header_target target_name cuda_std)
|
||||
foreach (header IN LISTS header_files)
|
||||
set(headertest_src "headers/${target_name}/${header}.cu")
|
||||
set(header_str "nvbench/${header}") # Substitution used by configure_file:
|
||||
configure_file("${NVBench_SOURCE_DIR}/cmake/header_test.in.cxx" "${headertest_src}")
|
||||
list(APPEND headertest_srcs "${headertest_src}")
|
||||
endforeach()
|
||||
|
||||
add_library(${target_name} OBJECT ${headertest_srcs})
|
||||
target_link_libraries(${target_name} PUBLIC nvbench::nvbench)
|
||||
set_target_properties(${target_name} PROPERTIES COMPILE_FEATURES cuda_std_${cuda_std})
|
||||
add_dependencies(nvbench.headers.all ${target_name})
|
||||
endfunction()
|
||||
|
||||
foreach (std IN LISTS NVBench_DETECTED_CUDA_STANDARDS)
|
||||
nvbench_add_header_target(nvbench.headers.cpp${std} ${std})
|
||||
endforeach()
|
||||
@@ -1,61 +1,69 @@
|
||||
include(GNUInstallDirs)
|
||||
rapids_cmake_install_lib_dir(NVBench_INSTALL_LIB_DIR)
|
||||
|
||||
# in-source public headers:
|
||||
install(DIRECTORY "${NVBench_SOURCE_DIR}/nvbench"
|
||||
TYPE INCLUDE
|
||||
FILES_MATCHING
|
||||
PATTERN "*.cuh"
|
||||
PATTERN "internal" EXCLUDE
|
||||
)
|
||||
if(NVBench_ENABLE_INSTALL_RULES)
|
||||
|
||||
# generated headers from build dir:
|
||||
install(
|
||||
FILES
|
||||
"${NVBench_BINARY_DIR}/nvbench/config.cuh"
|
||||
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench"
|
||||
)
|
||||
install(
|
||||
FILES
|
||||
"${NVBench_BINARY_DIR}/nvbench/detail/version.cuh"
|
||||
"${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh"
|
||||
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench/detail"
|
||||
)
|
||||
include(GNUInstallDirs)
|
||||
rapids_cmake_install_lib_dir(NVBench_INSTALL_LIB_DIR)
|
||||
|
||||
#
|
||||
# Install CMake files needed by consumers to locate dependencies:
|
||||
#
|
||||
# in-source public headers:
|
||||
install(DIRECTORY "${NVBench_SOURCE_DIR}/nvbench"
|
||||
TYPE INCLUDE
|
||||
FILES_MATCHING
|
||||
PATTERN "*.cuh"
|
||||
PATTERN "internal" EXCLUDE
|
||||
)
|
||||
|
||||
# Borrowing this logic from rapids_cmake's export logic to make sure these end
|
||||
# up in the same location as nvbench-config.cmake:
|
||||
rapids_cmake_install_lib_dir(config_install_location)
|
||||
set(config_install_location "${config_install_location}/cmake/nvbench")
|
||||
|
||||
if (NVBench_ENABLE_NVML)
|
||||
# generated headers from build dir:
|
||||
install(
|
||||
FILES
|
||||
"${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake"
|
||||
DESTINATION "${config_install_location}"
|
||||
"${NVBench_BINARY_DIR}/nvbench/config.cuh"
|
||||
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench"
|
||||
)
|
||||
endif()
|
||||
|
||||
if (NVBench_ENABLE_CUPTI)
|
||||
install(
|
||||
FILES
|
||||
"${NVBench_SOURCE_DIR}/cmake/NVBenchCUPTI.cmake"
|
||||
DESTINATION "${config_install_location}"
|
||||
"${NVBench_BINARY_DIR}/nvbench/detail/version.cuh"
|
||||
"${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh"
|
||||
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvbench/detail"
|
||||
)
|
||||
|
||||
#
|
||||
# Install CMake files needed by consumers to locate dependencies:
|
||||
#
|
||||
|
||||
# Borrowing this logic from rapids_cmake's export logic to make sure these end
|
||||
# up in the same location as nvbench-config.cmake:
|
||||
rapids_cmake_install_lib_dir(config_install_location)
|
||||
set(config_install_location "${config_install_location}/cmake/nvbench")
|
||||
|
||||
if (NVBench_ENABLE_NVML)
|
||||
install(
|
||||
FILES
|
||||
"${NVBench_SOURCE_DIR}/cmake/NVBenchNVML.cmake"
|
||||
DESTINATION "${config_install_location}"
|
||||
)
|
||||
endif()
|
||||
|
||||
if (NVBench_ENABLE_CUPTI)
|
||||
install(
|
||||
FILES
|
||||
"${NVBench_SOURCE_DIR}/cmake/NVBenchCUPTI.cmake"
|
||||
DESTINATION "${config_install_location}"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Call with a list of library targets to generate install rules:
|
||||
function(nvbench_install_libraries)
|
||||
install(TARGETS ${ARGN}
|
||||
DESTINATION "${NVBench_INSTALL_LIB_DIR}"
|
||||
EXPORT nvbench-targets
|
||||
)
|
||||
if(NVBench_ENABLE_INSTALL_RULES)
|
||||
install(TARGETS ${ARGN}
|
||||
DESTINATION "${NVBench_INSTALL_LIB_DIR}"
|
||||
EXPORT nvbench-targets
|
||||
)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Call with a list of executables to generate install rules:
|
||||
function(nvbench_install_executables)
|
||||
install(TARGETS ${ARGN} EXPORT nvbench-targets)
|
||||
if(NVBench_ENABLE_INSTALL_RULES)
|
||||
install(TARGETS ${ARGN} EXPORT nvbench-targets)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
@@ -1,37 +1,7 @@
|
||||
# Since this file is installed, we need to make sure that the CUDAToolkit has
|
||||
# been found by consumers:
|
||||
if (NOT TARGET CUDA::toolkit)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
# The CUDA:: targets currently don't provide dll locations through the
|
||||
# `IMPORTED_LOCATION` property, nor are they marked as `SHARED` libraries
|
||||
# (they're currently `UNKNOWN`). This prevents the `nvbench_setup_dep_dlls`
|
||||
# CMake function from copying the dlls to the build / install directories.
|
||||
# This is discussed in https://gitlab.kitware.com/cmake/cmake/-/issues/22845
|
||||
# and the other CMake issues it links to.
|
||||
#
|
||||
# We create a nvbench-specific target that configures the nvml interface as
|
||||
# described here:
|
||||
# https://gitlab.kitware.com/cmake/cmake/-/issues/22845#note_1077538
|
||||
#
|
||||
# Use find_file instead of find_library, which would search for a .lib file.
|
||||
# This is also nice because find_file searches recursively (find_library
|
||||
# does not) and some versions of CTK nest nvml.dll several directories deep
|
||||
# under C:\Windows\System32.
|
||||
find_file(NVBench_NVML_DLL nvml.dll REQUIRED
|
||||
DOC "The full path to nvml.dll. Usually somewhere under C:/Windows/System32."
|
||||
PATHS "C:/Windows/System32"
|
||||
)
|
||||
mark_as_advanced(NVBench_NVML_DLL)
|
||||
add_library(nvbench::nvml SHARED IMPORTED)
|
||||
target_link_libraries(nvbench::nvml INTERFACE CUDA::toolkit)
|
||||
set_target_properties(nvbench::nvml PROPERTIES
|
||||
IMPORTED_LOCATION "${NVBench_NVML_DLL}"
|
||||
IMPORTED_IMPLIB "${CUDA_nvml_LIBRARY}"
|
||||
)
|
||||
else()
|
||||
# Linux is much easier...
|
||||
add_library(nvbench::nvml ALIAS CUDA::nvml)
|
||||
endif()
|
||||
# Since this file is installed, we need to make sure that the CUDAToolkit has
|
||||
# been found by consumers:
|
||||
if (NOT TARGET CUDA::toolkit)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
endif()
|
||||
|
||||
add_library(nvbench::nvml ALIAS CUDA::nvml)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
# Called before project(...)
|
||||
macro(nvbench_load_rapids_cmake)
|
||||
file(DOWNLOAD
|
||||
https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.12/RAPIDS.cmake
|
||||
"${CMAKE_BINARY_DIR}/RAPIDS.cmake"
|
||||
)
|
||||
include("${CMAKE_BINARY_DIR}/RAPIDS.cmake")
|
||||
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/NVBENCH_RAPIDS.cmake")
|
||||
file(DOWNLOAD
|
||||
https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-25.04/RAPIDS.cmake
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/NVBENCH_RAPIDS.cmake"
|
||||
)
|
||||
endif()
|
||||
include("${CMAKE_CURRENT_BINARY_DIR}/NVBENCH_RAPIDS.cmake")
|
||||
|
||||
include(rapids-cmake)
|
||||
include(rapids-cpm)
|
||||
@@ -18,10 +20,9 @@ endmacro()
|
||||
# Called after project(...)
|
||||
macro(nvbench_init_rapids_cmake)
|
||||
rapids_cmake_build_type(Release)
|
||||
rapids_cmake_write_version_file("${NVBench_BINARY_DIR}/nvbench/detail/version.cuh")
|
||||
rapids_cmake_write_git_revision_file(
|
||||
nvbench_git_revision
|
||||
"${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh"
|
||||
rapids_cmake_write_version_file(
|
||||
"${NVBench_BINARY_DIR}/nvbench/detail/version.cuh"
|
||||
PREFIX "NVBENCH"
|
||||
)
|
||||
rapids_cpm_init()
|
||||
endmacro()
|
||||
|
||||
@@ -1,3 +1,48 @@
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Passes all args directly to execute_process while setting up the following
|
||||
# results variables and propagating them to the caller's scope:
|
||||
#
|
||||
# - nvbench_process_exit_code
|
||||
# - nvbench_process_stdout
|
||||
# - nvbench_process_stderr
|
||||
#
|
||||
# If the command is not successful (e.g. the last command does not return zero),
|
||||
# a non-fatal warning is printed.
|
||||
function(nvbench_execute_non_fatal_process)
|
||||
execute_process(${ARGN}
|
||||
RESULT_VARIABLE nvbench_process_exit_code
|
||||
OUTPUT_VARIABLE nvbench_process_stdout
|
||||
ERROR_VARIABLE nvbench_process_stderr
|
||||
)
|
||||
|
||||
if (NOT nvbench_process_exit_code EQUAL 0)
|
||||
message(WARNING
|
||||
"execute_process failed with non-zero exit code: ${nvbench_process_exit_code}\n"
|
||||
"${ARGN}\n"
|
||||
"stdout:\n${nvbench_process_stdout}\n"
|
||||
"stderr:\n${nvbench_process_stderr}\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
set(nvbench_process_exit_code "${nvbench_process_exit_code}" PARENT_SCOPE)
|
||||
set(nvbench_process_stdout "${nvbench_process_stdout}" PARENT_SCOPE)
|
||||
set(nvbench_process_stderr "${nvbench_process_stderr}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
# Writes CMAKE_CUDA_ARCHITECTURES to out_var, but using escaped semicolons
|
||||
# as delimiters
|
||||
function(nvbench_escaped_cuda_arches out_var)
|
||||
|
||||
127
cmake/PrintCTestRunTimes.cmake
Normal file
127
cmake/PrintCTestRunTimes.cmake
Normal file
@@ -0,0 +1,127 @@
|
||||
## This CMake script parses the output of ctest and prints a formatted list
|
||||
## of individual test runtimes, sorted longest first.
|
||||
##
|
||||
## ctest > ctest_log
|
||||
## cmake -DLOGFILE=ctest_log \
|
||||
## -DMINSEC=10 \
|
||||
## -P PrintCTestRunTimes.cmake
|
||||
##
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
# Prepend the string with "0" until the string length equals the specified width
|
||||
function(pad_string_with_zeros string_var width)
|
||||
set(local_string "${${string_var}}")
|
||||
string(LENGTH "${local_string}" size)
|
||||
while(size LESS width)
|
||||
string(PREPEND local_string "0")
|
||||
string(LENGTH "${local_string}" size)
|
||||
endwhile()
|
||||
set(${string_var} "${local_string}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
################################################################################
|
||||
|
||||
if (NOT LOGFILE)
|
||||
message(FATAL_ERROR "Missing -DLOGFILE=<ctest output> argument.")
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED MINSEC)
|
||||
set(MINSEC 10)
|
||||
endif()
|
||||
|
||||
set(num_below_thresh 0)
|
||||
|
||||
# Check if logfile exists
|
||||
if (NOT EXISTS "${LOGFILE}")
|
||||
message(FATAL_ERROR "LOGFILE does not exist ('${LOGFILE}').")
|
||||
endif()
|
||||
|
||||
string(JOIN "" regex
|
||||
"[0-9]+/[0-9]+[ ]+Test[ ]+#"
|
||||
"([0-9]+)" # Test ID
|
||||
":[ ]+"
|
||||
"([^ ]+)" # Test Name
|
||||
"[ ]*\\.+[ ]*\\**[ ]*"
|
||||
"([^ ]+)" # Result
|
||||
"[ ]+"
|
||||
"([0-9]+)" # Seconds
|
||||
"\\.[0-9]+[ ]+sec"
|
||||
)
|
||||
|
||||
message(DEBUG "LOGFILE: ${LOGFILE}")
|
||||
message(DEBUG "MINSEC: ${MINSEC}")
|
||||
message(DEBUG "regex: ${regex}")
|
||||
|
||||
# Read the logfile and generate a map / keylist
|
||||
set(keys)
|
||||
file(STRINGS "${LOGFILE}" lines)
|
||||
foreach(line ${lines})
|
||||
|
||||
# Parse each build time
|
||||
string(REGEX MATCH "${regex}" _DUMMY "${line}")
|
||||
|
||||
if (CMAKE_MATCH_COUNT EQUAL 4)
|
||||
set(test_id "${CMAKE_MATCH_1}")
|
||||
set(test_name "${CMAKE_MATCH_2}")
|
||||
set(test_result "${CMAKE_MATCH_3}")
|
||||
set(tmp "${CMAKE_MATCH_4}") # floor(runtime_seconds)
|
||||
|
||||
if (tmp LESS MINSEC)
|
||||
math(EXPR num_below_thresh "${num_below_thresh} + 1")
|
||||
continue()
|
||||
endif()
|
||||
|
||||
# Compute human readable time
|
||||
math(EXPR days "${tmp} / (60 * 60 * 24)")
|
||||
math(EXPR tmp "${tmp} - (${days} * 60 * 60 * 24)")
|
||||
math(EXPR hours "${tmp} / (60 * 60)")
|
||||
math(EXPR tmp "${tmp} - (${hours} * 60 * 60)")
|
||||
math(EXPR minutes "${tmp} / (60)")
|
||||
math(EXPR tmp "${tmp} - (${minutes} * 60)")
|
||||
math(EXPR seconds "${tmp}")
|
||||
|
||||
# Format time components
|
||||
pad_string_with_zeros(days 3)
|
||||
pad_string_with_zeros(hours 2)
|
||||
pad_string_with_zeros(minutes 2)
|
||||
pad_string_with_zeros(seconds 2)
|
||||
|
||||
# Construct table entry
|
||||
# Later values in the file for the same command overwrite earlier entries
|
||||
string(MAKE_C_IDENTIFIER "${test_id}" key)
|
||||
string(JOIN " | " ENTRY_${key}
|
||||
"${days}d ${hours}h ${minutes}m ${seconds}s"
|
||||
"${test_result}"
|
||||
"${test_id}: ${test_name}"
|
||||
)
|
||||
|
||||
# Record the key:
|
||||
list(APPEND keys "${key}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
list(REMOVE_DUPLICATES keys)
|
||||
|
||||
# Build the entry list:
|
||||
set(entries)
|
||||
foreach(key ${keys})
|
||||
list(APPEND entries "${ENTRY_${key}}")
|
||||
endforeach()
|
||||
|
||||
if (NOT entries)
|
||||
message(STATUS "LOGFILE contained no test times ('${LOGFILE}').")
|
||||
endif()
|
||||
|
||||
# Sort in descending order:
|
||||
list(SORT entries ORDER DESCENDING)
|
||||
|
||||
# Dump table:
|
||||
foreach(entry ${entries})
|
||||
message(STATUS ${entry})
|
||||
endforeach()
|
||||
|
||||
if (num_below_thresh GREATER 0)
|
||||
message(STATUS "${num_below_thresh} additional tests took < ${MINSEC}s each.")
|
||||
endif()
|
||||
101
cmake/PrintNinjaBuildTimes.cmake
Normal file
101
cmake/PrintNinjaBuildTimes.cmake
Normal file
@@ -0,0 +1,101 @@
|
||||
## This CMake script parses a .ninja_log file (LOGFILE) and prints a list of
|
||||
## build/link times, sorted longest first.
|
||||
##
|
||||
## cmake -DLOGFILE=<.ninja_log file> \
|
||||
## -P PrintNinjaBuildTimes.cmake
|
||||
##
|
||||
## If LOGFILE is omitted, the current directory's .ninja_log file is used.
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
# Prepend the string with "0" until the string length equals the specified width
|
||||
function(pad_string_with_zeros string_var width)
|
||||
set(local_string "${${string_var}}")
|
||||
string(LENGTH "${local_string}" size)
|
||||
while(size LESS width)
|
||||
string(PREPEND local_string "0")
|
||||
string(LENGTH "${local_string}" size)
|
||||
endwhile()
|
||||
set(${string_var} "${local_string}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
################################################################################
|
||||
|
||||
if (NOT LOGFILE)
|
||||
set(LOGFILE ".ninja_log")
|
||||
endif()
|
||||
|
||||
# Check if logfile exists
|
||||
if (NOT EXISTS "${LOGFILE}")
|
||||
message(FATAL_ERROR "LOGFILE does not exist ('${LOGFILE}').")
|
||||
endif()
|
||||
|
||||
# Read the logfile and generate a map / keylist
|
||||
set(keys)
|
||||
file(STRINGS "${LOGFILE}" lines)
|
||||
foreach(line ${lines})
|
||||
|
||||
# Parse each build time
|
||||
string(REGEX MATCH
|
||||
"^([0-9]+)\t([0-9]+)\t[0-9]+\t([^\t]+)+\t[0-9a-fA-F]+$" _DUMMY "${line}")
|
||||
|
||||
if (CMAKE_MATCH_COUNT EQUAL 3)
|
||||
set(start_ms ${CMAKE_MATCH_1})
|
||||
set(end_ms ${CMAKE_MATCH_2})
|
||||
set(command "${CMAKE_MATCH_3}")
|
||||
math(EXPR runtime_ms "${end_ms} - ${start_ms}")
|
||||
|
||||
# Compute human readable time
|
||||
math(EXPR days "${runtime_ms} / (1000 * 60 * 60 * 24)")
|
||||
math(EXPR runtime_ms "${runtime_ms} - (${days} * 1000 * 60 * 60 * 24)")
|
||||
math(EXPR hours "${runtime_ms} / (1000 * 60 * 60)")
|
||||
math(EXPR runtime_ms "${runtime_ms} - (${hours} * 1000 * 60 * 60)")
|
||||
math(EXPR minutes "${runtime_ms} / (1000 * 60)")
|
||||
math(EXPR runtime_ms "${runtime_ms} - (${minutes} * 1000 * 60)")
|
||||
math(EXPR seconds "${runtime_ms} / 1000")
|
||||
math(EXPR milliseconds "${runtime_ms} - (${seconds} * 1000)")
|
||||
|
||||
# Format time components
|
||||
pad_string_with_zeros(days 3)
|
||||
pad_string_with_zeros(hours 2)
|
||||
pad_string_with_zeros(minutes 2)
|
||||
pad_string_with_zeros(seconds 2)
|
||||
pad_string_with_zeros(milliseconds 3)
|
||||
|
||||
# Construct table entry
|
||||
# Later values in the file for the same command overwrite earlier entries
|
||||
string(MAKE_C_IDENTIFIER "${command}" key)
|
||||
set(ENTRY_${key}
|
||||
"${days}d ${hours}h ${minutes}m ${seconds}s ${milliseconds}ms | ${command}"
|
||||
)
|
||||
|
||||
# Record the key:
|
||||
list(APPEND keys "${key}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
list(REMOVE_DUPLICATES keys)
|
||||
|
||||
# Build the entry list:
|
||||
set(entries)
|
||||
foreach(key ${keys})
|
||||
list(APPEND entries "${ENTRY_${key}}")
|
||||
endforeach()
|
||||
|
||||
if (NOT entries)
|
||||
message(FATAL_ERROR "LOGFILE contained no build entries ('${LOGFILE}').")
|
||||
endif()
|
||||
|
||||
# Sort in descending order:
|
||||
list(SORT entries)
|
||||
list(REVERSE entries)
|
||||
|
||||
# Dump table:
|
||||
message(STATUS "-----------------------+----------------------------")
|
||||
message(STATUS "Time | Command ")
|
||||
message(STATUS "-----------------------+----------------------------")
|
||||
|
||||
foreach(entry ${entries})
|
||||
message(STATUS ${entry})
|
||||
endforeach()
|
||||
45
cmake/header_test.in.cxx
Normal file
45
cmake/header_test.in.cxx
Normal file
@@ -0,0 +1,45 @@
|
||||
// This source file checks that:
|
||||
// 1) Header <${header_str}> compiles without error.
|
||||
// 2) Common macro collisions with platform/system headers are avoided.
|
||||
|
||||
// Turn off failures for certain configurations:
|
||||
#ifndef NVBench_IGNORE_MACRO_CHECKS
|
||||
|
||||
// Define NVBench_MACRO_CHECK(macro, header), which emits a diagnostic indicating
|
||||
// a potential macro collision and halts.
|
||||
//
|
||||
// Hacky way to build a string, but it works on all tested platforms.
|
||||
#define NVBench_MACRO_CHECK(MACRO, HEADER) \
|
||||
NVBench_MACRO_CHECK_IMPL( \
|
||||
Identifier MACRO should not be used from NVBench headers due to conflicts with HEADER macros.)
|
||||
|
||||
// Use raw platform checks instead of the NVBench_HOST_COMPILER macros since we
|
||||
// don't want to #include any headers other than the one being tested.
|
||||
//
|
||||
// This is only implemented for GCC/Clang.
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
|
||||
// GCC/clang are easy:
|
||||
#define NVBench_MACRO_CHECK_IMPL(msg) NVBench_MACRO_CHECK_IMPL0(GCC error #msg)
|
||||
#define NVBench_MACRO_CHECK_IMPL0(expr) _Pragma(#expr)
|
||||
|
||||
#endif // defined(__clang__) || defined(__GNUC__)
|
||||
|
||||
// complex.h conflicts
|
||||
#define I NVBench_MACRO_CHECK('I', complex.h)
|
||||
|
||||
// windows.h conflicts
|
||||
#define small NVBench_MACRO_CHECK('small', windows.h)
|
||||
// We can't enable these checks without breaking some builds -- some standard
|
||||
// library implementations unconditionally `#undef` these macros, which then
|
||||
// causes random failures later.
|
||||
// Leaving these commented out as a warning: Here be dragons.
|
||||
// #define min(...) NVBench_MACRO_CHECK('min', windows.h)
|
||||
// #define max(...) NVBench_MACRO_CHECK('max', windows.h)
|
||||
|
||||
// termios.h conflicts (NVIDIA/thrust#1547)
|
||||
#define B0 NVBench_MACRO_CHECK("B0", termios.h)
|
||||
|
||||
#endif // NVBench_IGNORE_MACRO_CHECKS
|
||||
|
||||
#include <${header_str}>
|
||||
22
cmake/patches/json_unordered_map_ice.cmake
Normal file
22
cmake/patches/json_unordered_map_ice.cmake
Normal file
@@ -0,0 +1,22 @@
|
||||
# NVCC 11.1 and GCC 9 need a patch to build, otherwise:
|
||||
#
|
||||
# nlohmann/ordered_map.hpp(29): error #3316:
|
||||
# Internal Compiler Error (codegen): "internal error during structure layout!"
|
||||
#
|
||||
# Usage:
|
||||
# ${CMAKE_COMMAND}
|
||||
# -D "CUDA_VERSION=${CMAKE_CUDA_COMPILER_VERSION}"
|
||||
# -D "CXX_VERSION=${CMAKE_CXX_COMPILER_VERSION}"
|
||||
# -D "CXX_ID=${CMAKE_CXX_COMPILER_ID}"
|
||||
# -P "json_unordered_map_ice.cmake"
|
||||
|
||||
if(CUDA_VERSION VERSION_GREATER 11.8 OR NOT CXX_ID STREQUAL "GNU" OR CXX_VERSION VERSION_LESS 9.0)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Read the file and replace the string "JSON_NO_UNIQUE_ADDRESS" with
|
||||
# "/* JSON_NO_UNIQUE_ADDRESS */".
|
||||
file(READ "include/nlohmann/ordered_map.hpp" NLOHMANN_ORDERED_MAP_HPP)
|
||||
string(REPLACE "JSON_NO_UNIQUE_ADDRESS" "/* [NVBench Patch] JSON_NO_UNIQUE_ADDRESS */"
|
||||
NLOHMANN_ORDERED_MAP_HPP "${NLOHMANN_ORDERED_MAP_HPP}")
|
||||
file(WRITE "include/nlohmann/ordered_map.hpp" "${NLOHMANN_ORDERED_MAP_HPP}")
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,7 @@ A basic kernel benchmark can be created with just a few lines of CUDA C++:
|
||||
|
||||
```cpp
|
||||
void my_benchmark(nvbench::state& state) {
|
||||
state.exec([](nvbench::launch& launch) {
|
||||
state.exec([](nvbench::launch& launch) {
|
||||
my_kernel<<<num_blocks, 256, 0, launch.get_stream()>>>();
|
||||
});
|
||||
}
|
||||
@@ -97,7 +97,7 @@ void benchmark(nvbench::state& state)
|
||||
const auto num_inputs = state.get_int64("NumInputs");
|
||||
thrust::device_vector<int> data = generate_input(num_inputs);
|
||||
|
||||
state.exec([&data](nvbench::launch& launch) {
|
||||
state.exec([&data](nvbench::launch& launch) {
|
||||
my_kernel<<<blocks, threads, 0, launch.get_stream()>>>(data.begin(), data.end());
|
||||
});
|
||||
}
|
||||
@@ -134,7 +134,7 @@ void benchmark(nvbench::state& state)
|
||||
const auto quality = state.get_float64("Quality");
|
||||
|
||||
state.exec([&quality](nvbench::launch& launch)
|
||||
{
|
||||
{
|
||||
my_kernel<<<blocks, threads, 0, launch.get_stream()>>>(quality);
|
||||
});
|
||||
}
|
||||
@@ -153,7 +153,7 @@ void benchmark(nvbench::state& state)
|
||||
thrust::device_vector<int> data = generate_input(rng_dist);
|
||||
|
||||
state.exec([&data](nvbench::launch& launch)
|
||||
{
|
||||
{
|
||||
my_kernel<<<blocks, threads, 0, launch.get_stream()>>>(data.begin(), data.end());
|
||||
});
|
||||
}
|
||||
@@ -182,13 +182,13 @@ void my_benchmark(nvbench::state& state, nvbench::type_list<T>)
|
||||
thrust::device_vector<T> data = generate_input<T>();
|
||||
|
||||
state.exec([&data](nvbench::launch& launch)
|
||||
{
|
||||
{
|
||||
my_kernel<<<blocks, threads, 0, launch.get_stream()>>>(data.begin(), data.end());
|
||||
});
|
||||
}
|
||||
using my_types = nvbench::type_list<int, float, double>;
|
||||
NVBENCH_BENCH_TYPES(my_benchmark, NVBENCH_TYPE_AXES(my_types))
|
||||
.set_type_axis_names({"ValueType"});
|
||||
.set_type_axes_names({"ValueType"});
|
||||
```
|
||||
|
||||
The `NVBENCH_TYPE_AXES` macro is unfortunately necessary to prevent commas in
|
||||
@@ -293,7 +293,6 @@ In general::
|
||||
|
||||
More examples can found in [examples/throughput.cu](../examples/throughput.cu).
|
||||
|
||||
|
||||
# Skip Uninteresting / Invalid Benchmarks
|
||||
|
||||
Sometimes particular combinations of parameters aren't useful or interesting —
|
||||
@@ -321,7 +320,7 @@ void my_benchmark(nvbench::state& state, nvbench::type_list<T, U>)
|
||||
// Skip benchmarks at compile time -- for example, always skip when T == U
|
||||
// (Note that the `type_list` argument defines the same type twice).
|
||||
template <typename SameType>
|
||||
void my_benchmark(nvbench::state& state,
|
||||
void my_benchmark(nvbench::state& state,
|
||||
nvbench::type_list<SameType, SameType>)
|
||||
{
|
||||
state.skip("T must not be the same type as U.");
|
||||
@@ -347,6 +346,15 @@ true:
|
||||
synchronize internally.
|
||||
- `nvbench::exec_tag::timer` requests a timer object that can be used to
|
||||
restrict the timed region.
|
||||
- `nvbench::exec_tag::no_batch` disables batch measurements. This both disables
|
||||
them during execution to reduce runtime, and prevents their compilation to
|
||||
reduce compile-time and binary size.
|
||||
- `nvbench::exec_tag::gpu` is an optional hint that prevents non-GPU benchmarking
|
||||
code from being compiled for a particular benchmark. A runtime error is emitted
|
||||
if the benchmark is defined with `set_is_cpu_only(true)`.
|
||||
- `nvbench::exec_tag::no_gpu` is an optional hint that prevents GPU benchmarking
|
||||
code from being compiled for a particular benchmark. A runtime error is emitted
|
||||
if the benchmark does not also define `set_is_cpu_only(true)`.
|
||||
|
||||
Multiple execution tags may be combined using `operator|`, e.g.
|
||||
|
||||
@@ -397,7 +405,7 @@ Note that using manual timer mode disables batch measurements.
|
||||
void timer_example(nvbench::state& state)
|
||||
{
|
||||
// Pass the `timer` exec tag to request a timer:
|
||||
state.exec(nvbench::exec_tag::timer,
|
||||
state.exec(nvbench::exec_tag::timer,
|
||||
// Lambda now accepts a timer:
|
||||
[](nvbench::launch& launch, auto& timer)
|
||||
{
|
||||
@@ -418,6 +426,79 @@ NVBENCH_BENCH(timer_example);
|
||||
See [examples/exec_tag_timer.cu](../examples/exec_tag_timer.cu) for a complete
|
||||
example.
|
||||
|
||||
## Compilation hints: `nvbench::exec_tag::no_batch`, `gpu`, and `no_gpu`
|
||||
|
||||
These execution tags are optional hints that disable the compilation of various
|
||||
code paths when they are not needed. They apply only to a single benchmark.
|
||||
|
||||
- `nvbench::exec_tag::no_batch` prevents the execution and instantiation of the batch measurement backend.
|
||||
- `nvbench::exec_tag::gpu` prevents the instantiation of CPU-only benchmarking backends.
|
||||
- Requires that the benchmark does not define `set_is_cpu_only(true)`.
|
||||
- Optional; this has no effect on runtime measurements, but reduces compile-time and binary size.
|
||||
- Host-side CPU measurements of GPU kernel execution time are still provided.
|
||||
- `nvbench::exec_tag::no_gpu` prevents the instantiation of GPU benchmarking backends.
|
||||
- Requires that the benchmark defines `set_is_cpu_only(true)`.
|
||||
- Optional; this has no effect on runtime measurements, but reduces compile-time and binary size.
|
||||
- See also [CPU-only Benchmarks](#cpu-only-benchmarks).
|
||||
|
||||
# CPU-only Benchmarks
|
||||
|
||||
NVBench provides CPU-only benchmarking facilities that are intended for measuring
|
||||
significant CPU workloads. We do not recommend using these features for high-resolution
|
||||
CPU benchmarking -- other libraries (such as Google Benchmark) are more appropriate for
|
||||
such applications. Examples are provided in [examples/cpu_only.cu](../examples/cpu_only.cu).
|
||||
|
||||
Note that NVBench still requires a CUDA compiler and runtime even if a project only contains
|
||||
CPU-only benchmarks.
|
||||
|
||||
The `is_cpu_only` property of the benchmark toggles between GPU and CPU-only measurements:
|
||||
|
||||
```cpp
|
||||
void my_cpu_benchmark(nvbench::state &state)
|
||||
{
|
||||
state.exec([](nvbench::launch &) { /* workload */ });
|
||||
}
|
||||
NVBENCH_BENCH(my_cpu_benchmark)
|
||||
.set_is_cpu_only(true); // Mark as CPU-only.
|
||||
```
|
||||
|
||||
The optional `nvbench::exec_tag::no_gpu` hint may be used to reduce tbe compilation time and
|
||||
binary size of CPU-only benchmarks. An error is emitted at runtime if this tag is used while
|
||||
`is_cpu_only` is false.
|
||||
|
||||
```cpp
|
||||
void my_cpu_benchmark(nvbench::state &state)
|
||||
{
|
||||
state.exec(nvbench::exec_tag::no_gpu, // Prevent compilation of GPU backends
|
||||
[](nvbench::launch &) { /* workload */ });
|
||||
}
|
||||
NVBENCH_BENCH(my_cpu_benchmark)
|
||||
.set_is_cpu_only(true); // Mark as CPU-only.
|
||||
```
|
||||
|
||||
The `nvbench::exec_tag::timer` execution tag is also supported by CPU-only benchmarks. This
|
||||
is useful for benchmarks that require additional per-sample setup/teardown. See the
|
||||
[`nvbench::exec_tag::timer`](#explicit-timer-mode-nvbenchexec_tagtimer) section for more
|
||||
details.
|
||||
|
||||
```cpp
|
||||
void my_cpu_benchmark(nvbench::state &state)
|
||||
{
|
||||
state.exec(nvbench::exec_tag::no_gpu | // Prevent compilation of GPU backends
|
||||
nvbench::exec_tag::timer, // Request a timer object
|
||||
[](nvbench::launch &, auto &timer)
|
||||
{
|
||||
// Setup here
|
||||
timer.start();
|
||||
// timed workload
|
||||
timer.stop();
|
||||
// teardown here
|
||||
});
|
||||
}
|
||||
NVBENCH_BENCH(my_cpu_benchmark)
|
||||
.set_is_cpu_only(true); // Mark as CPU-only.
|
||||
```
|
||||
|
||||
# Beware: Combinatorial Explosion Is Lurking
|
||||
|
||||
Be very careful of how quickly the configuration space can grow. The following
|
||||
@@ -430,7 +511,7 @@ using value_types = nvbench::type_list<nvbench::uint8_t,
|
||||
nvbench::int32_t,
|
||||
nvbench::float32_t,
|
||||
nvbench::float64_t>;
|
||||
using op_types = nvbench::type_list<thrust::plus<>,
|
||||
using op_types = nvbench::type_list<thrust::plus<>,
|
||||
thrust::multiplies<>,
|
||||
thrust::maximum<>>;
|
||||
|
||||
@@ -445,7 +526,7 @@ NVBENCH_BENCH_TYPES(my_benchmark,
|
||||
|
||||
```
|
||||
960 total configs
|
||||
= 4 [T=(U8, I32, F32, F64)]
|
||||
= 4 [T=(U8, I32, F32, F64)]
|
||||
* 4 [U=(U8, I32, F32, F64)]
|
||||
* 4 [V=(U8, I32, F32, F64)]
|
||||
* 3 [Op=(plus, multiplies, max)]
|
||||
@@ -453,9 +534,10 @@ NVBENCH_BENCH_TYPES(my_benchmark,
|
||||
```
|
||||
|
||||
For large configuration spaces like this, pruning some of the less useful
|
||||
combinations using the techniques described in the [Zipped/Tied Iteration of Value Axes](#zipped-iteration-of-value-axes)
|
||||
or [Skip Uninteresting / Invalid Benchmarks](#skip-uninteresting--invalid-benchmarks) section can help immensely with
|
||||
keeping compile / run times manageable.
|
||||
combinations using the techniques described in the
|
||||
[Zipped/Tied Iteration of Value Axes](#zipped-iteration-of-value-axes)
|
||||
or [Skip Uninteresting / Invalid Benchmarks](#skip-uninteresting--invalid-benchmarks)
|
||||
sections can help immensely with keeping compile / run times manageable.
|
||||
|
||||
Splitting a single large configuration space into multiple, more focused
|
||||
benchmarks with reduced dimensionality will likely be worth the effort as well.
|
||||
|
||||
112
docs/cli_help.md
112
docs/cli_help.md
@@ -83,28 +83,6 @@
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--min-samples <count>`
|
||||
* Gather at least `<count>` samples per measurement.
|
||||
* Default is 10 samples.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--min-time <seconds>`
|
||||
* Accumulate at least `<seconds>` of execution time per measurement.
|
||||
* Default is 0.5 seconds.
|
||||
* If both GPU and CPU times are gathered, this applies to GPU time only.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--max-noise <value>`
|
||||
* Gather samples until the error in the measurement drops below `<value>`.
|
||||
* Noise is specified as the percent relative standard deviation.
|
||||
* Default is 0.5% (`--max-noise 0.5`)
|
||||
* Only applies to Cold measurements.
|
||||
* If both GPU and CPU times are gathered, this applies to GPU noise only.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--skip-time <seconds>`
|
||||
* Skip a measurement when a warmup run executes in less than `<seconds>`.
|
||||
* Default is -1 seconds (disabled).
|
||||
@@ -115,6 +93,42 @@
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--throttle-threshold <value>`
|
||||
* Set the GPU throttle threshold as percentage of the device's default clock rate.
|
||||
* Default is 75.
|
||||
* Set to 0 to disable throttle detection entirely.
|
||||
* Note that throttling is disabled when `nvbench::exec_tag::sync` is used.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--throttle-recovery-delay <value>`
|
||||
* Set the GPU throttle recovery delay in seconds.
|
||||
* Default is 0.05 seconds.
|
||||
* Note that throttling is disabled when `nvbench::exec_tag::sync` is used.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--run-once`
|
||||
* Only run the benchmark once, skipping any warmup runs and batched
|
||||
measurements.
|
||||
* Intended for use with external profiling tools.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--disable-blocking-kernel`
|
||||
* Don't use the `blocking_kernel`.
|
||||
* Intended for use with external profiling tools.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--profile`
|
||||
* Implies `--run-once` and `--disable-blocking-kernel`.
|
||||
* Intended for use with external profiling tools.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
## Stopping Criteria
|
||||
|
||||
* `--timeout <seconds>`
|
||||
* Measurements will timeout after `<seconds>` have elapsed.
|
||||
* Default is 15 seconds.
|
||||
@@ -125,9 +139,55 @@
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--run-once`
|
||||
* Only run the benchmark once, skipping any warmup runs and batched
|
||||
measurements.
|
||||
* Intended for use with external profiling tools.
|
||||
* `--min-samples <count>`
|
||||
* Gather at least `<count>` samples per measurement before checking any
|
||||
other stopping criterion besides the timeout.
|
||||
* Default is 10 samples.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--stopping-criterion <criterion>`
|
||||
* After `--min-samples` is satisfied, use `<criterion>` to detect if enough
|
||||
samples were collected.
|
||||
* Only applies to Cold and CPU-only measurements.
|
||||
* If both GPU and CPU times are gathered, GPU time is used for stopping
|
||||
analysis.
|
||||
* Stopping criteria provided by NVBench are:
|
||||
* "stdrel": (default) Converges to a minimal relative standard deviation,
|
||||
stdev / mean
|
||||
* "entropy": Converges based on the cumulative entropy of all samples.
|
||||
* Each stopping criterion may provide additional parameters to customize
|
||||
behavior, as detailed below:
|
||||
|
||||
### "stdrel" Stopping Criterion Parameters
|
||||
|
||||
* `--min-time <seconds>`
|
||||
* Accumulate at least `<seconds>` of execution time per measurement.
|
||||
* Only applies to `stdrel` stopping criterion.
|
||||
* Default is 0.5 seconds.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--max-noise <value>`
|
||||
* Gather samples until the error in the measurement drops below `<value>`.
|
||||
* Noise is specified as the percent relative standard deviation (stdev/mean).
|
||||
* Default is 0.5% (`--max-noise 0.5`)
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
### "entropy" Stopping Criterion Parameters
|
||||
|
||||
* `--max-angle <value>`
|
||||
* Maximum linear regression angle of cumulative entropy.
|
||||
* Smaller values give more accurate results.
|
||||
* Default is 0.048.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
* `--min-r2 <value>`
|
||||
* Minimum coefficient of determination for linear regression of cumulative
|
||||
entropy.
|
||||
* Larger values give more accurate results.
|
||||
* Default is 0.36.
|
||||
* Applies to the most recent `--benchmark`, or all benchmarks if specified
|
||||
before any `--benchmark` arguments.
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
set(example_srcs
|
||||
auto_throughput.cu
|
||||
axes.cu
|
||||
custom_criterion.cu
|
||||
cpu_only.cu
|
||||
enums.cu
|
||||
exec_tag_sync.cu
|
||||
exec_tag_timer.cu
|
||||
skip.cu
|
||||
stream.cu
|
||||
summaries.cu
|
||||
throughput.cu
|
||||
auto_throughput.cu
|
||||
custom_iteration_spaces.cu
|
||||
)
|
||||
|
||||
@@ -14,39 +17,39 @@ set(example_srcs
|
||||
add_custom_target(nvbench.example.all)
|
||||
add_dependencies(nvbench.all nvbench.example.all)
|
||||
|
||||
foreach(example_src IN LISTS example_srcs)
|
||||
get_filename_component(example_name "${example_src}" NAME_WLE)
|
||||
string(PREPEND example_name "nvbench.example.")
|
||||
add_executable(${example_name} "${example_src}")
|
||||
nvbench_config_target(${example_name})
|
||||
target_include_directories(${example_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}")
|
||||
target_link_libraries(${example_name} PRIVATE nvbench::main)
|
||||
set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_17)
|
||||
add_test(NAME ${example_name}
|
||||
COMMAND "$<TARGET_FILE:${example_name}>" --timeout 0.1 --min-time 1e-5
|
||||
)
|
||||
function (nvbench_add_examples_target target_prefix cuda_std)
|
||||
add_custom_target(${target_prefix}.all)
|
||||
add_dependencies(nvbench.example.all ${target_prefix}.all)
|
||||
|
||||
add_dependencies(nvbench.example.all ${example_name})
|
||||
endforeach()
|
||||
foreach(example_src IN LISTS example_srcs)
|
||||
get_filename_component(example_name "${example_src}" NAME_WLE)
|
||||
string(PREPEND example_name "${target_prefix}.")
|
||||
add_executable(${example_name} "${example_src}")
|
||||
nvbench_config_target(${example_name})
|
||||
target_include_directories(${example_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}")
|
||||
target_link_libraries(${example_name} PRIVATE nvbench::main)
|
||||
set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_${cuda_std})
|
||||
|
||||
# Silence some warnings from old thrust headers:
|
||||
set(thrust_examples
|
||||
auto_throughput
|
||||
axes
|
||||
exec_tag_sync
|
||||
exec_tag_timer
|
||||
skip
|
||||
throughput
|
||||
)
|
||||
foreach (example IN LISTS thrust_examples)
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
# C4324: structure was padded due to alignment specifier
|
||||
nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4324")
|
||||
|
||||
# warning C4201: nonstandard extension used: nameless struct/union:
|
||||
# Fixed in Thrust 1.12.0 (CTK 11.4, NV HPC 21.3)
|
||||
if (${CUDAToolkit_VERSION} VERSION_LESS 11.4)
|
||||
nvbench_add_cxx_flag(nvbench.example.${example} PRIVATE "/wd4201")
|
||||
set(example_args --timeout 0.1)
|
||||
# The custom_criterion example doesn't support the --min-time argument:
|
||||
if (NOT "${example_src}" STREQUAL "custom_criterion.cu")
|
||||
list(APPEND example_args --min-time 1e-5)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_test(NAME ${example_name}
|
||||
COMMAND "$<TARGET_FILE:${example_name}>" ${example_args})
|
||||
|
||||
# These should not deadlock. If they do, it may be that the CUDA context was created before
|
||||
# setting CUDA_MODULE_LOAD=EAGER in main, see NVIDIA/nvbench#136.
|
||||
set_tests_properties(${example_name} PROPERTIES
|
||||
FAIL_REGULAR_EXPRESSION "Possible Deadlock Detected"
|
||||
)
|
||||
|
||||
add_dependencies(${target_prefix}.all ${example_name})
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
|
||||
foreach (std IN LISTS NVBench_DETECTED_CUDA_STANDARDS)
|
||||
nvbench_add_examples_target(nvbench.example.cpp${std} ${std})
|
||||
endforeach()
|
||||
|
||||
@@ -24,37 +24,33 @@
|
||||
template <int ItemsPerThread>
|
||||
__global__ void kernel(std::size_t stride,
|
||||
std::size_t elements,
|
||||
const nvbench::int32_t * __restrict__ in,
|
||||
const nvbench::int32_t *__restrict__ in,
|
||||
nvbench::int32_t *__restrict__ out)
|
||||
{
|
||||
const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const std::size_t tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const std::size_t step = gridDim.x * blockDim.x;
|
||||
|
||||
for (std::size_t i = stride * tid;
|
||||
i < stride * elements;
|
||||
i += stride * step)
|
||||
for (std::size_t i = stride * tid; i < stride * elements; i += stride * step)
|
||||
{
|
||||
for (int j = 0; j < ItemsPerThread; j++)
|
||||
{
|
||||
const auto read_id = (ItemsPerThread * i + j) % elements;
|
||||
const auto read_id = (ItemsPerThread * i + j) % elements;
|
||||
const auto write_id = tid + j * elements;
|
||||
out[write_id] = in[read_id];
|
||||
out[write_id] = in[read_id];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// `throughput_bench` copies a 128 MiB buffer of int32_t, and reports throughput
|
||||
// and cache hit rates.
|
||||
//
|
||||
// Calling state.collect_*() enables particular metric collection if nvbench
|
||||
// was build with CUPTI support (CMake option: -DNVBench_ENABLE_CUPTI=ON).
|
||||
template <int ItemsPerThread>
|
||||
void throughput_bench(nvbench::state &state,
|
||||
nvbench::type_list<nvbench::enum_type<ItemsPerThread>>)
|
||||
void throughput_bench(nvbench::state &state, nvbench::type_list<nvbench::enum_type<ItemsPerThread>>)
|
||||
{
|
||||
// Allocate input data:
|
||||
const std::size_t stride = static_cast<std::size_t>(state.get_int64("Stride"));
|
||||
const std::size_t stride = static_cast<std::size_t>(state.get_int64("Stride"));
|
||||
const std::size_t elements = 128 * 1024 * 1024 / sizeof(nvbench::int32_t);
|
||||
thrust::device_vector<nvbench::int32_t> input(elements);
|
||||
thrust::device_vector<nvbench::int32_t> output(elements * ItemsPerThread);
|
||||
@@ -72,12 +68,11 @@ void throughput_bench(nvbench::state &state,
|
||||
static_cast<int>((elements + threads_in_block - 1) / threads_in_block);
|
||||
|
||||
state.exec([&](nvbench::launch &launch) {
|
||||
kernel<ItemsPerThread>
|
||||
<<<blocks_in_grid, threads_in_block, 0, launch.get_stream()>>>(
|
||||
stride,
|
||||
elements,
|
||||
thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()));
|
||||
kernel<ItemsPerThread><<<blocks_in_grid, threads_in_block, 0, launch.get_stream()>>>(
|
||||
stride,
|
||||
elements,
|
||||
thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -56,8 +56,8 @@ NVBENCH_BENCH(single_float64_axis)
|
||||
void copy_sweep_grid_shape(nvbench::state &state)
|
||||
{
|
||||
// Get current parameters:
|
||||
const int block_size = static_cast<int>(state.get_int64("BlockSize"));
|
||||
const int num_blocks = static_cast<int>(state.get_int64("NumBlocks"));
|
||||
const auto block_size = static_cast<unsigned int>(state.get_int64("BlockSize"));
|
||||
const auto num_blocks = static_cast<unsigned int>(state.get_int64("NumBlocks"));
|
||||
|
||||
// Number of int32s in 256 MiB:
|
||||
const std::size_t num_values = 256 * 1024 * 1024 / sizeof(nvbench::int32_t);
|
||||
@@ -71,17 +71,16 @@ void copy_sweep_grid_shape(nvbench::state &state)
|
||||
thrust::device_vector<nvbench::int32_t> in(num_values, 0);
|
||||
thrust::device_vector<nvbench::int32_t> out(num_values, 0);
|
||||
|
||||
state.exec(
|
||||
[block_size,
|
||||
num_blocks,
|
||||
num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(
|
||||
in_ptr,
|
||||
out_ptr,
|
||||
num_values);
|
||||
});
|
||||
state.exec([block_size,
|
||||
num_blocks,
|
||||
num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(in_ptr,
|
||||
out_ptr,
|
||||
num_values);
|
||||
});
|
||||
}
|
||||
NVBENCH_BENCH(copy_sweep_grid_shape)
|
||||
// Every second power of two from 64->1024:
|
||||
@@ -106,14 +105,12 @@ void copy_type_sweep(nvbench::state &state, nvbench::type_list<ValueType>)
|
||||
thrust::device_vector<ValueType> in(num_values, 0);
|
||||
thrust::device_vector<ValueType> out(num_values, 0);
|
||||
|
||||
state.exec(
|
||||
[num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr,
|
||||
out_ptr,
|
||||
num_values);
|
||||
});
|
||||
state.exec([num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
|
||||
});
|
||||
}
|
||||
// Define a type_list to use for the type axis:
|
||||
using cts_types = nvbench::type_list<nvbench::uint8_t,
|
||||
@@ -129,11 +126,10 @@ NVBENCH_BENCH_TYPES(copy_type_sweep, NVBENCH_TYPE_AXES(cts_types));
|
||||
// Convert 64 MiB of InputTypes to OutputTypes, represented with various
|
||||
// value_types.
|
||||
template <typename InputType, typename OutputType>
|
||||
void copy_type_conversion_sweep(nvbench::state &state,
|
||||
nvbench::type_list<InputType, OutputType>)
|
||||
void copy_type_conversion_sweep(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
|
||||
{
|
||||
// Optional: Skip narrowing conversions.
|
||||
if (sizeof(InputType) > sizeof(OutputType))
|
||||
if constexpr (sizeof(InputType) > sizeof(OutputType))
|
||||
{
|
||||
state.skip("Narrowing conversion: sizeof(InputType) > sizeof(OutputType).");
|
||||
return;
|
||||
@@ -152,14 +148,12 @@ void copy_type_conversion_sweep(nvbench::state &state,
|
||||
thrust::device_vector<InputType> in(num_values, 0);
|
||||
thrust::device_vector<OutputType> out(num_values, 0);
|
||||
|
||||
state.exec(
|
||||
[num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr,
|
||||
out_ptr,
|
||||
num_values);
|
||||
});
|
||||
state.exec([num_values,
|
||||
in_ptr = thrust::raw_pointer_cast(in.data()),
|
||||
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr, out_ptr, num_values);
|
||||
});
|
||||
}
|
||||
// Optional: Skip when InputType == OutputType. This approach avoids
|
||||
// instantiating the benchmark at all.
|
||||
@@ -175,6 +169,5 @@ using ctcs_types = nvbench::type_list<nvbench::int8_t,
|
||||
nvbench::float32_t,
|
||||
nvbench::int64_t,
|
||||
nvbench::float64_t>;
|
||||
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep,
|
||||
NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
|
||||
NVBENCH_BENCH_TYPES(copy_type_conversion_sweep, NVBENCH_TYPE_AXES(ctcs_types, ctcs_types))
|
||||
.set_type_axes_names({"In", "Out"});
|
||||
|
||||
83
examples/cpu_only.cu
Normal file
83
examples/cpu_only.cu
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright 2025 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/nvbench.cuh>
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
// Block execution of the current CPU thread for `seconds` seconds.
|
||||
void sleep_host(double seconds)
|
||||
{
|
||||
std::this_thread::sleep_for(
|
||||
std::chrono::milliseconds(static_cast<nvbench::int64_t>(seconds * 1000)));
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Simple CPU-only benchmark that sleeps on host for a specified duration.
|
||||
void simple(nvbench::state &state)
|
||||
{
|
||||
const auto duration = state.get_float64("Duration");
|
||||
|
||||
state.exec([duration](nvbench::launch &) { sleep_host(duration); });
|
||||
}
|
||||
NVBENCH_BENCH(simple)
|
||||
// 100 -> 500 ms in 100 ms increments.
|
||||
.add_float64_axis("Duration", nvbench::range(.1, .5, .1))
|
||||
// Mark as CPU-only.
|
||||
.set_is_cpu_only(true);
|
||||
|
||||
//=============================================================================
|
||||
// Simple CPU-only benchmark that sleeps on host for a specified duration and
|
||||
// uses a custom timed region.
|
||||
void simple_timer(nvbench::state &state)
|
||||
{
|
||||
const auto duration = state.get_float64("Duration");
|
||||
|
||||
state.exec(nvbench::exec_tag::timer, [duration](nvbench::launch &, auto &timer) {
|
||||
// Do any setup work before starting the timer here...
|
||||
timer.start();
|
||||
|
||||
// The region of code to be timed:
|
||||
sleep_host(duration);
|
||||
|
||||
timer.stop();
|
||||
// Any per-run cleanup here...
|
||||
});
|
||||
}
|
||||
NVBENCH_BENCH(simple_timer)
|
||||
// 100 -> 500 ms in 100 ms increments.
|
||||
.add_float64_axis("Duration", nvbench::range(.1, .5, .1))
|
||||
// Mark as CPU-only.
|
||||
.set_is_cpu_only(true);
|
||||
|
||||
//=============================================================================
|
||||
// Simple CPU-only benchmark that uses the optional `nvbench::exec_tag::no_gpu`
|
||||
// hint to prevent GPU measurement code from being instantiated. Note that
|
||||
// `set_is_cpu_only(true)` is still required when using this hint.
|
||||
void simple_no_gpu(nvbench::state &state)
|
||||
{
|
||||
const auto duration = state.get_float64("Duration");
|
||||
|
||||
state.exec(nvbench::exec_tag::no_gpu, [duration](nvbench::launch &) { sleep_host(duration); });
|
||||
}
|
||||
NVBENCH_BENCH(simple_no_gpu)
|
||||
// 100 -> 500 ms in 100 ms increments.
|
||||
.add_float64_axis("Duration", nvbench::range(.1, .5, .1))
|
||||
// Mark as CPU-only.
|
||||
.set_is_cpu_only(true);
|
||||
77
examples/custom_criterion.cu
Normal file
77
examples/custom_criterion.cu
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright 2023 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/nvbench.cuh>
|
||||
|
||||
// Grab some testing kernels from NVBench:
|
||||
#include <nvbench/test_kernels.cuh>
|
||||
|
||||
// Thrust vectors simplify memory management:
|
||||
#include <thrust/device_vector.h>
|
||||
|
||||
// Inherit from the stopping_criterion_base class:
|
||||
class fixed_criterion final : public nvbench::stopping_criterion_base
|
||||
{
|
||||
nvbench::int64_t m_num_samples{};
|
||||
|
||||
public:
|
||||
fixed_criterion()
|
||||
: nvbench::stopping_criterion_base{"fixed", {{"max-samples", nvbench::int64_t{42}}}}
|
||||
{}
|
||||
|
||||
protected:
|
||||
// Setup the criterion in the `do_initialize()` method:
|
||||
virtual void do_initialize() override { m_num_samples = 0; }
|
||||
|
||||
// Process new measurements in the `add_measurement()` method:
|
||||
virtual void do_add_measurement(nvbench::float64_t /* measurement */) override
|
||||
{
|
||||
m_num_samples++;
|
||||
}
|
||||
|
||||
// Check if the stopping criterion is met in the `is_finished()` method:
|
||||
virtual bool do_is_finished() override
|
||||
{
|
||||
return m_num_samples >= m_params.get_int64("max-samples");
|
||||
}
|
||||
};
|
||||
|
||||
// Register the criterion with NVBench:
|
||||
NVBENCH_REGISTER_CRITERION(fixed_criterion);
|
||||
|
||||
void throughput_bench(nvbench::state &state)
|
||||
{
|
||||
// Allocate input data:
|
||||
const std::size_t num_values = 64 * 1024 * 1024 / sizeof(nvbench::int32_t);
|
||||
thrust::device_vector<nvbench::int32_t> input(num_values);
|
||||
thrust::device_vector<nvbench::int32_t> output(num_values);
|
||||
|
||||
// Provide throughput information:
|
||||
state.add_element_count(num_values, "NumElements");
|
||||
state.add_global_memory_reads<nvbench::int32_t>(num_values, "DataSize");
|
||||
state.add_global_memory_writes<nvbench::int32_t>(num_values);
|
||||
|
||||
state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
|
||||
thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()),
|
||||
num_values);
|
||||
});
|
||||
}
|
||||
NVBENCH_BENCH(throughput_bench).set_stopping_criterion("fixed");
|
||||
@@ -89,7 +89,7 @@ NVBENCH_BENCH(copy_sweep_grid_shape)
|
||||
//
|
||||
struct under_diag final : nvbench::user_axis_space
|
||||
{
|
||||
under_diag(std::vector<std::size_t> input_indices)
|
||||
explicit under_diag(std::vector<std::size_t> input_indices)
|
||||
: nvbench::user_axis_space(std::move(input_indices))
|
||||
{}
|
||||
|
||||
@@ -162,7 +162,7 @@ NVBENCH_BENCH(copy_sweep_grid_shape)
|
||||
struct gauss final : nvbench::user_axis_space
|
||||
{
|
||||
|
||||
gauss(std::vector<std::size_t> input_indices)
|
||||
explicit gauss(std::vector<std::size_t> input_indices)
|
||||
: nvbench::user_axis_space(std::move(input_indices))
|
||||
{}
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
*/
|
||||
|
||||
#include <nvbench/nvbench.cuh>
|
||||
|
||||
#include <nvbench/test_kernels.cuh>
|
||||
|
||||
// Enum to use as parameter axis:
|
||||
@@ -68,12 +67,10 @@ void runtime_enum_sweep_string(nvbench::state &state)
|
||||
// Create inputs, etc, configure runtime kernel parameters, etc.
|
||||
|
||||
// Just a dummy kernel.
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
NVBENCH_BENCH(runtime_enum_sweep_string)
|
||||
.add_string_axis("MyEnum", {"A", "B", "C"});
|
||||
NVBENCH_BENCH(runtime_enum_sweep_string).add_string_axis("MyEnum", {"A", "B", "C"});
|
||||
|
||||
//==============================================================================
|
||||
// Sweep through enum values at runtime using an int64 axis.
|
||||
@@ -91,15 +88,14 @@ NVBENCH_BENCH(runtime_enum_sweep_string)
|
||||
// ```
|
||||
void runtime_enum_sweep_int64(nvbench::state &state)
|
||||
{
|
||||
const auto enum_value = static_cast<MyEnum>(state.get_int64("MyEnum"));
|
||||
[[maybe_unused]] const auto enum_value = static_cast<MyEnum>(state.get_int64("MyEnum"));
|
||||
|
||||
// Do stuff with enum_value.
|
||||
// Create inputs, etc, configure runtime kernel parameters, etc.
|
||||
|
||||
// Just a dummy kernel.
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
NVBENCH_BENCH(runtime_enum_sweep_int64)
|
||||
.add_int64_axis("MyEnum",
|
||||
@@ -178,12 +174,10 @@ void compile_time_enum_sweep(nvbench::state &state,
|
||||
// Template parameters, static dispatch, etc.
|
||||
|
||||
// Just a dummy kernel.
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
using MyEnumList =
|
||||
nvbench::enum_type_list<MyEnum::ValueA, MyEnum::ValueB, MyEnum::ValueC>;
|
||||
using MyEnumList = nvbench::enum_type_list<MyEnum::ValueA, MyEnum::ValueB, MyEnum::ValueC>;
|
||||
NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList))
|
||||
.set_type_axes_names({"MyEnum"});
|
||||
|
||||
@@ -199,16 +193,14 @@ NVBENCH_BENCH_TYPES(compile_time_enum_sweep, NVBENCH_TYPE_AXES(MyEnumList))
|
||||
// * `-12` (struct std::integral_constant<int,-12>)
|
||||
// ```
|
||||
template <nvbench::int32_t IntValue>
|
||||
void compile_time_int_sweep(nvbench::state &state,
|
||||
nvbench::type_list<nvbench::enum_type<IntValue>>)
|
||||
void compile_time_int_sweep(nvbench::state &state, nvbench::type_list<nvbench::enum_type<IntValue>>)
|
||||
{
|
||||
// Use IntValue in compile time contexts.
|
||||
// Template parameters, static dispatch, etc.
|
||||
|
||||
// Just a dummy kernel.
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
using MyInts = nvbench::enum_type_list<0, 16, 4096, -12>;
|
||||
NVBENCH_BENCH_TYPES(compile_time_int_sweep, NVBENCH_TYPE_AXES(MyInts))
|
||||
|
||||
@@ -27,6 +27,9 @@
|
||||
// Used to initialize input data:
|
||||
#include <thrust/sequence.h>
|
||||
|
||||
// Used to run the benchmark on a CUDA stream
|
||||
#include <thrust/execution_policy.h>
|
||||
|
||||
// `sequence_bench` measures the execution time of `thrust::sequence`. Since
|
||||
// algorithms in `thrust::` implicitly sync the CUDA device, the
|
||||
// `nvbench::exec_tag::sync` must be passed to `state.exec(...)`.
|
||||
@@ -50,9 +53,7 @@ void sequence_bench(nvbench::state &state)
|
||||
|
||||
// nvbench::exec_tag::sync indicates that this will implicitly sync:
|
||||
state.exec(nvbench::exec_tag::sync, [&data](nvbench::launch &launch) {
|
||||
thrust::sequence(thrust::device.on(launch.get_stream()),
|
||||
data.begin(),
|
||||
data.end());
|
||||
thrust::sequence(thrust::device.on(launch.get_stream()), data.begin(), data.end());
|
||||
});
|
||||
}
|
||||
NVBENCH_BENCH(sequence_bench);
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
// Thrust simplifies memory management, etc:
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/sequence.h>
|
||||
|
||||
// mod2_inplace performs an in-place mod2 over every element in `data`. `data`
|
||||
@@ -53,6 +54,8 @@ void mod2_inplace(nvbench::state &state)
|
||||
state.exec(nvbench::exec_tag::timer,
|
||||
// Lambda now takes a `timer` argument:
|
||||
[&input, &data, num_values](nvbench::launch &launch, auto &timer) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
|
||||
// Reset working data:
|
||||
thrust::copy(thrust::device.on(launch.get_stream()),
|
||||
input.cbegin(),
|
||||
|
||||
@@ -72,14 +72,12 @@ NVBENCH_BENCH(runtime_skip)
|
||||
// Two type axes are swept, but configurations where InputType == OutputType are
|
||||
// skipped.
|
||||
template <typename InputType, typename OutputType>
|
||||
void skip_overload(nvbench::state &state,
|
||||
nvbench::type_list<InputType, OutputType>)
|
||||
void skip_overload(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
|
||||
{
|
||||
// This is a contrived example that focuses on the skip overloads, so this is
|
||||
// just a sleep kernel:
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
// Overload of skip_overload that is called when InputType == OutputType.
|
||||
template <typename T>
|
||||
@@ -107,9 +105,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
|
||||
{
|
||||
// This is a contrived example that focuses on the skip overloads, so this is
|
||||
// just a sleep kernel:
|
||||
state.exec([](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3);
|
||||
});
|
||||
state.exec(
|
||||
[](nvbench::launch &launch) { nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(1e-3); });
|
||||
}
|
||||
// Enable this overload if InputType is larger than OutputType
|
||||
template <typename InputType, typename OutputType>
|
||||
@@ -119,10 +116,8 @@ skip_sfinae(nvbench::state &state, nvbench::type_list<InputType, OutputType>)
|
||||
state.skip("sizeof(InputType) > sizeof(OutputType).");
|
||||
}
|
||||
// The same type_list is used for both inputs/outputs.
|
||||
using sn_types = nvbench::type_list<nvbench::int8_t,
|
||||
nvbench::int16_t,
|
||||
nvbench::int32_t,
|
||||
nvbench::int64_t>;
|
||||
using sn_types =
|
||||
nvbench::type_list<nvbench::int8_t, nvbench::int16_t, nvbench::int32_t, nvbench::int64_t>;
|
||||
// Setup benchmark:
|
||||
NVBENCH_BENCH_TYPES(skip_sfinae, NVBENCH_TYPE_AXES(sn_types, sn_types))
|
||||
.set_type_axes_names({"In", "Out"});
|
||||
|
||||
@@ -52,6 +52,7 @@ void stream_bench(nvbench::state &state)
|
||||
state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream));
|
||||
|
||||
state.exec([&input, &output, num_values](nvbench::launch &) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
copy(thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()),
|
||||
num_values);
|
||||
|
||||
73
examples/summaries.cu
Normal file
73
examples/summaries.cu
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright 2025 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/nvbench.cuh>
|
||||
|
||||
// Grab some testing kernels from NVBench:
|
||||
#include <nvbench/test_kernels.cuh>
|
||||
|
||||
// #define PRINT_DEFAULT_SUMMARY_TAGS
|
||||
|
||||
void summary_example(nvbench::state &state)
|
||||
{
|
||||
// Fetch parameters and compute duration in seconds:
|
||||
const auto ms = static_cast<nvbench::float64_t>(state.get_int64("ms"));
|
||||
const auto us = static_cast<nvbench::float64_t>(state.get_int64("us"));
|
||||
const auto duration = ms * 1e-3 + us * 1e-6;
|
||||
|
||||
// Add a new column to the summary table with the derived duration used by the benchmark.
|
||||
// See the documentation in nvbench/summary.cuh for more details.
|
||||
{
|
||||
nvbench::summary &summary = state.add_summary("duration");
|
||||
summary.set_string("name", "Duration (s)");
|
||||
summary.set_string("description", "The duration of the kernel execution.");
|
||||
summary.set_string("hint", "duration");
|
||||
summary.set_float64("value", duration);
|
||||
}
|
||||
|
||||
// Run the measurements:
|
||||
state.exec(nvbench::exec_tag::no_batch, [duration](nvbench::launch &launch) {
|
||||
nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(duration);
|
||||
});
|
||||
|
||||
#ifdef PRINT_DEFAULT_SUMMARY_TAGS
|
||||
// The default summary tags can be found by inspecting the state after calling
|
||||
// state.exec.
|
||||
// They can also be found by looking at the json output (--json <filename>)
|
||||
for (const auto &summary : state.get_summaries())
|
||||
{
|
||||
std::cout << summary.get_tag() << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Default summary columns can be shown/hidden in the markdown output tables by adding/removing
|
||||
// the "hide" key. Modify this benchmark to show the minimum and maximum GPUs times, but hide the
|
||||
// mean GPU time and all CPU times. SM Clock frequency and throttling info are also shown.
|
||||
state.get_summary("nv/cold/time/gpu/min").remove_value("hide");
|
||||
state.get_summary("nv/cold/time/gpu/max").remove_value("hide");
|
||||
state.get_summary("nv/cold/time/gpu/mean").set_string("hide", "");
|
||||
state.get_summary("nv/cold/time/cpu/mean").set_string("hide", "");
|
||||
state.get_summary("nv/cold/time/cpu/min").set_string("hide", "");
|
||||
state.get_summary("nv/cold/time/cpu/max").set_string("hide", "");
|
||||
state.get_summary("nv/cold/time/cpu/stdev/relative").set_string("hide", "");
|
||||
state.get_summary("nv/cold/sm_clock_rate/mean").remove_value("hide");
|
||||
state.get_summary("nv/cold/sm_clock_rate/scaling/percent").remove_value("hide");
|
||||
}
|
||||
NVBENCH_BENCH(summary_example)
|
||||
.add_int64_axis("ms", nvbench::range(10, 50, 20))
|
||||
.add_int64_axis("us", nvbench::range(100, 500, 200));
|
||||
@@ -51,6 +51,7 @@ void throughput_bench(nvbench::state &state)
|
||||
state.add_global_memory_writes<nvbench::int32_t>(num_values);
|
||||
|
||||
state.exec([&input, &output, num_values](nvbench::launch &launch) {
|
||||
(void)num_values; // clang thinks this is unused...
|
||||
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
|
||||
thrust::raw_pointer_cast(input.data()),
|
||||
thrust::raw_pointer_cast(output.data()),
|
||||
|
||||
@@ -6,7 +6,6 @@ set_target_properties(nvbench.ctl PROPERTIES
|
||||
EXPORT_NAME ctl
|
||||
)
|
||||
add_dependencies(nvbench.all nvbench.ctl)
|
||||
nvbench_setup_dep_dlls(nvbench.ctl)
|
||||
nvbench_install_executables(nvbench.ctl)
|
||||
|
||||
if (NVBench_ENABLE_TESTING)
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
/*
|
||||
* Copyright 2021 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
* Copyright 2021 NVIDIA Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 with the LLVM exception
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License.
|
||||
*
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://llvm.org/foundation/relicensing/LICENSE.txt
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <nvbench/main.cuh>
|
||||
|
||||
@@ -24,7 +24,7 @@ int main(int argc, char const *const *argv)
|
||||
try
|
||||
{
|
||||
// If no args, substitute a new argv that prints the version
|
||||
std::vector<const char*> alt_argv;
|
||||
std::vector<const char *> alt_argv;
|
||||
if (argc == 1)
|
||||
{
|
||||
alt_argv.push_back("--version");
|
||||
@@ -36,7 +36,7 @@ try
|
||||
NVBENCH_CUDA_CALL(cudaDeviceReset());
|
||||
return 0;
|
||||
}
|
||||
catch (std::exception & e)
|
||||
catch (std::exception &e)
|
||||
{
|
||||
std::cerr << "\nNVBench encountered an error:\n\n" << e.what() << "\n";
|
||||
return 1;
|
||||
|
||||
@@ -5,6 +5,7 @@ set(srcs
|
||||
benchmark_base.cxx
|
||||
benchmark_manager.cxx
|
||||
blocking_kernel.cu
|
||||
criterion_manager.cxx
|
||||
csv_printer.cu
|
||||
cuda_call.cu
|
||||
device_info.cu
|
||||
@@ -19,25 +20,29 @@ set(srcs
|
||||
printer_multiplex.cxx
|
||||
runner.cxx
|
||||
state.cxx
|
||||
stopping_criterion.cxx
|
||||
string_axis.cxx
|
||||
type_axis.cxx
|
||||
type_strings.cxx
|
||||
user_axis_space.cxx
|
||||
zip_axis_space.cxx
|
||||
|
||||
detail/entropy_criterion.cxx
|
||||
detail/measure_cold.cu
|
||||
detail/measure_cpu_only.cxx
|
||||
detail/measure_hot.cu
|
||||
detail/state_generator.cxx
|
||||
detail/stdrel_criterion.cxx
|
||||
detail/gpu_frequency.cxx
|
||||
detail/timestamps_kernel.cu
|
||||
|
||||
internal/nvml.cxx
|
||||
)
|
||||
|
||||
if (NVBench_ENABLE_CUPTI)
|
||||
list(APPEND srcs detail/measure_cupti.cu cupti_profiler.cxx)
|
||||
endif()
|
||||
|
||||
if (NVBench_ENABLE_NVML)
|
||||
list(APPEND srcs internal/nvml.cxx)
|
||||
endif()
|
||||
|
||||
# CUDA 11.0 can't compile json_printer without crashing
|
||||
# So for that version fall back to C++ with degraded
|
||||
# output ( no PTX version info )
|
||||
@@ -69,7 +74,7 @@ nvbench_write_config_header(config.cuh.in
|
||||
)
|
||||
|
||||
# nvbench (nvbench::nvbench)
|
||||
add_library(nvbench SHARED ${srcs})
|
||||
add_library(nvbench ${srcs})
|
||||
nvbench_config_target(nvbench)
|
||||
target_include_directories(nvbench PUBLIC
|
||||
"$<BUILD_INTERFACE:${NVBench_SOURCE_DIR}>"
|
||||
@@ -82,8 +87,29 @@ target_link_libraries(nvbench
|
||||
PRIVATE
|
||||
fmt::fmt
|
||||
nvbench_json
|
||||
nvbench_git_revision
|
||||
)
|
||||
|
||||
# ##################################################################################################
|
||||
# * conda environment -----------------------------------------------------------------------------
|
||||
rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH)
|
||||
if(TARGET conda_env)
|
||||
# When we are inside a conda env the linker will be set to
|
||||
# `ld.bfd` which will try to resolve all undefined symbols at link time.
|
||||
#
|
||||
# Since we could be using a shared library version of fmt we need
|
||||
# it on the final link line of consumers
|
||||
target_link_libraries(nvbench PRIVATE $<BUILD_INTERFACE:conda_env>)
|
||||
|
||||
# When we are inside a conda env the linker will be set to
|
||||
# `ld.bfd` which will try to resolve all undefined symbols at link time.
|
||||
#
|
||||
# Since we could be using a shared library version of fmt we need
|
||||
# it on the final link line of consumers
|
||||
if(fmt_is_external)
|
||||
target_link_libraries(nvbench PUBLIC fmt::fmt)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
target_compile_features(nvbench PUBLIC cuda_std_17 PRIVATE cxx_std_17)
|
||||
add_dependencies(nvbench.all nvbench)
|
||||
|
||||
@@ -98,7 +124,6 @@ add_dependencies(nvbench.all nvbench.main)
|
||||
add_library(nvbench::nvbench ALIAS nvbench)
|
||||
add_library(nvbench::main ALIAS nvbench.main)
|
||||
|
||||
nvbench_setup_dep_dlls(nvbench)
|
||||
nvbench_install_libraries(nvbench nvbench.main nvbench.build_interface)
|
||||
|
||||
# nvcc emits several unavoidable warnings while compiling nlohmann_json:
|
||||
@@ -111,3 +136,19 @@ if (json_is_cu)
|
||||
$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcudafe=--diag_suppress=940>
|
||||
)
|
||||
endif()
|
||||
|
||||
# The call to `rapids_cmake_write_git_revision_file` must be in the same
|
||||
# CMakeLists.txt as the consumer ( nvbench ) for CMake to get the dependency
|
||||
# graph correct.
|
||||
rapids_cmake_write_git_revision_file(
|
||||
nvbench_git_revision
|
||||
"${NVBench_BINARY_DIR}/nvbench/detail/git_revision.cuh"
|
||||
PREFIX "NVBENCH"
|
||||
)
|
||||
target_link_libraries(nvbench PRIVATE nvbench_git_revision)
|
||||
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
# Need to ensure that for static builds we export the nvbench_git_revision
|
||||
# target
|
||||
nvbench_install_libraries(nvbench_git_revision)
|
||||
endif()
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user