Squashed commit of the following:

commit 4b309e6ad8
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Sat Apr 6 13:19:14 2024 +0000

    Minor cleanups

commit 476ed2ceae
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Sat Apr 6 12:53:37 2024 +0000

    WAR compiler ICE in nlohmann json.

    Only seeing this on GCC 9 + CTK 11.1. Seems to be
    having trouble with the `[[no_unique_address]]` optimization.

commit a9bf1d3e42
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Sat Apr 6 00:24:47 2024 +0000

    Bump nlohmann json.

commit 80980fe373
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Sat Apr 6 00:22:07 2024 +0000

    Fix llvm filesystem support

commit f6099e6311
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 23:18:44 2024 +0000

    Drop MSVC 2017 testing.

commit 5ae50a8ef5
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 23:02:32 2024 +0000

    Add more missing headers.

commit b2a9ae04d9
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 22:37:56 2024 +0000

    Remove old CUDA+MSVC builds and make windows build-only.

commit 5b18c26a28
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 22:37:07 2024 +0000

    Fix header for std::min/max.

    Why do I always think it's utility instead of algorithm....

commit 6a409efa2d
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 22:18:18 2024 +0000

    Temporarily disable CUPTI on all windows builds.

commit f432f88866
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 21:42:52 2024 +0000

    Fix warnings on MSVC.

commit 829787649b
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 21:03:16 2024 +0000

    More flailing about in powershell.

commit 21742e6bea
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 20:36:08 2024 +0000

    Cleanup filesystem header handling.

commit de3d202635
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 20:09:00 2024 +0000

    Windows CI debugging.

commit a4151667ff
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 19:45:40 2024 +0000

    Quotation mark madness

commit dd04f3befe
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 19:27:27 2024 +0000

    Temporarily disable NVML on windows CI until new containers are ready.

commit f3952848c4
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 19:25:22 2024 +0000

    WAR issues on gcc-7.

commit 198986875e
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 19:25:04 2024 +0000

    More matrix/devcontainer updates.

commit b9712f8696
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 18:30:35 2024 +0000

    Fix windows build scripts.

commit 943f268280
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 18:18:33 2024 +0000

    Fix warnings with clang host compiler.

commit 7063e1d60a
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 18:14:28 2024 +0000

    More devcontainer hijinks.

commit 06532fde81
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:51:25 2024 +0000

    More matrix updates.

commit 78a265ea55
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:34:00 2024 +0000

    Support CLI CMake options for windows ci scripts.

commit 670895c867
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:31:59 2024 +0000

    Add missing devcontainers.

commit b121823e74
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:22:54 2024 +0000

    Build for `all-major` architectures in presets.

    We can get away with this because we require CMake 3.23.1.
    This was added in 3.23.

commit fccfd44685
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 17:22:08 2024 +0000

    Update matrix file.

commit e7d43ba90e
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 16:23:48 2024 +0000

    Consolidate build/test jobs.

commit c4044056ec
Author: Allison Piper <alliepiper16@gmail.com>
Date:   Fri Apr 5 16:04:11 2024 +0000

    Add missing build script.
This commit is contained in:
Allison Piper
2024-04-06 13:56:10 +00:00
parent 04b70059b8
commit e8c8877d36
39 changed files with 1031 additions and 8928 deletions

View File

@@ -1,6 +1,6 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-oneapi2023.2.0-cuda12.4-ubuntu22.04",
"image": "rapidsai/devcontainers:24.06-cpp-gcc10-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
@@ -14,11 +14,11 @@
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.4-oneapi2023.2.0",
"CCCL_CUDA_VERSION": "12.4",
"CCCL_HOST_COMPILER": "oneapi",
"CCCL_HOST_COMPILER_VERSION": "2023.2.0",
"CCCL_BUILD_INFIX": "cuda12.4-oneapi2023.2.0"
"DEVCONTAINER_NAME": "cuda12.0-gcc10",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "gcc",
"CCCL_HOST_COMPILER_VERSION": "10",
"CCCL_BUILD_INFIX": "cuda12.0-gcc10"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
@@ -42,5 +42,5 @@
}
}
},
"name": "cuda12.4-oneapi2023.2.0"
"name": "cuda12.0-gcc10"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda12.0-ubuntu22.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-gcc11",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "gcc",
"CCCL_HOST_COMPILER_VERSION": "11",
"CCCL_BUILD_INFIX": "cuda12.0-gcc11"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-gcc11"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-gcc12-cuda12.0-ubuntu22.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-gcc12",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "gcc",
"CCCL_HOST_COMPILER_VERSION": "12",
"CCCL_BUILD_INFIX": "cuda12.0-gcc12"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-gcc12"
}

View File

@@ -1,6 +1,6 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-gcc6-cuda11.1-ubuntu18.04",
"image": "rapidsai/devcontainers:24.06-cpp-gcc7-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
@@ -14,11 +14,11 @@
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda11.1-gcc6",
"CCCL_CUDA_VERSION": "11.1",
"DEVCONTAINER_NAME": "cuda12.0-gcc7",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "gcc",
"CCCL_HOST_COMPILER_VERSION": "6",
"CCCL_BUILD_INFIX": "cuda11.1-gcc6"
"CCCL_HOST_COMPILER_VERSION": "7",
"CCCL_BUILD_INFIX": "cuda12.0-gcc7"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
@@ -42,5 +42,5 @@
}
}
},
"name": "cuda11.1-gcc6"
"name": "cuda12.0-gcc7"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-gcc8-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-gcc8",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "gcc",
"CCCL_HOST_COMPILER_VERSION": "8",
"CCCL_BUILD_INFIX": "cuda12.0-gcc8"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-gcc8"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-gcc9-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-gcc9",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "gcc",
"CCCL_HOST_COMPILER_VERSION": "9",
"CCCL_BUILD_INFIX": "cuda12.0-gcc9"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-gcc9"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-llvm10-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-llvm10",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "llvm",
"CCCL_HOST_COMPILER_VERSION": "10",
"CCCL_BUILD_INFIX": "cuda12.0-llvm10"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-llvm10"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-llvm11-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-llvm11",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "llvm",
"CCCL_HOST_COMPILER_VERSION": "11",
"CCCL_BUILD_INFIX": "cuda12.0-llvm11"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-llvm11"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-llvm12-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-llvm12",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "llvm",
"CCCL_HOST_COMPILER_VERSION": "12",
"CCCL_BUILD_INFIX": "cuda12.0-llvm12"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-llvm12"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-llvm13-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-llvm13",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "llvm",
"CCCL_HOST_COMPILER_VERSION": "13",
"CCCL_BUILD_INFIX": "cuda12.0-llvm13"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-llvm13"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-llvm14-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-llvm14",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "llvm",
"CCCL_HOST_COMPILER_VERSION": "14",
"CCCL_BUILD_INFIX": "cuda12.0-llvm14"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-llvm14"
}

View File

@@ -0,0 +1,46 @@
{
"shutdownAction": "stopContainer",
"image": "rapidsai/devcontainers:24.06-cpp-llvm9-cuda12.0-ubuntu20.04",
"hostRequirements": {
"gpu": "optional"
},
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
],
"containerEnv": {
"SCCACHE_REGION": "us-east-2",
"SCCACHE_BUCKET": "rapids-sccache-devs",
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
"DEVCONTAINER_NAME": "cuda12.0-llvm9",
"CCCL_CUDA_VERSION": "12.0",
"CCCL_HOST_COMPILER": "llvm",
"CCCL_HOST_COMPILER_VERSION": "9",
"CCCL_BUILD_INFIX": "cuda12.0-llvm9"
},
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"xaver.clang-format"
],
"settings": {
"editor.defaultFormatter": "xaver.clang-format",
"clang-format.executable": "/usr/local/bin/clang-format",
"clangd.arguments": [
"--compile-commands-dir=${workspaceFolder}"
]
}
}
},
"name": "cuda12.0-llvm9"
}

View File

@@ -18,30 +18,15 @@ permissions:
contents: read
jobs:
build:
name: Build ${{inputs.test_name}}
build-and-test:
name: Build/Test ${{inputs.test_name}}
permissions:
id-token: write
contents: read
uses: ./.github/workflows/run-as-coder.yml
with:
name: Build ${{inputs.test_name}}
runner: linux-${{inputs.cpu}}-cpu16
name: Build/Test ${{inputs.test_name}}
runner: linux-${{inputs.cpu}}-gpu-v100-latest-1
image: ${{ inputs.container_image }}
command: |
${{ inputs.build_script }}
test:
needs: build
permissions:
id-token: write
contents: read
if: ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}}
name: Test ${{inputs.test_name}}
uses: ./.github/workflows/run-as-coder.yml
with:
name: Test ${{inputs.test_name}}
runner: linux-${{inputs.cpu}}-gpu-v100-latest-1
image: ${{inputs.container_image}}
command: |
${{ inputs.test_script }}

View File

@@ -5,11 +5,12 @@ on:
inputs:
test_name: {type: string, required: false}
build_script: {type: string, required: false}
test_script: {type: string, required: false}
container_image: {type: string, required: false}
jobs:
prepare:
name: Build ${{inputs.test_name}}
name: Build Only ${{inputs.test_name}}
runs-on: windows-amd64-cpu16
permissions:
id-token: write
@@ -41,9 +42,8 @@ jobs:
[System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}')
[System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}')
[System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}')
git clone https://github.com/NVIDIA/cccl.git;
cd cccl;
git clone https://github.com/NVIDIA/nvbench.git;
cd nvbench;
git fetch --all;
git checkout ${{github.ref_name}};
${{inputs.build_script}};"

View File

@@ -29,10 +29,9 @@ jobs:
with:
cpu: ${{ matrix.cpu }}
test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} ${{matrix.extra_build_args}}
build_script: './ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"'
test_script: './ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"'
build_script: "./ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} ${{matrix.extra_build_args}}"
test_script: "./ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} ${{matrix.extra_build_args}}"
container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') && matrix.os != 'windows-2022' }}
build_and_test_windows:
name: build and test windows
@@ -47,5 +46,6 @@ jobs:
include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
with:
test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 -std ${{matrix.std}}"
build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 -std ${{matrix.std}} ${{matrix.extra_build_args}}"
test_script: "./ci/windows/test_${{ inputs.project_name }}.ps1 -std ${{matrix.std}} ${{matrix.extra_build_args}}"
container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}}

View File

@@ -13,7 +13,7 @@
"binaryDir": "${sourceDir}/build/$env{CCCL_BUILD_INFIX}/${presetName}",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"CMAKE_CUDA_ARCHITECTURES": "60;70;80",
"CMAKE_CUDA_ARCHITECTURES": "all-major",
"NVBench_ENABLE_CUPTI": true,
"NVBench_ENABLE_DEVICE_TESTING": false,
"NVBench_ENABLE_EXAMPLES": true,

View File

@@ -1,26 +1,19 @@
cuda_prev_min: &cuda_prev_min '11.1'
cuda_prev_max: &cuda_prev_max '11.8'
cuda_curr: &cuda_curr '12.4'
# The GPUs to test on
gpus:
- 'a100'
- 'v100'
cuda_prev_min: &cuda_prev_min '11.1' # Does not support the CUPTI APIs we use (added in 11.3)
cuda_prev_max: &cuda_prev_max '11.8'
cuda_curr_min: &cuda_curr_min '12.0'
cuda_curr_max: &cuda_curr_max '12.4'
# The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers
devcontainer_version: '24.06'
# gcc compiler configurations
gcc6: &gcc6 { name: 'gcc', version: '6', exe: 'g++' }
gcc7: &gcc7 { name: 'gcc', version: '7', exe: 'g++' }
gcc8: &gcc8 { name: 'gcc', version: '8', exe: 'g++' }
gcc9: &gcc9 { name: 'gcc', version: '9', exe: 'g++' }
gcc10: &gcc10 { name: 'gcc', version: '10', exe: 'g++' }
gcc11: &gcc11 { name: 'gcc', version: '11', exe: 'g++' }
gcc12: &gcc12 { name: 'gcc', version: '12', exe: 'g++' }
gcc-oldest: &gcc-oldest { name: 'gcc', version: '6', exe: 'g++' }
gcc-newest: &gcc-newest { name: 'gcc', version: '12', exe: 'g++' }
# LLVM Compiler configurations
llvm9: &llvm9 { name: 'llvm', version: '9', exe: 'clang++' }
@@ -31,17 +24,11 @@ llvm13: &llvm13 { name: 'llvm', version: '13', exe: 'clang++' }
llvm14: &llvm14 { name: 'llvm', version: '14', exe: 'clang++' }
llvm15: &llvm15 { name: 'llvm', version: '15', exe: 'clang++' }
llvm16: &llvm16 { name: 'llvm', version: '16', exe: 'clang++' }
llvm-oldest: &llvm-oldest { name: 'llvm', version: '9', exe: 'clang++' }
llvm-newest: &llvm-newest { name: 'llvm', version: '16', exe: 'clang++' }
# MSVC configs
msvc2017: &msvc2017 { name: 'cl', version: '14.16', exe: 'cl++' }
msvc2019: &msvc2019 { name: 'cl', version: '14.29', exe: 'cl++' }
msvc2022: &msvc2022 { name: 'cl', version: '14.39', exe: 'cl++' }
# oneAPI configs
oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' }
# Each environment below will generate a unique build/test job
# See the "compute-matrix" job in the workflow for how this is parsed and used
# cuda: The CUDA Toolkit version
@@ -57,29 +44,36 @@ oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' }
# Configurations that will run for every PR
pull_request:
nvcc:
- {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [17], jobs: ['build']}
- {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [17], jobs: ['build']}
- {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [17], jobs: ['build']}
- {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [17], jobs: ['build']}
- {cuda: *cuda_prev_min, os: 'windows2022', cpu: 'amd64', compiler: *msvc2017, std: [17], jobs: ['build']}
- {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90'}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [17], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'}
- {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [17], jobs: ['build', 'test']}
- {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [17], jobs: ['build', 'test']}
- {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'arm64', compiler: *llvm16, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [17], jobs: ['build']}
- {cuda: *cuda_curr, os: 'ubuntu22.04', cpu: 'amd64', compiler: *oneapi, std: [17], jobs: ['build']}
- {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc7, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"}
- {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc8, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"}
- {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *gcc9, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"}
- {cuda: *cuda_prev_min, os: 'ubuntu18.04', cpu: 'amd64', compiler: *llvm9, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF'"}
- {cuda: *cuda_prev_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [17]}
- {cuda: *cuda_curr_min, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc7, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc8, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm10, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm11, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm12, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm13, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm14, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm15, std: [17]}
- {cuda: *cuda_curr_max, os: 'ubuntu22.04', cpu: 'amd64', compiler: *llvm16, std: [17]}
- {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2019, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"}
- {cuda: *cuda_curr_max, os: 'windows2022', cpu: 'amd64', compiler: *msvc2022, std: [17], extra_build_args: "-cmake-options '-DNVBench_ENABLE_CUPTI=OFF -DNVBench_ENABLE_NVML=OFF'"}

381
ci/ninja_summary.py Executable file
View File

@@ -0,0 +1,381 @@
#!/usr/bin/env python3
# Copyright (c) 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
r"""Summarize the last ninja build, invoked with ninja's -C syntax.
This script is designed to be automatically run after each ninja build in
order to summarize the build's performance. Making build performance information
more visible should make it easier to notice anomalies and opportunities. To use
this script on Windows just set NINJA_SUMMARIZE_BUILD=1 and run autoninja.bat.
On Linux you can get autoninja to invoke this script using this syntax:
$ NINJA_SUMMARIZE_BUILD=1 autoninja -C out/Default/ chrome
You can also call this script directly using ninja's syntax to specify the
output directory of interest:
> python3 post_build_ninja_summary.py -C out/Default
Typical output looks like this:
>ninja -C out\debug_component base
ninja.exe -C out\debug_component base -j 960 -l 48 -d keeprsp
ninja: Entering directory `out\debug_component'
[1 processes, 1/1 @ 0.3/s : 3.092s ] Regenerating ninja files
Longest build steps:
0.1 weighted s to build obj/base/base/trace_log.obj (6.7 s elapsed time)
0.2 weighted s to build nasm.exe, nasm.exe.pdb (0.2 s elapsed time)
0.3 weighted s to build obj/base/base/win_util.obj (12.4 s elapsed time)
1.2 weighted s to build base.dll, base.dll.lib (1.2 s elapsed time)
Time by build-step type:
0.0 s weighted time to generate 6 .lib files (0.3 s elapsed time sum)
0.1 s weighted time to generate 25 .stamp files (1.2 s elapsed time sum)
0.2 s weighted time to generate 20 .o files (2.8 s elapsed time sum)
1.7 s weighted time to generate 4 PEFile (linking) files (2.0 s elapsed
time sum)
23.9 s weighted time to generate 770 .obj files (974.8 s elapsed time sum)
26.1 s weighted time (982.9 s elapsed time sum, 37.7x parallelism)
839 build steps completed, average of 32.17/s
If no gn clean has been done then results will be for the last non-NULL
invocation of ninja. Ideas for future statistics, and implementations are
appreciated.
The "weighted" time is the elapsed time of each build step divided by the number
of tasks that were running in parallel. This makes it an excellent approximation
of how "important" a slow step was. A link that is entirely or mostly serialized
will have a weighted time that is the same or similar to its elapsed time. A
compile that runs in parallel with 999 other compiles will have a weighted time
that is tiny."""
import argparse
import errno
import fnmatch
import os
import subprocess
import sys
# The number of long build times to report:
long_count = 10
# The number of long times by extension to report
long_ext_count = 10
class Target:
    """A build step parsed from a .ninja_log file.

    Attributes are plain public fields:
      start, end: step start/end times, in seconds (floats).
      targets: output names produced by the step; filled in by the owner.
      weighted_duration: set later through SetWeightedDuration().
    """

    def __init__(self, start, end):
        """Stores the start/end times (seconds, as floats) of the step."""
        self.start = start
        self.end = end
        # Output names; the code that owns this object appends to the list.
        self.targets = []
        self.weighted_duration = 0.0

    def Duration(self):
        """Elapsed wall-clock time of the step, in seconds."""
        elapsed = self.end - self.start
        return elapsed

    def SetWeightedDuration(self, weighted_duration):
        """Records the weighted duration (seconds, float) for this step."""
        self.weighted_duration = weighted_duration

    def WeightedDuration(self):
        """Returns the stored weighted duration, in seconds.

        The weighted duration is the elapsed time scaled down by how many
        steps ran concurrently, so it approximates this step's share of the
        total build time. It must never exceed Duration() (beyond a small
        floating-point tolerance); that invariant is asserted here.
        """
        # Tolerance for modest floating-point error.
        tolerance = 0.000002
        limit = self.Duration() + tolerance
        if self.weighted_duration > limit:
            print('%s > %s?' % (self.weighted_duration, self.Duration()))
        assert self.weighted_duration <= limit
        return self.weighted_duration

    def DescribeTargets(self):
        """Returns the output names joined by ', ', truncated for display."""
        # Steps can have dozens of outputs; 65 characters fits most long
        # single-output names while keeping word wrapping to a minimum.
        max_length = 65
        joined = ', '.join(self.targets)
        if len(joined) <= max_length:
            return joined
        return joined[:max_length] + '...'
# Copied with some modifications from ninjatracing
def ReadTargets(log, show_all):
    """Reads build steps from the open .ninja_log file object |log|.

    Args:
      log: an iterable text file object positioned at the start of the log.
      show_all: when false, only the steps of the most recent build are kept;
        earlier builds recorded in the same log are discarded.

    Returns:
      A list of Target objects, one per distinct command hash, in the order
      the commands were first seen. The list is empty for an empty log.

    Raises:
      AssertionError: if the header is not the v5 ninja log header.
    """
    header = log.readline()
    # Handle empty ninja_log gracefully by silently returning an empty list of
    # targets.
    if not header:
        return []
    assert header == '# ninja log v5\n', \
        'unrecognized ninja log version %r' % header
    targets_dict = {}
    last_end_seen = 0.0
    for line in log:
        parts = line.strip().split('\t')
        if len(parts) != 5:
            # If ninja.exe is rudely halted then the .ninja_log file may be
            # corrupt. Silently continue.
            continue
        start, end, _, name, cmdhash = parts  # Ignore restat.
        try:
            # Convert from integral milliseconds to float seconds.
            start = int(start) / 1000.0
            end = int(end) / 1000.0
        except ValueError:
            # A garbled record can still have five fields; treat it like any
            # other corrupt line instead of aborting the whole summary.
            continue
        if not show_all and end < last_end_seen:
            # An earlier time stamp means that this step is the first in a new
            # build, possibly an incremental build. Throw away the previous
            # data so that this new build will be displayed independently.
            # This has to be done by comparing end times because records are
            # written to the .ninja_log file when commands complete, so end
            # times are guaranteed to be in order, but start times are not.
            targets_dict = {}
        target = None
        if cmdhash in targets_dict:
            target = targets_dict[cmdhash]
            if not show_all and (target.start != start or target.end != end):
                # If several builds in a row just run one or two build steps
                # then the end times may not go backwards so the last build may
                # not be detected as such. However in many cases there will be a
                # build step repeated in the two builds and the changed
                # start/stop points for that command, identified by the hash,
                # can be used to detect and reset the target dictionary.
                targets_dict = {}
                target = None
        if not target:
            targets_dict[cmdhash] = target = Target(start, end)
        last_end_seen = end
        target.targets.append(name)
    return list(targets_dict.values())
def GetExtension(target, extra_patterns):
    """Return the file extension that best represents a target.

    For targets that generate multiple outputs it is important to return a
    consistent 'canonical' extension. Ultimately the goal is to group build
    steps by type.

    Args:
      target: object whose |targets| attribute lists the step's output names.
      extra_patterns: semicolon separated fnmatch patterns, or None/''. An
        output containing a match for one of them is grouped under that
        pattern. Empty segments (e.g. from a trailing ';') are ignored.

    Returns:
      The grouping label as a string; '(no extension found)' when no output
      yields an extension (including when there are no outputs at all).
    """
    # Split once up front. Empty segments are dropped: '*' + '' + '*' would
    # match every output and lump all steps under an empty label.
    patterns = []
    if extra_patterns:
        patterns = [p for p in extra_patterns.split(';') if p]

    # Default covers a target with no recorded outputs.
    extension = '(no extension found)'
    for output in target.targets:
        for fn_pattern in patterns:
            if fnmatch.fnmatch(output, '*' + fn_pattern + '*'):
                return fn_pattern
        # Not a true extension, but a good grouping.
        if output.endswith('type_mappings'):
            extension = 'type_mappings'
            break

        # Capture two extensions if present. For example: file.javac.jar should
        # be distinguished from file.interface.jar.
        root, ext1 = os.path.splitext(output)
        _, ext2 = os.path.splitext(root)
        extension = ext2 + ext1  # Preserve the order in the file name.

        if len(extension) == 0:
            extension = '(no extension found)'

        if ext1 in ['.pdb', '.dll', '.exe']:
            extension = 'PEFile (linking)'
            # Make sure that .dll and .exe are grouped together and that the
            # .dll.lib files don't cause these to be listed as libraries
            break
        if ext1 in ['.so', '.TOC']:
            extension = '.so (linking)'
            # Attempt to identify linking, avoid identifying as '.TOC'
            break
        # Make sure .obj files don't get categorized as mojo files
        if ext1 in ['.obj', '.o']:
            break
        # Jars are the canonical output of java targets.
        if ext1 == '.jar':
            break
        # Normalize all mojo related outputs to 'mojo'.
        if output.count('.mojom') > 0:
            extension = 'mojo'
            break
    return extension
def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
    """Print a summary of the passed in list of Target objects.

    Args:
      entries: list of Target objects. It is sorted in place, and each
        target's weighted duration is set as a side effect. An empty list
        prints nothing.
      extra_step_types: semicolon separated fnmatch patterns forwarded to
        GetExtension() for grouping, or None.
      elapsed_time_sorting: when true, sort by elapsed time instead of
        weighted time.
    """
    # Nothing to report; also avoids indexing into an empty event list below.
    if not entries:
        return

    # Create a list that is in order by time stamp and has entries for the
    # beginning and ending of each build step (one time stamp may have multiple
    # entries due to multiple steps starting/stopping at exactly the same time).
    # Iterate through this list, keeping track of which tasks are running at all
    # times. At each time step calculate a running total for weighted time so
    # that when each task ends its own weighted time can easily be calculated.
    task_start_stop_times = []

    earliest = -1
    latest = 0
    total_cpu_time = 0
    for target in entries:
        if earliest < 0 or target.start < earliest:
            earliest = target.start
        if target.end > latest:
            latest = target.end
        total_cpu_time += target.Duration()
        task_start_stop_times.append((target.start, 'start', target))
        task_start_stop_times.append((target.end, 'stop', target))
    length = latest - earliest
    weighted_total = 0.0

    # Sort by the time/type records and ignore |target|
    task_start_stop_times.sort(key=lambda times: times[:2])
    # Now we have all task start/stop times sorted by when they happen. If a
    # task starts and stops on the same time stamp then the start will come
    # first because of the alphabet, which is important for making this work
    # correctly.
    # Track the tasks which are currently running.
    running_tasks = {}
    # Record the time we have processed up to so we know how to calculate time
    # deltas.
    last_time = task_start_stop_times[0][0]
    # Track the accumulated weighted time so that it can efficiently be added
    # to individual tasks.
    last_weighted_time = 0.0
    # Scan all start/stop events.
    for event in task_start_stop_times:
        time, action_name, target = event
        # Accumulate weighted time up to now.
        num_running = len(running_tasks)
        if num_running > 0:
            # Update the total weighted time up to this moment.
            last_weighted_time += (time - last_time) / float(num_running)
        if action_name == 'start':
            # Record the total weighted task time when this task starts.
            running_tasks[target] = last_weighted_time
        if action_name == 'stop':
            # Record the change in the total weighted task time while this task
            # ran.
            weighted_duration = last_weighted_time - running_tasks[target]
            target.SetWeightedDuration(weighted_duration)
            weighted_total += weighted_duration
            del running_tasks[target]
        last_time = time
    assert (len(running_tasks) == 0)

    # Warn if the sum of weighted times is off by more than half a second.
    # All times here are in seconds, so the threshold is 0.5, not 500.
    if abs(length - weighted_total) > 0.5:
        print('Warning: Possible corrupt ninja log, results may be '
              'untrustworthy. Length = %.3f, weighted total = %.3f' %
              (length, weighted_total))

    # Print the slowest build steps:
    print(' Longest build steps:')
    if elapsed_time_sorting:
        entries.sort(key=lambda x: x.Duration())
    else:
        entries.sort(key=lambda x: x.WeightedDuration())
    for target in entries[-long_count:]:
        print(' %8.1f weighted s to build %s (%.1f s elapsed time)' %
              (target.WeightedDuration(), target.DescribeTargets(),
               target.Duration()))

    # Sum up the time by file extension/type of the output file
    count_by_ext = {}
    time_by_ext = {}
    weighted_time_by_ext = {}
    # Scan through all of the targets to build up per-extension statistics.
    for target in entries:
        extension = GetExtension(target, extra_step_types)
        time_by_ext[extension] = time_by_ext.get(extension,
                                                 0) + target.Duration()
        weighted_time_by_ext[extension] = weighted_time_by_ext.get(
            extension, 0) + target.WeightedDuration()
        count_by_ext[extension] = count_by_ext.get(extension, 0) + 1

    print(' Time by build-step type:')
    # Copy to a list with extension name and total time swapped, to (time, ext)
    if elapsed_time_sorting:
        weighted_time_by_ext_sorted = sorted(
            (y, x) for (x, y) in time_by_ext.items())
    else:
        weighted_time_by_ext_sorted = sorted(
            (y, x) for (x, y) in weighted_time_by_ext.items())
    # Print the slowest build target types:
    for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
        print(
            ' %8.1f s weighted time to generate %d %s files '
            '(%1.1f s elapsed time sum)' %
            (time, count_by_ext[extension], extension, time_by_ext[extension]))

    # A build whose steps all start and end on the same time stamp has zero
    # length; report 0 for the derived rates rather than dividing by zero.
    if length > 0:
        parallelism = total_cpu_time * 1.0 / length
        steps_per_second = len(entries) / length
    else:
        parallelism = 0.0
        steps_per_second = 0.0
    print(' %.1f s weighted time (%.1f s elapsed time sum, %1.1fx '
          'parallelism)' %
          (length, total_cpu_time, parallelism))
    print(' %d build steps completed, average of %1.2f/s' %
          (len(entries), steps_per_second))
def main():
    """Command-line entry point: summarize the last build in a directory.

    Parses -C/--step-types/--elapsed_time_sorting/--log-file (unknown
    arguments are ignored). If a siso_metrics.json file exists, the work is
    delegated to `siso metrics summary`; otherwise the ninja log is read and
    summarized.

    Returns:
      errno.ENOENT when opening/reading the log raises IOError, else None.
    """
    global long_ext_count

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-C',
                            dest='build_directory',
                            help='Build directory.')
    arg_parser.add_argument(
        '-s',
        '--step-types',
        help='semicolon separated fnmatch patterns for build-step grouping')
    arg_parser.add_argument(
        '-e',
        '--elapsed_time_sorting',
        default=False,
        action='store_true',
        help='Sort output by elapsed time instead of weighted time')
    arg_parser.add_argument('--log-file',
                            help="specific ninja log file to analyze.")
    options, _unused = arg_parser.parse_known_args()

    # Both files live in the build directory when one is given.
    build_dir = options.build_directory
    if build_dir:
        ninja_log_path = os.path.join(build_dir, '.ninja_log')
        siso_metrics_path = os.path.join(build_dir, 'siso_metrics.json')
    else:
        ninja_log_path = '.ninja_log'
        siso_metrics_path = 'siso_metrics.json'
    # An explicit --log-file wins over the build-directory default.
    if options.log_file:
        ninja_log_path = options.log_file

    # Fall back to the environment so extra step types can be supplied
    # automatically (e.g. when invoked by autoninja). get() yields None when
    # the variable is unset.
    step_types = options.step_types or os.environ.get('chromium_step_types')
    if step_types:
        # Make room for the extra build types.
        long_ext_count += len(step_types.split(';'))

    if os.path.exists(siso_metrics_path):
        # Automatically handle summarizing siso builds.
        siso_binary = 'siso.bat' if 'win32' in sys.platform else 'siso'
        siso_cmd = [siso_binary, 'metrics', 'summary']
        if build_dir:
            siso_cmd += ['-C', build_dir]
        if step_types:
            siso_cmd += ['--step_types', step_types]
        if options.elapsed_time_sorting:
            siso_cmd.append('--elapsed_time_sorting')
        subprocess.run(siso_cmd)
        return None

    try:
        with open(ninja_log_path, 'r') as log:
            entries = ReadTargets(log, False)
            if entries:
                SummarizeEntries(entries, step_types,
                                 options.elapsed_time_sorting)
    except IOError:
        print('Log file %r not found, no build summary created.' %
              ninja_log_path)
        return errno.ENOENT
    return None


if __name__ == '__main__':
    sys.exit(main())

View File

@@ -73,7 +73,9 @@ function configure_preset {
# CMake must be invoked in the same directory as the presets file:
pushd ".."
cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE
$cmake_command = "cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE"
echo "$cmake_command"
Invoke-Expression $cmake_command
$test_result = $LastExitCode
If ($test_result -ne 0) {

View File

@@ -4,7 +4,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(17)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[Alias("cmake-options")]
[ValidateNotNullOrEmpty()]
[string]$ARG_CMAKE_OPTIONS = ""
)
$CURRENT_PATH = Split-Path $pwd -leaf
@@ -19,6 +23,11 @@ Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
$PRESET = "nvbench-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
# Append any arguments passed in on the command line
If($ARG_CMAKE_OPTIONS -ne "") {
$CMAKE_OPTIONS += "$ARG_CMAKE_OPTIONS"
}
configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS"
If($CURRENT_PATH -ne "ci") {

View File

@@ -0,0 +1,36 @@
# Configures and builds the "NVBench" preset for the requested C++ standard,
# then calls test_preset on it.
#
# Parameters:
#   -std            C++ standard to build with; 17 is the only accepted value.
#   -cmake-options  Optional string of extra options appended to the CMake
#                   options passed to configure_and_build_preset.
Param(
[Parameter(Mandatory = $true)]
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(17)]
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[Alias("cmake-options")]
[ValidateNotNullOrEmpty()]
[string]$ARG_CMAKE_OPTIONS = ""
)
# Leaf name of the current directory; used to decide whether to change
# directory now and whether to restore it at the end.
$CURRENT_PATH = Split-Path $pwd -leaf
If($CURRENT_PATH -ne "ci") {
Write-Host "Moving to ci folder"
# NOTE(review): presumably this script lives one level below ci/, so the
# parent of $PSScriptRoot is the ci folder - confirm against the repo layout.
pushd "$PSScriptRoot/.."
}
# Unload any previously imported build_common before importing it again with
# the current C++ standard as its argument.
# NOTE(review): Remove-Module reports an error when the module is not loaded;
# presumably that is tolerated here - confirm.
Remove-Module -Name build_common
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
# Preset name is derived from the standard, e.g. "nvbench-cpp17".
$PRESET = "nvbench-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
# Append any arguments passed in on the command line
If($ARG_CMAKE_OPTIONS -ne "") {
$CMAKE_OPTIONS += "$ARG_CMAKE_OPTIONS"
}
# configure_and_build_preset / test_preset are not defined in this script;
# presumably they are exported by build_common.psm1.
configure_and_build_preset "NVBench" "$PRESET" "$CMAKE_OPTIONS"
test_preset "NVBench" "$PRESET"
# Undo the pushd performed above, if any.
If($CURRENT_PATH -ne "ci") {
popd
}

View File

@@ -29,7 +29,6 @@ function(nvbench_add_cxx_flag target_name type flag)
target_compile_options(${target_name} ${type}
$<$<COMPILE_LANGUAGE:CXX>:${flag}>
$<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:-Xcompiler=${flag}>
# FIXME nvc++ case
)
endif()
endfunction()
@@ -64,8 +63,8 @@ else()
endif()
endif()
# GCC-specific flags
if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
# Experimental filesystem library
if (CMAKE_CXX_COMPILER_ID STREQUAL GNU OR CMAKE_CXX_COMPILER_ID STREQUAL Clang)
target_link_libraries(nvbench.build_interface INTERFACE stdc++fs)
endif()

View File

@@ -24,24 +24,16 @@ endif()
# Following recipe from
# http://github.com/cpm-cmake/CPM.cmake/blob/master/examples/json/CMakeLists.txt
# Download the zips because the repo takes an excessively long time to clone.
rapids_cpm_find(nlohmann_json 3.9.1
# Release:
rapids_cpm_find(nlohmann_json 3.11.3
CPM_ARGS
URL https://github.com/nlohmann/json/releases/download/v3.9.1/include.zip
URL_HASH SHA256=6bea5877b1541d353bd77bdfbdb2696333ae5ed8f9e8cc22df657192218cad91
PATCH_COMMAND
# Work around compiler bug in nvcc 11.0, see NVIDIA/NVBench#18
${CMAKE_COMMAND} -E copy
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/nlohmann_json.hpp"
"./include/nlohmann/json.hpp"
# Development version:
# I'm waiting for https://github.com/nlohmann/json/issues/2676 to be fixed,
# leave this in to simplify testing patches as they come out.
# CPM_ARGS
# VERSION develop
# URL https://github.com/nlohmann/json/archive/refs/heads/develop.zip
# OPTIONS JSON_MultipleHeaders ON
URL https://github.com/nlohmann/json/releases/download/v3.11.3/include.zip
URL_HASH SHA256=a22461d13119ac5c78f205d3df1db13403e58ce1bb1794edc9313677313f4a9d
PATCH_COMMAND
${CMAKE_COMMAND}
-D "CUDA_VERSION=${CMAKE_CUDA_COMPILER_VERSION}"
-D "CXX_VERSION=${CMAKE_CXX_COMPILER_VERSION}"
-D "CXX_ID=${CMAKE_CXX_COMPILER_ID}"
-P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/json_unordered_map_ice.cmake"
)
add_library(nvbench_json INTERFACE IMPORTED)

View File

@@ -0,0 +1,22 @@
# NVCC 11.1 and GCC 9 need a patch to build, otherwise:
#
# nlohmann/ordered_map.hpp(29): error #3316:
# Internal Compiler Error (codegen): "internal error during structure layout!"
#
# The patch is applied when CUDA_VERSION is not greater than 11.8 and the host
# compiler is GNU with a version of at least 9.0. The header is looked up
# relative to the current working directory.
#
# Usage:
# ${CMAKE_COMMAND}
# -D "CUDA_VERSION=${CMAKE_CUDA_COMPILER_VERSION}"
# -D "CXX_VERSION=${CMAKE_CXX_COMPILER_VERSION}"
# -D "CXX_ID=${CMAKE_CXX_COMPILER_ID}"
# -P "json_unordered_map_ice.cmake"
if(CUDA_VERSION VERSION_GREATER 11.8 OR NOT CXX_ID STREQUAL "GNU" OR CXX_VERSION VERSION_LESS 9.0)
  return()
endif()

file(READ "include/nlohmann/ordered_map.hpp" NLOHMANN_ORDERED_MAP_HPP)

# Do nothing if the header was already patched. Re-running the replacement
# would match the token inside the inserted comment and produce nested
# /* ... */ comments, which C++ does not support.
string(FIND "${NLOHMANN_ORDERED_MAP_HPP}" "[NVBench Patch]" NVBENCH_PATCH_MARKER_POS)
if(NOT NVBENCH_PATCH_MARKER_POS EQUAL -1)
  return()
endif()

# Replace the string "JSON_NO_UNIQUE_ADDRESS" with
# "/* [NVBench Patch] JSON_NO_UNIQUE_ADDRESS */".
string(REPLACE "JSON_NO_UNIQUE_ADDRESS" "/* [NVBench Patch] JSON_NO_UNIQUE_ADDRESS */"
       NLOHMANN_ORDERED_MAP_HPP "${NLOHMANN_ORDERED_MAP_HPP}")
file(WRITE "include/nlohmann/ordered_map.hpp" "${NLOHMANN_ORDERED_MAP_HPP}")

File diff suppressed because it is too large Load Diff

View File

@@ -56,8 +56,8 @@ NVBENCH_BENCH(single_float64_axis)
void copy_sweep_grid_shape(nvbench::state &state)
{
// Get current parameters:
const int block_size = static_cast<int>(state.get_int64("BlockSize"));
const int num_blocks = static_cast<int>(state.get_int64("NumBlocks"));
const auto block_size = static_cast<unsigned int>(state.get_int64("BlockSize"));
const auto num_blocks = static_cast<unsigned int>(state.get_int64("NumBlocks"));
// Number of int32s in 256 MiB:
const std::size_t num_values = 256 * 1024 * 1024 / sizeof(nvbench::int32_t);
@@ -77,6 +77,7 @@ void copy_sweep_grid_shape(nvbench::state &state)
num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(
in_ptr,
out_ptr,
@@ -110,6 +111,7 @@ void copy_type_sweep(nvbench::state &state, nvbench::type_list<ValueType>)
[num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr,
out_ptr,
num_values);
@@ -156,6 +158,7 @@ void copy_type_conversion_sweep(nvbench::state &state,
[num_values,
in_ptr = thrust::raw_pointer_cast(in.data()),
out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(in_ptr,
out_ptr,
num_values);

View File

@@ -36,7 +36,7 @@ public:
protected:
// Setup the criterion in the `do_initialize()` method:
virtual void do_initialize() override
virtual void do_initialize() override
{
m_num_samples = 0;
}
@@ -71,6 +71,7 @@ void throughput_bench(nvbench::state &state)
state.add_global_memory_writes<nvbench::int32_t>(num_values);
state.exec(nvbench::exec_tag::no_batch, [&input, &output, num_values](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()),

View File

@@ -54,6 +54,8 @@ void mod2_inplace(nvbench::state &state)
state.exec(nvbench::exec_tag::timer,
// Lambda now takes a `timer` argument:
[&input, &data, num_values](nvbench::launch &launch, auto &timer) {
(void) num_values; // clang thinks this is unused...
// Reset working data:
thrust::copy(thrust::device.on(launch.get_stream()),
input.cbegin(),

View File

@@ -52,6 +52,7 @@ void stream_bench(nvbench::state &state)
state.set_cuda_stream(nvbench::make_cuda_stream_view(default_stream));
state.exec([&input, &output, num_values](nvbench::launch &) {
(void) num_values; // clang thinks this is unused...
copy(thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()),
num_values);

View File

@@ -51,6 +51,7 @@ void throughput_bench(nvbench::state &state)
state.add_global_memory_writes<nvbench::int32_t>(num_values);
state.exec([&input, &output, num_values](nvbench::launch &launch) {
(void) num_values; // clang thinks this is unused...
nvbench::copy_kernel<<<256, 256, 0, launch.get_stream()>>>(
thrust::raw_pointer_cast(input.data()),
thrust::raw_pointer_cast(output.data()),

View File

@@ -19,6 +19,13 @@
#include <nvbench/criterion_manager.cuh>
#include <nvbench/detail/throw.cuh>
#include <algorithm>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
namespace nvbench
{
@@ -60,7 +67,7 @@ stopping_criterion_base &criterion_manager::add(std::unique_ptr<stopping_criteri
auto [it, success] = m_map.emplace(name, std::move(criterion));
if (!success)
if (!success)
{
NVBENCH_THROW(std::runtime_error,
"Stopping criterion \"{}\" is already registered.", name);

View File

@@ -43,8 +43,8 @@ measure_cold_base::measure_cold_base(state &exec_state)
{
if (m_min_samples > 0)
{
m_cuda_times.reserve(m_min_samples);
m_cpu_times.reserve(m_min_samples);
m_cuda_times.reserve(static_cast<std::size_t>(m_min_samples));
m_cpu_times.reserve(static_cast<std::size_t>(m_min_samples));
}
}

View File

@@ -27,7 +27,7 @@
#include <cuda_runtime.h>
#include <utility>
#include <algorithm>
namespace nvbench
{

View File

@@ -43,10 +43,14 @@
#include <utility>
#include <vector>
#if defined __GNUC__ && !defined __clang__
#include <experimental/filesystem>
#else
#if __has_include(<filesystem>)
#include <filesystem>
namespace fs = std::filesystem;
#elif __has_include(<experimental/filesystem>)
#include <experimental/filesystem>
namespace fs = std::experimental::filesystem;
#else
static_assert(false, "No <filesystem> or <experimental/filesystem> found.");
#endif
#if NVBENCH_CPP_DIALECT >= 2020
@@ -140,12 +144,6 @@ void json_printer::do_process_bulk_data_float64(state &state,
if (hint == "sample_times")
{
#if defined __GNUC__ && !defined __clang__
namespace fs = std::experimental::filesystem;
#else
namespace fs = std::filesystem;
#endif
nvbench::cpu_timer timer;
timer.start();

View File

@@ -159,7 +159,7 @@ Axis: Other
const std::string test = fmt::to_string(buffer);
const auto diff =
std::mismatch(ref.cbegin(), ref.cend(), test.cbegin(), test.cend());
const auto idx = diff.second - test.cbegin();
const auto idx = static_cast<std::size_t>(diff.second - test.cbegin());
ASSERT_MSG(test == ref,
"Differs at character {}.\n"
"Expected:\n\"{}\"\n\n"

View File

@@ -46,7 +46,7 @@ void test_no_duplicates_are_allowed()
bool exception_triggered = false;
try {
nvbench::stopping_criterion_base& custom = manager.get_criterion("custom");
[[maybe_unused]] nvbench::stopping_criterion_base& _ = manager.get_criterion("custom");
} catch(...) {
exception_triggered = true;
}
@@ -73,4 +73,3 @@ int main()
test_standard_criteria_exist();
test_no_duplicates_are_allowed();
}

View File

@@ -24,6 +24,11 @@
#include <type_traits>
// If using gcc version 7, disable some tests to WAR a compiler bug. See NVIDIA/nvbench#39.
#if defined(__GNUC__) && __GNUC__ == 7
#define USING_GCC_7
#endif
enum class scoped_enum
{
val_1,
@@ -109,9 +114,11 @@ void test_int()
void test_scoped_enum()
{
#ifndef USING_GCC_7
ASSERT((
std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1>,
nvbench::type_list<nvbench::enum_type<scoped_enum::val_1>>>));
#endif
ASSERT((
std::is_same_v<nvbench::enum_type_list<scoped_enum::val_1,
scoped_enum::val_2,
@@ -123,6 +130,7 @@ void test_scoped_enum()
void test_unscoped_enum()
{
#ifndef USING_GCC_7
ASSERT(
(std::is_same_v<nvbench::enum_type_list<unscoped_val_1>,
nvbench::type_list<nvbench::enum_type<unscoped_val_1>>>));
@@ -132,6 +140,7 @@ void test_unscoped_enum()
nvbench::type_list<nvbench::enum_type<unscoped_val_1>,
nvbench::enum_type<unscoped_val_2>,
nvbench::enum_type<unscoped_val_3>>>));
#endif
}
void test_scoped_enum_type_strings()

View File

@@ -21,6 +21,7 @@
#include "test_asserts.cuh"
#include <algorithm>
#include <vector>
namespace statistics = nvbench::detail::statistics;

View File

@@ -32,7 +32,7 @@ void test_const()
nvbench::detail::stdrel_criterion criterion;
criterion.initialize(params);
for (int i = 0; i < 5; i++)
for (int i = 0; i < 5; i++)
{ // nvbench wants at least 5 to compute the standard deviation
criterion.add_measurement(42.0);
}
@@ -43,7 +43,7 @@ std::vector<double> generate(double mean, double rel_std_dev, int size)
{
std::random_device rd;
std::mt19937 gen(rd());
std::vector<nvbench::float64_t> v(size);
std::vector<nvbench::float64_t> v(static_cast<std::size_t>(size));
std::normal_distribution<nvbench::float64_t> dist(mean, mean * rel_std_dev);
std::generate(v.begin(), v.end(), [&]{ return dist(gen); });
return v;
@@ -61,7 +61,7 @@ void test_stdrel()
nvbench::detail::stdrel_criterion criterion;
criterion.initialize(params);
for (nvbench::float64_t measurement: generate(mean, max_noise / 2, size))
for (nvbench::float64_t measurement: generate(mean, max_noise / 2, size))
{
criterion.add_measurement(measurement);
}
@@ -70,7 +70,7 @@ void test_stdrel()
params.set_float64("max-noise", max_noise);
criterion.initialize(params);
for (nvbench::float64_t measurement: generate(mean, max_noise * 2, size))
for (nvbench::float64_t measurement: generate(mean, max_noise * 2, size))
{
criterion.add_measurement(measurement);
}