Mirror of https://github.com/NVIDIA/nvbench.git — synced 2026-03-14 20:27:24 +00:00
* devcontainer: replace VAULT_HOST with AWS_ROLE_ARN Signed-off-by: Jordan Jacobelli <jjacobelli@nvidia.com> * Update devcontainers base image to support AWS_ROLE_ARN Signed-off-by: Jordan Jacobelli <jjacobelli@nvidia.com> * Bump cuda latest version to 12.6 Signed-off-by: Jordan Jacobelli <jjacobelli@nvidia.com> * Replace ubuntu18.04 with ubuntu20.04 Ubuntu 18.04 is not supported anymore Signed-off-by: Jordan Jacobelli <jjacobelli@nvidia.com> * Use DOOD stategy to keep supporting ubuntu18.04 See https://github.com/NVIDIA/cccl/pull/1779 Signed-off-by: Jordan Jacobelli <jjacobelli@nvidia.com> --------- Signed-off-by: Jordan Jacobelli <jjacobelli@nvidia.com>
157 lines · 6.6 KiB · YAML
name: Run as coder user

# All run steps use bash with -e (fail fast), -x (trace), and pipefail.
defaults:
  run:
    shell: bash -exo pipefail {0}

on:
  workflow_call:
    inputs:
      cuda: {type: string, required: true}
      host: {type: string, required: true}
      name: {type: string, required: true}
      image: {type: string, required: true}
      runner: {type: string, required: true}
      command: {type: string, required: true}
      env: { type: string, required: false, default: "" }

permissions:
  contents: read

jobs:
  run-as-coder:
    name: ${{inputs.name}}
    permissions:
      # id-token is needed to obtain AWS credentials for sccache via OIDC.
      id-token: write
      contents: read
    runs-on: ${{inputs.runner}}
    container:
      # This job now uses a docker-outside-of-docker (DOOD) strategy.
      #
      # The GitHub Actions runner application mounts the host's docker socket `/var/run/docker.sock` into the
      # container. By using a container with the `docker` CLI, this container can launch docker containers
      # using the host's docker daemon.
      #
      # This allows us to run actions that require node v20 in the `cruizba/ubuntu-dind:jammy-26.1.3` container, and
      # then launch our Ubuntu18.04-based GCC 6/7 containers to build and test CCCL.
      #
      # The main inconvenience to this approach is that any container mounts have to match the paths of the runner host,
      # not the paths as seen in the intermediate (`cruizba/ubuntu-dind`) container.
      #
      # Note: I am using `cruizba/ubuntu-dind:jammy-26.1.3` instead of `docker:latest`, because GitHub doesn't support
      # JS actions in alpine aarch64 containers, instead failing actions with this error:
      # ```
      # Error: JavaScript Actions in Alpine containers are only supported on x64 Linux runners. Detected Linux Arm64
      # ```
      image: cruizba/ubuntu-dind:jammy-26.1.3
      env:
        # Forward the GPU visibility assigned to this job into the intermediate container.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          path: nvbench
          persist-credentials: false
      - name: Add NVCC problem matcher
        run: |
          echo "::add-matcher::nvbench/.github/problem-matchers/problem-matcher.json"
      - name: Configure credentials and environment variables for sccache
        uses: ./nvbench/.github/actions/configure_cccl_sccache
      - name: Run command
        env:
          CI: true
          RUNNER: "${{inputs.runner}}"
          COMMAND: "${{inputs.command}}"
          AWS_ACCESS_KEY_ID: "${{env.AWS_ACCESS_KEY_ID}}"
          AWS_SESSION_TOKEN: "${{env.AWS_SESSION_TOKEN}}"
          AWS_SECRET_ACCESS_KEY: "${{env.AWS_SECRET_ACCESS_KEY}}"
        run: |
          echo "[host] github.workspace: ${{github.workspace}}"
          echo "[container] GITHUB_WORKSPACE: ${GITHUB_WORKSPACE:-}"
          echo "[container] PWD: $(pwd)"

          # Necessary because we're doing docker-outside-of-docker:
          # Make a symlink in the container that matches the host's ${{github.workspace}}, so that way `$(pwd)`
          # in `.devcontainer/launch.sh` constructs volume paths relative to the hosts's ${{github.workspace}}.
          mkdir -p "$(dirname "${{github.workspace}}")"
          ln -s "$(pwd)" "${{github.workspace}}"

          cd "${{github.workspace}}"

          # Write the script that will run inside the devcontainer. The quoted
          # heredoc delimiter prevents shell expansion here; ${{...}} expressions
          # are still expanded by GitHub Actions when the workflow is rendered.
          cat <<"EOF" > ci.sh
          #! /usr/bin/env bash
          set -eo pipefail
          echo -e "\e[1;34mRunning as '$(whoami)' user in $(pwd):\e[0m"
          echo -e "\e[1;34m${{inputs.command}}\e[0m"
          eval "${{inputs.command}}" || exit_code=$?
          if [ ! -z "$exit_code" ]; then
            echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m"
            echo "::error:: To replicate this failure locally, follow the steps below:"
            echo "1. Clone the repository, and navigate to the correct branch and commit:"
            echo "   git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA"
            echo ""
            echo "2. Run the failed command inside the same Docker container used by the CI:"
            echo "   docker run --rm -it --gpus all --pull=always --volume \$PWD:/repo --workdir /repo ${{ inputs.image }} ${{inputs.command}}"
            echo ""
            echo "For additional information, see:"
            echo "   - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md"
            echo "   - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md"
            exit $exit_code
          fi
          EOF

          chmod +x ci.sh

          # Stage sccache AWS config/credentials; these get volume-mounted into
          # the devcontainer below.
          mkdir "$RUNNER_TEMP/.aws";

          cat <<EOF > "$RUNNER_TEMP/.aws/config"
          [default]
          bucket=rapids-sccache-devs
          region=us-east-2
          EOF

          cat <<EOF > "$RUNNER_TEMP/.aws/credentials"
          [default]
          aws_access_key_id=$AWS_ACCESS_KEY_ID
          aws_session_token=$AWS_SESSION_TOKEN
          aws_secret_access_key=$AWS_SECRET_ACCESS_KEY
          EOF

          chmod 0600 "$RUNNER_TEMP/.aws/credentials"
          chmod 0664 "$RUNNER_TEMP/.aws/config"

          declare -a gpu_request=()

          # Explicitly pass which GPU to use if on a GPU runner
          if [[ "${RUNNER}" = *"-gpu-"* ]]; then
            gpu_request+=(--gpus "device=${NVIDIA_VISIBLE_DEVICES}")
          fi

          # Translate a path as seen in this intermediate container (/__w/...)
          # into the corresponding path on the runner host, for docker mounts.
          host_path() {
            sed "s@/__w@$(dirname "$(dirname "${{github.workspace}}")")@" <<< "$1"
          }

          # Launch this container using the host's docker daemon
          ${{github.event.repository.name}}/.devcontainer/launch.sh \
            --docker \
            --cuda ${{inputs.cuda}} \
            --host ${{inputs.host}} \
            "${gpu_request[@]}" \
            --env "CI=$CI" \
            --env "AWS_ROLE_ARN=" \
            --env "COMMAND=$COMMAND" \
            --env "GITHUB_ENV=$GITHUB_ENV" \
            --env "GITHUB_SHA=$GITHUB_SHA" \
            --env "GITHUB_PATH=$GITHUB_PATH" \
            --env "GITHUB_OUTPUT=$GITHUB_OUTPUT" \
            --env "GITHUB_ACTIONS=$GITHUB_ACTIONS" \
            --env "GITHUB_REF_NAME=$GITHUB_REF_NAME" \
            --env "GITHUB_WORKSPACE=$GITHUB_WORKSPACE" \
            --env "GITHUB_REPOSITORY=$GITHUB_REPOSITORY" \
            --env "GITHUB_STEP_SUMMARY=$GITHUB_STEP_SUMMARY" \
            --volume "${{github.workspace}}/ci.sh:/ci.sh" \
            --volume "$(host_path "$RUNNER_TEMP")/.aws:/root/.aws" \
            --volume "$(dirname "$(dirname "${{github.workspace}}")"):/__w" \
            -- /ci.sh