From 3f5d6a4d1f0e0b080a56fa99f04d50040f2b5522 Mon Sep 17 00:00:00 2001
From: Geo Min
Date: Tue, 12 Aug 2025 14:13:01 -0700
Subject: [PATCH] [TheRock CI] Adding TheRock CI gate check (#2648)

* Adding initial TheRock CI

* Adding composable kernel link

* Adding correct repo for rocm-libraries

* Adding entire rocm-libraries checkout

* Adding correct flag

* Adding correct flag for fetch sources

* Fixing git health

* Removing patch

* Removing patching

* Removing manual check

* PR comments

* testing without dist

* Removing test branch

* PR comments

* PR comments

* PR comment

* Adding test_runs_on

[ROCm/composable_kernel commit: 30dafe82810bd49a186149007f33ebbf120084de]
---
 .github/workflows/therock-ci-linux.yml      | 128 ++++++++++++++++++++
 .github/workflows/therock-ci.yml            |  50 ++++++++
 .github/workflows/therock-test-packages.yml |  76 ++++++++++++
 3 files changed, 254 insertions(+)
 create mode 100644 .github/workflows/therock-ci-linux.yml
 create mode 100644 .github/workflows/therock-ci.yml
 create mode 100644 .github/workflows/therock-test-packages.yml

diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml
new file mode 100644
index 0000000000..645a91c030
--- /dev/null
+++ b/.github/workflows/therock-ci-linux.yml
@@ -0,0 +1,128 @@
+name: TheRock CI Linux
+
+on:
+  workflow_call:
+    inputs:
+      cmake_options:
+        type: string
+      amdgpu_families:
+        type: string
+      test_runs_on:
+        type: string
+
+permissions:
+  contents: read
+
+jobs:
+  therock-build-linux:
+    name: Build Linux Packages
+    runs-on: azure-linux-scale-rocm
+    permissions:
+      id-token: write  # OIDC token for the role assumed in "Configure AWS Credentials"
+    container:
+      image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:044b113562629f4bd2ec5d2e64b32eee11562d48fb1a75d7493daec9dd8d8292
+    env:
+      AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
+      TEATIME_FORCE_INTERACTIVE: 0
+    steps:
+      - name: Checkout composable_kernel repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Checkout TheRock repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          repository: "ROCm/TheRock"
+          ref: ec1c2ef4f2636bce7733fd8c95e1dbb6692c8a57  # pinned TheRock commit
+          path: "TheRock"
+
+      - name: Runner Health Settings
+        run: |
+          df -h
+          cmake --version
+          echo "Installed Python versions:"
+          ls -d /opt/python
+          echo "python: $(which python), python3: $(which python3)"
+          echo "Git version: $(git --version)"
+          git config --global --add safe.directory "$PWD"
+          git config fetch.parallel 10
+
+      - name: Fetch sources
+        run: |
+          ./TheRock/build_tools/fetch_sources.py --jobs 12
+
+      - name: Install python deps
+        run: |
+          pip install -r TheRock/requirements.txt
+          pip freeze
+
+      - name: Configure Projects
+        env:
+          amdgpu_families: ${{ env.AMDGPU_FAMILIES }}
+          package_version: ADHOCBUILD
+          extra_cmake_options: ${{ inputs.cmake_options }}
+          BUILD_DIR: build
+        run: |
+          python3 TheRock/build_tools/github_actions/build_configure.py
+
+      - name: Build TheRock
+        run: cmake --build TheRock/build
+
+      - name: Build therock-archives
+        run: cmake --build TheRock/build --target therock-archives
+
+      - name: Report
+        if: ${{ !cancelled() }}  # still runs after a failed step, but not on cancellation
+        run: |
+          echo "Full SDK du:"
+          echo "------------"
+          du -h -d 1 TheRock/build/dist/rocm
+          echo "Artifact Archives:"
+          echo "------------------"
+          ls -lh TheRock/build/artifacts/*.tar.xz
+          echo "Artifacts:"
+          echo "----------"
+          du -h -d 1 TheRock/build/artifacts
+
+      - name: Configure AWS Credentials
+        if: always()
+        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
+        with:
+          aws-region: us-east-2
+          role-to-assume: arn:aws:iam::692859939525:role/therock-artifacts-external
+
+      - name: Create Logs index Files and upload logs
+        if: always()
+        run: |
+          python3 TheRock/build_tools/github_actions/create_log_index.py \
+            --build-dir=TheRock/build \
+            --amdgpu-family=${{ env.AMDGPU_FAMILIES }}
+
+          python3 TheRock/build_tools/github_actions/upload_build_logs_to_s3.py \
+            --build-dir=TheRock/build \
+            --run-id ${{ github.run_id }} \
+            --amdgpu-family ${{ env.AMDGPU_FAMILIES }}
+
+      - name: Upload artifacts  # no "if:", so skipped when an earlier step failed
+        run: |
+          python TheRock/build_tools/github_actions/upload_build_artifacts.py \
+            --run-id ${{ github.run_id }} \
+            --amdgpu-family ${{ env.AMDGPU_FAMILIES }} \
+            --build-dir TheRock/build
+
+      - name: Add Links to Job Summary
+        if: always()
+        run: |
+          python TheRock/build_tools/github_actions/upload_build_summary.py \
+            --run-id ${{ github.run_id }} \
+            --amdgpu-family ${{ env.AMDGPU_FAMILIES }} \
+            --build-dir TheRock/build
+
+  therock-test-linux:
+    name: "Test"
+    needs: [therock-build-linux]
+    uses: ./.github/workflows/therock-test-packages.yml
+    with:
+      project_to_test: "miopen"
+      amdgpu_families: ${{ inputs.amdgpu_families }}
+      test_runs_on: ${{ inputs.test_runs_on }}
+      platform: "linux"
diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml
new file mode 100644
index 0000000000..18411baa09
--- /dev/null
+++ b/.github/workflows/therock-ci.yml
@@ -0,0 +1,50 @@
+name: TheRock CI for composable_kernel
+
+on:
+  push:
+    branches:
+      - develop
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  # A PR number if a pull request and otherwise the commit hash. This cancels
+  # queued and in-progress runs for the same PR (presubmit) or commit
+  # (postsubmit). The workflow name is prepended to avoid conflicts between
+  # different workflows.
+  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  therock-ci-linux:
+    name: TheRock CI Linux
+    permissions:
+      contents: read
+      id-token: write  # requested by the build job of the called workflow
+    uses: ./.github/workflows/therock-ci-linux.yml
+    secrets: inherit
+    with:
+      cmake_options: "-DTHEROCK_ENABLE_COMPOSABLE_KERNEL=ON -DTHEROCK_ENABLE_MIOPEN=ON -DTHEROCK_ENABLE_ALL=OFF -DTHEROCK_USE_EXTERNAL_CK=ON -DTHEROCK_CK_SOURCE_DIR=../"
+      amdgpu_families: "gfx94X-dcgpu"
+      test_runs_on: "linux-mi325-1gpu-ossci-rocm"
+
+  therock_ci_summary:
+    name: TheRock CI Summary
+    if: always()  # report even when the jobs listed in "needs" failed
+    needs:
+      - therock-ci-linux
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Output failed jobs
+        run: |
+          echo '${{ toJson(needs) }}'
+          FAILED_JOBS="$(echo '${{ toJson(needs) }}' \
+            | jq --raw-output \
+            'map_values(select(.result!="success" and .result!="skipped")) | keys | join(",")' \
+          )"
+          if [[ "${FAILED_JOBS}" != "" ]]; then
+            echo "The following jobs failed: ${FAILED_JOBS}"
+            exit 1
+          fi
diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml
new file mode 100644
index 0000000000..439135743c
--- /dev/null
+++ b/.github/workflows/therock-test-packages.yml
@@ -0,0 +1,76 @@
+name: TheRock Test Packages
+
+on:
+  workflow_call:
+    inputs:
+      project_to_test:
+        type: string
+      amdgpu_families:
+        type: string
+      test_runs_on:
+        type: string
+      platform:
+        type: string
+
+permissions:
+  contents: read
+
+jobs:
+  configure_test_matrix:
+    name: "Configure test matrix"
+    runs-on: ubuntu-24.04
+    if: ${{ inputs.test_runs_on != '' }}  # no runner label given: skip all testing
+    outputs:
+      components: ${{ steps.configure.outputs.components }}
+    steps:
+      - name: "Checking out repository"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          repository: "ROCm/TheRock"  # NOTE(review): no ref, so the default branch; the build pins a commit
+
+      - name: "Configuring CI options"
+        env:
+          PLATFORM: ${{ inputs.platform }}
+          project_to_test: ${{ inputs.project_to_test }}
+        id: configure
+        run: python ./build_tools/github_actions/fetch_test_configurations.py
+
+  test_components:
+    name: 'Test ${{ matrix.components.job_name }}'
+    runs-on: ${{ inputs.test_runs_on }}
+    needs: configure_test_matrix
+    # skip tests if no test matrix to run
+    if: ${{ needs.configure_test_matrix.outputs.components != '[]' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        components: ${{ fromJSON(needs.configure_test_matrix.outputs.components) }}
+    defaults:
+      run:
+        shell: bash
+    env:
+      VENV_DIR: ${{ github.workspace }}/.venv
+      ARTIFACT_RUN_ID: "${{ github.run_id }}"
+      OUTPUT_ARTIFACTS_DIR: ${{ github.workspace }}/build
+      THEROCK_BIN_DIR: "./build/bin"
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          repository: "ROCm/TheRock"  # NOTE(review): no ref, so the default branch; the build pins a commit
+
+      - name: Run setup test environment workflow
+        uses: './.github/actions/setup_test_environment'
+        with:
+          ARTIFACT_RUN_ID: ${{ env.ARTIFACT_RUN_ID }}
+          AMDGPU_FAMILIES: ${{ inputs.amdgpu_families }}
+          OUTPUT_ARTIFACTS_DIR: ${{ env.OUTPUT_ARTIFACTS_DIR }}
+          VENV_DIR: ${{ env.VENV_DIR }}
+          FETCH_ARTIFACT_ARGS: ${{ matrix.components.fetch_artifact_args }}
+          PLATFORM: ${{ inputs.platform }}
+
+      - name: Test
+        timeout-minutes: ${{ matrix.components.timeout_minutes }}
+        run: |
+          if [ "${{ inputs.platform }}" == "linux" ]; then source ${VENV_DIR}/bin/activate ; else . ${VENV_DIR}/Scripts/activate ; fi
+          ${{ matrix.components.test_script }}