diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1d4ac3f14f..04674124cc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -373,9 +373,10 @@ include_directories(BEFORE
 SET(BUILD_DEV ON CACHE BOOL "BUILD_DEV")
 if(BUILD_DEV)
-    add_compile_options(-Werror)
-    add_compile_options(-Weverything)
+    add_compile_options(-Werror -Weverything)
 endif()
+# Reduce the size of the binaries: -Oz optimizes for size, -flto=thin enables ThinLTO.
+add_compile_options(-Oz -flto=thin)
 message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
 add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
@@ -390,35 +391,27 @@ IF(IS_DIRECTORY "${PROJECT_SOURCE_DIR}/library/src/tensor_operation_instance/gpu
         file(READ "${PROJECT_SOURCE_DIR}/library/src/tensor_operation_instance/gpu/${subdir_path}/CMakeLists.txt" cmake_instance)
         set(add_inst 0)
         if(("${cmake_instance}" MATCHES "fp8" OR "${cmake_instance}" MATCHES "_f8") AND DTYPES MATCHES "fp8")
-            #message("fp8 instance found!")
             set(add_inst 1)
         endif()
         if(("${cmake_instance}" MATCHES "bf8" OR "${cmake_instance}" MATCHES "_b8") AND DTYPES MATCHES "bf8")
-            #message("bf8 instance found!")
             set(add_inst 1)
         endif()
         if(("${cmake_instance}" MATCHES "fp16" OR "${cmake_instance}" MATCHES "_f16") AND DTYPES MATCHES "fp16")
-            #message("fp16 instance found!")
             set(add_inst 1)
         endif()
         if(("${cmake_instance}" MATCHES "fp32" OR "${cmake_instance}" MATCHES "_f32") AND DTYPES MATCHES "fp32")
-            #message("fp32 instance found!")
             set(add_inst 1)
         endif()
         if(("${cmake_instance}" MATCHES "fp64" OR "${cmake_instance}" MATCHES "_f64") AND DTYPES MATCHES "fp64")
-            #message("fp64 instance found!")
             set(add_inst 1)
         endif()
         if(("${cmake_instance}" MATCHES "bf16" OR "${cmake_instance}" MATCHES "_b16") AND DTYPES MATCHES "bf16")
-            #message("bf16 instance found!")
             set(add_inst 1)
         endif()
         if(("${cmake_instance}" MATCHES "int8" OR "${cmake_instance}" MATCHES "_i8") AND DTYPES MATCHES "int8")
-            #message("int8 instance found!")
             set(add_inst 1)
         endif()
         if(NOT "${cmake_instance}" MATCHES "DTYPES")
-            #message("instance should be built for all types!")
             set(add_inst 1)
         endif()
         if(add_inst EQUAL 1 OR NOT DEFINED DTYPES)
diff --git a/Dockerfile b/Dockerfile
index eb3305a42e..7134e206c1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,25 +26,37 @@ RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
 RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
 RUN amdgpu-install -y --usecase=rocm --no-dkms
+## Sccache binary built from source for ROCm
+ARG SCCACHE_REPO_URL=http://compute-artifactory.amd.com/artifactory/rocm-generic-experimental/rocm-sccache
+ENV SCCACHE_INSTALL_LOCATION=/usr/local/.cargo/bin
+RUN mkdir -p ${SCCACHE_INSTALL_LOCATION} && \
+curl -fsSL ${SCCACHE_REPO_URL}/portable/0.2.16/sccache-0.2.16-alpha.1-rocm --output ${SCCACHE_INSTALL_LOCATION}/sccache && \
+chmod +x ${SCCACHE_INSTALL_LOCATION}/sccache
+ENV PATH=$PATH:${SCCACHE_INSTALL_LOCATION}
+
 # Install dependencies
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     build-essential \
-    ccache \
     cmake \
+    ccache \
     git \
     hip-rocclr \
+    iputils-ping \
     jq \
     libelf-dev \
     libncurses5-dev \
     libnuma-dev \
     libpthread-stubs0-dev \
     llvm-amdgpu \
+    net-tools \
     pkg-config \
     python \
     python3 \
     python3-dev \
     python3-pip \
+    redis \
     sshpass \
+    stunnel \
     software-properties-common \
     vim \
     nano \
@@ -62,7 +74,7 @@ RUN gunzip /usr/local/bin/ninja.gz
 RUN chmod a+x /usr/local/bin/ninja
 RUN git clone https://github.com/nico/ninjatracing.git
-# Update the cmake to the latest version
-RUN pip install --upgrade cmake
+# Pin cmake to a fixed version so that image rebuilds are reproducible
+RUN pip install --upgrade cmake==3.27.5
 # Setup ubsan environment to printstacktrace
 RUN ln -s /usr/bin/llvm-symbolizer-3.8 /usr/local/bin/llvm-symbolizer
@@ -77,9 +89,9 @@ ARG PREFIX=/opt/rocm
 RUN pip3 install --upgrade pip
 RUN pip3 install sqlalchemy==1.4.46
 RUN pip3 install pymysql
-RUN pip3 install pandas
+RUN pip3 install pandas==2.0.3
 RUN pip3 install setuptools-rust
-RUN pip3 install 
sshtunnel
+RUN pip3 install sshtunnel==0.4.0
 # Setup ubsan environment to printstacktrace
 ENV UBSAN_OPTIONS=print_stacktrace=1
@@ -115,6 +127,8 @@ RUN if [ "$compiler_version" = "amd-stg-open" ] && [ "$compiler_commit" != "" ];
         else echo "using the release compiler"; \
     fi
+# Clean up the amdgpu-install deb package
+RUN sh -c "rm -rf amdgpu-install*"
 #ENV HIP_CLANG_PATH='/llvm-project/build/bin'
 #RUN sh -c "echo HIP_CLANG_PATH = '$HIP_CLANG_PATH'"
diff --git a/Jenkinsfile b/Jenkinsfile
index 021c19f150..ddcbc53bff 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -65,10 +65,10 @@ def getDockerImageName(){
 }
 def check_host() {
-    if ("${env.CK_CCACHE}" != "null"){
-        def CCACHE_SERVER="${env.CK_CCACHE.split(':')[0]}"
-        echo "ccache server: ${CCACHE_SERVER}"
-        sh '''ping -c 1 -p 6379 "${CCACHE_SERVER}" | echo $? > tmp.txt'''
+    if ("${env.CK_SCCACHE}" != "null"){
+        def SCCACHE_SERVER="${env.CK_SCCACHE.split(':')[0]}"
+        echo "sccache server: ${SCCACHE_SERVER}"
+        sh "if ping -c 1 '${SCCACHE_SERVER}' > /dev/null 2>&1; then printf 1; else printf 0; fi > tmp.txt" // tmp.txt holds 1 when the host answers, 0 otherwise (no trailing newline)
         def output = readFile(file: "tmp.txt")
         echo "tmp.txt contents: \$output"
         return (output != "0")
@@ -96,24 +96,9 @@ def build_compiler(){
 def getDockerImage(Map conf=[:]){
     env.DOCKER_BUILDKIT=1
-    def prefixpath = conf.get("prefixpath", "/opt/rocm") // prefix:/opt/rocm
+    def prefixpath = conf.get("prefixpath", "/opt/rocm")
     def no_cache = conf.get("no_cache", false)
     def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
-    echo "ccache server: ${env.CK_CCACHE}"
-    if(env.CK_CCACHE)
-    {
-        if(check_host())
-        {
-            echo "FOUND CCACHE SERVER: ${env.CK_CCACHE}"
-        }
-        else
-        {
-            echo "CCACHE SERVER: ${env.CK_CCACHE} NOT FOUND, got ${check_host} response"
-        }
-        dockerArgs = dockerArgs + " --build-arg CCACHE_SECONDARY_STORAGE='redis://${env.CK_CCACHE}' --build-arg COMPILER_LAUNCHER='ccache' "
-        env.CCACHE_DIR = """/tmp/ccache_store"""
-        env.CCACHE_SECONDARY_STORAGE="""redis://${env.CK_CCACHE}"""
-    }
     if(no_cache)
     {
         dockerArgs = dockerArgs + " --no-cache "
@@ -142,21 +127,6 @@ def buildDocker(install_prefix){
     def image_name = getDockerImageName()
     echo "Building Docker for ${image_name}"
     def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${install_prefix} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
-    echo "ccache server: ${env.CK_CCACHE}"
-    if(env.CK_CCACHE)
-    {
-        if(check_host())
-        {
-            echo "FOUND CCACHE SERVER: ${env.CK_CCACHE}"
-        }
-        else
-        {
-            echo "CCACHE SERVER: ${env.CK_CCACHE} NOT FOUND, got ${check_host} response"
-        }
-        dockerArgs = dockerArgs + " --build-arg CCACHE_SECONDARY_STORAGE='redis://${env.CK_CCACHE}' --build-arg COMPILER_LAUNCHER='ccache' "
-        env.CCACHE_DIR = """/tmp/ccache_store"""
-
env.CCACHE_SECONDARY_STORAGE="""redis://${env.CK_CCACHE}"""
-    }
     echo "Build Args: ${dockerArgs}"
     try{
@@ -219,13 +189,9 @@ def cmake_build(Map conf=[:]){
     }else{
         setup_args = " -DCMAKE_BUILD_TYPE=release" + setup_args
     }
-    if(env.CK_CCACHE)
-    {
-        setup_args = " -DCMAKE_CXX_COMPILER_LAUNCHER='ccache' -DCMAKE_C_COMPILER_LAUNCHER='ccache' " + setup_args
-    }
-    echo "ccache server: ${env.CK_CCACHE}"
     def pre_setup_cmd = """
+            #!/bin/bash
             echo \$HSA_ENABLE_SDMA
             ulimit -c unlimited
             rm -rf build
@@ -234,6 +200,46 @@ def cmake_build(Map conf=[:]){
             mkdir install
             cd build
         """
+    def invocation_tag=""
+    if (setup_args.contains("gfx11")){
+        invocation_tag="gfx11"
+    }
+    if (setup_args.contains("gfx10")){
+        invocation_tag="gfx10"
+    }
+    if (setup_args.contains("gfx90")){
+        invocation_tag="gfx90"
+    }
+    if (setup_args.contains("gfx94")){
+        invocation_tag="gfx94"
+    }
+    if(params.USE_SCCACHE && "${env.CK_SCCACHE}" != "null" && "${invocation_tag}" != "" && check_host()) {
+        pre_setup_cmd = pre_setup_cmd + """
+            #!/bin/bash
+            export ROCM_PATH=/opt/rocm
+            export SCCACHE_ENABLED=true
+            export SCCACHE_LOG_LEVEL=debug
+            export SCCACHE_IDLE_TIMEOUT=14400
+            export COMPILERS_HASH_DIR=/tmp/.sccache
+            export SCCACHE_BIN=/usr/local/.cargo/bin/sccache
+            export SCCACHE_EXTRAFILES=/tmp/.sccache/rocm_compilers_hash_file
+            export SCCACHE_REDIS="redis://${env.CK_SCCACHE}"
+            echo "connect = ${env.CK_SCCACHE}" >> ../script/redis-cli.conf
+            export SCCACHE_C_CUSTOM_CACHE_BUSTER="${invocation_tag}"
+            echo \$SCCACHE_C_CUSTOM_CACHE_BUSTER
+            stunnel ../script/redis-cli.conf
+            # Probe redis inside an "if" so that a failed probe cannot abort the whole
+            # script when the shell runs with errexit (the Jenkins sh step default).
+            # On failure, fall back to starting sccache with its local cache only.
+            if ../script/sccache_wrapper.sh --enforce_redis; then
+                echo "sccache is using the redis server."
+            else
+                echo "could not connect to the redis server. using sccache locally."
+                ../script/sccache_wrapper.sh
+            fi
+        """
+        setup_args = " -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache " + setup_args
+    }
     def setup_cmd = conf.get("setup_cmd", "${cmake_envs} cmake ${setup_args} .. ")
     // reduce parallelism when compiling, clang uses too much memory
     def nt = nthreads()
@@ -251,7 +257,7 @@ def cmake_build(Map conf=[:]){
     sh cmd
-    // Only archive from master or develop
-    if (package_build == true && (env.BRANCH_NAME == "develop" || env.BRANCH_NAME == "master")) {
+    // Only archive from amd-master or develop
+    if (package_build == true && (env.BRANCH_NAME == "develop" || env.BRANCH_NAME == "amd-master")) {
         archiveArtifacts artifacts: "build/*.deb", allowEmptyArchive: true, fingerprint: true
     }
 }
@@ -635,7 +641,7 @@ def process_results(Map conf=[:]){
 //launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
 CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=5.7;COMPILER_VERSION=
                                               0 21 * * * % ROCMVERSION=5.7;COMPILER_VERSION=;COMPILER_COMMIT=
-                                              0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-stg-open;COMPILER_COMMIT=''' : ""
+                                              0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-stg-open;COMPILER_COMMIT=;USE_SCCACHE=false''' : ""
 pipeline {
     agent none
@@ -682,7 +688,10 @@ pipeline {
             name: 'hipTensor_branch',
             defaultValue: 'mainline',
             description: 'Specify which branch of hipTensor to use (default: mainline)')
-
+        booleanParam(
+            name: "USE_SCCACHE",
+            defaultValue: true,
+            description: "Use the sccache for building CK (default: ON)")
     }
     environment{
         dbuser = "${dbuser}"
diff --git a/README.md b/README.md
index c2b493db11..e5a20f143f 100644
--- a/README.md
+++ b/README.md
@@ -1,139 +1,189 @@
 # Composable Kernel
-## Methodology
+The Composable Kernel (CK) library provides a programming model for writing performance-critical
+kernels for machine learning workloads across multiple architectures (GPUs, CPUs, etc.). The CK library
+uses general purpose kernel languages, such as HIP C++.
-Composable Kernel (CK) library aims to provide a programming model for writing performance critical kernels for machine learning workloads across multiple architectures including GPUs, CPUs, etc, through general purpose kernel languages, like HIP C++. +CK uses two concepts to achieve performance portability and code maintainability: -CK utilizes two concepts to achieve performance portability and code maintainability: * A tile-based programming model -* Algorithm complexity reduction for complex ML operators, using innovative technique we call "Tensor Coordinate Transformation". +* Algorithm complexity reduction for complex machine learning (ML) operators. This uses an innovative + technique called *Tensor Coordinate Transformation*. ![ALT](/docs/data/ck_component.png "CK Components") -## Code Structure +The current CK library is structured into four layers: -Current CK library are structured into 4 layers: -* "Templated Tile Operators" layer -* "Templated Kernel and Invoker" layer -* "Instantiated Kernel and Invoker" layer -* "Client API" layer +* Templated Tile Operators +* Templated Kernel and Invoker +* Instantiated Kernel and Invoker +* Client API ![ALT](/docs/data/ck_layer.png "CK Layers") -## Documentation +## General information -Run the steps below to build documentation locally. +To build our documentation locally, use the following code: -``` +```bash cd docs pip3 install -r sphinx/requirements.txt python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html ``` -## Contributors +You can find a list of our developers and contributors on our +[Contributors](/CONTRIBUTORS.md) page. -The list of developers and contributors is here: [Contributors](/CONTRIBUTORS.md) +```note +If you use CK, cite us as follows: -## Citation - -If you use CK, please use following citations: -* CK paper will be freely available on arXiv soon: [Realizing Tensor Operators Using Coordinate Transformations and Tile Based Programming](???) 
+* [Realizing Tensor Operators Using Coordinate Transformations and Tile Based Programming](???): + This paper will be available on arXiv soon. * [CITATION.cff](/CITATION.cff) - -## License - -CK is released under the MIT license. [License File](/LICENSE) - - -# Build CK - -## Build docker image - -```bash -DOCKER_BUILDKIT=1 docker build -t ck:latest -f Dockerfile . -``` -Pre-built dockers are available from this public repo: -https://hub.docker.com/r/rocm/composable_kernel/tags - -## Launch docker - -```bash -docker run \ --it \ ---privileged \ ---group-add sudo \ --w /root/workspace \ --v ${PATH_TO_LOCAL_WORKSPACE}:/root/workspace \ -ck:latest \ -/bin/bash ``` -## Build CK +CK is released under the **[MIT license](/LICENSE)**. -```bash -mkdir build && cd build +## Building CK -# Need to specify target ID, example below is for gfx908 and gfx90a +We recommend building CK inside Docker containers, which include all necessary packages. Pre-built +Docker images are available on [DockerHub](https://hub.docker.com/r/rocm/composable_kernel/tags). -cmake \ --D CMAKE_PREFIX_PATH=/opt/rocm \ --D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \ --D CMAKE_BUILD_TYPE=Release \ --D GPU_TARGETS="gfx908;gfx90a" \ -.. -``` +1. To build a new Docker image, use the Dockerfile provided with the source code: -If GPU_TARGETS is not set on the cmake command line, CK will be built for all targets supported by the -current compiler. + ```bash + DOCKER_BUILDKIT=1 docker build -t ck:latest -f Dockerfile . + ``` +2. Launch the Docker container: + + ```bash + docker run \ + -it \ + --privileged \ + --group-add sudo \ + -w /root/workspace \ + -v ${PATH_TO_LOCAL_WORKSPACE}:/root/workspace \ + ck:latest \ + /bin/bash + ``` + +3. 
Clone CK source code from the GitHub repository and start the build: + + ```bash + git clone https://github.com/ROCmSoftwarePlatform/composable_kernel.git && \ + cd composable_kernel && \ + mkdir build && \ + cd build + ``` + + Set the `GPU_TARGETS` macro to specify the GPU target architecture(s) you want + to run CK on. You can specify single or multiple architectures. If you specify multiple architectures, + use a semicolon between each; for example, `gfx908;gfx90a;gfx940`. + + ```bash + cmake \ + -D CMAKE_PREFIX_PATH=/opt/rocm \ + -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \ + -D CMAKE_BUILD_TYPE=Release \ + -D GPU_TARGETS="gfx908;gfx90a" \ + .. + ``` + + If you don't set `GPU_TARGETS` on the cmake command line, CK is built for all GPU targets + supported by the current compiler (this may take a long time). + +4. Build the entire CK library: + + ```bash + make -j + ``` + +5. Install CK: + + ```bash + make -j install + ``` + +## Optional post-install steps + +* Build examples and tests: + + ```bash + make -j examples tests + ``` + +* Build and run all examples and tests: + + ```bash + make -j check + ``` + + You can find instructions for running each individual example in [example](/example). + +* Build ckProfiler: + + ```bash + make -j ckProfiler + ``` + + You can find instructions for running ckProfiler in [profiler](/profiler). + +Note the `-j` option for building with multiple threads in parallel. This speeds up the build significantly. +Depending on the number of CPU cores and the amount of RAM on your system, you may want to +limit the number of threads. For example, suppose you have a 128-core CPU and 64 GB of RAM. + +Without a job count, `-j` places no limit on the number of parallel jobs, which can cause the build to run out of memory and +crash. In such cases, you can reduce the number of threads to 32 by using `-j32`. 
Additional cmake flags can be used to significantly speed-up the build: -INSTANCES_ONLY (by default is OFF) must be set to ON in order to build only the instances and library -while skipping all tests, examples, and profiler. This is useful for libraries that use CK as a dependency. +* `INSTANCES_ONLY` (default is OFF) must be set to ON in order to build only the instances and library + while skipping all tests, examples, and profiler. This is useful in cases when you plan to use CK as a + dependency and don't plan to run any examples or tests. -DTYPES (by default not set) can be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build instances -of select data types only. Currently, building of int8 instances is taking a lot of time (the compiler fix is in the works). +* `DTYPES` (default is not set) can be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build + instances of select data types only. The main default data types are fp32 and fp16; you can safely skip + other data types. -DL_KERNELS (by default is OFF) must be set to ON in order to build the gemm_dl and batched_gemm_multi_d_dl -instances. Those instances are only needed for the NAVI2x platforms. +* `DL_KERNELS` (default is OFF) must be set to ON in order to build instances, such as `gemm_dl` or + `batched_gemm_multi_d_dl`. These instances are useful on architectures like the NAVI2x, as most + other platforms have faster instances, such as `xdl` or `wmma`, available. -### Build examples and tests +## Using sccache for building + +The default CK Docker images come with a pre-installed version of sccache, which supports clang +being used as hip-compiler (" -x hip"). Using sccache can help reduce the time to re-build code from +hours to 1-2 minutes. 
In order to invoke sccache, you need to run: ```bash - make -j examples tests - make test + sccache --start-server ``` -Instructions for running each individual examples are under [example](/example) - - -## Build ckProfiler +then add the following flags to the cmake command line: ```bash - make -j ckProfiler + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache ``` -Instructions for running ckProfiler are under [profiler](/profiler) -## Install CK - -```bash -make install -``` +You may need to clean up the build folder and repeat the cmake and make steps in order to take +advantage of the sccache during subsequent builds. ## Using CK as pre-built kernel library -Instructions for using CK as a pre-built kernel library are under [client_example](/client_example) +You can find instructions for using CK as a pre-built kernel library in [client_example](/client_example). -## Contributing +## Contributing to CK -When you contribute to Composable Kernel, make sure to run `clang-format` on all the changed files. We highly recommend using git hooks that are managed by the `pre-commit` framework. To install hooks, run: +When you contribute to CK, make sure you run `clang-format` on all changed files. We highly +recommend using git hooks that are managed by the `pre-commit` framework. To install hooks, run: ```bash sudo script/install_precommit.sh ``` -This way, `pre-commit` will add the appropriate hooks to your local repository and automatically run `clang-format` (and possibly additional checks) before any commit is created. +With this approach, `pre-commit` adds the appropriate hooks to your local repository and +automatically runs `clang-format` (and possibly additional checks) before any commit is created. 
If you need to uninstall hooks from the repository, you can do so by running the following command:
@@ -141,14 +191,5 @@ If you need to uninstall hooks from the repository, you can do so by running the
 script/uninstall_precommit.sh
 ```
-If for any reason, you need to temporarily disable precommit hooks, you can add the `--no-verify` option to the `git commit` command.
-
-## Caveat
-### Kernel Timing and Verification
-
-CK's own kernel timer will warn up kernel once, and then run it multiple times
-to get average kernel time. For some kernels that use atomic add, this will cause
-output buffer to be accumulated multiple times, causing verification failure.
-To work around it, do not use CK's own timer and do verification at the same time.
-CK's own timer and verification in each example and ckProfiler can be enabled or
-disabled from command line.
+If you need to temporarily disable pre-commit hooks, you can add the `--no-verify` option to the
+`git commit` command.
diff --git a/script/redis-cli.conf b/script/redis-cli.conf
new file mode 100644
index 0000000000..c17bc66202
--- /dev/null
+++ b/script/redis-cli.conf
@@ -0,0 +1,10 @@
+fips = no
+setuid = root
+setgid = root
+pid = /var/run/stunnel.pid
+debug = 7
+options = NO_SSLv2
+options = NO_SSLv3
+[redis-cli]
+client = yes
+accept = 127.0.0.1:6379
diff --git a/script/sccache_wrapper.sh b/script/sccache_wrapper.sh
new file mode 100755
index 0000000000..b0ec08de45
--- /dev/null
+++ b/script/sccache_wrapper.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+set -e
+ROCM_PATH=${ROCM_PATH:-"/opt/rocm"}
+COMPILERS_HASH_DIR=${COMPILERS_HASH_DIR:-"/tmp/.sccache"}
+SCCACHE_EXTRAFILES=${SCCACHE_EXTRAFILES:-"${COMPILERS_HASH_DIR}/rocm_compilers_hash_file"}
+SCCACHE_BIN=${SCCACHE_BIN:-"${SCCACHE_INSTALL_LOCATION}/sccache"}
+ENFORCE_REDIS="false"
+while [ "$1" != "" ];
+do
+    case $1 in
+        --enforce_redis )
+            shift; ENFORCE_REDIS="true" ;;
+        --no-hipcc )
+            shift ;;
+        *)
+            break ;;
+    esac
+done
+setup_rocm_compilers_hash_file() {
+    mkdir -p "$COMPILERS_HASH_DIR"
+    HIPCC_MD5="$(md5sum "${ROCM_PATH}/bin/hipcc")"
+    pushd "${ROCM_PATH}/amdgcn/bitcode"
+    DEVICELIBS_BITCODES_MD5="$(find . -type f -exec md5sum {} \; | sort | md5sum)"
+    popd
+    HIPCC_HASH_VALUE="${HIPCC_MD5%% *}"
+    DEVICELIBS_BITCODES_HASH_VALUE="${DEVICELIBS_BITCODES_MD5%% *}"
+    # MD5 checksums of clang and clang-offload-bundler cannot be used since they will keep changing
+    # if the ROCM_PATH changes, ie; for every mainline build.
+    # This is because ROCM_PATH gets encoded into the clang/clang-offload-bundler binaries as part
+    # of RPATH.
+    # The versions themselves contain the commit hash of the compiler repo at the time of building.
+    # Hence, this should be a viable alternative to using the binary checksum itself.
+    CLANG_VERSION="$("${ROCM_PATH}/llvm/bin/clang" --version | head -n 1)"
+    CLANG_OFFLOAD_BUNDLER_VERSION="$("${ROCM_PATH}/llvm/bin/clang-offload-bundler" --version | head -n 1)"
+    # The first write truncates the file so that repeated runs produce identical content.
+    printf '%s: %s\n' 'clang version' "${CLANG_VERSION}" | tee "$SCCACHE_EXTRAFILES"
+    printf '%s: %s\n' 'clang-offload-bundler version' "${CLANG_OFFLOAD_BUNDLER_VERSION}" | tee -a "$SCCACHE_EXTRAFILES"
+    printf '%s: %s\n' 'hipcc md5sum' "${HIPCC_HASH_VALUE}" | tee -a "$SCCACHE_EXTRAFILES"
+    printf '%s: %s\n' 'devicelibs bitcode md5sum' "${DEVICELIBS_BITCODES_HASH_VALUE}" | tee -a "$SCCACHE_EXTRAFILES"
+    echo "sccache-wrapper: compilers hash file set up at ${SCCACHE_EXTRAFILES}"
+    cat "$SCCACHE_EXTRAFILES"
+}
+if [ "${ENFORCE_REDIS}" == "true" ]; then
+    if [ -z "${SCCACHE_REDIS}" ]; then
+        echo "SCCACHE_REDIS not set. Not wrapping compilers with sccache."
+        exit 10
+    else
+        response=$(redis-cli -u "${SCCACHE_REDIS}" ping) || true
+        if [ "${response}" != "PONG" ]; then
+            echo "Redis server unreachable. Not wrapping compilers with sccache."
+            exit 20
+        fi
+    fi
+fi
+setup_rocm_compilers_hash_file
+"$SCCACHE_BIN" --version
+"$SCCACHE_BIN" --start-server
+