From 4bb95f18ed78088b3925ef568ea1f542383b72df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Tue, 5 Nov 2024 10:09:52 +0100 Subject: [PATCH] [generate.py] Override blob list if it already exists (#1635) Before, generate.py appended the list at the end of the output file. When running the cmake configuration steps multiple times on the examples, the blob list (such as fwd_blob_list.txt) would grow at every configuration. `library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt` worked around this issue by removing the output file if it exists. Now, generate.py overwrites the content of the output file. There is no need for the workaround in the CMakeLists.txt, and the issue is solved for the example projects too. [ROCm/composable_kernel commit: 464abd235e27c33422aa52ed2044af8fbcc3a88d] --- example/ck_tile/01_fmha/generate.py | 3 +++ example/ck_tile/02_layernorm2d/generate.py | 2 +- library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt | 5 ----- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/example/ck_tile/01_fmha/generate.py b/example/ck_tile/01_fmha/generate.py index 9b91d36fb2..5b1b6664cc 100644 --- a/example/ck_tile/01_fmha/generate.py +++ b/example/ck_tile/01_fmha/generate.py @@ -47,6 +47,9 @@ def list_blobs(output_file : Optional[str], api_list : List[str], kernel_filter assert output_file is not None file_path = Path(output_file) + # create an empty file / drop its contents if it exists + open(file_path, "w").close() + for api in api_list: handler = handlers[api][HandlerId.LIST_BLOBS] handler(file_path, kernel_filter, receipt, mask_impl) diff --git a/example/ck_tile/02_layernorm2d/generate.py b/example/ck_tile/02_layernorm2d/generate.py index bf576db97e..09aa6b65f8 100644 --- a/example/ck_tile/02_layernorm2d/generate.py +++ b/example/ck_tile/02_layernorm2d/generate.py @@ -559,7 +559,7 @@ float layernorm2d_fwd(layernorm2d_fwd_traits t, w_p = Path(self.working_path) list_p = w_p / 
'layernorm2d_fwd_blobs.txt' blobs = self.get_blobs() - with list_p.open('a') as list_f: + with list_p.open('w') as list_f: # api related file list_f.write(str(w_p / (self.name_api + ".cpp")) + "\n") list_f.write(str(w_p / (self.name_common_header + ".hpp")) + "\n") diff --git a/library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt index 6d638b1747..a53fde1662 100644 --- a/library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt @@ -27,11 +27,6 @@ rocm_install(FILES ${MHA_HEADERS} DESTINATION include/ck_tile/ops) # headers for building lib file(COPY ${MHA_HEADERS} DESTINATION ${FMHA_CPP_FOLDER}) -# Delete the blob file if it exists to avoid append of old content. -if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt) - file(REMOVE ${FMHA_CPP_FOLDER}/blob_list.txt) -endif() - set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd") # generate a list of kernels, but not actually emit files at config stage