mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 12:41:26 +00:00
[rocm-libraries] ROCm/rocm-libraries#5516 (commit ff3afda)
[CK_TILE, CK_BUILDER] Add bwd data to CK Tile profiler (#5516) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation We want to close the performance gap between old CK and CK Tile for bwd data convolutions. To achieve this, we need two things: - Configurations for the old CK kernel instances, so that we can map them into CK Tile instances. - Support in the CK profiler to run the CK Tile instances with the same API as for old CK instances. ## Technical Details Extracted kernel configurations from old CK. The codegen Python script for CK Tile convs is extended to also support bwd data. The generated instances are added to the CMake build (target `device_grouped_conv_bwd_data_tile_instances`). A new profiler op (`grouped_conv_bwd_data_tile`) has been added to the CK Profiler. The API is the same as for old CK's profiler op `grouped_conv_bwd_data`.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
1834e318da
commit
ec2dbfbfde
@@ -50,6 +50,7 @@ def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, ver
|
||||
"-D__HIP_PLATFORM_AMD__",
|
||||
"-D CK_EXPERIMENTAL_BUILDER=ON",
|
||||
"-O3",
|
||||
"-Wno-unknown-warning-option",
|
||||
*include_flags,
|
||||
str(cpp_file),
|
||||
"-o", str(output_file)
|
||||
@@ -63,10 +64,15 @@ def compile_single_file(cpp_file: Path, project_root: Path, gpu_target: str, ver
|
||||
timeout=300 # 5 minute timeout per file
|
||||
)
|
||||
|
||||
print(f"\n\n Command: {' '.join(cmd)}\n") if verbose else None
|
||||
|
||||
if result.returncode == 0:
|
||||
return True, ""
|
||||
else:
|
||||
# Extract the key error message
|
||||
if verbose and result.stderr:
|
||||
print(f" {result.stderr}")
|
||||
print()
|
||||
error_output = result.stderr
|
||||
return False, error_output
|
||||
|
||||
|
||||
Reference in New Issue
Block a user