Merge commit 'b38bb492a1a55b5abb0c345962143c0f9c482cfb' into develop

This commit is contained in:
assistant-librarian[bot]
2025-11-15 01:40:21 +00:00
parent b4e313286b
commit 54282fc7b2
2 changed files with 3 additions and 3 deletions

View File

@@ -122,7 +122,7 @@ add_compile_options(
# A recent compiler change turned this warning on by default, which led to compile errors.
add_compile_options(-Wno-nrvo)
if(NOT DISABLE_DL_KERNELS)
if(NOT DISABLE_DL_KERNELS AND GPU_TARGETS MATCHES "gfx103|gfx10-3-generic")
add_definitions(-DDL_KERNELS)
set(DL_KERNELS "ON")
set(CK_ENABLE_DL_KERNELS "ON")

View File

@@ -86,7 +86,7 @@ inline __device__ f8x8_t amd_assembly_i4_to_fp8x8(int a)
return bit_cast<f8x8_t>(((static_cast<uint64_t>(fp8x4_1) << 32) | fp8x4_0));
}
#ifdef DL_KERNELS
// c0 += inner_product(a, b0)
// c1 += inner_product(a, b1)
__device__ void amd_assembly_outer_product_1x2(float a, float b0, float b1, float& c0, float& c1)
@@ -430,6 +430,6 @@ __device__ void amd_assembly_outer_product_1x4(int8x16_t a,
c2,
c3);
}
#endif
} // namespace ck
#endif