mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Disable DL kernels on all architectures except gfx103x. (#3218)
* disable dl kernels on all archs except gfx103
* add gfx10-3-generic target to cmake
[ROCm/composable_kernel commit: b38bb492a1]
This commit is contained in:
@@ -122,7 +122,7 @@ add_compile_options(
|
||||
# Recent change in compiler makes this warning ON by default, which led to compile errors.
|
||||
add_compile_options(-Wno-nrvo)
|
||||
|
||||
- if(NOT DISABLE_DL_KERNELS)
|
||||
+ if(NOT DISABLE_DL_KERNELS AND GPU_TARGETS MATCHES "gfx103|gfx10-3-generic")
|
||||
add_definitions(-DDL_KERNELS)
|
||||
set(DL_KERNELS "ON")
|
||||
set(CK_ENABLE_DL_KERNELS "ON")
|
||||
|
||||
@@ -86,7 +86,7 @@ inline __device__ f8x8_t amd_assembly_i4_to_fp8x8(int a)
|
||||
|
||||
return bit_cast<f8x8_t>(((static_cast<uint64_t>(fp8x4_1) << 32) | fp8x4_0));
|
||||
}
|
||||
|
||||
#ifdef DL_KERNELS
|
||||
// c0 += inner_product(a, b0)
|
||||
// c1 += inner_product(a, b1)
|
||||
__device__ void amd_assembly_outer_product_1x2(float a, float b0, float b1, float& c0, float& c1)
|
||||
@@ -430,6 +430,6 @@ __device__ void amd_assembly_outer_product_1x4(int8x16_t a,
|
||||
c2,
|
||||
c3);
|
||||
}
|
||||
|
||||
#endif
|
||||
} // namespace ck
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user