mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Add s_nops after v_dot to avoid hazard (#808)
* Add s_nops after v_dot to avoid hazard
* Fix builtin for inner_product fp16
* Skip inline version to builtin
* Add comments regarding ISA
* Fix comment regarding s_nop
This commit is contained in:
@@ -118,8 +118,12 @@
|
||||
// inline asm
|
||||
#define CK_USE_AMD_INLINE_ASM 1
|
||||
|
||||
// inner product (DLOP)
|
||||
#define CK_USE_AMD_INNER_PRODUCT_INLINE_ASM 1
|
||||
// inner product (V_MAC/V_FMAC)
|
||||
#define CK_USE_AMD_V_MAC_INLINE_ASM 1
|
||||
|
||||
// V_DOT inline instructions, less efficient since they require adding
|
||||
// `s_nop`s to avoid hazard
|
||||
#define CK_USE_AMD_V_DOT_INLINE_ASM 0
|
||||
|
||||
// block synchronization only s_wait lgkmcnt(0), not vmcnt(0)
|
||||
#define CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 1
|
||||
|
||||
Reference in New Issue
Block a user