mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
Work around develop validation failure (#513)
* workaround bf16 atten fwd issue on gfx908
* typo
[ROCm/composable_kernel commit: 892a8d769d]
This commit is contained in:
@@ -154,6 +154,13 @@
|
||||
// tuning parameter
|
||||
#define CK_WORKAROUND_SWDEV_325164 0
|
||||
|
||||
// workaround: a BF16 attention kernel for gfx908 is likely affected by a compiler issue
|
||||
#ifdef __gfx908__
|
||||
#define CK_WORKAROUND_SWDEV_XXXXXX_BF16_ATTEN_FWD_GFX908_ISSUE 1
|
||||
#else // __gfx90a__, ...
|
||||
#define CK_WORKAROUND_SWDEV_XXXXXX_BF16_ATTEN_FWD_GFX908_ISSUE 0
|
||||
#endif // __gfx908__
|
||||
|
||||
namespace ck {
|
||||
|
||||
enum struct InMemoryDataOperationEnum
|
||||
|
||||
@@ -874,6 +874,14 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
|
||||
}
|
||||
} // end gemm1
|
||||
|
||||
// workaround compiler issue; see ck/ck.hpp
|
||||
if constexpr(CK_WORKAROUND_SWDEV_XXXXXX_BF16_ATTEN_FWD_GFX908_ISSUE == 1 &&
|
||||
is_same_v<FloatAB, bhalf_t> && MPerBlock == 256 && NPerBlock == 128 &&
|
||||
Gemm1NPerBlock == 128)
|
||||
{
|
||||
__builtin_amdgcn_sched_barrier(0);
|
||||
}
|
||||
|
||||
constexpr auto c_thread_desc_m0_n0_m1_n1_m2_n2_n3_n4 =
|
||||
gemm1_blockwise_gemm.GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4();
|
||||
constexpr auto cm0 = c_thread_desc_m0_n0_m1_n1_m2_n2_n3_n4.GetLength(I0);
|
||||
|
||||
@@ -29,7 +29,7 @@ TYPED_TEST_SUITE(TestBatchedGemmMaskingScaleSoftmaxGemmPermuteBF16, KernelTypes)
|
||||
|
||||
TYPED_TEST(TestBatchedGemmMaskingScaleSoftmaxGemmPermuteBF16, Test_BF16) { this->Run(); }
|
||||
|
||||
TYPED_TEST(TestBatchedGemmMaskingScaleSoftmaxGemmPermuteBF16, Test_FPBF_PadM)
|
||||
TYPED_TEST(TestBatchedGemmMaskingScaleSoftmaxGemmPermuteBF16, Test_BF16_PadM)
|
||||
{
|
||||
this->lengths_ = std::vector<std::vector<int>>{
|
||||
{136, 128, 32, 128, 2, 3},
|
||||
|
||||
Reference in New Issue
Block a user