[CK_TILE] support hdim=192/128 pair for deepseekv3 (#1961)

* support hdim=192/128 pair

* remove useless print

* update
This commit is contained in:
carlushuang
2025-03-11 21:07:40 +08:00
committed by GitHub
parent 0db7c8f0b2
commit 7a93b16ff6
8 changed files with 35 additions and 8 deletions

View File

@@ -112,6 +112,13 @@ struct BlockFmhaPipelineQRKSVSAsync
else
return 2;
}
else if constexpr(kQKHeaddim <= 192)
{
if constexpr(kPadSeqLenK && BiasEnum == BlockAttentionBiasEnum::ELEMENTWISE_BIAS)
return 1;
else
return 2;
}
else if constexpr(kQKHeaddim <= 256)
{
return 1;

View File

@@ -13,6 +13,8 @@ static CK_TILE_HOST_DEVICE constexpr index_t ceil_to_qualified_tile_length(index
return 128;
if(len == 160)
return 256;
if(len == 192)
return 192;
// only lengths of 96, 160, 192 and powers of two are supported
if(!(len & (len - 1)))