mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 21:51:28 +00:00
[rocm-libraries] ROCm/rocm-libraries#6201 (commit 5c0697e)
[CK_TILE] Temporarily remove batch prefill KV cache overflow asserts (#6201)

## Summary

- Temporarily remove the KV cache offset overflow assert checks in `FmhaBatchPrefillWithPagedKVCacheKernel`
- The asserts are **correct**, but they block project progress in certain configurations
- This is a **temporary workaround** to unblock progress; a proper fix will follow

## Note

This is NOT a permanent solution. A follow-up PR will add proper overflow handling that addresses the underlying issue without blocking progress.
This commit is contained in:
committed by
assistant-librarian[bot]
parent
a586a1f8bd
commit
020b6f435e
@@ -484,20 +484,6 @@ struct FmhaBatchPrefillWithPagedKVCacheKernel
|
||||
kargs.init_logits_soft_cap(logits_soft_cap);
|
||||
}
|
||||
|
||||
// Check that the maximum offset won't overflow.
|
||||
if constexpr(kPageBlockSize < FmhaPipeline::kN0)
|
||||
{
|
||||
if(num_total_pages > 1)
|
||||
{
|
||||
assert(static_cast<int64_t>(num_total_pages - 1) * batch_stride_k <=
|
||||
static_cast<int64_t>(std::numeric_limits<index_t>::max()) &&
|
||||
"KV cache K offset overflow: exceed int32 max");
|
||||
assert(static_cast<int64_t>(num_total_pages - 1) * batch_stride_v <=
|
||||
static_cast<int64_t>(std::numeric_limits<index_t>::max()) &&
|
||||
"KV cache V offset overflow: exceed int32 max");
|
||||
}
|
||||
}
|
||||
|
||||
return kargs;
|
||||
}
|
||||
|
||||
@@ -651,20 +637,6 @@ struct FmhaBatchPrefillWithPagedKVCacheKernel
|
||||
kargs.init_logits_soft_cap(logits_soft_cap);
|
||||
}
|
||||
|
||||
// Check that the maximum offset won't overflow.
|
||||
if constexpr(kPageBlockSize < FmhaPipeline::kN0)
|
||||
{
|
||||
if(num_total_pages > 1)
|
||||
{
|
||||
assert(static_cast<int64_t>(num_total_pages - 1) * batch_stride_k <=
|
||||
static_cast<int64_t>(std::numeric_limits<index_t>::max()) &&
|
||||
"KV cache K offset overflow: exceed int32 max");
|
||||
assert(static_cast<int64_t>(num_total_pages - 1) * batch_stride_v <=
|
||||
static_cast<int64_t>(std::numeric_limits<index_t>::max()) &&
|
||||
"KV cache V offset overflow: exceed int32 max");
|
||||
}
|
||||
}
|
||||
|
||||
return kargs;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user