[CK_TILE] Temporarily remove batch prefill KV cache overflow asserts (#6201)

## Summary
- Temporarily remove the KV cache offset overflow assert checks in
`FmhaBatchPrefillWithPagedKVCacheKernel`
- The asserts are **correct**, but they block project progress in
certain configurations
- This is a **temporary workaround** to unblock progress; a proper fix
will follow

## Note
This is NOT a permanent solution. A follow-up PR will add proper
overflow handling that addresses the underlying issue without blocking
progress.
This commit is contained in:
Jeff Huang
2026-04-07 20:41:24 +08:00
committed by GitHub
parent a9b3eaffb8
commit 8a29683326

View File

@@ -484,20 +484,6 @@ struct FmhaBatchPrefillWithPagedKVCacheKernel
kargs.init_logits_soft_cap(logits_soft_cap);
}
// Check that the maximum offset won't overflow.
if constexpr(kPageBlockSize < FmhaPipeline::kN0)
{
if(num_total_pages > 1)
{
assert(static_cast<int64_t>(num_total_pages - 1) * batch_stride_k <=
static_cast<int64_t>(std::numeric_limits<index_t>::max()) &&
"KV cache K offset overflow: exceed int32 max");
assert(static_cast<int64_t>(num_total_pages - 1) * batch_stride_v <=
static_cast<int64_t>(std::numeric_limits<index_t>::max()) &&
"KV cache V offset overflow: exceed int32 max");
}
}
return kargs;
}
@@ -651,20 +637,6 @@ struct FmhaBatchPrefillWithPagedKVCacheKernel
kargs.init_logits_soft_cap(logits_soft_cap);
}
// Check that the maximum offset won't overflow.
if constexpr(kPageBlockSize < FmhaPipeline::kN0)
{
if(num_total_pages > 1)
{
assert(static_cast<int64_t>(num_total_pages - 1) * batch_stride_k <=
static_cast<int64_t>(std::numeric_limits<index_t>::max()) &&
"KV cache K offset overflow: exceed int32 max");
assert(static_cast<int64_t>(num_total_pages - 1) * batch_stride_v <=
static_cast<int64_t>(std::numeric_limits<index_t>::max()) &&
"KV cache V offset overflow: exceed int32 max");
}
}
return kargs;
}