Fix the sequence-dimension length used for the o_dram descriptor in the kernel

This commit is contained in:
Qianfeng Zhang
2025-05-10 16:02:52 +00:00
parent 1d1dd8f1eb
commit 79cd1f0653

View File

@@ -749,7 +749,7 @@ struct HstuAttentionFwdKernel
auto o_dram = [&]() {
const auto o_dram_naive = make_naive_tensor_view<address_space_enum::global>(
o_ptr,
-make_tuple(mask.max_uih_len, kargs.hdim_v),
+make_tuple(kargs.seqlen, kargs.hdim_v),
make_tuple(kargs.seq_stride_o, 1),
number<HstuAttentionPipeline::kAlignmentO>{},
number<1>{});