mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-28 18:56:59 +00:00
Commit Graph
Select branches
Hide Pull Requests
2217-add-io-module-for-profiler
350_grouped_gemm_fix
6c08c5c46d7a13
AITERKER-112
AITERKER-112-temp
Ali_FA3_BWD
CK/rel_stg_3076
Fa_seqlen1_pass
LWPCK-3549
LWPCK-3549-cleanups
LWPCK-3549-two-stage
LWPCK-3731
SWDEV-539321
SWDEV-561448
_bgp_v5_a
_bgp_v5_b
a8w8_fastgelu_gemm
a8w8_instance
a8w8_test
ad/refactor_targets_test
add-aiter-pytest
add_fmha_instances_for_specific_config
add_fmha_tuned_file
add_vllm_kvcache
afagaj/rocm-rel-7.1
afagaj/rocm-rel-7.1-fix
aghamari/cross-layer-5d-kv-cache
aghamari/ua-on-develop
aghamari/ua-on-develop-v2
aghamari/unified-attention-decode-opt
agpr_control
aick-482
aick-647
aiter_cktile_integration_a8w8_a4w4
aiterker-112-fp8-per-token-per-head-prefill
akuppp
amd-develop
amd-master
amd/dev/gehernan/SWDEV-535598_warpSize_fix
amd/dev/janplehr/fix/compiler-lifetime-warning
amd/dev/jruan/defect_slice_tile
amd/dev/jruan/rms_norm_welford
amd/dev/sdittaka/warpsize_workaround
andriy/ck_tile/basic-tutorials
andriy/f6-cherry-pick
arai/ck_tile/streamk_xcd_remap
arai/ck_tile/tile_engine_restructure
async_load_api
atom_test
attention/pagedkv_prefill
aviralgoel/gemm_tutorial
aviralgoel/memory_pipeline_refactor_2
aviralgoel/python-unbuffered-fix
aviralgoel/test_labels
aviralgoel/tutorials
barkocot/6.2.1-bf16-fix
barkocot/basic-v1-interwave
barkocot/builder-experiment
barkocot/bwd-data-instances-opt
barkocot/ck-tile-direct-load-conv
barkocot/conv-instances-removal
barkocot/explicit-string-out
barkocot/grouped-conv-bwd-wei-split-k-hack
barkocot/lwpck-1068
barkocot/lwpck-1916-dev2
barkocot/lwpck-3735
barkocot/lwpck-3853
barkocot/lwpck-4085
barkocot/skip-a-lds-bwd-conv
barkocot/tmp
batch-prefill-fp8-kvcache-blockscale
batched_gemm
bd_a8w8
bernard/add_fp8_fpmax
bernard/testx_soft_signs
bernard_pa_v_layout
bf16_intrinsic_type
bf16x3_fix_1120
bharriso/revert-sharding
bkc_25.08_candidate
build-time-investigation-tile-gemm
builder_inlineDiff
bwd_data_1c_dev
cderb/prefetch_tuning_250930
cderb/prefetch_tuning_251014
cderb/prefetch_tuning_251021
cderb/tuning_250729
ck-python-documentation
ck-tile-basic-tutorials
ck/aviralgoel/abquant_splitk
ck/aviralgoel/add-naive-gemm-tutorial
ck/moe_int32_overflow
ck/opt_a8w8_m32
ckTileEnginePooling
ckTileEnginePooling2
ck_fa_bwd_opt
ck_fp8xint4_gemm_enhance
ck_int4_moe_dev
ck_int4_moe_enhance
ck_int4_moe_register_spill_debug
ck_moe_bs_splitk
ck_moe_bs_splitk_pr
ck_moe_gemm16x16
ck_moe_merge
ck_moe_new_sorting
ck_moe_support_int64_idx
ck_moe_support_int64_v2
ck_pa_vllm
ck_streamk_2tile_sk_dp
ck_tile/batch_prefill_paged_size_16
ck_tile/fa3_fremont
ck_tile/fa_asm_bwd
ck_tile/fa_asm_bwd2
ck_tile/fa_asm_bwd_sync_fix
ck_tile/fa_bwd_opt2
ck_tile/fa_bwd_opt_rtn
ck_tile/fa_bwd_v3
ck_tile/fa_bwd_v3_sbhd
ck_tile/fa_bwd_v3_test
ck_tile/fa_bwd_v3_tmp
ck_tile/fa_opt_static_softmax
ck_tile/fmha_block_scale
ck_tile/fmha_fp8
ck_tile/fmha_hdim_160
ck_tile/fmha_in_fp8_async
ck_tile/fmha_in_fp8_async_192_128
ck_tile/fmha_in_fp8_split
ck_tile/fmha_nwarp_example
ck_tile/fused_moe
ck_tile/fused_moe_general
ck_tile/gemm_blockscale_eightwarps
ck_tile/gemm_debug_alias
ck_tile/gemm_felix_test
ck_tile/gemm_opt
ck_tile/kvcache_prefill
ck_tile/layer_norm_smalln_opt
ck_tile/ln_add_cache_clear
ck_tile/ln_bw
ck_tile/pagedkv_seqlow4k
ck_tile/splitkv_combine_v2
ck_tile/test_qv192_v128
ck_tile_batch_norm_forward_blockwise_welford
ck_tile_fmha_block_scale
ck_tile_gemm_streamk_draft
ck_tile_toy
ck_tile_tutorial
ck_wpreshuffle
ckfs
cktile/fmha_fp8_scale_p_test
cktile_mxfp4_plus
congma/ck_tile/fix_preshuffle_b
congma/ck_tile/preshuffle_b
congma/dev/abquant_failure
congma/dev/fix_preshuffle_b
congma13/ck_tile/inclusive_scan_sequence
congma13/ck_tile/inclusive_scan_sequence_ck
copilot/build-gemm-example
copilot/investigate-fmha-kernel-execution
copilot/move-pr-3723-to-rocm-libraries
copilot/research-fused-kernel-patterns
copilot/sub-pr-3236
copilot/sub-pr-3341
copilot/sub-pr-3466
copilot/update-fp8-support-in-readme
cshuffle-epilogue-bank-conflict-tests
cshuffle-epilogue-tests
cshuffle-fix
debug_moe
dev/a8w4_and_a8w8splitk_yadai
dev/a8w8_b_preshuffle
dev/ck_moe_gemm
dev/ck_moe_gemm2
dev/ck_moe_gemm2_merge
dev/ck_moe_gemm_output_sorting
dev/ck_moe_int4
dev/cka8w8_tuning
dev/flatmm
dev/gemm_reduce_seperate_moe
dev/huizzhan/ck_basic_gemm_topksoftmax
dev/huizzhan/ck_fusion_gemm_topksoftmax
dev/huizzhan/ck_gemm_softmax_topk
dev/huizzhan/ck_grouped_topksoftmax_test
dev/huizzhan/ck_pw_gemm_topksoftmax
dev/huizzhan/ck_toy_example
dev/huizzhan/fusion_pw_gemm_grouped_topk
dev/merge_u8w8_onlygemm_build
dev/moe_ffn
dev/moe_opt
dev/page_fa
dev/page_fa_cap_logits
dev/paged_window
dev/preshuffle_350
dev/test_es
dev/yadai
develop
develop-archive-2026-05-27
develop-test
develop_deprecated
device_gemm_multiple_d_xdl_cshuffle_v3_b_preshuffle
dkrottap/cp
dlejeune/fmha_fwd_test_all_hdim
dlejeune/fmha_fwd_test_all_hdim_dropout_test
dlejeune/gemm_pipeline_mem_skip_lds
dlejeune/mhc_core
dlejeune/qwen3.5_fmha_batch_prefill_opt2
dlejeune/qwen3.5_opt_gfx942
dlejeune/sinkhorn
dlejeune/ua-sink
dlejeune/ua-swa
dlejeune/ua-swa-v2
docs/5.6.1
docs/5.7.0
docs/5.7.1
docs/6.0.0
docs/6.0.2
docs/6.1.0
docs/6.1.1
docs/6.1.2
docs/6.1.5
docs/6.2.0
docs/6.2.1
docs/6.2.2
docs/6.2.4
docs/6.3.0
docs/6.3.1
docs/6.3.2
docs/6.3.3
docs/6.4.0
docs/6.4.1
docs/6.4.2
docs/6.4.3
docs/7.0.0
docs/7.0.1
docs/7.0.2
docs/7.1.0
docs/7.1.1
docs/7.2.0
docs/7.2.1
ds_read_tr_exp
dteng/dev/flatmm
dump_kernels_ck
dup-status-checks
ecamartins/ck_tile_warp_gemm_play
emimarti/ck_tile/incorrect_validation
enable_persistent_async
epilogue_refactor
eterpstr/abquant-transposec-fix
eterpstr/preshuffle-bquant-for-abquant-preshuffleb
f8blockscale_bpreshuffle_aiter
f8gemm_perf
fa_decode_pipeline
fav3_exp
fav3_group_mode
feat-mixed_input_flatmm
feat/swiglustep-moe-no-quant
features/grouped-conv-perf-uplift
feiw/dev/asm_f8
feiw/dev/ckt_cm9
feiw/dev/f4
feiw/dev/uk_gemm
feiw/gemm1_bns
feiw/gemm1_bns_perf
feiw/mxfp4_moe_2Stages
felix/add_nkpad
felix/ck_hot_fix
felix/flatmm
felix/flatmm_fix_splitk
felix/sorting
felix/tunx_norm
fix-ci-2542
fix-emu-error
fix-flatmm
fix/buffer_rsrc_cast
fix/guard-against-compiler-warning
fix_cktile_sequence
fix_gino
fix_gptoss_sink
fix_int4_tests
fix_moe_a16w4
fix_title
flatmm-moe-gemm-improve-testing-coverage
flatmm-moe-remap-xcd
flatmm-mxfp4-async
flatmm_for_compiler_dteng
flatmm_merge
fmh-ut-fix-squant
fmha_bwd_trload_mfma32
fmha_fwd_test_all_hdim
fmha_pipeline_nwarp_sshuffle_improve
fmha_pipeline_nwarp_sshuffle_n0loop
for_rga
fp16_int4_scale_weight_only_impl_bak
fp4_gu_moe
fp4_mx_bshf_v3
fp4_scale_gemm_dev
fp8_bpreshuffle
gate-fmha-padding
gemm_async_load_opt
gemm_bf16_sk_bug_fix
gemm_getname
gemm_w8_only
gfx950_debug
ginolu/fix-fa-hdim160
ginolu/flatmm_async
ginolu/perf
ginolu/sparge_attention
ginolu/timer_refine
ginolu/ut_async
hack_block_dropout
hdim48_support
hdim_96_160
heuristics-tile-gemm
hot_fix_fp8_gufusion
hot_fix_int32_overflow
hstu_attention_fwd
hstu_attention_fwd_bwd
int4_fp8_convert_opt
int4_pr_based_on_JingPR
int4_weight_only_from_mtgu_golden
int4_xdlop_16x16_fix
int8_weight_only_fix
int_fp8_cvtOpt_pr
ipanfilo/clang22_build_hotfix
jakpiase/ck_tile/conv_pipeline_v5
jakpiase/conv_bwd_data_direct_loads
jakpiase/gemm_pipeline_mem_skip_lds
jakpiase/grouped_conv_bwd_data_clamp
jakpiase/grouped_conv_dl
jakpiase/nchw_conv_optimizations
jakpiase/poc_cba
jakpiase/tmp_branch
jakpiase/tmp_save_conv
japiasec/ck_tile/irregular_tail_vectorloads
jeff/batch-prefill-vectorized-performance-fix
jeonghyun/ckb-add-optimized-kernel-validation
jeonghyun/ckb-almiopen-522-descriptor-init
jeongkim/grouped-conv-bias-bnorm-clamp-tolerance-gfx11
jian.wu.dev.fhma_bwd_d512
jim/ck_tile/fa_bwd_v3
jim/ck_tile/fa_v3_codegen
jim/dev/fav3_bwd_swa
jim/dev/fix_group_mode_mismatch
jim/dev/reduce_instance
jizhan/mixed_prec_gemm_pk_i4
jizhan/reduce_threadwise_multi_d
jizhan/tensor_contraction_v3
jograner/bwd-data-group-merge
jograner/bwd-weight-group-merge-type-string
jograner/bwd-weight-instance
jograner/grouped-gemm-issue
jograner/hotfix-grouped-gemm-two-stage-2
joye_ck_tile_dev
jshumway/analyze-build
jshumway/exp-build-analysis
jshumway/parse-build
jshumway/tensor
jshumway/transform
jshumway/util-readme
jukorhon/fa4-k-preread
jukorhon/fa4-kv128-vgpr
jukorhon/ua-blocktables-oob-fix
jukorhon/ua-multipagetile-fix
jukorhon/ua-multipagetile-fix-fp8
jukorhon/ua-multipagetile-fix-fp8-rebased
jukorhon/ua-multipagetile-fix-fp8-thd
jukorhon/ua-overflow-fix
jukorhon/unified-attention
jukorhon/unified-attention-ck
jukorhon/unified-attention-dev
jun/format_fix
jzhou/pre-load-ds
kabraham/builder_add_constraints
kabraham/builder_bwd_data
kabraham/describe-fn-wmma
kabraham/factory-tests
kabraham/prng_tests_integration
kqian/lynm+bias+quant
kylasa_check_err
kylasa_kdim_pr
kylasa_mdim_functional_working
kylasa_mdim_pingpong
kylasa_ping_pong
kyle/c_column_layout_test
kyle/gemm_test
kyle/grouped_gemm_blockwise
letaoqin/batch_prefile_block_scale_ptkvb
letaoqin/gemm_bias_activation
linsun/convint8_miopen_integration
lirui/vllm_atom_m3_0624
lj/whole_k_pipeline
lwpck-4180
lwpck-4181
lxx/dev/fix_gfx1250_compile
lynm_bwd_dteng
macurtis/amd-master
mainline_deprecated
marcusr/aiesw-32176-w4a16-ck-asym
master
matthias.gfx11_ck
merge-mixed_input_flatmm
merge/aiter_main_and_ck_fp4
meskelin/add_desc_tensorviews_universal
meskelin/refactor-makegemmtensorviews
mh/revert-rmsnorm-align-HF
mh/testing
mhynag/moe-index-support
mi300_time_measurement
mi355_async_load_dev
mi355_transpose_load_dev
migraphx
migraphx-ci-fix
migraphx-update
mock_moesorting_disable
moe_aiter_debug
moe_block_m_128
moe_block_m_32
moe_blockscale_func
moe_bs_fp8_no_asm
moe_bs_fp8_no_asm_buf2lds
moe_bs_stage1_dev
moe_cross_reduce
moe_fp8_persistent
moe_gemm_fuse_activation
moe_inline_develop
moe_xcd_remap
mohsen-backup
mono-split/users/yiding12/fmha-bwd-async-prepare
mono-split/users/yiding12/fmha-bwd-group-persistent
mono-split/users/yiding12/fmha-bwd-workspace
mozga-amd/cache_eneble_auto
mozga-amd/default_epilog_d_support
mozga-amd/fix_bug_transpose
mozga-amd/fix_transpose_matrix
mozga-amd/fix_universal_gemm_traits
mozga-amd/gemm_basic_fix
mozga-amd/grouped_conv_fix
mozga-amd/mulit_abd
mozga-amd/multiple_abd
mozga-amd/multiple_abd_gemm
mozga-amd/pipeline_errors
mozga-amd/rewrite_no_index_reduce_tests_gtest
mozga-amd/simple_dbg_for_cpu
mpodkory/find-transform-optimization
mpodkory/generate-tuple-optimizations
mpodkory/recursive-to-pack-expansion
mpodkory/static-for-indexed
mpodkory/template-optimization-tests
mpodkory/transform-tensor-descriptor-optimization
mtgu/cktile_mxfp4
mtgu/cktile_mxfp4_flatmm_dev
mtgu/dev/ck_moe_gemm2_int4_merge
mtgu/dev/ck_moe_int4
mtgu/dev/deepseekv3_ab_scale
mtgu/dev/gemm_fp8xint4
mtgu/dev/gemm_fp8xint4_Bpreshuffle
mtgu/dev/gemm_fp8xint4_Bpreshuffle_static2static
mtgu/dev/vllm_w8a8
mtgu/int4_moe_tencent
mtgu/mxfp4_gemm
mtgu/pr2/int4_scale_dev
muozturk_bf16_sk_padding_fix
muozturk_streamk_reduction_fix
mx_moe_f4_scale_shuffle
mx_moe_f4_scaleshuffle_Bnoshuffle
mxf4_moe_async_ver
mxfp4_moe_blockscale_buf2lds
mxfp6-flatmm
naive_pa_update
new_develop
new_time_based_ckprofiler_emin
old_ck_streamk_fix
opt-a16w4_moe_gemm2
origin/philipm/documentation-cleanup-5
origin/pmaybank/tile_engine-gfx12
oscar/ds_tune
per_tensor_quant
philipm/ck-tile-docs
philipm/ck_tile/WMMA_GEMM_F16
philipm/ck_tile/WMMA_GEMM_F16-0
philipm/documentation-cleanup
philipm/documentation-cleanup-5
philipm/documentation-cleanup-7
philipm/fix-tile_engine-json-output
pk_i4_t_enable_base
pk_i4_v3_failures_fix
pmaybank/rdna-dev-1
pmaybank/tile_engine-gfx12-1
pmaybank/tile_engine-gfx12_debug
pmaybank/tile_engine_gemm
pmaybank/toy_example
power_analysis
poyenc/fix-missing-template-argument
prec_param
ptpc_gemm_group_persistent
py2.6_noCK_bmm
pytorch/release/2.10
pytorch/release/2.11
pytorch/release/2.12
pytorch/release/2.8
pytorch/release/2.9
pytorch2.6_base
pytorch_release_2.6
qlin/port_gfx11_build_refine
qlin/refine_ck_profiler_test_script
qlin/remap_xdl
radeon-ai/optimised-block_sync_lds
rakesroy/7.0/psdb_test
rakesroy/7.0/psdb_test_onnx
ratio_mask_for_fmla_prefill
re-enable-two-fp16-x-pkint4-tests
refactor_vector_type
refine_flatmm
refine_flatmm2
reg_spill_moe_debug
relbers/moe_sorting/lane_group_sz_to_1
release-staging/rocm-rel-6.2
release-staging/rocm-rel-6.3
release-staging/rocm-rel-6.4
release-staging/rocm-rel-6.5
release-staging/rocm-rel-7.0
release/rocm-rel-5.6
release/rocm-rel-5.7
release/rocm-rel-5.7.00.48
release/rocm-rel-5.7.1.1
release/rocm-rel-6.0
release/rocm-rel-6.0-staging
release/rocm-rel-6.1
release/rocm-rel-6.1-staging
release/rocm-rel-6.1.0.36
release/rocm-rel-6.1.00.36
release/rocm-rel-6.1.1.1
release/rocm-rel-6.1.4.01
release/rocm-rel-6.2
release/rocm-rel-6.2-staging
release/rocm-rel-6.2.0.5
release/rocm-rel-6.2.2.1
release/rocm-rel-6.2.4.1
release/rocm-rel-6.3
release/rocm-rel-6.3-staging
release/rocm-rel-6.3.0.1
release/rocm-rel-6.3.0.2
release/rocm-rel-6.4
release/rocm-rel-6.4.1.2
release/rocm-rel-6.4.2.2
release/rocm-rel-7.0
release/rocm-rel-7.0.2.1
release/rocm-rel-7.0.2.2
release/rocm-rel-7.1
release/rocm-rel-7.1.1.1
release/rocm-rel-7.2
release/rocm-rel-7.2.0.1
release/therock-7.10
release/therock-7.9
revert-3288-streamhpc/grouped-conv-fwd-wmma-tuned-instances
revert-3378-streamhpc/conv_bwd_weight_wmma_instance_selection
revert-3603-eterpstr/206-block-scale-gemm-fp4-support
rocking/fmha-async-intrinsic
rocm-libraries-export-2d4a3223cb
rocm-libraries-export-dbc6589bf2
rocm7.1_gg_performance
ruimin_dbg_conv_fwd
samaario/ua-tune
samidamien/unified_attention
samremes/bmatrix_2d_blockscale
samremes/ck_tile_mx_gemm
samremes/double_buffer_fp8_ab_scale
samremes/fmha_192x128_hdim_occupancy
samremes/fmha_fwd_v3_for_gfx942
samremes/quantize_in_ab_scale_gemm
samremes/temp_tests
satya_temp
shahamed/ck3129
shared/big_day_merge_asm_fix
shuffle_tile_enhance
sink_attn
sinkhorn
sk_ex_fp8_bf8_cktile
sk_reduction_fix_oldsk
sk_tests_backup
so/f4moe
so/moe_a4w4
solin/flatmm
spolifroni-amd-patch-1
spolifroni-amd/edit-conf-py-with-doxylink
srayasam/test
srayasam/test-1
srayasam/test-release
srayasam/test1
srayasam/test2
srayasam/test3
srayasam/testing-branch
srayasam/therock-test
streamhpc/grouped-conv-fwd-extra-flavors
streamhpc/grouped-conv-fwd-wmma-tuned-instances
streamhpc/impl-splitk-device-grouped-conv-fwd-multiple-abd-xdl-cshuffle-v3
streamhpc/mix_prec_microscaling_bquant
streamhpc/wavetile_transfer_bwd_data_and_bwd_wei-support
streamhpc/wmma_gemm_quantization
streamk_debug
streamk_device_grid_info_print
streamk_fix
streamk_old
streamk_revert
support_engine_cache
te_bwd_bias
te_bwd_v3_hd64
te_debug_dbias_1hss
te_v3_bwd_sbhd
tenpercent/async_copy_gemm_v3
tenpercent/cc-skill-build
tenpercent/ck-build-analysis-skill
tenpercent/cktile_rename_f8
tenpercent/compv3_build_time_reduce_experiment
tenpercent/dispatch
tenpercent/generate-identity-sequences
tenpercent/gfx950_lds_experiments
tenpercent/revert-ck-fp8-struct
tenpercent/statically-indexed-array-rewrite
tenpercent/tensor-descriptor-functor-optimization
test-load-tile-transpose
test-mixed_input_flatmm
test_async_v3
test_ck_bd_ticket
test_ck_bug
test_config_heuristic
test_fmha_ck_tile
tests_for_batched_grouped_gemm
testx
testx_bprefill
testx_temp
testx_v2
thomas/experiment
thomning/ck_tile/mx_gemm_packing
tianwyan/streamk
tianxing/unified-attention
tianxing/unified-attention-quantization
tianyuwu/ck_tile/ck_tile_example_bringup
tileengine-restructure
tlakshma_950_support
tlakshma_tileengine_enable_arch
tmp-develop
tomjen12/clang-fix-20-fremont
toy_example
toy_example_ms
try_merge_with_multiple_abd
tune_norm
uai-develop
uai-migraphx
uif2-migraphx
universal_streamk
universal_streamk_debug
update_cka16w16_uc
update_cka8w8_uc_padding
update_cka8w8_uc_padding_dev
users/ArthurLiu/ck_fmha_codegen
users/Vsevolod1983/TempBranchAddLogging
users/andriy/ck/1464-pytorch-gfx1250
users/dahawkin/revert-f86bbb1aefdd047b2b0e886dda831417e790f622
users/darren-amd/ck-gfx1153-release-2.9
users/jam/rdna3-rdna4-fmha-tile-load-fixes
users/msaffari-amd/ck/dispatcher_test_in_aiter
users/randyspauldingamd/dep_parser_monorepo
users/randyspauldingamd/gtest_fixturemap
vec_stores_c_col_v3
vec_stores_c_col_v4
veergopu/add_6764
vmcnt0issue
vpietila/add-fwd-conv-v3-instances-for-unit-group-size
vpietila/bwd-conv-weight-integration-tests
vpietila/ck-profiling-documentation
vpietila/ck-tile-split-k-opt
vpietila/ck-vs-ck-tile-conv-benchmarking
vpietila/ckb-add-ck-tile-bwd-weight-instances
vpietila/ckb-add-defaults-for-optional-template-params
vpietila/ckb-block-tiling
vpietila/ckb-bwd-instances
vpietila/ckb-fwd-bwd-instances
vpietila/ckb-fwd-instance-test-improvements
vpietila/ckb-generaized-conv-factory-baseline
vpietila/ckb-improve-compile-time-errors
vpietila/ckb-refactor-warp-gemm-descriptors
vpietila/ckb-remove-explicit-device-op-flag
vpietila/convolution-builder
vpietila/ggemm-profiling
vpietila/improved-fwd-merged-conv-group-instances
vpietila/int8-perf-on-navi4x
vpietila/lwpck-3530
vpietila/merge-multiple-conv-groups-fully-working-baseline
vpietila/merge-multiple-depthwise-conv-groups-into-single-gemm-batch
vpietila/merge-multiple-fwd-conv-groups-into-single-gemm-batch
vpietila/miopen-dev
vpietila/packed-bf16-cast-for-grouped-conv
vpietila/packed-bf16-cast-for-grouped-conv-v1
vpietila/retina-net-fwd-convs
vpietila/retina-net-fwd-convs-baseline
vpietila/retina-net-training-perf
vpietila/split-k-param-auto-deduce
warp_specialized_scheduling
wave_buffer_resource_patch
whole_k_prefetch_n0loop
wip-f4-redesign
wip-f4-wp-joye
wip-fa-cshuffle
wip_355
wip_355_xcd_remap
wjx/align_v3_pipeline
wjx/atomic_add_bf16
wjx/cK_tile/moe_gemm
wjx/fix_moe_expert_oob
wjx/fix_moe_gfx942
wjx/fix_moe_splitk
wjx/fix_splitk_moe
wjx/flatmm_merge
wjx/grouped_flatmm
wjx/m_grouped_flatmm
wjx/moe_k64_patch
wjx/moe_mx_fp4_for_aiter
wjx/moe_v3_aiter
wjx/moe_v3_fp8_gfx942_spill
wjx/mxfp4_moe_2Stages
wjx/mxfp4_moe_bpreshuffle_v1
wjx/mxfp4_v1_pipe
wjx/preshuffle_format
wjx/reproduce_moe_spill
wjx/topK_weights
wjx/wp_gemm_fix
workable_async_copy
wulley_v_layout
xformers_version
xiangxli_fa_bwd_support_atomic16
xiangxli_fa_ck_bwd_support_atomic16
xiangxli_mask_support_y_ratio_new
xiangxli_support_ratio_mask
xin/add_all_op_tests
xin/test-pip
xor_async_fa
xyt/ln_patch
yadai/moe_a4w4
yanda/wip_355
yandai/a16w4_old_layout
yandai/moe_flatmm_async
yandai/moe_flatmm_async_scale_b16
yandai/wip_mi355
yewang12/bias_for_padding
yewang12/bwd_group_persistent_hack_cu
yewang12/ck-varlen-bwd-det
yewang12/debug_v3_bs3hd
yewang12/debug_v3_bsh3d
yewang12/debug_v3_bshd_bs2hd
yewang12/flash-attn-yiding-ck
yewang12/rocm_flash_attn_cherrypick_PR3615
yewang12/te_bias_all_minf
yewang12/te_deterministic
yewang_qkv_sbhd
yiding12/d64-det
yilin/practice
yiltan-temp
yuyun/mla
zain/TE-native-bshd-thd
zain/ck-graph-fix
zain/ck-graph-fix-cherry
zain/qola/filter
zain/qola/te-receipt
zan/cK_tile/moe_gemm
zan_fix_bufferloadlds
zan_ln
zan_norm
zan_tune
zan_vllm
zan_vllm_layout
zan_vllm_test
zhe_test
zhimding/ck_hot_fix_moe_sorting
zhimding/develop
zhimding/moe_flatmm_async
zqf_base_develop
#1
#100
#1001
#1002
#1003
#1004
#1005
#1006
#1007
#1008
#1009
#101
#1010
#1011
#1012
#1013
#1014
#1015
#1017
#1018
#1019
#1021
#1022
#1023
#1024
#1025
#1026
#1027
#1028
#1029
#1030
#1033
#1035
#1036
#1037
#1039
#1040
#1042
#1043
#1044
#1045
#1046
#1047
#1048
#1049
#1050
#1051
#1052
#1054
#1055
#1056
#1057
#1058
#1059
#106
#1060
#1061
#1062
#1063
#1064
#1065
#1066
#1067
#1068
#1069
#107
#1071
#1072
#1073
#1075
#1077
#1079
#108
#1080
#1081
#1082
#1083
#1084
#1085
#1086
#1087
#1088
#109
#1090
#1091
#1092
#1093
#1094
#1095
#1096
#1097
#1098
#1099
#1100
#1101
#1102
#1103
#1104
#1105
#1106
#1107
#1108
#1109
#111
#1110
#1112
#1114
#1115
#1116
#1117
#1118
#1119
#112
#1120
#1121
#1123
#1124
#1125
#1126
#1127
#1128
#1129
#113
#1130
#1131
#1132
#1133
#1134
#1135
#1136
#1137
#1138
#1139
#114
#1141
#1142
#1143
#1144
#1145
#1147
#1148
#1149
#115
#1150
#1151
#1152
#1153
#1154
#1155
#1156
#1157
#1158
#1159
#116
#1160
#1161
#1162
#1163
#1164
#1165
#1166
#1167
#1168
#1169
#117
#1170
#1172
#1173
#1174
#1175
#1176
#1177
#1178
#118
#1180
#1181
#1182
#1183
#1185
#1186
#1187
#1188
#1189
#119
#1190
#1191
#1192
#1193
#1194
#1195
#1196
#1197
#1198
#120
#1200
#1202
#1203
#1205
#1206
#1207
#1208
#1210
#1211
#1212
#1213
#1214
#1216
#1217
#1218
#1219
#122
#1220
#1221
#1222
#1223
#1224
#1225
#1226
#1227
#1228
#1229
#123
#1230
#1231
#1232
#1234
#1236
#1237
#1238
#1239
#124
#1240
#1241
#1242
#1243
#1244
#1245
#1246
#1247
#1248
#1249
#125
#1250
#1251
#1253
#1254
#1255
#1256
#1257
#1258
#1259
#126
#1260
#1262
#1263
#1264
#1265
#1266
#1267
#1268
#1269
#127
#1270
#1271
#1272
#1273
#1274
#1275
#1277
#1278
#128
#1280
#1281
#1282
#1283
#1284
#1285
#1286
#1287
#1289
#1290
#1291
#1292
#1293
#1295
#1296
#1297
#1299
#13
#130
#1300
#1301
#1302
#1303
#1304
#1305
#1306
#1307
#1308
#1309
#131
#1310
#1311
#1312
#1313
#1314
#1315
#1316
#1317
#1318
#1319
#132
#1320
#1321
#1322
#1323
#1324
#1325
#1326
#1327
#1328
#1329
#133
#1331
#1332
#1333
#1335
#1336
#1337
#1338
#1339
#134
#1340
#1341
#1342
#1343
#1344
#1345
#1346
#1347
#1348
#1349
#1350
#1351
#1352
#1353
#1354
#1355
#1356
#1358
#1359
#1360
#1361
#1362
#1363
#1364
#1365
#1366
#1367
#1368
#1369
#1370
#1372
#1374
#1375
#1376
#1377
#1378
#1379
#1380
#1381
#1382
#1383
#1384
#1385
#1386
#1387
#1388
#1389
#1390
#1391
#1392
#1393
#1394
#1395
#1396
#1397
#1398
#1399
#14
#140
#1400
#1401
#1403
#1404
#1406
#1407
#1408
#1409
#141
#1410
#1411
#1412
#1413
#1414
#1415
#1416
#1417
#1419
#142
#1420
#1421
#1423
#1424
#1425
#1426
#1427
#1428
#1429
#143
#1430
#1432
#1433
#1435
#1437
#1440
#1441
#1442
#1443
#1444
#1445
#1446
#1447
#1448
#1449
#145
#1450
#1451
#1452
#1453
#1454
#1455
#1456
#1457
#1458
#1461
#1462
#1463
#1464
#1465
#1467
#1468
#1469
#1470
#1471
#1472
#1473
#1474
#1475
#1476
#1478
#1479
#148
#1480
#1481
#1482
#1483
#1484
#1485
#1486
#1487
#1488
#1489
#149
#1490
#1491
#1492
#1493
#1494
#1495
#1496
#1497
#1499
#15
#150
#1500
#1501
#1502
#1503
#1504
#1505
#1506
#1507
#1508
#1509
#151
#1511
#1512
#1513
#1515
#1516
#1517
#1518
#1519
#152
#1520
#1521
#1522
#1523
#1524
#1525
#1526
#1527
#1528
#1529
#1530
#1531
#1532
#1533
#1535
#1536
#1537
#1538
#1539
#1540
#1541
#1542
#1543
#1544
#1545
#1546
#1547
#1548
#1549
#155
#1550
#1552
#1553
#1554
#1555
#1556
#1557
#1558
#1559
#156
#1560
#1561
#1562
#1563
#1564
#1565
#1566
#1567
#1568
#1569
#1570
#1571
#1572
#1573
#1574
#1575
#1576
#1577
#1578
#1579
#158
#1582
#1583
#1584
#1585
#1587
#1588
#1589
#159
#1590
#1591
#1592
#1593
#1594
#1595
#1596
#1597
#1598
#1599
#16
#160
#1600
#1601
#1602
#1604
#1605
#1606
#1607
#1608
#1609
#161
#1610
#1611
#1612
#1613
#1614
#1615
#1616
#1617
#1618
#1619
#1620
#1621
#1622
#1623
#1624
#1625
#1626
#1627
#1628
#1629
#163
#1630
#1631
#1632
#1633
#1634
#1635
#1636
#1637
#1639
#1640
#1642
#1643
#1644
#1645
#1647
#1648
#1649
#165
#1650
#1651
#1653
#1654
#1655
#1657
#1658
#1659
#166
#1660
#1661
#1662
#1663
#1664
#1665
#1666
#1667
#1668
#1669
#167
#1670
#1671
#1672
#1673
#1674
#1675
#1676
#1677
#1678
#1679
#168
#1680
#1681
#1682
#1683
#1684
#1685
#1686
#1687
#1688
#1689
#1690
#1691
#1692
#1694
#1695
#1696
#1697
#1698
#1699
#1700
#1701
#1702
#1703
#1704
#1705
#1706
#1708
#171
#1710
#1711
#1712
#1713
#1714
#1715
#1716
#1717
#1718
#1719
#1720
#1721
#1722
#1723
#1724
#1725
#1726
#1728
#1729
#1730
#1731
#1732
#1733
#1734
#1735
#1736
#1737
#1738
#1739
#174
#1740
#1741
#1742
#1743
#1744
#1745
#1746
#1747
#1748
#1749
#175
#1750
#1751
#1752
#1753
#1754
#1755
#1756
#1758
#176
#1760
#1761
#1762
#1763
#1764
#1765
#1766
#1767
#1768
#1769
#1770
#1771
#1772
#1774
#1775
#1776
#1778
#1779
#178
#1783
#1784
#1785
#1786
#1787
#1788
#1789
#1790
#1791
#1792
#1793
#1794
#1795
#1796
#1797
#1798
#1799
#18
#1800
#1801
#1802
#1803
#1804
#1805
#1806
#1807
#1808
#1809
#181
#1810
#1811
#1812
#1813
#1814
#1815
#1816
#1817
#1818
#1819
#182
#1820
#1821
#1822
#1823
#1824
#1825
#1826
#1827
#1828
#1829
#183
#1830
#1831
#1832
#1834
#1835
#1836
#1837
#1838
#1839
#184
#1840
#1842
#1843
#1844
#1845
#1846
#1847
#1848
#1849
#185
#1850
#1851
#1852
#1853
#1854
#1856
#1858
#1859
#186
#1860
#1861
#1862
#1863
#1864
#1866
#1867
#1868
#1869
#187
#1871
#1872
#1873
#1874
#1875
#1876
#1877
#1878
#1879
#188
#1880
#1881
#1882
#1883
#1884
#1885
#1886
#1887
#1888
#1889
#189
#1891
#1892
#1894
#1895
#1896
#1897
#1898
#1899
#19
#190
#1900
#1901
#1902
#1903
#1904
#1905
#1906
#1907
#1908
#1909
#1910
#1911
#1912
#1913
#1914
#1915
#1916
#1917
#1918
#1919
#192
#1920
#1921
#1922
#1923
#1924
#1925
#1927
#1930
#1931
#1932
#1933
#1934
#1935
#1936
#1937
#1938
#1939
#194
#1940
#1941
#1942
#1943
#1944
#1945
#1947
#1948
#1949
#195
#1950
#1951
#1952
#1953
#1954
#1955
#1956
#1957
#1959
#196
#1960
#1961
#1962
#1965
#1966
#1967
#1968
#1969
#197
#1970
#1971
#1972
#1973
#1974
#1975
#1976
#1977
#1978
#1979
#1980
#1981
#1982
#1983
#1984
#1985
#1986
#1987
#1988
#1989
#199
#1990
#1991
#1992
#1993
#1994
#1995
#1996
#1997
#1998
#1999
#2
#20
#200
#2000
#2001
#2002
#2003
#2004
#2005
#2006
#2007
#2008
#2009
#201
#2010
#2011
#2012
#2013
#2014
#2015
#2016
#2018
#2019
#202
#2020
#2021
#2022
#2023
#2024
#2025
#2026
#2027
#2029
#2031
#2032
#2034
#2035
#2036
#2038
#2039
#204
#2040
#2041
#2042
#2043
#2044
#2045
#2046
#2047
#2048
#2049
#205
#2050
#2051
#2052
#2053
#2054
#2055
#2056
#2057
#2058
#2059
#206
#2060
#2061
#2062
#2063
#2064
#2065
#2066
#2067
#2068
#2069
#2070
#2071
#2072
#2073
#2074
#2075
#2077
#2078
#2079
#208
#2080
#2082
#2083
#2084
#2085
#2086
#2087
#2088
#2089
#209
#2090
#2091
#2092
#2093
#2094
#2095
#2096
#2098
#2099
#21
#210
#2100
#2101
#2102
#2103
#2104
#2105
#2106
#2108
#2109
#211
#2110
#2111
#2112
#2113
#2114
#2115
#2116
#2117
#2118
#2119
#212
#2120
#2121
#2122
#2123
#2124
#2125
#2126
#2127
#2128
#2129
#213
#2130
#2131
#2132
#2133
#2134
#2135
#2136
#2137
#2138
#2139
#214
#2140
#2141
#2142
#2143
#2144
#2145
#2146
#2147
#2148
#2149
#215
#2150
#2151
#2152
#2153
#2154
#2155
#2156
#2157
#2158
#2159
#2160
#2161
#2162
#2163
#2164
#2165
#2166
#2167
#2168
#2169
#217
#2170
#2171
#2172
#2173
#2174
#2175
#2176
#2177
#2178
#2179
#2180
#2181
#2182
#2183
#2184
#2185
#2186
#2187
#2188
#2189
#219
#2190
#2191
#2192
#2193
#2194
#2195
#2196
#2197
#2198
#2199
#22
#220
#2200
#2201
#2202
#2203
#2204
#2206
#2207
#2208
#2209
#2210
#2211
#2212
#2213
#2214
#2215
#2216
#2218
#2219
#2221
#2222
#2223
#2224
#2225
#2226
#2227
#2228
#2229
#2230
#2231
#2232
#2233
#2234
#2235
#2236
#2237
#2238
#2239
#224
#2240
#2241
#2242
#2243
#2244
#2245
#2246
#2247
#2248
#2249
#2250
#2251
#2252
#2253
#2254
#2255
#2256
#2257
#2258
#2259
#226
#2260
#2261
#2262
#2263
#2264
#2265
#2266
#2267
#2268
#2269
#2270
#2272
#2273
#2274
#2275
#2276
#2277
#2278
#2279
#228
#2280
#2281
#2282
#2283
#2284
#2285
#2286
#2287
#2288
#2289
#229
#2290
#2291
#2292
#2293
#2294
#2295
#2296
#2297
#2298
#2299
#23
#230
#2300
#2301
#2302
#2303
#2304
#2305
#2306
#2307
#2308
#2309
#231
#2310
#2311
#2312
#2313
#2314
#2315
#2316
#2317
#2318
#2319
#232
#2320
#2321
#2322
#2323
#2324
#2325
#2326
#2327
#2328
#2329
#233
#2330
#2331
#2332
#2333
#2334
#2335
#2336
#2337
#2338
#2339
#234
#2340
#2341
#2342
#2343
#2344
#2345
#2346
#2347
#2348
#2349
#235
#2350
#2351
#2352
#2353
#2354
#2355
#2356
#2357
#2358
#2359
#2360
#2361
#2362
#2363
#2364
#2365
#2366
#2367
#2369
#237
#2370
#2371
#2372
#2373
#2374
#2375
#2376
#2377
#2378
#2379
#238
#2380
#2381
#2382
#2383
#2385
#2386
#2387
#2388
#2389
#239
#2390
#2391
#2392
#2393
#2394
#2395
#2396
#2397
#2398
#2399
#24
#240
#2400
#2401
#2402
#2403
#2404
#2405
#2407
#2408
#2409
#241
#2410
#2412
#2414
#2415
#2416
#2417
#2418
#2419
#242
#2420
#2421
#2422
#2423
#2424
#2425
#2426
#2427
#2428
#2429
#243
#2430
#2431
#2432
#2433
#2434
#2435
#2436
#2437
#2438
#2439
#244
#2440
#2441
#2442
#2443
#2444
#2445
#2446
#2447
#2448
#2449
#245
#2450
#2451
#2452
#2453
#2454
#2455
#2456
#2457
#2458
#2459
#246
#2460
#2461
#2462
#2463
#2464
#2465
#2466
#2467
#2468
#2469
#247
#2470
#2471
#2472
#2473
#2474
#2475
#2476
#2477
#2478
#2479
#2480
#2481
#2482
#2483
#2484
#2485
#2486
#2487
#2488
#2489
#2490
#2491
#2492
#2493
#2494
#2495
#2496
#2497
#2498
#2499
#25
#2500
#2501
#2502
#2503
#2504
#2505
#2506
#2507
#2508
#2509
#251
#2510
#2511
#2512
#2513
#2514
#2515
#2516
#2517
#2518
#2519
#2520
#2521
#2522
#2523
#2524
#2525
#2526
#2527
#2528
#2529
#253
#2530
#2531
#2532
#2533
#2534
#2535
#2536
#2537
#2538
#2539
#254
#2540
#2541
#2542
#2543
#2544
#2545
#2546
#2549
#255
#2550
#2551
#2552
#2553
#2554
#2555
#2556
#2557
#2558
#2559
#256
#2560
#2561
#2562
#2563
#2564
#2565
#2566
#2567
#2568
#2569
#257
#2570
#2571
#2572
#2573
#2574
#2575
#2576
#2577
#2578
#2579
#258
#2580
#2581
#2582
#2583
#2584
#2585
#2586
#2587
#2588
#2589
#259
#2590
#2591
#2592
#2593
#2594
#2595
#2596
#2597
#2598
#2599
#26
#260
#2600
#2601
#2602
#2603
#2604
#2605
#2606
#2607
#2608
#2609
#261
#2610
#2611
#2612
#2613
#2614
#2615
#2616
#2617
#2618
#2619
#262
#2620
#2621
#2622
#2623
#2624
#2625
#2626
#2627
#2628
#2629
#263
#2630
#2631
#2632
#2633
#2634
#2635
#2636
#2637
#2638
#2639
#264
#2640
#2641
#2642
#2643
#2644
#2645
#2646
#2647
#2648
#2649
#265
#2650
#2651
#2652
#2653
#2654
#2655
#2656
#2657
#2658
#2659
#2660
#2661
#2662
#2663
#2664
#2665
#2666
#2667
#2668
#2669
#2670
#2671
#2672
#2673
#2674
#2675
#2676
#2677
#2678
#2679
#268
#2680
#2681
#2682
#2683
#2684
#2685
#2686
#2687
#2688
#2689
#269
#2690
#2691
#2692
#2693
#2695
#2696
#2697
#2698
#2699
#27
#270
#2700
#2701
#2702
#2703
#2704
#2705
#2706
#2707
#2708
#2709
#271
#2710
#2711
#2712
#2713
#2714
#2715
#2716
#2717
#2718
#2719
#272
#2720
#2721
#2722
#2723
#2724
#2725
#2726
#2727
#2728
#2729
#2730
#2731
#2732
#2733
#2734
#2735
#2736
#2737
#2738
#2739
#274
#2740
#2741
#2742
#2743
#2744
#2745
#2746
#2747
#2748
#2749
#275
#2750
#2751
#2752
#2753
#2754
#2755
#2756
#2757
#2758
#276
#2760
#2761
#2762
#2763
#2764
#2765
#2766
#2767
#2768
#2769
#277
#2770
#2771
#2772
#2773
#2774
#2775
#2776
#2777
#2778
#2779
#278
#2780
#2781
#2782
#2783
#2784
#2785
#2786
#2787
#2788
#2789
#279
#2790
#2791
#2792
#2793
#2794
#2795
#2796
#2797
#2798
#2799
#28
#2800
#2801
#2802
#2803
#2804
#2805
#2806
#2807
#2808
#2809
#281
#2810
#2811
#2812
#2813
#2814
#2815
#2816
#2817
#2818
#2819
#282
#2820
#2821
#2822
#2823
#2824
#2825
#2826
#2827
#2828
#2829
#283
#2830
#2831
#2832
#2833
#2834
#2835
#2836
#2837
#2838
#2839
#284
#2840
#2841
#2842
#2843
#2844
#2845
#2846
#2847
#2848
#2849
#285
#2850
#2851
#2852
#2853
#2854
#2855
#2856
#2857
#2858
#2859
#286
#2860
#2861
#2862
#2863
#2864
#2865
#2866
#2867
#2868
#2869
#287
#2870
#2871
#2872
#2873
#2874
#2875
#2876
#2877
#2878
#2879
#288
#2880
#2881
#2882
#2883
#2884
#2885
#2886
#2888
#2889
#289
#2890
#2891
#2892
#2893
#2894
#2895
#2896
#2897
#2899
#29
#290
#2900
#2901
#2902
#2903
#2904
#2905
#2906
#2907
#2908
#2909
#2910
#2911
#2912
#2913
#2914
#2915
#2916
#2917
#2918
#2919
#292
#2920
#2921
#2922
#2923
#2924
#2925
#2926
#2927
#2928
#2929
#293
#2930
#2931
#2932
#2933
#2934
#2935
#2936
#2937
#2938
#2939
#294
#2940
#2941
#2942
#2943
#2944
#2945
#2946
#2947
#2948
#2949
#295
#2950
#2951
#2952
#2953
#2954
#2955
#2956
#2957
#2958
#2959
#296
#2960
#2961
#2962
#2963
#2964
#2965
#2966
#2967
#2968
#2969
#297
#2970
#2971
#2972
#2973
#2974
#2975
#2976
#2977
#2978
#2979
#298
#2980
#2981
#2982
#2983
#2984
#2985
#2986
#2987
#2988
#2989
#299
#2990
#2991
#2992
#2993
#2995
#2996
#2997
#2998
#2999
#3
#30
#300
#3000
#3001
#3002
#3003
#3004
#3005
#3006
#3007
#3008
#3009
#301
#3010
#3011
#3012
#3013
#3014
#3015
#3016
#3017
#3018
#3019
#302
#3020
#3021
#3022
#3023
#3024
#3025
#3026
#3027
#3028
#3029
#303
#3030
#3031
#3032
#3033
#3034
#3035
#3036
#3037
#3038
#3039
#304
#3040
#3041
#3042
#3043
#3044
#3045
#3046
#3047
#3048
#3049
#305
#3050
#3051
#3053
#3054
#3055
#3056
#3057
#3058
#3059
#306
#3060
#3061
#3062
#3063
#3064
#3065
#3066
#3067
#3068
#3069
#307
#3070
#3071
#3072
#3073
#3074
#3075
#3076
#3077
#3078
#3079
#308
#3080
#3081
#3082
#3083
#3084
#3085
#3086
#3087
#3088
#3089
#309
#3090
#3092
#3093
#3094
#3096
#3097
#3098
#3099
#31
#310
#3100
#3101
#3102
#3103
#3104
#3105
#3106
#3107
#3108
#3109
#311
#3110
#3111
#3112
#3113
#3114
#3115
#3116
#3117
#3118
#3119
#312
#3120
#3121
#3122
#3123
#3124
#3125
#3126
#3127
#3128
#3129
#313
#3130
#3131
#3132
#3133
#3134
#3135
#3136
#3137
#3138
#3139
#314
#3140
#3141
#3142
#3143
#3144
#3145
#3146
#3147
#3148
#3149
#315
#3150
#3151
#3152
#3153
#3154
#3155
#3156
#3157
#3158
#3159
#316
#3160
#3161
#3162
#3163
#3164
#3165
#3166
#3167
#3167
#3168
#3169
#317
#3170
#3171
#3172
#3173
#3174
#3175
#3176
#3177
#3178
#3179
#318
#3180
#3181
#3182
#3183
#3184
#3186
#3187
#3188
#3189
#319
#3191
#3192
#3193
#3194
#3195
#3196
#3197
#3198
#3199
#32
#320
#3200
#3201
#3202
#3203
#3204
#3205
#3206
#3207
#3208
#3209
#321
#3210
#3211
#3212
#3213
#3214
#3215
#3216
#3217
#3218
#3219
#322
#3220
#3221
#3222
#3223
#3224
#3225
#3226
#3227
#3228
#3229
#323
#3230
#3231
#3232
#3233
#3234
#3235
#3236
#3237
#3238
#3239
#324
#3240
#3241
#3242
#3243
#3244
#3245
#3246
#3247
#3248
#3249
#3250
#3251
#3252
#3253
#3254
#3255
#3256
#3257
#3258
#3259
#326
#3260
#3261
#3262
#3263
#3264
#3265
#3266
#3267
#3268
#3269
#327
#3270
#3271
#3272
#3273
#3274
#3275
#3276
#3277
#3278
#3279
#328
#3280
#3281
#3282
#3283
#3284
#3285
#3286
#3287
#3288
#3289
#329
#3290
#3291
#3292
#3293
#3294
#3295
#3296
#3297
#3298
#3299
#33
#330
#3300
#3301
#3302
#3303
#3304
#3305
#3306
#3307
#3308
#3309
#331
#3310
#3311
#3312
#3313
#3314
#3315
#3316
#3317
#3318
#3319
#332
#3320
#3321
#3323
#3324
#3325
#3326
#3327
#3328
#3329
#333
#3330
#3331
#3332
#3333
#3334
#3335
#3336
#3337
#3338
#3339
#334
#3340
#3341
#3342
#3343
#3344
#3345
#3346
#3347
#3348
#3349
#335
#3350
#3351
#3352
#3353
#3354
#3355
#3356
#3357
#3358
#3359
#3360
#3361
#3362
#3363
#3364
#3365
#3366
#3368
#3369
#337
#3370
#3371
#3372
#3373
#3374
#3375
#3376
#3377
#3378
#3379
#338
#3380
#3381
#3382
#3383
#3384
#3385
#3386
#3387
#3388
#3389
#339
#3390
#3391
#3392
#3393
#3394
#3395
#3396
#3397
#3399
#34
#340
#3400
#3401
#3402
#3403
#3404
#3405
#3406
#3407
#3408
#3409
#341
#3410
#3411
#3412
#3413
#3414
#3415
#3416
#3417
#3418
#3419
#342
#3420
#3421
#3422
#3423
#3424
#3425
#3426
#3427
#3428
#3429
#343
#3430
#3431
#3432
#3434
#3435
#3436
#3437
#3438
#3439
#344
#3440
#3441
#3442
#3443
#3444
#3445
#3446
#3447
#3448
#3449
#345
#3450
#3451
#3452
#3453
#3454
#3455
#3456
#3457
#3458
#3459
#346
#3460
#3461
#3462
#3463
#3464
#3465
#3466
#3467
#3468
#3469
#347
#3470
#3471
#3472
#3473
#3474
#3475
#3476
#3477
#3478
#3479
#348
#3480
#3481
#3482
#3483
#3484
#3485
#3486
#3487
#3488
#3489
#349
#3490
#3491
#3492
#3493
#3494
#3495
#3496
#3497
#3498
#3499
#35
#3500
#3502
#3503
#3504
#3505
#3506
#3507
#3508
#3509
#351
#3510
#3511
#3512
#3513
#3514
#3515
#3516
#3517
#3518
#3519
#352
#3520
#3521
#3524
#3525
#3526
#3527
#3528
#3529
#353
#3530
#3531
#3532
#3533
#3534
#3535
#3536
#3537
#3538
#3539
#354
#3540
#3541
#3542
#3543
#3544
#3545
#3546
#3547
#3548
#3549
#355
#3550
#3551
#3552
#3553
#3554
#3555
#3556
#3557
#3558
#3559
#356
#3560
#3561
#3562
#3563
#3564
#3565
#3566
#3567
#3568
#3569
#357
#3570
#3571
#3572
#3573
#3576
#3577
#3578
#3579
#3580
#3581
#3582
#3583
#3584
#3585
#3586
#3587
#3588
#3589
#3590
#3591
#3592
#3593
#3594
#3595
#3596
#3597
#3598
#3599
#36
#360
#3600
#3601
#3602
#3603
#3604
#3605
#3606
#3607
#3608
#3609
#361
#3610
#3611
#3612
#3613
#3614
#3615
#3616
#3617
#3618
#3619
#3620
#3621
#3622
#3623
#3624
#3625
#3626
#3627
#3628
#3629
#363
#3630
#3631
#3632
#3633
#3634
#3635
#3636
#3637
#3638
#3639
#364
#3640
#3641
#3642
#3643
#3644
#3645
#3646
#3648
#3649
#365
#3650
#3651
#3652
#3653
#3654
#3655
#3656
#3657
#3658
#3659
#366
#3660
#3661
#3662
#3663
#3665
#3666
#3667
#3668
#3669
#367
#3670
#3671
#3672
#3674
#3675
#3676
#3677
#3678
#3679
#368
#3680
#3681
#3683
#3684
#3685
#3686
#3687
#3688
#3689
#369
#3690
#3691
#3692
#3693
#3694
#3695
#3696
#3697
#3698
#3699
#37
#370
#3700
#3701
#3702
#3703
#3704
#3705
#3706
#3707
#3708
#3709
#371
#3711
#3714
#3715
#3716
#3717
#3718
#3719
#372
#3720
#3721
#3723
#3725
#3726
#3727
#3728
#3729
#373
#3730
#3731
#3732
#3733
#3734
#3735
#3736
#374
#3741
#3742
#3745
#3745
#3746
#3747
#3747
#375
#376
#377
#378
#379
#38
#380
#381
#382
#383
#384
#385
#386
#387
#388
#389
#39
#393
#394
#395
#396
#397
#398
#399
#4
#401
#402
#403
#404
#405
#406
#408
#409
#41
#411
#412
#413
#415
#416
#417
#418
#42
#420
#421
#422
#424
#425
#426
#427
#428
#429
#43
#430
#432
#433
#434
#435
#436
#437
#438
#439
#44
#440
#441
#442
#443
#444
#445
#446
#447
#448
#449
#45
#450
#451
#452
#453
#46
#460
#461
#462
#463
#464
#465
#466
#467
#468
#469
#47
#470
#471
#472
#473
#474
#476
#479
#48
#480
#481
#482
#483
#484
#486
#487
#488
#489
#49
#490
#491
#492
#493
#494
#495
#496
#497
#498
#499
#5
#50
#500
#501
#502
#503
#504
#505
#506
#507
#509
#51
#510
#511
#513
#514
#515
#516
#517
#518
#519
#52
#520
#521
#524
#526
#527
#528
#529
#53
#530
#531
#532
#533
#534
#535
#538
#539
#54
#540
#541
#542
#545
#549
#55
#550
#551
#552
#554
#555
#556
#557
#559
#56
#560
#562
#563
#564
#565
#566
#567
#568
#569
#570
#571
#572
#573
#574
#575
#576
#577
#578
#58
#580
#581
#582
#583
#584
#585
#586
#587
#589
#59
#590
#591
#592
#593
#595
#596
#597
#598
#599
#6
#60
#600
#602
#603
#604
#606
#608
#609
#61
#610
#611
#612
#614
#615
#616
#619
#62
#620
#623
#625
#626
#627
#628
#629
#63
#631
#632
#633
#638
#639
#64
#640
#641
#642
#643
#644
#645
#646
#647
#648
#649
#65
#650
#651
#652
#653
#654
#655
#656
#657
#658
#659
#66
#660
#661
#663
#664
#665
#667
#668
#669
#67
#670
#672
#673
#675
#676
#677
#678
#679
#68
#680
#681
#682
#683
#684
#685
#686
#687
#689
#690
#691
#692
#693
#694
#695
#696
#697
#698
#699
#7
#700
#701
#703
#705
#706
#707
#708
#71
#710
#711
#712
#713
#714
#715
#716
#717
#718
#72
#720
#721
#722
#723
#724
#725
#726
#727
#729
#73
#730
#731
#732
#733
#734
#735
#737
#738
#739
#740
#741
#743
#744
#745
#746
#747
#749
#75
#750
#751
#752
#753
#754
#755
#757
#758
#759
#76
#760
#761
#763
#764
#765
#766
#767
#768
#769
#77
#770
#771
#773
#774
#776
#777
#778
#78
#781
#783
#784
#785
#786
#787
#788
#79
#790
#791
#792
#794
#795
#796
#797
#798
#799
#8
#80
#800
#802
#803
#804
#805
#806
#807
#808
#809
#81
#810
#811
#812
#813
#815
#816
#817
#818
#819
#82
#820
#821
#822
#825
#826
#827
#828
#829
#830
#832
#833
#834
#835
#836
#838
#839
#840
#841
#842
#843
#844
#845
#846
#848
#849
#850
#851
#853
#856
#857
#858
#859
#86
#860
#861
#862
#863
#864
#865
#866
#867
#868
#869
#870
#871
#873
#874
#875
#876
#877
#878
#879
#88
#880
#881
#882
#884
#885
#887
#888
#889
#89
#890
#892
#893
#894
#895
#896
#897
#898
#899
#901
#902
#903
#904
#905
#906
#907
#908
#909
#91
#910
#912
#913
#914
#915
#917
#918
#919
#92
#920
#921
#922
#923
#925
#926
#927
#929
#93
#930
#931
#932
#933
#934
#935
#936
#937
#938
#939
#94
#940
#941
#942
#943
#944
#945
#946
#947
#948
#949
#950
#951
#952
#953
#955
#956
#957
#958
#959
#96
#960
#961
#962
#963
#964
#965
#967
#968
#969
#97
#970
#971
#972
#973
#974
#975
#977
#978
#979
#98
#980
#981
#982
#983
#984
#985
#986
#987
#988
#989
#99
#990
#991
#992
#993
#994
#995
#996
#997
#998
#999
20250912-17
20250912-42
develop-pre-restore-2026-05-27
mock-tag-test
rocm-5.7.0
rocm-5.7.1
rocm-6.0.0
rocm-6.0.2
rocm-6.1.0
rocm-6.1.1
rocm-6.1.2
rocm-6.1.5
rocm-6.2.0
rocm-6.2.1
rocm-6.2.2
rocm-6.2.4
rocm-6.3.0
rocm-6.3.1
rocm-6.3.2
rocm-6.3.3
rocm-6.4.0
rocm-6.4.1
rocm-6.4.2
rocm-6.4.3
rocm-6.4.4
rocm-7.0.0
rocm-7.0.1
rocm-7.0.2
rocm-7.1.0
rocm-7.1.1
rocm-7.2.0
rocm-7.2.1
rocm-7.2.2
rocm-7.2.3
rocm-7.2.4
rocm-test-09212024
therock-7.10
therock-7.9.0
tutorial_hello_world
v0.1-cpu
Select branches
Hide Pull Requests
2217-add-io-module-for-profiler
350_grouped_gemm_fix
6c08c5c46d7a13
AITERKER-112
AITERKER-112-temp
Ali_FA3_BWD
CK/rel_stg_3076
Fa_seqlen1_pass
LWPCK-3549
LWPCK-3549-cleanups
LWPCK-3549-two-stage
LWPCK-3731
SWDEV-539321
SWDEV-561448
_bgp_v5_a
_bgp_v5_b
a8w8_fastgelu_gemm
a8w8_instance
a8w8_test
ad/refactor_targets_test
add-aiter-pytest
add_fmha_instances_for_specific_config
add_fmha_tuned_file
add_vllm_kvcache
afagaj/rocm-rel-7.1
afagaj/rocm-rel-7.1-fix
aghamari/cross-layer-5d-kv-cache
aghamari/ua-on-develop
aghamari/ua-on-develop-v2
aghamari/unified-attention-decode-opt
agpr_control
aick-482
aick-647
aiter_cktile_integration_a8w8_a4w4
aiterker-112-fp8-per-token-per-head-prefill
akuppp
amd-develop
amd-master
amd/dev/gehernan/SWDEV-535598_warpSize_fix
amd/dev/janplehr/fix/compiler-lifetime-warning
amd/dev/jruan/defect_slice_tile
amd/dev/jruan/rms_norm_welford
amd/dev/sdittaka/warpsize_workaround
andriy/ck_tile/basic-tutorials
andriy/f6-cherry-pick
arai/ck_tile/streamk_xcd_remap
arai/ck_tile/tile_engine_restructure
async_load_api
atom_test
attention/pagedkv_prefill
aviralgoel/gemm_tutorial
aviralgoel/memory_pipeline_refactor_2
aviralgoel/python-unbuffered-fix
aviralgoel/test_labels
aviralgoel/tutorials
barkocot/6.2.1-bf16-fix
barkocot/basic-v1-interwave
barkocot/builder-experiment
barkocot/bwd-data-instances-opt
barkocot/ck-tile-direct-load-conv
barkocot/conv-instances-removal
barkocot/explicit-string-out
barkocot/grouped-conv-bwd-wei-split-k-hack
barkocot/lwpck-1068
barkocot/lwpck-1916-dev2
barkocot/lwpck-3735
barkocot/lwpck-3853
barkocot/lwpck-4085
barkocot/skip-a-lds-bwd-conv
barkocot/tmp
batch-prefill-fp8-kvcache-blockscale
batched_gemm
bd_a8w8
bernard/add_fp8_fpmax
bernard/testx_soft_signs
bernard_pa_v_layout
bf16_intrinsic_type
bf16x3_fix_1120
bharriso/revert-sharding
bkc_25.08_candidate
build-time-investigation-tile-gemm
builder_inlineDiff
bwd_data_1c_dev
cderb/prefetch_tuning_250930
cderb/prefetch_tuning_251014
cderb/prefetch_tuning_251021
cderb/tuning_250729
ck-python-documentation
ck-tile-basic-tutorials
ck/aviralgoel/abquant_splitk
ck/aviralgoel/add-naive-gemm-tutorial
ck/moe_int32_overflow
ck/opt_a8w8_m32
ckTileEnginePooling
ckTileEnginePooling2
ck_fa_bwd_opt
ck_fp8xint4_gemm_enhance
ck_int4_moe_dev
ck_int4_moe_enhance
ck_int4_moe_register_spill_debug
ck_moe_bs_splitk
ck_moe_bs_splitk_pr
ck_moe_gemm16x16
ck_moe_merge
ck_moe_new_sorting
ck_moe_support_int64_idx
ck_moe_support_int64_v2
ck_pa_vllm
ck_streamk_2tile_sk_dp
ck_tile/batch_prefill_paged_size_16
ck_tile/fa3_fremont
ck_tile/fa_asm_bwd
ck_tile/fa_asm_bwd2
ck_tile/fa_asm_bwd_sync_fix
ck_tile/fa_bwd_opt2
ck_tile/fa_bwd_opt_rtn
ck_tile/fa_bwd_v3
ck_tile/fa_bwd_v3_sbhd
ck_tile/fa_bwd_v3_test
ck_tile/fa_bwd_v3_tmp
ck_tile/fa_opt_static_softmax
ck_tile/fmha_block_scale
ck_tile/fmha_fp8
ck_tile/fmha_hdim_160
ck_tile/fmha_in_fp8_async
ck_tile/fmha_in_fp8_async_192_128
ck_tile/fmha_in_fp8_split
ck_tile/fmha_nwarp_example
ck_tile/fused_moe
ck_tile/fused_moe_general
ck_tile/gemm_blockscale_eightwarps
ck_tile/gemm_debug_alias
ck_tile/gemm_felix_test
ck_tile/gemm_opt
ck_tile/kvcache_prefill
ck_tile/layer_norm_smalln_opt
ck_tile/ln_add_cache_clear
ck_tile/ln_bw
ck_tile/pagedkv_seqlow4k
ck_tile/splitkv_combine_v2
ck_tile/test_qv192_v128
ck_tile_batch_norm_forward_blockwise_welford
ck_tile_fmha_block_scale
ck_tile_gemm_streamk_draft
ck_tile_toy
ck_tile_tutorial
ck_wpreshuffle
ckfs
cktile/fmha_fp8_scale_p_test
cktile_mxfp4_plus
congma/ck_tile/fix_preshuffle_b
congma/ck_tile/preshuffle_b
congma/dev/abquant_failure
congma/dev/fix_preshuffle_b
congma13/ck_tile/inclusive_scan_sequence
congma13/ck_tile/inclusive_scan_sequence_ck
copilot/build-gemm-example
copilot/investigate-fmha-kernel-execution
copilot/move-pr-3723-to-rocm-libraries
copilot/research-fused-kernel-patterns
copilot/sub-pr-3236
copilot/sub-pr-3341
copilot/sub-pr-3466
copilot/update-fp8-support-in-readme
cshuffle-epilogue-bank-conflict-tests
cshuffle-epilogue-tests
cshuffle-fix
debug_moe
dev/a8w4_and_a8w8splitk_yadai
dev/a8w8_b_preshuffle
dev/ck_moe_gemm
dev/ck_moe_gemm2
dev/ck_moe_gemm2_merge
dev/ck_moe_gemm_output_sorting
dev/ck_moe_int4
dev/cka8w8_tuning
dev/flatmm
dev/gemm_reduce_seperate_moe
dev/huizzhan/ck_basic_gemm_topksoftmax
dev/huizzhan/ck_fusion_gemm_topksoftmax
dev/huizzhan/ck_gemm_softmax_topk
dev/huizzhan/ck_grouped_topksoftmax_test
dev/huizzhan/ck_pw_gemm_topksoftmax
dev/huizzhan/ck_toy_example
dev/huizzhan/fusion_pw_gemm_grouped_topk
dev/merge_u8w8_onlygemm_build
dev/moe_ffn
dev/moe_opt
dev/page_fa
dev/page_fa_cap_logits
dev/paged_window
dev/preshuffle_350
dev/test_es
dev/yadai
develop
develop-archive-2026-05-27
develop-test
develop_deprecated
device_gemm_multiple_d_xdl_cshuffle_v3_b_preshuffle
dkrottap/cp
dlejeune/fmha_fwd_test_all_hdim
dlejeune/fmha_fwd_test_all_hdim_dropout_test
dlejeune/gemm_pipeline_mem_skip_lds
dlejeune/mhc_core
dlejeune/qwen3.5_fmha_batch_prefill_opt2
dlejeune/qwen3.5_opt_gfx942
dlejeune/sinkhorn
dlejeune/ua-sink
dlejeune/ua-swa
dlejeune/ua-swa-v2
docs/5.6.1
docs/5.7.0
docs/5.7.1
docs/6.0.0
docs/6.0.2
docs/6.1.0
docs/6.1.1
docs/6.1.2
docs/6.1.5
docs/6.2.0
docs/6.2.1
docs/6.2.2
docs/6.2.4
docs/6.3.0
docs/6.3.1
docs/6.3.2
docs/6.3.3
docs/6.4.0
docs/6.4.1
docs/6.4.2
docs/6.4.3
docs/7.0.0
docs/7.0.1
docs/7.0.2
docs/7.1.0
docs/7.1.1
docs/7.2.0
docs/7.2.1
ds_read_tr_exp
dteng/dev/flatmm
dump_kernels_ck
dup-status-checks
ecamartins/ck_tile_warp_gemm_play
emimarti/ck_tile/incorrect_validation
enable_persistent_async
epilogue_refactor
eterpstr/abquant-transposec-fix
eterpstr/preshuffle-bquant-for-abquant-preshuffleb
f8blockscale_bpreshuffle_aiter
f8gemm_perf
fa_decode_pipeline
fav3_exp
fav3_group_mode
feat-mixed_input_flatmm
feat/swiglustep-moe-no-quant
features/grouped-conv-perf-uplift
feiw/dev/asm_f8
feiw/dev/ckt_cm9
feiw/dev/f4
feiw/dev/uk_gemm
feiw/gemm1_bns
feiw/gemm1_bns_perf
feiw/mxfp4_moe_2Stages
felix/add_nkpad
felix/ck_hot_fix
felix/flatmm
felix/flatmm_fix_splitk
felix/sorting
felix/tunx_norm
fix-ci-2542
fix-emu-error
fix-flatmm
fix/buffer_rsrc_cast
fix/guard-against-compiler-warning
fix_cktile_sequence
fix_gino
fix_gptoss_sink
fix_int4_tests
fix_moe_a16w4
fix_title
flatmm-moe-gemm-improve-testing-coverage
flatmm-moe-remap-xcd
flatmm-mxfp4-async
flatmm_for_compiler_dteng
flatmm_merge
fmh-ut-fix-squant
fmha_bwd_trload_mfma32
fmha_fwd_test_all_hdim
fmha_pipeline_nwarp_sshuffle_improve
fmha_pipeline_nwarp_sshuffle_n0loop
for_rga
fp16_int4_scale_weight_only_impl_bak
fp4_gu_moe
fp4_mx_bshf_v3
fp4_scale_gemm_dev
fp8_bpreshuffle
gate-fmha-padding
gemm_async_load_opt
gemm_bf16_sk_bug_fix
gemm_getname
gemm_w8_only
gfx950_debug
ginolu/fix-fa-hdim160
ginolu/flatmm_async
ginolu/perf
ginolu/sparge_attention
ginolu/timer_refine
ginolu/ut_async
hack_block_dropout
hdim48_support
hdim_96_160
heuristics-tile-gemm
hot_fix_fp8_gufusion
hot_fix_int32_overflow
hstu_attention_fwd
hstu_attention_fwd_bwd
int4_fp8_convert_opt
int4_pr_based_on_JingPR
int4_weight_only_from_mtgu_golden
int4_xdlop_16x16_fix
int8_weight_only_fix
int_fp8_cvtOpt_pr
ipanfilo/clang22_build_hotfix
jakpiase/ck_tile/conv_pipeline_v5
jakpiase/conv_bwd_data_direct_loads
jakpiase/gemm_pipeline_mem_skip_lds
jakpiase/grouped_conv_bwd_data_clamp
jakpiase/grouped_conv_dl
jakpiase/nchw_conv_optimizations
jakpiase/poc_cba
jakpiase/tmp_branch
jakpiase/tmp_save_conv
japiasec/ck_tile/irregular_tail_vectorloads
jeff/batch-prefill-vectorized-performance-fix
jeonghyun/ckb-add-optimized-kernel-validation
jeonghyun/ckb-almiopen-522-descriptor-init
jeongkim/grouped-conv-bias-bnorm-clamp-tolerance-gfx11
jian.wu.dev.fhma_bwd_d512
jim/ck_tile/fa_bwd_v3
jim/ck_tile/fa_v3_codegen
jim/dev/fav3_bwd_swa
jim/dev/fix_group_mode_mismatch
jim/dev/reduce_instance
jizhan/mixed_prec_gemm_pk_i4
jizhan/reduce_threadwise_multi_d
jizhan/tensor_contraction_v3
jograner/bwd-data-group-merge
jograner/bwd-weight-group-merge-type-string
jograner/bwd-weight-instance
jograner/grouped-gemm-issue
jograner/hotfix-grouped-gemm-two-stage-2
joye_ck_tile_dev
jshumway/analyze-build
jshumway/exp-build-analysis
jshumway/parse-build
jshumway/tensor
jshumway/transform
jshumway/util-readme
jukorhon/fa4-k-preread
jukorhon/fa4-kv128-vgpr
jukorhon/ua-blocktables-oob-fix
jukorhon/ua-multipagetile-fix
jukorhon/ua-multipagetile-fix-fp8
jukorhon/ua-multipagetile-fix-fp8-rebased
jukorhon/ua-multipagetile-fix-fp8-thd
jukorhon/ua-overflow-fix
jukorhon/unified-attention
jukorhon/unified-attention-ck
jukorhon/unified-attention-dev
jun/format_fix
jzhou/pre-load-ds
kabraham/builder_add_constraints
kabraham/builder_bwd_data
kabraham/describe-fn-wmma
kabraham/factory-tests
kabraham/prng_tests_integration
kqian/lynm+bias+quant
kylasa_check_err
kylasa_kdim_pr
kylasa_mdim_functional_working
kylasa_mdim_pingpong
kylasa_ping_pong
kyle/c_column_layout_test
kyle/gemm_test
kyle/grouped_gemm_blockwise
letaoqin/batch_prefile_block_scale_ptkvb
letaoqin/gemm_bias_activation
linsun/convint8_miopen_integration
lirui/vllm_atom_m3_0624
lj/whole_k_pipeline
lwpck-4180
lwpck-4181
lxx/dev/fix_gfx1250_compile
lynm_bwd_dteng
macurtis/amd-master
mainline_deprecated
marcusr/aiesw-32176-w4a16-ck-asym
master
matthias.gfx11_ck
merge-mixed_input_flatmm
merge/aiter_main_and_ck_fp4
meskelin/add_desc_tensorviews_universal
meskelin/refactor-makegemmtensorviews
mh/revert-rmsnorm-align-HF
mh/testing
mhynag/moe-index-support
mi300_time_measurement
mi355_async_load_dev
mi355_transpose_load_dev
migraphx
migraphx-ci-fix
migraphx-update
mock_moesorting_disable
moe_aiter_debug
moe_block_m_128
moe_block_m_32
moe_blockscale_func
moe_bs_fp8_no_asm
moe_bs_fp8_no_asm_buf2lds
moe_bs_stage1_dev
moe_cross_reduce
moe_fp8_persistent
moe_gemm_fuse_activation
moe_inline_develop
moe_xcd_remap
mohsen-backup
mono-split/users/yiding12/fmha-bwd-async-prepare
mono-split/users/yiding12/fmha-bwd-group-persistent
mono-split/users/yiding12/fmha-bwd-workspace
mozga-amd/cache_eneble_auto
mozga-amd/default_epilog_d_support
mozga-amd/fix_bug_transpose
mozga-amd/fix_transpose_matrix
mozga-amd/fix_universal_gemm_traits
mozga-amd/gemm_basic_fix
mozga-amd/grouped_conv_fix
mozga-amd/mulit_abd
mozga-amd/multiple_abd
mozga-amd/multiple_abd_gemm
mozga-amd/pipeline_errors
mozga-amd/rewrite_no_index_reduce_tests_gtest
mozga-amd/simple_dbg_for_cpu
mpodkory/find-transform-optimization
mpodkory/generate-tuple-optimizations
mpodkory/recursive-to-pack-expansion
mpodkory/static-for-indexed
mpodkory/template-optimization-tests
mpodkory/transform-tensor-descriptor-optimization
mtgu/cktile_mxfp4
mtgu/cktile_mxfp4_flatmm_dev
mtgu/dev/ck_moe_gemm2_int4_merge
mtgu/dev/ck_moe_int4
mtgu/dev/deepseekv3_ab_scale
mtgu/dev/gemm_fp8xint4
mtgu/dev/gemm_fp8xint4_Bpreshuffle
mtgu/dev/gemm_fp8xint4_Bpreshuffle_static2static
mtgu/dev/vllm_w8a8
mtgu/int4_moe_tencent
mtgu/mxfp4_gemm
mtgu/pr2/int4_scale_dev
muozturk_bf16_sk_padding_fix
muozturk_streamk_reduction_fix
mx_moe_f4_scale_shuffle
mx_moe_f4_scaleshuffle_Bnoshuffle
mxf4_moe_async_ver
mxfp4_moe_blockscale_buf2lds
mxfp6-flatmm
naive_pa_update
new_develop
new_time_based_ckprofiler_emin
old_ck_streamk_fix
opt-a16w4_moe_gemm2
origin/philipm/documentation-cleanup-5
origin/pmaybank/tile_engine-gfx12
oscar/ds_tune
per_tensor_quant
philipm/ck-tile-docs
philipm/ck_tile/WMMA_GEMM_F16
philipm/ck_tile/WMMA_GEMM_F16-0
philipm/documentation-cleanup
philipm/documentation-cleanup-5
philipm/documentation-cleanup-7
philipm/fix-tile_engine-json-output
pk_i4_t_enable_base
pk_i4_v3_failures_fix
pmaybank/rdna-dev-1
pmaybank/tile_engine-gfx12-1
pmaybank/tile_engine-gfx12_debug
pmaybank/tile_engine_gemm
pmaybank/toy_example
power_analysis
poyenc/fix-missing-template-argument
prec_param
ptpc_gemm_group_persistent
py2.6_noCK_bmm
pytorch/release/2.10
pytorch/release/2.11
pytorch/release/2.12
pytorch/release/2.8
pytorch/release/2.9
pytorch2.6_base
pytorch_release_2.6
qlin/port_gfx11_build_refine
qlin/refine_ck_profiler_test_script
qlin/remap_xdl
radeon-ai/optimised-block_sync_lds
rakesroy/7.0/psdb_test
rakesroy/7.0/psdb_test_onnx
ratio_mask_for_fmla_prefill
re-enable-two-fp16-x-pkint4-tests
refactor_vector_type
refine_flatmm
refine_flatmm2
reg_spill_moe_debug
relbers/moe_sorting/lane_group_sz_to_1
release-staging/rocm-rel-6.2
release-staging/rocm-rel-6.3
release-staging/rocm-rel-6.4
release-staging/rocm-rel-6.5
release-staging/rocm-rel-7.0
release/rocm-rel-5.6
release/rocm-rel-5.7
release/rocm-rel-5.7.00.48
release/rocm-rel-5.7.1.1
release/rocm-rel-6.0
release/rocm-rel-6.0-staging
release/rocm-rel-6.1
release/rocm-rel-6.1-staging
release/rocm-rel-6.1.0.36
release/rocm-rel-6.1.00.36
release/rocm-rel-6.1.1.1
release/rocm-rel-6.1.4.01
release/rocm-rel-6.2
release/rocm-rel-6.2-staging
release/rocm-rel-6.2.0.5
release/rocm-rel-6.2.2.1
release/rocm-rel-6.2.4.1
release/rocm-rel-6.3
release/rocm-rel-6.3-staging
release/rocm-rel-6.3.0.1
release/rocm-rel-6.3.0.2
release/rocm-rel-6.4
release/rocm-rel-6.4.1.2
release/rocm-rel-6.4.2.2
release/rocm-rel-7.0
release/rocm-rel-7.0.2.1
release/rocm-rel-7.0.2.2
release/rocm-rel-7.1
release/rocm-rel-7.1.1.1
release/rocm-rel-7.2
release/rocm-rel-7.2.0.1
release/therock-7.10
release/therock-7.9
revert-3288-streamhpc/grouped-conv-fwd-wmma-tuned-instances
revert-3378-streamhpc/conv_bwd_weight_wmma_instance_selection
revert-3603-eterpstr/206-block-scale-gemm-fp4-support
rocking/fmha-async-intrinsic
rocm-libraries-export-2d4a3223cb
rocm-libraries-export-dbc6589bf2
rocm7.1_gg_performance
ruimin_dbg_conv_fwd
samaario/ua-tune
samidamien/unified_attention
samremes/bmatrix_2d_blockscale
samremes/ck_tile_mx_gemm
samremes/double_buffer_fp8_ab_scale
samremes/fmha_192x128_hdim_occupancy
samremes/fmha_fwd_v3_for_gfx942
samremes/quantize_in_ab_scale_gemm
samremes/temp_tests
satya_temp
shahamed/ck3129
shared/big_day_merge_asm_fix
shuffle_tile_enhance
sink_attn
sinkhorn
sk_ex_fp8_bf8_cktile
sk_reduction_fix_oldsk
sk_tests_backup
so/f4moe
so/moe_a4w4
solin/flatmm
spolifroni-amd-patch-1
spolifroni-amd/edit-conf-py-with-doxylink
srayasam/test
srayasam/test-1
srayasam/test-release
srayasam/test1
srayasam/test2
srayasam/test3
srayasam/testing-branch
srayasam/therock-test
streamhpc/grouped-conv-fwd-extra-flavors
streamhpc/grouped-conv-fwd-wmma-tuned-instances
streamhpc/impl-splitk-device-grouped-conv-fwd-multiple-abd-xdl-cshuffle-v3
streamhpc/mix_prec_microscaling_bquant
streamhpc/wavetile_transfer_bwd_data_and_bwd_wei-support
streamhpc/wmma_gemm_quantization
streamk_debug
streamk_device_grid_info_print
streamk_fix
streamk_old
streamk_revert
support_engine_cache
te_bwd_bias
te_bwd_v3_hd64
te_debug_dbias_1hss
te_v3_bwd_sbhd
tenpercent/async_copy_gemm_v3
tenpercent/cc-skill-build
tenpercent/ck-build-analysis-skill
tenpercent/cktile_rename_f8
tenpercent/compv3_build_time_reduce_experiment
tenpercent/dispatch
tenpercent/generate-identity-sequences
tenpercent/gfx950_lds_experiments
tenpercent/revert-ck-fp8-struct
tenpercent/statically-indexed-array-rewrite
tenpercent/tensor-descriptor-functor-optimization
test-load-tile-transpose
test-mixed_input_flatmm
test_async_v3
test_ck_bd_ticket
test_ck_bug
test_config_heuristic
test_fmha_ck_tile
tests_for_batched_grouped_gemm
testx
testx_bprefill
testx_temp
testx_v2
thomas/experiment
thomning/ck_tile/mx_gemm_packing
tianwyan/streamk
tianxing/unified-attention
tianxing/unified-attention-quantization
tianyuwu/ck_tile/ck_tile_example_bringup
tileengine-restructure
tlakshma_950_support
tlakshma_tileengine_enable_arch
tmp-develop
tomjen12/clang-fix-20-fremont
toy_example
toy_example_ms
try_merge_with_multiple_abd
tune_norm
uai-develop
uai-migraphx
uif2-migraphx
universal_streamk
universal_streamk_debug
update_cka16w16_uc
update_cka8w8_uc_padding
update_cka8w8_uc_padding_dev
users/ArthurLiu/ck_fmha_codegen
users/Vsevolod1983/TempBranchAddLogging
users/andriy/ck/1464-pytorch-gfx1250
users/dahawkin/revert-f86bbb1aefdd047b2b0e886dda831417e790f622
users/darren-amd/ck-gfx1153-release-2.9
users/jam/rdna3-rdna4-fmha-tile-load-fixes
users/msaffari-amd/ck/dispatcher_test_in_aiter
users/randyspauldingamd/dep_parser_monorepo
users/randyspauldingamd/gtest_fixturemap
vec_stores_c_col_v3
vec_stores_c_col_v4
veergopu/add_6764
vmcnt0issue
vpietila/add-fwd-conv-v3-instances-for-unit-group-size
vpietila/bwd-conv-weight-integration-tests
vpietila/ck-profiling-documentation
vpietila/ck-tile-split-k-opt
vpietila/ck-vs-ck-tile-conv-benchmarking
vpietila/ckb-add-ck-tile-bwd-weight-instances
vpietila/ckb-add-defaults-for-optional-template-params
vpietila/ckb-block-tiling
vpietila/ckb-bwd-instances
vpietila/ckb-fwd-bwd-instances
vpietila/ckb-fwd-instance-test-improvements
vpietila/ckb-generaized-conv-factory-baseline
vpietila/ckb-improve-compile-time-errors
vpietila/ckb-refactor-warp-gemm-descriptors
vpietila/ckb-remove-explicit-device-op-flag
vpietila/convolution-builder
vpietila/ggemm-profiling
vpietila/improved-fwd-merged-conv-group-instances
vpietila/int8-perf-on-navi4x
vpietila/lwpck-3530
vpietila/merge-multiple-conv-groups-fully-working-baseline
vpietila/merge-multiple-depthwise-conv-groups-into-single-gemm-batch
vpietila/merge-multiple-fwd-conv-groups-into-single-gemm-batch
vpietila/miopen-dev
vpietila/packed-bf16-cast-for-grouped-conv
vpietila/packed-bf16-cast-for-grouped-conv-v1
vpietila/retina-net-fwd-convs
vpietila/retina-net-fwd-convs-baseline
vpietila/retina-net-training-perf
vpietila/split-k-param-auto-deduce
warp_specialized_scheduling
wave_buffer_resource_patch
whole_k_prefetch_n0loop
wip-f4-redesign
wip-f4-wp-joye
wip-fa-cshuffle
wip_355
wip_355_xcd_remap
wjx/align_v3_pipeline
wjx/atomic_add_bf16
wjx/cK_tile/moe_gemm
wjx/fix_moe_expert_oob
wjx/fix_moe_gfx942
wjx/fix_moe_splitk
wjx/fix_splitk_moe
wjx/flatmm_merge
wjx/grouped_flatmm
wjx/m_grouped_flatmm
wjx/moe_k64_patch
wjx/moe_mx_fp4_for_aiter
wjx/moe_v3_aiter
wjx/moe_v3_fp8_gfx942_spill
wjx/mxfp4_moe_2Stages
wjx/mxfp4_moe_bpreshuffle_v1
wjx/mxfp4_v1_pipe
wjx/preshuffle_format
wjx/reproduce_moe_spill
wjx/topK_weights
wjx/wp_gemm_fix
workable_async_copy
wulley_v_layout
xformers_version
xiangxli_fa_bwd_support_atomic16
xiangxli_fa_ck_bwd_support_atomic16
xiangxli_mask_support_y_ratio_new
xiangxli_support_ratio_mask
xin/add_all_op_tests
xin/test-pip
xor_async_fa
xyt/ln_patch
yadai/moe_a4w4
yanda/wip_355
yandai/a16w4_old_layout
yandai/moe_flatmm_async
yandai/moe_flatmm_async_scale_b16
yandai/wip_mi355
yewang12/bias_for_padding
yewang12/bwd_group_persistent_hack_cu
yewang12/ck-varlen-bwd-det
yewang12/debug_v3_bs3hd
yewang12/debug_v3_bsh3d
yewang12/debug_v3_bshd_bs2hd
yewang12/flash-attn-yiding-ck
yewang12/rocm_flash_attn_cherrypick_PR3615
yewang12/te_bias_all_minf
yewang12/te_deterministic
yewang_qkv_sbhd
yiding12/d64-det
yilin/practice
yiltan-temp
yuyun/mla
zain/TE-native-bshd-thd
zain/ck-graph-fix
zain/ck-graph-fix-cherry
zain/qola/filter
zain/qola/te-receipt
zan/cK_tile/moe_gemm
zan_fix_bufferloadlds
zan_ln
zan_norm
zan_tune
zan_vllm
zan_vllm_layout
zan_vllm_test
zhe_test
zhimding/ck_hot_fix_moe_sorting
zhimding/develop
zhimding/moe_flatmm_async
zqf_base_develop
#1
#100
#1001
#1002
#1003
#1004
#1005
#1006
#1007
#1008
#1009
#101
#1010
#1011
#1012
#1013
#1014
#1015
#1017
#1018
#1019
#1021
#1022
#1023
#1024
#1025
#1026
#1027
#1028
#1029
#1030
#1033
#1035
#1036
#1037
#1039
#1040
#1042
#1043
#1044
#1045
#1046
#1047
#1048
#1049
#1050
#1051
#1052
#1054
#1055
#1056
#1057
#1058
#1059
#106
#1060
#1061
#1062
#1063
#1064
#1065
#1066
#1067
#1068
#1069
#107
#1071
#1072
#1073
#1075
#1077
#1079
#108
#1080
#1081
#1082
#1083
#1084
#1085
#1086
#1087
#1088
#109
#1090
#1091
#1092
#1093
#1094
#1095
#1096
#1097
#1098
#1099
#1100
#1101
#1102
#1103
#1104
#1105
#1106
#1107
#1108
#1109
#111
#1110
#1112
#1114
#1115
#1116
#1117
#1118
#1119
#112
#1120
#1121
#1123
#1124
#1125
#1126
#1127
#1128
#1129
#113
#1130
#1131
#1132
#1133
#1134
#1135
#1136
#1137
#1138
#1139
#114
#1141
#1142
#1143
#1144
#1145
#1147
#1148
#1149
#115
#1150
#1151
#1152
#1153
#1154
#1155
#1156
#1157
#1158
#1159
#116
#1160
#1161
#1162
#1163
#1164
#1165
#1166
#1167
#1168
#1169
#117
#1170
#1172
#1173
#1174
#1175
#1176
#1177
#1178
#118
#1180
#1181
#1182
#1183
#1185
#1186
#1187
#1188
#1189
#119
#1190
#1191
#1192
#1193
#1194
#1195
#1196
#1197
#1198
#120
#1200
#1202
#1203
#1205
#1206
#1207
#1208
#1210
#1211
#1212
#1213
#1214
#1216
#1217
#1218
#1219
#122
#1220
#1221
#1222
#1223
#1224
#1225
#1226
#1227
#1228
#1229
#123
#1230
#1231
#1232
#1234
#1236
#1237
#1238
#1239
#124
#1240
#1241
#1242
#1243
#1244
#1245
#1246
#1247
#1248
#1249
#125
#1250
#1251
#1253
#1254
#1255
#1256
#1257
#1258
#1259
#126
#1260
#1262
#1263
#1264
#1265
#1266
#1267
#1268
#1269
#127
#1270
#1271
#1272
#1273
#1274
#1275
#1277
#1278
#128
#1280
#1281
#1282
#1283
#1284
#1285
#1286
#1287
#1289
#1290
#1291
#1292
#1293
#1295
#1296
#1297
#1299
#13
#130
#1300
#1301
#1302
#1303
#1304
#1305
#1306
#1307
#1308
#1309
#131
#1310
#1311
#1312
#1313
#1314
#1315
#1316
#1317
#1318
#1319
#132
#1320
#1321
#1322
#1323
#1324
#1325
#1326
#1327
#1328
#1329
#133
#1331
#1332
#1333
#1335
#1336
#1337
#1338
#1339
#134
#1340
#1341
#1342
#1343
#1344
#1345
#1346
#1347
#1348
#1349
#1350
#1351
#1352
#1353
#1354
#1355
#1356
#1358
#1359
#1360
#1361
#1362
#1363
#1364
#1365
#1366
#1367
#1368
#1369
#1370
#1372
#1374
#1375
#1376
#1377
#1378
#1379
#1380
#1381
#1382
#1383
#1384
#1385
#1386
#1387
#1388
#1389
#1390
#1391
#1392
#1393
#1394
#1395
#1396
#1397
#1398
#1399
#14
#140
#1400
#1401
#1403
#1404
#1406
#1407
#1408
#1409
#141
#1410
#1411
#1412
#1413
#1414
#1415
#1416
#1417
#1419
#142
#1420
#1421
#1423
#1424
#1425
#1426
#1427
#1428
#1429
#143
#1430
#1432
#1433
#1435
#1437
#1440
#1441
#1442
#1443
#1444
#1445
#1446
#1447
#1448
#1449
#145
#1450
#1451
#1452
#1453
#1454
#1455
#1456
#1457
#1458
#1461
#1462
#1463
#1464
#1465
#1467
#1468
#1469
#1470
#1471
#1472
#1473
#1474
#1475
#1476
#1478
#1479
#148
#1480
#1481
#1482
#1483
#1484
#1485
#1486
#1487
#1488
#1489
#149
#1490
#1491
#1492
#1493
#1494
#1495
#1496
#1497
#1499
#15
#150
#1500
#1501
#1502
#1503
#1504
#1505
#1506
#1507
#1508
#1509
#151
#1511
#1512
#1513
#1515
#1516
#1517
#1518
#1519
#152
#1520
#1521
#1522
#1523
#1524
#1525
#1526
#1527
#1528
#1529
#1530
#1531
#1532
#1533
#1535
#1536
#1537
#1538
#1539
#1540
#1541
#1542
#1543
#1544
#1545
#1546
#1547
#1548
#1549
#155
#1550
#1552
#1553
#1554
#1555
#1556
#1557
#1558
#1559
#156
#1560
#1561
#1562
#1563
#1564
#1565
#1566
#1567
#1568
#1569
#1570
#1571
#1572
#1573
#1574
#1575
#1576
#1577
#1578
#1579
#158
#1582
#1583
#1584
#1585
#1587
#1588
#1589
#159
#1590
#1591
#1592
#1593
#1594
#1595
#1596
#1597
#1598
#1599
#16
#160
#1600
#1601
#1602
#1604
#1605
#1606
#1607
#1608
#1609
#161
#1610
#1611
#1612
#1613
#1614
#1615
#1616
#1617
#1618
#1619
#1620
#1621
#1622
#1623
#1624
#1625
#1626
#1627
#1628
#1629
#163
#1630
#1631
#1632
#1633
#1634
#1635
#1636
#1637
#1639
#1640
#1642
#1643
#1644
#1645
#1647
#1648
#1649
#165
#1650
#1651
#1653
#1654
#1655
#1657
#1658
#1659
#166
#1660
#1661
#1662
#1663
#1664
#1665
#1666
#1667
#1668
#1669
#167
#1670
#1671
#1672
#1673
#1674
#1675
#1676
#1677
#1678
#1679
#168
#1680
#1681
#1682
#1683
#1684
#1685
#1686
#1687
#1688
#1689
#1690
#1691
#1692
#1694
#1695
#1696
#1697
#1698
#1699
#1700
#1701
#1702
#1703
#1704
#1705
#1706
#1708
#171
#1710
#1711
#1712
#1713
#1714
#1715
#1716
#1717
#1718
#1719
#1720
#1721
#1722
#1723
#1724
#1725
#1726
#1728
#1729
#1730
#1731
#1732
#1733
#1734
#1735
#1736
#1737
#1738
#1739
#174
#1740
#1741
#1742
#1743
#1744
#1745
#1746
#1747
#1748
#1749
#175
#1750
#1751
#1752
#1753
#1754
#1755
#1756
#1758
#176
#1760
#1761
#1762
#1763
#1764
#1765
#1766
#1767
#1768
#1769
#1770
#1771
#1772
#1774
#1775
#1776
#1778
#1779
#178
#1783
#1784
#1785
#1786
#1787
#1788
#1789
#1790
#1791
#1792
#1793
#1794
#1795
#1796
#1797
#1798
#1799
#18
#1800
#1801
#1802
#1803
#1804
#1805
#1806
#1807
#1808
#1809
#181
#1810
#1811
#1812
#1813
#1814
#1815
#1816
#1817
#1818
#1819
#182
#1820
#1821
#1822
#1823
#1824
#1825
#1826
#1827
#1828
#1829
#183
#1830
#1831
#1832
#1834
#1835
#1836
#1837
#1838
#1839
#184
#1840
#1842
#1843
#1844
#1845
#1846
#1847
#1848
#1849
#185
#1850
#1851
#1852
#1853
#1854
#1856
#1858
#1859
#186
#1860
#1861
#1862
#1863
#1864
#1866
#1867
#1868
#1869
#187
#1871
#1872
#1873
#1874
#1875
#1876
#1877
#1878
#1879
#188
#1880
#1881
#1882
#1883
#1884
#1885
#1886
#1887
#1888
#1889
#189
#1891
#1892
#1894
#1895
#1896
#1897
#1898
#1899
#19
#190
#1900
#1901
#1902
#1903
#1904
#1905
#1906
#1907
#1908
#1909
#1910
#1911
#1912
#1913
#1914
#1915
#1916
#1917
#1918
#1919
#192
#1920
#1921
#1922
#1923
#1924
#1925
#1927
#1930
#1931
#1932
#1933
#1934
#1935
#1936
#1937
#1938
#1939
#194
#1940
#1941
#1942
#1943
#1944
#1945
#1947
#1948
#1949
#195
#1950
#1951
#1952
#1953
#1954
#1955
#1956
#1957
#1959
#196
#1960
#1961
#1962
#1965
#1966
#1967
#1968
#1969
#197
#1970
#1971
#1972
#1973
#1974
#1975
#1976
#1977
#1978
#1979
#1980
#1981
#1982
#1983
#1984
#1985
#1986
#1987
#1988
#1989
#199
#1990
#1991
#1992
#1993
#1994
#1995
#1996
#1997
#1998
#1999
#2
#20
#200
#2000
#2001
#2002
#2003
#2004
#2005
#2006
#2007
#2008
#2009
#201
#2010
#2011
#2012
#2013
#2014
#2015
#2016
#2018
#2019
#202
#2020
#2021
#2022
#2023
#2024
#2025
#2026
#2027
#2029
#2031
#2032
#2034
#2035
#2036
#2038
#2039
#204
#2040
#2041
#2042
#2043
#2044
#2045
#2046
#2047
#2048
#2049
#205
#2050
#2051
#2052
#2053
#2054
#2055
#2056
#2057
#2058
#2059
#206
#2060
#2061
#2062
#2063
#2064
#2065
#2066
#2067
#2068
#2069
#2070
#2071
#2072
#2073
#2074
#2075
#2077
#2078
#2079
#208
#2080
#2082
#2083
#2084
#2085
#2086
#2087
#2088
#2089
#209
#2090
#2091
#2092
#2093
#2094
#2095
#2096
#2098
#2099
#21
#210
#2100
#2101
#2102
#2103
#2104
#2105
#2106
#2108
#2109
#211
#2110
#2111
#2112
#2113
#2114
#2115
#2116
#2117
#2118
#2119
#212
#2120
#2121
#2122
#2123
#2124
#2125
#2126
#2127
#2128
#2129
#213
#2130
#2131
#2132
#2133
#2134
#2135
#2136
#2137
#2138
#2139
#214
#2140
#2141
#2142
#2143
#2144
#2145
#2146
#2147
#2148
#2149
#215
#2150
#2151
#2152
#2153
#2154
#2155
#2156
#2157
#2158
#2159
#2160
#2161
#2162
#2163
#2164
#2165
#2166
#2167
#2168
#2169
#217
#2170
#2171
#2172
#2173
#2174
#2175
#2176
#2177
#2178
#2179
#2180
#2181
#2182
#2183
#2184
#2185
#2186
#2187
#2188
#2189
#219
#2190
#2191
#2192
#2193
#2194
#2195
#2196
#2197
#2198
#2199
#22
#220
#2200
#2201
#2202
#2203
#2204
#2206
#2207
#2208
#2209
#2210
#2211
#2212
#2213
#2214
#2215
#2216
#2218
#2219
#2221
#2222
#2223
#2224
#2225
#2226
#2227
#2228
#2229
#2230
#2231
#2232
#2233
#2234
#2235
#2236
#2237
#2238
#2239
#224
#2240
#2241
#2242
#2243
#2244
#2245
#2246
#2247
#2248
#2249
#2250
#2251
#2252
#2253
#2254
#2255
#2256
#2257
#2258
#2259
#226
#2260
#2261
#2262
#2263
#2264
#2265
#2266
#2267
#2268
#2269
#2270
#2272
#2273
#2274
#2275
#2276
#2277
#2278
#2279
#228
#2280
#2281
#2282
#2283
#2284
#2285
#2286
#2287
#2288
#2289
#229
#2290
#2291
#2292
#2293
#2294
#2295
#2296
#2297
#2298
#2299
#23
#230
#2300
#2301
#2302
#2303
#2304
#2305
#2306
#2307
#2308
#2309
#231
#2310
#2311
#2312
#2313
#2314
#2315
#2316
#2317
#2318
#2319
#232
#2320
#2321
#2322
#2323
#2324
#2325
#2326
#2327
#2328
#2329
#233
#2330
#2331
#2332
#2333
#2334
#2335
#2336
#2337
#2338
#2339
#234
#2340
#2341
#2342
#2343
#2344
#2345
#2346
#2347
#2348
#2349
#235
#2350
#2351
#2352
#2353
#2354
#2355
#2356
#2357
#2358
#2359
#2360
#2361
#2362
#2363
#2364
#2365
#2366
#2367
#2369
#237
#2370
#2371
#2372
#2373
#2374
#2375
#2376
#2377
#2378
#2379
#238
#2380
#2381
#2382
#2383
#2385
#2386
#2387
#2388
#2389
#239
#2390
#2391
#2392
#2393
#2394
#2395
#2396
#2397
#2398
#2399
#24
#240
#2400
#2401
#2402
#2403
#2404
#2405
#2407
#2408
#2409
#241
#2410
#2412
#2414
#2415
#2416
#2417
#2418
#2419
#242
#2420
#2421
#2422
#2423
#2424
#2425
#2426
#2427
#2428
#2429
#243
#2430
#2431
#2432
#2433
#2434
#2435
#2436
#2437
#2438
#2439
#244
#2440
#2441
#2442
#2443
#2444
#2445
#2446
#2447
#2448
#2449
#245
#2450
#2451
#2452
#2453
#2454
#2455
#2456
#2457
#2458
#2459
#246
#2460
#2461
#2462
#2463
#2464
#2465
#2466
#2467
#2468
#2469
#247
#2470
#2471
#2472
#2473
#2474
#2475
#2476
#2477
#2478
#2479
#2480
#2481
#2482
#2483
#2484
#2485
#2486
#2487
#2488
#2489
#2490
#2491
#2492
#2493
#2494
#2495
#2496
#2497
#2498
#2499
#25
#2500
#2501
#2502
#2503
#2504
#2505
#2506
#2507
#2508
#2509
#251
#2510
#2511
#2512
#2513
#2514
#2515
#2516
#2517
#2518
#2519
#2520
#2521
#2522
#2523
#2524
#2525
#2526
#2527
#2528
#2529
#253
#2530
#2531
#2532
#2533
#2534
#2535
#2536
#2537
#2538
#2539
#254
#2540
#2541
#2542
#2543
#2544
#2545
#2546
#2549
#255
#2550
#2551
#2552
#2553
#2554
#2555
#2556
#2557
#2558
#2559
#256
#2560
#2561
#2562
#2563
#2564
#2565
#2566
#2567
#2568
#2569
#257
#2570
#2571
#2572
#2573
#2574
#2575
#2576
#2577
#2578
#2579
#258
#2580
#2581
#2582
#2583
#2584
#2585
#2586
#2587
#2588
#2589
#259
#2590
#2591
#2592
#2593
#2594
#2595
#2596
#2597
#2598
#2599
#26
#260
#2600
#2601
#2602
#2603
#2604
#2605
#2606
#2607
#2608
#2609
#261
#2610
#2611
#2612
#2613
#2614
#2615
#2616
#2617
#2618
#2619
#262
#2620
#2621
#2622
#2623
#2624
#2625
#2626
#2627
#2628
#2629
#263
#2630
#2631
#2632
#2633
#2634
#2635
#2636
#2637
#2638
#2639
#264
#2640
#2641
#2642
#2643
#2644
#2645
#2646
#2647
#2648
#2649
#265
#2650
#2651
#2652
#2653
#2654
#2655
#2656
#2657
#2658
#2659
#2660
#2661
#2662
#2663
#2664
#2665
#2666
#2667
#2668
#2669
#2670
#2671
#2672
#2673
#2674
#2675
#2676
#2677
#2678
#2679
#268
#2680
#2681
#2682
#2683
#2684
#2685
#2686
#2687
#2688
#2689
#269
#2690
#2691
#2692
#2693
#2695
#2696
#2697
#2698
#2699
#27
#270
#2700
#2701
#2702
#2703
#2704
#2705
#2706
#2707
#2708
#2709
#271
#2710
#2711
#2712
#2713
#2714
#2715
#2716
#2717
#2718
#2719
#272
#2720
#2721
#2722
#2723
#2724
#2725
#2726
#2727
#2728
#2729
#2730
#2731
#2732
#2733
#2734
#2735
#2736
#2737
#2738
#2739
#274
#2740
#2741
#2742
#2743
#2744
#2745
#2746
#2747
#2748
#2749
#275
#2750
#2751
#2752
#2753
#2754
#2755
#2756
#2757
#2758
#276
#2760
#2761
#2762
#2763
#2764
#2765
#2766
#2767
#2768
#2769
#277
#2770
#2771
#2772
#2773
#2774
#2775
#2776
#2777
#2778
#2779
#278
#2780
#2781
#2782
#2783
#2784
#2785
#2786
#2787
#2788
#2789
#279
#2790
#2791
#2792
#2793
#2794
#2795
#2796
#2797
#2798
#2799
#28
#2800
#2801
#2802
#2803
#2804
#2805
#2806
#2807
#2808
#2809
#281
#2810
#2811
#2812
#2813
#2814
#2815
#2816
#2817
#2818
#2819
#282
#2820
#2821
#2822
#2823
#2824
#2825
#2826
#2827
#2828
#2829
#283
#2830
#2831
#2832
#2833
#2834
#2835
#2836
#2837
#2838
#2839
#284
#2840
#2841
#2842
#2843
#2844
#2845
#2846
#2847
#2848
#2849
#285
#2850
#2851
#2852
#2853
#2854
#2855
#2856
#2857
#2858
#2859
#286
#2860
#2861
#2862
#2863
#2864
#2865
#2866
#2867
#2868
#2869
#287
#2870
#2871
#2872
#2873
#2874
#2875
#2876
#2877
#2878
#2879
#288
#2880
#2881
#2882
#2883
#2884
#2885
#2886
#2888
#2889
#289
#2890
#2891
#2892
#2893
#2894
#2895
#2896
#2897
#2899
#29
#290
#2900
#2901
#2902
#2903
#2904
#2905
#2906
#2907
#2908
#2909
#2910
#2911
#2912
#2913
#2914
#2915
#2916
#2917
#2918
#2919
#292
#2920
#2921
#2922
#2923
#2924
#2925
#2926
#2927
#2928
#2929
#293
#2930
#2931
#2932
#2933
#2934
#2935
#2936
#2937
#2938
#2939
#294
#2940
#2941
#2942
#2943
#2944
#2945
#2946
#2947
#2948
#2949
#295
#2950
#2951
#2952
#2953
#2954
#2955
#2956
#2957
#2958
#2959
#296
#2960
#2961
#2962
#2963
#2964
#2965
#2966
#2967
#2968
#2969
#297
#2970
#2971
#2972
#2973
#2974
#2975
#2976
#2977
#2978
#2979
#298
#2980
#2981
#2982
#2983
#2984
#2985
#2986
#2987
#2988
#2989
#299
#2990
#2991
#2992
#2993
#2995
#2996
#2997
#2998
#2999
#3
#30
#300
#3000
#3001
#3002
#3003
#3004
#3005
#3006
#3007
#3008
#3009
#301
#3010
#3011
#3012
#3013
#3014
#3015
#3016
#3017
#3018
#3019
#302
#3020
#3021
#3022
#3023
#3024
#3025
#3026
#3027
#3028
#3029
#303
#3030
#3031
#3032
#3033
#3034
#3035
#3036
#3037
#3038
#3039
#304
#3040
#3041
#3042
#3043
#3044
#3045
#3046
#3047
#3048
#3049
#305
#3050
#3051
#3053
#3054
#3055
#3056
#3057
#3058
#3059
#306
#3060
#3061
#3062
#3063
#3064
#3065
#3066
#3067
#3068
#3069
#307
#3070
#3071
#3072
#3073
#3074
#3075
#3076
#3077
#3078
#3079
#308
#3080
#3081
#3082
#3083
#3084
#3085
#3086
#3087
#3088
#3089
#309
#3090
#3092
#3093
#3094
#3096
#3097
#3098
#3099
#31
#310
#3100
#3101
#3102
#3103
#3104
#3105
#3106
#3107
#3108
#3109
#311
#3110
#3111
#3112
#3113
#3114
#3115
#3116
#3117
#3118
#3119
#312
#3120
#3121
#3122
#3123
#3124
#3125
#3126
#3127
#3128
#3129
#313
#3130
#3131
#3132
#3133
#3134
#3135
#3136
#3137
#3138
#3139
#314
#3140
#3141
#3142
#3143
#3144
#3145
#3146
#3147
#3148
#3149
#315
#3150
#3151
#3152
#3153
#3154
#3155
#3156
#3157
#3158
#3159
#316
#3160
#3161
#3162
#3163
#3164
#3165
#3166
#3167
#3167
#3168
#3169
#317
#3170
#3171
#3172
#3173
#3174
#3175
#3176
#3177
#3178
#3179
#318
#3180
#3181
#3182
#3183
#3184
#3186
#3187
#3188
#3189
#319
#3191
#3192
#3193
#3194
#3195
#3196
#3197
#3198
#3199
#32
#320
#3200
#3201
#3202
#3203
#3204
#3205
#3206
#3207
#3208
#3209
#321
#3210
#3211
#3212
#3213
#3214
#3215
#3216
#3217
#3218
#3219
#322
#3220
#3221
#3222
#3223
#3224
#3225
#3226
#3227
#3228
#3229
#323
#3230
#3231
#3232
#3233
#3234
#3235
#3236
#3237
#3238
#3239
#324
#3240
#3241
#3242
#3243
#3244
#3245
#3246
#3247
#3248
#3249
#3250
#3251
#3252
#3253
#3254
#3255
#3256
#3257
#3258
#3259
#326
#3260
#3261
#3262
#3263
#3264
#3265
#3266
#3267
#3268
#3269
#327
#3270
#3271
#3272
#3273
#3274
#3275
#3276
#3277
#3278
#3279
#328
#3280
#3281
#3282
#3283
#3284
#3285
#3286
#3287
#3288
#3289
#329
#3290
#3291
#3292
#3293
#3294
#3295
#3296
#3297
#3298
#3299
#33
#330
#3300
#3301
#3302
#3303
#3304
#3305
#3306
#3307
#3308
#3309
#331
#3310
#3311
#3312
#3313
#3314
#3315
#3316
#3317
#3318
#3319
#332
#3320
#3321
#3323
#3324
#3325
#3326
#3327
#3328
#3329
#333
#3330
#3331
#3332
#3333
#3334
#3335
#3336
#3337
#3338
#3339
#334
#3340
#3341
#3342
#3343
#3344
#3345
#3346
#3347
#3348
#3349
#335
#3350
#3351
#3352
#3353
#3354
#3355
#3356
#3357
#3358
#3359
#3360
#3361
#3362
#3363
#3364
#3365
#3366
#3368
#3369
#337
#3370
#3371
#3372
#3373
#3374
#3375
#3376
#3377
#3378
#3379
#338
#3380
#3381
#3382
#3383
#3384
#3385
#3386
#3387
#3388
#3389
#339
#3390
#3391
#3392
#3393
#3394
#3395
#3396
#3397
#3399
#34
#340
#3400
#3401
#3402
#3403
#3404
#3405
#3406
#3407
#3408
#3409
#341
#3410
#3411
#3412
#3413
#3414
#3415
#3416
#3417
#3418
#3419
#342
#3420
#3421
#3422
#3423
#3424
#3425
#3426
#3427
#3428
#3429
#343
#3430
#3431
#3432
#3434
#3435
#3436
#3437
#3438
#3439
#344
#3440
#3441
#3442
#3443
#3444
#3445
#3446
#3447
#3448
#3449
#345
#3450
#3451
#3452
#3453
#3454
#3455
#3456
#3457
#3458
#3459
#346
#3460
#3461
#3462
#3463
#3464
#3465
#3466
#3467
#3468
#3469
#347
#3470
#3471
#3472
#3473
#3474
#3475
#3476
#3477
#3478
#3479
#348
#3480
#3481
#3482
#3483
#3484
#3485
#3486
#3487
#3488
#3489
#349
#3490
#3491
#3492
#3493
#3494
#3495
#3496
#3497
#3498
#3499
#35
#3500
#3502
#3503
#3504
#3505
#3506
#3507
#3508
#3509
#351
#3510
#3511
#3512
#3513
#3514
#3515
#3516
#3517
#3518
#3519
#352
#3520
#3521
#3524
#3525
#3526
#3527
#3528
#3529
#353
#3530
#3531
#3532
#3533
#3534
#3535
#3536
#3537
#3538
#3539
#354
#3540
#3541
#3542
#3543
#3544
#3545
#3546
#3547
#3548
#3549
#355
#3550
#3551
#3552
#3553
#3554
#3555
#3556
#3557
#3558
#3559
#356
#3560
#3561
#3562
#3563
#3564
#3565
#3566
#3567
#3568
#3569
#357
#3570
#3571
#3572
#3573
#3576
#3577
#3578
#3579
#3580
#3581
#3582
#3583
#3584
#3585
#3586
#3587
#3588
#3589
#3590
#3591
#3592
#3593
#3594
#3595
#3596
#3597
#3598
#3599
#36
#360
#3600
#3601
#3602
#3603
#3604
#3605
#3606
#3607
#3608
#3609
#361
#3610
#3611
#3612
#3613
#3614
#3615
#3616
#3617
#3618
#3619
#3620
#3621
#3622
#3623
#3624
#3625
#3626
#3627
#3628
#3629
#363
#3630
#3631
#3632
#3633
#3634
#3635
#3636
#3637
#3638
#3639
#364
#3640
#3641
#3642
#3643
#3644
#3645
#3646
#3648
#3649
#365
#3650
#3651
#3652
#3653
#3654
#3655
#3656
#3657
#3658
#3659
#366
#3660
#3661
#3662
#3663
#3665
#3666
#3667
#3668
#3669
#367
#3670
#3671
#3672
#3674
#3675
#3676
#3677
#3678
#3679
#368
#3680
#3681
#3683
#3684
#3685
#3686
#3687
#3688
#3689
#369
#3690
#3691
#3692
#3693
#3694
#3695
#3696
#3697
#3698
#3699
#37
#370
#3700
#3701
#3702
#3703
#3704
#3705
#3706
#3707
#3708
#3709
#371
#3711
#3714
#3715
#3716
#3717
#3718
#3719
#372
#3720
#3721
#3723
#3725
#3726
#3727
#3728
#3729
#373
#3730
#3731
#3732
#3733
#3734
#3735
#3736
#374
#3741
#3742
#3745
#3745
#3746
#3747
#3747
#375
#376
#377
#378
#379
#38
#380
#381
#382
#383
#384
#385
#386
#387
#388
#389
#39
#393
#394
#395
#396
#397
#398
#399
#4
#401
#402
#403
#404
#405
#406
#408
#409
#41
#411
#412
#413
#415
#416
#417
#418
#42
#420
#421
#422
#424
#425
#426
#427
#428
#429
#43
#430
#432
#433
#434
#435
#436
#437
#438
#439
#44
#440
#441
#442
#443
#444
#445
#446
#447
#448
#449
#45
#450
#451
#452
#453
#46
#460
#461
#462
#463
#464
#465
#466
#467
#468
#469
#47
#470
#471
#472
#473
#474
#476
#479
#48
#480
#481
#482
#483
#484
#486
#487
#488
#489
#49
#490
#491
#492
#493
#494
#495
#496
#497
#498
#499
#5
#50
#500
#501
#502
#503
#504
#505
#506
#507
#509
#51
#510
#511
#513
#514
#515
#516
#517
#518
#519
#52
#520
#521
#524
#526
#527
#528
#529
#53
#530
#531
#532
#533
#534
#535
#538
#539
#54
#540
#541
#542
#545
#549
#55
#550
#551
#552
#554
#555
#556
#557
#559
#56
#560
#562
#563
#564
#565
#566
#567
#568
#569
#570
#571
#572
#573
#574
#575
#576
#577
#578
#58
#580
#581
#582
#583
#584
#585
#586
#587
#589
#59
#590
#591
#592
#593
#595
#596
#597
#598
#599
#6
#60
#600
#602
#603
#604
#606
#608
#609
#61
#610
#611
#612
#614
#615
#616
#619
#62
#620
#623
#625
#626
#627
#628
#629
#63
#631
#632
#633
#638
#639
#64
#640
#641
#642
#643
#644
#645
#646
#647
#648
#649
#65
#650
#651
#652
#653
#654
#655
#656
#657
#658
#659
#66
#660
#661
#663
#664
#665
#667
#668
#669
#67
#670
#672
#673
#675
#676
#677
#678
#679
#68
#680
#681
#682
#683
#684
#685
#686
#687
#689
#690
#691
#692
#693
#694
#695
#696
#697
#698
#699
#7
#700
#701
#703
#705
#706
#707
#708
#71
#710
#711
#712
#713
#714
#715
#716
#717
#718
#72
#720
#721
#722
#723
#724
#725
#726
#727
#729
#73
#730
#731
#732
#733
#734
#735
#737
#738
#739
#740
#741
#743
#744
#745
#746
#747
#749
#75
#750
#751
#752
#753
#754
#755
#757
#758
#759
#76
#760
#761
#763
#764
#765
#766
#767
#768
#769
#77
#770
#771
#773
#774
#776
#777
#778
#78
#781
#783
#784
#785
#786
#787
#788
#79
#790
#791
#792
#794
#795
#796
#797
#798
#799
#8
#80
#800
#802
#803
#804
#805
#806
#807
#808
#809
#81
#810
#811
#812
#813
#815
#816
#817
#818
#819
#82
#820
#821
#822
#825
#826
#827
#828
#829
#830
#832
#833
#834
#835
#836
#838
#839
#840
#841
#842
#843
#844
#845
#846
#848
#849
#850
#851
#853
#856
#857
#858
#859
#86
#860
#861
#862
#863
#864
#865
#866
#867
#868
#869
#870
#871
#873
#874
#875
#876
#877
#878
#879
#88
#880
#881
#882
#884
#885
#887
#888
#889
#89
#890
#892
#893
#894
#895
#896
#897
#898
#899
#901
#902
#903
#904
#905
#906
#907
#908
#909
#91
#910
#912
#913
#914
#915
#917
#918
#919
#92
#920
#921
#922
#923
#925
#926
#927
#929
#93
#930
#931
#932
#933
#934
#935
#936
#937
#938
#939
#94
#940
#941
#942
#943
#944
#945
#946
#947
#948
#949
#950
#951
#952
#953
#955
#956
#957
#958
#959
#96
#960
#961
#962
#963
#964
#965
#967
#968
#969
#97
#970
#971
#972
#973
#974
#975
#977
#978
#979
#98
#980
#981
#982
#983
#984
#985
#986
#987
#988
#989
#99
#990
#991
#992
#993
#994
#995
#996
#997
#998
#999
20250912-17
20250912-42
develop-pre-restore-2026-05-27
mock-tag-test
rocm-5.7.0
rocm-5.7.1
rocm-6.0.0
rocm-6.0.2
rocm-6.1.0
rocm-6.1.1
rocm-6.1.2
rocm-6.1.5
rocm-6.2.0
rocm-6.2.1
rocm-6.2.2
rocm-6.2.4
rocm-6.3.0
rocm-6.3.1
rocm-6.3.2
rocm-6.3.3
rocm-6.4.0
rocm-6.4.1
rocm-6.4.2
rocm-6.4.3
rocm-6.4.4
rocm-7.0.0
rocm-7.0.1
rocm-7.0.2
rocm-7.1.0
rocm-7.1.1
rocm-7.2.0
rocm-7.2.1
rocm-7.2.2
rocm-7.2.3
rocm-7.2.4
rocm-test-09212024
therock-7.10
therock-7.9.0
tutorial_hello_world
v0.1-cpu
-
ca36501613
Moving code-lines in hstu pipeline
Qianfeng Zhang
2025-07-07 09:49:16 +00:00 -
d921b14082
Remove using i_loop and num_loops since seqlen_k_curr and seqlen_k_end is enough
Qianfeng Zhang
2025-07-06 14:31:36 +00:00 -
ac5194f783
Let causal == 0 cases to do IsFullTileInsideMask() checking before calling IsTokenPairInsideMask()
Qianfeng Zhang
2025-06-26 10:23:03 +00:00 -
ff807ddd1a
Tiny movement in the code lines of the pipeline
Qianfeng Zhang
2025-06-26 10:09:51 +00:00 -
818722a3c6
Use two work-groups per compute-unit for scheduling the kernel
Qianfeng Zhang
2025-06-26 09:46:33 +00:00 -
1a917993cf
Simplify the codes in all host/device IsTokenPairInsideMask() trying to reduce branching
Qianfeng Zhang
2025-06-23 14:13:55 +00:00 -
57050d93a6
Fix masking for min_full_attn_seqlen > 0 situation
Qianfeng Zhang
2025-06-22 16:23:57 +00:00 -
508f4ac632
Update to test_ck_hstu_mask.sh and test_pytorch_hstu_mask.py to align their testings
Qianfeng Zhang
2025-06-22 15:20:47 +00:00 -
49a12df133
Completely remove the dependency to include/ck_tile/ops/fmha/ops headers
Qianfeng Zhang
2025-06-22 11:29:03 +00:00 -
f62c52a499
Fix the use of KV LdsBuffers to avoid unexpected overwriting that causes non-deterministic results
Qianfeng Zhang
2025-06-21 13:48:14 +00:00 -
9cdd64f337
Change while() do to do while() for the main loop to let the compiler generate more elegant code
Qianfeng Zhang
2025-06-21 12:58:27 +00:00 -
3413400044
Use batch dim as first grid dim by default and replace env ASSUME_LEAST_VARIED_SEQLEN by ASSUME_HIGHLY_VARIED_SEQLEN
Qianfeng Zhang
2025-06-18 15:57:04 +00:00 -
dcef0260ff
Align the -seqlens=xxx in the mattn0_full0 and mattn256_full256 scripts with the required benchmarks
Qianfeng Zhang
2025-06-18 15:31:47 +00:00 -
07635af84c
Tiny fix in hstu attention IsFullTileInsideMask()
root
2025-06-18 15:12:05 +00:00 -
e033a82bd7
Enable BATCH_AS_FIRST_GRID_DIM grid-scheduling and use ASSUME_LEAST_VARIED_SEQLEN for building control
Qianfeng Zhang
2025-06-10 15:43:19 +00:00 -
d39ba52e07
Improve the VDramTileDistribution and VLds layout for better device loading and reduce bank-conflict
Qianfeng Zhang
2025-06-08 11:22:21 +00:00 -
5f16f5db20
Move GetKPackV() and GetAlignmentV() out of ck_tile fmha to hstu pipeline default policy for better visibility
Qianfeng Zhang
2025-06-07 12:46:40 +00:00 -
88a0838453
Add assert(contextual_seqlen >= 0) in example
Qianfeng Zhang
2025-06-06 14:22:07 +00:00 -
5944a63f11
Update IsFullTileInsideMask() for the situation where kUseLocal is true
Qianfeng Zhang
2025-06-06 14:20:41 +00:00 -
cbc5485589
Move all test and bench scripts to folder scripts
Qianfeng Zhang
2025-06-06 08:22:38 +00:00 -
e36446aca9
Add two scripts
Qianfeng Zhang
2025-06-06 08:14:12 +00:00 -
a8738f4455
Move dividing by max_seqlen to end of Gemm1 loop in the reference hstu-attention codes
Qianfeng Zhang
2025-05-30 16:02:45 +00:00 -
3cc6f4abc8
Tune the settings for hdim-256
Qianfeng Zhang
2025-05-30 08:49:33 +00:00 -
fb5aa39762
Add example parameter alpha to ease the testing
Qianfeng Zhang
2025-05-30 08:47:55 +00:00 -
e59ec37ffb
Convert P to fp16/bf16 before doing second gemm in reference hstu implementation
Qianfeng Zhang
2025-05-29 01:04:19 +00:00 -
c83d8587c7
not-critical updates in example and block_masking codes
Qianfeng Zhang
2025-05-29 01:02:20 +00:00 -
08e381d632
Add init_qkv and dump_output example parameters for easier debugging
Qianfeng Zhang
2025-05-28 15:33:54 +00:00 -
85bc8fd805
Add example parameter max_seqlen and max_target
Qianfeng Zhang
2025-05-27 14:18:41 +00:00 -
46301a85d9
Update to the method for calculating max_seqlen in the example
Qianfeng Zhang
2025-05-27 10:36:43 +00:00 -
a079b95b77
Use NRepetitions2DEpilogue for outputting o_acc tile
Qianfeng Zhang
2025-05-26 14:09:55 +00:00 -
eba3242ab8
Use LDS to indirectly load Q-tile to enable dwordx4 loading and avoid wasting cachelines
Qianfeng Zhang
2025-05-21 16:44:39 +00:00 -
d74b41070f
Update the reference hstu to not do fp32 to fp16/bf16 conversion before P@V gemm
Qianfeng Zhang
2025-05-20 07:50:56 +00:00 -
4833daf43d
Adjust the threshold values for fp16/bf16 in the example
Qianfeng Zhang
2025-05-20 07:48:54 +00:00 -
6e38888f46
Enable RTN fp32 to bf16 conversion by adding compiler option in CMakeLists.txt
Qianfeng Zhang
2025-05-20 07:46:21 +00:00 -
2b94d9261c
Change do-while main-loop to while-do and remove early exiting check
Qianfeng Zhang
2025-05-19 15:38:17 +00:00 -
abc8335c43
Adjust the codes before the main-loop
Qianfeng Zhang
2025-05-19 11:24:59 +00:00 -
946e917e2c
Move k_tile loading and v_tile loading earlier in the loop
Qianfeng Zhang
2025-05-19 10:26:45 +00:00 -
45ac659ae0
Move k_tile loading in the loop earlier
Qianfeng Zhang
2025-05-19 10:16:09 +00:00 -
109dcfc2f0
Replace s_acc and pcomp tile array by single tile object for simplification
Qianfeng Zhang
2025-05-19 07:46:57 +00:00 -
40056b95a9
Add __builtin_amdgcn_sched_barrier(0) to instruct the compiler to isolate code better
Qianfeng Zhang
2025-05-18 16:19:30 +00:00 -
8c43b793c9
Set the block_per_cu to 3 for hdim-128
Qianfeng Zhang
2025-05-18 15:58:02 +00:00 -
1bbefda240
Prefetch b_warp_tensor for next nIter and move b_warp_windows construction into n-iteration in block_gemm_areg_bsmem_creg for gemm-1
Qianfeng Zhang
2025-05-18 15:02:36 +00:00 -
11718b0af4
Move b_warp_windows construction into k-iteration in block_gemm_areg_bsmem_creg for gemm-0
Qianfeng Zhang
2025-05-18 14:02:17 +00:00 -
d01b4f27c6
Prefetch K for next iteration from LDS in block_gemm_areg_bsmem_creg for gemm-0
Qianfeng Zhang
2025-05-18 13:40:38 +00:00 -
4545d2efc1
Hack block_gemm_areg_bsmem_creg_v2 for gemm_1
Qianfeng Zhang
2025-05-15 14:02:02 +00:00 -
6e7553be77
Rename the hacked block_gemm_areg_bsmem_creg_v2
Qianfeng Zhang
2025-05-15 10:38:15 +00:00 -
e5977717a8
Move the lambda for dividing by max_seqlen from kernel to pipeline
Qianfeng Zhang
2025-05-18 07:56:34 +00:00 -
70237d2e5c
Move the dividing by max_seqlen out of f_silu to be handle outside the main-loop
Qianfeng Zhang
2025-05-18 03:21:36 +00:00 -
27e64a682a
Set example option -save_mask default to 0
Qianfeng Zhang
2025-05-14 13:44:24 +00:00 -
0ee9dff5cb
Add scripts (test_ck_hstu_mask.sh and test_pytorch_hstu_mask.py) for checking mask
Qianfeng Zhang
2025-05-14 02:00:22 +00:00 -
4ff88b4400
Add -save_mask option to the example to output int8 mask tensor
Qianfeng Zhang
2025-05-14 01:54:45 +00:00 -
124539e123
Update the rules of hstu masking
Qianfeng Zhang
2025-05-13 10:37:19 +00:00 -
b2cd7757f0
Add test cases for better functional verification
Qianfeng Zhang
2025-05-10 16:04:06 +00:00 -
3d83d23a55
Fix sequence dim length for o_dram descriptor in the kernel
Qianfeng Zhang
2025-05-10 16:02:52 +00:00 -
010b3f48b3
Revert "Temporarily close the instance for hdim64 and hdim256 to save compiling time"
Qianfeng Zhang
2025-05-07 13:37:15 +00:00 -
bce38c1531
Simplification in the static iterations of block_gemm_areg_bsmem_creg_v2_hack
Qianfeng Zhang
2025-05-07 10:09:23 +00:00 -
87b5aa78bd
Use kK1=16
Qianfeng Zhang
2025-05-07 09:51:16 +00:00 -
bce88a9e73
Use type_convert rather than static_cast in f_silu
Qianfeng Zhang
2025-05-07 07:05:43 +00:00 -
9c3e49a1d0
Add max_seqlen as divider in siLu
Qianfeng Zhang
2025-05-06 16:16:23 +00:00 -
717aae7ce7
Remove using cast_tile_pk_fp16_fp32 for better accuracy for fp16 hstu attention
Qianfeng Zhang
2025-05-06 08:24:03 +00:00 -
380165c3dc
Override and fix GetAlignmentK()
Qianfeng Zhang
2025-05-03 16:17:28 +00:00 -
34998cfd19
Use kN0=32
Qianfeng Zhang
2025-04-30 05:42:43 +00:00 -
94f8d71ee2
Temporarily close the instance for hdim64 and hdim256 to save compiling time
Qianfeng Zhang
2025-04-30 01:54:43 +00:00 -
9df0fad750
Hack block_gemm_areg_bsmem_creg_v2 to let s_acc for gemm_0 not need be cleared first
Qianfeng Zhang
2025-04-28 15:11:54 +00:00 -
6bf4877a20
Adjust the v_tile and k_tile loading location
Qianfeng Zhang
2025-04-28 09:25:09 +00:00 -
ba037426c5
Put two gemms call inside one n0loop unroll
Qianfeng Zhang
2025-04-28 06:41:37 +00:00 -
23852ef4c0
Add IsFullTileInsideMask() to avoid pixel-by-pixel checking when kUseCausal=true but kUseLocal=false
Qianfeng Zhang
2025-04-27 09:31:38 +00:00 -
ddd9227453
Replace set_tile_if() by sweep_tile_span() to reduce branching
Qianfeng Zhang
2025-04-27 05:00:09 +00:00 -
187f4d3f68
Update the GridSize() and GetTileIndex() in hstu kernel
Qianfeng Zhang
2025-04-26 10:01:23 +00:00 -
9a08e1090e
Add scripts for measuring jagged with/no causal cases
Qianfeng Zhang
2025-04-25 15:59:51 +00:00 -
c0128a9156
Tiny update in IsTokenPairInsideMask()
Qianfeng Zhang
2025-04-25 15:36:58 +00:00 -
ac0e593e0d
Use compiler builtin directly in f_silu for float type
Qianfeng Zhang
2025-04-25 15:04:41 +00:00 -
31c21c74d8
Code re-arrangement in pipeline
Qianfeng Zhang
2025-04-25 14:16:29 +00:00 -
eb2564fe46
Update the seqlen_k_curr inside the first gemm loop
Qianfeng Zhang
2025-04-25 13:59:48 +00:00 -
40683ee932
Rename the performance measurement scripts
Qianfeng Zhang
2025-04-25 06:09:17 +00:00 -
79fdd564b8
Add support for WarpGemm-16x16x32 in QK-BlockGemm (which enables using ds_write/read_b128 for K)
Qianfeng Zhang
2025-04-25 06:06:50 +00:00 -
1986d8c578
Update in K-Lds laying-out to consider for both WarpGemm-32x32x16 and WarpGemm-16x16x16
Qianfeng Zhang
2025-04-24 15:02:57 +00:00 -
6dd83b2a5a
Use 16x16x16 WarpGemm
Qianfeng Zhang
2025-04-24 07:59:28 +00:00 -
7153a99dd4
Using __builtin_amdgcn_rcpf in siLU function
Qianfeng Zhang
2025-04-24 06:28:16 +00:00 -
fb89a013b7
Combine minus with scale_s
Qianfeng Zhang
2025-04-24 05:47:24 +00:00 -
23e80a5964
Move silu calculation to gemm1 iteration and try to interleave gemm_1 and silu
Qianfeng Zhang
2025-04-23 13:10:02 +00:00 -
f40d68c1a9
Update in using masking for the case where kMasking is false and kPadSeqLenK is true
Qianfeng Zhang
2025-04-23 10:47:27 +00:00 -
95b9a277ac
Fix in generate_instances.py and re-generated the instances
Qianfeng Zhang
2025-04-23 04:00:21 +00:00 -
e5fb03a4aa
Back to use exp() instead of exp2() since exp() in ck_tile using fast __builtin_amdgcn_exp2f()
Qianfeng Zhang
2025-04-21 14:41:51 +00:00 -
266e7bc8e9
Use kN0=64 to save vgprs
Qianfeng Zhang
2025-04-22 14:45:27 +00:00 -
8f7a97fe02
Fix the script name
Qianfeng Zhang
2025-04-22 13:43:11 +00:00 -
bace12feac
Fix in GetTileRangeAlongX
Qianfeng Zhang
2025-04-22 13:42:03 +00:00 -
7a7c17802a
Add script compare_with_triton_2.sh for measuring the jagged cases of seqlen 1024/2048/4096/8192/16384/32768
Qianfeng Zhang
2025-04-22 10:16:20 +00:00 -
e731437af1
Change gemm0 to iterate along kN0 so that BlockGemm can overlap with masking and siLu
Qianfeng Zhang
2025-04-19 15:52:51 +00:00 -
8da21d9cde
Fix the GetTileRangeAlongX() to align with the hstu masking definition when both causal=true and local=true
Qianfeng Zhang
2025-04-18 15:37:49 +00:00 -
065776d42d
Remove un-needed __builtin_amdgcn_sched_barrier(0)
Qianfeng Zhang
2025-04-18 10:05:57 +00:00 -
63acd4638b
Use shared ring Lds buffers for K/V to avoid over-lapping between first-K/last-V or last-K/first-V
Qianfeng Zhang
2025-04-18 09:47:43 +00:00 -
58090fe730
Tiny codes simplification in pipeline
Qianfeng Zhang
2025-04-18 08:22:11 +00:00 -
ec14e9df3e
Remove one line of __builtin_amdgcn_sched_barrier(0)
Qianfeng Zhang
2025-04-17 14:21:14 +00:00 -
c0609d49cd
Fix the integer overflow in total_flops calculation
Qianfeng Zhang
2025-04-17 10:34:13 +00:00 -
1efb2a8f38
Add scripts for comparing with triton
Qianfeng Zhang
2025-04-17 10:33:44 +00:00 -
70b4aa310f
Use exp2() to calculate exp() for better performance
Qianfeng Zhang
2025-04-16 06:54:06 +00:00 -
45019fd5fd
Remove the comparing of row/col to max_uih_len in masking
Qianfeng Zhang
2025-04-16 04:35:42 +00:00 -
ad10a2dd53
Use kM0=128 kN0=64 to completely remove the vgprs spilling
Qianfeng Zhang
2025-04-15 15:08:46 +00:00 -
8b2948b31e
Split HstuBlockMasking into HstuBlockMaskWithLocal and HstuBlockMaskNoLocal to save vgprs for non-local situations
Qianfeng Zhang
2025-04-15 14:40:55 +00:00