mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-24 23:05:54 +00:00
[CK_TILE][FMHA] Enable gpt-oss sink (#3490)
* Enable gptoss sink Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * Update include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_splitkv_pipeline_qr_ks_vs.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_splitkv_pipeline_qr_ks_vs.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * add gptoss sink test Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * update CHANGELOG.md Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * fix test args error Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * Update test_fmha_fwd.cpp * update sink test Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * Revert "update sink test" This reverts commit 970b4f1686. * update sink test Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * update valid sink_v in splitkv pipeline Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * Update block_fmha_batch_prefill_pipeline_qr_ks_vs_async.hpp * Update example_fmha_fwd.cpp * fix lse error Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * fix clangformat error Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * fix aiter scale error Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * Update block_fmha_pipeline_qr_ks_vs.hpp * div scale_s for sink_value Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * Update fmha_fwd_runner.hpp * update sink_value with bias Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * Update block_fmha_batch_prefill_pipeline_qr_ks_vs_async.hpp * Fix typo in dropout parameter in fmha_batch_prefill_kernel * Update block_fmha_batch_prefill_pipeline_qr_ks_vs_async.hpp * Update example_fmha_fwd.cpp * Update include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs_async_trload.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_splitkv_pipeline_nwarp_sshuffle_qr_ks_vs.hpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * optimized some code Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * fix 
splitkv error Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * update sink reference Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> * Update fmha_fwd_runner.hpp * Update smoke_test_fwd_sink.sh --------- Signed-off-by: Linjun-AMD <Jun.Lin@amd.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com> [ROCm/composable_kernel commit:717ed0b59f]
This commit is contained in:
@@ -114,7 +114,8 @@ auto create_args(int argc, char* argv[])
|
||||
.insert("kv_eff_lens",
|
||||
"",
|
||||
"Batch-mode only: per-batch effective seqlen for KV (exclude PAD).\n"
|
||||
"Comma-separated list of length 'b'. If empty, no override.");
|
||||
"Comma-separated list of length 'b'. If empty, no override.")
|
||||
.insert("init_sink", "0", "value to init the output tensor sink value for validation");
|
||||
|
||||
bool result = arg_parser.parse(argc, argv);
|
||||
return std::make_tuple(result, arg_parser);
|
||||
@@ -157,6 +158,7 @@ auto run(const ck_tile::ArgParser& arg_parser)
|
||||
ck_tile::index_t num_splits = arg_parser.get_int("num_splits");
|
||||
std::string init_method = arg_parser.get_str("init");
|
||||
uint32_t seed = arg_parser.get_uint32("seed");
|
||||
int init_sink_value = arg_parser.get_int("init_sink");
|
||||
|
||||
ck_tile::stream_config stream_config{nullptr,
|
||||
true,
|
||||
@@ -203,6 +205,7 @@ auto run(const ck_tile::ArgParser& arg_parser)
|
||||
init_method,
|
||||
seed,
|
||||
do_validation,
|
||||
init_sink_value,
|
||||
stream_config,
|
||||
json);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user