Addition of Stream-K tests using Tile Engine (#3514)

* Addition of Stream-K tests using Tile Engine

This change adds an implementation for generating Stream-K tests using Tile Engine.
It generates test executables for the different combinations defined in the
config files. This addition includes simple tests for bf16 and fp16, covering both the
atomic and reduction strategies with the compv3 pipeline. The tests rely on the implementation
of Stream-K in Tile Engine.

* Integrate the addition of tree reduction and edit the README

* Temporarily remove parallel and tree reduction from the configs while the bugs affecting them are being resolved
This commit is contained in:
arai713
2026-01-22 12:53:52 -08:00
committed by GitHub
parent 31a35ecab4
commit b9bb1db5d9
9 changed files with 723 additions and 2 deletions

View File

@@ -98,7 +98,7 @@
},
"reduction_strategy": {
"values": [
"reduction", "atomic"
"atomic"
]
}
}

View File

@@ -377,6 +377,7 @@ class GemmKernelBuilder:
reduction_strategy_map = {
"atomic": "ck_tile::StreamKReductionStrategy::Atomic",
"reduction": "ck_tile::StreamKReductionStrategy::Reduction",
"tree": "ck_tile::StreamKReductionStrategy::TreeReduction",
}
# Determine accumulator type based on datatype
@@ -555,6 +556,11 @@ struct SelectedKernel {{
// Reset sk flags to zero before each repetition of the kernel
workspace_data.SetZero();
}}
else if(reduction_strategy == ck_tile::StreamKReductionStrategy::TreeReduction)
{{
// Reset sk flags to zero before each repetition of the kernel
workspace_data.SetZero();
}}
}};
// Launch kernel

View File

@@ -165,10 +165,13 @@ class GemmProfiler
auto [name, avg_time] = kernel_run_result;
auto dp_persistent =
SelectedKernel::UsePersistentKernel ? "PersistentKernel" : "NonPersistentKernel";
auto reduction_strategy =
SelectedKernel::reduction_strategy == ck_tile::StreamKReductionStrategy::Atomic
? "Atomic"
: "Reduction";
: SelectedKernel::reduction_strategy == ck_tile::StreamKReductionStrategy::Reduction
? "Reduction"
: "TreeReduction";
KernelInstance kernel_instance{
name, dp_persistent, reduction_strategy, gemm_problem, {-1.0f, -1.0f, -1.0f}};