Ck tile gemm padding dim (#1516)

* Support the N dimension padding

* Finished the padding feature for different dimensions of K
This commit is contained in:
Thomas Ning
2024-09-18 11:32:29 -07:00
committed by GitHub
parent e84adec3ba
commit 694c300145
4 changed files with 33 additions and 13 deletions

View File

@@ -179,9 +179,9 @@ float invoke_gemm(ck_tile::DeviceMem& a_buf,
std::cout << "The overall perfomance of the GEMM with "
<< "[" << data_type << "]"
<< "batch size: " << batch_size << ". m:" << M << ",n:" << N << ", k:" << K
<< "is: \n";
std::cout << "Running time :" << ave_time << "ms, Throughput" << gb_per_sec << "GB/s \n"
<< "batch size: " << batch_size << ". m:" << M << ", n:" << N << ", k:" << K
<< " is: \n";
std::cout << "Running time: " << ave_time << "ms, Throughput " << gb_per_sec << "GB/s \n"
<< std::flush;
return ave_time;
@@ -235,7 +235,7 @@ int main(int argc, char* argv[])
// The kPadA, kPadB, kPadC & kBlockPerCu should also come from the Codegen part.
constexpr bool kPadA = true;
constexpr bool kPadB = true;
constexpr bool kPadC = false;
constexpr bool kPadC = true;
// This part comes from the Codegen
constexpr ck_tile::index_t M_Tile = 128;
@@ -348,7 +348,7 @@ int main(int argc, char* argv[])
pass_gpu = ck_tile::check_err(c_host_dev, c_host_gpu_ref);
std::cout << "The GPU veification result is:" << (pass_gpu ? "correct" : "fail")
std::cout << "The GPU veification result is: " << (pass_gpu ? "correct" : "fail")
<< std::flush;
}