mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
hand tuned params
This commit is contained in:
@@ -170,7 +170,7 @@ int main()
|
||||
|
||||
int num_thread = std::thread::hardware_concurrency();
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
in.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
|
||||
wei.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
|
||||
#endif
|
||||
@@ -180,7 +180,7 @@ int main()
|
||||
device_convolution(in_desc, in, wei_desc, wei, out_desc, out_device);
|
||||
}
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
host_convolution(in, wei, out_host);
|
||||
|
||||
float error = 0;
|
||||
|
||||
@@ -27,9 +27,9 @@ void device_convolution(
|
||||
constexpr unsigned OutTileSizeH = 2;
|
||||
constexpr unsigned OutTileSizeW = 2;
|
||||
constexpr unsigned NPerBlock = 2;
|
||||
constexpr unsigned KPerBlock = 8;
|
||||
constexpr unsigned KPerBlock = 32;
|
||||
constexpr unsigned CPerBlock = 2;
|
||||
constexpr unsigned YPerBlock = 4;
|
||||
constexpr unsigned YPerBlock = 1;
|
||||
constexpr unsigned XPerBlock = 16;
|
||||
|
||||
constexpr unsigned NPerThread = 2;
|
||||
|
||||
Reference in New Issue
Block a user