mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
refactor
This commit is contained in:
@@ -302,7 +302,7 @@ template <class T>
|
||||
void check_error(const Tensor<T>& ref, const Tensor<T>& result)
|
||||
{
|
||||
float error = 0;
|
||||
float max_diff = 0;
|
||||
float max_diff = -1;
|
||||
float ref_value = 0, result_value = 0;
|
||||
for(int i = 0; i < ref.mData.size(); ++i)
|
||||
{
|
||||
@@ -338,6 +338,14 @@ int main()
|
||||
constexpr unsigned K = 64;
|
||||
constexpr unsigned S = 3;
|
||||
constexpr unsigned R = 3;
|
||||
#elif 0
|
||||
constexpr unsigned N = 72;
|
||||
constexpr unsigned C = 288;
|
||||
constexpr unsigned HI = 38;
|
||||
constexpr unsigned WI = 38;
|
||||
constexpr unsigned K = 72;
|
||||
constexpr unsigned S = 3;
|
||||
constexpr unsigned R = 3;
|
||||
#elif 0
|
||||
constexpr unsigned N = 1;
|
||||
constexpr unsigned C = 1;
|
||||
@@ -347,13 +355,13 @@ int main()
|
||||
constexpr unsigned S = 3;
|
||||
constexpr unsigned R = 3;
|
||||
#elif 0
|
||||
constexpr unsigned N = 1;
|
||||
constexpr unsigned C = 1;
|
||||
constexpr unsigned N = 1;
|
||||
constexpr unsigned C = 1;
|
||||
constexpr unsigned HI = 4;
|
||||
constexpr unsigned WI = 4;
|
||||
constexpr unsigned K = 1;
|
||||
constexpr unsigned S = 3;
|
||||
constexpr unsigned R = 3;
|
||||
constexpr unsigned K = 1;
|
||||
constexpr unsigned S = 3;
|
||||
constexpr unsigned R = 3;
|
||||
#elif 0
|
||||
constexpr unsigned N = 2;
|
||||
constexpr unsigned C = 3;
|
||||
|
||||
@@ -26,13 +26,13 @@ void device_direct_convolution_1(
|
||||
constexpr auto out_desc = OutDesc{};
|
||||
constexpr unsigned OutTileSizeH = 2;
|
||||
constexpr unsigned OutTileSizeW = 2;
|
||||
constexpr unsigned NPerBlock = 1;
|
||||
constexpr unsigned NPerBlock = 2;
|
||||
constexpr unsigned KPerBlock = 16;
|
||||
constexpr unsigned CPerBlock = 4;
|
||||
constexpr unsigned YPerBlock = 4;
|
||||
constexpr unsigned CPerBlock = 2;
|
||||
constexpr unsigned YPerBlock = 2;
|
||||
constexpr unsigned XPerBlock = 16;
|
||||
|
||||
constexpr unsigned NPerThread = 1;
|
||||
constexpr unsigned NPerThread = 2;
|
||||
constexpr unsigned KPerThread = 4;
|
||||
constexpr unsigned CPerThread = 2;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user