debugged: CUDA should use its own float4 definition

[ROCm/composable_kernel commit: 7a251a0922]
This commit is contained in:
Chao Liu
2019-04-06 15:44:53 -05:00
parent a3f850c5e6
commit 2cd5fbe227
4 changed files with 29 additions and 10 deletions

View File

@@ -580,7 +580,7 @@ int main(int argc, char* argv[])
constexpr index_t HPad = 0;
constexpr index_t WPad = 0;
-#elif 0
+#elif 1
// 1x1 filter, 14x14 image, C = 2048
constexpr index_t N = 128;
constexpr index_t C = 2048;