add global_load and buffer_load inline asm

This commit is contained in:
Chao Liu
2019-09-18 15:41:55 -05:00
parent 5b7a18c506
commit 86cc678f18
3 changed files with 128 additions and 86 deletions

View File

@@ -103,7 +103,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>;
-#elif 0
+#elif 1
// 1x1 filter, 8x8 image
// cudnn@V100 68%, ck@V100 72%, ck@P100 52%, ck@VII 42%
constexpr index_t N = 64;