From e72eece8fcf79d1d3a958089fca1f02bfb71b777 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Thu, 21 Mar 2019 09:59:40 -0500 Subject: [PATCH] added int8x4 --- driver/driver.hip.cpp | 2 +- src/include/data_type.hip.hpp | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/driver/driver.hip.cpp b/driver/driver.hip.cpp index 17b333c69a..b1df58265e 100644 --- a/driver/driver.hip.cpp +++ b/driver/driver.hip.cpp @@ -617,7 +617,7 @@ int main(int argc, char* argv[]) #if 0 in_nchw.GenerateTensorValue(GeneratorTensor_1{}, num_thread); wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread); -#elif 1 +#elif 0 in_nchw.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread); wei_kcyx.GenerateTensorValue(GeneratorTensor_2{-5, 5}, num_thread); #elif 1 diff --git a/src/include/data_type.hip.hpp b/src/include/data_type.hip.hpp index c32e93b6ef..ca1f4dcbae 100644 --- a/src/include/data_type.hip.hpp +++ b/src/include/data_type.hip.hpp @@ -231,17 +231,11 @@ __device__ void fused_multiply_accumulate(float& d, const half2& s0, const half2 __device__ void fused_multiply_accumulate(char& d, const char& s0, const char& s1) { d += s0 * s1; } -// TODO:: this interface is misleading, int32 is actually int8x4 +// TODO:: this interface is misleading, s0, s1 are actually int8x4 // need to make a better interface __device__ void fused_multiply_accumulate(int32_t& d, const int32_t& s0, const int32_t& s1) { #if DEVICE_BACKEND_CUDA -#if 1 // debug d = __dp4a(s0, s1, d); -#elif 1 - asm volatile("dp4a.s32.s32 %0, %1, %2, %3;" : "=r"(d) : "r"(s0), "r"(s1), "r"(d)); -#elif 0 // this is wrong! just for debugging - d += (*reinterpret_cast(&s0)) * (*reinterpret_cast(&s1)); -#endif #endif }