Use __builtin_memcpy to implement bit_cast and to access vectors through pointers to scalars (#53)

* reworking vector_type

* use __builtin_memcpy for bit_cast and for vector access through scalar pointers

* clean up

[ROCm/composable_kernel commit: 64350affc5]
This commit is contained in:
Chao Liu
2021-11-18 09:11:15 -06:00
committed by GitHub
parent 75f9af0fc5
commit bf1768aea4
12 changed files with 152 additions and 49 deletions

View File

@@ -9,7 +9,6 @@
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "gemm_common.hpp"
#include "host_gemm.hpp"
#include "device_tensor.hpp"
#include "device_base.hpp"
@@ -139,12 +138,12 @@ int main(int argc, char* argv[])
{
case 0: break;
case 1:
a_m_k.GenerateTensorValue(GeneratorTensor_2{-5, 5});
b_k_n.GenerateTensorValue(GeneratorTensor_2{-5, 5});
a_m_k.GenerateTensorValue(GeneratorTensor_2<ADataType>{-5, 5});
b_k_n.GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5});
break;
default:
a_m_k.GenerateTensorValue(GeneratorTensor_3<float>{0.0, 1.0});
b_k_n.GenerateTensorValue(GeneratorTensor_3<float>{-0.5, 0.5});
a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0});
b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
}
DeviceMem a_m_k_device_buf(sizeof(ADataType) * a_m_k.mDesc.GetElementSpace());