clipboard float4 copy and paste C++ code

This commit is contained in:
Chao Liu
2019-04-05 02:13:29 -05:00
parent 605afd0fb6
commit dabfa77fc6
6 changed files with 246 additions and 155 deletions

View File

@@ -14,12 +14,29 @@ __device__ void threadwise_matrix_copy(SrcMatrix,
for(index_t i = 0; i < NRow; ++i)
{
for(index_t j = 0; j < NCol; ++j)
// optimize for vector-4 load
if(NCol % 4 == 0)
{
const index_t src_index = src_mtx.Get1dIndex(i, j);
const index_t dst_index = dst_mtx.Get1dIndex(i, j);
using vector_t = typename vector_type<Float, 4>::MemoryType;
p_dst[dst_index] = p_src[src_index];
for(index_t j = 0; j < NCol / 4; ++j)
{
const index_t src_index = src_mtx.Get1dIndex(i, 4 * j);
const index_t dst_index = dst_mtx.Get1dIndex(i, 4 * j);
*reinterpret_cast<vector_t*>(&p_dst[dst_index]) =
*reinterpret_cast<const vector_t*>(&p_src[src_index]);
}
}
else
{
for(index_t j = 0; j < NCol; ++j)
{
const index_t src_index = src_mtx.Get1dIndex(i, j);
const index_t dst_index = dst_mtx.Get1dIndex(i, j);
p_dst[dst_index] = p_src[src_index];
}
}
}
}