Adding implicit GEMM v3

This commit is contained in:
Chao Liu
2019-05-23 22:10:40 -05:00
parent 8a4b59785b
commit 1cc683a3a3
16 changed files with 347 additions and 95 deletions

View File

@@ -67,6 +67,22 @@ __device__ void threadwise_gemm(MatrixA,
integral_constant<bool, TransC>,
FloatC* __restrict__ p_c_thread)
{
#if 0
if(get_thread_local_1d_id() == 0 && get_block_1d_id() == 0)
{
printf("p_a_thread: %f %f %f %f\n",
p_a_thread[0],
p_a_thread[1],
p_a_thread[2],
p_a_thread[3]);
printf("p_b_thread: %f %f %f %f\n",
p_b_thread[0],
p_b_thread[1],
p_b_thread[2],
p_b_thread[3]);
}
#endif
if(TransA && (!TransB) && (!TransC))
{
constexpr auto a_mtx = MatrixA{};