debugging ds_read asm

This commit is contained in:
Chao Liu
2019-04-26 15:34:55 -05:00
parent b93d2e1b57
commit 3ce77700b6
6 changed files with 118 additions and 84 deletions

View File

@@ -183,6 +183,17 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn
GemmDataPerReadA,
GemmDataPerReadB>{};
// choose GEMM implementation here
// Single compile-time switch for the blockwise batched GEMM entry point.
// The lambda captures by reference, takes its arguments by value as a generic
// parameter pack, and forwards them unchanged to exactly one member function of
// blockwise_batch_gemm. Only one branch of the #if chain survives preprocessing.
// NOTE(review): Run_asm / Run_asm_v2 are presumably inline-assembly variants of
// Run with the same argument list -- confirm against the blockwise GEMM definition.
const auto run_blockwise_batch_gemm = [&](auto... Xs) {
#if 1
// active branch: Run
return blockwise_batch_gemm.Run(Xs...);
#elif 0
// disabled: set the first condition to 0 and this one to 1 to select Run_asm
return blockwise_batch_gemm.Run_asm(Xs...);
#else
// selected only when both conditions above are 0
return blockwise_batch_gemm.Run_asm_v2(Xs...);
#endif
};
// LDS: be careful of alignment
// TODO: properly implement a tensor descriptor with alignment
constexpr index_t in_block_space =
@@ -241,13 +252,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn
__syncthreads();
#if 1
blockwise_batch_gemm.Run(p_wei_block, p_in_block, p_out_thread);
#elif 0
blockwise_batch_gemm.Run_asm(p_wei_block, p_in_block, p_out_thread);
#elif 1
blockwise_batch_gemm.Run_asm_v2(p_wei_block, p_in_block, p_out_thread);
#endif
run_blockwise_batch_gemm(p_wei_block, p_in_block, p_out_thread);
__syncthreads();
}
@@ -279,13 +284,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn
__syncthreads();
#if 1
blockwise_batch_gemm.Run(p_wei_block, p_in_block, p_out_thread);
#elif 0
blockwise_batch_gemm.Run_asm(p_wei_block, p_in_block, p_out_thread);
#elif 1
blockwise_batch_gemm.Run_asm_v2(p_wei_block, p_in_block, p_out_thread);
#endif
run_blockwise_batch_gemm(p_wei_block, p_in_block, p_out_thread);
__syncthreads();
}

View File

@@ -199,6 +199,17 @@ struct GridwiseConvolutionImplicitGemm_v1r3_lds_double_buffer_chwn_cyxk_khwn
GemmDataPerReadA,
GemmDataPerReadB>{};
// choose GEMM implementation here
// Single compile-time switch for the blockwise batched GEMM entry point.
// The lambda captures by reference, takes its arguments by value as a generic
// parameter pack, and forwards them unchanged to exactly one member function of
// blockwise_batch_gemm. Only one branch of the #if chain survives preprocessing.
// NOTE(review): Run_asm / Run_asm_v2 are presumably inline-assembly variants of
// Run with the same argument list -- confirm against the blockwise GEMM definition.
const auto run_blockwise_batch_gemm = [&](auto... Xs) {
#if 0
// disabled: set this condition to 1 to select Run
return blockwise_batch_gemm.Run(Xs...);
#elif 0
// disabled: set this condition to 1 (with the first left at 0) to select Run_asm
return blockwise_batch_gemm.Run_asm(Xs...);
#else
// active branch: both conditions above are 0, so Run_asm_v2 is used
return blockwise_batch_gemm.Run_asm_v2(Xs...);
#endif
};
// LDS: be careful of alignment
constexpr index_t in_block_space =
in_c_h_w_n_block_desc.GetElementSpace(Number<max_align>{});
@@ -293,15 +304,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_lds_double_buffer_chwn_cyxk_khwn
blockwise_wei_copy.RunLoadRegisterClipboard(p_wei_global_block_offset,
p_wei_register_clipboard);
// LDS double buffer: GEMM on current data
#if 1
blockwise_batch_gemm.Run
#elif 0
blockwise_batch_gemm.Run_asm
#else
blockwise_batch_gemm.Run_asm_v2
#endif
(p_wei_block_now, p_in_block_now, p_out_thread);
run_blockwise_batch_gemm(p_wei_block_now, p_in_block_now, p_out_thread);
// LDS double buffer: store next data to LDS
blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard,
@@ -328,15 +331,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_lds_double_buffer_chwn_cyxk_khwn
blockwise_wei_copy.RunLoadRegisterClipboard(p_wei_global_block_offset,
p_wei_register_clipboard);
// LDS double buffer: GEMM on current data
#if 1
blockwise_batch_gemm.Run
#elif 0
blockwise_batch_gemm.Run_asm
#else
blockwise_batch_gemm.Run_asm_v2
#endif
(p_wei_block_double, p_in_block_double, p_out_thread);
// LDS double buffer: GEMM on current data
run_blockwise_batch_gemm(p_wei_block_double, p_in_block_double, p_out_thread);
// LDS double buffer: store next data to LDS
blockwise_in_copy.RunStoreRegisterClipboard(p_in_register_clipboard,
@@ -347,17 +343,10 @@ struct GridwiseConvolutionImplicitGemm_v1r3_lds_double_buffer_chwn_cyxk_khwn
// odd iteration
__syncthreads();
// LDS double buffer: GEMM on current data
#if 1
blockwise_batch_gemm.Run
#elif 0
blockwise_batch_gemm.Run_asm
#else
blockwise_batch_gemm.Run_asm_v2
#endif
(p_wei_block_double + wei_block_space,
p_in_block_double + in_block_space,
p_out_thread);
// LDS double buffer: GEMM on current data
run_blockwise_batch_gemm(p_wei_block_double + wei_block_space,
p_in_block_double + in_block_space,
p_out_thread);
}
}
}

View File

@@ -193,6 +193,17 @@ struct GridwiseConvolutionImplicitGemm_v1r3_nchw_cyxk_khwn
GemmDataPerReadA,
GemmDataPerReadB>{};
// choose GEMM implementation here
// Single compile-time switch for the blockwise batched GEMM entry point.
// The lambda captures by reference, takes its arguments by value as a generic
// parameter pack, and forwards them unchanged to exactly one member function of
// blockwise_batch_gemm. Only one branch of the #if chain survives preprocessing.
// NOTE(review): Run_asm / Run_asm_v2 are presumably inline-assembly variants of
// Run with the same argument list -- confirm against the blockwise GEMM definition.
const auto run_blockwise_batch_gemm = [&](auto... Xs) {
#if 0
// disabled: set this condition to 1 to select Run
return blockwise_batch_gemm.Run(Xs...);
#elif 0
// disabled: set this condition to 1 (with the first left at 0) to select Run_asm
return blockwise_batch_gemm.Run_asm(Xs...);
#else
// active branch: both conditions above are 0, so Run_asm_v2 is used
return blockwise_batch_gemm.Run_asm_v2(Xs...);
#endif
};
// LDS: be careful of alignment
constexpr index_t in_block_space =
in_c_h_w_n_block_desc.GetElementSpace(Number<max_align>{});
@@ -267,7 +278,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_nchw_cyxk_khwn
__syncthreads();
blockwise_batch_gemm.Run(p_wei_block, p_in_block, p_out_thread);
run_blockwise_batch_gemm(p_wei_block, p_in_block, p_out_thread);
__syncthreads();
}
@@ -314,7 +325,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_nchw_cyxk_khwn
__syncthreads();
blockwise_batch_gemm.Run(p_wei_block, p_in_block, p_out_thread);
run_blockwise_batch_gemm(p_wei_block, p_in_block, p_out_thread);
__syncthreads();
}