mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 21:51:28 +00:00
docs: add inline comments about flush_cache and rotating buffer
This commit is contained in:
committed by
Aviral Goel
parent
889ffc0b1d
commit
b03764ca5a
@@ -6,6 +6,12 @@
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
namespace ck_tile {
|
||||
// GPU kernel that invalidates the instruction cache for accurate benchmarking.
|
||||
// s_icache_inv: Asynchronously invalidates the L1 instruction cache on this compute unit,
|
||||
// forcing subsequent kernel runs to fetch instructions from HBM instead of cache.
|
||||
// 16x s_nop: Idle for ~16 cycles so the cache invalidation can complete before the kernel
|
||||
// exits. Without these NOPs, the flush may not finish, leading to inconsistent
|
||||
// timing measurements where some instructions remain cached.
|
||||
static __global__ void flush_cache()
|
||||
{
|
||||
asm __volatile__("s_icache_inv \n\t"
|
||||
|
||||
Reference in New Issue
Block a user