docs: add inline comments about flush_cache and rotating buffer

This commit is contained in:
AviralGoelAMD
2025-10-15 02:39:04 +00:00
committed by Aviral Goel
parent 889ffc0b1d
commit b03764ca5a
2 changed files with 39 additions and 8 deletions

View File

@@ -6,6 +6,12 @@
#include <hip/hip_runtime.h>
namespace ck_tile {
// GPU kernel that invalidates the instruction cache so that benchmark timings are not skewed
// by instructions left cached from a previous kernel run.
// s_icache_inv: Asynchronously invalidates the L1 instruction cache on this compute unit,
//               forcing subsequent kernel runs to re-fetch their instructions from memory
//               (HBM) instead of hitting the cache.
// 16x s_nop:    Padding (~16 cycles) issued after s_icache_inv so that the asynchronous
//               invalidation has time to complete before the kernel exits. Without these
//               NOPs the flush may not finish, leading to inconsistent timing measurements
//               in which some instructions remain cached.
static __global__ void flush_cache()
{
asm __volatile__("s_icache_inv \n\t"