flush_icache.hpp Source File

flush_icache.hpp Source File#

Composable Kernel: flush_icache.hpp Source File
tile/host/flush_icache.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include <hip/hip_runtime.h>
7
8namespace ck_tile {
9// GPU kernel to invalidate instruction cache for accurate benchmarking.
10// s_icache_inv: Asynchronously invalidates the L1 instruction cache on this compute unit,
11// forcing subsequent kernel runs to fetch instructions from HBM instead of cache.
12// 16x s_nop: Wait cycles (~16 cycles) to ensure cache invalidation completes before kernel
13// exits. Without these NOPs, the flush may not finish, leading to inconsistent
14// timing measurements where some instructions remain cached.
15static __global__ void flush_cache()
16{
17 asm __volatile__("s_icache_inv \n\t"
18 "s_nop 0 \n\t"
19 "s_nop 0 \n\t"
20 "s_nop 0 \n\t"
21 "s_nop 0 \n\t"
22 "s_nop 0 \n\t"
23 "s_nop 0 \n\t"
24 "s_nop 0 \n\t"
25 "s_nop 0 \n\t"
26 "s_nop 0 \n\t"
27 "s_nop 0 \n\t"
28 "s_nop 0 \n\t"
29 "s_nop 0 \n\t"
30 "s_nop 0 \n\t"
31 "s_nop 0 \n\t"
32 "s_nop 0 \n\t"
33 "s_nop 0 \n\t" ::
34 :);
35}
36} // namespace ck_tile
Definition tile/core/algorithm/cluster_descriptor.hpp:13