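ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups > Struct Template Reference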

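ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups > Struct Template Reference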

Composable Kernel: ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups > Struct Template Reference
ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups > Struct Template Reference

#include <static_encoding_pattern.hpp>

Inheritance diagram for ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >:
ck_tile::tile_distribution_encoding_pattern

Static Public Member Functions

static CK_TILE_HOST_DEVICE constexpr auto make_2d_static_tile_distribution ()
static CK_TILE_HOST_DEVICE constexpr auto make_shuffled_2d_static_tile_distribution ()

Static Public Attributes

static constexpr index_t warp_size = get_warp_size()
static constexpr index_t num_warps = BlockSize / get_warp_size()
static constexpr index_t LargestVec = (XPerTile * YPerTile) / (num_warps * warp_size)
static constexpr index_t X1 = VecSize > LargestVec ? LargestVec : VecSize
static constexpr index_t X0 = XPerTile / X1
static constexpr index_t Y1 = warp_size / X0
static constexpr index_t Y0 = num_warps / NumWaveGroups
static constexpr index_t Y2 = YPerTile / (Y1 * Y0)

Member Function Documentation

◆ make_2d_static_tile_distribution()

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::make_2d_static_tile_distribution ( )
inline static constexpr

◆ make_shuffled_2d_static_tile_distribution()

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::make_shuffled_2d_static_tile_distribution ( )
inline static constexpr

Member Data Documentation

◆ LargestVec

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
index_t ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::LargestVec = (XPerTile * YPerTile) / (num_warps * warp_size)
static constexpr

◆ num_warps

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
index_t ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::num_warps = BlockSize / get_warp_size()
static constexpr

◆ warp_size

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
index_t ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::warp_size = get_warp_size()
static constexpr

◆ X0

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
index_t ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::X0 = XPerTile / X1
static constexpr

◆ X1

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
index_t ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::X1 = VecSize > LargestVec ? LargestVec : VecSize
static constexpr

◆ Y0

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
index_t ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::Y0 = num_warps / NumWaveGroups
static constexpr

◆ Y1

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
index_t ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::Y1 = warp_size / X0
static constexpr

◆ Y2

template<index_t BlockSize, index_t YPerTile, index_t XPerTile, index_t VecSize, index_t NumWaveGroups>
index_t ck_tile::tile_distribution_encoding_pattern_2d< BlockSize, YPerTile, XPerTile, VecSize, tile_distribution_pattern::thread_raked, NumWaveGroups >::Y2 = YPerTile / (Y1 * Y0)
static constexpr

The documentation for this struct was generated from the following file: