GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize > Struct Template Reference

GridwiseNormalizationSplitK2nd&lt; MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize &gt; Struct Template Reference#

Composable Kernel: ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize > Struct Template Reference
ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize > Struct Template Reference

#include <gridwise_normalization_splitk_2nd.hpp>

Public Types

using ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>
using ThreadBufferDimAccessOrder
using ThreadClusterArrangeOrder
using ThreadBufferLengths_M_K = Sequence<MThreadSliceSize, XSrcVectorSize>
using ThreadBufferLengths_M = Sequence<MThreadSliceSize>
using ThreadBufferLengths_M_1 = Sequence<MThreadSliceSize, 1>
using ThreadWelfordSrcDesc_M_1 = decltype(thread_buffer_desc_m_1)
using ThreadWelfordDstDesc_M
using ThreadwiseWelford
using BlockwiseWelford
using PassThroughOp = tensor_operation::element_wise::PassThrough

Static Public Member Functions

static __device__ void Run (const MeanVarGridDesc_M_KBlock &mean_var_grid_desc_m_kblock, const CountGridDesc_M_KBlock &count_grid_desc_m_kblock, const XYGammaBetaGridDesc_M_K &x_grid_desc_m_k, const XYGammaBetaGridDesc_M_K &gamma_grid_desc_m_k, const XYGammaBetaGridDesc_M_K &beta_grid_desc_m_k, const XYGammaBetaGridDesc_M_K &y_grid_desc_m_k, const SaveMeanInvStdGridDesc_M &save_mean_grid_desc_m, const SaveMeanInvStdGridDesc_M &save_inv_std_grid_desc_m, index_t num_k_mean_var_count_iteration, index_t num_k_block_tile_iteration, index_t k_grid_size, ComputeDataType epsilon, const MeanVarDataType *const p_mean_global, const MeanVarDataType *const p_variance_global, const int32_t *const p_welford_count_global, const XDataType *const __restrict__ p_x_global, const GammaDataType *const __restrict__ p_gamma_global, const BetaDataType *const __restrict__ p_beta_global, YDataType *const __restrict__ p_y_global, SaveMeanInvStdDataType *const __restrict__ p_save_mean_global, SaveMeanInvStdDataType *const __restrict__ p_save_inv_std_global, const YElementwiseOperation y_elementwise_op)

Static Public Attributes

static constexpr bool reorder_thread_cluster = (XSrcVectorDim == 0)
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto thread_cluster_desc
static constexpr auto thread_buffer_desc_m_k
static constexpr auto thread_buffer_desc_m
static constexpr auto thread_buffer_desc_m_1
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr index_t K_BlockTileStepSize = KThreadClusterSize * XSrcVectorSize
static constexpr auto ThreadBufferNumber = Number<KThreadSliceSize / XSrcVectorSize>{}

Member Typedef Documentation

◆ BlockwiseWelford

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::BlockwiseWelford
Initial value:
BlockwiseWelford<ComputeDataType,
BlockSize,
BlockwiseWelford< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, false > BlockwiseWelford
Definition gridwise_multiblock_welford_first_half.hpp:82
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_multiblock_welford_first_half.hpp:68
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_multiblock_welford_first_half.hpp:63

◆ PassThroughOp

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::PassThroughOp = tensor_operation::element_wise::PassThrough

◆ ThreadBufferDimAccessOrder

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadBufferDimAccessOrder
Initial value:
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100

◆ ThreadBufferLengths_M

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadBufferLengths_M = Sequence<MThreadSliceSize>

◆ ThreadBufferLengths_M_1

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadBufferLengths_M_1 = Sequence<MThreadSliceSize, 1>

◆ ThreadBufferLengths_M_K

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadBufferLengths_M_K = Sequence<MThreadSliceSize, XSrcVectorSize>

◆ ThreadClusterArrangeOrder

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadClusterArrangeOrder

◆ ThreadClusterLengths_M_K

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>

◆ ThreadWelfordDstDesc_M

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadWelfordDstDesc_M
Initial value:
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ ThreadWelfordSrcDesc_M_1

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadWelfordSrcDesc_M_1 = decltype(thread_buffer_desc_m_1)

◆ ThreadwiseWelford

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
using ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadwiseWelford

Member Function Documentation

◆ Run()

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
__device__ void ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::Run ( const MeanVarGridDesc_M_KBlock & mean_var_grid_desc_m_kblock,
const CountGridDesc_M_KBlock & count_grid_desc_m_kblock,
const XYGammaBetaGridDesc_M_K & x_grid_desc_m_k,
const XYGammaBetaGridDesc_M_K & gamma_grid_desc_m_k,
const XYGammaBetaGridDesc_M_K & beta_grid_desc_m_k,
const XYGammaBetaGridDesc_M_K & y_grid_desc_m_k,
const SaveMeanInvStdGridDesc_M & save_mean_grid_desc_m,
const SaveMeanInvStdGridDesc_M & save_inv_std_grid_desc_m,
index_t num_k_mean_var_count_iteration,
index_t num_k_block_tile_iteration,
index_t k_grid_size,
ComputeDataType epsilon,
const MeanVarDataType *const p_mean_global,
const MeanVarDataType *const p_variance_global,
const int32_t *const p_welford_count_global,
const XDataType *const __restrict__ p_x_global,
const GammaDataType *const __restrict__ p_gamma_global,
const BetaDataType *const __restrict__ p_beta_global,
YDataType *const __restrict__ p_y_global,
SaveMeanInvStdDataType *const __restrict__ p_save_mean_global,
SaveMeanInvStdDataType *const __restrict__ p_save_inv_std_global,
const YElementwiseOperation y_elementwise_op )
inlinestatic

Member Data Documentation

◆ I0

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
auto ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::I0 = Number<0>{}
staticconstexpr

◆ I1

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
auto ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::I1 = Number<1>{}
staticconstexpr

◆ K_BlockTileSize

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
index_t ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
staticconstexpr

◆ K_BlockTileStepSize

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
index_t ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::K_BlockTileStepSize = KThreadClusterSize * XSrcVectorSize
staticconstexpr

◆ M_BlockTileSize

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
index_t ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
staticconstexpr

◆ reorder_thread_cluster

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
bool ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::reorder_thread_cluster = (XSrcVectorDim == 0)
staticconstexpr

◆ thread_buffer_desc_m

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
auto ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::thread_buffer_desc_m
staticconstexpr

◆ thread_buffer_desc_m_1

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
auto ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::thread_buffer_desc_m_1
staticconstexpr
Initial value:
=
static constexpr auto I1
Definition gridwise_normalization_splitk_2nd.hpp:62

◆ thread_buffer_desc_m_k

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
auto ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::thread_buffer_desc_m_k
staticconstexpr

◆ thread_cluster_desc

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
auto ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::thread_cluster_desc
staticconstexpr
Initial value:
=
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_normalization_splitk_2nd.hpp:64
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_normalization_splitk_2nd.hpp:69

◆ ThreadBufferNumber

template<typename MeanVarDataType, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename SaveMeanInvStdDataType, typename ComputeDataType, typename YElementwiseOperation, typename MeanVarGridDesc_M_KBlock, typename CountGridDesc_M_KBlock, typename XYGammaBetaGridDesc_M_K, typename SaveMeanInvStdGridDesc_M, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, index_t SaveMeanInvStdDstVectorSize>
auto ck::GridwiseNormalizationSplitK2nd< MeanVarDataType, XDataType, GammaDataType, BetaDataType, YDataType, SaveMeanInvStdDataType, ComputeDataType, YElementwiseOperation, MeanVarGridDesc_M_KBlock, CountGridDesc_M_KBlock, XYGammaBetaGridDesc_M_K, SaveMeanInvStdGridDesc_M, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SaveMeanInvStdDstVectorSize >::ThreadBufferNumber = Number<KThreadSliceSize / XSrcVectorSize>{}
staticconstexpr

The documentation for this struct was generated from the following file: