gridwise_normalization_bwd_data.hpp Source File
gridwise_normalization_bwd_data.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto make_dynamic_buffer(T *p, ElementSpaceSize element_space_size)
Definition dynamic_buffer.hpp:472
Definition gridwise_normalization_bwd_data.hpp:49
typename conditional< XSrcVectorDim==0, Sequence< 1, 0 >, Sequence< 0, 1 > >::type XThreadBufferDimAccessOrder
Definition gridwise_normalization_bwd_data.hpp:77
typename conditional< DYSrcVectorDim==0, Sequence< 1, 0 >, Sequence< 0, 1 > >::type DYThreadBufferDimAccessOrder
Definition gridwise_normalization_bwd_data.hpp:75
static constexpr auto thread_buffer_desc_m_k
Definition gridwise_normalization_bwd_data.hpp:92
static constexpr index_t M_BlockTileSize
Definition gridwise_normalization_bwd_data.hpp:111
typename conditional< MeanInvStdSrcVectorDim==0, Sequence< 1, 0 >, Sequence< 0, 1 > >::type MeanInvStdThreadBufferDimAccessOrder
Definition gridwise_normalization_bwd_data.hpp:81
static constexpr auto I1
Definition gridwise_normalization_bwd_data.hpp:108
PartitionedBlockwiseReduction< ComputeDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, reduce::Add, true > BlockwiseSumReduce
Definition gridwise_normalization_bwd_data.hpp:100
static constexpr auto I0
Definition gridwise_normalization_bwd_data.hpp:107
Sequence< MThreadSliceSize, KThreadSliceSize > ThreadBufferLengths_M_K
Definition gridwise_normalization_bwd_data.hpp:90
typename conditional< GammaSrcVectorDim==0, Sequence< 1, 0 >, Sequence< 0, 1 > >::type GammaThreadBufferDimAccessOrder
Definition gridwise_normalization_bwd_data.hpp:79
DYThreadBufferDimAccessOrder ThreadClusterArrangeOrder
Definition gridwise_normalization_bwd_data.hpp:86
static constexpr auto thread_buffer_desc_m
Definition gridwise_normalization_bwd_data.hpp:95
static constexpr auto thread_cluster_desc
Definition gridwise_normalization_bwd_data.hpp:87
static constexpr index_t K_BlockTileSize
Definition gridwise_normalization_bwd_data.hpp:112
static __device__ void Run(const GridDesc_M_K &dy_grid_desc_m_k, const GridDesc_M_K &x_grid_desc_m_k, const GridDesc_M_K &gamma_grid_desc_m_k, const GridDesc_M_K &mean_grid_desc_m_k, const GridDesc_M_K &inv_std_grid_desc_m_k, const GridDesc_M_K &dx_grid_desc_m_k, index_t num_k_block_tile_iteration, const DYDataType *const __restrict__ p_dy_global, const XDataType *const __restrict__ p_x_global, const GammaDataType *const __restrict__ p_gamma_global, const MeanInvStdDataType *const __restrict__ p_mean_global, const MeanInvStdDataType *const __restrict__ p_inv_std_global, DXDataType *const __restrict__ p_dx_global)
Definition gridwise_normalization_bwd_data.hpp:114
typename conditional< DXDstVectorDim==0, Sequence< 1, 0 >, Sequence< 0, 1 > >::type DXThreadBufferDimAccessOrder
Definition gridwise_normalization_bwd_data.hpp:83
tensor_operation::element_wise::PassThrough PassThroughOp
Definition gridwise_normalization_bwd_data.hpp:98
static constexpr auto I2
Definition gridwise_normalization_bwd_data.hpp:109
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_normalization_bwd_data.hpp:73
Definition reduction_functions_blockwise.hpp:28
static __device__ void Reduce(BufferType &work_buffer, ComputeDataType &in_out_value)
Definition reduction_functions_blockwise.hpp:44
Definition utility/sequence.hpp:43
Definition static_buffer.hpp:16
Definition threadwise_tensor_slice_transfer.hpp:39
Helper structure that facilitates transfer of source (grid) data to destination threads.
Definition threadwise_tensor_slice_transfer.hpp:234
Definition utility/functional.hpp:100
Definition reduction_operator.hpp:37
Definition functional2.hpp:33
Definition tensor_operation/gpu/element/unary_element_wise_operation.hpp:340