BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB, NumMacClusters > Struct Template Reference

BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB, NumMacClusters > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB, NumMacClusters > Struct Template Reference
ck::BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB, NumMacClusters > Struct Template Reference

#include <blockwise_gemm_xdlops.hpp>

Inheritance diagram for ck::BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB, NumMacClusters >:
ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >

Public Types

using Base
Public Types inherited from ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >
using ThisThreadBlock
using ElementDataTypeA
using ElementDataTypeB

Additional Inherited Members

Public Member Functions inherited from ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >
__host__ __device__ constexpr auto & GetCThreadBuffer ()
__host__ __device__ BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1 ()
__device__ void SetIdxSqueezeA (AThreadBuf &a_thread_buf, IdxBuf &idx_buf)
__device__ void Run (const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const
Static Public Member Functions inherited from ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >
static __device__ auto GetWaveIdx ()
static __device__ auto CalculateAThreadOriginDataIndex ()
static __device__ auto CalculateBThreadOriginDataIndex ()
static __device__ auto CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
static __device__ auto CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >)
__host__ static __device__ constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 ()
__host__ static __device__ constexpr auto MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n)
__host__ static __device__ constexpr auto MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n)
__host__ static __device__ constexpr auto MakeABlockDescriptor_M0_M1_M2_K ()
__host__ static __device__ constexpr auto MakeBBlockDescriptor_N0_N1_N2_K ()
Public Attributes inherited from ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >
StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, FloatAcc, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > c_thread_buf_
Static Public Attributes inherited from ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >
static constexpr auto I0
static constexpr auto I1
static constexpr auto I2
static constexpr auto I3
static constexpr index_t MWaves
static constexpr index_t NWaves
static constexpr index_t WaveSize
static constexpr index_t MPerBlock
static constexpr index_t NPerBlock
static constexpr index_t KPerBlock
static constexpr index_t A_K0
static constexpr index_t B_K0
static constexpr index_t A_K1
static constexpr index_t B_K1
static constexpr auto xdlops_gemm
static constexpr index_t KPerThread
static constexpr auto a_block_desc_m0_m1_m2_k
static constexpr auto b_block_desc_n0_n1_n2_k
Protected Types inherited from ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >
using AThreadCopy
using BThreadCopy
Protected Attributes inherited from ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >
AThreadCopy a_thread_copy_
BThreadCopy b_thread_copy_
Static Protected Attributes inherited from ck::BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, FloatA, FloatB >
static constexpr auto a_thread_desc_
static constexpr auto b_thread_desc_
static constexpr auto c_thread_desc_

Member Typedef Documentation

◆ Base

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatAcc, typename AK0MK1BlockDesc, typename BK0NK1BlockDesc, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack, typename ComputeTypeA = FloatA, typename ComputeTypeB = FloatB, index_t NumMacClusters = CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS>
using ck::BlockwiseGemmXdlopsInterwave_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1< BlockSize, FloatA, FloatB, FloatAcc, AK0MK1BlockDesc, BK0NK1BlockDesc, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, ComputeTypeA, ComputeTypeB, NumMacClusters >::Base
Initial value:
BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_v1<BlockSize,
FloatA,
FloatB,
FloatAcc,
AK0MK1BlockDesc,
BK0NK1BlockDesc,
MPerXDL,
NPerXDL,
MRepeat,
NRepeat,
KPack,
ComputeTypeA,
ComputeTypeB>

The documentation for this struct was generated from the following file: