|
| template<typename T, typename ComputeType> |
| CK_TILE_HOST_DEVICE T | ck_tile::add (const T &a, const T &b) |
| CK_TILE_HOST_DEVICE bf16x2_t | ck_tile::add_bf16x2_t (const bf16x2_t &a, const bf16x2_t &b) |
| CK_TILE_HOST_DEVICE bf16x4_t | ck_tile::add_bf16x4_t (const bf16x4_t &a, const bf16x4_t &b) |
| CK_TILE_HOST_DEVICE fp16x2_t | ck_tile::add_f16x2_t (const fp16x2_t &a, const fp16x2_t &b) |
| CK_TILE_HOST_DEVICE fp8x4_t | ck_tile::add_fp8x4_t (const fp8x4_t &a, const fp8x4_t &b) |
| CK_TILE_HOST_DEVICE fp8x8_t | ck_tile::add_fp8x8_t (const fp8x8_t &a, const fp8x8_t &b) |
| CK_TILE_HOST_DEVICE bf8x4_t | ck_tile::add_bf8x4_t (const bf8x4_t &a, const bf8x4_t &b) |
| CK_TILE_HOST_DEVICE bf8x8_t | ck_tile::add_bf8x8_t (const bf8x8_t &a, const bf8x8_t &b) |
| template<typename X> |
| CK_TILE_DEVICE void | ck_tile::atomic_add (X *p_dst, const X &x) |
| template<> |
| CK_TILE_DEVICE void | ck_tile::atomic_add< bf16x2_t > (bf16x2_t *p_dst, const bf16x2_t &x) |
| template<> |
| CK_TILE_DEVICE void | ck_tile::atomic_add< bf16x4_t > (bf16x4_t *p_dst, bf16x4_t const &x) |
| template<> |
| CK_TILE_DEVICE void | ck_tile::atomic_add< fp8x4_t > (fp8x4_t *p_dst, const fp8x4_t &x) |
| template<> |
| CK_TILE_DEVICE void | ck_tile::atomic_add< bf8x4_t > (bf8x4_t *p_dst, const bf8x4_t &x) |
| template<> |
| CK_TILE_DEVICE void | ck_tile::atomic_add< fp8x8_t > (fp8x8_t *p_dst, fp8x8_t const &x) |
| template<> |
| CK_TILE_DEVICE void | ck_tile::atomic_add< bf8x8_t > (bf8x8_t *p_dst, bf8x8_t const &x) |
| template<> |
| CK_TILE_DEVICE void | ck_tile::atomic_add< fp16x2_t > (fp16x2_t *p_dst, fp16x2_t const &x) |
| template<typename T, index_t N> |
| CK_TILE_DEVICE void | ck_tile::atomic_add_g (T *p_dst, const thread_buffer< T, N > &x) |
| template<typename T, index_t N> |
| CK_TILE_DEVICE void | ck_tile::atomic_max_g (T *p_dst, const thread_buffer< T, N > &x) |