33template <
typename BottomTensorView_,
34 typename WindowLengths_,
35 typename StaticTileDistribution_,
39 tile_window_with_static_distribution<BottomTensorView_,
41 StaticTileDistribution_,
45 StaticTileDistribution_>
50 StaticTileDistribution_,
54 StaticTileDistribution_>;
58 static_assert(NumCoord == 1);
60 static_assert(Base::Traits::NumAccess % NumCoord == 0,
61 "wrong! # of access is not divisible by NumCoord");
84 window_origin + window_adaptor_thread_coord_tmp.get_bottom_index();
87 bottom_tensor_view.get_tensor_descriptor(), bottom_tensor_thread_origin_idx_tmp);
91 using Traits =
typename Base::Traits;
92 using SFC_Ys =
typename Traits::SFC_Ys;
95 auto window_adaptor_thread_coord = window_adaptor_thread_coord_tmp;
96 auto bottom_tensor_thread_coord = bottom_tensor_thread_coord_tmp;
98 constexpr auto idx_diff_ys =
106 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
109 make_tuple(window_adaptor_thread_coord, bottom_tensor_thread_coord);
113 template <
index_t i_access_unsupport_ = -1,
bool oob_conditional_check =
true>
133 template <
typename TileWindow_,
134 typename ElementWise_,
135 index_t i_access_unsupport_ = -1,
136 bool oob_conditional_check =
true>
138 ElementWise_ elementwise,
152 template <
typename DistributedTensor,
153 typename TileWindow_,
154 typename ElementWise_,
155 index_t i_access_unsupport_ = -1,
156 bool oob_conditional_check =
true>
158 const TileWindow_& tile_window,
159 ElementWise_ elementwise,
164 using Traits =
typename Base::Traits;
165 using vector_t =
typename Traits::vector_t;
166 using SFC_Ys =
typename Traits::SFC_Ys;
169 constexpr auto sizeOfTuple = TileWindow_::size();
171 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
173 auto window_adaptor_thread_coord =
174 tile_window[
number<0>{}].pre_computed_coords_[iCoord][
I0];
175 auto bottom_tensor_thread_coord =
176 tile_window[
number<0>{}].pre_computed_coords_[iCoord][
I1];
178 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
182 constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
188 .get_bottom_tensor_view()
189 .template get_vectorized_elements<vector_t>(
190 bottom_tensor_thread_coord,
197 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
200 return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
206 tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
211 elementwise(dst_tensor.get_thread_buffer().template at<d>(),
220 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
227 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
233 template <
typename DistributedTensor,
234 index_t i_access_unsupport_ = -1,
235 bool oob_conditional_check =
true>
240 using Traits =
typename Base::Traits;
241 using vector_t =
typename Traits::vector_t;
242 using SFC_Ys =
typename Traits::SFC_Ys;
247 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
252 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
256 constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
259 const vector_t vec_value =
263 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
266 return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
272 tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
275 dst_tensor.get_thread_buffer().template at<d>() =
277 .template get_as<typename Base::DataType>()[j / Traits::PackedSize];
282 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
289 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
295 template <
typename DstTile,
296 index_t i_access_unsupport_ = -1,
297 bool oob_conditional_check =
true,
298 bool pre_nop =
false>
304 using Traits =
typename Base::Traits;
305 using vector_t =
typename Traits::vector_t;
306 using SFC_Ys =
typename Traits::SFC_Ys;
307 static constexpr index_t YElementSize =
308 typename Base::TileDstr{}.get_ys_to_d_descriptor().get_element_space_size();
309 static_assert(YElementSize % (Traits::PackedSize * Traits::ScalarPerVector) == 0);
310 using vectorized_tbuf =
311 array<vector_t, YElementSize / (Traits::PackedSize * Traits::ScalarPerVector)>;
315 auto& dst_vec_tbuf =
reinterpret_cast<vectorized_tbuf&
>(dst_tensor.get_thread_buffer());
318 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
323 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
325 constexpr auto pre_nop_ = [&]() {
326 if constexpr(pre_nop && iCoord == 0 && iCoordAccess == 0)
333 constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
335 tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys_start) /
337 static_assert(d % Traits::ScalarPerVector == 0);
340 dst_vec_tbuf.template at<d / Traits::ScalarPerVector>(),
341 bottom_tensor_thread_coord,
345#if CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE || \
346 CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE
353 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
360 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
367 template <
typename LdsTileWindow_,
368 index_t i_access_unsupport_ = -1,
369 bool oob_conditional_check =
true,
370 bool pre_nop =
false>
378 using LdsDataType =
typename LdsTileWindow::DataType;
382 static_assert(LdsTileWindow::get_num_of_dimension() == 3);
385 lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
390 lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
392 sizeof(LdsDataType) -
396 lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
398 sizeof(LdsDataType) -
407 using Traits =
typename Base::Traits;
409 using vector_t =
typename Traits::vector_t;
410 using SFC_Ys =
typename Traits::SFC_Ys;
412 LdsDataType* smem = lds_tile.get_bottom_tensor_view().get_buffer_view().p_data_;
415 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
420 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
422 constexpr auto pre_nop_ = [&]() {
423 if constexpr(pre_nop && iCoord == 0 && iCoordAccess == 0)
431 smem, bottom_tensor_thread_coord, 0, pre_nop_);
436 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
443 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
451 template <
typename LdsTileWindow_,
452 index_t i_access_unsupport_ = -1,
453 bool oob_conditional_check =
true>
459 using LdsDataType =
typename LdsTileWindow::DataType;
460 using Traits =
typename Base::Traits;
462 using vector_t =
typename Traits::vector_t;
463 using SFC_Ys =
typename Traits::SFC_Ys;
466 const auto window_origin = lds_tile.get_window_origin();
467 const auto& bottom_tensor_view = lds_tile.get_bottom_tensor_view();
468 const auto& tensor_descriptor = bottom_tensor_view.get_tensor_descriptor();
469 auto smem_base_ptr = bottom_tensor_view.get_buffer_view().p_data_;
471 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
475 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
479 auto lds_bottom_tensor_thread_idx =
480 window_origin + window_adaptor_thread_coord.get_bottom_index();
483 const auto lds_coord =
487 CK_TILE_LDS_ADDR LdsDataType* smem = smem_base_ptr + lds_coord.get_offset();
492 bottom_tensor_thread_coord,
499 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
505 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
511 template <
typename Policy,
index_t i_access_unsupport_ = -1,
bool oob_conditional_check =
true>
521 template <
typename Policy,
522 typename DistributedTensor,
523 index_t i_access_unsupport_ = -1,
524 bool oob_conditional_check =
true>
529 using Traits =
typename Base::Traits;
530 using vector_t =
typename Traits::vector_t;
531 using SFC_Ys =
typename Traits::SFC_Ys;
535 constexpr auto group_func = Policy::group_func;
538 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
543 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
547 constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
550 const vector_t vec_value =
552 .template get_transpose_vectorized_elements<vector_t>(
553 bottom_tensor_thread_coord, 0);
555 static_for<0, Traits::ScalarPerVector, 1>{}([&](
auto j) {
558 return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
563 constexpr auto grouped_idx_ys = group_func(orig_idx_ys);
565 constexpr index_t linear_distributed_index =
566 tile_dstr.get_ys_to_d_descriptor().calculate_offset(grouped_idx_ys);
568 dst_tensor.get_thread_buffer().template at<linear_distributed_index>() =
569 vec_value.template get_as<typename Base::DataType>()[j];
574 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
581 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
587 template <
index_t i_access_unsupport_ = -1,
bool oob_conditional_check =
true>
593 using Traits =
typename Base::Traits;
595 using vector_t =
typename Traits::vector_t;
596 using SFC_Ys =
typename Traits::SFC_Ys;
601 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
605 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
609 constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
615 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
618 return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
624 tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
627 vec_value.template get_as<typename Base::DataType>()(j / Traits::PackedSize) =
628 dstr_tensor.get_thread_buffer().template at<d>();
635 bottom_tensor_thread_coord,
643 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
650 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
656 template <
index_t i_access_unsupport_ = -1>
662 using Traits =
typename Base::Traits;
664 using vector_t =
typename Traits::vector_t;
665 using SFC_Ys =
typename Traits::SFC_Ys;
668 static constexpr bool oob_conditional_check =
true;
671 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
676 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
680 constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
684 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
687 return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
692 tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
694 vec_value.template get_as<typename Base::DataType>()(j / Traits::PackedSize) =
700 .template set_vectorized_elements_raw<vector_t, oob_conditional_check>(
701 bottom_tensor_thread_coord, 0, vec_value);
706 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
713 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
719 template <
index_t i_access_unsupport_ = -1,
bool oob_conditional_check =
true>
726 using Traits =
typename Base::Traits;
728 using vector_t =
typename Traits::vector_t;
729 using SFC_Ys =
typename Traits::SFC_Ys;
734 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
739 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
743 constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
748 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
751 return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
757 tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
760 vec_value.template get_as<typename Base::DataType>()(j / Traits::PackedSize) =
766 bottom_tensor_thread_coord,
774 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
781 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
787 template <
index_t i_access_unsupport_ = -1,
bool oob_conditional_check =
true,
bool pre_nop>
795 using Traits =
typename Base::Traits;
797 using vector_t =
typename Traits::vector_t;
798 using SFC_Ys =
typename Traits::SFC_Ys;
803 static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
808 static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
812 constexpr auto idx_ys_start = SFC_Ys::get_index(iAccess);
817 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
820 return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
826 tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
829 vec_value.template get_as<typename Base::DataType>()(j / Traits::PackedSize) =
835 bottom_tensor_thread_coord,
844 constexpr auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
851 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
877 this->
window_origin_ + window_adaptor_thread_coord_tmp.get_bottom_index();
884 using Traits =
typename Base::Traits;
885 using SFC_Ys =
typename Traits::SFC_Ys;
888 auto window_adaptor_thread_coord = window_adaptor_thread_coord_tmp;
889 auto bottom_tensor_thread_coord = bottom_tensor_thread_coord_tmp;
891 constexpr auto idx_diff_ys =
899 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
902 make_tuple(window_adaptor_thread_coord, bottom_tensor_thread_coord);
914template <
typename TensorView_,
915 typename WindowLengths_,
916 typename StaticTileDistribution_,
920 const WindowLengths_& window_lengths,
921 const multi_index<TensorView_::get_num_of_dimension()>& origin,
925 return tile_window_with_static_distribution<remove_cvref_t<TensorView_>,
929 tensor_view, window_lengths, origin, tile_distribution};
933template <
typename TensorView_,
934 typename WindowLengths_,
935 typename StaticTileDistribution_,
939 const WindowLengths_& window_lengths,
940 const multi_index<TensorView_::get_num_of_dimension()>& origin,
944 auto w = tile_window_with_static_distribution<remove_cvref_t<TensorView_>,
948 tensor_view, window_lengths, origin, tile_distribution};
953template <
typename TensorView_,
954 typename WindowLengths_,
955 typename StaticTileDistribution_,
960 StaticTileDistribution_,
964 StaticTileDistribution_,
965 NumCoord>::BottomTensorIndex& step)
970template <
typename TensorView_,
971 typename WindowLengths_,
972 typename StaticTileDistribution_,
977 StaticTileDistribution_,
981 StaticTileDistribution_,
982 NumCoord>::BottomTensorIndex& step)
986 StaticTileDistribution_,
989 static constexpr auto N = T::size();
993template <
typename TileWindowWithStaticDistributionType,
995 typename std::enable_if_t<
996 is_detected<is_tuple, TileWindowWithStaticDistributionType>::value>* =
nullptr>
999 static constexpr auto N = TileWindowWithStaticDistributionType::size();
1011template <
typename BottomTensorView_,
typename WindowLengths_>
1013 :
public tile_window_base<tile_window_with_static_lengths<BottomTensorView_, WindowLengths_>,
1047 template <
typename DataType>
1052 const char* label =
"")
const
1057 printf(
"%s Window Range [%d:%d, %d:%d] (origin: %d, %d):\n",
1066 for(
index_t i = start_i; i < end_i; i++)
1068 for(
index_t j = start_j; j < end_j; j++)
1073 make_tuple(window_origin[0] + i, window_origin[1] + j));
1077 auto buf =
tensor_view.template get_vectorized_elements<ThreadBuf>(coord, 0);
1087template <
typename TensorView_,
typename WindowLengths_>
1090 const WindowLengths_& window_lengths,
1091 const multi_index<TensorView_::get_num_of_dimension()>& origin)
1094 "wrong! lengths should be static");
1102template <
typename TensorView,
typename WindowLengths>
1105 const multi_index<TensorView::get_num_of_dimension()>& origin)
1111template <
typename TensorView,
typename WindowLengths,
typename StaticTileDistribution>
1114 const multi_index<TensorView::get_num_of_dimension()>& origin,
1123template <
typename TensorView,
typename WindowLengths,
typename StaticTileDistribution>
1134template <
typename TensorView,
typename WindowLengths,
typename StaticTileDistribution>
1147template <
typename TensorView_,
typename WindowLengths_>
1156template <
typename NewTensorView_,
1157 typename OldTensorView_,
1158 typename WindowLengths_,
1159 typename StaticTileDistribution_,
1165 StaticTileDistribution_,
1166 NumCoord>& tile_window)
1171 tile_window.get_tile_distribution());
1174template <
typename NewTensorView_,
typename OldTensorView_,
typename WindowLengths_>
1176 const NewTensorView_& new_tensor_view,
1190template <
typename T>
1203template <
typename BottomTensorView_,
1204 typename WindowLengths_,
1205 typename StaticTileDistribution_,
1210 StaticTileDistribution_,
1211 NumCoord>> : std::true_type
1222template <
typename T>
1233template <
typename T>
1244template <
typename BottomTensorView_,
typename WindowLengths_>
1257template <
typename T>
#define CK_TILE_DEVICE
Definition config.hpp:41
#define CK_TILE_LDS_ADDR
Definition config.hpp:58
CK_TILE_HOST_DEVICE auto get_partition_index(Distribution)
Definition tile_distribution.hpp:22
Definition tile/core/algorithm/cluster_descriptor.hpp:13
constexpr decltype(auto) apply(F &&f, Tuple &&t)
Definition tile/core/container/tuple.hpp:526
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
constexpr bool is_tile_window_with_static_distribution_v
Helper variable template to check if a type is a tile window with static distribution.
Definition tile_window.hpp:1223
__device__ uint32_t amd_wave_read_first_lane(uint16_t v)
Definition tile/core/arch/amd_buffer_addressing.hpp:35
CK_TILE_DEVICE auto replace_bottom_tensor_view(const NewTensorView_ &new_tensor_view, const tile_scatter_gather< OldTensorView_, WindowLengths_, StaticTileDistribution_, StaticPageIndexArray_, StaticValidArray_, HsGatherDim, NumCoord > &tile_window)
Definition tile_scatter_gather.hpp:1043
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
CK_TILE_DEVICE index_t get_warp_id(bool_constant< ReturnSgpr >={})
Definition arch.hpp:104
CK_TILE_DEVICE auto make_tile_window_raw(const TensorView_ &tensor_view, const WindowLengths_ &window_lengths, const multi_index< TensorView_::get_num_of_dimension()> &origin, const StaticTileDistribution_ &tile_distribution, number< NumCoord >={})
Definition tile_window.hpp:938
CK_TILE_HOST_DEVICE constexpr auto container_concat(const X &x, const Ys &... ys)
Definition tile/core/container/container_helper.hpp:363
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
constant< v > number
Definition tile/core/numeric/integral_constant.hpp:37
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto generate_tuple(F &&f, number< N >)
Definition tile/core/container/tuple.hpp:429
array< index_t, N > multi_index
Definition tile/core/container/multi_index.hpp:17
CK_TILE_HOST_DEVICE constexpr void move_tensor_coordinate(const TensorDesc &tensor_desc, TensorCoord &coord, const Index &coord_step)
Definition tensor_coordinate.hpp:72
CK_TILE_HOST_DEVICE constexpr auto make_tensor_adaptor_coordinate(const Adaptor &adaptor, const TopIndex &idx_top)
Definition tensor_adaptor_coordinate.hpp:55
constexpr bool is_tile_window_with_static_lengths_v
Helper variable template to check if a type is a tile window with static lengths.
Definition tile_window.hpp:1258
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
CK_TILE_DEVICE void m0_set_with_memory(index_t v)
Definition utility.hpp:19
int32_t index_t
Definition integer.hpp:9
CK_TILE_HOST_DEVICE constexpr Y type_convert(X x)
Definition tile/core/numeric/type_convert.hpp:29
CK_TILE_DEVICE void m0_inc_with_memory(index_t v)
Definition utility.hpp:25
CK_TILE_HOST_DEVICE constexpr auto make_tensor_coordinate(const TensorDesc &tensor_desc, const TopIndex &idx_top)
Definition tensor_coordinate.hpp:60
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
A fixed-size array container similar to std::array with additional utilities.
Definition tile/core/container/array.hpp:43
static constexpr bool value
Definition type_traits.hpp:77
Type trait to determine if a type is a tile window with static distribution.
Definition tile_window.hpp:1192
Type trait to determine if a type is a tile window with static lengths.
Definition tile_window.hpp:1235
Definition static_distributed_tensor.hpp:21
CK_TILE_HOST_DEVICE constexpr const auto & get_thread_buffer() const
Definition static_distributed_tensor.hpp:58
Definition tile/core/utility/functional.hpp:43
Definition tensor_view.hpp:41
CK_TILE_HOST_DEVICE constexpr auto & get_tensor_descriptor() const
Definition tensor_view.hpp:61
Definition tile/core/utility/debug.hpp:67
Definition tile_distribution.hpp:72
CK_TILE_HOST_DEVICE constexpr const auto & get_ps_ys_to_xs_adaptor() const
Definition tile_distribution.hpp:126
This class provides description of tile windowed view on the device memory.
Definition tile_window_base.hpp:31
BottomTensorView bottom_tensor_view_
Definition tile_window_base.hpp:85
CK_TILE_DEVICE constexpr auto get_window_origin() const
Definition tile_window_base.hpp:45
remove_cvref_t< typename BottomTensorView::DataType > DataType
Definition tile_window_base.hpp:36
CK_TILE_DEVICE constexpr auto get_bottom_tensor_view() const
Definition tile_window_base.hpp:47
BottomTensorIndex window_origin_
Definition tile_window_base.hpp:79
CK_TILE_DEVICE constexpr auto get_window_lengths() const
Definition tile_window_base.hpp:46
CK_TILE_DEVICE void move(const BottomTensorIndex &step)
Definition tile_window_base.hpp:67
remove_reference_t< BottomTensorView_ > BottomTensorView
Definition tile_window_base.hpp:33
remove_cvref_t< WindowLengths_ > WindowLengths
Definition tile_window_base.hpp:34
array< index_t, NDimBottomTensor > BottomTensorIndex
Definition tile_window_base.hpp:43
WindowLengths window_lengths_
Definition tile_window_base.hpp:81
This class provides tile (windowed) view and access to the device memory.
Definition tile_window.hpp:46
CK_TILE_DEVICE void store_raw(const static_distributed_tensor< typename Base::DataType, typename Base::TileDstr > &dstr_tensor, number< i_access_unsupport_ >={}) const
Definition tile_window.hpp:658
CK_TILE_DEVICE void move_extended(const typename Base::BottomTensorIndex &step)
Definition tile_window.hpp:858
CK_TILE_DEVICE auto load_transpose() const
Definition tile_window.hpp:512
CK_TILE_DEVICE void set_window_origin_extended(const typename Base::BottomTensorIndex &)
Definition tile_window.hpp:867
array< tuple< typename Base::WindowAdaptorCoord, typename Base::BottomTensorCoord >, NumCoord > pre_computed_coords_
Definition tile_window.hpp:910
CK_TILE_DEVICE void update_raw(const static_distributed_tensor< typename Base::DataType, typename Base::TileDstr > &dstr_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={}) const
Definition tile_window.hpp:789
CK_TILE_DEVICE auto async_load(LdsTileWindow_ &&lds_tile, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition tile_window.hpp:454
CK_TILE_DEVICE auto load_transpose(DistributedTensor &dst_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition tile_window.hpp:525
CK_TILE_DEVICE auto load(DistributedTensor &dst_tensor, const TileWindow_ &tile_window, ElementWise_ elementwise, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition tile_window.hpp:157
CK_TILE_DEVICE auto load(DistributedTensor &dst_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition tile_window.hpp:236
CK_TILE_DEVICE auto load(number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition tile_window.hpp:114
static constexpr auto I0
Definition tile_window.hpp:56
CK_TILE_DEVICE constexpr tile_window_with_static_distribution()=default
CK_TILE_DEVICE auto load(const TileWindow_ &tile_window, ElementWise_ elementwise, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Load tile with elementwise function.
Definition tile_window.hpp:137
CK_TILE_DEVICE void load_raw(DstTile &dst_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={}) const
Definition tile_window.hpp:299
static constexpr auto I1
Definition tile_window.hpp:57
CK_TILE_DEVICE auto async_load_raw(LdsTileWindow_ &&lds_tile, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={}) const
Definition tile_window.hpp:371
tile_window_with_tile_dstr_base< tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, StaticTileDistribution_, NumCoord >, BottomTensorView_, WindowLengths_, StaticTileDistribution_ > Base
Definition tile_window.hpp:47
CK_TILE_DEVICE void update(const static_distributed_tensor< typename Base::DataType, typename Base::TileDstr > &dstr_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition tile_window.hpp:721
static constexpr index_t NumAccessPerCoord
Definition tile_window.hpp:62
CK_TILE_DEVICE constexpr tile_window_with_static_distribution(const typename Base::BottomTensorView &bottom_tensor_view, const typename Base::WindowLengths &window_lengths, const typename Base::BottomTensorIndex &window_origin, const typename Base::TileDstr &tile_distribution)
Definition tile_window.hpp:66
CK_TILE_DEVICE void store(const static_distributed_tensor< typename Base::DataType, typename Base::TileDstr > &dstr_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition tile_window.hpp:588
This class provides description of tile windowed view on the device memory.
Definition tile_window.hpp:1016
CK_TILE_DEVICE constexpr tile_window_with_static_lengths(const typename Base::BottomTensorView &bottom_tensor_view, const typename Base::WindowLengths &window_lengths, const typename Base::BottomTensorIndex &window_origin)
Definition tile_window.hpp:1024
tile_window_base< tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ >, BottomTensorView_, WindowLengths_ > Base
Definition tile_window.hpp:1017
CK_TILE_DEVICE void print_tile_window_range(index_t start_i, index_t end_i, index_t start_j, index_t end_j, const char *label="") const
Definition tile_window.hpp:1048
CK_TILE_DEVICE constexpr tile_window_with_static_lengths()=default
Definition tile_window_base.hpp:94
remove_cvref_t< StaticTileDistribution_ > TileDstr
Definition tile_window_base.hpp:95
CK_TILE_DEVICE void move_window_adaptor_and_bottom_tensor_thread_coordinate(WindowAdaptorCoord &window_adaptor_thread_coord, BottomTensorCoord &bottom_tensor_thread_coord, const ATopIndex &idx_diff_adaptor_top) const
Definition tile_window_base.hpp:129
TileDstr tile_dstr_
Definition tile_window_base.hpp:253
Definition tile/core/container/tuple.hpp:192