From 3a15507aa53a8e5fc0c3fa0ff82658bc6a5e678f Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Fri, 24 Jan 2025 16:58:46 -0800 Subject: [PATCH] [CUDAX] Rename `hierarchy_dimensions_fragment` to `hierarchy_dimensions` and remove the old alias (#3496) * Remove hierarchy_dimensions_fragment * Fix format --- .../__hierarchy/hierarchy_dimensions.cuh | 93 ++++++++----------- .../experimental/__launch/configuration.cuh | 10 +- cudax/test/hierarchy/hierarchy_smoke.cu | 16 ++-- 3 files changed, 51 insertions(+), 68 deletions(-) diff --git a/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh b/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh index 989db68ab4b..6ad343f5b03 100644 --- a/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh +++ b/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh @@ -57,12 +57,8 @@ _CCCL_NODISCARD _CUDAX_API constexpr auto __as_level(_LevelFn* __fn) noexcept -> template using __level_type_of = typename _Level::level_type; -template -struct hierarchy_dimensions_fragment; - -// If lowest unit in the hierarchy is thread, it can be considered a full hierarchy and not only a fragment -template -using hierarchy_dimensions = hierarchy_dimensions_fragment; +template +struct hierarchy_dimensions; namespace detail { @@ -86,7 +82,7 @@ template struct has_level_helper; template -struct has_level_helper> +struct has_level_helper> : public ::cuda::std::__fold_or<::cuda::std::is_same_v>...> {}; @@ -101,7 +97,7 @@ struct has_unit {}; template -struct has_unit> : ::cuda::std::is_same +struct has_unit> : ::cuda::std::is_same {}; template @@ -153,13 +149,13 @@ _CUDAX_API constexpr auto __reverse_indices(::cuda::std::index_sequence<_Id...>) } template -struct __make_hierarchy_fragment +struct __make_hierarchy { template _CCCL_NODISCARD _CUDAX_TRIVIAL_API static constexpr auto __apply_reverse(const Levels& ls, ::cuda::std::index_sequence<_Ids...>) noexcept { - return __make_hierarchy_fragment()(::cuda::std::get<_Ids>(ls)...); + return __make_hierarchy()(::cuda::std::get<_Ids>(ls)...); } template @@ -171,7 +167,7 @@ struct __make_hierarchy_fragment LUnit>; if constexpr (__can_stack) { - return hierarchy_dimensions_fragment(UnitOrDefault{}, ls...); + return hierarchy_dimensions(UnitOrDefault{}, ls...); } else if constexpr (!Reversed) { @@ -357,8 +353,7 @@ struct __empty_hierarchy * This type combines a number of level_dimensions objects to represent dimensions of a (possibly partial) * hierarchy of CUDA threads. It supports accessing individual levels or queries combining dimensions * of multiple levels. - * This type should not be created directly and make_hierarchy or make_hierarchy_fragment functions - * should be used instead. + * This type should not be created directly and make_hierarchy function should be used instead. * For every level, the unit for its dimensions is implied by the next level in the hierarchy, except * for the last type, for which its the BottomUnit template argument. * In case the BottomUnit type is thread_level, the hierarchy is considered complete and there @@ -382,38 +377,37 @@ struct __empty_hierarchy * level_dimensions instances or types derived from it */ template -struct hierarchy_dimensions_fragment +struct hierarchy_dimensions { static_assert(::cuda::std::is_base_of_v || ::cuda::std::is_same_v); ::cuda::std::tuple levels; - _CUDAX_API constexpr hierarchy_dimensions_fragment(const Levels&... ls) noexcept + _CUDAX_API constexpr hierarchy_dimensions(const Levels&... ls) noexcept : levels(ls...) {} - _CUDAX_API constexpr hierarchy_dimensions_fragment(const BottomUnit&, const Levels&... ls) noexcept + _CUDAX_API constexpr hierarchy_dimensions(const BottomUnit&, const Levels&... ls) noexcept : levels(ls...) {} - _CUDAX_API constexpr hierarchy_dimensions_fragment(const ::cuda::std::tuple& ls) noexcept + _CUDAX_API constexpr hierarchy_dimensions(const ::cuda::std::tuple& ls) noexcept : levels(ls) {} - _CUDAX_API constexpr hierarchy_dimensions_fragment(const BottomUnit&, const ::cuda::std::tuple& ls) noexcept + _CUDAX_API constexpr hierarchy_dimensions(const BottomUnit&, const ::cuda::std::tuple& ls) noexcept : levels(ls) {} # if !defined(_CCCL_NO_THREE_WAY_COMPARISON) && !_CCCL_COMPILER(MSVC, <, 19, 39) && !_CCCL_COMPILER(GCC, <, 12) - _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr bool - operator==(const hierarchy_dimensions_fragment&) const noexcept = default; + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr bool operator==(const hierarchy_dimensions&) const noexcept = default; # else // ^^^ !_CCCL_NO_THREE_WAY_COMPARISON ^^^ / vvv _CCCL_NO_THREE_WAY_COMPARISON vvv _CCCL_NODISCARD_FRIEND _CUDAX_API constexpr bool - operator==(const hierarchy_dimensions_fragment& left, const hierarchy_dimensions_fragment& right) noexcept + operator==(const hierarchy_dimensions& left, const hierarchy_dimensions& right) noexcept { return left.levels == right.levels; } _CCCL_NODISCARD_FRIEND _CUDAX_API constexpr bool - operator!=(const hierarchy_dimensions_fragment& left, const hierarchy_dimensions_fragment& right) noexcept + operator!=(const hierarchy_dimensions& left, const hierarchy_dimensions& right) noexcept { return left.levels != right.levels; } @@ -425,8 +419,8 @@ private: _CCCL_NODISCARD _CUDAX_API static constexpr auto levels_range_static(const ::cuda::std::tuple& levels) noexcept { - static_assert(has_level>); - static_assert(has_level_or_unit>); + static_assert(has_level>); + static_assert(has_level_or_unit>); static_assert(detail::legal_unit_for_level); return ::cuda::std::apply(detail::get_levels_range, levels); } @@ -444,13 +438,13 @@ private: template _CCCL_NODISCARD _CUDAX_API constexpr auto operator()(const Selected&... levels) const noexcept { - return hierarchy_dimensions_fragment(levels...); + return hierarchy_dimensions(levels...); } }; public: template - friend struct hierarchy_dimensions_fragment; + friend struct hierarchy_dimensions; template using extents_type = decltype(::cuda::std::apply( @@ -461,7 +455,7 @@ public: * @brief Get a fragment of this hierarchy * * This member function can be used to get a fragment of the hierarchy its called on. - * It returns a hierarchy_dimensions_fragment that includes levels starting with the + * It returns a hierarchy_dimensions that includes levels starting with the * level specified in Level and ending with a level before Unit. Toegether with * hierarchy_add_level function it can be used to create a new hierarchy that is a modification * of an existing hierarchy. @@ -532,8 +526,8 @@ public: // template // using extents_type = ::cuda::std::invoke_result_t< - // decltype(&hierarchy_dimensions_fragment::template extents), - // hierarchy_dimensions_fragment, + // decltype(&hierarchy_dimensions::template extents), + // hierarchy_dimensions, // Unit(), // Level()>; @@ -727,7 +721,7 @@ public: template _CUDAX_API constexpr auto level(const Level&) const noexcept { - static_assert(has_level>); + static_assert(has_level>); return ::cuda::std::apply(detail::get_level_helper{}, levels); } @@ -743,7 +737,7 @@ public: //! //! @return Hierarchy holding the combined levels from both hierarchies template - constexpr auto combine(const hierarchy_dimensions_fragment& other) const + constexpr auto combine(const hierarchy_dimensions& other) const { using this_top_level = __level_type_of<::cuda::std::__type_index_c<0, Levels...>>; using this_bottom_level = __level_type_of<::cuda::std::__type_index_c>; @@ -754,8 +748,8 @@ public: // Easily stackable case, example this is (grid), other is (cluster, block) return ::cuda::std::apply(fragment_helper(), ::cuda::std::tuple_cat(levels, other.levels)); } - else if constexpr (has_level> - && (!has_level> + else if constexpr (has_level> + && (!has_level> || ::cuda::std::is_same_v) ) { // Overlap with this on the top, e.g. this is (grid, cluster), other is (cluster, block), can fully overlap @@ -778,8 +772,8 @@ public: else { // Overlap with this on the bottom, e.g. this is (cluster, block), other is (grid, cluster), can fully overlap - static_assert(has_level> - && (!has_level> + static_assert(has_level> + && (!has_level> || ::cuda::std::is_same_v), "Can't combine the hierarchies"); @@ -790,7 +784,7 @@ public: } # ifndef _CCCL_DOXYGEN_INVOKED // Do not document - constexpr hierarchy_dimensions_fragment combine([[maybe_unused]] __empty_hierarchy __empty) const + constexpr hierarchy_dimensions combine([[maybe_unused]] __empty_hierarchy __empty) const { return *this; } @@ -843,24 +837,13 @@ constexpr auto _CCCL_HOST get_launch_dimensions(const hierarchy_dimensions -constexpr auto make_hierarchy_fragment(L1 l1, Levels... ls) noexcept -{ - return detail::__make_hierarchy_fragment()(detail::__as_level(l1), detail::__as_level(ls)...); -} - +// TODO consider having LUnit optional argument for template argument deduction /** * @brief Creates a hierarchy from passed in levels. * * This function takes any number of level_dimensions or derived objects * and creates a hierarchy out of them. Levels need to be in ascending * or descending order and the lowest level needs to be valid for thread_level unit. - * To create a hierarchy not ending with thread_level unit, use make_hierarchy_fragment - * instead. * * @par Snippet * @code @@ -874,10 +857,10 @@ constexpr auto make_hierarchy_fragment(L1 l1, Levels... ls) noexcept * @endcode * @par */ -template +template constexpr auto make_hierarchy(L1 l1, Levels... ls) noexcept { - return detail::__make_hierarchy_fragment()(detail::__as_level(l1), detail::__as_level(ls)...); + return detail::__make_hierarchy()(detail::__as_level(l1), detail::__as_level(ls)...); } /** @@ -894,16 +877,16 @@ constexpr auto make_hierarchy(L1 l1, Levels... ls) noexcept * * using namespace cuda::experimental; * - * auto partial1 = make_hierarchy_fragment(grid_dims(256), cluster_dims<4>()); + * auto partial1 = make_hierarchy(grid_dims(256), cluster_dims<4>()); * auto hierarchy1 = hierarchy_add_level(partial1, block_dims<8, 8, 8>()); - * auto partial2 = make_hierarchy_fragment(block_dims<8, 8, 8>(), cluster_dims<4>()); + * auto partial2 = make_hierarchy(block_dims<8, 8, 8>(), cluster_dims<4>()); * auto hierarchy2 = hierarchy_add_level(partial2, grid_dims(256)); * static_assert(cuda::std::is_same_v); * @endcode * @par */ template -constexpr auto hierarchy_add_level(const hierarchy_dimensions_fragment& hierarchy, NewLevel lnew) +constexpr auto hierarchy_add_level(const hierarchy_dimensions& hierarchy, NewLevel lnew) { auto new_level = detail::__as_level(lnew); using AddedLevel = decltype(new_level); @@ -912,7 +895,7 @@ constexpr auto hierarchy_add_level(const hierarchy_dimensions_fragment>) { - return hierarchy_dimensions_fragment( + return hierarchy_dimensions( ::cuda::std::tuple_cat(::cuda::std::make_tuple(new_level), hierarchy.levels)); } else @@ -920,7 +903,7 @@ constexpr auto hierarchy_add_level(const hierarchy_dimensions_fragment, bottom_level>, "Not supported order of levels in hierarchy"); using NewUnit = detail::__default_unit_below<__level_type_of>; - return hierarchy_dimensions_fragment( + return hierarchy_dimensions( ::cuda::std::tuple_cat(hierarchy.levels, ::cuda::std::make_tuple(new_level))); } } diff --git a/cudax/include/cuda/experimental/__launch/configuration.cuh b/cudax/include/cuda/experimental/__launch/configuration.cuh index b0b1ce5955e..71d2f03662d 100644 --- a/cudax/include/cuda/experimental/__launch/configuration.cuh +++ b/cudax/include/cuda/experimental/__launch/configuration.cuh @@ -463,7 +463,7 @@ template _CUDAX_HOST_API constexpr auto operator&(const level_dimensions& l1, const level_dimensions& l2) noexcept { - return kernel_config(make_hierarchy_fragment(l1, l2)); + return kernel_config(make_hierarchy(l1, l2)); } template @@ -505,9 +505,9 @@ _CCCL_NODISCARD constexpr auto operator&(const hierarchy_dimensions& */ template _CCCL_NODISCARD constexpr auto -make_config(const hierarchy_dimensions_fragment& dims, const Opts&... opts) noexcept +make_config(const hierarchy_dimensions& dims, const Opts&... opts) noexcept { - return kernel_config, Opts...>(dims, opts...); + return kernel_config, Opts...>(dims, opts...); } /** @@ -544,7 +544,7 @@ _CCCL_NODISCARD constexpr auto __process_config_args(const ::cuda::std::tuple, previous)); + return kernel_config(::cuda::std::apply(make_hierarchy, previous)); } } @@ -562,7 +562,7 @@ __process_config_args(const ::cuda::std::tuple& previous, const Arg& ar } else { - return kernel_config(::cuda::std::apply(make_hierarchy_fragment, previous), arg, rest...); + return kernel_config(::cuda::std::apply(make_hierarchy, previous), arg, rest...); } } else diff --git a/cudax/test/hierarchy/hierarchy_smoke.cu b/cudax/test/hierarchy/hierarchy_smoke.cu index cf359aa3318..871783c07f5 100644 --- a/cudax/test/hierarchy/hierarchy_smoke.cu +++ b/cudax/test/hierarchy/hierarchy_smoke.cu @@ -478,9 +478,9 @@ TEST_CASE("Examples", "[hierarchy]") static_assert(decltype(hierarchy.level(cluster).dims)::static_extent(0) == 4); } { - auto partial1 = make_hierarchy_fragment(grid_dims(256), cluster_dims<4>()); + auto partial1 = make_hierarchy(grid_dims(256), cluster_dims<4>()); [[maybe_unused]] auto hierarchy1 = hierarchy_add_level(partial1, block_dims<8, 8, 8>()); - auto partial2 = make_hierarchy_fragment(block_dims<8, 8, 8>(), cluster_dims<4>()); + auto partial2 = make_hierarchy(block_dims<8, 8, 8>(), cluster_dims<4>()); [[maybe_unused]] auto hierarchy2 = hierarchy_add_level(partial2, grid_dims(256)); static_assert(cuda::std::is_same_v); } @@ -533,8 +533,8 @@ TEST_CASE("hierarchy merge", "[hierarchy]") { SECTION("Non overlapping") { - auto h1 = cudax::make_hierarchy_fragment(cudax::grid_dims<2>()); - auto h2 = cudax::make_hierarchy_fragment(cudax::block_dims<3>()); + auto h1 = cudax::make_hierarchy(cudax::grid_dims<2>()); + auto h2 = cudax::make_hierarchy(cudax::block_dims<3>()); auto combined = h1.combine(h2); static_assert(combined.count(cudax::thread) == 6); static_assert(combined.count(cudax::thread, cudax::block) == 3); @@ -549,8 +549,8 @@ TEST_CASE("hierarchy merge", "[hierarchy]") } SECTION("Overlapping") { - auto h1 = cudax::make_hierarchy_fragment(cudax::grid_dims<2>(), cudax::cluster_dims<3>()); - auto h2 = cudax::make_hierarchy_fragment(cudax::block_dims<4>(), cudax::cluster_dims<5>()); + auto h1 = cudax::make_hierarchy(cudax::grid_dims<2>(), cudax::cluster_dims<3>()); + auto h2 = cudax::make_hierarchy(cudax::block_dims<4>(), cudax::cluster_dims<5>()); auto combined = h1.combine(h2); static_assert(combined.count(cudax::thread) == 24); static_assert(combined.count(cudax::thread, cudax::block) == 4); @@ -566,13 +566,13 @@ TEST_CASE("hierarchy merge", "[hierarchy]") static_assert(cuda::std::is_same_v); static_assert(ultimate_combination.count(cudax::thread) == 24); - auto block_level_replacement = cudax::make_hierarchy_fragment(cudax::block_dims<6>()); + auto block_level_replacement = cudax::make_hierarchy(cudax::block_dims<6>()); auto with_block_replaced = block_level_replacement.combine(combined); static_assert(with_block_replaced.count(cudax::thread) == 36); static_assert(with_block_replaced.count(cudax::thread, cudax::block) == 6); auto grid_cluster_level_replacement = - cudax::make_hierarchy_fragment(cudax::grid_dims<7>(), cudax::cluster_dims<8>()); + cudax::make_hierarchy(cudax::grid_dims<7>(), cudax::cluster_dims<8>()); auto with_grid_cluster_replaced = grid_cluster_level_replacement.combine(combined); static_assert(with_grid_cluster_replaced.count(cudax::thread) == 7 * 8 * 4); static_assert(with_grid_cluster_replaced.count(cudax::block, cudax::cluster) == 8);