diff --git a/cudax/include/cuda/experimental/__memory_resource/async_memory_pool.cuh b/cudax/include/cuda/experimental/__memory_resource/async_memory_pool.cuh
index 9663eed2973..a5c187974e2 100644
--- a/cudax/include/cuda/experimental/__memory_resource/async_memory_pool.cuh
+++ b/cudax/include/cuda/experimental/__memory_resource/async_memory_pool.cuh
@@ -228,6 +228,52 @@ public:
     _CCCL_ASSERT_CUDA_API(::cudaMemPoolDestroy, "~async_memory_pool() failed to destroy pool", __pool_handle_);
   }
 
+  //! @brief Tries to release memory held by the pool.
+  //! @param __min_bytes_to_keep the minimal guaranteed size of the pool.
+  //! @note If the pool has less than \p __min_bytes_to_keep reserved, the trim_to operation is a no-op. Otherwise
+  //! the pool will be guaranteed to have at least \p __min_bytes_to_keep bytes reserved after the operation.
+  void trim_to(const size_t __min_bytes_to_keep)
+  {
+    _CCCL_TRY_CUDA_API(::cudaMemPoolTrimTo,
+                       "Failed to call cudaMemPoolTrimTo in async_memory_pool::trim_to",
+                       __pool_handle_,
+                       __min_bytes_to_keep);
+  }
+
+  //! @brief Gets the value of an attribute of the pool.
+  //! @param __attr the attribute to query.
+  //! @return The value of the attribute. For boolean attributes any value not equal to 0 equates to true.
+  size_t get_attribute(::cudaMemPoolAttr __attr) const
+  {
+    size_t __value = 0;
+    _CCCL_TRY_CUDA_API(
+      ::cudaMemPoolGetAttribute,
+      "Failed to call cudaMemPoolGetAttribute in async_memory_pool::get_attribute",
+      __pool_handle_,
+      __attr,
+      static_cast<void*>(&__value));
+    return __value;
+  }
+
+  //! @brief Sets an attribute of the pool to a given value.
+  //! @param __attr the attribute to be set.
+  //! @param __value the new value of that attribute.
+  //! @note For boolean attributes we cast \p __value to bool.
+  void set_attribute(::cudaMemPoolAttr __attr, size_t __value)
+  {
+    if (__attr == ::cudaMemPoolAttrReservedMemCurrent || __attr == ::cudaMemPoolAttrUsedMemCurrent)
+    {
+      _CUDA_VSTD_NOVERSION::__throw_invalid_argument("Invalid attribute passed to async_memory_pool::set_attribute.");
+    }
+
+    _CCCL_TRY_CUDA_API(
+      ::cudaMemPoolSetAttribute,
+      "Failed to call cudaMemPoolSetAttribute in async_memory_pool::set_attribute",
+      __pool_handle_,
+      __attr,
+      static_cast<void*>(&__value));
+  }
+
   //! @brief Equality comparison with another \c async_memory_pool.
   //! @returns true if the stored ``cudaMemPool_t`` are equal.
   _CCCL_NODISCARD constexpr bool operator==(async_memory_pool const& __rhs) const noexcept
diff --git a/cudax/test/memory_resource/async_memory_pool.cu b/cudax/test/memory_resource/async_memory_pool.cu
index 9db268c5b88..a64a9e49098 100644
--- a/cudax/test/memory_resource/async_memory_pool.cu
+++ b/cudax/test/memory_resource/async_memory_pool.cu
@@ -13,6 +13,9 @@
 #include
 #include
 
+#include
+
+#include
 
 #include
 
@@ -209,3 +212,225 @@ TEST_CASE("async_memory_pool comparison", "[memory_resource]")
     CHECK(current_default_pool != first);
   }
 }
+
+TEST_CASE("async_memory_pool accessors", "[memory_resource]")
+{
+  int current_device{};
+  {
+    _CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query current device with cudaGetDevice.", &current_device);
+  }
+
+  int driver_version = 0;
+  {
+    _CCCL_TRY_CUDA_API(::cudaDriverGetVersion, "Failed to call cudaDriverGetVersion", &driver_version);
+  }
+
+  ::cudaMemPool_t current_default_pool{};
+  {
+    _CCCL_TRY_CUDA_API(::cudaDeviceGetDefaultMemPool,
+                       "Failed to call cudaDeviceGetDefaultMemPool",
+                       &current_default_pool,
+                       current_device);
+  }
+
+  SECTION("async_memory_pool::set_attribute")
+  {
+    cudax::mr::async_memory_pool pool{current_device};
+
+    { // cudaMemPoolReuseFollowEventDependencies
+      // Get the attribute value
+      bool attr = pool.get_attribute(::cudaMemPoolReuseFollowEventDependencies) != 0;
+
+      // Set it to the opposite
+      pool.set_attribute(::cudaMemPoolReuseFollowEventDependencies, !attr);
+
+      // Retrieve again and verify it was changed
+      bool new_attr = pool.get_attribute(::cudaMemPoolReuseFollowEventDependencies) != 0;
+      CHECK(attr == !new_attr);
+
+      // Set it back
+      pool.set_attribute(::cudaMemPoolReuseFollowEventDependencies, attr);
+    }
+
+    { // cudaMemPoolReuseAllowOpportunistic
+      // Get the attribute value
+      bool attr = pool.get_attribute(::cudaMemPoolReuseAllowOpportunistic) != 0;
+
+      // Set it to the opposite
+      pool.set_attribute(::cudaMemPoolReuseAllowOpportunistic, !attr);
+
+      // Retrieve again and verify it was changed
+      bool new_attr = pool.get_attribute(::cudaMemPoolReuseAllowOpportunistic) != 0;
+      CHECK(attr == !new_attr);
+
+      // Set it back
+      pool.set_attribute(::cudaMemPoolReuseAllowOpportunistic, attr);
+    }
+
+    { // cudaMemPoolReuseAllowInternalDependencies
+      // Get the attribute value
+      bool attr = pool.get_attribute(::cudaMemPoolReuseAllowInternalDependencies) != 0;
+
+      // Set it to the opposite
+      pool.set_attribute(::cudaMemPoolReuseAllowInternalDependencies, !attr);
+
+      // Retrieve again and verify it was changed
+      bool new_attr = pool.get_attribute(::cudaMemPoolReuseAllowInternalDependencies) != 0;
+      CHECK(attr == !new_attr);
+
+      // Set it back
+      pool.set_attribute(::cudaMemPoolReuseAllowInternalDependencies, attr);
+    }
+
+    { // cudaMemPoolAttrReleaseThreshold
+      // Get the attribute value
+      size_t attr = pool.get_attribute(::cudaMemPoolAttrReleaseThreshold);
+
+      // Set it to something else
+      pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, 2 * attr);
+
+      // Retrieve again and verify it was changed
+      size_t new_attr = pool.get_attribute(::cudaMemPoolAttrReleaseThreshold);
+      CHECK(new_attr == 2 * attr);
+
+      // Set it back
+      pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, attr);
+    }
+
+    { // cudaMemPoolAttrReservedMemHigh
+      // Get the attribute value
+      size_t attr = pool.get_attribute(::cudaMemPoolAttrReservedMemHigh);
+
+      // Set it to something else
+      pool.set_attribute(::cudaMemPoolAttrReservedMemHigh, 2 * attr);
+
+      // Retrieve again and verify it was changed
+      size_t new_attr = pool.get_attribute(::cudaMemPoolAttrReservedMemHigh);
+      CHECK(new_attr == 2 * attr);
+
+      // Set it back
+      pool.set_attribute(::cudaMemPoolAttrReservedMemHigh, attr);
+    }
+
+    { // cudaMemPoolAttrUsedMemHigh
+      // Get the attribute value
+      size_t attr = pool.get_attribute(::cudaMemPoolAttrUsedMemHigh);
+
+      // Set it to something else
+      pool.set_attribute(::cudaMemPoolAttrUsedMemHigh, 2 * attr);
+
+      // Retrieve again and verify it was changed
+      size_t new_attr = pool.get_attribute(::cudaMemPoolAttrUsedMemHigh);
+      CHECK(new_attr == 2 * attr);
+
+      // Set it back
+      pool.set_attribute(::cudaMemPoolAttrUsedMemHigh, attr);
+    }
+
+    // prime the pool to a given size
+    cudax::mr::async_memory_resource resource{pool};
+    cudax::stream stream{};
+
+    // Allocate a buffer
+    auto* ptr = resource.allocate_async(2048 * sizeof(int), stream);
+    stream.wait();
+
+    { // cudaMemPoolAttrReservedMemCurrent
+      // Get the attribute value
+      size_t attr = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
+      CHECK(attr >= 2048 * sizeof(int));
+      // cudaMemPoolAttrReservedMemCurrent cannot be set
+      try
+      {
+        pool.set_attribute(::cudaMemPoolAttrReservedMemCurrent, attr);
+        CHECK(false);
+      }
+      catch (::std::invalid_argument& err)
+      {
+        CHECK(strcmp(err.what(), "Invalid attribute passed to async_memory_pool::set_attribute.") == 0);
+      }
+      catch (...)
+      {
+        CHECK(false);
+      }
+    }
+
+    { // cudaMemPoolAttrUsedMemCurrent
+      // Get the attribute value
+      size_t attr = pool.get_attribute(::cudaMemPoolAttrUsedMemCurrent);
+      CHECK(attr == 2048 * sizeof(int));
+      // cudaMemPoolAttrUsedMemCurrent cannot be set
+      try
+      {
+        pool.set_attribute(::cudaMemPoolAttrUsedMemCurrent, attr);
+        CHECK(false);
+      }
+      catch (::std::invalid_argument& err)
+      {
+        CHECK(strcmp(err.what(), "Invalid attribute passed to async_memory_pool::set_attribute.") == 0);
+      }
+      catch (...)
+      {
+        CHECK(false);
+      }
+    }
+
+    // Free the last allocation
+    resource.deallocate_async(ptr, 2048 * sizeof(int), stream);
+    stream.wait();
+  }
+
+  SECTION("async_memory_pool::trim_to")
+  {
+    cudax::mr::async_memory_pool pool{current_device};
+    // prime the pool to a given size
+    cudax::mr::async_memory_resource resource{pool};
+    cudax::stream stream{};
+
+    // Allocate 2 buffers
+    auto* ptr1 = resource.allocate_async(2048 * sizeof(int), stream);
+    auto* ptr2 = resource.allocate_async(2048 * sizeof(int), stream);
+    resource.deallocate_async(ptr1, 2048 * sizeof(int), stream);
+    stream.wait();
+
+    // Ensure that we still hold some memory, otherwise everything is freed
+    auto backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
+    CHECK(backing_size >= 4096 * sizeof(int));
+
+    // Trim the pool to something smaller than currently held
+    pool.trim_to(1024);
+
+    // Should be a noop
+    auto noop_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
+    CHECK(backing_size == noop_backing_size);
+
+    // Trim to larger than ever allocated
+    pool.trim_to(backing_size * 24);
+
+    // Should be a noop
+    auto another_noop_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
+    CHECK(backing_size == another_noop_backing_size);
+
+    // Trim to smaller than current backing but larger than current allocated
+    pool.trim_to(2560 * sizeof(int));
+
+    // Check the backing size again
+    auto new_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
+    CHECK(new_backing_size <= backing_size);
+    CHECK(new_backing_size >= 4096 * sizeof(int));
+
+    // Free the last allocation
+    resource.deallocate_async(ptr2, 2048 * sizeof(int), stream);
+    stream.wait();
+
+    // There is nothing allocated anymore, so all memory is released
+    auto no_backing = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
+    CHECK(no_backing == 0);
+
+    // We can still trim the pool without effect
+    pool.trim_to(2560 * sizeof(int));
+
+    auto still_no_backing = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
+    CHECK(still_no_backing == 0);
+  }
+}
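
For reviewers, a minimal usage sketch of the API surface this diff adds (trim_to, get_attribute, set_attribute), mirroring the calls exercised by the tests above. This is not part of the change; the two umbrella includes and the cudax namespace alias are assumptions about the public cudax headers, and device 0 is assumed to be present.

// Sketch only: include paths and the `cudax` alias are assumed, not taken from this diff.
#include <cuda/experimental/memory_resource.cuh>
#include <cuda/experimental/stream.cuh>

#include <cstdio>

namespace cudax = cuda::experimental;

int main()
{
  // Build an explicit pool on device 0 and a resource that allocates from it,
  // the same way the tests above do.
  cudax::mr::async_memory_pool pool{0};
  cudax::mr::async_memory_resource resource{pool};
  cudax::stream stream{};

  // Keep freed blocks cached until the pool reserves more than 4 MiB.
  pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, size_t{4} << 20);

  void* ptr = resource.allocate_async(1 << 20, stream);
  resource.deallocate_async(ptr, 1 << 20, stream);
  stream.wait();

  // Inspect how much memory the pool still reserves, then shrink it.
  std::printf("reserved: %zu bytes\n", pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent));
  pool.trim_to(0); // release everything that is not backing a live allocation

  return 0;
}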