Implement some CUDA API calls for async_memory_pool
miscco committed Sep 25, 2024
1 parent 0f0fdc2 commit b88fc90
Showing 2 changed files with 271 additions and 0 deletions.
@@ -228,6 +228,52 @@ public:
_CCCL_ASSERT_CUDA_API(::cudaMemPoolDestroy, "~async_memory_pool() failed to destroy pool", __pool_handle_);
}

//! @brief Tries to release memory held by the pool back to the OS.
//! @param __min_bytes_to_keep the minimal guaranteed size of the pool.
//! @note If the pool has less than \p __min_bytes_to_keep reserved, the trim_to operation is a no-op. Otherwise the
//! pool will be guaranteed to have at least \p __min_bytes_to_keep bytes reserved after the operation.
void trim_to(const size_t __min_bytes_to_keep)
{
_CCCL_TRY_CUDA_API(::cudaMemPoolTrimTo,
"Failed to call cudaMemPoolTrimTo in async_memory_pool::trim_to",
__pool_handle_,
__min_bytes_to_keep);
}

//! @brief Gets the value of an attribute of the pool.
//! @param __attr the attribute to query.
//! @return The value of the attribute. For boolean attributes any value not equal to 0 equates to true.
size_t get_attribute(::cudaMemPoolAttr __attr) const
{
size_t __value = 0;
_CCCL_TRY_CUDA_API(
::cudaMemPoolGetAttribute,
"Failed to call cudaMemPoolSetAttribute in async_memory_pool::get_attribute",
__pool_handle_,
__attr,
static_cast<void*>(&__value));
return __value;
}

//! @brief Sets an attribute of the pool to a given value.
//! @param __attr the attribute to be set.
//! @param __value the new value of that attribute.
//! @note For boolean attributes any value not equal to 0 equates to true.
//! @throws ::std::invalid_argument if \p __attr is cudaMemPoolAttrReservedMemCurrent or cudaMemPoolAttrUsedMemCurrent, which cannot be set.
void set_attribute(::cudaMemPoolAttr __attr, size_t __value)
{
if (__attr == ::cudaMemPoolAttrReservedMemCurrent || __attr == ::cudaMemPoolAttrUsedMemCurrent)
{
_CUDA_VSTD_NOVERSION::__throw_invalid_argument("Invalid attribute passed to async_memory_pool::set_attribute.");
}

_CCCL_TRY_CUDA_API(
::cudaMemPoolSetAttribute,
"Failed to call cudaMemPoolSetAttribute in async_memory_pool::set_attribute",
__pool_handle_,
__attr,
static_cast<void*>(&__value));
}

//! @brief Equality comparison with another \c async_memory_pool.
//! @returns true if the stored ``cudaMemPool_t`` are equal.
_CCCL_NODISCARD constexpr bool operator==(async_memory_pool const& __rhs) const noexcept
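For reference, a minimal usage sketch of the three members added above, each of which forwards to the corresponding CUDA runtime call (cudaMemPoolSetAttribute, cudaMemPoolGetAttribute, cudaMemPoolTrimTo). The constructor from a device ordinal is the one exercised by the tests below; the threshold and trim values are illustrative assumptions, not part of this commit:

// Usage sketch only; attribute and size values are illustrative assumptions.
cudax::mr::async_memory_pool pool{0}; // pool on device 0

// Keep up to 64 MiB of freed memory cached in the pool instead of returning it to the OS.
pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, size_t{64} * 1024 * 1024);

// Query how much memory the pool currently has reserved.
const size_t reserved = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);

// Release unused reserved memory, keeping at least 1 MiB reserved (a no-op if less is reserved).
pool.trim_to(size_t{1} * 1024 * 1024);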
cudax/test/memory_resource/async_memory_pool.cu (225 additions, 0 deletions)
Expand Up @@ -13,6 +13,9 @@
#include <cuda/stream_ref>

#include <cuda/experimental/memory_resource.cuh>
#include <cuda/experimental/stream.cuh>

#include <cstring> // strcmp
#include <stdexcept>

#include <catch2/catch.hpp>

@@ -209,3 +212,225 @@ TEST_CASE("async_memory_pool comparison", "[memory_resource]")
CHECK(current_default_pool != first);
}
}

TEST_CASE("async_memory_pool accessors", "[memory_resource]")
{
int current_device{};
{
_CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query the current device with cudaGetDevice.", &current_device);
}

int driver_version = 0;
{
_CCCL_TRY_CUDA_API(::cudaDriverGetVersion, "Failed to call cudaDriverGetVersion", &driver_version);
}

::cudaMemPool_t current_default_pool{};
{
_CCCL_TRY_CUDA_API(::cudaDeviceGetDefaultMemPool,
"Failed to call cudaDeviceGetDefaultMemPool",
&current_default_pool,
current_device);
}

SECTION("async_memory_pool::set_attribute")
{
cudax::mr::async_memory_pool pool{current_device};

{ // cudaMemPoolReuseFollowEventDependencies
// Get the attribute value
bool attr = pool.get_attribute(::cudaMemPoolReuseFollowEventDependencies) != 0;

// Set it to the opposite
pool.set_attribute(::cudaMemPoolReuseFollowEventDependencies, !attr);

// Retrieve again and verify it was changed
bool new_attr = pool.get_attribute(::cudaMemPoolReuseFollowEventDependencies) != 0;
CHECK(attr == !new_attr);

// Set it back
pool.set_attribute(::cudaMemPoolReuseFollowEventDependencies, attr);
}

{ // cudaMemPoolReuseAllowOpportunistic
// Get the attribute value
bool attr = pool.get_attribute(::cudaMemPoolReuseAllowOpportunistic) != 0;

// Set it to the opposite
pool.set_attribute(::cudaMemPoolReuseAllowOpportunistic, !attr);

// Retrieve again and verify it was changed
bool new_attr = pool.get_attribute(::cudaMemPoolReuseAllowOpportunistic) != 0;
CHECK(attr == !new_attr);

// Set it back
pool.set_attribute(::cudaMemPoolReuseAllowOpportunistic, attr);
}

{ // cudaMemPoolReuseAllowInternalDependencies
// Get the attribute value
bool attr = pool.get_attribute(::cudaMemPoolReuseAllowInternalDependencies) != 0;

// Set it to the opposite
pool.set_attribute(::cudaMemPoolReuseAllowInternalDependencies, !attr);

// Retrieve again and verify it was changed
bool new_attr = pool.get_attribute(::cudaMemPoolReuseAllowInternalDependencies) != 0;
CHECK(attr == !new_attr);

// Set it back
pool.set_attribute(::cudaMemPoolReuseAllowInternalDependencies, attr);
}

{ // cudaMemPoolAttrReleaseThreshold
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrReleaseThreshold);

// Set it to something else
pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, 2 * attr);

// Retrieve again and verify it was changed
size_t new_attr = pool.get_attribute(::cudaMemPoolAttrReleaseThreshold);
CHECK(new_attr == 2 * attr);

// Set it back
pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, attr);
}

{ // cudaMemPoolAttrReservedMemHigh
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrReservedMemHigh);

// Set it to something else
pool.set_attribute(::cudaMemPoolAttrReservedMemHigh, 2 * attr);

// Retrieve again and verify it was changed
size_t new_attr = pool.get_attribute(::cudaMemPoolAttrReservedMemHigh);
CHECK(new_attr == 2 * attr);

// Set it back
pool.set_attribute(::cudaMemPoolAttrReservedMemHigh, attr);
}

{ // cudaMemPoolAttrUsedMemHigh
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrUsedMemHigh);

// Set it to something else
pool.set_attribute(::cudaMemPoolAttrUsedMemHigh, 2 * attr);

// Retrieve again and verify it was changed
size_t new_attr = pool.get_attribute(::cudaMemPoolAttrUsedMemHigh);
CHECK(new_attr == 2 * attr);

// Set it back
pool.set_attribute(::cudaMemPoolAttrUsedMemHigh, attr);
}

// prime the pool to a given size
cudax::mr::async_memory_resource resource{pool};
cudax::stream stream{};

// Allocate a buffer
auto* ptr = resource.allocate_async(2048 * sizeof(int), stream);
stream.wait();

{ // cudaMemPoolAttrReservedMemCurrent
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(attr >= 2048 * sizeof(int));
// cudaMemPoolAttrReservedMemCurrent cannot be set
try
{
pool.set_attribute(::cudaMemPoolAttrReservedMemCurrent, attr);
CHECK(false);
}
catch (::std::invalid_argument& err)
{
CHECK(strcmp(err.what(), "Invalid attribute passed to async_memory_pool::set_attribute.") == 0);
}
catch (...)
{
CHECK(false);
}
}

{ // cudaMemPoolAttrUsedMemCurrent
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrUsedMemCurrent);
CHECK(attr == 2048 * sizeof(int));
// cudaMemPoolAttrUsedMemCurrent cannot be set
try
{
pool.set_attribute(::cudaMemPoolAttrUsedMemCurrent, attr);
CHECK(false);
}
catch (::std::invalid_argument& err)
{
CHECK(strcmp(err.what(), "Invalid attribute passed to async_memory_pool::set_attribute.") == 0);
}
catch (...)
{
CHECK(false);
}
}

// Free the last allocation
resource.deallocate_async(ptr, 2048 * sizeof(int), stream);
stream.wait();
}

SECTION("async_memory_pool::trim_to")
{
cudax::mr::async_memory_pool pool{current_device};
// prime the pool to a given size
cudax::mr::async_memory_resource resource{pool};
cudax::stream stream{};

// Allocate 2 buffers
auto* ptr1 = resource.allocate_async(2048 * sizeof(int), stream);
auto* ptr2 = resource.allocate_async(2048 * sizeof(int), stream);
resource.deallocate_async(ptr1, 2048 * sizeof(int), stream);
stream.wait();

// Ensure that we still hold some memory, otherwise everything is freed
auto backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(backing_size >= 4096 * sizeof(int));

// Trim the pool to something smaller than currently held
pool.trim_to(1024);

// Should be a noop
auto noop_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(backing_size == noop_backing_size);

// Trim to larger than ever allocated
pool.trim_to(backing_size * 24);

// Should be a noop
auto another_noop_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(backing_size == another_noop_backing_size);

// Trim to smaller than current backing but larger than current allocated
pool.trim_to(2560 * sizeof(int));

// Check the backing size again
auto new_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(new_backing_size <= backing_size);
CHECK(new_backing_size >= 4096 * sizeof(int));

// Free the last allocation
resource.deallocate_async(ptr2, 2048 * sizeof(int), stream);
stream.wait();

// There is nothing allocated anymore, so all memory is released
auto no_backing = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(no_backing == 0);

// We can still trim the pool without effect
pool.trim_to(2560 * sizeof(int));

auto still_no_backing = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(still_no_backing == 0);
}
}
