Implement some CUDA API calls for async_memory_pool
miscco committed Sep 25, 2024
1 parent 0f0fdc2 commit b88fc90
Showing 2 changed files with 271 additions and 0 deletions.
@@ -228,6 +228,52 @@ public:
_CCCL_ASSERT_CUDA_API(::cudaMemPoolDestroy, "~async_memory_pool() failed to destroy pool", __pool_handle_);
}

//! @brief Tries to release memory held by the pool back to the OS.
//! @param __min_bytes_to_keep the minimal guaranteed size of the pool.
//! @note If the pool has less than \p __min_bytes_to_keep reserved, the trim_to operation is a no-op. Otherwise the
//! pool will be guaranteed to have at least \p __min_bytes_to_keep bytes reserved after the operation.
void trim_to(const size_t __min_bytes_to_keep)
{
_CCCL_TRY_CUDA_API(::cudaMemPoolTrimTo,
"Failed to call cudaMemPoolTrimTo in async_memory_pool::trim_to",
__pool_handle_,
__min_bytes_to_keep);
}

//! @brief Gets the value of an attribute of the pool.
//! @param __attr the attribute to query.
//! @return The value of the attribute. For boolean attributes any value not equal to 0 equates to true.
size_t get_attribute(::cudaMemPoolAttr __attr) const
{
size_t __value = 0;
_CCCL_TRY_CUDA_API(
::cudaMemPoolGetAttribute,
"Failed to call cudaMemPoolSetAttribute in async_memory_pool::get_attribute",
__pool_handle_,
__attr,
static_cast<void*>(&__value));
return __value;
}

//! @brief Sets an attribute of the pool to a given value.
//! @param __attr the attribute to be set.
//! @param __value the new value of that attribute.
//! @note For boolean attributes any value not equal to 0 equates to true.
//! @throws ::std::invalid_argument if \p __attr is cudaMemPoolAttrReservedMemCurrent or cudaMemPoolAttrUsedMemCurrent, which cannot be set.
void set_attribute(::cudaMemPoolAttr __attr, size_t __value)
{
if (__attr == ::cudaMemPoolAttrReservedMemCurrent || __attr == ::cudaMemPoolAttrUsedMemCurrent)
{
_CUDA_VSTD_NOVERSION::__throw_invalid_argument("Invalid attribute passed to async_memory_pool::set_attribute.");
}

_CCCL_TRY_CUDA_API(
::cudaMemPoolSetAttribute,
"Failed to call cudaMemPoolSetAttribute in async_memory_pool::set_attribute",
__pool_handle_,
__attr,
static_cast<void*>(&__value));
}

//! @brief Equality comparison with another \c async_memory_pool.
//! @returns true if the stored ``cudaMemPool_t`` are equal.
_CCCL_NODISCARD constexpr bool operator==(async_memory_pool const& __rhs) const noexcept
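For reference, a minimal usage sketch of the three members added above, each of which forwards to the corresponding CUDA runtime call (cudaMemPoolSetAttribute, cudaMemPoolGetAttribute, cudaMemPoolTrimTo). The constructor from a device ordinal is the one exercised by the tests below; the threshold and trim values are illustrative assumptions, not part of this commit:

// Usage sketch only; attribute and size values are illustrative assumptions.
cudax::mr::async_memory_pool pool{0}; // pool on device 0

// Keep up to 64 MiB of freed memory cached in the pool instead of returning it to the OS.
pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, size_t{64} * 1024 * 1024);

// Query how much memory the pool currently has reserved.
const size_t reserved = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);

// Release unused reserved memory, keeping at least 1 MiB reserved (a no-op if less is reserved).
pool.trim_to(size_t{1} * 1024 * 1024);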
cudax/test/memory_resource/async_memory_pool.cu (225 additions, 0 deletions)
Expand Up @@ -13,6 +13,9 @@
#include <cuda/stream_ref>

#include <cuda/experimental/memory_resource.cuh>
#include <cuda/experimental/stream.cuh>

#include <cstring> // strcmp
#include <stdexcept>

#include <catch2/catch.hpp>

@@ -209,3 +212,225 @@ TEST_CASE("async_memory_pool comparison", "[memory_resource]")
CHECK(current_default_pool != first);
}
}

TEST_CASE("async_memory_pool accessors", "[memory_resource]")
{
int current_device{};
{
_CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query the current device with cudaGetDevice.", &current_device);
}

int driver_version = 0;
{
_CCCL_TRY_CUDA_API(::cudaDriverGetVersion, "Failed to call cudaDriverGetVersion", &driver_version);
}

::cudaMemPool_t current_default_pool{};
{
_CCCL_TRY_CUDA_API(::cudaDeviceGetDefaultMemPool,
"Failed to call cudaDeviceGetDefaultMemPool",
&current_default_pool,
current_device);
}

SECTION("async_memory_pool::set_attribute")
{
cudax::mr::async_memory_pool pool{current_device};

{ // cudaMemPoolReuseFollowEventDependencies
// Get the attribute value
bool attr = pool.get_attribute(::cudaMemPoolReuseFollowEventDependencies) != 0;

// Set it to the opposite
pool.set_attribute(::cudaMemPoolReuseFollowEventDependencies, !attr);

// Retrieve again and verify it was changed
bool new_attr = pool.get_attribute(::cudaMemPoolReuseFollowEventDependencies) != 0;
CHECK(attr == !new_attr);

// Set it back
pool.set_attribute(::cudaMemPoolReuseFollowEventDependencies, attr);
}

{ // cudaMemPoolReuseAllowOpportunistic
// Get the attribute value
bool attr = pool.get_attribute(::cudaMemPoolReuseAllowOpportunistic) != 0;

// Set it to the opposite
pool.set_attribute(::cudaMemPoolReuseAllowOpportunistic, !attr);

// Retrieve again and verify it was changed
bool new_attr = pool.get_attribute(::cudaMemPoolReuseAllowOpportunistic) != 0;
CHECK(attr == !new_attr);

// Set it back
pool.set_attribute(::cudaMemPoolReuseAllowOpportunistic, attr);
}

{ // cudaMemPoolReuseAllowInternalDependencies
// Get the attribute value
bool attr = pool.get_attribute(::cudaMemPoolReuseAllowInternalDependencies) != 0;

// Set it to the opposite
pool.set_attribute(::cudaMemPoolReuseAllowInternalDependencies, !attr);

// Retrieve again and verify it was changed
bool new_attr = pool.get_attribute(::cudaMemPoolReuseAllowInternalDependencies) != 0;
CHECK(attr == !new_attr);

// Set it back
pool.set_attribute(::cudaMemPoolReuseAllowInternalDependencies, attr);
}

{ // cudaMemPoolAttrReleaseThreshold
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrReleaseThreshold);

// Set it to something else
pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, 2 * attr);

// Retrieve again and verify it was changed
size_t new_attr = pool.get_attribute(::cudaMemPoolAttrReleaseThreshold);
CHECK(new_attr == 2 * attr);

// Set it back
pool.set_attribute(::cudaMemPoolAttrReleaseThreshold, attr);
}

{ // cudaMemPoolAttrReservedMemHigh
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrReservedMemHigh);

// Set it to something else
pool.set_attribute(::cudaMemPoolAttrReservedMemHigh, 2 * attr);

// Retrieve again and verify it was changed
size_t new_attr = pool.get_attribute(::cudaMemPoolAttrReservedMemHigh);
CHECK(new_attr == 2 * attr);

// Set it back
pool.set_attribute(::cudaMemPoolAttrReservedMemHigh, attr);
}

{ // cudaMemPoolAttrUsedMemHigh
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrUsedMemHigh);

// Set it to something else
pool.set_attribute(::cudaMemPoolAttrUsedMemHigh, 2 * attr);

// Retrieve again and verify it was changed
size_t new_attr = pool.get_attribute(::cudaMemPoolAttrUsedMemHigh);
CHECK(new_attr == 2 * attr);

// Set it back
pool.set_attribute(::cudaMemPoolAttrUsedMemHigh, attr);
}

// prime the pool to a given size
cudax::mr::async_memory_resource resource{pool};
cudax::stream stream{};

// Allocate a buffer
auto* ptr = resource.allocate_async(2048 * sizeof(int), stream);
stream.wait();

{ // cudaMemPoolAttrReservedMemCurrent
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(attr >= 2048 * sizeof(int));
// cudaMemPoolAttrReservedMemCurrent cannot be set
try
{
pool.set_attribute(::cudaMemPoolAttrReservedMemCurrent, attr);
CHECK(false);
}
catch (::std::invalid_argument& err)
{
CHECK(strcmp(err.what(), "Invalid attribute passed to async_memory_pool::set_attribute.") == 0);
}
catch (...)
{
CHECK(false);
}
}

{ // cudaMemPoolAttrUsedMemCurrent
// Get the attribute value
size_t attr = pool.get_attribute(::cudaMemPoolAttrUsedMemCurrent);
CHECK(attr == 2048 * sizeof(int));
// cudaMemPoolAttrUsedMemCurrent cannot be set
try
{
pool.set_attribute(::cudaMemPoolAttrUsedMemCurrent, attr);
CHECK(false);
}
catch (::std::invalid_argument& err)
{
CHECK(strcmp(err.what(), "Invalid attribute passed to async_memory_pool::set_attribute.") == 0);
}
catch (...)
{
CHECK(false);
}
}

// Free the last allocation
resource.deallocate_async(ptr, 2048 * sizeof(int), stream);
stream.wait();
}

SECTION("async_memory_pool::trim_to")
{
cudax::mr::async_memory_pool pool{current_device};
// prime the pool to a given size
cudax::mr::async_memory_resource resource{pool};
cudax::stream stream{};

// Allocate 2 buffers
auto* ptr1 = resource.allocate_async(2048 * sizeof(int), stream);
auto* ptr2 = resource.allocate_async(2048 * sizeof(int), stream);
resource.deallocate_async(ptr1, 2048 * sizeof(int), stream);
stream.wait();

// Ensure that we still hold some memory, otherwise everything is freed
auto backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(backing_size >= 4096 * sizeof(int));

// Trim the pool to something smaller than currently held
pool.trim_to(1024);

// Should be a noop
auto noop_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(backing_size == noop_backing_size);

// Trim to larger than ever allocated
pool.trim_to(backing_size * 24);

// Should be a noop
auto another_noop_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(backing_size == another_noop_backing_size);

// Trim to smaller than current backing but larger than current allocated
pool.trim_to(2560 * sizeof(int));

// Check the backing size again
auto new_backing_size = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(new_backing_size <= backing_size);
CHECK(new_backing_size >= 4096 * sizeof(int));

// Free the last allocation
resource.deallocate_async(ptr2, 2048 * sizeof(int), stream);
stream.wait();

// There is nothing allocated anymore, so all memory is released
auto no_backing = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(no_backing == 0);

// We can still trim the pool without effect
pool.trim_to(2560 * sizeof(int));

auto still_no_backing = pool.get_attribute(::cudaMemPoolAttrReservedMemCurrent);
CHECK(still_no_backing == 0);
}
}
