Implement cudax::shared_resource

We currently have two basic building blocks around memory resources, `any_resource` and `resource_ref`. However, while they make owning and sharing resources much easier, we can still run into lifetime issues. If a user wants to pass a resource into a library function that might outlive the resource, they would need to move it into an `any_resource`. However, they also might want to share that resource among multiple functions, e.g. a pool allocator. We need a way to properly share a resource in those circumstances. Enter `shared_resource`. Rather than storing an `any_resource`, this holds a `shared_ptr<any_resource>`. With that we can happily copy / move them around without touching the stored resource.
NVIDIA · Sep 10, 2024 · e9911d0 · e9911d0
1 parent d5492d5
commit e9911d0
Show file tree

Hide file tree

Showing 7 changed files with 513 additions and 176 deletions.
diff --git a/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh
@@ -0,0 +1,153 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H
+#define _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+// If the memory resource header was included without the experimental flag,
+// tell the user to define the experimental flag.
+#if defined(_CUDA_MEMORY_RESOURCE) && !defined(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)
+#  error "To use the experimental memory resource, define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE"
+#endif
+
+// cuda::mr is unavailable on MSVC 2017
+#if defined(_CCCL_COMPILER_MSVC_2017)
+#  error "The shared_resource header is not supported on MSVC 2017"
+#endif
+
+#if !defined(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)
+#  define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE
+#endif
+
+#include <cuda/__memory_resource/resource.h>
+#include <cuda/std/__type_traits/is_swappable.h>
+#include <cuda/std/__type_traits/type_set.h>
+#include <cuda/std/__utility/forward.h>
+#include <cuda/std/__utility/move.h>
+
+#include <cuda/experimental/__memory_resource/any_resource.cuh>
+
+#include <memory>
+
+namespace cuda::experimental::mr
+{
+
+//! @rst
+//! .. _cudax-memory-resource-shared-resource:
+//!
+//! Resource wrapper to share ownership of a resource
+//! --------------------------------------------------
+//!
+//! ``shared_resource`` holds a reference counted :ref:`any_resource <cudax-memory-resource-basic-any-resource>`.
+//! This allows the user to pass a resource around with reference semantics while avoiding lifetime issues.
+//!
+//! @endrst
+template <class... _Properties>
+class shared_resource
+{
+private:
+  //! Reference-counted handle to the type-erased resource. The only visible constructor always fills it, so it is
+  //! expected to be empty only in a moved-from object (or after swapping with one).
+  ::std::shared_ptr<::cuda::experimental::mr::any_resource<_Properties...>> __resource_;
+
+  // The heterogeneous comparison operators read `__resource_` of other specializations.
+  template <class...>
+  friend class shared_resource;
+
+  //! True when every one of \c _Properties is contained in the set built from \c _OtherProperties.
+  template <class... _OtherProperties>
+  static constexpr bool __properties_match =
+    _CUDA_VSTD::__type_set_contains<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>;
+
+  //! True when \c _Resource is not a \c shared_resource of this very type (so the converting constructor does not
+  //! hijack copy / move construction) and satisfies \c resource_with for all \c _Properties.
+  template <class _Resource>
+  static constexpr bool __different_resource_with =
+    !_CCCL_TRAIT(::cuda::std::is_same, ::cuda::std::remove_cvref_t<_Resource>, shared_resource)
+    && _CUDA_VMR::resource_with<_Resource, _Properties...>;
+
+public:
+  //! @brief Constructs a \c shared_resource from a type that satisfies the \c resource concept as well as all
+  //! properties.
+  //! @param __res The resource to be wrapped within the \c shared_resource. It is perfectly forwarded into the
+  //! stored \c any_resource, so an lvalue argument is left untouched and only an rvalue is moved from.
+  _LIBCUDACXX_TEMPLATE(class _Resource)
+  _LIBCUDACXX_REQUIRES(__different_resource_with<_Resource>)
+  shared_resource(_Resource&& __res)
+      : __resource_(::std::make_shared<::cuda::experimental::mr::any_resource<_Properties...>>(
+          _CUDA_VSTD::forward<_Resource>(__res)))
+  {}
+
+  //! @brief Allocate memory of size at least \p __bytes using the stored resource.
+  //! @param __bytes The size in bytes of the allocation.
+  //! @param __alignment The requested alignment of the allocation.
+  //! @return Pointer to the newly allocated memory
+  //! @pre The \c shared_resource holds a resource, i.e. it has not been moved from.
+  _CCCL_NODISCARD void* allocate(size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t))
+  {
+    return __resource_->allocate(__bytes, __alignment);
+  }
+
+  //! @brief Deallocate memory pointed to by \p __ptr using the stored resource.
+  //! @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate`
+  //! @param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr.
+  //! @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr.
+  //! @pre The \c shared_resource holds a resource, i.e. it has not been moved from.
+  void deallocate(void* __ptr, size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t)) noexcept
+  {
+    __resource_->deallocate(__ptr, __bytes, __alignment);
+  }
+
+  //! @brief Swaps a \c shared_resource with another one.
+  //! @param __other The other \c shared_resource.
+  void swap(shared_resource& __other) noexcept
+  {
+    // Only the handles are exchanged; the stored resources themselves are not touched.
+    __resource_.swap(__other.__resource_);
+  }
+
+  //! @brief Equality comparison between two \c shared_resource
+  //! @param __rhs The other \c shared_resource
+  //! @return If both sides hold a resource, the result of comparing the two stored \c any_resource objects.
+  //! Otherwise true only if both sides are empty.
+  _LIBCUDACXX_TEMPLATE(class... _OtherProperties)
+  _LIBCUDACXX_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties))
+                         _LIBCUDACXX_AND __properties_match<_OtherProperties...>)
+  _CCCL_NODISCARD bool operator==(const shared_resource<_OtherProperties...>& __rhs) const
+  {
+    if (__resource_ && __rhs.__resource_)
+    {
+      return *__resource_ == *__rhs.__resource_;
+    }
+    // At least one side is empty: they are equal only if both are. The handles are tested individually because
+    // the two shared_ptr types differ when the property lists are ordered differently.
+    return !__resource_ && !__rhs.__resource_;
+  }
+
+  //! @brief Inequality comparison between two \c shared_resource
+  //! @param __rhs The other \c shared_resource
+  //! @return The inverse of the equality comparison with \p __rhs.
+  _LIBCUDACXX_TEMPLATE(class... _OtherProperties)
+  _LIBCUDACXX_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties))
+                         _LIBCUDACXX_AND __properties_match<_OtherProperties...>)
+  _CCCL_NODISCARD bool operator!=(const shared_resource<_OtherProperties...>& __rhs) const
+  {
+    // Defined through operator== so that the two operators can never disagree.
+    return !(*this == __rhs);
+  }
+
+  //! @brief Forwards the stateless properties
+  _LIBCUDACXX_TEMPLATE(class _Property)
+  _LIBCUDACXX_REQUIRES((!property_with_value<_Property>) _LIBCUDACXX_AND(_CUDA_VSTD::_One_of<_Property, _Properties...>))
+  friend void get_property(const shared_resource&, _Property) noexcept {}
+
+  //! @brief Forwards the stateful properties
+  //! @param __res The \c shared_resource to query. Must hold a resource, i.e. must not have been moved from.
+  //! @param __prop The property tag whose value is requested.
+  //! @return The value the stored \c any_resource reports for \p __prop.
+  _LIBCUDACXX_TEMPLATE(class _Property)
+  _LIBCUDACXX_REQUIRES(property_with_value<_Property> _LIBCUDACXX_AND(_CUDA_VSTD::_One_of<_Property, _Properties...>))
+  _CCCL_NODISCARD_FRIEND __property_value_t<_Property>
+  get_property(const shared_resource& __res, _Property __prop) noexcept
+  {
+    // shared_resource has no property vtable of its own; the query is answered by the wrapped any_resource.
+    return ::cuda::get_property(*__res.__resource_, __prop);
+  }
+};
+} // namespace cuda::experimental::mr
+
+#endif // _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H
diff --git a/cudax/include/cuda/experimental/memory_resource.cuh b/cudax/include/cuda/experimental/memory_resource.cuh
@@ -14,5 +14,6 @@
 #include <cuda/experimental/__memory_resource/any_resource.cuh>
 #include <cuda/experimental/__memory_resource/async_memory_pool.cuh>
 #include <cuda/experimental/__memory_resource/async_memory_resource.cuh>
+#include <cuda/experimental/__memory_resource/shared_resource.cuh>
 
 #endif // __CUDAX_MEMORY_RESOURCE___
diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt
@@ -94,6 +94,7 @@ foreach(cn_target IN LISTS cudax_TARGETS)
     memory_resource/any_resource.cu
     memory_resource/async_memory_pool.cu
     memory_resource/async_memory_resource.cu
+    memory_resource/shared_resource.cu
   )
 
   cudax_add_catch2_test(test_target async_tests ${cn_target}

diff --git a/cudax/test/memory_resource/any_resource.cu b/cudax/test/memory_resource/any_resource.cu
@@ -10,185 +10,10 @@
 
 #include <cuda/experimental/memory_resource.cuh>
 
-#include <cstddef>
-#include <cstdint>
-
-#include "cuda/std/detail/libcxx/include/cstddef"
+#include "test_resource.h"
 #include <catch2/catch.hpp>
 #include <testing.cuh>
 
-using std::size_t;
-using std::uintptr_t;
-
-struct Counts // Counters the test helpers below bump to record what happened to a resource.
-{
-  int object_count     = 0; // ++ in every test_resource constructor, -- in its destructor
-  int move_count       = 0; // ++ in test_resource's move constructor
-  int copy_count       = 0; // ++ in test_resource's copy constructor
-  int allocate_count   = 0; // ++ per test_resource::allocate call
-  int deallocate_count = 0; // ++ per test_resource::deallocate call
-  int equal_to_count   = 0; // ++ per test_resource operator== call
-  int new_count        = 0; // ++ per test_resource::operator new call
-  int delete_count     = 0; // ++ per test_resource::operator delete call
-
-  friend std::ostream& operator<<(std::ostream& os, const Counts& counts) // streams all counters as "name: value"
-  {
-    return os
-        << "object: " << counts.object_count << ", " //
-        << "move: " << counts.move_count << ", " //
-        << "copy: " << counts.copy_count << ", " //
-        << "allocate: " << counts.allocate_count << ", " //
-        << "deallocate: " << counts.deallocate_count << ", " //
-        << "equal_to: " << counts.equal_to_count << ", " //
-        << "new: " << counts.new_count << ", " //
-        << "delete: " << counts.delete_count;
-  }
-
-  friend bool operator==(const Counts& lhs, const Counts& rhs) noexcept // true only if all eight counters match
-  {
-    return lhs.object_count == rhs.object_count && //
-           lhs.move_count == rhs.move_count && //
-           lhs.copy_count == rhs.copy_count && //
-           lhs.allocate_count == rhs.allocate_count && //
-           lhs.deallocate_count == rhs.deallocate_count && //
-           lhs.equal_to_count == rhs.equal_to_count && //
-           lhs.new_count == rhs.new_count && //
-           lhs.delete_count == rhs.delete_count; //
-  }
-
-  friend bool operator!=(const Counts& lhs, const Counts& rhs) noexcept // negation of operator==
-  {
-    return !(lhs == rhs);
-  }
-};
-
-struct test_fixture_ // Per-test state: the counters plus the size/alignment test_resource CHECKs against.
-{
-  Counts counts;
-  size_t bytes_ = 0; // expected size, set through bytes()
-  size_t align_ = 0; // expected alignment, set through align()
-  static thread_local Counts* counts_; // set by the constructor; read by test_resource's operator new/delete
-
-  test_fixture_() noexcept
-      : counts()
-  {
-    counts_ = &counts; // publish this fixture's counters for the current thread
-  }
-
-  size_t bytes(size_t sz) noexcept // stores sz as the expected size and returns it
-  {
-    bytes_ = sz;
-    return bytes_;
-  }
-
-  size_t align(size_t align) noexcept // stores align as the expected alignment and returns it
-  {
-    align_ = align;
-    return align_;
-  }
-};
-
-thread_local Counts* test_fixture_::counts_ = nullptr;
-
-template <class>
-using test_fixture = test_fixture_; // the template argument is ignored; every instantiation is test_fixture_
-
-template <class T>
-struct test_resource // Instrumented resource: records every operation in the fixture's Counts and validates itself.
-{
-  int data; // the only state operator== compares
-  test_fixture_* fixture;
-  T cookie[2] = {0xDEADBEEF, 0xDEADBEEF}; // sentinel verified by _assert_valid()
-
-  explicit test_resource(int i, test_fixture_* fix) noexcept
-      : data(i)
-      , fixture(fix)
-  {
-    ++fixture->counts.object_count;
-  }
-
-  test_resource(test_resource&& other) noexcept
-      : data(other.data)
-      , fixture(other.fixture)
-  {
-    other._assert_valid();
-    ++fixture->counts.move_count;
-    ++fixture->counts.object_count;
-    other.cookie[0] = other.cookie[1] = 0x0C07FEFE; // poison the source so later use of it fails _assert_valid()
-  }
-
-  test_resource(const test_resource& other) noexcept
-      : data(other.data)
-      , fixture(other.fixture)
-  {
-    other._assert_valid();
-    ++fixture->counts.copy_count;
-    ++fixture->counts.object_count;
-  }
-
-  ~test_resource()
-  {
-    --fixture->counts.object_count;
-  }
-
-  void* allocate(std::size_t bytes, std::size_t align) // CHECKs the arguments against the fixture, counts the call
-  {
-    _assert_valid();
-    CHECK(bytes == fixture->bytes_);
-    CHECK(align == fixture->align_);
-    ++fixture->counts.allocate_count;
-    return fixture; // no real allocation: the fixture's address is handed out and later expected by deallocate
-  }
-
-  void deallocate(void* ptr, std::size_t bytes, std::size_t align) noexcept
-  {
-    _assert_valid();
-    CHECK(ptr == fixture);
-    CHECK(bytes == fixture->bytes_);
-    CHECK(align == fixture->align_);
-    ++fixture->counts.deallocate_count;
-    return;
-  }
-
-  friend bool operator==(const test_resource& lhs, const test_resource& rhs)
-  {
-    lhs._assert_valid();
-    rhs._assert_valid();
-    ++lhs.fixture->counts.equal_to_count; // only the left operand's fixture is counted
-    return lhs.data == rhs.data;
-  }
-
-  friend bool operator!=(const test_resource& lhs, const test_resource& rhs)
-  {
-    FAIL("any_resource should only be calling operator==");
-    return lhs.data != rhs.data;
-  }
-
-  void _assert_valid() const noexcept // REQUIREs that both cookie slots still hold the initial sentinel
-  {
-    REQUIRE(cookie[0] == 0xDEADBEEF);
-    REQUIRE(cookie[1] == 0xDEADBEEF);
-  }
-
-  static void* operator new(::cuda::std::size_t size) // counts the call, then defers to the global operator new
-  {
-    ++test_fixture_::counts_->new_count;
-    return ::operator new(size);
-  }
-
-  static void operator delete(void* pv) noexcept // counts the call, then defers to the global operator delete
-  {
-    ++test_fixture_::counts_->delete_count;
-    return ::operator delete(pv);
-  }
-};
-
-using big_resource   = test_resource<uintptr_t>;
-using small_resource = test_resource<unsigned int>;
-
-static_assert(sizeof(big_resource) > sizeof(cuda::mr::_AnyResourceStorage)); // big_resource must not fit
-static_assert(sizeof(small_resource) <= sizeof(cuda::mr::_AnyResourceStorage)); // small_resource must fit
-
 TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", big_resource, small_resource)
 {
   using TestResource    = TestType;