Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement cudax::shared_resource #2398

Merged
merged 9 commits into from
Sep 19, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H
#define _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

// If the memory resource header was included without the experimental flag,
// tell the user to define the experimental flag.
#if defined(_CUDA_MEMORY_RESOURCE) && !defined(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)
# error "To use the experimental memory resource, define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE"
#endif

// cuda::mr is unavailable on MSVC 2017
#if defined(_CCCL_COMPILER_MSVC_2017)
# error "The shared_resource header is not supported on MSVC 2017"
#endif

#if !defined(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)
# define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE
#endif

#include <cuda/__memory_resource/resource.h>
#include <cuda/std/__type_traits/is_swappable.h>
#include <cuda/std/__utility/move.h>
#include <cuda/std/atomic>
#include <cuda/std/type_traits>

namespace cuda::experimental::mr
{

//! @rst
//! .. _cudax-memory-resource-shared-resource:
//!
//! Resource wrapper to share ownership of a resource
//! --------------------------------------------------
//!
//! ``shared_resource`` holds a reference counted instance of a memory resource. This allows
//! the user to pass a resource around with reference semantics while avoiding lifetime issues.
//!
//! @endrst
template <class _Resource>
struct shared_resource
{
// Reject types that do not model the resource concept up front, with a diagnostic that names
// the requirement instead of an empty message.
static_assert(_CUDA_VMR::resource<_Resource>,
              "shared_resource<_Resource>: _Resource must satisfy the resource concept");

//! @brief Constructs a \c shared_resource refering to an object of type \c _Resource
//! that has been constructed with arguments \c __args. The \c _Resource object is
//! dynamically allocated with \c new.
//! @param __args The arguments to be passed to the \c _Resource constructor.
template <class... _Args>
explicit shared_resource(_Args&&... __args)
: __control_block(new _Control_block{_Resource{_CUDA_VSTD::forward<_Args>(__args)...}, 1})
{}

//! @brief Copy-constructs a \c shared_resource object resulting in an copy that shares
//! ownership of the wrapped resource with \c __other.
//! @param __other The \c shared_resource object to copy from.
shared_resource(const shared_resource& __other) noexcept
: __control_block(__other.__control_block)
{
if (__control_block)
{
__control_block->__ref_count.fetch_add(1, _CUDA_VSTD::memory_order_relaxed);
}
}

//! @brief Move-constructs a \c shared_resource by taking over the reference held by
//! \c __other. The reference count of the wrapped resource is not modified.
//! @param __other The \c shared_resource object to move from.
//! @post \c __other is left in a valid but unspecified state; it no longer refers to the
//! resource it held before the move.
shared_resource(shared_resource&& __other) noexcept
    : __control_block(__other.__control_block)
{
  // Detach the source so that its destructor does not release the reference we just took over.
  __other.__control_block = nullptr;
}

//! @brief Drops the reference held by this \c shared_resource object, if any. When the
//! reference being dropped is the last one, the control block (and with it the wrapped
//! resource) is deleted.
~shared_resource()
{
  // An object that holds no reference has nothing to release.
  if (__control_block == nullptr)
  {
    return;
  }

  // fetch_sub yields the count *before* the decrement, so 1 means we were the last owner.
  const int __previous_count = __control_block->__ref_count.fetch_sub(1, _CUDA_VSTD::memory_order_acq_rel);
  if (__previous_count == 1)
  {
    delete __control_block;
  }
}

//! @brief Copy-assigns from \c __other. Assigning an object to itself does nothing. In every
//! other case this object acquires a reference to the resource wrapped by \c __other (if any)
//! and releases the reference it held before.
//! @param __other The \c shared_resource object to copy from.
//! @return A reference to \c *this.
shared_resource& operator=(const shared_resource& __other) noexcept
{
  if (this == &__other)
  {
    return *this;
  }

  {
    // Copy first, then swap: the previous reference ends up in the temporary and is released
    // when the temporary is destroyed at the end of this scope.
    shared_resource __copy(__other);
    __copy.swap(*this);
  }

  return *this;
}

//! @brief Move-assigns from \c __other. Self-assignment is a no-op. Otherwise, the reference
//! held by this \c shared_resource object is released, while the reference held by \c __other
//! is transferred to this object.
//! @param __other The \c shared_resource object to move from.
//! @post \c __other is left in a valid but unspecified state.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "valid but unspecified state" mean? Is it valid to use other?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is the common phrase of move semantics.

It means don't touch it

shared_resource& operator=(shared_resource&& __other) noexcept
{
  if (this == &__other)
  {
    return *this;
  }

  {
    // Steal the reference from __other into a temporary, then swap it into place. The
    // reference previously held by this object is released when the temporary is destroyed
    // at the end of this scope.
    shared_resource __stolen(_CUDA_VSTD::move(__other));
    __stolen.swap(*this);
  }

  return *this;
}

//! @brief Exchanges the references held by this \c shared_resource and \c __other. No
//! reference count is modified.
//! @param __other The \c shared_resource to exchange references with.
void swap(shared_resource& __other) noexcept
{
  _Control_block* const __mine = __control_block;
  __control_block              = __other.__control_block;
  __other.__control_block      = __mine;
}

//! @brief Non-member swap; exchanges the references held by two \c shared_resource objects.
//! @param __left The first \c shared_resource.
//! @param __right The second \c shared_resource.
friend void swap(shared_resource& __left, shared_resource& __right) noexcept
{
  // Swapping is symmetric, so it does not matter which operand the member function is called on.
  __right.swap(__left);
}

//! @brief Allocates at least \p __bytes bytes of memory by forwarding the request to the
//! wrapped resource.
//! @param __bytes The size in bytes of the allocation.
//! @param __alignment The requested alignment of the allocation. Defaults to the alignment
//! of \c max_align_t.
//! @return Pointer to the newly allocated memory, as returned by the wrapped resource.
_CCCL_NODISCARD void* allocate(size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t))
{
  _Resource& __wrapped = __control_block->__resource;
  return __wrapped.allocate(__bytes, __alignment);
}

//! @brief Deallocate memory pointed to by \p __ptr using the stored resource.
//! @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate`
//! @param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr.
//! @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr.
void deallocate(void* __ptr, size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t)) noexcept
{
__control_block->__resource.deallocate(__ptr, __bytes, __alignment);
}

//! @brief Enqueues an allocation of at least \p __bytes bytes on the wrapped resource. The
//! request is forwarded to the wrapped resource's \c async_allocate together with \c __stream.
//! @pre \c _Resource must satisfy \c async_resource.
//! @param __bytes The size in bytes of the allocation.
//! @param __alignment The requested alignment of the allocation.
//! @param __stream The stream on which the allocation is enqueued.
//! @return Pointer to the newly allocated memory.
//! @note The caller is responsible for ensuring that the memory is not accessed until the
//! operation has completed.
_LIBCUDACXX_TEMPLATE(class _ThisResource = _Resource)
_LIBCUDACXX_REQUIRES(_CUDA_VMR::async_resource<_ThisResource>)
_CCCL_NODISCARD void* async_allocate(size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream)
{
  _Resource& __wrapped = this->__control_block->__resource;
  return __wrapped.async_allocate(__bytes, __alignment, __stream);
}

//! @brief Enqueues the deallocation of the memory pointed to by \c __ptr. The request is
//! forwarded to the wrapped resource's \c async_deallocate together with \c __stream.
//! @pre \c _Resource must satisfy \c async_resource.
//! @param __ptr Pointer to be deallocated.
//! @param __bytes The number of bytes that was passed to the `async_allocate` call that returned
//! \p __ptr.
//! @param __alignment The alignment that was passed to the `async_allocate` call that returned
//! \p __ptr.
//! @param __stream The stream on which the deallocation is enqueued.
//! @note The caller is responsible for ensuring that the memory is not accessed after the
//! operation has completed.
_LIBCUDACXX_TEMPLATE(class _ThisResource = _Resource)
_LIBCUDACXX_REQUIRES(_CUDA_VMR::async_resource<_ThisResource>)
void async_deallocate(void* __ptr, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream)
{
  _Resource& __wrapped = this->__control_block->__resource;
  __wrapped.async_deallocate(__ptr, __bytes, __alignment, __stream);
}

//! @brief Equality comparison between two \c shared_resource objects.
//! @param __lhs The first \c shared_resource
//! @param __rhs The other \c shared_resource
//! @return \c true if both objects share the same control block (including the case where
//! neither references a resource), \c false if exactly one of them references a resource,
//! and otherwise the result of comparing the two wrapped resources with \c ==.
_CCCL_NODISCARD_FRIEND bool operator==(const shared_resource& __lhs, const shared_resource& __rhs)
{
  _Control_block* const __left_block  = __lhs.__control_block;
  _Control_block* const __right_block = __rhs.__control_block;

  // Same block (or both empty): trivially equal, no need to consult the resource.
  if (__left_block == __right_block)
  {
    return true;
  }

  // The blocks differ, so at most one side can be empty; an empty side never equals a
  // non-empty one.
  if (!__left_block || !__right_block)
  {
    return false;
  }

  return __left_block->__resource == __right_block->__resource;
}

//! @brief Inequality comparison between two \c shared_resource objects.
//! @param __lhs The first \c shared_resource
//! @param __rhs The other \c shared_resource
//! @return The negation of `__lhs == __rhs`.
_CCCL_NODISCARD_FRIEND bool operator!=(const shared_resource& __lhs, const shared_resource& __rhs)
{
  const bool __are_equal = (__lhs == __rhs);
  return !__are_equal;
}

//! @brief Forwards the stateless properties of the wrapped resource.
//!
//! This overload only participates in overload resolution when \c _Property does not satisfy
//! \c property_with_value and \c has_property<_Resource, _Property> holds. A stateless property
//! carries no value, so the function body is intentionally empty and the control block is
//! never accessed.
_LIBCUDACXX_TEMPLATE(class _Property)
_LIBCUDACXX_REQUIRES((!property_with_value<_Property>) _LIBCUDACXX_AND(has_property<_Resource, _Property>))
friend void get_property(const shared_resource&, _Property) noexcept {}

//! @brief Forwards the stateful properties of the wrapped resource.
//!
//! This overload only participates in overload resolution when \c _Property satisfies
//! \c property_with_value and \c has_property<_Resource, _Property> holds.
//! @param __self The \c shared_resource whose wrapped resource is queried.
//! @return The value obtained by calling \c get_property on the wrapped resource.
_LIBCUDACXX_TEMPLATE(class _Property)
_LIBCUDACXX_REQUIRES(property_with_value<_Property> _LIBCUDACXX_AND(has_property<_Resource, _Property>))
_CCCL_NODISCARD_FRIEND __property_value_t<_Property> get_property(const shared_resource& __self, _Property) noexcept
{
  _Resource& __wrapped = __self.__control_block->__resource;
  return get_property(__wrapped, _Property{});
}

private:
// Use a custom shared_ptr implementation because (a) we don't need to support weak_ptr so we only
// need one pointer, not two, and (b) this implementation can work on device also.
//
// The control block is an aggregate that is brace-initialized by the constructing
// shared_resource, so the order of the two members below must not be changed.
struct _Control_block
{
// The wrapped resource that all sharing shared_resource objects forward to.
_Resource __resource;
// Number of shared_resource objects currently referring to this block. It is initialized to 1
// on construction, incremented on copy and decremented on destruction; the block is deleted
// when the decrement observes a previous value of 1.
_CUDA_VSTD::atomic<int> __ref_count;
};

// Pointer to the shared control block. It is nullptr after this object has been moved from.
_Control_block* __control_block;
};

} // namespace cuda::experimental::mr

#endif // _CUDAX__MEMORY_RESOURCE_SHARED_RESOURCE_H
1 change: 1 addition & 0 deletions cudax/include/cuda/experimental/memory_resource.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@
#include <cuda/experimental/__memory_resource/any_resource.cuh>
#include <cuda/experimental/__memory_resource/async_memory_pool.cuh>
#include <cuda/experimental/__memory_resource/async_memory_resource.cuh>
#include <cuda/experimental/__memory_resource/shared_resource.cuh>

#endif // __CUDAX_MEMORY_RESOURCE___
1 change: 1 addition & 0 deletions cudax/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ foreach(cn_target IN LISTS cudax_TARGETS)
memory_resource/any_resource.cu
memory_resource/async_memory_pool.cu
memory_resource/async_memory_resource.cu
memory_resource/shared_resource.cu
)

cudax_add_catch2_test(test_target async_tests ${cn_target}
Expand Down
Loading
Loading