[PoC]: Implement cuda::experimental::vector #1869

Closed · wants to merge 4 commits

6 changes: 4 additions & 2 deletions cudax/cmake/cudaxBuildCompilerTargets.cmake
@@ -11,9 +11,9 @@
 # be linked into the developer build targets, as they include both
 # cudax.compiler_interface and cccl.compiler_interface_cppXX.

-find_package(libcudacxx ${cudax_VERSION} EXACT CONFIG REQUIRED
+find_package(Thrust ${cudax_VERSION} EXACT CONFIG REQUIRED
   NO_DEFAULT_PATH # Only check the explicit path in HINTS:
-  HINTS "${CCCL_SOURCE_DIR}/lib/cmake/libcudacxx/"
+  HINTS "${CCCL_SOURCE_DIR}/lib/cmake/thrust/"
 )

 function(cudax_build_compiler_targets)
@@ -73,6 +73,8 @@ function(cudax_build_compiler_targets)
       cccl.compiler_interface_cpp${dialect}
       cudax.compiler_interface
       libcudacxx::libcudacxx
+      CUB::CUB
+      Thrust::Thrust
     )
   endforeach()

4 changes: 4 additions & 0 deletions cudax/examples/CMakeLists.txt
@@ -25,6 +25,10 @@ function(cudax_add_example target_name_var example_src cudax_target)
     ${cudax_target}
     cudax.examples.thrust
   )
+  target_compile_options(${example_target} PRIVATE
+    "-DLIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE"
+  )
+
   cudax_clone_target_properties(${example_target} ${cudax_target})
   target_include_directories(${example_target} PRIVATE "${CUB_SOURCE_DIR}/examples")

1 change: 0 additions & 1 deletion cudax/examples/simple_p2p.cu
@@ -30,7 +30,6 @@
  * Unified Virtual Address Space (UVA) features.
  */

-#define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE
 #include <cuda/memory_resource>

 #include <cuda/experimental/algorithm.cuh>
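For context outside this diff: the define removed here is the opt-in for libcu++'s experimental memory resources, which the CMake change above now injects as `-DLIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE` for all cudax examples. A minimal sketch of what a translation unit built without that CMake setup would still need (only the macro and header names come from this PR; the snippet itself is illustrative):

// Sketch only: opt in to the experimental memory resources before the include,
// mirroring what the removed line did and what the new CMake compile option now does.
#define LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE
#include <cuda/memory_resource>
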
90 changes: 90 additions & 0 deletions cudax/examples/vector_add_modern.cu
@@ -0,0 +1,90 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

/**
 * Vector addition: C = A + B.
 *
 * This is a very basic sample that implements element-by-element vector
 * addition. It is the same as the sample illustrating Chapter 2 of the
 * programming guide, with some additions like error checking.
 */

#include <thrust/generate.h>
#include <thrust/random.h>
#include <thrust/transform.h>

#include <cuda/experimental/container.cuh>
#include <cuda/experimental/memory_resource.cuh>
#include <cuda/experimental/stream.cuh>

#include <iostream>

namespace cudax = cuda::experimental;

constexpr int numElements = 50000;

struct generator
{
  thrust::default_random_engine gen{};
  thrust::uniform_real_distribution<float> dist{-10.0f, 10.0f};

  __host__ __device__ generator(const unsigned seed)
      : gen{seed}
  {}

  __host__ __device__ float operator()() noexcept
  {
    return dist(gen);
  }
};

int main()
{
  // A CUDA stream on which to execute the vector addition kernel
  cudax::stream stream{};

  // The execution policy we want to use to run all work on the same stream
  auto policy = thrust::cuda::par_nosync.on(stream.get());

  // An environment we use to pass all necessary information to the containers
  cudax::env_t<cuda::mr::device_accessible> env{cudax::device_memory_resource{}, stream};

  // Allocate the two inputs and the output; `cudax::uninit` skips zero-initialization
  cudax::async_device_vector<float> A{env, numElements, cudax::uninit};
  cudax::async_device_vector<float> B{env, numElements, cudax::uninit};
  cudax::async_device_vector<float> C{env, numElements, cudax::uninit};

  // Fill both input vectors on the stream using a random number generator
  thrust::generate(policy, A.begin(), A.end(), generator{42});
  thrust::generate(policy, B.begin(), B.end(), generator{1337});

  // Add the vectors together
  thrust::transform(policy, A.begin(), A.end(), B.begin(), C.begin(), cuda::std::plus<>{});

  // Verify that the result vector is correct by copying it to the host
  cudax::env_t<cuda::mr::host_accessible> host_env{cudax::pinned_memory_resource{}, stream};
  cudax::async_host_vector<float> h_A{host_env, A};
  cudax::async_host_vector<float> h_B{host_env, B};
  cudax::async_host_vector<float> h_C{host_env, C};

  // Do not forget to synchronize the stream before reading the results on the host
  stream.wait();

  for (int i = 0; i < numElements; ++i)
  {
    if (cuda::std::abs(h_A[i] + h_B[i] - h_C[i]) > 1e-5)
    {
      std::cerr << "Result verification failed at element " << i << "\n";
      exit(EXIT_FAILURE);
    }
  }

  return 0;
}
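
A side note on the verification step in the example above: the pinned host copies exist only to check `C`. Below is a hedged sketch of how the same check could stay on the device using standard Thrust facilities, reusing `policy`, `A`, `B`, and `C` from the example; the helper names (`sum_matches`, `result_is_correct`) are illustrative and not part of this PR.

#include <thrust/functional.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform_reduce.h>
#include <thrust/tuple.h>

#include <cuda/std/cmath>

// Predicate over one (a, b, c) triple: does a + b match the computed c within tolerance?
struct sum_matches
{
  __host__ __device__ bool operator()(const thrust::tuple<float, float, float>& t) const
  {
    return cuda::std::abs(thrust::get<0>(t) + thrust::get<1>(t) - thrust::get<2>(t)) <= 1e-5f;
  }
};

// Reduce the predicate over all (A[i], B[i], C[i]) triples on the stream's execution policy.
template <class Policy, class Iterator>
bool result_is_correct(Policy&& policy, Iterator a_first, Iterator a_last, Iterator b_first, Iterator c_first)
{
  const auto n     = a_last - a_first;
  const auto first = thrust::make_zip_iterator(thrust::make_tuple(a_first, b_first, c_first));
  return thrust::transform_reduce(policy, first, first + n, sum_matches{}, true, thrust::logical_and<bool>{});
}

Called as `result_is_correct(policy, A.begin(), A.end(), B.begin(), C.begin())`, this avoids the three host allocations at the cost of a device-side reduction; `stream.wait()` would still be needed before any other host-side access to the data.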