Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework CUDA compiler checks #3057

Merged
merged 7 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions cub/cub/device/dispatch/dispatch_transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
# pragma system_header
#endif // no system header

#if defined(_CCCL_CUDA_COMPILER) && _CCCL_CUDACC_BELOW(11, 5)
#if _CCCL_CUDACC_BELOW(11, 5)
_CCCL_NV_DIAG_SUPPRESS(186)
# include <cuda_pipeline_primitives.h>
// we cannot re-enable the warning here, because it is triggered outside the translation unit
// see also: https://godbolt.org/z/1x8b4hn3G
#endif // defined(_CCCL_CUDA_COMPILER) && _CCCL_CUDACC_BELOW(11, 5)
#endif // _CCCL_CUDACC_BELOW(11, 5)

#include <cub/detail/uninitialized_copy.cuh>
#include <cub/util_arch.cuh>
Expand Down Expand Up @@ -46,7 +46,7 @@ _CCCL_NV_DIAG_SUPPRESS(186)
#include <cassert>

// cooperative groups do not support NVHPC yet
#ifndef _CCCL_CUDA_COMPILER_NVHPC
#if !_CCCL_CUDA_COMPILER(NVHPC)
# include <cooperative_groups.h>
# include <cooperative_groups/memcpy_async.h>
#endif
Expand All @@ -55,9 +55,9 @@ CUB_NAMESPACE_BEGIN

// The ublkcp kernel needs PTX features that are only available and understood by nvcc >=12.
// Also, cooperative groups do not support NVHPC yet.
#if _CCCL_CUDACC_AT_LEAST(12, 0) && !defined(_CCCL_CUDA_COMPILER_NVHPC)
#if _CCCL_CUDACC_AT_LEAST(12) && !_CCCL_CUDA_COMPILER(NVHPC)
# define _CUB_HAS_TRANSFORM_UBLKCP
#endif // _CCCL_CUDACC_AT_LEAST(12, 0) && !defined(_CCCL_CUDA_COMPILER_NVHPC)
#endif // _CCCL_CUDACC_AT_LEAST(12) && !_CCCL_CUDA_COMPILER(NVHPC)

namespace detail
{
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/launcher/cuda_driver.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# pragma system_header
#endif // no system header

#if _CCCL_CUDACC_AT_LEAST(12, 0)
#if _CCCL_CUDACC_AT_LEAST(12)

# include <cuda.h>

Expand Down Expand Up @@ -80,4 +80,4 @@ struct CudaDriverLauncherFactory

CUB_NAMESPACE_END

#endif // _CCCL_CUDACC_AT_LEAST(12, 0)
#endif // _CCCL_CUDACC_AT_LEAST(12)
48 changes: 24 additions & 24 deletions cub/cub/thread/thread_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -451,15 +451,15 @@ struct SimdMin<__half>

_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __half2 operator()(__half2 a, __half2 b) const
{
# if _CCCL_CUDACC_BELOW(12, 0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
# if _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC)
return __floats2half2_rn(::cuda::minimum<>{}(__half2float(a.x), __half2float(b.x)),
::cuda::minimum<>{}(__half2float(a.y), __half2float(b.y)));
# else // ^^^ _CCCL_CUDACC_BELOW(12, 0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
# else // ^^^ _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv otherwise vvv
NV_IF_TARGET(NV_PROVIDES_SM_80,
(return __hmin2(a, b);),
(return __halves2half2(__float2half(::cuda::minimum<>{}(__half2float(a.x), __half2float(b.x))),
__float2half(::cuda::minimum<>{}(__half2float(a.y), __half2float(b.y))));));
# endif // !_CCCL_CUDACC_BELOW(12, 0) || !_CCCL_CUDA_COMPILER_NVHPC
# endif // !_CCCL_CUDACC_BELOW(12) || !_CCCL_CUDA_COMPILER(NVHPC)
}
};

Expand All @@ -486,16 +486,16 @@ struct SimdMin<__nv_bfloat16>

_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __nv_bfloat162 operator()(__nv_bfloat162 a, __nv_bfloat162 b) const
{
# if _CCCL_CUDACC_BELOW(12, 0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
# if _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC)
return __floats2bfloat162_rn(::cuda::minimum<>{}(__bfloat162float(a.x), __bfloat162float(b.x)),
::cuda::minimum<>{}(__bfloat162float(a.y), __bfloat162float(b.y)));
# else // ^^^ _CCCL_CUDACC_BELOW(12, 0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
# else // ^^^ _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv otherwise vvv
NV_IF_TARGET(NV_PROVIDES_SM_80,
(return __hmin2(a, b);),
(return cub::internal::halves2bfloat162(
__float2bfloat16(::cuda::minimum<>{}(__bfloat162float(a.x), __bfloat162float(b.x))),
__float2bfloat16(::cuda::minimum<>{}(__bfloat162float(a.y), __bfloat162float(b.y))));));
# endif // !_CCCL_CUDACC_BELOW(12, 0) || !_CCCL_CUDA_COMPILER_NVHPC
# endif // !_CCCL_CUDACC_BELOW(12) || !_CCCL_CUDA_COMPILER(NVHPC)
}
};

Expand Down Expand Up @@ -542,15 +542,15 @@ struct SimdMax<__half>

_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __half2 operator()(__half2 a, __half2 b) const
{
# if _CCCL_CUDACC_BELOW(12, 0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
# if _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC)
return __floats2half2_rn(::cuda::maximum<>{}(__half2float(a.x), __half2float(b.x)),
::cuda::maximum<>{}(__half2float(a.y), __half2float(b.y)));
# else // ^^^ _CCCL_CUDACC_BELOW(12, 0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
# else // ^^^ _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv otherwise vvv
NV_IF_TARGET(NV_PROVIDES_SM_80,
(return __hmax2(a, b);),
(return __halves2half2(__float2half(::cuda::maximum<>{}(__half2float(a.x), __half2float(b.x))),
__float2half(::cuda::maximum<>{}(__half2float(a.y), __half2float(b.y))));));
# endif // !_CCCL_CUDACC_BELOW(12, 0) || !_CCCL_CUDA_COMPILER_NVHPC
# endif // !_CCCL_CUDACC_BELOW(12) || !_CCCL_CUDA_COMPILER(NVHPC)
}
};

Expand All @@ -565,16 +565,16 @@ struct SimdMax<__nv_bfloat16>

_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __nv_bfloat162 operator()(__nv_bfloat162 a, __nv_bfloat162 b) const
{
# if _CCCL_CUDACC_BELOW(12, 0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
# if _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC)
return __floats2bfloat162_rn(::cuda::maximum<>{}(__bfloat162float(a.x), __bfloat162float(b.x)),
::cuda::maximum<>{}(__bfloat162float(a.y), __bfloat162float(b.y)));
# else // ^^^ _CCCL_CUDACC_BELOW(12, 0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
# else // ^^^ _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv otherwise vvv
NV_IF_TARGET(NV_PROVIDES_SM_80,
(return __hmax2(a, b);),
(return cub::internal::halves2bfloat162(
__float2bfloat16(::cuda::maximum<>{}(__bfloat162float(a.x), __bfloat162float(b.x))),
__float2bfloat16(::cuda::maximum<>{}(__bfloat162float(a.y), __bfloat162float(b.y))));));
# endif // !_CCCL_CUDACC_BELOW(12, 0) || !_CCCL_CUDA_COMPILER_NVHPC
# endif // !_CCCL_CUDACC_BELOW(12) || !_CCCL_CUDA_COMPILER(NVHPC)
}
};

Expand All @@ -597,14 +597,14 @@ struct SimdSum<__half>

_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __half2 operator()(__half2 a, __half2 b) const
{
# if _CCCL_CUDACC_BELOW(12, 0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
# if _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC)
return __floats2half2_rn(__half2float(a.x) + __half2float(b.x), __half2float(a.y) + __half2float(b.y));
# else // ^^^ _CCCL_CUDACC_BELOW(12, 0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
# else // ^^^ _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv otherwise vvv
NV_IF_TARGET(NV_PROVIDES_SM_53,
(return __hadd2(a, b);),
(return __halves2half2(__float2half(__half2float(a.x) + __half2float(b.x)),
__float2half(__half2float(a.y) + __half2float(b.y)));));
# endif // !_CCCL_CUDACC_BELOW(12, 0) || !_CCCL_CUDA_COMPILER_NVHPC
# endif // !_CCCL_CUDACC_BELOW(12) || !_CCCL_CUDA_COMPILER(NVHPC)
}
};

Expand All @@ -619,16 +619,16 @@ struct SimdSum<__nv_bfloat16>

_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __nv_bfloat162 operator()(__nv_bfloat162 a, __nv_bfloat162 b) const
{
# if _CCCL_CUDACC_BELOW(12, 0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
# if _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC)
return __floats2bfloat162_rn(
__bfloat162float(a.x) + __bfloat162float(b.x), __bfloat162float(a.y) + __bfloat162float(b.y));
# else // ^^^ _CCCL_CUDACC_BELOW(12, 0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
# else // ^^^ _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv otherwise vvv
NV_IF_TARGET(
NV_PROVIDES_SM_80,
(return __hadd2(a, b);),
(return cub::internal::halves2bfloat162(__float2bfloat16(__bfloat162float(a.x) + __bfloat162float(b.x)),
__float2bfloat16(__bfloat162float(a.y) + __bfloat162float(b.y)));));
# endif // !_CCCL_CUDACC_BELOW(12, 0) || !_CCCL_CUDA_COMPILER_NVHPC
# endif // !_CCCL_CUDACC_BELOW(12) || !_CCCL_CUDA_COMPILER(NVHPC)
}
};

Expand All @@ -651,14 +651,14 @@ struct SimdMul<__half>

_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __half2 operator()(__half2 a, __half2 b) const
{
# if _CCCL_CUDACC_BELOW(12, 0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
# if _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC)
return __floats2half2_rn(__half2float(a.x) * __half2float(b.x), __half2float(a.y) * __half2float(b.y));
# else // ^^^ _CCCL_CUDACC_BELOW(12, 0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
# else // ^^^ _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv otherwise vvv
NV_IF_TARGET(NV_PROVIDES_SM_53,
(return __hmul2(a, b);),
(return __halves2half2(__float2half(__half2float(a.x) * __half2float(b.x)),
__float2half(__half2float(a.y) * __half2float(b.y)));));
# endif // !_CCCL_CUDACC_BELOW(12, 0) || !_CCCL_CUDA_COMPILER_NVHPC
# endif // !_CCCL_CUDACC_BELOW(12) || !_CCCL_CUDA_COMPILER(NVHPC)
}
};

Expand All @@ -673,15 +673,15 @@ struct SimdMul<__nv_bfloat16>

_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __nv_bfloat162 operator()(__nv_bfloat162 a, __nv_bfloat162 b) const
{
# if _CCCL_CUDACC_BELOW(12, 0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
# if _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC)
return __floats2bfloat162_rn(
__bfloat162float(a.x) * __bfloat162float(b.x), __bfloat162float(a.y) * __bfloat162float(b.y));
# else // ^^^ _CCCL_CUDACC_BELOW(12, 0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
# else // ^^^ _CCCL_CUDACC_BELOW(12) && _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv otherwise vvv
NV_IF_TARGET(NV_PROVIDES_SM_80,
(return __hmul2(a, b);),
(return halves2bfloat162(__float2bfloat16(__bfloat162float(a.x) * __bfloat162float(b.x)),
__float2bfloat16(__bfloat162float(a.y) * __bfloat162float(b.y)));));
# endif // !_CCCL_CUDACC_BELOW(12, 0) || !_CCCL_CUDA_COMPILER_NVHPC
# endif // !_CCCL_CUDACC_BELOW(12) || !_CCCL_CUDA_COMPILER(NVHPC)
}
};

Expand Down
4 changes: 2 additions & 2 deletions cub/cub/util_compiler.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
#endif

// figure out which device compiler we're using
#if defined(_CCCL_CUDA_COMPILER_NVCC) || defined(_CCCL_CUDA_COMPILER_NVHPC)
#if _CCCL_CUDA_COMPILER(NVCC) || _CCCL_CUDA_COMPILER(NVHPC)
//! deprecated [Since 2.7]
# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_NVCC
#elif _CCCL_COMPILER(MSVC)
Expand All @@ -97,7 +97,7 @@
# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_GCC
#elif _CCCL_COMPILER(CLANG)
// CUDA-capable clang should behave similarly to NVCC.
# if defined(_CCCL_CUDA_COMPILER_NVCC)
# if _CCCL_CUDA_COMPILER(NVCC)
//! deprecated [Since 2.7]
# define CUB_DEVICE_COMPILER CUB_DEVICE_COMPILER_NVCC
# else
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/util_macro.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ constexpr _CCCL_HOST_DEVICE auto max CUB_PREVENT_MACRO_SUBSTITUTION(T&& t, U&& u
#if !defined(CUB_DISABLE_KERNEL_VISIBILITY_WARNING_SUPPRESSION)
_CCCL_DIAG_SUPPRESS_GCC("-Wattributes")
_CCCL_DIAG_SUPPRESS_CLANG("-Wattributes")
# if !defined(_CCCL_CUDA_COMPILER_NVHPC)
# if !_CCCL_CUDA_COMPILER(NVHPC)
_CCCL_DIAG_SUPPRESS_NVHPC(attribute_requires_external_linkage)
# endif // !_CCCL_CUDA_COMPILER(NVHPC)
# if _CCCL_COMPILER(ICC)
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/continue_on.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ namespace cuda::experimental::__async
{
struct continue_on_t
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)
template <class... _As>
using __set_value_tuple_t = __tuple<set_value_t, __decay_t<_As>...>;

Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/just.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ extern __fn_t<just_stopped_t>* __just_tag<__stopped, _Void>;
template <__disposition_t _Disposition>
struct __just
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)

using _JustTag = decltype(__detail::__just_tag<_Disposition>());
using _SetTag = decltype(__detail::__set_tag<_Disposition>());
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/just_from.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ struct _A_STOPPED_COMPLETION_MUST_HAVE_NO_ARGUMENTS;
template <__disposition_t _Disposition>
struct __just_from
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)

using _JustTag = decltype(__detail::__just_from_tag<_Disposition>());
using _SetTag = decltype(__detail::__set_tag<_Disposition>());
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/let_value.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ extern __fn_t<let_stopped_t>* __let_tag<__stopped, _Void>;
template <__disposition_t _Disposition>
struct __let
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)
using _LetTag = decltype(__detail::__let_tag<_Disposition>());
using _SetTag = decltype(__detail::__set_tag<_Disposition>());

Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/read_env.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ struct THE_CURRENT_ENVIRONMENT_LACKS_THIS_QUERY;

struct read_env_t
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)
template <class _Query, class _Env>
using __error_env_lacks_query = //
_ERROR<_WHERE(_IN_ALGORITHM, read_env_t),
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/start_detached.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ namespace cuda::experimental::__async
{
struct start_detached_t
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)
struct __opstate_base_t : __immovable
{};

Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/start_on.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ struct __sch_env_t

_CCCL_GLOBAL_CONSTANT struct start_on_t
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)

template <class _Rcvr, class _Sch, class _CvSndr>
struct __opstate_t
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/sync_wait.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ namespace cuda::experimental::__async
/// sender.
struct sync_wait_t
{
# if !defined(_CCCL_CUDA_COMPILER_NVCC)
# if !_CCCL_CUDA_COMPILER(NVCC)

private:
# endif // _CCCL_CUDA_COMPILER_NVCC
# endif // _CCCL_CUDA_COMPILER(NVCC)
struct __env_t
{
run_loop* __loop_;
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/then.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ using __completion = __completion_<__call_result_t<_Fn, _Ts...>, __nothrow_calla
template <__disposition_t _Disposition>
struct __upon_t
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)
using _UponTag = decltype(__detail::__upon_tag<_Disposition>());
using _SetTag = decltype(__detail::__set_tag<_Disposition>());

Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__async/write_env.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ namespace cuda::experimental::__async
{
struct write_env_t
{
#if !defined(_CCCL_CUDA_COMPILER_NVCC)
#if !_CCCL_CUDA_COMPILER(NVCC)

private:
#endif // _CCCL_CUDA_COMPILER_NVCC
#endif // _CCCL_CUDA_COMPILER(NVCC)
template <class _Rcvr, class _Sndr, class _Env>
struct __opstate_t
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
// cudaMallocAsync was introduced in CTK 11.2
#if !_CCCL_COMPILER(MSVC2017) && _CCCL_CUDACC_AT_LEAST(11, 2)

# if defined(_CCCL_CUDA_COMPILER_CLANG)
# if _CCCL_CUDA_COMPILER(CLANG)
# include <cuda_runtime.h>
# include <cuda_runtime_api.h>
# endif // _CCCL_CUDA_COMPILER_CLANG
# endif // _CCCL_CUDA_COMPILER(CLANG)

# include <cuda/__memory_resource/get_property.h>
# include <cuda/__memory_resource/properties.h>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
// cudaMallocAsync was introduced in CTK 11.2
#if !_CCCL_COMPILER(MSVC2017) && _CCCL_CUDACC_AT_LEAST(11, 2)

# if defined(_CCCL_CUDA_COMPILER_CLANG)
# if _CCCL_CUDA_COMPILER(CLANG)
# include <cuda_runtime.h>
# include <cuda_runtime_api.h>
# endif // _CCCL_CUDA_COMPILER_CLANG
# endif // _CCCL_CUDA_COMPILER(CLANG)

# include <cuda/__memory_resource/get_property.h>
# include <cuda/__memory_resource/properties.h>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
# pragma system_header
#endif // no system header

#if defined(_CCCL_CUDA_COMPILER_CLANG)
#if _CCCL_CUDA_COMPILER(CLANG)
# include <cuda_runtime_api.h>
#endif // _CCCL_CUDA_COMPILER_CLANG
#endif // _CCCL_CUDA_COMPILER(CLANG)

#include <cuda/__memory_resource/get_property.h>
#include <cuda/__memory_resource/properties.h>
Expand Down
Loading