Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit c4eadef

Browse files
committed
Use CUB's new CDP macros.
1 parent 50316c7 commit c4eadef

24 files changed

+942
-2048
lines changed

thrust/system/cuda/config.h

-13
Original file line number | Diff line number | Diff line change
@@ -32,19 +32,6 @@
3232
// older releases. This header will always pull in version info:
3333
#include <cub/util_namespace.cuh>
3434

35-
#if defined(__CUDACC__) || defined(_NVHPC_CUDA)
36-
# if !defined(__CUDA_ARCH__) || defined(__CUDACC_RDC__)
37-
# define __THRUST_HAS_CUDART__ 1
38-
# define THRUST_RUNTIME_FUNCTION __host__ __device__ __forceinline__
39-
# else
40-
# define __THRUST_HAS_CUDART__ 0
41-
# define THRUST_RUNTIME_FUNCTION __host__ __forceinline__
42-
# endif
43-
#else
44-
# define __THRUST_HAS_CUDART__ 0
45-
# define THRUST_RUNTIME_FUNCTION __host__ __forceinline__
46-
#endif
47-
4835
#ifdef THRUST_AGENT_ENTRY_NOINLINE
4936
#define THRUST_AGENT_ENTRY_INLINE_ATTR __noinline__
5037
#else

thrust/system/cuda/detail/adjacent_difference.h

+18 -25
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
#include <thrust/detail/config.h>
3030

3131
#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
32+
3233
#include <thrust/detail/cstdint.h>
3334
#include <thrust/detail/minmax.h>
3435
#include <thrust/detail/temporary_array.h>
@@ -41,6 +42,7 @@
4142
#include <thrust/type_traits/is_contiguous_iterator.h>
4243
#include <thrust/type_traits/remove_cvref.h>
4344

45+
#include <cub/detail/cdp_dispatch.cuh>
4446
#include <cub/device/device_adjacent_difference.cuh>
4547
#include <cub/device/device_select.cuh>
4648
#include <cub/util_math.cuh>
@@ -64,7 +66,7 @@ namespace __adjacent_difference {
6466
class InputIt,
6567
class OutputIt,
6668
class BinaryOp>
67-
cudaError_t THRUST_RUNTIME_FUNCTION
69+
cudaError_t CUB_RUNTIME_FUNCTION
6870
doit_step(void *d_temp_storage,
6971
size_t &temp_storage_bytes,
7072
InputIt first,
@@ -114,7 +116,7 @@ namespace __adjacent_difference {
114116
template <class InputIt,
115117
class OutputIt,
116118
class BinaryOp>
117-
cudaError_t THRUST_RUNTIME_FUNCTION
119+
cudaError_t CUB_RUNTIME_FUNCTION
118120
doit_step(void *d_temp_storage,
119121
size_t &temp_storage_bytes,
120122
InputIt first,
@@ -139,7 +141,7 @@ namespace __adjacent_difference {
139141
template <class InputIt,
140142
class OutputIt,
141143
class BinaryOp>
142-
cudaError_t THRUST_RUNTIME_FUNCTION
144+
cudaError_t CUB_RUNTIME_FUNCTION
143145
doit_step(void *d_temp_storage,
144146
size_t &temp_storage_bytes,
145147
InputIt first,
@@ -181,7 +183,7 @@ namespace __adjacent_difference {
181183
typename InputIt,
182184
typename OutputIt,
183185
typename BinaryOp>
184-
OutputIt THRUST_RUNTIME_FUNCTION
186+
OutputIt CUB_RUNTIME_FUNCTION
185187
adjacent_difference(execution_policy<Derived>& policy,
186188
InputIt first,
187189
InputIt last,
@@ -260,27 +262,18 @@ adjacent_difference(execution_policy<Derived> &policy,
260262
OutputIt result,
261263
BinaryOp binary_op)
262264
{
263-
OutputIt ret = result;
264-
if (__THRUST_HAS_CUDART__)
265-
{
266-
ret = __adjacent_difference::adjacent_difference(policy,
267-
first,
268-
last,
269-
result,
270-
binary_op);
271-
}
272-
else
273-
{
274-
#if !__THRUST_HAS_CUDART__
275-
ret = thrust::adjacent_difference(cvt_to_seq(derived_cast(policy)),
276-
first,
277-
last,
278-
result,
279-
binary_op);
280-
#endif
281-
}
282-
283-
return ret;
265+
CUB_CDP_DISPATCH(
266+
(result = __adjacent_difference::adjacent_difference(policy,
267+
first,
268+
last,
269+
result,
270+
binary_op);),
271+
(result = thrust::adjacent_difference(cvt_to_seq(derived_cast(policy)),
272+
first,
273+
last,
274+
result,
275+
binary_op);));
276+
return result;
284277
}
285278

286279
template <class Derived,

thrust/system/cuda/detail/copy.h

+17 -29
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,14 @@
2828

2929
#include <thrust/detail/config.h>
3030

31+
#include <thrust/advance.h>
32+
3133
#include <thrust/system/cuda/config.h>
3234
#include <thrust/system/cuda/detail/execution_policy.h>
3335
#include <thrust/system/cuda/detail/cross_system.h>
3436

37+
#include <cub/detail/cdp_dispatch.cuh>
38+
3539
THRUST_NAMESPACE_BEGIN
3640

3741
template <typename DerivedPolicy, typename InputIt, typename OutputIt>
@@ -117,22 +121,11 @@ copy(execution_policy<System> &system,
117121
InputIterator last,
118122
OutputIterator result)
119123
{
120-
OutputIterator ret = result;
121-
if (__THRUST_HAS_CUDART__)
122-
{
123-
ret = __copy::device_to_device(system, first, last, result);
124-
}
125-
else
126-
{
127-
#if !__THRUST_HAS_CUDART__
128-
ret = thrust::copy(cvt_to_seq(derived_cast(system)),
129-
first,
130-
last,
131-
result);
132-
#endif
133-
}
134-
135-
return ret;
124+
CUB_CDP_DISPATCH(
125+
(result = __copy::device_to_device(system, first, last, result);),
126+
(result =
127+
thrust::copy(cvt_to_seq(derived_cast(system)), first, last, result);));
128+
return result;
136129
} // end copy()
137130

138131
__thrust_exec_check_disable__
@@ -146,19 +139,14 @@ copy_n(execution_policy<System> &system,
146139
Size n,
147140
OutputIterator result)
148141
{
149-
OutputIterator ret = result;
150-
if (__THRUST_HAS_CUDART__)
151-
{
152-
ret = __copy::device_to_device(system, first, first + n, result);
153-
}
154-
else
155-
{
156-
#if !__THRUST_HAS_CUDART__
157-
ret = thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result);
158-
#endif
159-
}
160-
161-
return ret;
142+
CUB_CDP_DISPATCH(
143+
(result = __copy::device_to_device(system,
144+
first,
145+
thrust::next(first, n),
146+
result);),
147+
(result =
148+
thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result);));
149+
return result;
162150
} // end copy_n()
163151
#endif
164152

thrust/system/cuda/detail/copy_if.h

+40 -63
Original file line number | Diff line number | Diff line change
@@ -29,19 +29,20 @@
2929
#include <thrust/detail/config.h>
3030

3131
#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
32-
#include <thrust/system/cuda/config.h>
3332

33+
#include <thrust/detail/alignment.h>
3434
#include <thrust/detail/cstdint.h>
35+
#include <thrust/detail/function.h>
3536
#include <thrust/detail/temporary_array.h>
36-
#include <thrust/system/cuda/detail/util.h>
37-
#include <cub/device/device_select.cuh>
37+
#include <thrust/distance.h>
38+
#include <thrust/system/cuda/config.h>
3839
#include <thrust/system/cuda/detail/core/agent_launcher.h>
3940
#include <thrust/system/cuda/detail/core/util.h>
4041
#include <thrust/system/cuda/detail/par_to_seq.h>
41-
#include <thrust/detail/function.h>
42-
#include <thrust/distance.h>
43-
#include <thrust/detail/alignment.h>
42+
#include <thrust/system/cuda/detail/util.h>
4443

44+
#include <cub/detail/cdp_dispatch.cuh>
45+
#include <cub/device/device_select.cuh>
4546
#include <cub/util_math.cuh>
4647

4748
THRUST_NAMESPACE_BEGIN
@@ -598,17 +599,17 @@ namespace __copy_if {
598599
class Predicate,
599600
class Size,
600601
class NumSelectedOutIt>
601-
static cudaError_t THRUST_RUNTIME_FUNCTION
602-
doit_step(void * d_temp_storage,
603-
size_t & temp_storage_bytes,
604-
ItemsIt items,
605-
StencilIt stencil,
606-
OutputIt output_it,
607-
Predicate predicate,
608-
NumSelectedOutIt num_selected_out,
609-
Size num_items,
610-
cudaStream_t stream,
611-
bool debug_sync)
602+
CUB_RUNTIME_FUNCTION
603+
static cudaError_t doit_step(void * d_temp_storage,
604+
size_t & temp_storage_bytes,
605+
ItemsIt items,
606+
StencilIt stencil,
607+
OutputIt output_it,
608+
Predicate predicate,
609+
NumSelectedOutIt num_selected_out,
610+
Size num_items,
611+
cudaStream_t stream,
612+
bool debug_sync)
612613
{
613614
if (num_items == 0)
614615
return cudaSuccess;
@@ -695,7 +696,7 @@ namespace __copy_if {
695696
typename StencilIt,
696697
typename OutputIt,
697698
typename Predicate>
698-
THRUST_RUNTIME_FUNCTION
699+
CUB_RUNTIME_FUNCTION
699700
OutputIt copy_if(execution_policy<Derived>& policy,
700701
InputIt first,
701702
InputIt last,
@@ -789,28 +790,18 @@ copy_if(execution_policy<Derived> &policy,
789790
OutputIterator result,
790791
Predicate pred)
791792
{
792-
OutputIterator ret = result;
793-
794-
if (__THRUST_HAS_CUDART__)
795-
{
796-
ret = __copy_if::copy_if(policy,
797-
first,
798-
last,
799-
__copy_if::no_stencil_tag(),
800-
result,
801-
pred);
802-
}
803-
else
804-
{
805-
#if !__THRUST_HAS_CUDART__
806-
ret = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
807-
first,
808-
last,
809-
result,
810-
pred);
811-
#endif
812-
}
813-
return ret;
793+
CUB_CDP_DISPATCH((result = __copy_if::copy_if(policy,
794+
first,
795+
last,
796+
__copy_if::no_stencil_tag(),
797+
result,
798+
pred);),
799+
(result = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
800+
first,
801+
last,
802+
result,
803+
pred);));
804+
return result;
814805
} // func copy_if
815806

816807
__thrust_exec_check_disable__
@@ -827,29 +818,15 @@ copy_if(execution_policy<Derived> &policy,
827818
OutputIterator result,
828819
Predicate pred)
829820
{
830-
OutputIterator ret = result;
831-
832-
if (__THRUST_HAS_CUDART__)
833-
{
834-
ret = __copy_if::copy_if(policy,
835-
first,
836-
last,
837-
stencil,
838-
result,
839-
pred);
840-
}
841-
else
842-
{
843-
#if !__THRUST_HAS_CUDART__
844-
ret = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
845-
first,
846-
last,
847-
stencil,
848-
result,
849-
pred);
850-
#endif
851-
}
852-
return ret;
821+
CUB_CDP_DISPATCH(
822+
(result = __copy_if::copy_if(policy, first, last, stencil, result, pred);),
823+
(result = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
824+
first,
825+
last,
826+
stencil,
827+
result,
828+
pred);));
829+
return result;
853830
} // func copy_if
854831

855832
} // namespace cuda_cub

0 commit comments

Comments
 (0)