Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit c2d5261

Browse files
committed
Use CUB's new CDP macros.
1 parent e9953c8 commit c2d5261

24 files changed

+962
-2070
lines changed

thrust/system/cuda/config.h

-13
Original file line number | Diff line number | Diff line change
@@ -32,19 +32,6 @@
3232
// older releases. This header will always pull in version info:
3333
#include <cub/util_namespace.cuh>
3434

35-
#if defined(__CUDACC__) || defined(_NVHPC_CUDA)
36-
# if !defined(__CUDA_ARCH__) || defined(__CUDACC_RDC__)
37-
# define __THRUST_HAS_CUDART__ 1
38-
# define THRUST_RUNTIME_FUNCTION __host__ __device__ __forceinline__
39-
# else
40-
# define __THRUST_HAS_CUDART__ 0
41-
# define THRUST_RUNTIME_FUNCTION __host__ __forceinline__
42-
# endif
43-
#else
44-
# define __THRUST_HAS_CUDART__ 0
45-
# define THRUST_RUNTIME_FUNCTION __host__ __forceinline__
46-
#endif
47-
4835
#ifdef THRUST_AGENT_ENTRY_NOINLINE
4936
#define THRUST_AGENT_ENTRY_INLINE_ATTR __noinline__
5037
#else

thrust/system/cuda/detail/adjacent_difference.h

+24 -32
Original file line number | Diff line number | Diff line change
@@ -29,21 +29,22 @@
2929
#include <thrust/detail/config.h>
3030

3131
#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
32-
#include <thrust/system/cuda/config.h>
3332

3433
#include <thrust/detail/cstdint.h>
34+
#include <thrust/detail/minmax.h>
35+
#include <thrust/detail/mpl/math.h>
3536
#include <thrust/detail/temporary_array.h>
36-
#include <thrust/system/cuda/detail/util.h>
37-
#include <cub/device/device_select.cuh>
38-
#include <cub/block/block_adjacent_difference.cuh>
37+
#include <thrust/distance.h>
38+
#include <thrust/functional.h>
39+
#include <thrust/system/cuda/config.h>
3940
#include <thrust/system/cuda/detail/core/agent_launcher.h>
40-
#include <thrust/system/cuda/detail/par_to_seq.h>
4141
#include <thrust/system/cuda/detail/dispatch.h>
42-
#include <thrust/functional.h>
43-
#include <thrust/distance.h>
44-
#include <thrust/detail/mpl/math.h>
45-
#include <thrust/detail/minmax.h>
42+
#include <thrust/system/cuda/detail/par_to_seq.h>
43+
#include <thrust/system/cuda/detail/util.h>
4644

45+
#include <cub/block/block_adjacent_difference.cuh>
46+
#include <cub/detail/cdp_dispatch.cuh>
47+
#include <cub/device/device_select.cuh>
4748
#include <cub/util_math.cuh>
4849

4950
THRUST_NAMESPACE_BEGIN
@@ -362,7 +363,7 @@ namespace __adjacent_difference {
362363
class OutputIt,
363364
class BinaryOp,
364365
class Size>
365-
cudaError_t THRUST_RUNTIME_FUNCTION
366+
cudaError_t CUB_RUNTIME_FUNCTION
366367
doit_step(void * d_temp_storage,
367368
size_t & temp_storage_bytes,
368369
InputIt first,
@@ -436,7 +437,7 @@ namespace __adjacent_difference {
436437
typename InputIt,
437438
typename OutputIt,
438439
typename BinaryOp>
439-
OutputIt THRUST_RUNTIME_FUNCTION
440+
OutputIt CUB_RUNTIME_FUNCTION
440441
adjacent_difference(execution_policy<Derived>& policy,
441442
InputIt first,
442443
InputIt last,
@@ -490,27 +491,18 @@ adjacent_difference(execution_policy<Derived> &policy,
490491
OutputIt result,
491492
BinaryOp binary_op)
492493
{
493-
OutputIt ret = result;
494-
if (__THRUST_HAS_CUDART__)
495-
{
496-
ret = __adjacent_difference::adjacent_difference(policy,
497-
first,
498-
last,
499-
result,
500-
binary_op);
501-
}
502-
else
503-
{
504-
#if !__THRUST_HAS_CUDART__
505-
ret = thrust::adjacent_difference(cvt_to_seq(derived_cast(policy)),
506-
first,
507-
last,
508-
result,
509-
binary_op);
510-
#endif
511-
}
512-
513-
return ret;
494+
CUB_CDP_DISPATCH(
495+
(result = __adjacent_difference::adjacent_difference(policy,
496+
first,
497+
last,
498+
result,
499+
binary_op);),
500+
(result = thrust::adjacent_difference(cvt_to_seq(derived_cast(policy)),
501+
first,
502+
last,
503+
result,
504+
binary_op);));
505+
return result;
514506
}
515507

516508
template <class Derived,

thrust/system/cuda/detail/copy.h

+17 -29
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,14 @@
2828

2929
#include <thrust/detail/config.h>
3030

31+
#include <thrust/advance.h>
32+
3133
#include <thrust/system/cuda/config.h>
3234
#include <thrust/system/cuda/detail/execution_policy.h>
3335
#include <thrust/system/cuda/detail/cross_system.h>
3436

37+
#include <cub/detail/cdp_dispatch.cuh>
38+
3539
THRUST_NAMESPACE_BEGIN
3640

3741
template <typename DerivedPolicy, typename InputIt, typename OutputIt>
@@ -117,22 +121,11 @@ copy(execution_policy<System> &system,
117121
InputIterator last,
118122
OutputIterator result)
119123
{
120-
OutputIterator ret = result;
121-
if (__THRUST_HAS_CUDART__)
122-
{
123-
ret = __copy::device_to_device(system, first, last, result);
124-
}
125-
else
126-
{
127-
#if !__THRUST_HAS_CUDART__
128-
ret = thrust::copy(cvt_to_seq(derived_cast(system)),
129-
first,
130-
last,
131-
result);
132-
#endif
133-
}
134-
135-
return ret;
124+
CUB_CDP_DISPATCH(
125+
(result = __copy::device_to_device(system, first, last, result);),
126+
(result =
127+
thrust::copy(cvt_to_seq(derived_cast(system)), first, last, result);));
128+
return result;
136129
} // end copy()
137130

138131
__thrust_exec_check_disable__
@@ -146,19 +139,14 @@ copy_n(execution_policy<System> &system,
146139
Size n,
147140
OutputIterator result)
148141
{
149-
OutputIterator ret = result;
150-
if (__THRUST_HAS_CUDART__)
151-
{
152-
ret = __copy::device_to_device(system, first, first + n, result);
153-
}
154-
else
155-
{
156-
#if !__THRUST_HAS_CUDART__
157-
ret = thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result);
158-
#endif
159-
}
160-
161-
return ret;
142+
CUB_CDP_DISPATCH(
143+
(result = __copy::device_to_device(system,
144+
first,
145+
thrust::next(first, n),
146+
result);),
147+
(result =
148+
thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result);));
149+
return result;
162150
} // end copy_n()
163151
#endif
164152

thrust/system/cuda/detail/copy_if.h

+40 -63
Original file line number | Diff line number | Diff line change
@@ -29,19 +29,20 @@
2929
#include <thrust/detail/config.h>
3030

3131
#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
32-
#include <thrust/system/cuda/config.h>
3332

33+
#include <thrust/detail/alignment.h>
3434
#include <thrust/detail/cstdint.h>
35+
#include <thrust/detail/function.h>
3536
#include <thrust/detail/temporary_array.h>
36-
#include <thrust/system/cuda/detail/util.h>
37-
#include <cub/device/device_select.cuh>
37+
#include <thrust/distance.h>
38+
#include <thrust/system/cuda/config.h>
3839
#include <thrust/system/cuda/detail/core/agent_launcher.h>
3940
#include <thrust/system/cuda/detail/core/util.h>
4041
#include <thrust/system/cuda/detail/par_to_seq.h>
41-
#include <thrust/detail/function.h>
42-
#include <thrust/distance.h>
43-
#include <thrust/detail/alignment.h>
42+
#include <thrust/system/cuda/detail/util.h>
4443

44+
#include <cub/detail/cdp_dispatch.cuh>
45+
#include <cub/device/device_select.cuh>
4546
#include <cub/util_math.cuh>
4647

4748
THRUST_NAMESPACE_BEGIN
@@ -598,17 +599,17 @@ namespace __copy_if {
598599
class Predicate,
599600
class Size,
600601
class NumSelectedOutIt>
601-
static cudaError_t THRUST_RUNTIME_FUNCTION
602-
doit_step(void * d_temp_storage,
603-
size_t & temp_storage_bytes,
604-
ItemsIt items,
605-
StencilIt stencil,
606-
OutputIt output_it,
607-
Predicate predicate,
608-
NumSelectedOutIt num_selected_out,
609-
Size num_items,
610-
cudaStream_t stream,
611-
bool debug_sync)
602+
CUB_RUNTIME_FUNCTION
603+
static cudaError_t doit_step(void * d_temp_storage,
604+
size_t & temp_storage_bytes,
605+
ItemsIt items,
606+
StencilIt stencil,
607+
OutputIt output_it,
608+
Predicate predicate,
609+
NumSelectedOutIt num_selected_out,
610+
Size num_items,
611+
cudaStream_t stream,
612+
bool debug_sync)
612613
{
613614
if (num_items == 0)
614615
return cudaSuccess;
@@ -695,7 +696,7 @@ namespace __copy_if {
695696
typename StencilIt,
696697
typename OutputIt,
697698
typename Predicate>
698-
THRUST_RUNTIME_FUNCTION
699+
CUB_RUNTIME_FUNCTION
699700
OutputIt copy_if(execution_policy<Derived>& policy,
700701
InputIt first,
701702
InputIt last,
@@ -789,28 +790,18 @@ copy_if(execution_policy<Derived> &policy,
789790
OutputIterator result,
790791
Predicate pred)
791792
{
792-
OutputIterator ret = result;
793-
794-
if (__THRUST_HAS_CUDART__)
795-
{
796-
ret = __copy_if::copy_if(policy,
797-
first,
798-
last,
799-
__copy_if::no_stencil_tag(),
800-
result,
801-
pred);
802-
}
803-
else
804-
{
805-
#if !__THRUST_HAS_CUDART__
806-
ret = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
807-
first,
808-
last,
809-
result,
810-
pred);
811-
#endif
812-
}
813-
return ret;
793+
CUB_CDP_DISPATCH((result = __copy_if::copy_if(policy,
794+
first,
795+
last,
796+
__copy_if::no_stencil_tag(),
797+
result,
798+
pred);),
799+
(result = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
800+
first,
801+
last,
802+
result,
803+
pred);));
804+
return result;
814805
} // func copy_if
815806

816807
__thrust_exec_check_disable__
@@ -827,29 +818,15 @@ copy_if(execution_policy<Derived> &policy,
827818
OutputIterator result,
828819
Predicate pred)
829820
{
830-
OutputIterator ret = result;
831-
832-
if (__THRUST_HAS_CUDART__)
833-
{
834-
ret = __copy_if::copy_if(policy,
835-
first,
836-
last,
837-
stencil,
838-
result,
839-
pred);
840-
}
841-
else
842-
{
843-
#if !__THRUST_HAS_CUDART__
844-
ret = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
845-
first,
846-
last,
847-
stencil,
848-
result,
849-
pred);
850-
#endif
851-
}
852-
return ret;
821+
CUB_CDP_DISPATCH(
822+
(result = __copy_if::copy_if(policy, first, last, stencil, result, pred);),
823+
(result = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
824+
first,
825+
last,
826+
stencil,
827+
result,
828+
pred);));
829+
return result;
853830
} // func copy_if
854831

855832
} // namespace cuda_cub

0 commit comments

Comments
 (0)