Skip to content

Commit e203809

Browse files
committed
more thrust exception handling (#357)
1 parent f8039ec commit e203809

8 files changed

+110
-92
lines changed

src/broadcast_kernel.cu

+5-5
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,11 @@ void BroadcastBackwardKernelGPU(
241241
// cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
242242

243243
// Sort COO first
244-
thrust::sort_by_key(thrust::device, //
245-
d_out_map, // key begin
246-
d_out_map + nnz, // key end
247-
d_in_map // value begin
248-
);
244+
THRUST_CHECK(thrust::sort_by_key(thrust::device, //
245+
d_out_map, // key begin
246+
d_out_map + nnz, // key end
247+
d_in_map // value begin
248+
));
249249

250250
cusparseSpMMAlg_t mm_alg;
251251
#if defined(CUDART_VERSION) && (CUDART_VERSION < 10010)

src/coordinate_map_gpu.cu

+4-4
Original file line numberDiff line numberDiff line change
@@ -973,7 +973,7 @@ CoordinateFieldMapGPU<coordinate_field_type, coordinate_int_type,
973973
m_coordinate_size);
974974

975975
CUDA_CHECK(cudaStreamSynchronize(0));
976-
kernel_map.decompose();
976+
THRUST_CHECK(kernel_map.decompose());
977977
LOG_DEBUG("origin map decomposed");
978978

979979
return kernel_map;
@@ -1660,7 +1660,7 @@ CoordinateMapGPU<coordinate_type, TemplatedAllocator>::kernel_map(
16601660
CUDA_CHECK(cudaStreamSynchronize(0));
16611661
LOG_DEBUG("Preallocated kernel map done");
16621662

1663-
kernel_map.decompose();
1663+
THRUST_CHECK(kernel_map.decompose());
16641664
base_type::m_byte_allocator.deallocate(
16651665
reinterpret_cast<char *>(d_p_count_per_thread),
16661666
num_threads * sizeof(index_type));
@@ -1730,7 +1730,7 @@ CoordinateMapGPU<coordinate_type, TemplatedAllocator>::kernel_map(
17301730
CUDA_CHECK(cudaMemcpy(kernel_map.out_maps.data(), d_p_valid_out_index,
17311731
valid_size * sizeof(index_type),
17321732
cudaMemcpyDeviceToDevice));
1733-
kernel_map.decompose();
1733+
THRUST_CHECK(kernel_map.decompose());
17341734

17351735
base_type::m_byte_allocator.deallocate(
17361736
reinterpret_cast<char *>(d_p_valid_in_index),
@@ -1961,7 +1961,7 @@ CoordinateMapGPU<coordinate_type, TemplatedAllocator>::origin_map(
19611961
m_coordinate_size);
19621962

19631963
CUDA_CHECK(cudaStreamSynchronize(0));
1964-
kernel_map.decompose();
1964+
THRUST_CHECK(kernel_map.decompose());
19651965
LOG_DEBUG("origin map decomposed");
19661966

19671967
return kernel_map;

src/gpu.cuh

+7
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,13 @@ namespace minkowski {
155155
<< __FILE__ << ":" << __LINE__); \
156156
}
157157

158+
// Catch clause that converts a thrust::system_error into a std::runtime_error
// whose message contains the Thrust error text plus the file and line where
// this macro is expanded. It must be placed directly after the closing brace
// of a `try { ... }` block, e.g. `try { thrust::sort(...); } THRUST_CATCH;`.
// The exception is caught by const reference so that it is neither copied nor
// sliced when a derived exception type is thrown.
#define THRUST_CATCH                                                           \
  catch (thrust::system_error const &e) {                                      \
    throw std::runtime_error(Formatter()                                       \
                             << "Thrust error: " << e.what() << " at "         \
                             << __FILE__ << ":" << __LINE__);                  \
  }
164+
158165
// CUDA: library error reporting.
159166
const char *cublasGetErrorString(cublasStatus_t error);
160167

src/kernel_map.cuh

+31-24
Original file line numberDiff line numberDiff line change
@@ -314,15 +314,18 @@ public:
314314
LOG_DEBUG("Decomposing", kernels.end() - kernels.begin(), "elements");
315315
// the memory space must be initialized first!
316316
// sort
317-
thrust::sort_by_key(thrust::device, //
318-
kernels.begin(), // key begin
319-
kernels.end(), // key end
320-
thrust::make_zip_iterator( // value begin
321-
thrust::make_tuple( //
322-
in_maps.begin(), //
323-
out_maps.begin() //
324-
) //
325-
));
317+
try {
318+
thrust::sort_by_key(thrust::device, //
319+
kernels.begin(), // key begin
320+
kernels.end(), // key end
321+
thrust::make_zip_iterator( // value begin
322+
thrust::make_tuple( //
323+
in_maps.begin(), //
324+
out_maps.begin() //
325+
) //
326+
));
327+
}
328+
THRUST_CATCH;
326329

327330
#ifdef DEBUG
328331
size_type map_size =
@@ -357,21 +360,25 @@ public:
357360
gpu_storage<index_type, byte_allocator_type> out_key_min(m_capacity);
358361
gpu_storage<index_type, byte_allocator_type> out_key_size(m_capacity);
359362

360-
auto end = thrust::reduce_by_key(
361-
thrust::device, // policy
362-
kernels.begin(), // key begin
363-
kernels.end(), // key end
364-
thrust::make_zip_iterator(
365-
thrust::make_tuple(min_begin, size_begin)), // value begin
366-
out_key.begin(), // key out begin
367-
thrust::make_zip_iterator(thrust::make_tuple(
368-
out_key_min.begin(), out_key_size.begin())), // value out begin
369-
thrust::equal_to<index_type>(), // key equal binary predicate
370-
detail::min_size_functor<index_type>() // value binary operator
371-
);
372-
373-
size_type num_unique_keys = end.first - out_key.begin();
374-
LOG_DEBUG(num_unique_keys, "unique kernel map keys found");
363+
size_type num_unique_keys;
364+
365+
try {
366+
auto end = thrust::reduce_by_key(
367+
thrust::device, // policy
368+
kernels.begin(), // key begin
369+
kernels.end(), // key end
370+
thrust::make_zip_iterator(
371+
thrust::make_tuple(min_begin, size_begin)), // value begin
372+
out_key.begin(), // key out begin
373+
thrust::make_zip_iterator(thrust::make_tuple(
374+
out_key_min.begin(), out_key_size.begin())), // value out begin
375+
thrust::equal_to<index_type>(), // key equal binary predicate
376+
detail::min_size_functor<index_type>() // value binary operator
377+
);
378+
num_unique_keys = end.first - out_key.begin();
379+
LOG_DEBUG(num_unique_keys, "unique kernel map keys found");
380+
}
381+
THRUST_CATCH;
375382

376383
auto const cpu_out_keys = out_key.to_vector(num_unique_keys);
377384
auto const cpu_out_offset = out_key_min.to_vector(num_unique_keys);

src/pooling_avg_kernel.cu

+16-14
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,10 @@ void NonzeroAvgPoolingForwardKernelGPU(
214214
CUDA_CHECK(cudaMemcpy(sorted_col_ptr, kernel_map.in_maps.begin(),
215215
sparse_nnzs * sizeof(Itype), cudaMemcpyDeviceToDevice));
216216

217-
thrust::sort_by_key(thrust::device, //
218-
sorted_row_ptr, // key begin
219-
sorted_row_ptr + sparse_nnzs, // key end
220-
sorted_col_ptr);
217+
THRUST_CHECK(thrust::sort_by_key(thrust::device, //
218+
sorted_row_ptr, // key begin
219+
sorted_row_ptr + sparse_nnzs, // key end
220+
sorted_col_ptr));
221221

222222
// +---------+ +---+
223223
// | spm | | i |
@@ -280,16 +280,18 @@ void NonzeroAvgPoolingForwardKernelGPU(
280280
(Dtype *)allocator.allocate(sparse_nnzs * sizeof(Dtype));
281281

282282
// reduce by key
283-
auto end = thrust::reduce_by_key(thrust::device, // policy
284-
sorted_row_ptr, // key begin
285-
sorted_row_ptr + sparse_nnzs, // key end
286-
d_ones, // value begin
287-
unique_row_ptr, // key out begin
288-
reduced_val_ptr // value out begin
289-
);
290-
291-
int num_unique_keys = end.first - unique_row_ptr;
292-
LOG_DEBUG("Num unique keys:", num_unique_keys);
283+
int num_unique_keys;
284+
try {
285+
auto end = thrust::reduce_by_key(thrust::device, // policy
286+
sorted_row_ptr, // key begin
287+
sorted_row_ptr + sparse_nnzs, // key end
288+
d_ones, // value begin
289+
unique_row_ptr, // key out begin
290+
reduced_val_ptr // value out begin
291+
);
292+
num_unique_keys = end.first - unique_row_ptr;
293+
LOG_DEBUG("Num unique keys:", num_unique_keys);
294+
} THRUST_CATCH;
293295

294296
#ifdef DEBUG
295297
Itype *p_unique_row = (Itype *)std::malloc(num_unique_keys * sizeof(Itype));

src/pooling_max_kernel.cu

+18-15
Original file line numberDiff line numberDiff line change
@@ -147,31 +147,34 @@ void max_pool_forward_pointer_kernel_gpu(
147147
MapItype *d_reduced_out_map = d_scr + 2 * nmap + 2; // reduced output maps
148148

149149
// create number of in_feat per out, and starting index
150-
thrust::sequence(thrust::device, d_index, d_index + nmap);
150+
THRUST_CHECK(thrust::sequence(thrust::device, d_index, d_index + nmap));
151151

152152
////////////////////////////////
153153
// Reduction
154154
////////////////////////////////
155155
// sort d_out_map and d_in_map with the d_out_map so that in_feat are
156156
// placed adjacent according to out_map
157157
if (!is_sorted)
158-
thrust::sort_by_key(thrust::device, d_out_map, d_out_map + nmap, d_in_map);
158+
THRUST_CHECK(thrust::sort_by_key(thrust::device, d_out_map,
159+
d_out_map + nmap, d_in_map));
159160

160161
thrust::equal_to<MapItype> equal_pred;
161162
thrust::minimum<MapItype> min_op;
162-
163-
auto reduction_pair =
164-
thrust::reduce_by_key(thrust::device, // execution policy
165-
d_out_map, // key begin
166-
d_out_map + nmap, // key end
167-
d_index, // val begin
168-
d_reduced_out_map, // key out begin
169-
d_in_map_min, // val out begin
170-
equal_pred, // binary pred
171-
min_op); // binary op
172-
CUDA_CHECK(cudaStreamSynchronize(0));
173-
174-
size_t num_unique_out_map = reduction_pair.first - d_reduced_out_map;
163+
size_t num_unique_out_map;
164+
165+
try {
166+
auto reduction_pair =
167+
thrust::reduce_by_key(thrust::device, // execution policy
168+
d_out_map, // key begin
169+
d_out_map + nmap, // key end
170+
d_index, // val begin
171+
d_reduced_out_map, // key out begin
172+
d_in_map_min, // val out begin
173+
equal_pred, // binary pred
174+
min_op); // binary op
175+
CUDA_CHECK(cudaStreamSynchronize(0));
176+
num_unique_out_map = reduction_pair.first - d_reduced_out_map;
177+
} THRUST_CATCH;
175178

176179
#ifdef DEBUG
177180
std::cout << "num_unique_out_map: " << num_unique_out_map << "\n";

src/spmm.cu

+27-28
Original file line numberDiff line numberDiff line change
@@ -235,15 +235,15 @@ torch::Tensor coo_spmm(torch::Tensor const &rows, torch::Tensor const &cols,
235235
CUDA_CHECK(cudaMemcpy(sorted_val_ptr, values_ptr, nnz * sizeof(scalar_t),
236236
cudaMemcpyDeviceToDevice));
237237

238-
thrust::sort_by_key(thrust::device, //
239-
sorted_row_ptr, // key begin
240-
sorted_row_ptr + nnz, // key end
241-
thrust::make_zip_iterator( // value begin
242-
thrust::make_tuple( //
243-
sorted_col_ptr, //
244-
sorted_val_ptr //
245-
) //
246-
));
238+
THRUST_CHECK(thrust::sort_by_key(thrust::device, //
239+
sorted_row_ptr, // key begin
240+
sorted_row_ptr + nnz, // key end
241+
thrust::make_zip_iterator( // value begin
242+
thrust::make_tuple( //
243+
sorted_col_ptr, //
244+
sorted_val_ptr //
245+
) //
246+
)));
247247
LOG_DEBUG("sorted row", cudaDeviceSynchronize());
248248
} else {
249249
sorted_row_ptr = row_indices_ptr;
@@ -481,10 +481,10 @@ coo_spmm_average(torch::Tensor const &rows, torch::Tensor const &cols,
481481
CUDA_CHECK(cudaMemcpy(sorted_col_ptr, col_indices_ptr,
482482
nnz * sizeof(th_int_type), cudaMemcpyDeviceToDevice));
483483

484-
thrust::sort_by_key(thrust::device, //
485-
sorted_row_ptr, // key begin
486-
sorted_row_ptr + nnz, // key end
487-
sorted_col_ptr);
484+
THRUST_CHECK(thrust::sort_by_key(thrust::device, //
485+
sorted_row_ptr, // key begin
486+
sorted_row_ptr + nnz, // key end
487+
sorted_col_ptr));
488488

489489
/////////////////////////////////////////////////////////////////////////
490490
// Create vals
@@ -496,21 +496,20 @@ coo_spmm_average(torch::Tensor const &rows, torch::Tensor const &cols,
496496
(scalar_t *)c10::cuda::CUDACachingAllocator::raw_alloc(
497497
nnz * sizeof(scalar_t));
498498
torch::Tensor ones = at::ones({nnz}, mat2.options());
499-
500-
// reduce by key
501-
auto end = thrust::reduce_by_key(
502-
thrust::device, // policy
503-
sorted_row_ptr, // key begin
504-
sorted_row_ptr + nnz, // key end
505-
reinterpret_cast<scalar_t *>(ones.data_ptr()), // value begin
506-
unique_row_ptr, // key out begin
507-
reduced_val_ptr // value out begin
508-
);
509-
510-
int num_unique_keys = end.first - unique_row_ptr;
511-
LOG_DEBUG("Num unique keys:", num_unique_keys);
512-
513-
// Create values
499+
int num_unique_keys;
500+
try {
501+
// reduce by key
502+
auto end = thrust::reduce_by_key(
503+
thrust::device, // policy
504+
sorted_row_ptr, // key begin
505+
sorted_row_ptr + nnz, // key end
506+
reinterpret_cast<scalar_t *>(ones.data_ptr()), // value begin
507+
unique_row_ptr, // key out begin
508+
reduced_val_ptr // value out begin
509+
);
510+
num_unique_keys = end.first - unique_row_ptr;
511+
LOG_DEBUG("Num unique keys:", num_unique_keys);
512+
} THRUST_CATCH;
514513

515514
// Copy the results to the correct output
516515
inverse_val<th_int_type, scalar_t>

tests/cpp/coordinate_map_gpu_test.cu

+2-2
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,8 @@ coordinate_map_batch_find_test(const torch::Tensor &coordinates,
178178
std::vector<index_type> cpu_firsts(NR);
179179
std::vector<index_type> cpu_seconds(NR);
180180

181-
thrust::copy(firsts.cbegin(), firsts.cend(), cpu_firsts.begin());
182-
thrust::copy(seconds.cbegin(), seconds.cend(), cpu_seconds.begin());
181+
THRUST_CHECK(thrust::copy(firsts.cbegin(), firsts.cend(), cpu_firsts.begin()));
182+
THRUST_CHECK(thrust::copy(seconds.cbegin(), seconds.cend(), cpu_seconds.begin()));
183183
return std::make_pair(cpu_firsts, cpu_seconds);
184184
}
185185

0 commit comments

Comments
 (0)