Skip to content

Commit d825b51

Browse files
Bugfix for sort algorithm when precompiling kernels is not possible (#2469)
--------- Signed-off-by: Dan Hoeflinger <[email protected]>
1 parent 604f434 commit d825b51

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,22 @@ struct __parallel_radix_sort_iteration
686686
__scan_wg_size =
687687
sycl::min(__scan_wg_size, oneapi::dpl::__internal::__kernel_work_group_size(__q, __local_scan_kernel));
688688
__count_wg_size = sycl::max(__count_sg_size, __reorder_sg_size);
689+
#else
690+
// When kernel compilation is disabled, use conservative fallback values
691+
// Get device sub-group sizes and pick a suitable one for radix sort
692+
const auto __subgroup_sizes = __q.get_device().template get_info<sycl::info::device::sub_group_sizes>();
693+
// The radix sort kernels are optimized for sub-group size 16 to avoid register spills
694+
// and efficiently handle 4-bit radix (16 buckets). Prefer 16, then 32, then 8.
695+
if (std::find(__subgroup_sizes.begin(), __subgroup_sizes.end(), 16) != __subgroup_sizes.end())
696+
__reorder_sg_size = 16;
697+
else if (std::find(__subgroup_sizes.begin(), __subgroup_sizes.end(), 32) != __subgroup_sizes.end())
698+
__reorder_sg_size = 32;
699+
else if (std::find(__subgroup_sizes.begin(), __subgroup_sizes.end(), 8) != __subgroup_sizes.end())
700+
__reorder_sg_size = 8;
701+
// Otherwise (none of 16, 32 or 8 is supported): leave __reorder_sg_size unchanged at __max_sg_size
702+
703+
// For __count_wg_size, use the maximum of the current value and __reorder_sg_size
704+
__count_wg_size = sycl::max(__count_wg_size, __reorder_sg_size);
689705
#endif
690706
const ::std::uint32_t __radix_states = 1 << __radix_bits;
691707

0 commit comments

Comments
 (0)