uxlfoundation
diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h
@@ -292,15 +292,15 @@ __histogram_general_registers_local_reduction(oneapi::dpl::__internal::__device_
                                               const sycl::event& __init_event, ::std::uint16_t __work_group_size,
                                               _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager)
 {
-    using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name;
+    using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;
 
     using _iters_per_work_item_t = ::std::integral_constant<::std::uint16_t, __iters_per_work_item>;
 
     // Required to include _iters_per_work_item_t in kernel name because we compile multiple kernels and decide between
     // them at runtime.  Other compile time arguments aren't required as it is the user's responsibility to provide a
     // unique kernel name to the policy for each call when using no-unamed-lambdas
     using _RegistersLocalReducName = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<
-        __histo_kernel_register_local_red<_iters_per_work_item_t, _kernel_base_name>>;
+        __histo_kernel_register_local_red<_iters_per_work_item_t, _CustomName>>;
 
     return __histogram_general_registers_local_reduction_submitter<__iters_per_work_item, __bins_per_work_item,
                                                                    _RegistersLocalReducName>()(
@@ -387,15 +387,15 @@ __histogram_general_local_atomics(oneapi::dpl::__internal::__device_backend_tag,
                                   const sycl::event& __init_event, ::std::uint16_t __work_group_size, _Range1&& __input,
                                   _Range2&& __bins, const _BinHashMgr& __binhash_manager)
 {
-    using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name;
+    using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;
 
     using _iters_per_work_item_t = ::std::integral_constant<::std::uint16_t, __iters_per_work_item>;
 
     // Required to include _iters_per_work_item_t in kernel name because we compile multiple kernels and decide between
     // them at runtime.  Other compile time arguments aren't required as it is the user's responsibility to provide a
     // unique kernel name to the policy for each call when using no-unamed-lambdas
     using _local_atomics_name = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<
-        __histo_kernel_local_atomics<_iters_per_work_item_t, _kernel_base_name>>;
+        __histo_kernel_local_atomics<_iters_per_work_item_t, _CustomName>>;
 
     return __histogram_general_local_atomics_submitter<__iters_per_work_item, _local_atomics_name>()(
         ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input),
@@ -486,10 +486,10 @@ __histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_bac
                                            ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins,
                                            const _BinHashMgr& __binhash_manager)
 {
-    using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name;
+    using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>;
 
     using _global_atomics_name = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<
-        __histo_kernel_private_glocal_atomics<_kernel_base_name>>;
+        __histo_kernel_private_glocal_atomics<_CustomName>>;
 
     return __histogram_general_private_global_atomics_submitter<_global_atomics_name>()(
         oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event,

diff --git a/test/parallel_api/ranges/std_ranges_test.h b/test/parallel_api/ranges/std_ranges_test.h
@@ -34,11 +34,27 @@ static_assert(ONEDPL_HAS_RANGE_ALGORITHMS >= 202409L);
 #include <ranges>
 #include <algorithm>
 #include <memory>
+#include <array>
 
 namespace test_std_ranges
 {
 
-inline constexpr std::size_t big_sz = (1<<25) + 10; //32M
+// The largest specializations of algorithms with device policies handle 16M+ elements.
+inline constexpr int big_size = (1<<24) + 10; //16M
+
+// ~100K elements are sufficient for the host parallel policies (par, par_unseq).
+// It also usually results in using multiple-work-group specializations for device policies.
+inline constexpr int medium_size = (1<<17) + 10; //128K
+
+// This size is sufficient for the serial policies (seq, unseq).
+// It also usually results in using single-work-group specializations for device policies.
+inline constexpr int small_size = 2025;
+
+#if TEST_DPCPP_BACKEND_PRESENT
+inline constexpr std::array<int, 3> big_sz = {/*serial*/ small_size, /*par*/ medium_size, /*device*/ big_size};
+#else
+inline constexpr std::array<int, 2> big_sz = {/*serial*/ small_size, /*par*/ medium_size};
+#endif
 
 #if TEST_DPCPP_BACKEND_PRESENT
 template<int call_id = 0>
@@ -50,8 +66,6 @@ auto dpcpp_policy()
 }
 #endif //TEST_DPCPP_BACKEND_PRESENT
 
-auto host_policies() { return std::true_type{};}
-
 enum TestDataMode
 {
     data_in,
@@ -147,20 +161,18 @@ bool is_range<T, std::void_t<decltype(std::declval<T&>().begin())>> = true;
 template<typename DataType, typename Container, TestDataMode test_mode = data_in>
 struct test
 {
-    const int max_n = 10;
-    template<typename Policy>
-    std::enable_if_t<std::is_same_v<Policy, std::true_type>>
-    operator()(Policy, auto algo, auto& checker, auto... args)
+    void
+    host_policies(int n_serial, int n_parallel, auto algo, auto& checker, auto... args)
     {
-        operator()(oneapi::dpl::execution::seq, algo, checker, args...);
-        operator()(oneapi::dpl::execution::unseq, algo, checker, args...);
-        operator()(oneapi::dpl::execution::par, algo, checker,  args...);
-        operator()(oneapi::dpl::execution::par_unseq, algo, checker, args...);
+        operator()(n_serial, oneapi::dpl::execution::seq, algo, checker, args...);
+        operator()(n_serial, oneapi::dpl::execution::unseq, algo, checker, args...);
+        operator()(n_parallel, oneapi::dpl::execution::par, algo, checker,  args...);
+        operator()(n_parallel, oneapi::dpl::execution::par_unseq, algo, checker, args...);
     }
 
     template<typename Policy, typename Algo, typename Checker, typename TransIn, typename TransOut, TestDataMode mode = test_mode>
-    std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in>
-    operator()(Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut, auto... args)
+    std::enable_if_t<mode == data_in>
+    operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut, auto... args)
     {
         Container cont_in(exec, max_n, [](auto i) { return i;});
         Container cont_exp(exec, max_n, [](auto i) { return i;});
@@ -176,7 +188,7 @@ struct test
         static_assert(std::is_same_v<decltype(res), decltype(checker(r_in, args...))>, "Wrong return type");
 
         auto bres = ret_in_val(expected_res, expected_view.begin()) == ret_in_val(res, r_in.begin());
-        EXPECT_TRUE(bres, (std::string("wrong return value from algo with ranges: ") + typeid(Algo).name() + 
+        EXPECT_TRUE(bres, (std::string("wrong return value from algo with ranges: ") + typeid(Algo).name() +
                 typeid(decltype(tr_in(std::declval<Container&>()()))).name()).c_str());
 
         //check result
@@ -188,7 +200,7 @@ struct test
 private:
     template<typename Policy, typename Algo, typename Checker, typename TransIn, typename TransOut, TestDataMode mode = test_mode>
     void
-    process_data_in_out(int n_in, int n_out, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in,
+    process_data_in_out(int max_n, int n_in, int n_out, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in,
                         TransOut tr_out, auto... args)
     {
         static_assert(mode == data_in_out || mode == data_in_out_lim);
@@ -225,28 +237,28 @@ struct test
 
 public:
     template<typename Policy, typename Algo, typename Checker, TestDataMode mode = test_mode>
-    std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_out>
-    operator()(Policy&& exec, Algo algo, Checker& checker, auto... args)
+    std::enable_if_t<mode == data_in_out>
+    operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, auto... args)
     {
         const int r_size = max_n;
-        process_data_in_out(r_size, r_size, std::forward<Policy>(exec), algo, checker, args...);
+        process_data_in_out(max_n, r_size, r_size, std::forward<Policy>(exec), algo, checker, args...);
     }
 
     template<typename Policy, typename Algo, typename Checker, TestDataMode mode = test_mode>
-    std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_out_lim>
-    operator()(Policy&& exec, Algo algo, Checker& checker, auto... args)
+    std::enable_if_t<mode == data_in_out_lim>
+    operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, auto... args)
     {
         const int r_size = max_n;
-        process_data_in_out(r_size, r_size, std::forward<Policy>(exec), algo, checker, args...);
+        process_data_in_out(max_n, r_size, r_size, std::forward<Policy>(exec), algo, checker, args...);
 
         //test case size of input range is less than size of output and vice-versa
-        process_data_in_out(r_size/2, r_size, exec, algo, checker, args...);
-        process_data_in_out(r_size, r_size/2, std::forward<Policy>(exec), algo, checker, args...);
+        process_data_in_out(max_n, r_size/2, r_size, exec, algo, checker, args...);
+        process_data_in_out(max_n, r_size, r_size/2, std::forward<Policy>(exec), algo, checker, args...);
     }
 
     template<typename Policy, typename Algo, typename Checker, typename TransIn, typename TransOut, TestDataMode mode = test_mode>
-    std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_in>
-    operator()(Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut, auto... args)
+    std::enable_if_t<mode == data_in_in>
+    operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut, auto... args)
     {
         Container cont_in1(exec, max_n, [](auto i) { return i;});
         Container cont_in2(exec, max_n, [](auto i) { return i % 5 ? i : 0;});
@@ -270,7 +282,8 @@ struct test
 private:
     template<typename Policy, typename Algo, typename Checker, typename TransIn, typename TransOut, TestDataMode mode = test_mode>
     void
-    process_data_in_in_out(int n_in1, int n_in2, int n_out, Policy&& exec, Algo algo, Checker& checker, TransIn tr_in, TransOut tr_out, auto... args)
+    process_data_in_in_out(int max_n, int n_in1, int n_in2, int n_out, Policy&& exec, Algo algo, Checker& checker,
+                           TransIn tr_in, TransOut tr_out, auto... args)
     {
         static_assert(mode == data_in_in_out || mode == data_in_in_out_lim);
 
@@ -307,23 +320,23 @@ struct test
 
 public:
     template<typename Policy, typename Algo, typename Checker, TestDataMode mode = test_mode>
-    std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_in_out>
-    operator()(Policy&& exec, Algo algo, Checker& checker, auto... args)
+    std::enable_if_t<mode == data_in_in_out>
+    operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, auto... args)
     {
         const int r_size = max_n;
-        process_data_in_in_out(r_size, r_size, r_size*2, std::forward<Policy>(exec), algo, checker, args...);
+        process_data_in_in_out(max_n, r_size, r_size, r_size*2, std::forward<Policy>(exec), algo, checker, args...);
     }
 
     template<typename Policy, typename Algo, typename Checker, TestDataMode mode = test_mode>
-    std::enable_if_t<!std::is_same_v<Policy, std::true_type> && mode == data_in_in_out_lim>
-    operator()(Policy&& exec, Algo algo, Checker& checker, auto... args)
+    std::enable_if_t<mode == data_in_in_out_lim>
+    operator()(int max_n, Policy&& exec, Algo algo, Checker& checker, auto... args)
     {
         const int r_size = max_n;
-        process_data_in_in_out(r_size, r_size, r_size, exec, algo, checker, args...);
-        process_data_in_in_out(r_size, r_size, r_size*2, exec, algo, checker, args...);
-        process_data_in_in_out(r_size/2, r_size, r_size, exec, algo, checker, args...);
-        process_data_in_in_out(r_size, r_size/2, r_size, exec, algo, checker, args...);
-        process_data_in_in_out(r_size, r_size, r_size/2, std::forward<Policy>(exec), algo, checker, args...);
+        process_data_in_in_out(max_n, r_size, r_size, r_size, exec, algo, checker, args...);
+        process_data_in_in_out(max_n, r_size, r_size, r_size*2, exec, algo, checker, args...);
+        process_data_in_in_out(max_n, r_size/2, r_size, r_size, exec, algo, checker, args...);
+        process_data_in_in_out(max_n, r_size, r_size/2, r_size, exec, algo, checker, args...);
+        process_data_in_in_out(max_n, r_size, r_size, r_size/2, std::forward<Policy>(exec), algo, checker, args...);
     }
 private:
 
@@ -361,7 +374,7 @@ struct test
 template<typename T, typename ViewType>
 struct host_subrange_impl
 {
-    static_assert(std::is_trivially_copyable_v<T>, 
+    static_assert(std::is_trivially_copyable_v<T>,
         "Memory initialization within the class relies on trivially copyability of the type T");
 
     using type = ViewType;
@@ -409,9 +422,9 @@ struct host_vector
 
     template<typename Policy>
     host_vector(Policy&&, T* data, int n): vec(data, data + n), p(data) {}
-    
+
     template<typename Policy, typename DataGen>
-    host_vector(Policy&&, int n, DataGen gen): vec(n) 
+    host_vector(Policy&&, int n, DataGen gen): vec(n)
     {
         for(int i = 0; i < n; ++i)
             vec[i] = gen(i);
@@ -514,12 +527,35 @@ using  usm_span = usm_subrange_impl<T, std::span<T>>;
 template<int call_id = 0, typename T = int, TestDataMode mode = data_in>
 struct test_range_algo
 {
-    const int max_n = 10;
+    const int n_serial = small_size;
+    const int n_parallel = small_size;
+#if TEST_DPCPP_BACKEND_PRESENT
+    const int n_device = small_size;
+#endif
+
+    test_range_algo() = default;
+
+    // Mode with a uniform number of elements for each policy type
+#if TEST_DPCPP_BACKEND_PRESENT
+    test_range_algo(int n) : n_serial(n), n_parallel(n), n_device(n) {}
+#else
+    test_range_algo(int n) : n_serial(n), n_parallel(n) {}
+#endif
+
+    // Mode that tests different policy types with different sizes.
+    // Serial (seq/unseq), parallel (par/par_unseq), and device policies
+    // specialize algorithms for different numbers of elements; this mode covers each of them.
+#if TEST_DPCPP_BACKEND_PRESENT
+    test_range_algo(std::array<int, 3> sizes) : n_serial(sizes[0]), n_parallel(sizes[1]), n_device(sizes[2]) {}
+#else
+    test_range_algo(std::array<int, 2> sizes) : n_serial(sizes[0]), n_parallel(sizes[1]) {}
+#endif
+
     void test_view(auto view, auto algo, auto& checker, auto... args)
     {
-        test<T, host_subrange<T>, mode>{max_n}(host_policies(), algo, checker, view, std::identity{}, args...);
+        test<T, host_subrange<T>, mode>{}.host_policies(n_serial, n_parallel, algo, checker, view, std::identity{}, args...);
 #if TEST_DPCPP_BACKEND_PRESENT
-        test<T, usm_subrange<T>, mode>{max_n}(dpcpp_policy<call_id>(), algo, checker, view, std::identity{}, args...);
+        test<T, usm_subrange<T>, mode>{}(n_device, dpcpp_policy<call_id>(), algo, checker, view, std::identity{}, args...);
 #endif //TEST_DPCPP_BACKEND_PRESENT
     }
 
@@ -531,13 +567,19 @@ struct test_range_algo
         auto span_view = [](auto&& v) { return std::span(v); };
 #endif
 
-        test<T, host_vector<T>, mode>{max_n}(host_policies(), algo, checker, std::identity{}, std::identity{}, args...);
-        test<T, host_vector<T>, mode>{max_n}(host_policies(), algo, checker, subrange_view, std::identity{}, args...);
-        test<T, host_vector<T>, mode>{max_n}(host_policies(), algo, checker, std::views::all, std::identity{}, args...);
-        test<T, host_subrange<T>, mode>{max_n}(host_policies(), algo, checker, std::views::all, std::identity{}, args...);
+        test<T, host_vector<T>, mode>{}.host_policies(
+            n_serial, n_parallel, algo, checker, std::identity{}, std::identity{}, args...);
+        test<T, host_vector<T>, mode>{}.host_policies(
+            n_serial, n_parallel, algo, checker, subrange_view, std::identity{}, args...);
+        test<T, host_vector<T>, mode>{}.host_policies(
+            n_serial, n_parallel, algo, checker, std::views::all, std::identity{}, args...);
+        test<T, host_subrange<T>, mode>{}.host_policies(
+            n_serial, n_parallel, algo, checker, std::views::all, std::identity{}, args...);
 #if TEST_CPP20_SPAN_PRESENT
-        test<T, host_vector<T>, mode>{max_n}(host_policies(), algo, checker,  span_view, std::identity{}, args...);
-        test<T, host_span<T>, mode>{max_n}(host_policies(), algo, checker, std::views::all, std::identity{}, args...);
+        test<T, host_vector<T>, mode>{}.host_policies(
+            n_serial, n_parallel, algo, checker,  span_view, std::identity{}, args...);
+        test<T, host_span<T>, mode>{}.host_policies(
+            n_serial, n_parallel, algo, checker, std::views::all, std::identity{}, args...);
 #endif
 
 #if TEST_DPCPP_BACKEND_PRESENT
@@ -548,11 +590,15 @@ struct test_range_algo
             if constexpr(!std::disjunction_v<std::is_member_pointer<decltype(args)>...>)
 #endif
             {
-                test<T, usm_vector<T>, mode>{max_n}(dpcpp_policy<call_id + 10>(), algo, checker, subrange_view, subrange_view, args...);
-                test<T, usm_subrange<T>, mode>{max_n}(dpcpp_policy<call_id + 30>(), algo, checker, std::identity{}, std::identity{}, args...);
+                test<T, usm_vector<T>, mode>{}(
+                    n_device, dpcpp_policy<call_id + 10>(), algo, checker, subrange_view, subrange_view, args...);
+                test<T, usm_subrange<T>, mode>{}(
+                    n_device, dpcpp_policy<call_id + 30>(), algo, checker, std::identity{}, std::identity{}, args...);
 #if TEST_CPP20_SPAN_PRESENT
-                test<T, usm_vector<T>, mode>{max_n}(dpcpp_policy<call_id + 20>(), algo, checker, span_view, subrange_view, args...);
-                test<T, usm_span<T>, mode>{max_n}(dpcpp_policy<call_id + 40>(), algo, checker, std::identity{}, std::identity{}, args...);
+                test<T, usm_vector<T>, mode>{}(
+                    n_device, dpcpp_policy<call_id + 20>(), algo, checker, span_view, subrange_view, args...);
+                test<T, usm_span<T>, mode>{}(
+                    n_device, dpcpp_policy<call_id + 40>(), algo, checker, std::identity{}, std::identity{}, args...);
 #endif
             }
         }

diff --git a/test/parallel_api/ranges/std_ranges_test_views.pass.cpp b/test/parallel_api/ranges/std_ranges_test_views.pass.cpp
@@ -22,10 +22,10 @@ main()
     using namespace test_std_ranges;
     namespace dpl_ranges = oneapi::dpl::ranges;
 
-    const int n = 1<<25; //32M
+    const int n = medium_size;
 
     //transform view
-    test_range_algo<0>{n}.test_view(std::views::transform([](const auto a) { return a*2; }), 
+    test_range_algo<0>{n}.test_view(std::views::transform([](const auto a) { return a*2; }),
         dpl_ranges::find_if, std::ranges::find_if, pred, proj);
 
     //reverse view

diff --git a/test/parallel_api/ranges/std_ranges_transform_iota.pass.cpp b/test/parallel_api/ranges/std_ranges_transform_iota.pass.cpp
@@ -23,7 +23,7 @@ main()
     namespace dpl_ranges = oneapi::dpl::ranges;
     const char* err_msg = "Wrong effect algo transform with unsized ranges.";
 
-    const int n = 1<<25; //32M
+    const int n = medium_size;
     std::ranges::iota_view view1(0, n); //size range
     std::ranges::iota_view view2(0, std::unreachable_sentinel_t{}); //unsized
 

diff --git a/test/parallel_api/ranges/std_ranges_transform_iota_sycl.pass.cpp b/test/parallel_api/ranges/std_ranges_transform_iota_sycl.pass.cpp
@@ -23,7 +23,7 @@ main()
     namespace dpl_ranges = oneapi::dpl::ranges;
     const char* err_msg = "Wrong effect algo transform with unsized ranges.";
 
-    const int n = 1<<25; //32M
+    const int n = big_size;
     std::ranges::iota_view view1(0, n); //size range
     std::ranges::iota_view view2(0, std::unreachable_sentinel_t{}); //unsized