|
16 | 16 |
|
17 | 17 | //================================================================================
|
18 | 18 | // this file has been auto-generated, do not modify its contents!
|
19 |
| -// date: 2024-07-24 15:35:29.178410 |
20 |
| -// git hash: 986ca557aa59f869d68fe1e7184c2228517ea52d |
| 19 | +// date: 2024-09-23 14:12:25.024358 |
| 20 | +// git hash: 3a88b56a57cce5e1f3365aa6e8efb76a14f7f865 |
21 | 21 | //================================================================================
|
22 | 22 |
|
23 | 23 | #ifndef KERNEL_FLOAT_MACROS_H
|
|
85 | 85 |
|
86 | 86 | #define KERNEL_FLOAT_MAX_ALIGNMENT (32)
|
87 | 87 |
|
88 |
| -#ifndef KERNEL_FLOAT_FAST_MATH |
| 88 | +#if KERNEL_FLOAT_FAST_MATH |
89 | 89 | #define KERNEL_FLOAT_POLICY ::kernel_float::fast_policy;
|
90 | 90 | #endif
|
91 | 91 |
|
@@ -424,7 +424,6 @@ struct alignas(Alignment) aligned_array<T, 1, Alignment> {
|
424 | 424 | };
|
425 | 425 |
|
426 | 426 | template<typename T, size_t Alignment>
|
427 |
| - |
428 | 427 | struct aligned_array<T, 0, Alignment> {
|
429 | 428 | KERNEL_FLOAT_INLINE
|
430 | 429 | T* data() {
|
@@ -807,19 +806,23 @@ namespace detail {
|
807 | 806 | template<typename Policy, typename F, size_t N, typename Output, typename... Args>
|
808 | 807 | struct map_policy_impl {
|
809 | 808 | static constexpr size_t packet_size = preferred_vector_size<Output>::value;
|
| 809 | + static constexpr size_t remainder = N % packet_size; |
810 | 810 |
|
811 | 811 | KERNEL_FLOAT_INLINE static void call(F fun, Output* output, const Args*... args) {
|
812 | 812 | if constexpr (N / packet_size > 0) {
|
813 | 813 | #pragma unroll
|
814 |
| - for (size_t i = 0; i < N - N % packet_size; i += packet_size) { |
815 |
| - Policy::template type<F, N, Output, Args...>::call(fun, output + i, (args + i)...); |
| 814 | + for (size_t i = 0; i < N - remainder; i += packet_size) { |
| 815 | + Policy::template type<F, packet_size, Output, Args...>::call( |
| 816 | + fun, |
| 817 | + output + i, |
| 818 | + (args + i)...); |
816 | 819 | }
|
817 | 820 | }
|
818 | 821 |
|
819 |
| - if constexpr (N % packet_size > 0) { |
| 822 | + if constexpr (remainder > 0) { |
820 | 823 | #pragma unroll
|
821 |
| - for (size_t i = N - N % packet_size; i < N; i++) { |
822 |
| - Policy::template type<F, N, Output, Args...>::call(fun, output + i, (args + i)...); |
| 824 | + for (size_t i = N - remainder; i < N; i++) { |
| 825 | + Policy::template type<F, 1, Output, Args...>::call(fun, output + i, (args + i)...); |
823 | 826 | }
|
824 | 827 | }
|
825 | 828 | }
|
|
0 commit comments