| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include "kleidicv_thread/kleidicv_thread.h" | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <functional> | ||
| 11 | #include <limits> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "kleidicv/arithmetics/rotate.h" | ||
| 15 | #include "kleidicv/arithmetics/scale.h" | ||
| 16 | #include "kleidicv/conversions/rgb_to_yuv.h" | ||
| 17 | #include "kleidicv/conversions/yuv_to_rgb.h" | ||
| 18 | #include "kleidicv/ctypes.h" | ||
| 19 | #include "kleidicv/filters/blur_and_downsample.h" | ||
| 20 | #include "kleidicv/filters/gaussian_blur.h" | ||
| 21 | #include "kleidicv/filters/median_blur.h" | ||
| 22 | #include "kleidicv/filters/scharr.h" | ||
| 23 | #include "kleidicv/filters/separable_filter_2d.h" | ||
| 24 | #include "kleidicv/filters/sobel.h" | ||
| 25 | #include "kleidicv/kleidicv.h" | ||
| 26 | #include "kleidicv/resize/resize_linear.h" | ||
| 27 | #include "kleidicv/transform/remap.h" | ||
| 28 | #include "kleidicv/transform/warp_perspective.h" | ||
| 29 | |||
| 30 | typedef std::function<kleidicv_error_t(unsigned, unsigned)> FunctionCallback; | ||
| 31 | |||
| 32 | 30790 | static kleidicv_error_t kleidicv_thread_std_function_callback( | |
| 33 | unsigned task_begin, unsigned task_end, void *data) { | ||
| 34 | 30790 | auto *callback = reinterpret_cast<FunctionCallback *>(data); | |
| 35 | 61580 | return (*callback)(task_begin, task_end); | |
| 36 | 30790 | } | |
| 37 | |||
| 38 | // Operations in the Neon backend have both a vector path and a scalar path. | ||
| 39 | // The vector path is used to process most data and the scalar path is used to | ||
| 40 | // process the parts of the data that don't fit into the vector width. | ||
| 41 | // For floating point operations in particular, the results may be very slightly | ||
| 42 | // different between vector and scalar paths. | ||
| 43 | // | ||
| 44 | // When using multithreading, images are divided into parts to be processed by | ||
| 45 | // each thread, and this could change which parts of the data end up being | ||
| 46 | // processed by the vector and scalar paths. | ||
| 47 | // | ||
| 48 | // If an implementation is sensitive to these very slight differences, set | ||
| 49 | // min_batch_size to the Neon vector length (16 bytes). That makes every batch | ||
| 50 | // handed to a thread a multiple of the vector width; only the final batch may | ||
| 51 | // be longer to reach the end of the data. No batch can be shorter than vector | ||
| 52 | // length because that could change behaviour for operations that try to avoid | ||
| 53 | // the tail loop (see the TryToAvoidTailLoop class). | ||
| 54 | // This technique only works if the data is longer than vector length. | ||
| 55 | // | ||
| 56 | // On the other hand, measurements showed that increasing the batch size can | ||
| 57 | // cause degradation of the multithreaded performance. | ||
| 58 | template <typename Callback> | ||
| 59 | 14344 | inline kleidicv_error_t parallel_batches(Callback callback, | |
| 60 | kleidicv_thread_multithreading mt, | ||
| 61 | unsigned count, | ||
| 62 | unsigned min_batch_size = 1) { | ||
| 63 | 14344 | const unsigned task_count = std::max(1U, (count) / min_batch_size); | |
| 64 | 45134 | FunctionCallback f = [=](unsigned task_begin, unsigned task_end) { | |
| 65 | 30790 | unsigned begin = task_begin * min_batch_size, | |
| 66 | 30790 | end = task_end * min_batch_size; | |
| 67 |
126/126✓ Branch 0 taken 1160 times.
✓ Branch 1 taken 1000 times.
✓ Branch 2 taken 328 times.
✓ Branch 3 taken 120 times.
✓ Branch 4 taken 116 times.
✓ Branch 5 taken 100 times.
✓ Branch 6 taken 116 times.
✓ Branch 7 taken 100 times.
✓ Branch 8 taken 116 times.
✓ Branch 9 taken 100 times.
✓ Branch 10 taken 116 times.
✓ Branch 11 taken 100 times.
✓ Branch 12 taken 232 times.
✓ Branch 13 taken 200 times.
✓ Branch 14 taken 116 times.
✓ Branch 15 taken 100 times.
✓ Branch 16 taken 128 times.
✓ Branch 17 taken 112 times.
✓ Branch 18 taken 328 times.
✓ Branch 19 taken 120 times.
✓ Branch 20 taken 116 times.
✓ Branch 21 taken 100 times.
✓ Branch 22 taken 348 times.
✓ Branch 23 taken 300 times.
✓ Branch 24 taken 696 times.
✓ Branch 25 taken 600 times.
✓ Branch 26 taken 348 times.
✓ Branch 27 taken 300 times.
✓ Branch 28 taken 348 times.
✓ Branch 29 taken 300 times.
✓ Branch 30 taken 348 times.
✓ Branch 31 taken 300 times.
✓ Branch 32 taken 232 times.
✓ Branch 33 taken 200 times.
✓ Branch 34 taken 232 times.
✓ Branch 35 taken 200 times.
✓ Branch 36 taken 232 times.
✓ Branch 37 taken 200 times.
✓ Branch 38 taken 116 times.
✓ Branch 39 taken 100 times.
✓ Branch 40 taken 116 times.
✓ Branch 41 taken 100 times.
✓ Branch 42 taken 116 times.
✓ Branch 43 taken 100 times.
✓ Branch 44 taken 116 times.
✓ Branch 45 taken 100 times.
✓ Branch 46 taken 116 times.
✓ Branch 47 taken 100 times.
✓ Branch 48 taken 116 times.
✓ Branch 49 taken 100 times.
✓ Branch 50 taken 96 times.
✓ Branch 51 taken 160 times.
✓ Branch 52 taken 2325 times.
✓ Branch 53 taken 1895 times.
✓ Branch 54 taken 820 times.
✓ Branch 55 taken 865 times.
✓ Branch 56 taken 720 times.
✓ Branch 57 taken 800 times.
✓ Branch 58 taken 740 times.
✓ Branch 59 taken 820 times.
✓ Branch 60 taken 520 times.
✓ Branch 61 taken 720 times.
✓ Branch 62 taken 60 times.
✓ Branch 63 taken 60 times.
✓ Branch 64 taken 60 times.
✓ Branch 65 taken 60 times.
✓ Branch 66 taken 60 times.
✓ Branch 67 taken 60 times.
✓ Branch 68 taken 60 times.
✓ Branch 69 taken 60 times.
✓ Branch 70 taken 60 times.
✓ Branch 71 taken 60 times.
✓ Branch 72 taken 76 times.
✓ Branch 73 taken 68 times.
✓ Branch 74 taken 60 times.
✓ Branch 75 taken 60 times.
✓ Branch 76 taken 56 times.
✓ Branch 77 taken 28 times.
✓ Branch 78 taken 16 times.
✓ Branch 79 taken 4 times.
✓ Branch 80 taken 56 times.
✓ Branch 81 taken 28 times.
✓ Branch 82 taken 56 times.
✓ Branch 83 taken 28 times.
✓ Branch 84 taken 56 times.
✓ Branch 85 taken 28 times.
✓ Branch 86 taken 56 times.
✓ Branch 87 taken 28 times.
✓ Branch 88 taken 152 times.
✓ Branch 89 taken 112 times.
✓ Branch 90 taken 180 times.
✓ Branch 91 taken 100 times.
✓ Branch 92 taken 48 times.
✓ Branch 93 taken 12 times.
✓ Branch 94 taken 16 times.
✓ Branch 95 taken 4 times.
✓ Branch 96 taken 180 times.
✓ Branch 97 taken 100 times.
✓ Branch 98 taken 180 times.
✓ Branch 99 taken 100 times.
✓ Branch 100 taken 180 times.
✓ Branch 101 taken 100 times.
✓ Branch 102 taken 152 times.
✓ Branch 103 taken 112 times.
✓ Branch 104 taken 116 times.
✓ Branch 105 taken 100 times.
✓ Branch 106 taken 110 times.
✓ Branch 107 taken 120 times.
✓ Branch 108 taken 110 times.
✓ Branch 109 taken 120 times.
✓ Branch 110 taken 165 times.
✓ Branch 111 taken 180 times.
✓ Branch 112 taken 232 times.
✓ Branch 113 taken 200 times.
✓ Branch 114 taken 232 times.
✓ Branch 115 taken 200 times.
✓ Branch 116 taken 464 times.
✓ Branch 117 taken 400 times.
✓ Branch 118 taken 464 times.
✓ Branch 119 taken 400 times.
✓ Branch 120 taken 464 times.
✓ Branch 121 taken 400 times.
✓ Branch 122 taken 464 times.
✓ Branch 123 taken 400 times.
✓ Branch 124 taken 232 times.
✓ Branch 125 taken 200 times.
|
30790 | if (task_end == task_count) { |
| 68 | 14344 | end = count; | |
| 69 | 14344 | } | |
| 70 | 61580 | return callback(begin, end); | |
| 71 | 30790 | }; | |
| 72 | 28688 | return mt.parallel(kleidicv_thread_std_function_callback, &f, | |
| 73 | 14344 | mt.parallel_data, task_count); | |
| 74 | 14344 | } | |
| 75 | |||
| 76 | template <typename SrcT, typename DstT, typename F, typename... Args> | ||
| 77 | 4047 | inline kleidicv_error_t kleidicv_thread_unary_op_impl( | |
| 78 | F f, kleidicv_thread_multithreading mt, const SrcT *src, size_t src_stride, | ||
| 79 | DstT *dst, size_t dst_stride, size_t width, size_t height, Args... args) { | ||
| 80 | 13291 | auto callback = [=](unsigned begin, unsigned end) { | |
| 81 | 18488 | return f(src + static_cast<ptrdiff_t>(begin * src_stride / sizeof(SrcT)), | |
| 82 | 9244 | src_stride, | |
| 83 | 9244 | dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(DstT)), | |
| 84 | 9244 | dst_stride, width, end - begin, args...); | |
| 85 | }; | ||
| 86 | 8094 | return parallel_batches(callback, mt, height); | |
| 87 | 4047 | } | |
| 88 | |||
| 89 | template <typename SrcT, typename DstT, typename F, typename... Args> | ||
| 90 | 3000 | inline kleidicv_error_t kleidicv_thread_binary_op_impl( | |
| 91 | F f, kleidicv_thread_multithreading mt, const SrcT *src_a, | ||
| 92 | size_t src_a_stride, const SrcT *src_b, size_t src_b_stride, DstT *dst, | ||
| 93 | size_t dst_stride, size_t width, size_t height, Args... args) { | ||
| 94 | 9480 | auto callback = [=](unsigned begin, unsigned end) { | |
| 95 | 12960 | return f( | |
| 96 | 6480 | src_a + static_cast<ptrdiff_t>(begin * src_a_stride / sizeof(SrcT)), | |
| 97 | 6480 | src_a_stride, | |
| 98 | 6480 | src_b + static_cast<ptrdiff_t>(begin * src_b_stride / sizeof(SrcT)), | |
| 99 | 6480 | src_b_stride, | |
| 100 | 6480 | dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(DstT)), | |
| 101 | 6480 | dst_stride, width, end - begin, args...); | |
| 102 | }; | ||
| 103 | 6000 | return parallel_batches(callback, mt, height); | |
| 104 | 3000 | } | |
| 105 | |||
| 106 | #define KLEIDICV_THREAD_UNARY_OP_IMPL(suffix, src_type, dst_type) \ | ||
| 107 | kleidicv_error_t kleidicv_thread_##suffix( \ | ||
| 108 | const src_type *src, size_t src_stride, dst_type *dst, \ | ||
| 109 | size_t dst_stride, size_t width, size_t height, \ | ||
| 110 | kleidicv_thread_multithreading mt) { \ | ||
| 111 | return kleidicv_thread_unary_op_impl(kleidicv_##suffix, mt, src, \ | ||
| 112 | src_stride, dst, dst_stride, width, \ | ||
| 113 | height); \ | ||
| 114 | } | ||
| 115 | |||
| 116 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(gray_to_rgb_u8, uint8_t, uint8_t); | |
| 117 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(gray_to_rgba_u8, uint8_t, uint8_t); | |
| 118 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_bgr_u8, uint8_t, uint8_t); | |
| 119 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_rgb_u8, uint8_t, uint8_t); | |
| 120 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_bgra_u8, uint8_t, uint8_t); | |
| 121 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_rgba_u8, uint8_t, uint8_t); | |
| 122 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_bgra_u8, uint8_t, uint8_t); | |
| 123 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_rgba_u8, uint8_t, uint8_t); | |
| 124 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_bgr_u8, uint8_t, uint8_t); | |
| 125 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_rgb_u8, uint8_t, uint8_t); | |
| 126 | 120 | KLEIDICV_THREAD_UNARY_OP_IMPL(exp_f32, float, float); | |
| 127 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(f32_to_s8, float, int8_t); | |
| 128 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(f32_to_u8, float, uint8_t); | |
| 129 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(s8_to_f32, int8_t, float); | |
| 130 | 100 | KLEIDICV_THREAD_UNARY_OP_IMPL(u8_to_f32, uint8_t, float); | |
| 131 | |||
| 132 | #define KLEIDICV_THREAD_INRANGE_OP_IMPL(suffix, src_type, dst_type) \ | ||
| 133 | kleidicv_error_t kleidicv_thread_##suffix( \ | ||
| 134 | const src_type *src, size_t src_stride, dst_type *dst, \ | ||
| 135 | size_t dst_stride, size_t width, size_t height, src_type lower_bound, \ | ||
| 136 | src_type upper_bound, kleidicv_thread_multithreading mt) { \ | ||
| 137 | return kleidicv_thread_unary_op_impl(kleidicv_##suffix, mt, src, \ | ||
| 138 | src_stride, dst, dst_stride, width, \ | ||
| 139 | height, lower_bound, upper_bound); \ | ||
| 140 | } | ||
| 141 | |||
| 142 | 100 | KLEIDICV_THREAD_INRANGE_OP_IMPL(in_range_u8, uint8_t, uint8_t); | |
| 143 | 100 | KLEIDICV_THREAD_INRANGE_OP_IMPL(in_range_f32, float, uint8_t); | |
| 144 | |||
| 145 | 100 | kleidicv_error_t kleidicv_thread_threshold_binary_u8( | |
| 146 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
| 147 | size_t width, size_t height, uint8_t threshold, uint8_t value, | ||
| 148 | kleidicv_thread_multithreading mt) { | ||
| 149 | 200 | return kleidicv_thread_unary_op_impl(kleidicv_threshold_binary_u8, mt, src, | |
| 150 | 100 | src_stride, dst, dst_stride, width, | |
| 151 | 100 | height, threshold, value); | |
| 152 | } | ||
| 153 | |||
| 154 | 116 | kleidicv_error_t kleidicv_thread_scale_u8(const uint8_t *src, size_t src_stride, | |
| 155 | uint8_t *dst, size_t dst_stride, | ||
| 156 | size_t width, size_t height, | ||
| 157 | double scale, double shift, | ||
| 158 | kleidicv_thread_multithreading mt) { | ||
| 159 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 116 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 116 times.
|
116 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
| 160 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 116 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 116 times.
|
116 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
| 161 |
5/6✗ Branch 0 not taken.
✓ Branch 1 taken 116 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 112 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 112 times.
|
116 | CHECK_IMAGE_SIZE(width, height); |
| 162 | |||
| 163 | 112 | const std::array<uint8_t, 256> precalculated_table = | |
| 164 | 112 | kleidicv::neon::precalculate_scale_table_u8(scale, shift); | |
| 165 | 112 | return kleidicv_thread_unary_op_impl( | |
| 166 | 112 | kleidicv::neon::scale_with_precalculated_table_u8, mt, src, src_stride, | |
| 167 | 112 | dst, dst_stride, width, height, scale, shift, precalculated_table); | |
| 168 | 116 | } | |
| 169 | |||
| 170 | 120 | kleidicv_error_t kleidicv_thread_scale_f32(const float *src, size_t src_stride, | |
| 171 | float *dst, size_t dst_stride, | ||
| 172 | size_t width, size_t height, | ||
| 173 | double scale, double shift, | ||
| 174 | kleidicv_thread_multithreading mt) { | ||
| 175 | 240 | return kleidicv_thread_unary_op_impl(kleidicv_scale_f32, mt, src, src_stride, | |
| 176 | 120 | dst, dst_stride, width, height, scale, | |
| 177 | 120 | shift); | |
| 178 | } | ||
| 179 | |||
| 180 | 100 | kleidicv_error_t kleidicv_thread_scale_u8_f16( | |
| 181 | const uint8_t *src, size_t src_stride, float16_t *dst, size_t dst_stride, | ||
| 182 | size_t width, size_t height, double scale, double shift, | ||
| 183 | kleidicv_thread_multithreading mt) { | ||
| 184 | 200 | return kleidicv_thread_unary_op_impl(kleidicv_scale_u8_f16, mt, src, | |
| 185 | 100 | src_stride, dst, dst_stride, width, | |
| 186 | 100 | height, scale, shift); | |
| 187 | } | ||
| 188 | |||
| 189 | #define KLEIDICV_THREAD_BINARY_OP_IMPL(suffix, type) \ | ||
| 190 | kleidicv_error_t kleidicv_thread_##suffix( \ | ||
| 191 | const type *src_a, size_t src_a_stride, const type *src_b, \ | ||
| 192 | size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \ | ||
| 193 | size_t height, kleidicv_thread_multithreading mt) { \ | ||
| 194 | return kleidicv_thread_binary_op_impl(kleidicv_##suffix, mt, src_a, \ | ||
| 195 | src_a_stride, src_b, src_b_stride, \ | ||
| 196 | dst, dst_stride, width, height); \ | ||
| 197 | } | ||
| 198 | |||
| 199 | #define KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(suffix, type, scaletype) \ | ||
| 200 | kleidicv_error_t kleidicv_thread_##suffix( \ | ||
| 201 | const type *src_a, size_t src_a_stride, const type *src_b, \ | ||
| 202 | size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \ | ||
| 203 | size_t height, scaletype scale, kleidicv_thread_multithreading mt) { \ | ||
| 204 | return kleidicv_thread_binary_op_impl( \ | ||
| 205 | kleidicv_##suffix, mt, src_a, src_a_stride, src_b, src_b_stride, dst, \ | ||
| 206 | dst_stride, width, height, scale); \ | ||
| 207 | } | ||
| 208 | |||
| 209 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_s8, int8_t); | |
| 210 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_u8, uint8_t); | |
| 211 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_s16, int16_t); | |
| 212 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_u16, uint16_t); | |
| 213 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_s32, int32_t); | |
| 214 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_u32, uint32_t); | |
| 215 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_s64, int64_t); | |
| 216 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_u64, uint64_t); | |
| 217 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_s8, int8_t); | |
| 218 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_u8, uint8_t); | |
| 219 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_s16, int16_t); | |
| 220 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_u16, uint16_t); | |
| 221 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_s32, int32_t); | |
| 222 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_u32, uint32_t); | |
| 223 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_s64, int64_t); | |
| 224 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_u64, uint64_t); | |
| 225 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_u8, uint8_t); | |
| 226 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_s8, int8_t); | |
| 227 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_u16, uint16_t); | |
| 228 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_s16, int16_t); | |
| 229 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_s32, int32_t); | |
| 230 | 100 | KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_u8, uint8_t, double); | |
| 231 | 100 | KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_s8, int8_t, double); | |
| 232 | 100 | KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_u16, uint16_t, double); | |
| 233 | 100 | KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_s16, int16_t, double); | |
| 234 | 100 | KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_s32, int32_t, double); | |
| 235 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(bitwise_and, uint8_t); | |
| 236 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(compare_equal_u8, uint8_t); | |
| 237 | 100 | KLEIDICV_THREAD_BINARY_OP_IMPL(compare_greater_u8, uint8_t); | |
| 238 | |||
| 239 | 100 | kleidicv_error_t kleidicv_thread_saturating_add_abs_with_threshold_s16( | |
| 240 | const int16_t *src_a, size_t src_a_stride, const int16_t *src_b, | ||
| 241 | size_t src_b_stride, int16_t *dst, size_t dst_stride, size_t width, | ||
| 242 | size_t height, int16_t threshold, kleidicv_thread_multithreading mt) { | ||
| 243 | 100 | return kleidicv_thread_binary_op_impl( | |
| 244 | 100 | kleidicv_saturating_add_abs_with_threshold_s16, mt, src_a, src_a_stride, | |
| 245 | 100 | src_b, src_b_stride, dst, dst_stride, width, height, threshold); | |
| 246 | } | ||
| 247 | |||
| 248 | 172 | kleidicv_error_t kleidicv_thread_rotate(const void *src, size_t src_stride, | |
| 249 | size_t width, size_t height, void *dst, | ||
| 250 | size_t dst_stride, int angle, | ||
| 251 | size_t element_size, | ||
| 252 | kleidicv_thread_multithreading mt) { | ||
| 253 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 160 times.
|
172 | if (!kleidicv::rotate_is_implemented(src, dst, angle, element_size)) { |
| 254 | 12 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 255 | } | ||
| 256 | // reading in columns and writing out rows tends to perform better | ||
| 257 | 416 | auto callback = [=](unsigned begin, unsigned end) { | |
| 258 | 512 | return kleidicv_rotate( | |
| 259 | 256 | static_cast<const uint8_t *>(src) + begin * element_size, src_stride, | |
| 260 | 256 | end - begin, height, static_cast<uint8_t *>(dst) + begin * dst_stride, | |
| 261 | 256 | dst_stride, angle, element_size); | |
| 262 | }; | ||
| 263 | 160 | return parallel_batches(callback, mt, width, 64); | |
| 264 | 172 | } | |
| 265 | |||
| 266 | 2045 | kleidicv_error_t kleidicv_thread_yuv_to_rgb_u8( | |
| 267 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
| 268 | size_t width, size_t height, kleidicv_color_conversion_t color_format, | ||
| 269 | kleidicv_thread_multithreading mt) { | ||
| 270 | // Extract the base format | ||
| 271 | 4090 | const size_t base_format = static_cast<size_t>( | |
| 272 | 2045 | color_format & KLEIDICV_COLOR_CONVERSION_YUV_FMT_MASK); | |
| 273 |
2/2✓ Branch 0 taken 400 times.
✓ Branch 1 taken 1645 times.
|
2045 | if (base_format == KLEIDICV_COLOR_CONVERSION_FMT_YUV444) { |
| 274 | 800 | return kleidicv_thread_unary_op_impl(kleidicv_yuv444_to_rgb_u8, mt, src, | |
| 275 | 400 | src_stride, dst, dst_stride, width, | |
| 276 | 400 | height, color_format); | |
| 277 | } | ||
| 278 | |||
| 279 |
2/2✓ Branch 0 taken 780 times.
✓ Branch 1 taken 865 times.
|
1645 | if (base_format == KLEIDICV_COLOR_CONVERSION_FMT_YUV422) { |
| 280 | 1560 | return kleidicv_thread_unary_op_impl(kleidicv_yuv422_to_rgb_u8, mt, src, | |
| 281 | 780 | src_stride, dst, dst_stride, width, | |
| 282 | 780 | height, color_format); | |
| 283 | } | ||
| 284 | |||
| 285 | 2550 | auto callback = [=](unsigned begin, unsigned end) { | |
| 286 | 3370 | return kleidicv_yuv420p_to_rgb_stripe_u8( | |
| 287 | 1685 | src, src_stride, dst, dst_stride, width, height, color_format, | |
| 288 | 1685 | static_cast<size_t>(begin), static_cast<size_t>(end)); | |
| 289 | }; | ||
| 290 | 865 | return parallel_batches(callback, mt, (height + 1) / 2); | |
| 291 | 2045 | } | |
| 292 | |||
| 293 | 800 | kleidicv_error_t kleidicv_thread_rgb_to_yuv_semiplanar_u8( | |
| 294 | const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, | ||
| 295 | uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, | ||
| 296 | kleidicv_color_conversion_t color_format, | ||
| 297 | kleidicv_thread_multithreading mt) { | ||
| 298 | 2320 | auto callback = [=](unsigned begin, unsigned end) { | |
| 299 | 3040 | return kleidicv_rgb_to_yuv420sp_stripe_u8( | |
| 300 | 1520 | src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, | |
| 301 | 1520 | color_format, static_cast<size_t>(begin), static_cast<size_t>(end)); | |
| 302 | }; | ||
| 303 | 1600 | return parallel_batches(callback, mt, (height + 1) / 2); | |
| 304 | 800 | } | |
| 305 | |||
| 306 | 1535 | kleidicv_error_t kleidicv_thread_rgb_to_yuv_u8( | |
| 307 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
| 308 | size_t width, size_t height, kleidicv_color_conversion_t color_format, | ||
| 309 | kleidicv_thread_multithreading mt) { | ||
| 310 | // Extract the base format | ||
| 311 | 3070 | const size_t base_format = static_cast<size_t>( | |
| 312 | 1535 | color_format & KLEIDICV_COLOR_CONVERSION_YUV_FMT_MASK); | |
| 313 |
2/2✓ Branch 0 taken 400 times.
✓ Branch 1 taken 1135 times.
|
1535 | if (base_format == KLEIDICV_COLOR_CONVERSION_FMT_YUV444) { |
| 314 | 800 | return kleidicv_thread_unary_op_impl(kleidicv_rgb_to_yuv444_u8, mt, src, | |
| 315 | 400 | src_stride, dst, dst_stride, width, | |
| 316 | 400 | height, color_format); | |
| 317 | } | ||
| 318 | |||
| 319 |
2/2✓ Branch 0 taken 315 times.
✓ Branch 1 taken 820 times.
|
1135 | if (base_format == KLEIDICV_COLOR_CONVERSION_FMT_YUV422) { |
| 320 | 630 | return kleidicv_thread_unary_op_impl(kleidicv_rgb_to_yuv422_u8, mt, src, | |
| 321 | 315 | src_stride, dst, dst_stride, width, | |
| 322 | 315 | height, color_format); | |
| 323 | } | ||
| 324 | |||
| 325 | 2380 | auto callback = [=](unsigned begin, unsigned end) { | |
| 326 | 3120 | return kleidicv_rgb_to_yuv420p_stripe_u8( | |
| 327 | 1560 | src, src_stride, dst, dst_stride, width, height, color_format, | |
| 328 | 1560 | static_cast<size_t>(begin), static_cast<size_t>(end)); | |
| 329 | }; | ||
| 330 | 820 | return parallel_batches(callback, mt, (height + 1) / 2); | |
| 331 | 1535 | } | |
| 332 | |||
| 333 | 720 | kleidicv_error_t kleidicv_thread_yuv_semiplanar_to_rgb_u8( | |
| 334 | const uint8_t *src_y, size_t src_y_stride, const uint8_t *src_uv, | ||
| 335 | size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width, | ||
| 336 | size_t height, kleidicv_color_conversion_t color_format, | ||
| 337 | kleidicv_thread_multithreading mt) { | ||
| 338 | 1960 | auto callback = [=](unsigned begin, unsigned end) { | |
| 339 | 1240 | size_t row_begin = size_t{begin} * 2; | |
| 340 | 1240 | size_t row_end = std::min<size_t>(height, size_t{end} * 2); | |
| 341 | 1240 | size_t row_uv = begin; | |
| 342 | 3720 | return kleidicv_yuv_semiplanar_to_rgb_u8( | |
| 343 | 1240 | src_y + row_begin * src_y_stride, src_y_stride, | |
| 344 | 1240 | src_uv + row_uv * src_uv_stride, src_uv_stride, | |
| 345 | 1240 | dst + row_begin * dst_stride, dst_stride, width, row_end - row_begin, | |
| 346 | 1240 | color_format); | |
| 347 | 1240 | }; | |
| 348 | 1440 | return parallel_batches(callback, mt, (height + 1) / 2); | |
| 349 | 720 | } | |
| 350 | |||
| 351 | template <typename ScalarType, typename FunctionType> | ||
| 352 | 368 | kleidicv_error_t parallel_min_max(FunctionType min_max_func, | |
| 353 | const ScalarType *src, size_t src_stride, | ||
| 354 | size_t width, size_t height, | ||
| 355 | ScalarType *p_min_value, | ||
| 356 | ScalarType *p_max_value, | ||
| 357 | kleidicv_thread_multithreading mt) { | ||
| 358 | 736 | std::vector<ScalarType> min_values(height, | |
| 359 | 368 | std::numeric_limits<ScalarType>::max()); | |
| 360 | 736 | std::vector<ScalarType> max_values(height, | |
| 361 | 368 | std::numeric_limits<ScalarType>::lowest()); | |
| 362 | |||
| 363 | 1112 | auto callback = [&](unsigned begin, unsigned end) { | |
| 364 | 1488 | return min_max_func(src + begin * (src_stride / sizeof(ScalarType)), | |
| 365 | 744 | src_stride, width, end - begin, | |
| 366 |
12/12✓ Branch 0 taken 104 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 104 times.
✓ Branch 3 taken 16 times.
✓ Branch 4 taken 104 times.
✓ Branch 5 taken 16 times.
✓ Branch 6 taken 104 times.
✓ Branch 7 taken 16 times.
✓ Branch 8 taken 104 times.
✓ Branch 9 taken 16 times.
✓ Branch 10 taken 128 times.
✓ Branch 11 taken 16 times.
|
744 | p_min_value ? min_values.data() + begin : nullptr, |
| 367 |
12/12✓ Branch 0 taken 104 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 104 times.
✓ Branch 3 taken 16 times.
✓ Branch 4 taken 104 times.
✓ Branch 5 taken 16 times.
✓ Branch 6 taken 104 times.
✓ Branch 7 taken 16 times.
✓ Branch 8 taken 104 times.
✓ Branch 9 taken 16 times.
✓ Branch 10 taken 128 times.
✓ Branch 11 taken 16 times.
|
744 | p_max_value ? max_values.data() + begin : nullptr); |
| 368 | }; | ||
| 369 | |||
| 370 | 368 | auto return_val = parallel_batches(callback, mt, height); | |
| 371 | |||
| 372 |
12/12✓ Branch 0 taken 52 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 52 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 52 times.
✓ Branch 5 taken 8 times.
✓ Branch 6 taken 52 times.
✓ Branch 7 taken 8 times.
✓ Branch 8 taken 52 times.
✓ Branch 9 taken 8 times.
✓ Branch 10 taken 60 times.
✓ Branch 11 taken 8 times.
|
368 | if (p_min_value) { |
| 373 | 320 | *p_min_value = std::numeric_limits<ScalarType>::max(); | |
| 374 |
12/12✓ Branch 0 taken 288 times.
✓ Branch 1 taken 52 times.
✓ Branch 2 taken 288 times.
✓ Branch 3 taken 52 times.
✓ Branch 4 taken 288 times.
✓ Branch 5 taken 52 times.
✓ Branch 6 taken 288 times.
✓ Branch 7 taken 52 times.
✓ Branch 8 taken 288 times.
✓ Branch 9 taken 52 times.
✓ Branch 10 taken 368 times.
✓ Branch 11 taken 60 times.
|
2128 | for (ScalarType m : min_values) { |
| 375 |
12/12✓ Branch 0 taken 214 times.
✓ Branch 1 taken 74 times.
✓ Branch 2 taken 214 times.
✓ Branch 3 taken 74 times.
✓ Branch 4 taken 214 times.
✓ Branch 5 taken 74 times.
✓ Branch 6 taken 214 times.
✓ Branch 7 taken 74 times.
✓ Branch 8 taken 214 times.
✓ Branch 9 taken 74 times.
✓ Branch 10 taken 284 times.
✓ Branch 11 taken 84 times.
|
1808 | if (m < *p_min_value) { |
| 376 | 454 | *p_min_value = m; | |
| 377 | 454 | } | |
| 378 | 1808 | } | |
| 379 | 320 | } | |
| 380 |
12/12✓ Branch 0 taken 52 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 52 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 52 times.
✓ Branch 5 taken 8 times.
✓ Branch 6 taken 52 times.
✓ Branch 7 taken 8 times.
✓ Branch 8 taken 52 times.
✓ Branch 9 taken 8 times.
✓ Branch 10 taken 60 times.
✓ Branch 11 taken 8 times.
|
368 | if (p_max_value) { |
| 381 | 320 | *p_max_value = std::numeric_limits<ScalarType>::lowest(); | |
| 382 |
12/12✓ Branch 0 taken 288 times.
✓ Branch 1 taken 52 times.
✓ Branch 2 taken 288 times.
✓ Branch 3 taken 52 times.
✓ Branch 4 taken 288 times.
✓ Branch 5 taken 52 times.
✓ Branch 6 taken 288 times.
✓ Branch 7 taken 52 times.
✓ Branch 8 taken 288 times.
✓ Branch 9 taken 52 times.
✓ Branch 10 taken 368 times.
✓ Branch 11 taken 60 times.
|
2128 | for (ScalarType m : max_values) { |
| 383 |
12/12✓ Branch 0 taken 215 times.
✓ Branch 1 taken 73 times.
✓ Branch 2 taken 215 times.
✓ Branch 3 taken 73 times.
✓ Branch 4 taken 214 times.
✓ Branch 5 taken 74 times.
✓ Branch 6 taken 214 times.
✓ Branch 7 taken 74 times.
✓ Branch 8 taken 214 times.
✓ Branch 9 taken 74 times.
✓ Branch 10 taken 283 times.
✓ Branch 11 taken 85 times.
|
1808 | if (m > *p_max_value) { |
| 384 | 453 | *p_max_value = m; | |
| 385 | 453 | } | |
| 386 | 1808 | } | |
| 387 | 320 | } | |
| 388 | 368 | return return_val; | |
| 389 | 368 | } | |
| 390 | |||
| 391 | #define DEFINE_KLEIDICV_THREAD_MIN_MAX(suffix, type) \ | ||
| 392 | kleidicv_error_t kleidicv_thread_min_max_##suffix( \ | ||
| 393 | const type *src, size_t src_stride, size_t width, size_t height, \ | ||
| 394 | type *p_min_value, type *p_max_value, \ | ||
| 395 | kleidicv_thread_multithreading mt) { \ | ||
| 396 | return parallel_min_max(kleidicv_min_max_##suffix, src, src_stride, width, \ | ||
| 397 | height, p_min_value, p_max_value, mt); \ | ||
| 398 | } | ||
| 399 | |||
| 400 | 60 | DEFINE_KLEIDICV_THREAD_MIN_MAX(u8, uint8_t); | |
| 401 | 60 | DEFINE_KLEIDICV_THREAD_MIN_MAX(s8, int8_t); | |
| 402 | 60 | DEFINE_KLEIDICV_THREAD_MIN_MAX(u16, uint16_t); | |
| 403 | 60 | DEFINE_KLEIDICV_THREAD_MIN_MAX(s16, int16_t); | |
| 404 | 60 | DEFINE_KLEIDICV_THREAD_MIN_MAX(s32, int32_t); | |
| 405 | 68 | DEFINE_KLEIDICV_THREAD_MIN_MAX(f32, float); | |
| 406 | |||
| 407 | template <typename ScalarType, typename FunctionType> | ||
| 408 | 60 | kleidicv_error_t parallel_min_max_loc(FunctionType min_max_loc_func, | |
| 409 | const ScalarType *src, size_t src_stride, | ||
| 410 | size_t width, size_t height, | ||
| 411 | size_t *p_min_offset, | ||
| 412 | size_t *p_max_offset, | ||
| 413 | kleidicv_thread_multithreading mt) { | ||
| 414 | 60 | std::vector<size_t> min_offsets(height, 0); | |
| 415 | 60 | std::vector<size_t> max_offsets(height, 0); | |
| 416 | |||
| 417 | 180 | auto callback = [&](unsigned begin, unsigned end) { | |
| 418 | 240 | return min_max_loc_func( | |
| 419 | 120 | src + begin * (src_stride / sizeof(ScalarType)), src_stride, width, | |
| 420 |
2/2✓ Branch 0 taken 104 times.
✓ Branch 1 taken 16 times.
|
120 | end - begin, p_min_offset ? min_offsets.data() + begin : nullptr, |
| 421 |
2/2✓ Branch 0 taken 104 times.
✓ Branch 1 taken 16 times.
|
120 | p_max_offset ? max_offsets.data() + begin : nullptr); |
| 422 | }; | ||
| 423 | 60 | auto return_val = parallel_batches(callback, mt, height); | |
| 424 | |||
| 425 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 8 times.
|
60 | if (p_min_offset) { |
| 426 | 52 | *p_min_offset = 0; | |
| 427 |
2/2✓ Branch 0 taken 288 times.
✓ Branch 1 taken 52 times.
|
340 | for (size_t i = 0; i < min_offsets.size(); ++i) { |
| 428 | 288 | size_t offs = min_offsets[i] + i * src_stride; | |
| 429 |
4/4✓ Branch 0 taken 239 times.
✓ Branch 1 taken 49 times.
✓ Branch 2 taken 239 times.
✓ Branch 3 taken 49 times.
|
576 | if (src[offs / sizeof(ScalarType)] < |
| 430 | 288 | src[*p_min_offset / sizeof(ScalarType)]) { | |
| 431 | 49 | *p_min_offset = offs; | |
| 432 | 49 | } | |
| 433 | 288 | } | |
| 434 | 52 | } | |
| 435 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 8 times.
|
60 | if (p_max_offset) { |
| 436 | 52 | *p_max_offset = 0; | |
| 437 |
2/2✓ Branch 0 taken 288 times.
✓ Branch 1 taken 52 times.
|
340 | for (size_t i = 0; i < max_offsets.size(); ++i) { |
| 438 | 288 | size_t offs = max_offsets[i] + i * src_stride; | |
| 439 |
4/4✓ Branch 0 taken 239 times.
✓ Branch 1 taken 49 times.
✓ Branch 2 taken 239 times.
✓ Branch 3 taken 49 times.
|
576 | if (src[offs / sizeof(ScalarType)] > |
| 440 | 288 | src[*p_max_offset / sizeof(ScalarType)]) { | |
| 441 | 49 | *p_max_offset = offs; | |
| 442 | 49 | } | |
| 443 | 288 | } | |
| 444 | 52 | } | |
| 445 | 60 | return return_val; | |
| 446 | 60 | } | |
| 447 | |||
| 448 | #define DEFINE_KLEIDICV_THREAD_MIN_MAX_LOC(suffix, type) \ | ||
| 449 | kleidicv_error_t kleidicv_thread_min_max_loc_##suffix( \ | ||
| 450 | const type *src, size_t src_stride, size_t width, size_t height, \ | ||
| 451 | size_t *p_min_offset, size_t *p_max_offset, \ | ||
| 452 | kleidicv_thread_multithreading mt) { \ | ||
| 453 | return parallel_min_max_loc(kleidicv_min_max_loc_##suffix, src, \ | ||
| 454 | src_stride, width, height, p_min_offset, \ | ||
| 455 | p_max_offset, mt); \ | ||
| 456 | } | ||
| 457 | |||
| 458 | 60 | DEFINE_KLEIDICV_THREAD_MIN_MAX_LOC(u8, uint8_t); | |
| 459 | |||
| 460 | template <typename F> | ||
| 461 | 112 | kleidicv_error_t kleidicv_thread_filter(F filter, size_t width, size_t height, | |
| 462 | size_t channels, size_t kernel_width, | ||
| 463 | size_t kernel_height, | ||
| 464 | kleidicv_filter_context_t *context, | ||
| 465 | kleidicv_thread_multithreading mt) { | ||
| 466 | 448 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 467 | // The context contains a buffer that can only fit a single row, so can't be | ||
| 468 | // shared between threads. Since we don't know how many threads there are, | ||
| 469 | // create and destroy a context every time this callback is called. Only use | ||
| 470 | // the context argument for the first thread. | ||
| 471 | 336 | bool create_context = 0 != y_begin; | |
| 472 | 336 | kleidicv_filter_context_t *thread_context = context; | |
| 473 |
8/8✓ Branch 0 taken 28 times.
✓ Branch 1 taken 56 times.
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 56 times.
✓ Branch 4 taken 28 times.
✓ Branch 5 taken 56 times.
✓ Branch 6 taken 28 times.
✓ Branch 7 taken 56 times.
|
336 | if (create_context) { |
| 474 | 448 | kleidicv_error_t context_create_result = kleidicv_filter_context_create( | |
| 475 | 224 | &thread_context, channels, kernel_width, kernel_height, width, | |
| 476 | 224 | height); | |
| 477 | // Excluded from coverage because it's impractical to test this. | ||
| 478 | // MockMallocToFail can't be used because malloc is used in thread setup. | ||
| 479 | // GCOVR_EXCL_START | ||
| 480 | − | if (KLEIDICV_OK != context_create_result) { | |
| 481 | − | return context_create_result; | |
| 482 | } | ||
| 483 | // GCOVR_EXCL_STOP | ||
| 484 | 224 | } | |
| 485 | |||
| 486 | 336 | kleidicv_error_t result = filter(y_begin, y_end, thread_context); | |
| 487 | |||
| 488 |
8/8✓ Branch 0 taken 28 times.
✓ Branch 1 taken 56 times.
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 56 times.
✓ Branch 4 taken 28 times.
✓ Branch 5 taken 56 times.
✓ Branch 6 taken 28 times.
✓ Branch 7 taken 56 times.
|
336 | if (create_context) { |
| 489 | 448 | kleidicv_error_t context_release_result = | |
| 490 | 224 | kleidicv_filter_context_release(thread_context); | |
| 491 |
4/8✗ Branch 0 not taken.
✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 56 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 56 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 56 times.
|
224 | if (KLEIDICV_OK == result) { |
| 492 | 224 | result = context_release_result; | |
| 493 | 224 | } | |
| 494 | 224 | } | |
| 495 | 336 | return result; | |
| 496 | 336 | }; | |
| 497 | 224 | return parallel_batches(callback, mt, height); | |
| 498 | 112 | } | |
| 499 | |||
| 500 | 208 | kleidicv_error_t kleidicv_thread_gaussian_blur_u8( | |
| 501 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
| 502 | size_t width, size_t height, size_t channels, size_t kernel_width, | ||
| 503 | size_t kernel_height, float sigma_x, float sigma_y, | ||
| 504 | kleidicv_border_type_t border_type, kleidicv_thread_multithreading mt) { | ||
| 505 | 208 | auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); | |
| 506 |
4/4✓ Branch 0 taken 204 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 172 times.
✓ Branch 3 taken 32 times.
|
208 | if (!fixed_border_type || |
| 507 | 408 | !kleidicv::gaussian_blur_is_implemented(width, height, kernel_width, | |
| 508 | 204 | kernel_height, sigma_x, sigma_y, | |
| 509 | 204 | channels, *fixed_border_type)) { | |
| 510 | 176 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 511 | } | ||
| 512 | |||
| 513 |
4/6✓ Branch 0 taken 4 times.
✓ Branch 1 taken 28 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 4 times.
|
32 | if (kernel_width <= 7 || kernel_width == 15 || kernel_width == 21) { |
| 514 | 112 | auto callback = [=](size_t y_begin, size_t y_end) { | |
| 515 | 168 | return kleidicv_gaussian_blur_fixed_stripe_u8( | |
| 516 | 84 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 517 | 84 | channels, kernel_width, kernel_height, sigma_x, sigma_y, | |
| 518 | 84 | *fixed_border_type); | |
| 519 | }; | ||
| 520 | 28 | return parallel_batches(callback, mt, height); | |
| 521 | 28 | } | |
| 522 | 24 | auto callback = [=](size_t y_begin, size_t y_end) { | |
| 523 | 40 | return kleidicv_gaussian_blur_arbitrary_stripe_u8( | |
| 524 | 20 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 525 | 20 | channels, kernel_width, kernel_height, sigma_x, sigma_y, | |
| 526 | 20 | *fixed_border_type); | |
| 527 | }; | ||
| 528 | 4 | return parallel_batches(callback, mt, height); | |
| 529 | 208 | } | |
| 530 | |||
| 531 | 108 | kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( | |
| 532 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
| 533 | size_t width, size_t height, size_t channels, const uint8_t *kernel_x, | ||
| 534 | size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, | ||
| 535 | kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, | ||
| 536 | kleidicv_thread_multithreading mt) { | ||
| 537 |
4/4✓ Branch 0 taken 76 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 76 times.
✓ Branch 3 taken 32 times.
|
216 | if (!kleidicv::separable_filter_2d_is_implemented(width, height, kernel_width, |
| 538 | 108 | kernel_height)) { | |
| 539 | 76 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 540 | } | ||
| 541 | |||
| 542 | 32 | auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); | |
| 543 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.
|
32 | if (!fixed_border_type) { |
| 544 | 4 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 545 | } | ||
| 546 | |||
| 547 | 112 | auto callback = [=](size_t y_begin, size_t y_end, | |
| 548 | kleidicv_filter_context_t *thread_context) { | ||
| 549 | 168 | return kleidicv_separable_filter_2d_stripe_u8( | |
| 550 | 84 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 551 | 84 | channels, kernel_x, kernel_width, kernel_y, kernel_height, | |
| 552 | 84 | *fixed_border_type, thread_context); | |
| 553 | }; | ||
| 554 | 56 | return kleidicv_thread_filter(callback, width, height, channels, kernel_width, | |
| 555 | 28 | kernel_height, context, mt); | |
| 556 | 108 | } | |
| 557 | |||
| 558 | 108 | kleidicv_error_t kleidicv_thread_separable_filter_2d_u16( | |
| 559 | const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, | ||
| 560 | size_t width, size_t height, size_t channels, const uint16_t *kernel_x, | ||
| 561 | size_t kernel_width, const uint16_t *kernel_y, size_t kernel_height, | ||
| 562 | kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, | ||
| 563 | kleidicv_thread_multithreading mt) { | ||
| 564 |
4/4✓ Branch 0 taken 76 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 76 times.
✓ Branch 3 taken 32 times.
|
216 | if (!kleidicv::separable_filter_2d_is_implemented(width, height, kernel_width, |
| 565 | 108 | kernel_height)) { | |
| 566 | 76 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 567 | } | ||
| 568 | |||
| 569 | 32 | auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); | |
| 570 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.
|
32 | if (!fixed_border_type) { |
| 571 | 4 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 572 | } | ||
| 573 | |||
| 574 | 112 | auto callback = [=](size_t y_begin, size_t y_end, | |
| 575 | kleidicv_filter_context_t *thread_context) { | ||
| 576 | 168 | return kleidicv_separable_filter_2d_stripe_u16( | |
| 577 | 84 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 578 | 84 | channels, kernel_x, kernel_width, kernel_y, kernel_height, | |
| 579 | 84 | *fixed_border_type, thread_context); | |
| 580 | }; | ||
| 581 | 56 | return kleidicv_thread_filter(callback, width, height, channels, kernel_width, | |
| 582 | 28 | kernel_height, context, mt); | |
| 583 | 108 | } | |
| 584 | |||
| 585 | 108 | kleidicv_error_t kleidicv_thread_separable_filter_2d_s16( | |
| 586 | const int16_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, | ||
| 587 | size_t width, size_t height, size_t channels, const int16_t *kernel_x, | ||
| 588 | size_t kernel_width, const int16_t *kernel_y, size_t kernel_height, | ||
| 589 | kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, | ||
| 590 | kleidicv_thread_multithreading mt) { | ||
| 591 |
4/4✓ Branch 0 taken 76 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 76 times.
✓ Branch 3 taken 32 times.
|
216 | if (!kleidicv::separable_filter_2d_is_implemented(width, height, kernel_width, |
| 592 | 108 | kernel_height)) { | |
| 593 | 76 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 594 | } | ||
| 595 | |||
| 596 | 32 | auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); | |
| 597 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.
|
32 | if (!fixed_border_type) { |
| 598 | 4 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 599 | } | ||
| 600 | |||
| 601 | 112 | auto callback = [=](size_t y_begin, size_t y_end, | |
| 602 | kleidicv_filter_context_t *thread_context) { | ||
| 603 | 168 | return kleidicv_separable_filter_2d_stripe_s16( | |
| 604 | 84 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 605 | 84 | channels, kernel_x, kernel_width, kernel_y, kernel_height, | |
| 606 | 84 | *fixed_border_type, thread_context); | |
| 607 | }; | ||
| 608 | 56 | return kleidicv_thread_filter(callback, width, height, channels, kernel_width, | |
| 609 | 28 | kernel_height, context, mt); | |
| 610 | 108 | } | |
| 611 | |||
| 612 | 108 | kleidicv_error_t kleidicv_thread_blur_and_downsample_u8( | |
| 613 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 614 | uint8_t *dst, size_t dst_stride, size_t channels, | ||
| 615 | kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, | ||
| 616 | kleidicv_thread_multithreading mt) { | ||
| 617 |
4/4✓ Branch 0 taken 76 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 76 times.
✓ Branch 3 taken 32 times.
|
216 | if (!kleidicv::blur_and_downsample_is_implemented(src_width, src_height, |
| 618 | 108 | channels)) { | |
| 619 | 76 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 620 | } | ||
| 621 | |||
| 622 | 32 | auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); | |
| 623 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.
|
32 | if (!fixed_border_type) { |
| 624 | 4 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 625 | } | ||
| 626 | |||
| 627 | 112 | auto callback = [=](unsigned y_begin, unsigned y_end, | |
| 628 | kleidicv_filter_context_t *thread_context) { | ||
| 629 | 168 | return kleidicv_blur_and_downsample_stripe_u8( | |
| 630 | 84 | src, src_stride, src_width, src_height, dst, dst_stride, y_begin, y_end, | |
| 631 | 84 | channels, *fixed_border_type, thread_context); | |
| 632 | }; | ||
| 633 | 56 | return kleidicv_thread_filter(callback, src_width, src_height, channels, 5, 5, | |
| 634 | 28 | context, mt); | |
| 635 | 108 | } | |
| 636 | |||
| 637 | 204 | kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( | |
| 638 | const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, | ||
| 639 | size_t width, size_t height, size_t channels, | ||
| 640 | kleidicv_thread_multithreading mt) { | ||
| 641 |
2/2✓ Branch 0 taken 92 times.
✓ Branch 1 taken 112 times.
|
204 | if (!kleidicv::sobel_is_implemented(width, height, 3)) { |
| 642 | 92 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 643 | } | ||
| 644 | |||
| 645 | 376 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 646 | 528 | return kleidicv_sobel_3x3_horizontal_stripe_s16_u8( | |
| 647 | 264 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 648 | 264 | channels); | |
| 649 | }; | ||
| 650 | 112 | return parallel_batches(callback, mt, height); | |
| 651 | 204 | } | |
| 652 | |||
| 653 | 532 | kleidicv_error_t kleidicv_thread_median_blur_u8( | |
| 654 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
| 655 | size_t width, size_t height, size_t channels, size_t kernel_width, | ||
| 656 | size_t kernel_height, kleidicv_border_type_t border_type, | ||
| 657 | kleidicv_thread_multithreading mt) { | ||
| 658 | 1064 | auto result_pair = kleidicv::median_blur_is_implemented( | |
| 659 | 532 | src, src_stride, dst, dst_stride, width, height, channels, kernel_width, | |
| 660 | 532 | kernel_height, border_type); | |
| 661 | |||
| 662 | 532 | auto checks_result = result_pair.first; | |
| 663 | 532 | auto fixed_border_type = result_pair.second; | |
| 664 |
2/2✓ Branch 0 taken 416 times.
✓ Branch 1 taken 116 times.
|
532 | if (checks_result != KLEIDICV_OK) { |
| 665 | 416 | return checks_result; | |
| 666 | } | ||
| 667 | |||
| 668 |
2/2✓ Branch 0 taken 100 times.
✓ Branch 1 taken 16 times.
|
116 | if (kernel_width <= 7) { |
| 669 | 380 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 670 | 560 | return kleidicv_median_blur_sorting_network_stripe_u8( | |
| 671 | 280 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 672 | 280 | channels, kernel_width, kernel_height, fixed_border_type); | |
| 673 | }; | ||
| 674 | 100 | return parallel_batches(callback, mt, height); | |
| 675 | 100 | } | |
| 676 | |||
| 677 |
3/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 12 times.
|
16 | if (kernel_width > 7 && kernel_width <= 15) { |
| 678 | 72 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 679 | 120 | return kleidicv_median_blur_small_hist_stripe_u8( | |
| 680 | 60 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 681 | 60 | channels, kernel_width, kernel_height, fixed_border_type); | |
| 682 | }; | ||
| 683 | 12 | return parallel_batches(callback, mt, height); | |
| 684 | 12 | } | |
| 685 | |||
| 686 | 24 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 687 | 40 | return kleidicv_median_blur_large_hist_stripe_u8( | |
| 688 | 20 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 689 | 20 | channels, kernel_width, kernel_height, fixed_border_type); | |
| 690 | }; | ||
| 691 | 4 | return parallel_batches(callback, mt, height); | |
| 692 | 532 | } | |
| 693 | |||
| 694 | 532 | kleidicv_error_t kleidicv_thread_median_blur_s16( | |
| 695 | const int16_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, | ||
| 696 | size_t width, size_t height, size_t channels, size_t kernel_width, | ||
| 697 | size_t kernel_height, kleidicv_border_type_t border_type, | ||
| 698 | kleidicv_thread_multithreading mt) { | ||
| 699 | 1064 | auto result_pair = kleidicv::median_blur_is_implemented( | |
| 700 | 532 | src, src_stride, dst, dst_stride, width, height, channels, kernel_width, | |
| 701 | 532 | kernel_height, border_type); | |
| 702 | |||
| 703 | 532 | auto checks_result = result_pair.first; | |
| 704 | 532 | auto fixed_border_type = result_pair.second; | |
| 705 |
2/2✓ Branch 0 taken 432 times.
✓ Branch 1 taken 100 times.
|
532 | if (checks_result != KLEIDICV_OK) { |
| 706 | 432 | return checks_result; | |
| 707 | } | ||
| 708 | |||
| 709 | 380 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 710 | 560 | return kleidicv_median_blur_sorting_network_stripe_s16( | |
| 711 | 280 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 712 | 280 | channels, kernel_width, kernel_height, fixed_border_type); | |
| 713 | }; | ||
| 714 | 100 | return parallel_batches(callback, mt, height); | |
| 715 | 532 | } | |
| 716 | |||
| 717 | 532 | kleidicv_error_t kleidicv_thread_median_blur_u16( | |
| 718 | const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, | ||
| 719 | size_t width, size_t height, size_t channels, size_t kernel_width, | ||
| 720 | size_t kernel_height, kleidicv_border_type_t border_type, | ||
| 721 | kleidicv_thread_multithreading mt) { | ||
| 722 | 1064 | auto result_pair = kleidicv::median_blur_is_implemented( | |
| 723 | 532 | src, src_stride, dst, dst_stride, width, height, channels, kernel_width, | |
| 724 | 532 | kernel_height, border_type); | |
| 725 | |||
| 726 | 532 | auto checks_result = result_pair.first; | |
| 727 | 532 | auto fixed_border_type = result_pair.second; | |
| 728 |
2/2✓ Branch 0 taken 432 times.
✓ Branch 1 taken 100 times.
|
532 | if (checks_result != KLEIDICV_OK) { |
| 729 | 432 | return checks_result; | |
| 730 | } | ||
| 731 | |||
| 732 | 380 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 733 | 560 | return kleidicv_median_blur_sorting_network_stripe_u16( | |
| 734 | 280 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 735 | 280 | channels, kernel_width, kernel_height, fixed_border_type); | |
| 736 | }; | ||
| 737 | 100 | return parallel_batches(callback, mt, height); | |
| 738 | 532 | } | |
| 739 | |||
| 740 | 532 | kleidicv_error_t kleidicv_thread_median_blur_f32( | |
| 741 | const float *src, size_t src_stride, float *dst, size_t dst_stride, | ||
| 742 | size_t width, size_t height, size_t channels, size_t kernel_width, | ||
| 743 | size_t kernel_height, kleidicv_border_type_t border_type, | ||
| 744 | kleidicv_thread_multithreading mt) { | ||
| 745 | 1064 | auto result_pair = kleidicv::median_blur_is_implemented( | |
| 746 | 532 | src, src_stride, dst, dst_stride, width, height, channels, kernel_width, | |
| 747 | 532 | kernel_height, border_type); | |
| 748 | |||
| 749 | 532 | auto checks_result = result_pair.first; | |
| 750 | 532 | auto fixed_border_type = result_pair.second; | |
| 751 |
2/2✓ Branch 0 taken 432 times.
✓ Branch 1 taken 100 times.
|
532 | if (checks_result != KLEIDICV_OK) { |
| 752 | 432 | return checks_result; | |
| 753 | } | ||
| 754 | |||
| 755 | 380 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 756 | 560 | return kleidicv_median_blur_sorting_network_stripe_f32( | |
| 757 | 280 | src, src_stride, dst, dst_stride, width, height, y_begin, y_end, | |
| 758 | 280 | channels, kernel_width, kernel_height, fixed_border_type); | |
| 759 | }; | ||
| 760 | 100 | return parallel_batches(callback, mt, height); | |
| 761 | 532 | } | |
| 762 | |||
| 763 | 204 | kleidicv_error_t kleidicv_thread_sobel_3x3_vertical_s16_u8( | |
| 764 | const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, | ||
| 765 | size_t width, size_t height, size_t channels, | ||
| 766 | kleidicv_thread_multithreading mt) { | ||
| 767 |
2/2✓ Branch 0 taken 92 times.
✓ Branch 1 taken 112 times.
|
204 | if (!kleidicv::sobel_is_implemented(width, height, 3)) { |
| 768 | 92 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 769 | } | ||
| 770 | |||
| 771 | 376 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 772 | 528 | return kleidicv_sobel_3x3_vertical_stripe_s16_u8(src, src_stride, dst, | |
| 773 | 264 | dst_stride, width, height, | |
| 774 | 264 | y_begin, y_end, channels); | |
| 775 | }; | ||
| 776 | 112 | return parallel_batches(callback, mt, height); | |
| 777 | 204 | } | |
| 778 | |||
| 779 | 104 | kleidicv_error_t kleidicv_thread_scharr_interleaved_s16_u8( | |
| 780 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 781 | size_t src_channels, int16_t *dst, size_t dst_stride, | ||
| 782 | kleidicv_thread_multithreading mt) { | ||
| 783 |
4/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 100 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 100 times.
|
208 | if (!kleidicv::scharr_interleaved_is_implemented(src_width, src_height, |
| 784 | 104 | src_channels)) { | |
| 785 | 4 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 786 | } | ||
| 787 | |||
| 788 | 316 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 789 | 432 | return kleidicv_scharr_interleaved_stripe_s16_u8( | |
| 790 | 216 | src, src_stride, src_width, src_height, src_channels, dst, dst_stride, | |
| 791 | 216 | y_begin, y_end); | |
| 792 | }; | ||
| 793 | |||
| 794 | // height is decremented by 2 as the result has less rows. | ||
| 795 | 100 | return parallel_batches(callback, mt, src_height - 2); | |
| 796 | 104 | } | |
| 797 | |||
| 798 | 120 | kleidicv_error_t kleidicv_thread_resize_to_quarter_u8( | |
| 799 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 800 | uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 801 | kleidicv_thread_multithreading mt) { | ||
| 802 | 350 | auto callback = [=](unsigned begin, unsigned end) { | |
| 803 | 230 | size_t src_begin = size_t{begin} * 2; | |
| 804 | 230 | size_t src_end = std::min<size_t>(src_height, size_t{end} * 2); | |
| 805 | 230 | size_t dst_begin = begin; | |
| 806 | 230 | size_t dst_end = std::min<size_t>(dst_height, end); | |
| 807 | |||
| 808 | // half of odd height is rounded towards zero? | ||
| 809 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 210 times.
|
230 | if (dst_begin == dst_end) { |
| 810 | 20 | return KLEIDICV_OK; | |
| 811 | } | ||
| 812 | |||
| 813 | 420 | return kleidicv_resize_to_quarter_u8( | |
| 814 | 210 | src + src_begin * src_stride, src_stride, src_width, | |
| 815 | 210 | src_end - src_begin, dst + dst_begin * dst_stride, dst_stride, | |
| 816 | 210 | dst_width, dst_end - dst_begin); | |
| 817 | 230 | }; | |
| 818 | 240 | return parallel_batches(callback, mt, (src_height + 1) / 2); | |
| 819 | 120 | } | |
| 820 | |||
| 821 | 125 | kleidicv_error_t kleidicv_thread_resize_linear_u8( | |
| 822 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 823 | uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 824 | kleidicv_thread_multithreading mt) { | ||
| 825 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 120 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 120 times.
|
250 | if (!kleidicv::resize_linear_u8_is_implemented(src_width, src_height, |
| 826 | 125 | dst_width, dst_height)) { | |
| 827 | 5 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 828 | } | ||
| 829 | 350 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 830 | 460 | return kleidicv_resize_linear_stripe_u8( | |
| 831 | 230 | src, src_stride, src_width, src_height, y_begin, | |
| 832 | 230 | std::min<size_t>(src_height, y_end + 1), dst, dst_stride, dst_width, | |
| 833 | 230 | dst_height); | |
| 834 | }; | ||
| 835 | 120 | return parallel_batches(callback, mt, std::max<size_t>(1, src_height - 1)); | |
| 836 | 125 | } | |
| 837 | |||
| 838 | 185 | kleidicv_error_t kleidicv_thread_resize_linear_f32( | |
| 839 | const float *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 840 | float *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 841 | kleidicv_thread_multithreading mt) { | ||
| 842 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 180 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 180 times.
|
370 | if (!kleidicv::resize_linear_f32_is_implemented(src_width, src_height, |
| 843 | 185 | dst_width, dst_height)) { | |
| 844 | 5 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 845 | } | ||
| 846 | 525 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 847 | 690 | return kleidicv_resize_linear_stripe_f32( | |
| 848 | 345 | src, src_stride, src_width, src_height, y_begin, | |
| 849 | 345 | std::min<size_t>(src_height, y_end + 1), dst, dst_stride, dst_width, | |
| 850 | 345 | dst_height); | |
| 851 | }; | ||
| 852 | 180 | return parallel_batches(callback, mt, std::max<size_t>(1, src_height - 1)); | |
| 853 | 185 | } | |
| 854 | |||
| 855 | 208 | kleidicv_error_t kleidicv_thread_remap_s16_u8( | |
| 856 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 857 | uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 858 | size_t channels, const int16_t *mapxy, size_t mapxy_stride, | ||
| 859 | kleidicv_border_type_t border_type, const uint8_t *border_value, | ||
| 860 | kleidicv_thread_multithreading mt) { | ||
| 861 |
4/4✓ Branch 0 taken 8 times.
✓ Branch 1 taken 200 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 200 times.
|
416 | if (!kleidicv::remap_s16_is_implemented<uint8_t>(src_stride, src_width, |
| 862 | 208 | src_height, dst_width, | |
| 863 | 208 | border_type, channels)) { | |
| 864 | 8 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 865 | } | ||
| 866 | 632 | auto callback = [=](unsigned begin, unsigned end) { | |
| 867 | 864 | return kleidicv_remap_s16_u8( | |
| 868 | 432 | src, src_stride, src_width, src_height, | |
| 869 | 432 | dst + begin * dst_stride / sizeof(uint8_t), dst_stride, dst_width, | |
| 870 | 432 | end - begin, channels, | |
| 871 | 432 | mapxy + static_cast<ptrdiff_t>(begin * mapxy_stride / sizeof(int16_t)), | |
| 872 | 432 | mapxy_stride, border_type, border_value); | |
| 873 | }; | ||
| 874 | 200 | return parallel_batches(callback, mt, dst_height); | |
| 875 | 208 | } | |
| 876 | |||
| 877 | 208 | kleidicv_error_t kleidicv_thread_remap_s16_u16( | |
| 878 | const uint16_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 879 | uint16_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 880 | size_t channels, const int16_t *mapxy, size_t mapxy_stride, | ||
| 881 | kleidicv_border_type_t border_type, const uint16_t *border_value, | ||
| 882 | kleidicv_thread_multithreading mt) { | ||
| 883 |
4/4✓ Branch 0 taken 8 times.
✓ Branch 1 taken 200 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 200 times.
|
416 | if (!kleidicv::remap_s16_is_implemented<uint16_t>(src_stride, src_width, |
| 884 | 208 | src_height, dst_width, | |
| 885 | 208 | border_type, channels)) { | |
| 886 | 8 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 887 | } | ||
| 888 | 632 | auto callback = [=](unsigned begin, unsigned end) { | |
| 889 | 864 | return kleidicv_remap_s16_u16( | |
| 890 | 432 | src, src_stride, src_width, src_height, | |
| 891 | 432 | dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(uint16_t)), | |
| 892 | 432 | dst_stride, dst_width, end - begin, channels, | |
| 893 | 432 | mapxy + static_cast<ptrdiff_t>(begin * mapxy_stride / sizeof(int16_t)), | |
| 894 | 432 | mapxy_stride, border_type, border_value); | |
| 895 | }; | ||
| 896 | 200 | return parallel_batches(callback, mt, dst_height); | |
| 897 | 208 | } | |
| 898 | |||
| 899 | 408 | kleidicv_error_t kleidicv_thread_remap_s16point5_u8( | |
| 900 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 901 | uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 902 | size_t channels, const int16_t *mapxy, size_t mapxy_stride, | ||
| 903 | const uint16_t *mapfrac, size_t mapfrac_stride, | ||
| 904 | kleidicv_border_type_t border_type, const uint8_t *border_value, | ||
| 905 | kleidicv_thread_multithreading mt) { | ||
| 906 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 400 times.
|
408 | if (!kleidicv::remap_s16point5_is_implemented<uint8_t>( |
| 907 | 408 | src_stride, src_width, src_height, dst_width, border_type, | |
| 908 | 408 | channels)) { | |
| 909 | 8 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 910 | } | ||
| 911 | 1264 | auto callback = [=](unsigned begin, unsigned end) { | |
| 912 | 1728 | return kleidicv_remap_s16point5_u8( | |
| 913 | 864 | src, src_stride, src_width, src_height, | |
| 914 | 864 | dst + begin * dst_stride / sizeof(uint8_t), dst_stride, dst_width, | |
| 915 | 864 | end - begin, channels, | |
| 916 | 864 | mapxy + static_cast<ptrdiff_t>(begin * mapxy_stride / sizeof(int16_t)), | |
| 917 | 864 | mapxy_stride, | |
| 918 | 1728 | mapfrac + | |
| 919 | 864 | static_cast<ptrdiff_t>(begin * mapfrac_stride / sizeof(uint16_t)), | |
| 920 | 864 | mapfrac_stride, border_type, border_value); | |
| 921 | }; | ||
| 922 | 400 | return parallel_batches(callback, mt, dst_height); | |
| 923 | 408 | } | |
| 924 | |||
| 925 | 408 | kleidicv_error_t kleidicv_thread_remap_s16point5_u16( | |
| 926 | const uint16_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 927 | uint16_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 928 | size_t channels, const int16_t *mapxy, size_t mapxy_stride, | ||
| 929 | const uint16_t *mapfrac, size_t mapfrac_stride, | ||
| 930 | kleidicv_border_type_t border_type, const uint16_t *border_value, | ||
| 931 | kleidicv_thread_multithreading mt) { | ||
| 932 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 400 times.
|
408 | if (!kleidicv::remap_s16point5_is_implemented<uint16_t>( |
| 933 | 408 | src_stride, src_width, src_height, dst_width, border_type, | |
| 934 | 408 | channels)) { | |
| 935 | 8 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 936 | } | ||
| 937 | 1264 | auto callback = [=](unsigned begin, unsigned end) { | |
| 938 | 1728 | return kleidicv_remap_s16point5_u16( | |
| 939 | 864 | src, src_stride, src_width, src_height, | |
| 940 | 864 | dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(uint16_t)), | |
| 941 | 864 | dst_stride, dst_width, end - begin, channels, | |
| 942 | 864 | mapxy + static_cast<ptrdiff_t>(begin * mapxy_stride / sizeof(int16_t)), | |
| 943 | 864 | mapxy_stride, | |
| 944 | 1728 | mapfrac + | |
| 945 | 864 | static_cast<ptrdiff_t>(begin * mapfrac_stride / sizeof(uint16_t)), | |
| 946 | 864 | mapfrac_stride, border_type, border_value); | |
| 947 | }; | ||
| 948 | 400 | return parallel_batches(callback, mt, dst_height); | |
| 949 | 408 | } | |
| 950 | |||
// Multithreaded wrapper around kleidicv_remap_f32_u8 for uint8_t images whose
// coordinates come from two separate float maps (mapx, mapy).
//
// Returns KLEIDICV_ERROR_NOT_IMPLEMENTED when
// kleidicv::remap_f32_is_implemented<uint8_t>() rejects the given source
// stride/size, destination size, border type, channel count or interpolation.
// Otherwise returns whatever parallel_batches() returns for the per-batch
// callback defined below.
| 951 | 408 | kleidicv_error_t kleidicv_thread_remap_f32_u8( | |
| 952 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 953 | uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 954 | size_t channels, const float *mapx, size_t mapx_stride, const float *mapy, | ||
| 955 | size_t mapy_stride, kleidicv_interpolation_type_t interpolation, | ||
| 956 | kleidicv_border_type_t border_type, const uint8_t *border_value, | ||
| 957 | kleidicv_thread_multithreading mt) { | ||
// Reject unsupported configurations before any batch is dispatched.
| 958 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 400 times.
|
408 | if (!kleidicv::remap_f32_is_implemented<uint8_t>( |
| 959 | 408 | src_stride, src_width, src_height, dst_width, dst_height, border_type, | |
| 960 | 408 | channels, interpolation)) { | |
| 961 | 8 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 962 | } | ||
// Per-batch work: begin/end are used as destination row indices. dst, mapx and
// mapy are each advanced by `begin` rows (the strides are divided by the
// element size, i.e. treated as byte counts), and end - begin is passed in the
// position of the destination height. src is passed unchanged to every batch.
// [=] copies all pointers and sizes into the closure.
| 963 | 1264 | auto callback = [=](unsigned begin, unsigned end) { | |
| 964 | 1728 | return kleidicv_remap_f32_u8( | |
| 965 | 864 | src, src_stride, src_width, src_height, | |
| 966 | 864 | dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(uint8_t)), | |
| 967 | 864 | dst_stride, dst_width, end - begin, channels, | |
| 968 | 864 | mapx + static_cast<ptrdiff_t>(begin * mapx_stride / sizeof(float)), | |
| 969 | 864 | mapx_stride, | |
| 970 | 864 | mapy + static_cast<ptrdiff_t>(begin * mapy_stride / sizeof(float)), | |
| 971 | 864 | mapy_stride, interpolation, border_type, border_value); | |
| 972 | }; | ||
// dst_height is handed to parallel_batches as the amount of work to split
// (presumably one task per destination row - confirm against parallel_batches).
| 973 | 400 | return parallel_batches(callback, mt, dst_height); | |
| 974 | 408 | } | |
| 975 | |||
// Multithreaded wrapper around kleidicv_remap_f32_u16 for uint16_t images whose
// coordinates come from two separate float maps (mapx, mapy).
//
// Returns KLEIDICV_ERROR_NOT_IMPLEMENTED when
// kleidicv::remap_f32_is_implemented<uint16_t>() rejects the given source
// stride/size, destination size, border type, channel count or interpolation.
// Otherwise returns whatever parallel_batches() returns for the per-batch
// callback defined below.
| 976 | 408 | kleidicv_error_t kleidicv_thread_remap_f32_u16( | |
| 977 | const uint16_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 978 | uint16_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 979 | size_t channels, const float *mapx, size_t mapx_stride, const float *mapy, | ||
| 980 | size_t mapy_stride, kleidicv_interpolation_type_t interpolation, | ||
| 981 | kleidicv_border_type_t border_type, const uint16_t *border_value, | ||
| 982 | kleidicv_thread_multithreading mt) { | ||
// Reject unsupported configurations before any batch is dispatched.
| 983 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 400 times.
|
408 | if (!kleidicv::remap_f32_is_implemented<uint16_t>( |
| 984 | 408 | src_stride, src_width, src_height, dst_width, dst_height, border_type, | |
| 985 | 408 | channels, interpolation)) { | |
| 986 | 8 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 987 | } | ||
// Per-batch work: begin/end are used as destination row indices. dst is
// advanced by begin * dst_stride / sizeof(uint16_t) elements and the float maps
// by begin * stride / sizeof(float) elements, so the strides are treated as
// byte counts. end - begin is passed in the position of the destination height;
// src is passed unchanged to every batch.
| 988 | 1264 | auto callback = [=](unsigned begin, unsigned end) { | |
| 989 | 1728 | return kleidicv_remap_f32_u16( | |
| 990 | 864 | src, src_stride, src_width, src_height, | |
| 991 | 864 | dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(uint16_t)), | |
| 992 | 864 | dst_stride, dst_width, end - begin, channels, | |
| 993 | 864 | mapx + static_cast<ptrdiff_t>(begin * mapx_stride / sizeof(float)), | |
| 994 | 864 | mapx_stride, | |
| 995 | 864 | mapy + static_cast<ptrdiff_t>(begin * mapy_stride / sizeof(float)), | |
| 996 | 864 | mapy_stride, interpolation, border_type, border_value); | |
| 997 | }; | ||
// dst_height is handed to parallel_batches as the amount of work to split
// (presumably one task per destination row - confirm against parallel_batches).
| 998 | 400 | return parallel_batches(callback, mt, dst_height); | |
| 999 | 408 | } | |
| 1000 | |||
// Multithreaded wrapper that applies a perspective warp to a uint8_t image by
// calling kleidicv_warp_perspective_stripe_u8 once per batch.
//
// Returns KLEIDICV_ERROR_NOT_IMPLEMENTED when
// kleidicv::warp_perspective_is_implemented<uint8_t>() rejects dst_width,
// channels, interpolation or border_type. Otherwise returns whatever
// parallel_batches() returns for the per-batch callback defined below.
//
// `transformation` is declared with array syntax but is a pointer parameter,
// so the [=] capture copies the pointer, not the nine floats; the pointed-to
// data must stay valid while the batches run.
| 1001 | 216 | kleidicv_error_t kleidicv_thread_warp_perspective_u8( | |
| 1002 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 1003 | uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, | ||
| 1004 | const float transformation[9], size_t channels, | ||
| 1005 | kleidicv_interpolation_type_t interpolation, | ||
| 1006 | kleidicv_border_type_t border_type, const uint8_t *border_value, | ||
| 1007 | kleidicv_thread_multithreading mt) { | ||
// Reject unsupported configurations before any batch is dispatched.
| 1008 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 200 times.
|
216 | if (!kleidicv::warp_perspective_is_implemented<uint8_t>( |
| 1009 | 216 | dst_width, channels, interpolation, border_type)) { | |
| 1010 | 16 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 1011 | } | ||
| 1012 | |||
// Per-batch work: unlike the remap wrappers in this file, dst is not offset
// here. The stripe function receives the whole src/dst images, the full
// dst_height, and a row range starting at y_begin.
// NOTE(review): the upper bound passed is min(dst_height, y_end + 1), i.e. one
// past the batch's own y_end (clamped to the image height), so adjacent batches
// appear to overlap by one row. Confirm against the contract of
// kleidicv_warp_perspective_stripe_u8 whether this is intended.
| 1013 | 632 | auto callback = [=](unsigned y_begin, unsigned y_end) { | |
| 1014 | 864 | return kleidicv_warp_perspective_stripe_u8( | |
| 1015 | 432 | src, src_stride, src_width, src_height, dst, dst_stride, dst_width, | |
| 1016 | 432 | dst_height, y_begin, std::min<size_t>(dst_height, y_end + 1), | |
| 1017 | 432 | transformation, channels, interpolation, border_type, border_value); | |
| 1018 | }; | ||
// dst_height is handed to parallel_batches as the amount of work to split
// (presumably one task per destination row - confirm against parallel_batches).
| 1019 | 200 | return parallel_batches(callback, mt, dst_height); | |
| 1020 | 216 | } | |
| 1021 |