KleidiCV Coverage Report


Directory: ./
File: kleidicv_thread/src/kleidicv_thread.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 634 634 100.0%
Functions: 359 359 100.0%
Branches: 382 394 97.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv_thread/kleidicv_thread.h"
6
7 #include <algorithm>
8 #include <cstddef>
9 #include <cstdint>
10 #include <functional>
11 #include <limits>
12 #include <vector>
13
14 #include "kleidicv/arithmetics/rotate.h"
15 #include "kleidicv/arithmetics/scale.h"
16 #include "kleidicv/conversions/rgb_to_yuv_420.h"
17 #include "kleidicv/conversions/yuv_420_to_rgb.h"
18 #include "kleidicv/ctypes.h"
19 #include "kleidicv/filters/blur_and_downsample.h"
20 #include "kleidicv/filters/gaussian_blur.h"
21 #include "kleidicv/filters/median_blur.h"
22 #include "kleidicv/filters/scharr.h"
23 #include "kleidicv/filters/separable_filter_2d.h"
24 #include "kleidicv/filters/sobel.h"
25 #include "kleidicv/kleidicv.h"
26 #include "kleidicv/resize/resize_linear.h"
27 #include "kleidicv/transform/remap.h"
28 #include "kleidicv/transform/warp_perspective.h"
29
30 typedef std::function<kleidicv_error_t(unsigned, unsigned)> FunctionCallback;
31
32 12110 static kleidicv_error_t kleidicv_thread_std_function_callback(
33 unsigned task_begin, unsigned task_end, void *data) {
34 12110 auto *callback = reinterpret_cast<FunctionCallback *>(data);
35 24220 return (*callback)(task_begin, task_end);
36 12110 }
37
38 // Operations in the Neon backend have both a vector path and a scalar path.
39 // The vector path is used to process most data and the scalar path is used to
40 // process the parts of the data that don't fit into the vector width.
41 // For floating point operations in particular, the results may be very slightly
42 // different between vector and scalar paths.
43 // When using multithreading, images are divided into parts to be processed by
44 // each thread, and this could change which parts of the data end up being
45 // processed by the vector and scalar paths. Since the threading may be
46 // non-deterministic in how it divides up the image, this non-determinism could
47 // leak through in the values of the output. This could cause subtle bugs.
48 //
49 // To avoid this problem, this function passes data to each thread in batches
50 // that are a multiple of the Neon vector width in size (16 bytes). The
51 // exception is the last batch, which may be longer in order to extend to the
52 // end of the data. No batch can be shorter than vector length as this could
53 // cause different behaviour for operations that try to avoid the tail loop (see
54 // the TryToAvoidTailLoop class) - this technique only works if the data is
55 // longer than vector length.
56 //
57 // Typically with how this function is used, batches will be 16 image rows or
58 // row pairs, which is likely to be far coarser alignment than is needed.
59 // However it's unlikely that threading on a finer-grained level would provide a
60 // performance benefit.
61 template <typename Callback>
62 8967 inline kleidicv_error_t parallel_batches(Callback callback,
63 kleidicv_thread_multithreading mt,
64 unsigned count,
65 unsigned min_batch_size = 16) {
66 8967 const unsigned task_count = std::max(1U, (count) / min_batch_size);
67 21077 FunctionCallback f = [=](unsigned task_begin, unsigned task_end) {
68 12110 unsigned begin = task_begin * min_batch_size,
69 12110 end = task_end * min_batch_size;
70
138/140
✓ Branch 0 taken 594 times.
✓ Branch 1 taken 1350 times.
✓ Branch 2 taken 36 times.
✓ Branch 3 taken 90 times.
✓ Branch 4 taken 33 times.
✓ Branch 5 taken 75 times.
✓ Branch 6 taken 33 times.
✓ Branch 7 taken 75 times.
✓ Branch 8 taken 33 times.
✓ Branch 9 taken 75 times.
✓ Branch 10 taken 33 times.
✓ Branch 11 taken 75 times.
✓ Branch 12 taken 66 times.
✓ Branch 13 taken 150 times.
✓ Branch 14 taken 33 times.
✓ Branch 15 taken 75 times.
✓ Branch 16 taken 33 times.
✓ Branch 17 taken 84 times.
✓ Branch 18 taken 36 times.
✓ Branch 19 taken 90 times.
✓ Branch 20 taken 99 times.
✓ Branch 21 taken 225 times.
✓ Branch 22 taken 198 times.
✓ Branch 23 taken 450 times.
✓ Branch 24 taken 99 times.
✓ Branch 25 taken 225 times.
✓ Branch 26 taken 99 times.
✓ Branch 27 taken 225 times.
✓ Branch 28 taken 99 times.
✓ Branch 29 taken 225 times.
✓ Branch 30 taken 66 times.
✓ Branch 31 taken 150 times.
✓ Branch 32 taken 66 times.
✓ Branch 33 taken 150 times.
✓ Branch 34 taken 66 times.
✓ Branch 35 taken 150 times.
✓ Branch 36 taken 33 times.
✓ Branch 37 taken 75 times.
✓ Branch 38 taken 33 times.
✓ Branch 39 taken 75 times.
✓ Branch 40 taken 33 times.
✓ Branch 41 taken 75 times.
✓ Branch 42 taken 33 times.
✓ Branch 43 taken 75 times.
✓ Branch 44 taken 33 times.
✓ Branch 45 taken 75 times.
✓ Branch 46 taken 33 times.
✓ Branch 47 taken 75 times.
✓ Branch 48 taken 72 times.
✓ Branch 49 taken 120 times.
✓ Branch 50 taken 4 times.
✓ Branch 51 taken 108 times.
✓ Branch 52 taken 4 times.
✓ Branch 53 taken 108 times.
✓ Branch 54 taken 4 times.
✓ Branch 55 taken 108 times.
✓ Branch 56 taken 4 times.
✓ Branch 57 taken 108 times.
✓ Branch 58 taken 4 times.
✓ Branch 59 taken 56 times.
✓ Branch 60 taken 4 times.
✓ Branch 61 taken 56 times.
✓ Branch 62 taken 4 times.
✓ Branch 63 taken 56 times.
✓ Branch 64 taken 4 times.
✓ Branch 65 taken 56 times.
✓ Branch 66 taken 4 times.
✓ Branch 67 taken 56 times.
✓ Branch 68 taken 4 times.
✓ Branch 69 taken 56 times.
✓ Branch 70 taken 4 times.
✓ Branch 71 taken 56 times.
✓ Branch 72 taken 4 times.
✓ Branch 73 taken 56 times.
✗ Branch 74 not taken.
✓ Branch 75 taken 608 times.
✓ Branch 76 taken 3 times.
✓ Branch 77 taken 45 times.
✓ Branch 78 taken 3 times.
✓ Branch 79 taken 45 times.
✓ Branch 80 taken 3 times.
✓ Branch 81 taken 45 times.
✓ Branch 82 taken 3 times.
✓ Branch 83 taken 45 times.
✓ Branch 84 taken 3 times.
✓ Branch 85 taken 45 times.
✓ Branch 86 taken 3 times.
✓ Branch 87 taken 51 times.
✓ Branch 88 taken 3 times.
✓ Branch 89 taken 45 times.
✓ Branch 90 taken 12 times.
✓ Branch 91 taken 21 times.
✓ Branch 92 taken 3 times.
✓ Branch 93 taken 3 times.
✓ Branch 94 taken 12 times.
✓ Branch 95 taken 21 times.
✓ Branch 96 taken 12 times.
✓ Branch 97 taken 21 times.
✓ Branch 98 taken 12 times.
✓ Branch 99 taken 21 times.
✓ Branch 100 taken 12 times.
✓ Branch 101 taken 21 times.
✓ Branch 102 taken 36 times.
✓ Branch 103 taken 84 times.
✓ Branch 104 taken 39 times.
✓ Branch 105 taken 75 times.
✓ Branch 106 taken 9 times.
✓ Branch 107 taken 9 times.
✓ Branch 108 taken 3 times.
✓ Branch 109 taken 3 times.
✓ Branch 110 taken 39 times.
✓ Branch 111 taken 75 times.
✓ Branch 112 taken 39 times.
✓ Branch 113 taken 75 times.
✓ Branch 114 taken 39 times.
✓ Branch 115 taken 75 times.
✓ Branch 116 taken 36 times.
✓ Branch 117 taken 84 times.
✓ Branch 118 taken 33 times.
✓ Branch 119 taken 75 times.
✗ Branch 120 not taken.
✓ Branch 121 taken 96 times.
✓ Branch 122 taken 8 times.
✓ Branch 123 taken 96 times.
✓ Branch 124 taken 12 times.
✓ Branch 125 taken 144 times.
✓ Branch 126 taken 66 times.
✓ Branch 127 taken 150 times.
✓ Branch 128 taken 66 times.
✓ Branch 129 taken 150 times.
✓ Branch 130 taken 132 times.
✓ Branch 131 taken 300 times.
✓ Branch 132 taken 132 times.
✓ Branch 133 taken 300 times.
✓ Branch 134 taken 132 times.
✓ Branch 135 taken 300 times.
✓ Branch 136 taken 132 times.
✓ Branch 137 taken 300 times.
✓ Branch 138 taken 66 times.
✓ Branch 139 taken 150 times.
12110 if (task_end == task_count) {
71 8967 end = count;
72 8967 }
73 24220 return callback(begin, end);
74 12110 };
75 17934 return mt.parallel(kleidicv_thread_std_function_callback, &f,
76 8967 mt.parallel_data, task_count);
77 8967 }
78
79 template <typename SrcT, typename DstT, typename F, typename... Args>
80 2139 inline kleidicv_error_t kleidicv_thread_unary_op_impl(
81 F f, kleidicv_thread_multithreading mt, const SrcT *src, size_t src_stride,
82 DstT *dst, size_t dst_stride, size_t width, size_t height, Args... args) {
83 5208 auto callback = [=](unsigned begin, unsigned end) {
84 6138 return f(src + static_cast<ptrdiff_t>(begin * src_stride / sizeof(SrcT)),
85 3069 src_stride,
86 3069 dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(DstT)),
87 3069 dst_stride, width, end - begin, args...);
88 };
89 4278 return parallel_batches(callback, mt, height);
90 2139 }
91
92 template <typename SrcT, typename DstT, typename F, typename... Args>
93 2250 inline kleidicv_error_t kleidicv_thread_binary_op_impl(
94 F f, kleidicv_thread_multithreading mt, const SrcT *src_a,
95 size_t src_a_stride, const SrcT *src_b, size_t src_b_stride, DstT *dst,
96 size_t dst_stride, size_t width, size_t height, Args... args) {
97 5490 auto callback = [=](unsigned begin, unsigned end) {
98 6480 return f(
99 3240 src_a + static_cast<ptrdiff_t>(begin * src_a_stride / sizeof(SrcT)),
100 3240 src_a_stride,
101 3240 src_b + static_cast<ptrdiff_t>(begin * src_b_stride / sizeof(SrcT)),
102 3240 src_b_stride,
103 3240 dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(DstT)),
104 3240 dst_stride, width, end - begin, args...);
105 };
106 4500 return parallel_batches(callback, mt, height);
107 2250 }
108
109 #define KLEIDICV_THREAD_UNARY_OP_IMPL(suffix, src_type, dst_type) \
110 kleidicv_error_t kleidicv_thread_##suffix( \
111 const src_type *src, size_t src_stride, dst_type *dst, \
112 size_t dst_stride, size_t width, size_t height, \
113 kleidicv_thread_multithreading mt) { \
114 return kleidicv_thread_unary_op_impl(kleidicv_##suffix, mt, src, \
115 src_stride, dst, dst_stride, width, \
116 height); \
117 }
118
119 75 KLEIDICV_THREAD_UNARY_OP_IMPL(gray_to_rgb_u8, uint8_t, uint8_t);
120 75 KLEIDICV_THREAD_UNARY_OP_IMPL(gray_to_rgba_u8, uint8_t, uint8_t);
121 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_bgr_u8, uint8_t, uint8_t);
122 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_rgb_u8, uint8_t, uint8_t);
123 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_bgra_u8, uint8_t, uint8_t);
124 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_rgba_u8, uint8_t, uint8_t);
125 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_bgra_u8, uint8_t, uint8_t);
126 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_rgba_u8, uint8_t, uint8_t);
127 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_bgr_u8, uint8_t, uint8_t);
128 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_rgb_u8, uint8_t, uint8_t);
129 75 KLEIDICV_THREAD_UNARY_OP_IMPL(yuv_to_bgr_u8, uint8_t, uint8_t);
130 75 KLEIDICV_THREAD_UNARY_OP_IMPL(yuv_to_bgra_u8, uint8_t, uint8_t);
131 75 KLEIDICV_THREAD_UNARY_OP_IMPL(yuv_to_rgb_u8, uint8_t, uint8_t);
132 75 KLEIDICV_THREAD_UNARY_OP_IMPL(yuv_to_rgba_u8, uint8_t, uint8_t);
133 75 KLEIDICV_THREAD_UNARY_OP_IMPL(bgr_to_yuv_u8, uint8_t, uint8_t);
134 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_yuv_u8, uint8_t, uint8_t);
135 75 KLEIDICV_THREAD_UNARY_OP_IMPL(bgra_to_yuv_u8, uint8_t, uint8_t);
136 75 KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_yuv_u8, uint8_t, uint8_t);
137 90 KLEIDICV_THREAD_UNARY_OP_IMPL(exp_f32, float, float);
138 75 KLEIDICV_THREAD_UNARY_OP_IMPL(f32_to_s8, float, int8_t);
139 75 KLEIDICV_THREAD_UNARY_OP_IMPL(f32_to_u8, float, uint8_t);
140 75 KLEIDICV_THREAD_UNARY_OP_IMPL(s8_to_f32, int8_t, float);
141 75 KLEIDICV_THREAD_UNARY_OP_IMPL(u8_to_f32, uint8_t, float);
142
143 #define KLEIDICV_THREAD_INRANGE_OP_IMPL(suffix, src_type, dst_type) \
144 kleidicv_error_t kleidicv_thread_##suffix( \
145 const src_type *src, size_t src_stride, dst_type *dst, \
146 size_t dst_stride, size_t width, size_t height, src_type lower_bound, \
147 src_type upper_bound, kleidicv_thread_multithreading mt) { \
148 return kleidicv_thread_unary_op_impl(kleidicv_##suffix, mt, src, \
149 src_stride, dst, dst_stride, width, \
150 height, lower_bound, upper_bound); \
151 }
152
153 75 KLEIDICV_THREAD_INRANGE_OP_IMPL(in_range_u8, uint8_t, uint8_t);
154 75 KLEIDICV_THREAD_INRANGE_OP_IMPL(in_range_f32, float, uint8_t);
155
156 75 kleidicv_error_t kleidicv_thread_threshold_binary_u8(
157 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
158 size_t width, size_t height, uint8_t threshold, uint8_t value,
159 kleidicv_thread_multithreading mt) {
160 150 return kleidicv_thread_unary_op_impl(kleidicv_threshold_binary_u8, mt, src,
161 75 src_stride, dst, dst_stride, width,
162 75 height, threshold, value);
163 }
164
165 93 kleidicv_error_t kleidicv_thread_scale_u8(const uint8_t *src, size_t src_stride,
166 uint8_t *dst, size_t dst_stride,
167 size_t width, size_t height,
168 float scale, float shift,
169 kleidicv_thread_multithreading mt) {
170
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 90 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 90 times.
93 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
171
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 87 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 87 times.
90 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
172
5/6
✗ Branch 0 not taken.
✓ Branch 1 taken 87 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 84 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 84 times.
87 CHECK_IMAGE_SIZE(width, height);
173
174 84 const std::array<uint8_t, 256> precalculated_table =
175 84 kleidicv::neon::precalculate_scale_table_u8(scale, shift);
176 84 return kleidicv_thread_unary_op_impl(
177 84 kleidicv::neon::scale_with_precalculated_table, mt, src, src_stride, dst,
178 84 dst_stride, width, height, scale, shift, precalculated_table);
179 93 }
180
181 90 kleidicv_error_t kleidicv_thread_scale_f32(const float *src, size_t src_stride,
182 float *dst, size_t dst_stride,
183 size_t width, size_t height,
184 float scale, float shift,
185 kleidicv_thread_multithreading mt) {
186 180 return kleidicv_thread_unary_op_impl(kleidicv_scale_f32, mt, src, src_stride,
187 90 dst, dst_stride, width, height, scale,
188 90 shift);
189 }
190
191 #define KLEIDICV_THREAD_BINARY_OP_IMPL(suffix, type) \
192 kleidicv_error_t kleidicv_thread_##suffix( \
193 const type *src_a, size_t src_a_stride, const type *src_b, \
194 size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \
195 size_t height, kleidicv_thread_multithreading mt) { \
196 return kleidicv_thread_binary_op_impl(kleidicv_##suffix, mt, src_a, \
197 src_a_stride, src_b, src_b_stride, \
198 dst, dst_stride, width, height); \
199 }
200
201 #define KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(suffix, type, scaletype) \
202 kleidicv_error_t kleidicv_thread_##suffix( \
203 const type *src_a, size_t src_a_stride, const type *src_b, \
204 size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \
205 size_t height, scaletype scale, kleidicv_thread_multithreading mt) { \
206 return kleidicv_thread_binary_op_impl( \
207 kleidicv_##suffix, mt, src_a, src_a_stride, src_b, src_b_stride, dst, \
208 dst_stride, width, height, scale); \
209 }
210
211 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_s8, int8_t);
212 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_u8, uint8_t);
213 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_s16, int16_t);
214 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_u16, uint16_t);
215 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_s32, int32_t);
216 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_u32, uint32_t);
217 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_s64, int64_t);
218 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_add_u64, uint64_t);
219 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_s8, int8_t);
220 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_u8, uint8_t);
221 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_s16, int16_t);
222 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_u16, uint16_t);
223 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_s32, int32_t);
224 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_u32, uint32_t);
225 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_s64, int64_t);
226 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_sub_u64, uint64_t);
227 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_u8, uint8_t);
228 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_s8, int8_t);
229 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_u16, uint16_t);
230 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_s16, int16_t);
231 75 KLEIDICV_THREAD_BINARY_OP_IMPL(saturating_absdiff_s32, int32_t);
232 75 KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_u8, uint8_t, double);
233 75 KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_s8, int8_t, double);
234 75 KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_u16, uint16_t, double);
235 75 KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_s16, int16_t, double);
236 75 KLEIDICV_THREAD_BINARY_OP_SCALE_IMPL(saturating_multiply_s32, int32_t, double);
237 75 KLEIDICV_THREAD_BINARY_OP_IMPL(bitwise_and, uint8_t);
238 75 KLEIDICV_THREAD_BINARY_OP_IMPL(compare_equal_u8, uint8_t);
239 75 KLEIDICV_THREAD_BINARY_OP_IMPL(compare_greater_u8, uint8_t);
240
241 75 kleidicv_error_t kleidicv_thread_saturating_add_abs_with_threshold_s16(
242 const int16_t *src_a, size_t src_a_stride, const int16_t *src_b,
243 size_t src_b_stride, int16_t *dst, size_t dst_stride, size_t width,
244 size_t height, int16_t threshold, kleidicv_thread_multithreading mt) {
245 75 return kleidicv_thread_binary_op_impl(
246 75 kleidicv_saturating_add_abs_with_threshold_s16, mt, src_a, src_a_stride,
247 75 src_b, src_b_stride, dst, dst_stride, width, height, threshold);
248 }
249
250 129 kleidicv_error_t kleidicv_thread_rotate(const void *src, size_t src_stride,
251 size_t width, size_t height, void *dst,
252 size_t dst_stride, int angle,
253 size_t element_size,
254 kleidicv_thread_multithreading mt) {
255
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 120 times.
129 if (!kleidicv::rotate_is_implemented(src, dst, angle, element_size)) {
256 9 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
257 }
258 // reading in columns and writing out rows tends to perform better
259 312 auto callback = [=](unsigned begin, unsigned end) {
260 384 return kleidicv_rotate(
261 192 static_cast<const uint8_t *>(src) + begin * element_size, src_stride,
262 192 end - begin, height, static_cast<uint8_t *>(dst) + begin * dst_stride,
263 192 dst_stride, angle, element_size);
264 };
265 120 return parallel_batches(callback, mt, width, 64);
266 129 }
267
268 108 kleidicv_error_t kleidicv_thread_yuv_p_to_bgr_u8(
269 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
270 size_t width, size_t height, bool is_yv12,
271 kleidicv_thread_multithreading mt) {
272 220 auto callback = [=](unsigned begin, unsigned end) {
273 224 return kleidicv_yuv_p_to_bgr_stripe_u8(
274 112 src, src_stride, dst, dst_stride, width, height, is_yv12,
275 112 static_cast<size_t>(begin), static_cast<size_t>(end));
276 };
277 216 return parallel_batches(callback, mt, (height + 1) / 2);
278 108 }
279
280 108 kleidicv_error_t kleidicv_thread_yuv_p_to_bgra_u8(
281 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
282 size_t width, size_t height, bool is_yv12,
283 kleidicv_thread_multithreading mt) {
284 220 auto callback = [=](unsigned begin, unsigned end) {
285 224 return kleidicv_yuv_p_to_bgra_stripe_u8(
286 112 src, src_stride, dst, dst_stride, width, height, is_yv12,
287 112 static_cast<size_t>(begin), static_cast<size_t>(end));
288 };
289 216 return parallel_batches(callback, mt, (height + 1) / 2);
290 108 }
291
292 108 kleidicv_error_t kleidicv_thread_yuv_p_to_rgb_u8(
293 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
294 size_t width, size_t height, bool is_yv12,
295 kleidicv_thread_multithreading mt) {
296 220 auto callback = [=](unsigned begin, unsigned end) {
297 224 return kleidicv_yuv_p_to_rgb_stripe_u8(
298 112 src, src_stride, dst, dst_stride, width, height, is_yv12,
299 112 static_cast<size_t>(begin), static_cast<size_t>(end));
300 };
301 216 return parallel_batches(callback, mt, (height + 1) / 2);
302 108 }
303
304 108 kleidicv_error_t kleidicv_thread_yuv_p_to_rgba_u8(
305 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
306 size_t width, size_t height, bool is_yv12,
307 kleidicv_thread_multithreading mt) {
308 220 auto callback = [=](unsigned begin, unsigned end) {
309 224 return kleidicv_yuv_p_to_rgba_stripe_u8(
310 112 src, src_stride, dst, dst_stride, width, height, is_yv12,
311 112 static_cast<size_t>(begin), static_cast<size_t>(end));
312 };
313 216 return parallel_batches(callback, mt, (height + 1) / 2);
314 108 }
315
316 56 kleidicv_error_t kleidicv_thread_rgb_to_yuv420_p_u8(
317 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
318 size_t width, size_t height, bool is_yv12,
319 kleidicv_thread_multithreading mt) {
320 116 auto callback = [=](unsigned begin, unsigned end) {
321 120 return kleidicv_rgb_to_yuv420_p_stripe_u8(
322 60 src, src_stride, dst, dst_stride, width, height, is_yv12,
323 60 static_cast<size_t>(begin), static_cast<size_t>(end));
324 };
325 112 return parallel_batches(callback, mt, (height + 1) / 2);
326 56 }
327
328 56 kleidicv_error_t kleidicv_thread_rgba_to_yuv420_p_u8(
329 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
330 size_t width, size_t height, bool is_yv12,
331 kleidicv_thread_multithreading mt) {
332 116 auto callback = [=](unsigned begin, unsigned end) {
333 120 return kleidicv_rgba_to_yuv420_p_stripe_u8(
334 60 src, src_stride, dst, dst_stride, width, height, is_yv12,
335 60 static_cast<size_t>(begin), static_cast<size_t>(end));
336 };
337 112 return parallel_batches(callback, mt, (height + 1) / 2);
338 56 }
339
340 56 kleidicv_error_t kleidicv_thread_bgr_to_yuv420_p_u8(
341 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
342 size_t width, size_t height, bool is_yv12,
343 kleidicv_thread_multithreading mt) {
344 116 auto callback = [=](unsigned begin, unsigned end) {
345 120 return kleidicv_bgr_to_yuv420_p_stripe_u8(
346 60 src, src_stride, dst, dst_stride, width, height, is_yv12,
347 60 static_cast<size_t>(begin), static_cast<size_t>(end));
348 };
349 112 return parallel_batches(callback, mt, (height + 1) / 2);
350 56 }
351
352 56 kleidicv_error_t kleidicv_thread_bgra_to_yuv420_p_u8(
353 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
354 size_t width, size_t height, bool is_yv12,
355 kleidicv_thread_multithreading mt) {
356 116 auto callback = [=](unsigned begin, unsigned end) {
357 120 return kleidicv_bgra_to_yuv420_p_stripe_u8(
358 60 src, src_stride, dst, dst_stride, width, height, is_yv12,
359 60 static_cast<size_t>(begin), static_cast<size_t>(end));
360 };
361 112 return parallel_batches(callback, mt, (height + 1) / 2);
362 56 }
363
364 56 kleidicv_error_t kleidicv_thread_rgb_to_yuv420_sp_u8(
365 const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride,
366 uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height,
367 bool is_nv21, kleidicv_thread_multithreading mt) {
368 116 auto callback = [=](unsigned begin, unsigned end) {
369 120 return kleidicv_rgb_to_yuv420_sp_stripe_u8(
370 60 src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height,
371 60 is_nv21, static_cast<size_t>(begin), static_cast<size_t>(end));
372 };
373 112 return parallel_batches(callback, mt, (height + 1) / 2);
374 56 }
375
376 56 kleidicv_error_t kleidicv_thread_rgba_to_yuv420_sp_u8(
377 const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride,
378 uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height,
379 bool is_nv21, kleidicv_thread_multithreading mt) {
380 116 auto callback = [=](unsigned begin, unsigned end) {
381 120 return kleidicv_rgba_to_yuv420_sp_stripe_u8(
382 60 src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height,
383 60 is_nv21, static_cast<size_t>(begin), static_cast<size_t>(end));
384 };
385 112 return parallel_batches(callback, mt, (height + 1) / 2);
386 56 }
387
388 56 kleidicv_error_t kleidicv_thread_bgr_to_yuv420_sp_u8(
389 const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride,
390 uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height,
391 bool is_nv21, kleidicv_thread_multithreading mt) {
392 116 auto callback = [=](unsigned begin, unsigned end) {
393 120 return kleidicv_bgr_to_yuv420_sp_stripe_u8(
394 60 src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height,
395 60 is_nv21, static_cast<size_t>(begin), static_cast<size_t>(end));
396 };
397 112 return parallel_batches(callback, mt, (height + 1) / 2);
398 56 }
399
400 56 kleidicv_error_t kleidicv_thread_bgra_to_yuv420_sp_u8(
401 const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride,
402 uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height,
403 bool is_nv21, kleidicv_thread_multithreading mt) {
404 116 auto callback = [=](unsigned begin, unsigned end) {
405 120 return kleidicv_bgra_to_yuv420_sp_stripe_u8(
406 60 src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height,
407 60 is_nv21, static_cast<size_t>(begin), static_cast<size_t>(end));
408 };
409 112 return parallel_batches(callback, mt, (height + 1) / 2);
410 56 }
411
412 template <typename F>
413 608 inline kleidicv_error_t kleidicv_thread_yuv_sp_to_rgb_u8_impl(
414 F f, const uint8_t *src_y, size_t src_y_stride, const uint8_t *src_uv,
415 size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width,
416 size_t height, bool is_nv21, kleidicv_thread_multithreading mt) {
417 1216 auto callback = [=](unsigned begin, unsigned end) {
418 608 size_t row_begin = size_t{begin} * 2;
419 608 size_t row_end = std::min<size_t>(height, size_t{end} * 2);
420 608 size_t row_uv = begin;
421 1824 return f(src_y + row_begin * src_y_stride, src_y_stride,
422 608 src_uv + row_uv * src_uv_stride, src_uv_stride,
423 608 dst + row_begin * dst_stride, dst_stride, width,
424 608 row_end - row_begin, is_nv21);
425 608 };
426 1216 return parallel_batches(callback, mt, (height + 1) / 2);
427 608 }
428
429 #define YUV_SP_TO_RGB(suffix) \
430 kleidicv_error_t kleidicv_thread_##suffix( \
431 const uint8_t *src_y, size_t src_y_stride, const uint8_t *src_uv, \
432 size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width, \
433 size_t height, bool is_nv21, kleidicv_thread_multithreading mt) { \
434 return kleidicv_thread_yuv_sp_to_rgb_u8_impl( \
435 kleidicv_##suffix, src_y, src_y_stride, src_uv, src_uv_stride, dst, \
436 dst_stride, width, height, is_nv21, mt); \
437 }
438
439 152 YUV_SP_TO_RGB(yuv_sp_to_bgr_u8);
440 152 YUV_SP_TO_RGB(yuv_sp_to_bgra_u8);
441 152 YUV_SP_TO_RGB(yuv_sp_to_rgb_u8);
442 152 YUV_SP_TO_RGB(yuv_sp_to_rgba_u8);
443
444 template <typename ScalarType, typename FunctionType>
445 276 kleidicv_error_t parallel_min_max(FunctionType min_max_func,
446 const ScalarType *src, size_t src_stride,
447 size_t width, size_t height,
448 ScalarType *p_min_value,
449 ScalarType *p_max_value,
450 kleidicv_thread_multithreading mt) {
451 552 std::vector<ScalarType> min_values(height,
452 276 std::numeric_limits<ScalarType>::max());
453 552 std::vector<ScalarType> max_values(height,
454 276 std::numeric_limits<ScalarType>::lowest());
455
456 570 auto callback = [&](unsigned begin, unsigned end) {
457 588 return min_max_func(src + begin * (src_stride / sizeof(ScalarType)),
458 294 src_stride, width, end - begin,
459
12/12
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 42 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 42 times.
✓ Branch 5 taken 6 times.
✓ Branch 6 taken 42 times.
✓ Branch 7 taken 6 times.
✓ Branch 8 taken 42 times.
✓ Branch 9 taken 6 times.
✓ Branch 10 taken 48 times.
✓ Branch 11 taken 6 times.
294 p_min_value ? min_values.data() + begin : nullptr,
460
12/12
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 42 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 42 times.
✓ Branch 5 taken 6 times.
✓ Branch 6 taken 42 times.
✓ Branch 7 taken 6 times.
✓ Branch 8 taken 42 times.
✓ Branch 9 taken 6 times.
✓ Branch 10 taken 48 times.
✓ Branch 11 taken 6 times.
294 p_max_value ? max_values.data() + begin : nullptr);
461 };
462
463 276 auto return_val = parallel_batches(callback, mt, height);
464
465
12/12
✓ Branch 0 taken 39 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 39 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 39 times.
✓ Branch 5 taken 6 times.
✓ Branch 6 taken 39 times.
✓ Branch 7 taken 6 times.
✓ Branch 8 taken 39 times.
✓ Branch 9 taken 6 times.
✓ Branch 10 taken 45 times.
✓ Branch 11 taken 6 times.
276 if (p_min_value) {
466 240 *p_min_value = std::numeric_limits<ScalarType>::max();
467
12/12
✓ Branch 0 taken 216 times.
✓ Branch 1 taken 39 times.
✓ Branch 2 taken 216 times.
✓ Branch 3 taken 39 times.
✓ Branch 4 taken 216 times.
✓ Branch 5 taken 39 times.
✓ Branch 6 taken 216 times.
✓ Branch 7 taken 39 times.
✓ Branch 8 taken 216 times.
✓ Branch 9 taken 39 times.
✓ Branch 10 taken 276 times.
✓ Branch 11 taken 45 times.
1596 for (ScalarType m : min_values) {
468
12/12
✓ Branch 0 taken 177 times.
✓ Branch 1 taken 39 times.
✓ Branch 2 taken 177 times.
✓ Branch 3 taken 39 times.
✓ Branch 4 taken 176 times.
✓ Branch 5 taken 40 times.
✓ Branch 6 taken 176 times.
✓ Branch 7 taken 40 times.
✓ Branch 8 taken 176 times.
✓ Branch 9 taken 40 times.
✓ Branch 10 taken 230 times.
✓ Branch 11 taken 46 times.
1356 if (m < *p_min_value) {
469 244 *p_min_value = m;
470 244 }
471 1356 }
472 240 }
473
12/12
✓ Branch 0 taken 39 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 39 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 39 times.
✓ Branch 5 taken 6 times.
✓ Branch 6 taken 39 times.
✓ Branch 7 taken 6 times.
✓ Branch 8 taken 39 times.
✓ Branch 9 taken 6 times.
✓ Branch 10 taken 45 times.
✓ Branch 11 taken 6 times.
276 if (p_max_value) {
474 240 *p_max_value = std::numeric_limits<ScalarType>::lowest();
475
12/12
✓ Branch 0 taken 216 times.
✓ Branch 1 taken 39 times.
✓ Branch 2 taken 216 times.
✓ Branch 3 taken 39 times.
✓ Branch 4 taken 216 times.
✓ Branch 5 taken 39 times.
✓ Branch 6 taken 216 times.
✓ Branch 7 taken 39 times.
✓ Branch 8 taken 216 times.
✓ Branch 9 taken 39 times.
✓ Branch 10 taken 276 times.
✓ Branch 11 taken 45 times.
1596 for (ScalarType m : max_values) {
476
12/12
✓ Branch 0 taken 176 times.
✓ Branch 1 taken 40 times.
✓ Branch 2 taken 176 times.
✓ Branch 3 taken 40 times.
✓ Branch 4 taken 175 times.
✓ Branch 5 taken 41 times.
✓ Branch 6 taken 175 times.
✓ Branch 7 taken 41 times.
✓ Branch 8 taken 175 times.
✓ Branch 9 taken 41 times.
✓ Branch 10 taken 230 times.
✓ Branch 11 taken 46 times.
1356 if (m > *p_max_value) {
477 249 *p_max_value = m;
478 249 }
479 1356 }
480 240 }
481 276 return return_val;
482 276 }
483
484 #define DEFINE_KLEIDICV_THREAD_MIN_MAX(suffix, type) \
485 kleidicv_error_t kleidicv_thread_min_max_##suffix( \
486 const type *src, size_t src_stride, size_t width, size_t height, \
487 type *p_min_value, type *p_max_value, \
488 kleidicv_thread_multithreading mt) { \
489 return parallel_min_max(kleidicv_min_max_##suffix, src, src_stride, width, \
490 height, p_min_value, p_max_value, mt); \
491 }
492
493 45 DEFINE_KLEIDICV_THREAD_MIN_MAX(u8, uint8_t);
494 45 DEFINE_KLEIDICV_THREAD_MIN_MAX(s8, int8_t);
495 45 DEFINE_KLEIDICV_THREAD_MIN_MAX(u16, uint16_t);
496 45 DEFINE_KLEIDICV_THREAD_MIN_MAX(s16, int16_t);
497 45 DEFINE_KLEIDICV_THREAD_MIN_MAX(s32, int32_t);
498 51 DEFINE_KLEIDICV_THREAD_MIN_MAX(f32, float);
499
500 template <typename ScalarType, typename FunctionType>
501 45 kleidicv_error_t parallel_min_max_loc(FunctionType min_max_loc_func,
502 const ScalarType *src, size_t src_stride,
503 size_t width, size_t height,
504 size_t *p_min_offset,
505 size_t *p_max_offset,
506 kleidicv_thread_multithreading mt) {
507 45 std::vector<size_t> min_offsets(height, 0);
508 45 std::vector<size_t> max_offsets(height, 0);
509
510 93 auto callback = [&](unsigned begin, unsigned end) {
511 96 return min_max_loc_func(
512 48 src + begin * (src_stride / sizeof(ScalarType)), src_stride, width,
513
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 6 times.
48 end - begin, p_min_offset ? min_offsets.data() + begin : nullptr,
514
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 6 times.
48 p_max_offset ? max_offsets.data() + begin : nullptr);
515 };
516 45 auto return_val = parallel_batches(callback, mt, height);
517
518
2/2
✓ Branch 0 taken 39 times.
✓ Branch 1 taken 6 times.
45 if (p_min_offset) {
519 39 *p_min_offset = 0;
520
2/2
✓ Branch 0 taken 216 times.
✓ Branch 1 taken 39 times.
255 for (size_t i = 0; i < min_offsets.size(); ++i) {
521 216 size_t offs = min_offsets[i] + i * src_stride;
522
4/4
✓ Branch 0 taken 190 times.
✓ Branch 1 taken 26 times.
✓ Branch 2 taken 190 times.
✓ Branch 3 taken 26 times.
432 if (src[offs / sizeof(ScalarType)] <
523 216 src[*p_min_offset / sizeof(ScalarType)]) {
524 26 *p_min_offset = offs;
525 26 }
526 216 }
527 39 }
528
2/2
✓ Branch 0 taken 39 times.
✓ Branch 1 taken 6 times.
45 if (p_max_offset) {
529 39 *p_max_offset = 0;
530
2/2
✓ Branch 0 taken 216 times.
✓ Branch 1 taken 39 times.
255 for (size_t i = 0; i < max_offsets.size(); ++i) {
531 216 size_t offs = max_offsets[i] + i * src_stride;
532
4/4
✓ Branch 0 taken 191 times.
✓ Branch 1 taken 25 times.
✓ Branch 2 taken 191 times.
✓ Branch 3 taken 25 times.
432 if (src[offs / sizeof(ScalarType)] >
533 216 src[*p_max_offset / sizeof(ScalarType)]) {
534 25 *p_max_offset = offs;
535 25 }
536 216 }
537 39 }
538 45 return return_val;
539 45 }
540
541 #define DEFINE_KLEIDICV_THREAD_MIN_MAX_LOC(suffix, type) \
542 kleidicv_error_t kleidicv_thread_min_max_loc_##suffix( \
543 const type *src, size_t src_stride, size_t width, size_t height, \
544 size_t *p_min_offset, size_t *p_max_offset, \
545 kleidicv_thread_multithreading mt) { \
546 return parallel_min_max_loc(kleidicv_min_max_loc_##suffix, src, \
547 src_stride, width, height, p_min_offset, \
548 p_max_offset, mt); \
549 }
550
551 45 DEFINE_KLEIDICV_THREAD_MIN_MAX_LOC(u8, uint8_t);
552
553 template <typename F>
554 108 kleidicv_error_t kleidicv_thread_filter(F filter, size_t width, size_t height,
555 size_t channels, size_t kernel_width,
556 size_t kernel_height,
557 kleidicv_filter_context_t *context,
558 kleidicv_thread_multithreading mt) {
559 279 auto callback = [=](unsigned y_begin, unsigned y_end) {
560 // The context contains a buffer that can only fit a single row, so can't be
561 // shared between threads. Since we don't know how many threads there are,
562 // create and destroy a context every time this callback is called. Only use
563 // the context argument for the first thread.
564 171 bool create_context = 0 != y_begin;
565 171 kleidicv_filter_context_t *thread_context = context;
566
12/12
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 3 times.
✓ Branch 4 taken 21 times.
✓ Branch 5 taken 12 times.
✓ Branch 6 taken 21 times.
✓ Branch 7 taken 12 times.
✓ Branch 8 taken 21 times.
✓ Branch 9 taken 12 times.
✓ Branch 10 taken 21 times.
✓ Branch 11 taken 12 times.
171 if (create_context) {
567 126 kleidicv_error_t context_create_result = kleidicv_filter_context_create(
568 63 &thread_context, channels, kernel_width, kernel_height, width,
569 63 height);
570 // Excluded from coverage because it's impractical to test this.
571 // MockMallocToFail can't be used because malloc is used in thread setup.
572 // GCOVR_EXCL_START
573 if (KLEIDICV_OK != context_create_result) {
574 return context_create_result;
575 }
576 // GCOVR_EXCL_STOP
577 63 }
578
579 171 kleidicv_error_t result = filter(y_begin, y_end, thread_context);
580
581
12/12
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 3 times.
✓ Branch 4 taken 21 times.
✓ Branch 5 taken 12 times.
✓ Branch 6 taken 21 times.
✓ Branch 7 taken 12 times.
✓ Branch 8 taken 21 times.
✓ Branch 9 taken 12 times.
✓ Branch 10 taken 21 times.
✓ Branch 11 taken 12 times.
171 if (create_context) {
582 126 kleidicv_error_t context_release_result =
583 63 kleidicv_filter_context_release(thread_context);
584
6/12
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 12 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 12 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 12 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 12 times.
63 if (KLEIDICV_OK == result) {
585 63 result = context_release_result;
586 63 }
587 63 }
588 171 return result;
589 171 };
590 216 return parallel_batches(callback, mt, height);
591 108 }
592
593 156 kleidicv_error_t kleidicv_thread_gaussian_blur_u8(
594 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
595 size_t width, size_t height, size_t channels, size_t kernel_width,
596 size_t kernel_height, float sigma_x, float sigma_y,
597 kleidicv_border_type_t border_type, kleidicv_filter_context_t *context,
598 kleidicv_thread_multithreading mt) {
599 156 auto fixed_border_type = kleidicv::get_fixed_border_type(border_type);
600
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 153 times.
156 if (!fixed_border_type) {
601 3 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
602 }
603
604
4/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 129 times.
✓ Branch 2 taken 24 times.
✓ Branch 3 taken 129 times.
306 if (!kleidicv::gaussian_blur_is_implemented(width, height, kernel_width,
605 153 kernel_height, sigma_x, sigma_y,
606 153 channels, *fixed_border_type)) {
607 129 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
608 }
609
610
4/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 21 times.
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 3 times.
24 if (kernel_width <= 7 || kernel_width == 15 || kernel_width == 21) {
611 54 auto callback = [=](size_t y_begin, size_t y_end,
612 kleidicv_filter_context_t *thread_context) {
613 66 return kleidicv_gaussian_blur_fixed_stripe_u8(
614 33 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
615 33 channels, kernel_width, kernel_height, sigma_x, sigma_y,
616 33 *fixed_border_type, thread_context);
617 };
618 42 return kleidicv_thread_filter(callback, width, height, channels,
619 21 kernel_width, kernel_height, context, mt);
620 21 }
621
622 9 auto callback = [=](size_t y_begin, size_t y_end,
623 kleidicv_filter_context_t *thread_context) {
624 12 return kleidicv_gaussian_blur_arbitrary_stripe_u8(
625 6 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
626 6 channels, kernel_width, kernel_height, sigma_x, sigma_y,
627 6 *fixed_border_type, thread_context);
628 };
629 6 return kleidicv_thread_filter(callback, width, height, channels, kernel_width,
630 3 kernel_height, context, mt);
631 156 }
632
633 81 kleidicv_error_t kleidicv_thread_separable_filter_2d_u8(
634 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
635 size_t width, size_t height, size_t channels, const uint8_t *kernel_x,
636 size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height,
637 kleidicv_border_type_t border_type, kleidicv_filter_context_t *context,
638 kleidicv_thread_multithreading mt) {
639
4/4
✓ Branch 0 taken 57 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 57 times.
✓ Branch 3 taken 24 times.
162 if (!kleidicv::separable_filter_2d_is_implemented(width, height, kernel_width,
640 81 kernel_height)) {
641 57 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
642 }
643
644 24 auto fixed_border_type = kleidicv::get_fixed_border_type(border_type);
645
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 3 times.
24 if (!fixed_border_type) {
646 3 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
647 }
648
649 54 auto callback = [=](size_t y_begin, size_t y_end,
650 kleidicv_filter_context_t *thread_context) {
651 66 return kleidicv_separable_filter_2d_stripe_u8(
652 33 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
653 33 channels, kernel_x, kernel_width, kernel_y, kernel_height,
654 33 *fixed_border_type, thread_context);
655 };
656 42 return kleidicv_thread_filter(callback, width, height, channels, kernel_width,
657 21 kernel_height, context, mt);
658 81 }
659
660 81 kleidicv_error_t kleidicv_thread_separable_filter_2d_u16(
661 const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride,
662 size_t width, size_t height, size_t channels, const uint16_t *kernel_x,
663 size_t kernel_width, const uint16_t *kernel_y, size_t kernel_height,
664 kleidicv_border_type_t border_type, kleidicv_filter_context_t *context,
665 kleidicv_thread_multithreading mt) {
666
4/4
✓ Branch 0 taken 57 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 57 times.
✓ Branch 3 taken 24 times.
162 if (!kleidicv::separable_filter_2d_is_implemented(width, height, kernel_width,
667 81 kernel_height)) {
668 57 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
669 }
670
671 24 auto fixed_border_type = kleidicv::get_fixed_border_type(border_type);
672
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 3 times.
24 if (!fixed_border_type) {
673 3 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
674 }
675
676 54 auto callback = [=](size_t y_begin, size_t y_end,
677 kleidicv_filter_context_t *thread_context) {
678 66 return kleidicv_separable_filter_2d_stripe_u16(
679 33 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
680 33 channels, kernel_x, kernel_width, kernel_y, kernel_height,
681 33 *fixed_border_type, thread_context);
682 };
683 42 return kleidicv_thread_filter(callback, width, height, channels, kernel_width,
684 21 kernel_height, context, mt);
685 81 }
686
687 81 kleidicv_error_t kleidicv_thread_separable_filter_2d_s16(
688 const int16_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
689 size_t width, size_t height, size_t channels, const int16_t *kernel_x,
690 size_t kernel_width, const int16_t *kernel_y, size_t kernel_height,
691 kleidicv_border_type_t border_type, kleidicv_filter_context_t *context,
692 kleidicv_thread_multithreading mt) {
693
4/4
✓ Branch 0 taken 57 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 57 times.
✓ Branch 3 taken 24 times.
162 if (!kleidicv::separable_filter_2d_is_implemented(width, height, kernel_width,
694 81 kernel_height)) {
695 57 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
696 }
697
698 24 auto fixed_border_type = kleidicv::get_fixed_border_type(border_type);
699
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 3 times.
24 if (!fixed_border_type) {
700 3 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
701 }
702
703 54 auto callback = [=](size_t y_begin, size_t y_end,
704 kleidicv_filter_context_t *thread_context) {
705 66 return kleidicv_separable_filter_2d_stripe_s16(
706 33 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
707 33 channels, kernel_x, kernel_width, kernel_y, kernel_height,
708 33 *fixed_border_type, thread_context);
709 };
710 42 return kleidicv_thread_filter(callback, width, height, channels, kernel_width,
711 21 kernel_height, context, mt);
712 81 }
713
714 81 kleidicv_error_t kleidicv_thread_blur_and_downsample_u8(
715 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
716 uint8_t *dst, size_t dst_stride, size_t channels,
717 kleidicv_border_type_t border_type, kleidicv_filter_context_t *context,
718 kleidicv_thread_multithreading mt) {
719
4/4
✓ Branch 0 taken 57 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 57 times.
✓ Branch 3 taken 24 times.
162 if (!kleidicv::blur_and_downsample_is_implemented(src_width, src_height,
720 81 channels)) {
721 57 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
722 }
723
724 24 auto fixed_border_type = kleidicv::get_fixed_border_type(border_type);
725
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 3 times.
24 if (!fixed_border_type) {
726 3 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
727 }
728
729 54 auto callback = [=](unsigned y_begin, unsigned y_end,
730 kleidicv_filter_context_t *thread_context) {
731 66 return kleidicv_blur_and_downsample_stripe_u8(
732 33 src, src_stride, src_width, src_height, dst, dst_stride, y_begin, y_end,
733 33 channels, *fixed_border_type, thread_context);
734 };
735 42 return kleidicv_thread_filter(callback, src_width, src_height, channels, 5, 5,
736 21 context, mt);
737 81 }
738
739 153 kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8(
740 const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
741 size_t width, size_t height, size_t channels,
742 kleidicv_thread_multithreading mt) {
743
2/2
✓ Branch 0 taken 69 times.
✓ Branch 1 taken 84 times.
153 if (!kleidicv::sobel_is_implemented(width, height, 3)) {
744 69 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
745 }
746
747 204 auto callback = [=](unsigned y_begin, unsigned y_end) {
748 240 return kleidicv_sobel_3x3_horizontal_stripe_s16_u8(
749 120 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
750 120 channels);
751 };
752 84 return parallel_batches(callback, mt, height);
753 153 }
754
755 399 kleidicv_error_t kleidicv_thread_median_blur_u8(
756 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
757 size_t width, size_t height, size_t channels, size_t kernel_width,
758 size_t kernel_height, kleidicv_border_type_t border_type,
759 kleidicv_thread_multithreading mt) {
760 798 auto result_pair = kleidicv::median_blur_is_implemented(
761 399 src, src_stride, dst, dst_stride, width, height, channels, kernel_width,
762 399 kernel_height, border_type);
763
764 399 auto checks_result = result_pair.first;
765 399 auto fixed_border_type = result_pair.second;
766
2/2
✓ Branch 0 taken 312 times.
✓ Branch 1 taken 87 times.
399 if (checks_result != KLEIDICV_OK) {
767 312 return checks_result;
768 }
769
770
2/2
✓ Branch 0 taken 75 times.
✓ Branch 1 taken 12 times.
87 if (kernel_width <= 7) {
771 189 auto callback = [=](unsigned y_begin, unsigned y_end) {
772 228 return kleidicv_median_blur_sorting_network_stripe_u8(
773 114 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
774 114 channels, kernel_width, kernel_height, fixed_border_type);
775 };
776 75 return parallel_batches(callback, mt, height);
777 75 }
778
779
3/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 9 times.
12 if (kernel_width > 7 && kernel_width <= 15) {
780 27 auto callback = [=](unsigned y_begin, unsigned y_end) {
781 36 return kleidicv_median_blur_small_hist_stripe_u8(
782 18 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
783 18 channels, kernel_width, kernel_height, fixed_border_type);
784 };
785 9 return parallel_batches(callback, mt, height);
786 9 }
787
788 9 auto callback = [=](unsigned y_begin, unsigned y_end) {
789 12 return kleidicv_median_blur_large_hist_stripe_u8(
790 6 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
791 6 channels, kernel_width, kernel_height, fixed_border_type);
792 };
793 3 return parallel_batches(callback, mt, height);
794 399 }
795
796 399 kleidicv_error_t kleidicv_thread_median_blur_s16(
797 const int16_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
798 size_t width, size_t height, size_t channels, size_t kernel_width,
799 size_t kernel_height, kleidicv_border_type_t border_type,
800 kleidicv_thread_multithreading mt) {
801 798 auto result_pair = kleidicv::median_blur_is_implemented(
802 399 src, src_stride, dst, dst_stride, width, height, channels, kernel_width,
803 399 kernel_height, border_type);
804
805 399 auto checks_result = result_pair.first;
806 399 auto fixed_border_type = result_pair.second;
807
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 75 times.
399 if (checks_result != KLEIDICV_OK) {
808 324 return checks_result;
809 }
810
811 189 auto callback = [=](unsigned y_begin, unsigned y_end) {
812 228 return kleidicv_median_blur_sorting_network_stripe_s16(
813 114 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
814 114 channels, kernel_width, kernel_height, fixed_border_type);
815 };
816 75 return parallel_batches(callback, mt, height);
817 399 }
818
819 399 kleidicv_error_t kleidicv_thread_median_blur_u16(
820 const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride,
821 size_t width, size_t height, size_t channels, size_t kernel_width,
822 size_t kernel_height, kleidicv_border_type_t border_type,
823 kleidicv_thread_multithreading mt) {
824 798 auto result_pair = kleidicv::median_blur_is_implemented(
825 399 src, src_stride, dst, dst_stride, width, height, channels, kernel_width,
826 399 kernel_height, border_type);
827
828 399 auto checks_result = result_pair.first;
829 399 auto fixed_border_type = result_pair.second;
830
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 75 times.
399 if (checks_result != KLEIDICV_OK) {
831 324 return checks_result;
832 }
833
834 189 auto callback = [=](unsigned y_begin, unsigned y_end) {
835 228 return kleidicv_median_blur_sorting_network_stripe_u16(
836 114 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
837 114 channels, kernel_width, kernel_height, fixed_border_type);
838 };
839 75 return parallel_batches(callback, mt, height);
840 399 }
841
842 399 kleidicv_error_t kleidicv_thread_median_blur_f32(
843 const float *src, size_t src_stride, float *dst, size_t dst_stride,
844 size_t width, size_t height, size_t channels, size_t kernel_width,
845 size_t kernel_height, kleidicv_border_type_t border_type,
846 kleidicv_thread_multithreading mt) {
847 798 auto result_pair = kleidicv::median_blur_is_implemented(
848 399 src, src_stride, dst, dst_stride, width, height, channels, kernel_width,
849 399 kernel_height, border_type);
850
851 399 auto checks_result = result_pair.first;
852 399 auto fixed_border_type = result_pair.second;
853
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 75 times.
399 if (checks_result != KLEIDICV_OK) {
854 324 return checks_result;
855 }
856
857 189 auto callback = [=](unsigned y_begin, unsigned y_end) {
858 228 return kleidicv_median_blur_sorting_network_stripe_f32(
859 114 src, src_stride, dst, dst_stride, width, height, y_begin, y_end,
860 114 channels, kernel_width, kernel_height, fixed_border_type);
861 };
862 75 return parallel_batches(callback, mt, height);
863 399 }
864
865 153 kleidicv_error_t kleidicv_thread_sobel_3x3_vertical_s16_u8(
866 const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
867 size_t width, size_t height, size_t channels,
868 kleidicv_thread_multithreading mt) {
869
2/2
✓ Branch 0 taken 69 times.
✓ Branch 1 taken 84 times.
153 if (!kleidicv::sobel_is_implemented(width, height, 3)) {
870 69 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
871 }
872
873 204 auto callback = [=](unsigned y_begin, unsigned y_end) {
874 240 return kleidicv_sobel_3x3_vertical_stripe_s16_u8(src, src_stride, dst,
875 120 dst_stride, width, height,
876 120 y_begin, y_end, channels);
877 };
878 84 return parallel_batches(callback, mt, height);
879 153 }
880
881 78 kleidicv_error_t kleidicv_thread_scharr_interleaved_s16_u8(
882 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
883 size_t src_channels, int16_t *dst, size_t dst_stride,
884 kleidicv_thread_multithreading mt) {
885
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 75 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 75 times.
156 if (!kleidicv::scharr_interleaved_is_implemented(src_width, src_height,
886 78 src_channels)) {
887 3 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
888 }
889
890 183 auto callback = [=](unsigned y_begin, unsigned y_end) {
891 216 return kleidicv_scharr_interleaved_stripe_s16_u8(
892 108 src, src_stride, src_width, src_height, src_channels, dst, dst_stride,
893 108 y_begin, y_end);
894 };
895
896 // height is decremented by 2 as the result has less rows.
897 75 return parallel_batches(callback, mt, src_height - 2);
898 78 }
899
900 96 kleidicv_error_t kleidicv_thread_resize_to_quarter_u8(
901 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
902 uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
903 kleidicv_thread_multithreading mt) {
904 192 auto callback = [=](unsigned begin, unsigned end) {
905 96 size_t src_begin = size_t{begin} * 2;
906 96 size_t src_end = std::min<size_t>(src_height, size_t{end} * 2);
907 96 size_t dst_begin = begin;
908 96 size_t dst_end = std::min<size_t>(dst_height, end);
909
910 // half of odd height is rounded towards zero?
911
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 88 times.
96 if (dst_begin == dst_end) {
912 8 return KLEIDICV_OK;
913 }
914
915 176 return kleidicv_resize_to_quarter_u8(
916 88 src + src_begin * src_stride, src_stride, src_width,
917 88 src_end - src_begin, dst + dst_begin * dst_stride, dst_stride,
918 88 dst_width, dst_end - dst_begin);
919 96 };
920 192 return parallel_batches(callback, mt, (src_height + 1) / 2);
921 96 }
922
923 100 kleidicv_error_t kleidicv_thread_resize_linear_u8(
924 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
925 uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
926 kleidicv_thread_multithreading mt) {
927
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 96 times.
200 if (!kleidicv::resize_linear_u8_is_implemented(src_width, src_height,
928 100 dst_width, dst_height)) {
929 4 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
930 }
931 200 auto callback = [=](unsigned y_begin, unsigned y_end) {
932 208 return kleidicv_resize_linear_stripe_u8(
933 104 src, src_stride, src_width, src_height, y_begin,
934 104 std::min<size_t>(src_height, y_end + 1), dst, dst_stride, dst_width,
935 104 dst_height);
936 };
937 96 return parallel_batches(callback, mt, std::max<size_t>(1, src_height - 1));
938 100 }
939
940 148 kleidicv_error_t kleidicv_thread_resize_linear_f32(
941 const float *src, size_t src_stride, size_t src_width, size_t src_height,
942 float *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
943 kleidicv_thread_multithreading mt) {
944
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 144 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 144 times.
296 if (!kleidicv::resize_linear_f32_is_implemented(src_width, src_height,
945 148 dst_width, dst_height)) {
946 4 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
947 }
948 300 auto callback = [=](unsigned y_begin, unsigned y_end) {
949 312 return kleidicv_resize_linear_stripe_f32(
950 156 src, src_stride, src_width, src_height, y_begin,
951 156 std::min<size_t>(src_height, y_end + 1), dst, dst_stride, dst_width,
952 156 dst_height);
953 };
954 144 return parallel_batches(callback, mt, std::max<size_t>(1, src_height - 1));
955 148 }
956
957 156 kleidicv_error_t kleidicv_thread_remap_s16_u8(
958 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
959 uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
960 size_t channels, const int16_t *mapxy, size_t mapxy_stride,
961 kleidicv_border_type_t border_type, const uint8_t *border_value,
962 kleidicv_thread_multithreading mt) {
963
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 150 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 150 times.
312 if (!kleidicv::remap_s16_is_implemented<uint8_t>(src_stride, src_width,
964 156 src_height, dst_width,
965 156 border_type, channels)) {
966 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
967 }
968 366 auto callback = [=](unsigned begin, unsigned end) {
969 432 return kleidicv_remap_s16_u8(
970 216 src, src_stride, src_width, src_height,
971 216 dst + begin * dst_stride / sizeof(uint8_t), dst_stride, dst_width,
972 216 end - begin, channels,
973 216 mapxy + static_cast<ptrdiff_t>(begin * mapxy_stride / sizeof(int16_t)),
974 216 mapxy_stride, border_type, border_value);
975 };
976 150 return parallel_batches(callback, mt, dst_height);
977 156 }
978
979 156 kleidicv_error_t kleidicv_thread_remap_s16_u16(
980 const uint16_t *src, size_t src_stride, size_t src_width, size_t src_height,
981 uint16_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
982 size_t channels, const int16_t *mapxy, size_t mapxy_stride,
983 kleidicv_border_type_t border_type, const uint16_t *border_value,
984 kleidicv_thread_multithreading mt) {
985
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 150 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 150 times.
312 if (!kleidicv::remap_s16_is_implemented<uint16_t>(src_stride, src_width,
986 156 src_height, dst_width,
987 156 border_type, channels)) {
988 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
989 }
990 366 auto callback = [=](unsigned begin, unsigned end) {
991 432 return kleidicv_remap_s16_u16(
992 216 src, src_stride, src_width, src_height,
993 216 dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(uint16_t)),
994 216 dst_stride, dst_width, end - begin, channels,
995 216 mapxy + static_cast<ptrdiff_t>(begin * mapxy_stride / sizeof(int16_t)),
996 216 mapxy_stride, border_type, border_value);
997 };
998 150 return parallel_batches(callback, mt, dst_height);
999 156 }
1000
1001 306 kleidicv_error_t kleidicv_thread_remap_s16point5_u8(
1002 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
1003 uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
1004 size_t channels, const int16_t *mapxy, size_t mapxy_stride,
1005 const uint16_t *mapfrac, size_t mapfrac_stride,
1006 kleidicv_border_type_t border_type, const uint8_t *border_value,
1007 kleidicv_thread_multithreading mt) {
1008
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 300 times.
306 if (!kleidicv::remap_s16point5_is_implemented<uint8_t>(
1009 306 src_stride, src_width, src_height, dst_width, border_type,
1010 306 channels)) {
1011 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
1012 }
1013 732 auto callback = [=](unsigned begin, unsigned end) {
1014 864 return kleidicv_remap_s16point5_u8(
1015 432 src, src_stride, src_width, src_height,
1016 432 dst + begin * dst_stride / sizeof(uint8_t), dst_stride, dst_width,
1017 432 end - begin, channels,
1018 432 mapxy + static_cast<ptrdiff_t>(begin * mapxy_stride / sizeof(int16_t)),
1019 432 mapxy_stride,
1020 864 mapfrac +
1021 432 static_cast<ptrdiff_t>(begin * mapfrac_stride / sizeof(uint16_t)),
1022 432 mapfrac_stride, border_type, border_value);
1023 };
1024 300 return parallel_batches(callback, mt, dst_height);
1025 306 }
1026
1027 306 kleidicv_error_t kleidicv_thread_remap_s16point5_u16(
1028 const uint16_t *src, size_t src_stride, size_t src_width, size_t src_height,
1029 uint16_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
1030 size_t channels, const int16_t *mapxy, size_t mapxy_stride,
1031 const uint16_t *mapfrac, size_t mapfrac_stride,
1032 kleidicv_border_type_t border_type, const uint16_t *border_value,
1033 kleidicv_thread_multithreading mt) {
1034
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 300 times.
306 if (!kleidicv::remap_s16point5_is_implemented<uint16_t>(
1035 306 src_stride, src_width, src_height, dst_width, border_type,
1036 306 channels)) {
1037 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
1038 }
1039 732 auto callback = [=](unsigned begin, unsigned end) {
1040 864 return kleidicv_remap_s16point5_u16(
1041 432 src, src_stride, src_width, src_height,
1042 432 dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(uint16_t)),
1043 432 dst_stride, dst_width, end - begin, channels,
1044 432 mapxy + static_cast<ptrdiff_t>(begin * mapxy_stride / sizeof(int16_t)),
1045 432 mapxy_stride,
1046 864 mapfrac +
1047 432 static_cast<ptrdiff_t>(begin * mapfrac_stride / sizeof(uint16_t)),
1048 432 mapfrac_stride, border_type, border_value);
1049 };
1050 300 return parallel_batches(callback, mt, dst_height);
1051 306 }
1052
1053 306 kleidicv_error_t kleidicv_thread_remap_f32_u8(
1054 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
1055 uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
1056 size_t channels, const float *mapx, size_t mapx_stride, const float *mapy,
1057 size_t mapy_stride, kleidicv_interpolation_type_t interpolation,
1058 kleidicv_border_type_t border_type, const uint8_t *border_value,
1059 kleidicv_thread_multithreading mt) {
1060
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 300 times.
306 if (!kleidicv::remap_f32_is_implemented<uint8_t>(
1061 306 src_stride, src_width, src_height, dst_width, dst_height, border_type,
1062 306 channels, interpolation)) {
1063 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
1064 }
1065 732 auto callback = [=](unsigned begin, unsigned end) {
1066 864 return kleidicv_remap_f32_u8(
1067 432 src, src_stride, src_width, src_height,
1068 432 dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(uint8_t)),
1069 432 dst_stride, dst_width, end - begin, channels,
1070 432 mapx + static_cast<ptrdiff_t>(begin * mapx_stride / sizeof(float)),
1071 432 mapx_stride,
1072 432 mapy + static_cast<ptrdiff_t>(begin * mapy_stride / sizeof(float)),
1073 432 mapy_stride, interpolation, border_type, border_value);
1074 };
1075 300 return parallel_batches(callback, mt, dst_height);
1076 306 }
1077
1078 306 kleidicv_error_t kleidicv_thread_remap_f32_u16(
1079 const uint16_t *src, size_t src_stride, size_t src_width, size_t src_height,
1080 uint16_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
1081 size_t channels, const float *mapx, size_t mapx_stride, const float *mapy,
1082 size_t mapy_stride, kleidicv_interpolation_type_t interpolation,
1083 kleidicv_border_type_t border_type, const uint16_t *border_value,
1084 kleidicv_thread_multithreading mt) {
1085
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 300 times.
306 if (!kleidicv::remap_f32_is_implemented<uint16_t>(
1086 306 src_stride, src_width, src_height, dst_width, dst_height, border_type,
1087 306 channels, interpolation)) {
1088 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
1089 }
1090 732 auto callback = [=](unsigned begin, unsigned end) {
1091 864 return kleidicv_remap_f32_u16(
1092 432 src, src_stride, src_width, src_height,
1093 432 dst + static_cast<ptrdiff_t>(begin * dst_stride / sizeof(uint16_t)),
1094 432 dst_stride, dst_width, end - begin, channels,
1095 432 mapx + static_cast<ptrdiff_t>(begin * mapx_stride / sizeof(float)),
1096 432 mapx_stride,
1097 432 mapy + static_cast<ptrdiff_t>(begin * mapy_stride / sizeof(float)),
1098 432 mapy_stride, interpolation, border_type, border_value);
1099 };
1100 300 return parallel_batches(callback, mt, dst_height);
1101 306 }
1102
1103 162 kleidicv_error_t kleidicv_thread_warp_perspective_u8(
1104 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
1105 uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
1106 const float transformation[9], size_t channels,
1107 kleidicv_interpolation_type_t interpolation,
1108 kleidicv_border_type_t border_type, const uint8_t *border_value,
1109 kleidicv_thread_multithreading mt) {
1110
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 150 times.
162 if (!kleidicv::warp_perspective_is_implemented<uint8_t>(
1111 162 dst_width, channels, interpolation, border_type)) {
1112 12 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
1113 }
1114
1115 366 auto callback = [=](unsigned y_begin, unsigned y_end) {
1116 432 return kleidicv_warp_perspective_stripe_u8(
1117 216 src, src_stride, src_width, src_height, dst, dst_stride, dst_width,
1118 216 dst_height, y_begin, std::min<size_t>(dst_height, y_end + 1),
1119 216 transformation, channels, interpolation, border_type, border_value);
1120 };
1121 150 return parallel_batches(callback, mt, dst_height);
1122 162 }
1123