KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/transform/remap_f32_sve2.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 281 281 100.0%
Functions: 274 274 100.0%
Branches: 136 136 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <cassert>
6 #include <cmath>
7 #include <cstddef>
8 #include <cstdint>
9
10 #include "kleidicv/sve2.h"
11 #include "kleidicv/transform/remap.h"
12 #include "transform_sve2.h"
13
14 namespace kleidicv::sve2 {
15
16 template <typename ScalarType, bool IsLarge, kleidicv_border_type_t Border,
17 size_t Channels>
18 1680 void remap_f32_nearest(svuint32_t sv_xmax, svuint32_t sv_ymax,
19 svuint32_t sv_src_stride,
20 Rows<const ScalarType> src_rows, svuint32_t sv_border,
21 Columns<ScalarType> dst, size_t kStep, size_t dst_width,
22 Rows<const float> mapx_rows, Rows<const float> mapy_rows,
23 [[maybe_unused]] svuint8_t load_table_2ch) {
24 1680 svbool_t pg_all32 = svptrue_b32();
25 1680 svbool_t pg_all16 = svptrue_b16();
26 8864 auto load_coords = [&](svbool_t pg, size_t xs) {
27 7184 auto x = static_cast<ptrdiff_t>(xs);
28 21552 return svcreate2(svld1_f32(pg, &mapx_rows.as_columns()[x]),
29 7184 svld1_f32(pg, &mapy_rows.as_columns()[x]));
30 7184 };
31
32 8864 auto load_source = [&](svbool_t pg, svuint32_t x, svuint32_t y) {
33 if constexpr (Channels == 1) {
34 3232 return load_xy<ScalarType, IsLarge>(pg, x, y, sv_src_stride, src_rows);
35 }
36 if constexpr (Channels == 2) {
37 7904 return load_xy_2ch<ScalarType, IsLarge>(pg, x, y, sv_src_stride, src_rows,
38 3952 load_table_2ch);
39 }
40 };
41
42 8864 auto get_pixels = [&](svbool_t pg, svuint32x2_t coords) {
43 7184 svuint32_t x = svget2(coords, 0);
44 7184 svuint32_t y = svget2(coords, 1);
45 if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) {
46 7184 svbool_t in_range = svand_b_z(pg, svcmple_u32(pg, x, sv_xmax),
47 3592 svcmple_u32(pg, y, sv_ymax));
48 3592 svuint32_t result = load_source(in_range, x, y);
49 // Select between source pixels and border colour
50 7184 return svsel_u32(in_range, result, sv_border);
51 3592 } else {
52 static_assert(Border == KLEIDICV_BORDER_TYPE_REPLICATE);
53 7184 return load_source(pg, x, y);
54 }
55 7184 };
56
57 8864 auto calculate_nearest_coordinates = [&](svbool_t pg32, size_t x) {
58 7184 svfloat32x2_t coords = load_coords(pg32, x);
59 7184 svfloat32_t xf = svget2(coords, 0);
60 7184 svfloat32_t yf = svget2(coords, 1);
61
62 7184 svuint32_t xi, yi;
63 if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) {
64 // Convert coordinates to integers.
65 // Negative numbers will become large positive numbers.
66 // Since the source width and height is known to be <=2^24 these large
67 // positive numbers will always be treated as outside the source image
68 // bounds.
69 3592 xi = svreinterpret_u32_s32(
70 3592 svcvt_s32_f32_x(pg_all32, svrinta_f32_x(pg_all32, xf)));
71 3592 yi = svreinterpret_u32_s32(
72 3592 svcvt_s32_f32_x(pg_all32, svrinta_f32_x(pg_all32, yf)));
73 } else {
74 // Round to the nearest integer, clamp it to within the dimensions of
75 // the source image (negative values are already saturated to 0)
76 7184 xi = svmin_x(pg_all32,
77 3592 svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, xf, 0.5F)),
78 3592 sv_xmax);
79 7184 yi = svmin_x(pg_all32,
80 3592 svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, yf, 0.5F)),
81 3592 sv_ymax);
82 }
83 14368 return svcreate2(xi, yi);
84 7184 };
85
86 1680 LoopUnroll2 loop{dst_width, kStep};
87
88 if constexpr (Channels == 1) {
89 if constexpr (std::is_same<ScalarType, uint8_t>::value) {
90 1232 auto vector_path_generic = [&](size_t x, size_t x_max,
91 Columns<ScalarType> dst) {
92 700 size_t length = x_max - x;
93 700 svbool_t pg32 = svwhilelt_b32_u64(0ULL, length);
94 1400 svuint32_t result =
95 700 get_pixels(pg32, calculate_nearest_coordinates(pg32, x));
96 700 svst1b_u32(pg32, &dst[static_cast<ptrdiff_t>(x)], result);
97 700 };
98
99 896 loop.unroll_four_times([&](size_t x) {
100 364 ScalarType* p_dst = &dst[static_cast<ptrdiff_t>(x)];
101 728 svuint32_t res32_0 =
102 364 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x));
103 364 x += kStep;
104 728 svuint32_t res32_1 =
105 364 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x));
106 728 svuint16_t result0 = svuzp1_u16(svreinterpret_u16_u32(res32_0),
107 364 svreinterpret_u16_u32(res32_1));
108 364 x += kStep;
109 364 res32_0 =
110 364 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x));
111 364 x += kStep;
112 364 res32_1 =
113 364 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x));
114 728 svuint16_t result1 = svuzp1_u16(svreinterpret_u16_u32(res32_0),
115 364 svreinterpret_u16_u32(res32_1));
116 728 svuint8_t result = svuzp1_u8(svreinterpret_u8_u16(result0),
117 364 svreinterpret_u8_u16(result1));
118 364 svst1(svptrue_b8(), p_dst, result);
119 364 });
120 532 loop.unroll_once(
121 1176 [&](size_t x) { vector_path_generic(x, x + kStep, dst); });
122 588 loop.remaining([&](size_t x, size_t length) {
123 56 vector_path_generic(x, length, dst);
124 56 });
125 532 }
126
127 if constexpr (std::is_same<ScalarType, uint16_t>::value) {
128 640 auto vector_path_generic = [&](size_t x, size_t x_max,
129 Columns<ScalarType> dst) {
130 348 size_t length = x_max - x;
131 348 svbool_t pg32 = svwhilelt_b32(0ULL, length);
132 696 svuint32_t result =
133 348 get_pixels(pg32, calculate_nearest_coordinates(pg32, x));
134 348 svst1h_u32(pg32, &dst[static_cast<ptrdiff_t>(x)], result);
135 348 };
136
137 656 loop.unroll_twice([&](size_t x) {
138 364 ScalarType* p_dst = &dst[static_cast<ptrdiff_t>(x)];
139 728 svuint32_t res32_0 =
140 364 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x));
141 364 x += kStep;
142 728 svuint32_t res32_1 =
143 364 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x));
144 728 svuint16_t result = svuzp1_u16(svreinterpret_u16_u32(res32_0),
145 364 svreinterpret_u16_u32(res32_1));
146 364 svst1(svptrue_b16(), p_dst, result);
147 364 });
148 292 loop.unroll_once(
149 584 [&](size_t x) { vector_path_generic(x, x + kStep, dst); });
150 348 loop.remaining([&](size_t x, size_t length) {
151 56 vector_path_generic(x, length, dst);
152 56 });
153 292 }
154 }
155
156 if constexpr (Channels == 2) {
157 if constexpr (std::is_same<ScalarType, uint8_t>::value) {
158 1168 auto vector_path_generic = [&](size_t x, size_t x_max,
159 Columns<ScalarType> dst) {
160 620 size_t length = x_max - x;
161 620 svbool_t pg32 = svwhilelt_b32(0ULL, length);
162 1240 svuint32_t result =
163 620 get_pixels(pg32, calculate_nearest_coordinates(pg32, x));
164 620 svbool_t pg16 = svwhilelt_b16_u64(0ULL, 2 * length);
165 1240 svst1b_u16(pg16, dst.ptr_at(static_cast<ptrdiff_t>(x)),
166 620 svreinterpret_u16_u32(result));
167 620 };
168
169 1556 loop.unroll_twice([&](size_t x) {
170 1008 ScalarType* p_dst = dst.ptr_at(static_cast<ptrdiff_t>(x));
171 2016 svuint32_t result0 =
172 1008 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x));
173 1008 x += kStep;
174 2016 svuint32_t result1 =
175 1008 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x));
176 2016 svuint8_t result = svuzp1_u8(svreinterpret_u8_u32(result0),
177 1008 svreinterpret_u8_u32(result1));
178 1008 svst1(svptrue_b8(), p_dst, result);
179 1008 });
180 548 loop.unroll_once(
181 1096 [&](size_t x) { vector_path_generic(x, x + kStep, dst); });
182 620 loop.remaining([&](size_t x, size_t length) {
183 72 vector_path_generic(x, length, dst);
184 72 });
185 548 }
186
187 if constexpr (std::is_same<ScalarType, uint16_t>::value) {
188 1552 loop.unroll_once([&](size_t x) {
189 2488 svuint16_t result = svreinterpret_u16_u32(
190 1244 get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)));
191 1244 svst1_u16(pg_all16, dst.ptr_at(static_cast<ptrdiff_t>(x)), result);
192 1244 });
193 380 loop.remaining([&](size_t x, size_t x_max) {
194 72 svbool_t pg32 = svwhilelt_b32_u64(x, x_max);
195 144 svuint16_t result = svreinterpret_u16_u32(
196 72 get_pixels(pg32, calculate_nearest_coordinates(pg32, x)));
197 72 svbool_t pg16 = svwhilelt_b16_u64(2 * x, 2 * x_max);
198 72 svst1_u16(pg16, dst.ptr_at(static_cast<ptrdiff_t>(x)), result);
199 72 });
200 }
201 }
202 1680 }
203
204 template <typename ScalarType, bool IsLarge, kleidicv_border_type_t Border,
205 size_t Channels>
206 24528 void remap_f32_linear(svuint32_t sv_xmax, svuint32_t sv_ymax,
207 svfloat32_t sv_xmaxf, svfloat32_t sv_ymaxf,
208 svuint32_t sv_src_stride, Rows<const ScalarType> src_rows,
209 svuint32_t sv_border, Columns<ScalarType> dst,
210 size_t kStep, size_t dst_width,
211 Rows<const float> mapx_rows, Rows<const float> mapy_rows,
212 svuint8_t load_table_2ch) {
213 93728 auto load_coords = [&](svbool_t pg, size_t xs) {
214 69200 auto x = static_cast<ptrdiff_t>(xs);
215 207600 return svcreate2(svld1_f32(pg, &mapx_rows.as_columns()[x]),
216 69200 svld1_f32(pg, &mapy_rows.as_columns()[x]));
217 69200 };
218
219 93728 auto calculate_linear = [&](svbool_t pg, uint32_t x) {
220 if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) {
221 34600 svfloat32x2_t coords = load_coords(pg, x);
222 69200 return calculate_linear_replicated_border<ScalarType, IsLarge, Channels>(
223 34600 pg, coords, sv_xmaxf, sv_ymaxf, sv_src_stride, src_rows,
224 34600 load_table_2ch);
225 34600 } else {
226 static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT);
227 34600 svfloat32x2_t coords = load_coords(pg, x);
228 69200 return calculate_linear_constant_border<ScalarType, IsLarge, Channels>(
229 34600 pg, coords, sv_border, sv_xmax, sv_ymax, sv_src_stride, src_rows,
230 34600 load_table_2ch);
231 34600 }
232 };
233
234 44888 auto store_vector = [](svbool_t pg32, ScalarType* p_dst, svuint32_t result) {
235 if constexpr (std::is_same<ScalarType, uint8_t>::value) {
236 11852 svst1b_u32(pg32, p_dst, result);
237 }
238 if constexpr (std::is_same<ScalarType, uint16_t>::value) {
239 8508 svst1h_u32(pg32, p_dst, result);
240 }
241 20360 };
242
243 24528 svbool_t pg_all32 = svptrue_b32();
244 24528 LoopUnroll2 loop{dst_width, kStep};
245 if constexpr (Channels == 1) {
246 if constexpr (std::is_same<ScalarType, uint8_t>::value) {
247 7696 loop.unroll_four_times([&](size_t x) {
248 1452 ScalarType* p_dst = &dst[static_cast<ptrdiff_t>(x)];
249 1452 svuint32_t res0 = calculate_linear(pg_all32, x);
250 1452 x += kStep;
251 1452 svuint32_t res1 = calculate_linear(pg_all32, x);
252 2904 svuint16_t result16_0 = svuzp1_u16(svreinterpret_u16_u32(res0),
253 1452 svreinterpret_u16_u32(res1));
254 1452 x += kStep;
255 1452 res0 = calculate_linear(pg_all32, x);
256 1452 x += kStep;
257 1452 res1 = calculate_linear(pg_all32, x);
258 2904 svuint16_t result16_1 = svuzp1_u16(svreinterpret_u16_u32(res0),
259 1452 svreinterpret_u16_u32(res1));
260 2904 svst1_u8(svptrue_b8(), p_dst,
261 2904 svuzp1_u8(svreinterpret_u8_u16(result16_0),
262 1452 svreinterpret_u8_u16(result16_1)));
263 1452 });
264 }
265 if constexpr (std::is_same<ScalarType, uint16_t>::value) {
266 10040 loop.unroll_twice([&](size_t x) {
267 4036 ScalarType* p_dst = &dst[static_cast<ptrdiff_t>(x)];
268 4036 svuint32_t res0 = calculate_linear(pg_all32, x);
269 4036 x += kStep;
270 4036 svuint32_t res1 = calculate_linear(pg_all32, x);
271 8072 svuint16_t result16 = svuzp1_u16(svreinterpret_u16_u32(res0),
272 4036 svreinterpret_u16_u32(res1));
273 4036 svst1_u16(svptrue_b16(), p_dst, result16);
274 4036 });
275 }
276 24064 loop.unroll_once([&](size_t x) {
277 11816 svuint32_t result = calculate_linear(pg_all32, x);
278 11816 store_vector(pg_all32, &dst[static_cast<ptrdiff_t>(x)], result);
279 11816 });
280 20792 loop.remaining([&](size_t x, size_t x_max) {
281 8544 svbool_t pg32 = svwhilelt_b32_u64(x, x_max);
282 8544 svuint32_t result = calculate_linear(pg32, x);
283 8544 store_vector(pg32, &dst[static_cast<ptrdiff_t>(x)], result);
284 8544 });
285 }
286
287 if constexpr (Channels == 2) {
288 if constexpr (std::is_same<ScalarType, uint8_t>::value) {
289 15040 auto vector_path_generic = [&](size_t x, size_t x_max,
290 Columns<ScalarType> dst) {
291 8780 size_t length = x_max - x;
292 8780 svbool_t pg32 = svwhilelt_b32(0ULL, length);
293 8780 svuint32_t result = calculate_linear(pg32, x);
294 8780 svbool_t pg16 = svwhilelt_b16(0ULL, 2 * length);
295 17560 svst1b_u16(pg16, dst.ptr_at(static_cast<ptrdiff_t>(x)),
296 8780 svreinterpret_u16_u32(result));
297 8780 };
298
299 10940 loop.unroll_twice([&](size_t x) {
300 4680 ScalarType* p_dst = dst.ptr_at(static_cast<ptrdiff_t>(x));
301 4680 svuint32_t result0 = calculate_linear(pg_all32, x);
302 4680 x += kStep;
303 4680 svuint32_t result1 = calculate_linear(pg_all32, x);
304 9360 svuint8_t result = svuzp1_u8(svreinterpret_u8_u32(result0),
305 4680 svreinterpret_u8_u32(result1));
306 4680 svst1(svptrue_b8(), p_dst, result);
307 4680 });
308 6260 loop.unroll_once(
309 10752 [&](size_t x) { vector_path_generic(x, x + kStep, dst); });
310 10548 loop.remaining([&](size_t x, size_t length) {
311 4288 vector_path_generic(x, length, dst);
312 4288 });
313 6260 }
314 if constexpr (std::is_same<ScalarType, uint16_t>::value) {
315 18552 loop.unroll_once([&](size_t x) {
316 25064 svuint16_t result =
317 12532 svreinterpret_u16_u32(calculate_linear(pg_all32, x));
318 12532 svst1_u16(svptrue_b16(), dst.ptr_at(static_cast<ptrdiff_t>(x)), result);
319 12532 });
320 10308 loop.remaining([&](size_t x, size_t x_max) {
321 4288 svbool_t pg32 = svwhilelt_b32_u64(x, x_max);
322 4288 svuint16_t result = svreinterpret_u16_u32(calculate_linear(pg32, x));
323 4288 svbool_t pg16 = svwhilelt_b16_u64(2 * x, 2 * x_max);
324 4288 svst1_u16(pg16, dst.ptr_at(static_cast<ptrdiff_t>(x)), result);
325 4288 });
326 }
327 }
328 24528 }
329
330 template <typename ScalarType, bool IsLarge,
331 kleidicv_interpolation_type_t Inter, kleidicv_border_type_t Border,
332 size_t Channels>
333 1168 void transform_operation(Rows<const ScalarType> src_rows, size_t src_width,
334 size_t src_height, const ScalarType* border_value,
335 Rows<ScalarType> dst_rows, size_t dst_width,
336 size_t y_begin, size_t y_end,
337 Rows<const float> mapx_rows,
338 Rows<const float> mapy_rows) {
339 1168 svuint32_t sv_xmax = svdup_n_u32(src_width - 1);
340 1168 svuint32_t sv_ymax = svdup_n_u32(src_height - 1);
341 1168 svuint32_t sv_src_stride = svdup_n_u32(src_rows.stride());
342 1168 svuint32_t sv_border = svdup_n_u32(0);
343
344 if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) {
345 if constexpr (Channels == 1) {
346 292 sv_border = svdup_n_u32(border_value[0]);
347 }
348 if constexpr (Channels == 2) {
349 584 uint32_t v = static_cast<uint32_t>(border_value[0]) |
350 292 (static_cast<uint32_t>(border_value[1]) << 16);
351 292 sv_border = svdup_n_u32(v);
352 292 }
353 }
354
355 1168 svfloat32_t sv_xmaxf = svdup_n_f32(static_cast<float>(src_width - 1));
356 1168 svfloat32_t sv_ymaxf = svdup_n_f32(static_cast<float>(src_height - 1));
357
358 1168 const size_t kStep = VecTraits<float>::num_lanes();
359
360 // Rearrange input for 8bit 2channel:
361 // Gather Load 16bits, 2x 8bits for 2 channels:
362 // after 32-bit gather load: ..DC..BA
363 // goal is to have 16-bit elements: .D.C.B.A
364 2336 svuint8_t load_table_2ch =
365 1168 svreinterpret_u8_u32(svindex_u32(0x03010200U, 0x04040404));
366
367
64/64
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 8 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 8 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 8 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 8 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 8 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 8 times.
✓ Branch 16 taken 8 times.
✓ Branch 17 taken 258 times.
✓ Branch 18 taken 10 times.
✓ Branch 19 taken 266 times.
✓ Branch 20 taken 8 times.
✓ Branch 21 taken 258 times.
✓ Branch 22 taken 10 times.
✓ Branch 23 taken 266 times.
✓ Branch 24 taken 132 times.
✓ Branch 25 taken 3114 times.
✓ Branch 26 taken 132 times.
✓ Branch 27 taken 3122 times.
✓ Branch 28 taken 132 times.
✓ Branch 29 taken 3114 times.
✓ Branch 30 taken 132 times.
✓ Branch 31 taken 3122 times.
✓ Branch 32 taken 2 times.
✓ Branch 33 taken 8 times.
✓ Branch 34 taken 2 times.
✓ Branch 35 taken 8 times.
✓ Branch 36 taken 2 times.
✓ Branch 37 taken 8 times.
✓ Branch 38 taken 2 times.
✓ Branch 39 taken 8 times.
✓ Branch 40 taken 4 times.
✓ Branch 41 taken 8 times.
✓ Branch 42 taken 2 times.
✓ Branch 43 taken 8 times.
✓ Branch 44 taken 4 times.
✓ Branch 45 taken 8 times.
✓ Branch 46 taken 2 times.
✓ Branch 47 taken 8 times.
✓ Branch 48 taken 8 times.
✓ Branch 49 taken 138 times.
✓ Branch 50 taken 10 times.
✓ Branch 51 taken 146 times.
✓ Branch 52 taken 8 times.
✓ Branch 53 taken 138 times.
✓ Branch 54 taken 10 times.
✓ Branch 55 taken 146 times.
✓ Branch 56 taken 132 times.
✓ Branch 57 taken 2994 times.
✓ Branch 58 taken 132 times.
✓ Branch 59 taken 3002 times.
✓ Branch 60 taken 132 times.
✓ Branch 61 taken 2994 times.
✓ Branch 62 taken 132 times.
✓ Branch 63 taken 3002 times.
27376 for (size_t y = y_begin; y < y_end; ++y) {
368 26208 Columns<ScalarType> dst = dst_rows.as_columns();
369 if constexpr (Inter == KLEIDICV_INTERPOLATION_NEAREST) {
370 1680 remap_f32_nearest<ScalarType, IsLarge, Border, Channels>(
371 1680 sv_xmax, sv_ymax, sv_src_stride, src_rows, sv_border, dst, kStep,
372 1680 dst_width, mapx_rows, mapy_rows, load_table_2ch);
373 } else {
374 static_assert(Inter == KLEIDICV_INTERPOLATION_LINEAR);
375 24528 remap_f32_linear<ScalarType, IsLarge, Border, Channels>(
376 24528 sv_xmax, sv_ymax, sv_xmaxf, sv_ymaxf, sv_src_stride, src_rows,
377 24528 sv_border, dst, kStep, dst_width, mapx_rows, mapy_rows,
378 24528 load_table_2ch);
379 }
380 26208 ++mapx_rows;
381 26208 ++mapy_rows;
382 26208 ++dst_rows;
383 26208 }
384 1168 }
385
386 // Most of the complexity comes from parameter checking.
387 // NOLINTBEGIN(readability-function-cognitive-complexity)
388 template <typename T>
389 1276 kleidicv_error_t remap_f32(const T* src, size_t src_stride, size_t src_width,
390 size_t src_height, T* dst, size_t dst_stride,
391 size_t dst_width, size_t dst_height, size_t channels,
392 const float* mapx, size_t mapx_stride,
393 const float* mapy, size_t mapy_stride,
394 kleidicv_interpolation_type_t interpolation,
395 kleidicv_border_type_t border_type,
396 const T* border_value) {
397
8/8
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 632 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 632 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 632 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 632 times.
1276 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
398
8/8
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 628 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 628 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 626 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 626 times.
1264 CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height);
399
8/8
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 622 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 622 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 622 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 622 times.
1254 CHECK_POINTER_AND_STRIDE(mapx, mapx_stride, dst_height);
400
8/8
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 616 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 616 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 616 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 616 times.
1244 CHECK_POINTER_AND_STRIDE(mapy, mapy_stride, dst_height);
401
12/12
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 614 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 612 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 612 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 614 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 612 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 612 times.
1232 CHECK_IMAGE_SIZE(src_width, src_height);
402
12/12
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 610 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 608 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 608 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 610 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 608 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 608 times.
1224 CHECK_IMAGE_SIZE(dst_width, dst_height);
403
8/8
✓ Branch 0 taken 298 times.
✓ Branch 1 taken 310 times.
✓ Branch 2 taken 294 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 298 times.
✓ Branch 5 taken 310 times.
✓ Branch 6 taken 294 times.
✓ Branch 7 taken 4 times.
1216 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) {
404 8 return KLEIDICV_ERROR_NULL_POINTER;
405 }
406
407
8/8
✓ Branch 0 taken 584 times.
✓ Branch 1 taken 20 times.
✓ Branch 2 taken 584 times.
✓ Branch 3 taken 20 times.
✓ Branch 4 taken 584 times.
✓ Branch 5 taken 20 times.
✓ Branch 6 taken 584 times.
✓ Branch 7 taken 20 times.
2416 if (!remap_f32_is_implemented<T>(src_stride, src_width, src_height, dst_width,
408 1208 dst_height, border_type, channels,
409 1208 interpolation)) {
410 40 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
411 }
412
413 1168 Rows<const T> src_rows{src, src_stride, channels};
414 1168 Rows<const float> mapx_rows{mapx, mapx_stride, 1};
415 1168 Rows<const float> mapy_rows{mapy, mapy_stride, 1};
416 1168 Rows<T> dst_rows{dst, dst_stride, channels};
417 1168 Rectangle rect{dst_width, dst_height};
418
419 2336 transform_operation<T>(is_image_large(src_rows, src_height), interpolation,
420 border_type, channels, src_rows, src_width, src_height,
421 1168 border_value, dst_rows, dst_width, 0, dst_height,
422 mapx_rows, mapy_rows);
423
424 1168 return KLEIDICV_OK;
425 1276 }
426 // NOLINTEND(readability-function-cognitive-complexity)
427
428 #define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(type) \
429 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_f32<type>( \
430 const type* src, size_t src_stride, size_t src_width, size_t src_height, \
431 type* dst, size_t dst_stride, size_t dst_width, size_t dst_height, \
432 size_t channels, const float* mapx, size_t mapx_stride, \
433 const float* mapy, size_t mapy_stride, \
434 kleidicv_interpolation_type_t interpolation, \
435 kleidicv_border_type_t border_type, const type* border_value)
436
437 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(uint8_t);
438 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(uint16_t);
439
440 } // namespace kleidicv::sve2
441