Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #include <cassert> | ||
6 | #include <cmath> | ||
7 | #include <cstddef> | ||
8 | #include <cstdint> | ||
9 | |||
10 | #include "kleidicv/sve2.h" | ||
11 | #include "kleidicv/transform/remap.h" | ||
12 | #include "transform_sve2.h" | ||
13 | |||
14 | namespace kleidicv::sve2 { | ||
15 | |||
16 | template <typename ScalarType, bool IsLarge, kleidicv_border_type_t Border, | ||
17 | size_t Channels> | ||
18 | 1680 | void remap_f32_nearest(svuint32_t sv_xmax, svuint32_t sv_ymax, | |
19 | svuint32_t sv_src_stride, | ||
20 | Rows<const ScalarType> src_rows, svuint32_t sv_border, | ||
21 | Columns<ScalarType> dst, size_t kStep, size_t dst_width, | ||
22 | Rows<const float> mapx_rows, Rows<const float> mapy_rows, | ||
23 | [[maybe_unused]] svuint8_t load_table_2ch) { | ||
24 | 1680 | svbool_t pg_all32 = svptrue_b32(); | |
25 | 1680 | svbool_t pg_all16 = svptrue_b16(); | |
26 | 8864 | auto load_coords = [&](svbool_t pg, size_t xs) { | |
27 | 7184 | auto x = static_cast<ptrdiff_t>(xs); | |
28 | 21552 | return svcreate2(svld1_f32(pg, &mapx_rows.as_columns()[x]), | |
29 | 7184 | svld1_f32(pg, &mapy_rows.as_columns()[x])); | |
30 | 7184 | }; | |
31 | |||
32 | 8864 | auto load_source = [&](svbool_t pg, svuint32_t x, svuint32_t y) { | |
33 | if constexpr (Channels == 1) { | ||
34 | 3232 | return load_xy<ScalarType, IsLarge>(pg, x, y, sv_src_stride, src_rows); | |
35 | } | ||
36 | if constexpr (Channels == 2) { | ||
37 | 7904 | return load_xy_2ch<ScalarType, IsLarge>(pg, x, y, sv_src_stride, src_rows, | |
38 | 3952 | load_table_2ch); | |
39 | } | ||
40 | }; | ||
41 | |||
42 | 8864 | auto get_pixels = [&](svbool_t pg, svuint32x2_t coords) { | |
43 | 7184 | svuint32_t x = svget2(coords, 0); | |
44 | 7184 | svuint32_t y = svget2(coords, 1); | |
45 | if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { | ||
46 | 7184 | svbool_t in_range = svand_b_z(pg, svcmple_u32(pg, x, sv_xmax), | |
47 | 3592 | svcmple_u32(pg, y, sv_ymax)); | |
48 | 3592 | svuint32_t result = load_source(in_range, x, y); | |
49 | // Select between source pixels and border colour | ||
50 | 7184 | return svsel_u32(in_range, result, sv_border); | |
51 | 3592 | } else { | |
52 | static_assert(Border == KLEIDICV_BORDER_TYPE_REPLICATE); | ||
53 | 7184 | return load_source(pg, x, y); | |
54 | } | ||
55 | 7184 | }; | |
56 | |||
57 | 8864 | auto calculate_nearest_coordinates = [&](svbool_t pg32, size_t x) { | |
58 | 7184 | svfloat32x2_t coords = load_coords(pg32, x); | |
59 | 7184 | svfloat32_t xf = svget2(coords, 0); | |
60 | 7184 | svfloat32_t yf = svget2(coords, 1); | |
61 | |||
62 | 7184 | svuint32_t xi, yi; | |
63 | if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { | ||
64 | // Convert coordinates to integers. | ||
65 | // Negative numbers will become large positive numbers. | ||
66 | // Since the source width and height is known to be <=2^24 these large | ||
67 | // positive numbers will always be treated as outside the source image | ||
68 | // bounds. | ||
69 | 3592 | xi = svreinterpret_u32_s32( | |
70 | 3592 | svcvt_s32_f32_x(pg_all32, svrinta_f32_x(pg_all32, xf))); | |
71 | 3592 | yi = svreinterpret_u32_s32( | |
72 | 3592 | svcvt_s32_f32_x(pg_all32, svrinta_f32_x(pg_all32, yf))); | |
73 | } else { | ||
74 | // Round to the nearest integer, clamp it to within the dimensions of | ||
75 | // the source image (negative values are already saturated to 0) | ||
76 | 7184 | xi = svmin_x(pg_all32, | |
77 | 3592 | svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, xf, 0.5F)), | |
78 | 3592 | sv_xmax); | |
79 | 7184 | yi = svmin_x(pg_all32, | |
80 | 3592 | svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, yf, 0.5F)), | |
81 | 3592 | sv_ymax); | |
82 | } | ||
83 | 14368 | return svcreate2(xi, yi); | |
84 | 7184 | }; | |
85 | |||
86 | 1680 | LoopUnroll2 loop{dst_width, kStep}; | |
87 | |||
88 | if constexpr (Channels == 1) { | ||
89 | if constexpr (std::is_same<ScalarType, uint8_t>::value) { | ||
90 | 1232 | auto vector_path_generic = [&](size_t x, size_t x_max, | |
91 | Columns<ScalarType> dst) { | ||
92 | 700 | size_t length = x_max - x; | |
93 | 700 | svbool_t pg32 = svwhilelt_b32_u64(0ULL, length); | |
94 | 1400 | svuint32_t result = | |
95 | 700 | get_pixels(pg32, calculate_nearest_coordinates(pg32, x)); | |
96 | 700 | svst1b_u32(pg32, &dst[static_cast<ptrdiff_t>(x)], result); | |
97 | 700 | }; | |
98 | |||
99 | 896 | loop.unroll_four_times([&](size_t x) { | |
100 | 364 | ScalarType* p_dst = &dst[static_cast<ptrdiff_t>(x)]; | |
101 | 728 | svuint32_t res32_0 = | |
102 | 364 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); | |
103 | 364 | x += kStep; | |
104 | 728 | svuint32_t res32_1 = | |
105 | 364 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); | |
106 | 728 | svuint16_t result0 = svuzp1_u16(svreinterpret_u16_u32(res32_0), | |
107 | 364 | svreinterpret_u16_u32(res32_1)); | |
108 | 364 | x += kStep; | |
109 | 364 | res32_0 = | |
110 | 364 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); | |
111 | 364 | x += kStep; | |
112 | 364 | res32_1 = | |
113 | 364 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); | |
114 | 728 | svuint16_t result1 = svuzp1_u16(svreinterpret_u16_u32(res32_0), | |
115 | 364 | svreinterpret_u16_u32(res32_1)); | |
116 | 728 | svuint8_t result = svuzp1_u8(svreinterpret_u8_u16(result0), | |
117 | 364 | svreinterpret_u8_u16(result1)); | |
118 | 364 | svst1(svptrue_b8(), p_dst, result); | |
119 | 364 | }); | |
120 | 532 | loop.unroll_once( | |
121 | 1176 | [&](size_t x) { vector_path_generic(x, x + kStep, dst); }); | |
122 | 588 | loop.remaining([&](size_t x, size_t length) { | |
123 | 56 | vector_path_generic(x, length, dst); | |
124 | 56 | }); | |
125 | 532 | } | |
126 | |||
127 | if constexpr (std::is_same<ScalarType, uint16_t>::value) { | ||
128 | 640 | auto vector_path_generic = [&](size_t x, size_t x_max, | |
129 | Columns<ScalarType> dst) { | ||
130 | 348 | size_t length = x_max - x; | |
131 | 348 | svbool_t pg32 = svwhilelt_b32(0ULL, length); | |
132 | 696 | svuint32_t result = | |
133 | 348 | get_pixels(pg32, calculate_nearest_coordinates(pg32, x)); | |
134 | 348 | svst1h_u32(pg32, &dst[static_cast<ptrdiff_t>(x)], result); | |
135 | 348 | }; | |
136 | |||
137 | 656 | loop.unroll_twice([&](size_t x) { | |
138 | 364 | ScalarType* p_dst = &dst[static_cast<ptrdiff_t>(x)]; | |
139 | 728 | svuint32_t res32_0 = | |
140 | 364 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); | |
141 | 364 | x += kStep; | |
142 | 728 | svuint32_t res32_1 = | |
143 | 364 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); | |
144 | 728 | svuint16_t result = svuzp1_u16(svreinterpret_u16_u32(res32_0), | |
145 | 364 | svreinterpret_u16_u32(res32_1)); | |
146 | 364 | svst1(svptrue_b16(), p_dst, result); | |
147 | 364 | }); | |
148 | 292 | loop.unroll_once( | |
149 | 584 | [&](size_t x) { vector_path_generic(x, x + kStep, dst); }); | |
150 | 348 | loop.remaining([&](size_t x, size_t length) { | |
151 | 56 | vector_path_generic(x, length, dst); | |
152 | 56 | }); | |
153 | 292 | } | |
154 | } | ||
155 | |||
156 | if constexpr (Channels == 2) { | ||
157 | if constexpr (std::is_same<ScalarType, uint8_t>::value) { | ||
158 | 1168 | auto vector_path_generic = [&](size_t x, size_t x_max, | |
159 | Columns<ScalarType> dst) { | ||
160 | 620 | size_t length = x_max - x; | |
161 | 620 | svbool_t pg32 = svwhilelt_b32(0ULL, length); | |
162 | 1240 | svuint32_t result = | |
163 | 620 | get_pixels(pg32, calculate_nearest_coordinates(pg32, x)); | |
164 | 620 | svbool_t pg16 = svwhilelt_b16_u64(0ULL, 2 * length); | |
165 | 1240 | svst1b_u16(pg16, dst.ptr_at(static_cast<ptrdiff_t>(x)), | |
166 | 620 | svreinterpret_u16_u32(result)); | |
167 | 620 | }; | |
168 | |||
169 | 1556 | loop.unroll_twice([&](size_t x) { | |
170 | 1008 | ScalarType* p_dst = dst.ptr_at(static_cast<ptrdiff_t>(x)); | |
171 | 2016 | svuint32_t result0 = | |
172 | 1008 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); | |
173 | 1008 | x += kStep; | |
174 | 2016 | svuint32_t result1 = | |
175 | 1008 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); | |
176 | 2016 | svuint8_t result = svuzp1_u8(svreinterpret_u8_u32(result0), | |
177 | 1008 | svreinterpret_u8_u32(result1)); | |
178 | 1008 | svst1(svptrue_b8(), p_dst, result); | |
179 | 1008 | }); | |
180 | 548 | loop.unroll_once( | |
181 | 1096 | [&](size_t x) { vector_path_generic(x, x + kStep, dst); }); | |
182 | 620 | loop.remaining([&](size_t x, size_t length) { | |
183 | 72 | vector_path_generic(x, length, dst); | |
184 | 72 | }); | |
185 | 548 | } | |
186 | |||
187 | if constexpr (std::is_same<ScalarType, uint16_t>::value) { | ||
188 | 1552 | loop.unroll_once([&](size_t x) { | |
189 | 2488 | svuint16_t result = svreinterpret_u16_u32( | |
190 | 1244 | get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x))); | |
191 | 1244 | svst1_u16(pg_all16, dst.ptr_at(static_cast<ptrdiff_t>(x)), result); | |
192 | 1244 | }); | |
193 | 380 | loop.remaining([&](size_t x, size_t x_max) { | |
194 | 72 | svbool_t pg32 = svwhilelt_b32_u64(x, x_max); | |
195 | 144 | svuint16_t result = svreinterpret_u16_u32( | |
196 | 72 | get_pixels(pg32, calculate_nearest_coordinates(pg32, x))); | |
197 | 72 | svbool_t pg16 = svwhilelt_b16_u64(2 * x, 2 * x_max); | |
198 | 72 | svst1_u16(pg16, dst.ptr_at(static_cast<ptrdiff_t>(x)), result); | |
199 | 72 | }); | |
200 | } | ||
201 | } | ||
202 | 1680 | } | |
203 | |||
204 | template <typename ScalarType, bool IsLarge, kleidicv_border_type_t Border, | ||
205 | size_t Channels> | ||
206 | 24528 | void remap_f32_linear(svuint32_t sv_xmax, svuint32_t sv_ymax, | |
207 | svfloat32_t sv_xmaxf, svfloat32_t sv_ymaxf, | ||
208 | svuint32_t sv_src_stride, Rows<const ScalarType> src_rows, | ||
209 | svuint32_t sv_border, Columns<ScalarType> dst, | ||
210 | size_t kStep, size_t dst_width, | ||
211 | Rows<const float> mapx_rows, Rows<const float> mapy_rows, | ||
212 | svuint8_t load_table_2ch) { | ||
213 | 93728 | auto load_coords = [&](svbool_t pg, size_t xs) { | |
214 | 69200 | auto x = static_cast<ptrdiff_t>(xs); | |
215 | 207600 | return svcreate2(svld1_f32(pg, &mapx_rows.as_columns()[x]), | |
216 | 69200 | svld1_f32(pg, &mapy_rows.as_columns()[x])); | |
217 | 69200 | }; | |
218 | |||
219 | 93728 | auto calculate_linear = [&](svbool_t pg, uint32_t x) { | |
220 | if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) { | ||
221 | 34600 | svfloat32x2_t coords = load_coords(pg, x); | |
222 | 69200 | return calculate_linear_replicated_border<ScalarType, IsLarge, Channels>( | |
223 | 34600 | pg, coords, sv_xmaxf, sv_ymaxf, sv_src_stride, src_rows, | |
224 | 34600 | load_table_2ch); | |
225 | 34600 | } else { | |
226 | static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT); | ||
227 | 34600 | svfloat32x2_t coords = load_coords(pg, x); | |
228 | 69200 | return calculate_linear_constant_border<ScalarType, IsLarge, Channels>( | |
229 | 34600 | pg, coords, sv_border, sv_xmax, sv_ymax, sv_src_stride, src_rows, | |
230 | 34600 | load_table_2ch); | |
231 | 34600 | } | |
232 | }; | ||
233 | |||
234 | 44888 | auto store_vector = [](svbool_t pg32, ScalarType* p_dst, svuint32_t result) { | |
235 | if constexpr (std::is_same<ScalarType, uint8_t>::value) { | ||
236 | 11852 | svst1b_u32(pg32, p_dst, result); | |
237 | } | ||
238 | if constexpr (std::is_same<ScalarType, uint16_t>::value) { | ||
239 | 8508 | svst1h_u32(pg32, p_dst, result); | |
240 | } | ||
241 | 20360 | }; | |
242 | |||
243 | 24528 | svbool_t pg_all32 = svptrue_b32(); | |
244 | 24528 | LoopUnroll2 loop{dst_width, kStep}; | |
245 | if constexpr (Channels == 1) { | ||
246 | if constexpr (std::is_same<ScalarType, uint8_t>::value) { | ||
247 | 7696 | loop.unroll_four_times([&](size_t x) { | |
248 | 1452 | ScalarType* p_dst = &dst[static_cast<ptrdiff_t>(x)]; | |
249 | 1452 | svuint32_t res0 = calculate_linear(pg_all32, x); | |
250 | 1452 | x += kStep; | |
251 | 1452 | svuint32_t res1 = calculate_linear(pg_all32, x); | |
252 | 2904 | svuint16_t result16_0 = svuzp1_u16(svreinterpret_u16_u32(res0), | |
253 | 1452 | svreinterpret_u16_u32(res1)); | |
254 | 1452 | x += kStep; | |
255 | 1452 | res0 = calculate_linear(pg_all32, x); | |
256 | 1452 | x += kStep; | |
257 | 1452 | res1 = calculate_linear(pg_all32, x); | |
258 | 2904 | svuint16_t result16_1 = svuzp1_u16(svreinterpret_u16_u32(res0), | |
259 | 1452 | svreinterpret_u16_u32(res1)); | |
260 | 2904 | svst1_u8(svptrue_b8(), p_dst, | |
261 | 2904 | svuzp1_u8(svreinterpret_u8_u16(result16_0), | |
262 | 1452 | svreinterpret_u8_u16(result16_1))); | |
263 | 1452 | }); | |
264 | } | ||
265 | if constexpr (std::is_same<ScalarType, uint16_t>::value) { | ||
266 | 10040 | loop.unroll_twice([&](size_t x) { | |
267 | 4036 | ScalarType* p_dst = &dst[static_cast<ptrdiff_t>(x)]; | |
268 | 4036 | svuint32_t res0 = calculate_linear(pg_all32, x); | |
269 | 4036 | x += kStep; | |
270 | 4036 | svuint32_t res1 = calculate_linear(pg_all32, x); | |
271 | 8072 | svuint16_t result16 = svuzp1_u16(svreinterpret_u16_u32(res0), | |
272 | 4036 | svreinterpret_u16_u32(res1)); | |
273 | 4036 | svst1_u16(svptrue_b16(), p_dst, result16); | |
274 | 4036 | }); | |
275 | } | ||
276 | 24064 | loop.unroll_once([&](size_t x) { | |
277 | 11816 | svuint32_t result = calculate_linear(pg_all32, x); | |
278 | 11816 | store_vector(pg_all32, &dst[static_cast<ptrdiff_t>(x)], result); | |
279 | 11816 | }); | |
280 | 20792 | loop.remaining([&](size_t x, size_t x_max) { | |
281 | 8544 | svbool_t pg32 = svwhilelt_b32_u64(x, x_max); | |
282 | 8544 | svuint32_t result = calculate_linear(pg32, x); | |
283 | 8544 | store_vector(pg32, &dst[static_cast<ptrdiff_t>(x)], result); | |
284 | 8544 | }); | |
285 | } | ||
286 | |||
287 | if constexpr (Channels == 2) { | ||
288 | if constexpr (std::is_same<ScalarType, uint8_t>::value) { | ||
289 | 15040 | auto vector_path_generic = [&](size_t x, size_t x_max, | |
290 | Columns<ScalarType> dst) { | ||
291 | 8780 | size_t length = x_max - x; | |
292 | 8780 | svbool_t pg32 = svwhilelt_b32(0ULL, length); | |
293 | 8780 | svuint32_t result = calculate_linear(pg32, x); | |
294 | 8780 | svbool_t pg16 = svwhilelt_b16(0ULL, 2 * length); | |
295 | 17560 | svst1b_u16(pg16, dst.ptr_at(static_cast<ptrdiff_t>(x)), | |
296 | 8780 | svreinterpret_u16_u32(result)); | |
297 | 8780 | }; | |
298 | |||
299 | 10940 | loop.unroll_twice([&](size_t x) { | |
300 | 4680 | ScalarType* p_dst = dst.ptr_at(static_cast<ptrdiff_t>(x)); | |
301 | 4680 | svuint32_t result0 = calculate_linear(pg_all32, x); | |
302 | 4680 | x += kStep; | |
303 | 4680 | svuint32_t result1 = calculate_linear(pg_all32, x); | |
304 | 9360 | svuint8_t result = svuzp1_u8(svreinterpret_u8_u32(result0), | |
305 | 4680 | svreinterpret_u8_u32(result1)); | |
306 | 4680 | svst1(svptrue_b8(), p_dst, result); | |
307 | 4680 | }); | |
308 | 6260 | loop.unroll_once( | |
309 | 10752 | [&](size_t x) { vector_path_generic(x, x + kStep, dst); }); | |
310 | 10548 | loop.remaining([&](size_t x, size_t length) { | |
311 | 4288 | vector_path_generic(x, length, dst); | |
312 | 4288 | }); | |
313 | 6260 | } | |
314 | if constexpr (std::is_same<ScalarType, uint16_t>::value) { | ||
315 | 18552 | loop.unroll_once([&](size_t x) { | |
316 | 25064 | svuint16_t result = | |
317 | 12532 | svreinterpret_u16_u32(calculate_linear(pg_all32, x)); | |
318 | 12532 | svst1_u16(svptrue_b16(), dst.ptr_at(static_cast<ptrdiff_t>(x)), result); | |
319 | 12532 | }); | |
320 | 10308 | loop.remaining([&](size_t x, size_t x_max) { | |
321 | 4288 | svbool_t pg32 = svwhilelt_b32_u64(x, x_max); | |
322 | 4288 | svuint16_t result = svreinterpret_u16_u32(calculate_linear(pg32, x)); | |
323 | 4288 | svbool_t pg16 = svwhilelt_b16_u64(2 * x, 2 * x_max); | |
324 | 4288 | svst1_u16(pg16, dst.ptr_at(static_cast<ptrdiff_t>(x)), result); | |
325 | 4288 | }); | |
326 | } | ||
327 | } | ||
328 | 24528 | } | |
329 | |||
330 | template <typename ScalarType, bool IsLarge, | ||
331 | kleidicv_interpolation_type_t Inter, kleidicv_border_type_t Border, | ||
332 | size_t Channels> | ||
333 | 1168 | void transform_operation(Rows<const ScalarType> src_rows, size_t src_width, | |
334 | size_t src_height, const ScalarType* border_value, | ||
335 | Rows<ScalarType> dst_rows, size_t dst_width, | ||
336 | size_t y_begin, size_t y_end, | ||
337 | Rows<const float> mapx_rows, | ||
338 | Rows<const float> mapy_rows) { | ||
339 | 1168 | svuint32_t sv_xmax = svdup_n_u32(src_width - 1); | |
340 | 1168 | svuint32_t sv_ymax = svdup_n_u32(src_height - 1); | |
341 | 1168 | svuint32_t sv_src_stride = svdup_n_u32(src_rows.stride()); | |
342 | 1168 | svuint32_t sv_border = svdup_n_u32(0); | |
343 | |||
344 | if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { | ||
345 | if constexpr (Channels == 1) { | ||
346 | 292 | sv_border = svdup_n_u32(border_value[0]); | |
347 | } | ||
348 | if constexpr (Channels == 2) { | ||
349 | 584 | uint32_t v = static_cast<uint32_t>(border_value[0]) | | |
350 | 292 | (static_cast<uint32_t>(border_value[1]) << 16); | |
351 | 292 | sv_border = svdup_n_u32(v); | |
352 | 292 | } | |
353 | } | ||
354 | |||
355 | 1168 | svfloat32_t sv_xmaxf = svdup_n_f32(static_cast<float>(src_width - 1)); | |
356 | 1168 | svfloat32_t sv_ymaxf = svdup_n_f32(static_cast<float>(src_height - 1)); | |
357 | |||
358 | 1168 | const size_t kStep = VecTraits<float>::num_lanes(); | |
359 | |||
360 | // Rearrange input for 8bit 2channel: | ||
361 | // Gather Load 16bits, 2x 8bits for 2 channels: | ||
362 | // after 32-bit gather load: ..DC..BA | ||
363 | // goal is to have 16-bit elements: .D.C.B.A | ||
364 | 2336 | svuint8_t load_table_2ch = | |
365 | 1168 | svreinterpret_u8_u32(svindex_u32(0x03010200U, 0x04040404)); | |
366 | |||
367 |
64/64✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 8 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 8 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 8 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 8 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 8 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 8 times.
✓ Branch 16 taken 8 times.
✓ Branch 17 taken 258 times.
✓ Branch 18 taken 10 times.
✓ Branch 19 taken 266 times.
✓ Branch 20 taken 8 times.
✓ Branch 21 taken 258 times.
✓ Branch 22 taken 10 times.
✓ Branch 23 taken 266 times.
✓ Branch 24 taken 132 times.
✓ Branch 25 taken 3114 times.
✓ Branch 26 taken 132 times.
✓ Branch 27 taken 3122 times.
✓ Branch 28 taken 132 times.
✓ Branch 29 taken 3114 times.
✓ Branch 30 taken 132 times.
✓ Branch 31 taken 3122 times.
✓ Branch 32 taken 2 times.
✓ Branch 33 taken 8 times.
✓ Branch 34 taken 2 times.
✓ Branch 35 taken 8 times.
✓ Branch 36 taken 2 times.
✓ Branch 37 taken 8 times.
✓ Branch 38 taken 2 times.
✓ Branch 39 taken 8 times.
✓ Branch 40 taken 4 times.
✓ Branch 41 taken 8 times.
✓ Branch 42 taken 2 times.
✓ Branch 43 taken 8 times.
✓ Branch 44 taken 4 times.
✓ Branch 45 taken 8 times.
✓ Branch 46 taken 2 times.
✓ Branch 47 taken 8 times.
✓ Branch 48 taken 8 times.
✓ Branch 49 taken 138 times.
✓ Branch 50 taken 10 times.
✓ Branch 51 taken 146 times.
✓ Branch 52 taken 8 times.
✓ Branch 53 taken 138 times.
✓ Branch 54 taken 10 times.
✓ Branch 55 taken 146 times.
✓ Branch 56 taken 132 times.
✓ Branch 57 taken 2994 times.
✓ Branch 58 taken 132 times.
✓ Branch 59 taken 3002 times.
✓ Branch 60 taken 132 times.
✓ Branch 61 taken 2994 times.
✓ Branch 62 taken 132 times.
✓ Branch 63 taken 3002 times.
|
27376 | for (size_t y = y_begin; y < y_end; ++y) { |
368 | 26208 | Columns<ScalarType> dst = dst_rows.as_columns(); | |
369 | if constexpr (Inter == KLEIDICV_INTERPOLATION_NEAREST) { | ||
370 | 1680 | remap_f32_nearest<ScalarType, IsLarge, Border, Channels>( | |
371 | 1680 | sv_xmax, sv_ymax, sv_src_stride, src_rows, sv_border, dst, kStep, | |
372 | 1680 | dst_width, mapx_rows, mapy_rows, load_table_2ch); | |
373 | } else { | ||
374 | static_assert(Inter == KLEIDICV_INTERPOLATION_LINEAR); | ||
375 | 24528 | remap_f32_linear<ScalarType, IsLarge, Border, Channels>( | |
376 | 24528 | sv_xmax, sv_ymax, sv_xmaxf, sv_ymaxf, sv_src_stride, src_rows, | |
377 | 24528 | sv_border, dst, kStep, dst_width, mapx_rows, mapy_rows, | |
378 | 24528 | load_table_2ch); | |
379 | } | ||
380 | 26208 | ++mapx_rows; | |
381 | 26208 | ++mapy_rows; | |
382 | 26208 | ++dst_rows; | |
383 | 26208 | } | |
384 | 1168 | } | |
385 | |||
386 | // Most of the complexity comes from parameter checking. | ||
387 | // NOLINTBEGIN(readability-function-cognitive-complexity) | ||
388 | template <typename T> | ||
389 | 1276 | kleidicv_error_t remap_f32(const T* src, size_t src_stride, size_t src_width, | |
390 | size_t src_height, T* dst, size_t dst_stride, | ||
391 | size_t dst_width, size_t dst_height, size_t channels, | ||
392 | const float* mapx, size_t mapx_stride, | ||
393 | const float* mapy, size_t mapy_stride, | ||
394 | kleidicv_interpolation_type_t interpolation, | ||
395 | kleidicv_border_type_t border_type, | ||
396 | const T* border_value) { | ||
397 |
8/8✓ Branch 0 taken 4 times.
✓ Branch 1 taken 632 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 632 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 632 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 632 times.
|
1276 | CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); |
398 |
8/8✓ Branch 0 taken 4 times.
✓ Branch 1 taken 628 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 628 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 626 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 626 times.
|
1264 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); |
399 |
8/8✓ Branch 0 taken 6 times.
✓ Branch 1 taken 622 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 622 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 622 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 622 times.
|
1254 | CHECK_POINTER_AND_STRIDE(mapx, mapx_stride, dst_height); |
400 |
8/8✓ Branch 0 taken 6 times.
✓ Branch 1 taken 616 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 616 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 616 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 616 times.
|
1244 | CHECK_POINTER_AND_STRIDE(mapy, mapy_stride, dst_height); |
401 |
12/12✓ Branch 0 taken 2 times.
✓ Branch 1 taken 614 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 612 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 612 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 614 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 612 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 612 times.
|
1232 | CHECK_IMAGE_SIZE(src_width, src_height); |
402 |
12/12✓ Branch 0 taken 2 times.
✓ Branch 1 taken 610 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 608 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 608 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 610 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 608 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 608 times.
|
1224 | CHECK_IMAGE_SIZE(dst_width, dst_height); |
403 |
8/8✓ Branch 0 taken 298 times.
✓ Branch 1 taken 310 times.
✓ Branch 2 taken 294 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 298 times.
✓ Branch 5 taken 310 times.
✓ Branch 6 taken 294 times.
✓ Branch 7 taken 4 times.
|
1216 | if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) { |
404 | 8 | return KLEIDICV_ERROR_NULL_POINTER; | |
405 | } | ||
406 | |||
407 |
8/8✓ Branch 0 taken 584 times.
✓ Branch 1 taken 20 times.
✓ Branch 2 taken 584 times.
✓ Branch 3 taken 20 times.
✓ Branch 4 taken 584 times.
✓ Branch 5 taken 20 times.
✓ Branch 6 taken 584 times.
✓ Branch 7 taken 20 times.
|
2416 | if (!remap_f32_is_implemented<T>(src_stride, src_width, src_height, dst_width, |
408 | 1208 | dst_height, border_type, channels, | |
409 | 1208 | interpolation)) { | |
410 | 40 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
411 | } | ||
412 | |||
413 | 1168 | Rows<const T> src_rows{src, src_stride, channels}; | |
414 | 1168 | Rows<const float> mapx_rows{mapx, mapx_stride, 1}; | |
415 | 1168 | Rows<const float> mapy_rows{mapy, mapy_stride, 1}; | |
416 | 1168 | Rows<T> dst_rows{dst, dst_stride, channels}; | |
417 | 1168 | Rectangle rect{dst_width, dst_height}; | |
418 | |||
419 | 2336 | transform_operation<T>(is_image_large(src_rows, src_height), interpolation, | |
420 | border_type, channels, src_rows, src_width, src_height, | ||
421 | 1168 | border_value, dst_rows, dst_width, 0, dst_height, | |
422 | mapx_rows, mapy_rows); | ||
423 | |||
424 | 1168 | return KLEIDICV_OK; | |
425 | 1276 | } | |
426 | // NOLINTEND(readability-function-cognitive-complexity) | ||
427 | |||
428 | #define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(type) \ | ||
429 | template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_f32<type>( \ | ||
430 | const type* src, size_t src_stride, size_t src_width, size_t src_height, \ | ||
431 | type* dst, size_t dst_stride, size_t dst_width, size_t dst_height, \ | ||
432 | size_t channels, const float* mapx, size_t mapx_stride, \ | ||
433 | const float* mapy, size_t mapy_stride, \ | ||
434 | kleidicv_interpolation_type_t interpolation, \ | ||
435 | kleidicv_border_type_t border_type, const type* border_value) | ||
436 | |||
437 | KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(uint8_t); | ||
438 | KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(uint16_t); | ||
439 | |||
440 | } // namespace kleidicv::sve2 | ||
441 |