KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/transform/remap_f32_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 127 127 100.0%
Functions: 142 142 100.0%
Branches: 136 136 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <cassert>
6
7 #include "kleidicv/ctypes.h"
8 #include "kleidicv/neon.h"
9 #include "kleidicv/transform/remap.h"
10 #include "transform_neon.h"
11
12 namespace kleidicv::neon {
13
14 template <typename ScalarType, bool IsLarge, size_t Channels,
15 kleidicv_border_type_t Border>
16 408 void remap_f32_nearest(uint32x4_t v_xmax, uint32x4_t v_ymax,
17 uint32x4_t v_src_stride, Rows<const ScalarType> src_rows,
18 Columns<ScalarType> dst, size_t dst_width,
19 Columns<const float> mapx, Columns<const float> mapy,
20 const size_t kStep, const ScalarType *border_values) {
21 408 LoopUnroll2<TryToAvoidTailLoop> loop{dst_width, kStep};
22 1840 loop.unroll_once([&](size_t x) {
23 1432 transform_pixels<ScalarType, IsLarge, Channels, Border>(
24 1432 vld1q_f32(&mapx[x]), vld1q_f32(&mapy[x]), v_xmax, v_ymax, v_src_stride,
25 1432 src_rows, dst.at(x), border_values);
26 1432 });
27 408 }
28
29 template <typename ScalarType, bool IsLarge, kleidicv_border_type_t Border,
30 size_t Channels>
31 11832 void remap_f32_linear(uint32x4_t v_xmax, uint32x4_t v_ymax,
32 uint32x4_t v_src_stride, Rows<const ScalarType> src_rows,
33 Columns<ScalarType> dst, size_t dst_width,
34 Columns<const float> mapx, Columns<const float> mapy,
35 const size_t kStep, const ScalarType *border_values) {
36 44272 auto load_xy = [&](size_t x) {
37 32440 return FloatVectorPair{vld1q_f32(&mapx[x]), vld1q_f32(&mapy[x])};
38 };
39
40 27980 auto vector_path_1ch = [&](size_t x) {
41 16148 float32x4_t a, b, c, d, xfrac, yfrac;
42 if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) {
43 8074 load_quad_pixels_replicate<ScalarType, IsLarge>(
44 8074 load_xy(x), v_xmax, v_ymax, v_src_stride, src_rows, xfrac, yfrac, a,
45 b, c, d);
46 } else {
47 static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT);
48 8074 load_quad_pixels_constant<ScalarType, IsLarge>(
49 8074 load_xy(x), v_xmax, v_ymax, v_src_stride, border_values, src_rows,
50 xfrac, yfrac, a, b, c, d);
51 }
52 32296 return lerp_2d(xfrac, yfrac, a, b, c, d);
53 16148 };
54
55 28124 auto vector_path_2ch = [&](size_t x) {
56 16292 float32x4x2_t a, b, c, d;
57 16292 float32x4_t xfrac, yfrac;
58 if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) {
59 8146 load_quad_pixels_replicate_2ch<ScalarType, IsLarge>(
60 8146 load_xy(x), v_xmax, v_ymax, v_src_stride, src_rows, xfrac, yfrac, a,
61 b, c, d);
62 } else {
63 static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT);
64 8146 load_quad_pixels_constant_2ch<ScalarType, IsLarge>(
65 8146 load_xy(x), v_xmax, v_ymax, v_src_stride, border_values, src_rows,
66 xfrac, yfrac, a, b, c, d);
67 }
68 16292 float32x4_t xfrac_low = vzip1q_f32(xfrac, xfrac);
69 16292 float32x4_t yfrac_low = vzip1q_f32(yfrac, yfrac);
70 16292 float32x4_t xfrac_high = vzip2q_f32(xfrac, xfrac);
71 16292 float32x4_t yfrac_high = vzip2q_f32(yfrac, yfrac);
72
73 32584 uint32x4_t result_low =
74 16292 lerp_2d(xfrac_low, yfrac_low, a.val[0], b.val[0], c.val[0], d.val[0]);
75 32584 uint32x4_t result_high =
76 16292 lerp_2d(xfrac_high, yfrac_high, a.val[1], b.val[1], c.val[1], d.val[1]);
77 32584 return vuzp1q_u16(result_low, result_high);
78 16292 };
79
80 11832 LoopUnroll2<TryToAvoidTailLoop> loop{dst_width, kStep};
81
82 if constexpr (Channels == 1) {
83 if constexpr (std::is_same<ScalarType, uint8_t>::value) {
84 3578 loop.unroll_four_times([&](size_t x) {
85 600 ScalarType *p_dst = &dst[x];
86 600 uint32x4_t res0 = vector_path_1ch(x);
87 600 x += kStep;
88 600 uint32x4_t res1 = vector_path_1ch(x);
89 600 uint16x8_t result16_0 = vuzp1q_u16(res0, res1);
90
91 600 x += kStep;
92 600 res0 = vector_path_1ch(x);
93 600 x += kStep;
94 600 res1 = vector_path_1ch(x);
95 600 uint16x8_t result16_1 = vuzp1q_u16(res0, res1);
96
97 600 vst1q_u8(p_dst, vuzp1q_u8(result16_0, result16_1));
98 600 });
99 8760 loop.unroll_once([&](size_t x) {
100 5782 uint8x16_t result = vreinterpretq_u8_u32(vector_path_1ch(x));
101 5782 dst[x] = vgetq_lane_u8(result, 0);
102 5782 dst[x + 1] = vgetq_lane_u8(result, 4);
103 5782 dst[x + 2] = vgetq_lane_u8(result, 8);
104 5782 dst[x + 3] = vgetq_lane_u8(result, 12);
105 5782 });
106 }
107 if constexpr (std::is_same<ScalarType, uint16_t>::value) {
108 4822 loop.unroll_twice([&](size_t x) {
109 1892 ScalarType *p_dst = dst.ptr_at(x);
110 1892 uint32x4_t res0 = vector_path_1ch(x);
111 1892 x += kStep;
112 1892 uint32x4_t res1 = vector_path_1ch(x);
113 1892 vst1q_u16(p_dst, vuzp1q_u16(res0, res1));
114 1892 });
115 7112 loop.unroll_once([&](size_t x) {
116 4182 uint32x4_t result = vector_path_1ch(x);
117 4182 uint16x4_t result16 = vget_low_u16(vuzp1q_u16(result, result));
118 4182 vst1_u16(dst.ptr_at(x), result16);
119 4182 });
120 }
121 }
122 if constexpr (Channels == 2) {
123 if constexpr (std::is_same<ScalarType, uint8_t>::value) {
124 5002 loop.unroll_twice([&](size_t x) {
125 2016 ScalarType *p_dst = dst.ptr_at(x);
126 2016 uint16x8_t result16_0 = vector_path_2ch(x);
127 2016 x += kStep;
128 2016 uint16x8_t result16_1 = vector_path_2ch(x);
129 2016 vst1q_u8(p_dst, vuzp1q_u8(vreinterpretq_u8_u16(result16_0),
130 vreinterpretq_u8_u16(result16_1)));
131 2016 });
132 7232 loop.unroll_once([&](size_t x) {
133 4246 uint16x8_t result = vector_path_2ch(x);
134 4246 vst1_u8(dst.ptr_at(x), vmovn_u16(result));
135 4246 });
136 }
137 if constexpr (std::is_same<ScalarType, uint16_t>::value) {
138 10952 loop.unroll_once([&](size_t x) {
139 8014 uint16x8_t result = vector_path_2ch(x);
140 8014 vst1q_u16(dst.ptr_at(x), result);
141 8014 });
142 }
143 }
144 11832 }
145
146 template <typename ScalarType, bool IsLarge,
147 kleidicv_interpolation_type_t Inter, kleidicv_border_type_t Border,
148 size_t Channels>
149 584 void transform_operation(Rows<const ScalarType> src_rows, size_t src_width,
150 size_t src_height, const ScalarType *border_values,
151 Rows<ScalarType> dst_rows, size_t dst_width,
152 size_t y_begin, size_t y_end,
153 Rows<const float> mapx_rows,
154 Rows<const float> mapy_rows) {
155 1168 uint32x4_t v_src_element_stride = vdupq_n_u32(
156 584 static_cast<uint32_t>(src_rows.stride() / sizeof(ScalarType)));
157 584 uint32x4_t v_xmax = vdupq_n_u32(static_cast<uint32_t>(src_width - 1));
158 584 uint32x4_t v_ymax = vdupq_n_u32(static_cast<uint32_t>(src_height - 1));
159 584 const size_t kStep = VecTraits<float>::num_lanes();
160
161
64/64
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 4 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 4 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 4 times.
✓ Branch 10 taken 1 times.
✓ Branch 11 taken 4 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 4 times.
✓ Branch 14 taken 1 times.
✓ Branch 15 taken 4 times.
✓ Branch 16 taken 4 times.
✓ Branch 17 taken 57 times.
✓ Branch 18 taken 5 times.
✓ Branch 19 taken 61 times.
✓ Branch 20 taken 4 times.
✓ Branch 21 taken 57 times.
✓ Branch 22 taken 5 times.
✓ Branch 23 taken 61 times.
✓ Branch 24 taken 66 times.
✓ Branch 25 taken 1485 times.
✓ Branch 26 taken 66 times.
✓ Branch 27 taken 1489 times.
✓ Branch 28 taken 66 times.
✓ Branch 29 taken 1485 times.
✓ Branch 30 taken 66 times.
✓ Branch 31 taken 1489 times.
✓ Branch 32 taken 1 times.
✓ Branch 33 taken 4 times.
✓ Branch 34 taken 1 times.
✓ Branch 35 taken 4 times.
✓ Branch 36 taken 1 times.
✓ Branch 37 taken 4 times.
✓ Branch 38 taken 1 times.
✓ Branch 39 taken 4 times.
✓ Branch 40 taken 2 times.
✓ Branch 41 taken 4 times.
✓ Branch 42 taken 1 times.
✓ Branch 43 taken 4 times.
✓ Branch 44 taken 2 times.
✓ Branch 45 taken 4 times.
✓ Branch 46 taken 1 times.
✓ Branch 47 taken 4 times.
✓ Branch 48 taken 4 times.
✓ Branch 49 taken 33 times.
✓ Branch 50 taken 5 times.
✓ Branch 51 taken 37 times.
✓ Branch 52 taken 4 times.
✓ Branch 53 taken 33 times.
✓ Branch 54 taken 5 times.
✓ Branch 55 taken 37 times.
✓ Branch 56 taken 66 times.
✓ Branch 57 taken 1461 times.
✓ Branch 58 taken 66 times.
✓ Branch 59 taken 1465 times.
✓ Branch 60 taken 66 times.
✓ Branch 61 taken 1461 times.
✓ Branch 62 taken 66 times.
✓ Branch 63 taken 1465 times.
12824 for (size_t y = y_begin; y < y_end; ++y) {
162 if constexpr (Inter == KLEIDICV_INTERPOLATION_NEAREST) {
163 408 remap_f32_nearest<ScalarType, IsLarge, Channels, Border>(
164 408 v_xmax, v_ymax, v_src_element_stride, src_rows, dst_rows.as_columns(),
165 408 dst_width, mapx_rows.as_columns(), mapy_rows.as_columns(), kStep,
166 408 border_values);
167 } else {
168 static_assert(Inter == KLEIDICV_INTERPOLATION_LINEAR);
169 11832 remap_f32_linear<ScalarType, IsLarge, Border, Channels>(
170 11832 v_xmax, v_ymax, v_src_element_stride, src_rows, dst_rows.as_columns(),
171 11832 dst_width, mapx_rows.as_columns(), mapy_rows.as_columns(), kStep,
172 11832 border_values);
173 }
174 12240 ++mapx_rows;
175 12240 ++mapy_rows;
176 12240 ++dst_rows;
177 12240 }
178 584 }
179
180 // Most of the complexity comes from parameter checking.
181 // NOLINTBEGIN(readability-function-cognitive-complexity)
182 template <typename T>
183 638 kleidicv_error_t remap_f32(const T *src, size_t src_stride, size_t src_width,
184 size_t src_height, T *dst, size_t dst_stride,
185 size_t dst_width, size_t dst_height, size_t channels,
186 const float *mapx, size_t mapx_stride,
187 const float *mapy, size_t mapy_stride,
188 kleidicv_interpolation_type_t interpolation,
189 kleidicv_border_type_t border_type,
190 const T *border_value) {
191
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 316 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 316 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 316 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 316 times.
638 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
192
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 314 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 314 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 313 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 313 times.
632 CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height);
193
8/8
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 311 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 311 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 311 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 311 times.
627 CHECK_POINTER_AND_STRIDE(mapx, mapx_stride, dst_height);
194
8/8
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 308 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 308 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 308 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 308 times.
622 CHECK_POINTER_AND_STRIDE(mapy, mapy_stride, dst_height);
195
12/12
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 307 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 306 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 306 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 307 times.
✓ Branch 8 taken 1 times.
✓ Branch 9 taken 306 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 306 times.
616 CHECK_IMAGE_SIZE(src_width, src_height);
196
12/12
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 305 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 304 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 304 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 305 times.
✓ Branch 8 taken 1 times.
✓ Branch 9 taken 304 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 304 times.
612 CHECK_IMAGE_SIZE(dst_width, dst_height);
197
8/8
✓ Branch 0 taken 149 times.
✓ Branch 1 taken 155 times.
✓ Branch 2 taken 147 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 149 times.
✓ Branch 5 taken 155 times.
✓ Branch 6 taken 147 times.
✓ Branch 7 taken 2 times.
608 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) {
198 4 return KLEIDICV_ERROR_NULL_POINTER;
199 }
200
201
8/8
✓ Branch 0 taken 292 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 292 times.
✓ Branch 3 taken 10 times.
✓ Branch 4 taken 292 times.
✓ Branch 5 taken 10 times.
✓ Branch 6 taken 292 times.
✓ Branch 7 taken 10 times.
1208 if (!remap_f32_is_implemented<T>(src_stride, src_width, src_height, dst_width,
202 604 dst_height, border_type, channels,
203 604 interpolation)) {
204 20 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
205 }
206
207 584 Rows<const T> src_rows{src, src_stride, channels};
208 584 Rows<const float> mapx_rows{mapx, mapx_stride, 1};
209 584 Rows<const float> mapy_rows{mapy, mapy_stride, 1};
210 584 Rows<T> dst_rows{dst, dst_stride, channels};
211 584 Rectangle rect{dst_width, dst_height};
212
213 1168 transform_operation<T>(is_image_large(src_rows, src_height), interpolation,
214 border_type, channels, src_rows, src_width, src_height,
215 584 border_value, dst_rows, dst_width, 0, dst_height,
216 mapx_rows, mapy_rows);
217
218 584 return KLEIDICV_OK;
219 638 }
220 // NOLINTEND(readability-function-cognitive-complexity)
221
222 #define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(type) \
223 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_f32<type>( \
224 const type *src, size_t src_stride, size_t src_width, size_t src_height, \
225 type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \
226 size_t channels, const float *mapx, size_t mapx_stride, \
227 const float *mapy, size_t mapy_stride, \
228 kleidicv_interpolation_type_t interpolation, \
229 kleidicv_border_type_t border_type, const type *border_value)
230
231 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(uint8_t);
232 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(uint16_t);
233
234 } // namespace kleidicv::neon
235