KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/transform/remap_s16_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 125 125 100.0%
Functions: 22 22 100.0%
Branches: 68 68 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <cassert>
6 #include <type_traits>
7
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/neon.h"
10 #include "kleidicv/transform/remap.h"
11
12 namespace kleidicv::neon {
13
14 template <typename ScalarType>
15 class RemapS16Replicate {
16 public:
17 using MapVecTraits = neon::VecTraits<int16_t>;
18 using MapVectorType = typename MapVecTraits::VectorType;
19 using MapVector2Type = typename MapVecTraits::Vector2Type;
20
21 134 RemapS16Replicate(Rows<const ScalarType> src_rows, size_t src_width,
22 size_t src_height)
23 134 : src_rows_{src_rows},
24 268 v_src_element_stride_{vdupq_n_u16(
25 134 static_cast<uint16_t>(src_rows_.stride() / sizeof(ScalarType)))},
26 134 v_xmax_{vdupq_n_s16(static_cast<int16_t>(src_width - 1))},
27 134 v_ymax_{vdupq_n_s16(static_cast<int16_t>(src_height - 1))} {}
28
29 4962 void transform_pixels(uint32x4_t indices_low, uint32x4_t indices_high,
30 Columns<ScalarType> dst) {
31 // Copy pixels from source
32 4962 dst[0] = src_rows_[vgetq_lane_u32(indices_low, 0)];
33 4962 dst[1] = src_rows_[vgetq_lane_u32(indices_low, 1)];
34 4962 dst[2] = src_rows_[vgetq_lane_u32(indices_low, 2)];
35 4962 dst[3] = src_rows_[vgetq_lane_u32(indices_low, 3)];
36
37 4962 dst[4] = src_rows_[vgetq_lane_u32(indices_high, 0)];
38 4962 dst[5] = src_rows_[vgetq_lane_u32(indices_high, 1)];
39 4962 dst[6] = src_rows_[vgetq_lane_u32(indices_high, 2)];
40 4962 dst[7] = src_rows_[vgetq_lane_u32(indices_high, 3)];
41 4962 }
42
43 182 void process_row(size_t width, Columns<const int16_t> mapxy,
44 Columns<ScalarType> dst) {
45 5144 auto vector_path = [&](size_t step) {
46 4962 MapVector2Type xy = vld2q_s16(&mapxy[0]);
47 // Clamp coordinates to within the dimensions of the source image
48 9924 uint16x8_t x = vreinterpretq_u16_s16(
49 4962 vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[0], v_xmax_)));
50 9924 uint16x8_t y = vreinterpretq_u16_s16(
51 4962 vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[1], v_ymax_)));
52 // Calculate offsets from coordinates (y * stride/sizeof(ScalarType) + x)
53 9924 uint32x4_t indices_low =
54 9924 vmlal_u16(vmovl_u16(vget_low_u16(x)), vget_low_u16(y),
55 4962 vget_low_u16(v_src_element_stride_));
56 9924 uint32x4_t indices_high =
57 4962 vmlal_high_u16(vmovl_high_u16(x), y, v_src_element_stride_);
58
59 4962 transform_pixels(indices_low, indices_high, dst);
60
61 4962 mapxy += ptrdiff_t(step);
62 4962 dst += ptrdiff_t(step);
63 4962 };
64
65 182 LoopUnroll loop{width, MapVecTraits::num_lanes()};
66 182 loop.unroll_once(vector_path);
67 364 ptrdiff_t back_step = static_cast<ptrdiff_t>(loop.step()) -
68 182 static_cast<ptrdiff_t>(loop.remaining_length());
69 182 mapxy -= back_step;
70 182 dst -= back_step;
71 280 loop.remaining([&](size_t, size_t step) { vector_path(step); });
72 182 }
73
74 private:
75 Rows<const ScalarType> src_rows_;
76 uint16x8_t v_src_element_stride_;
77 int16x8_t v_xmax_;
78 int16x8_t v_ymax_;
79 }; // end of class RemapS16Replicate<ScalarType>
80
81 template <typename ScalarType>
82 class RemapS16ConstantBorder {
83 public:
84 using SrcVecTraits = neon::VecTraits<ScalarType>;
85 using SrcVecType = typename SrcVecTraits::VectorType;
86
87 using MapVecTraits = neon::VecTraits<int16_t>;
88 using MapVectorType = typename MapVecTraits::VectorType;
89 using MapVector2Type = typename MapVecTraits::Vector2Type;
90
91 132 RemapS16ConstantBorder(Rows<const ScalarType> src_rows, size_t src_width,
92 size_t src_height, const ScalarType *border_value)
93 132 : src_rows_{src_rows},
94 264 v_src_element_stride_{vdupq_n_u16(
95 132 static_cast<uint16_t>(src_rows_.stride() / sizeof(ScalarType)))},
96 132 v_width_{vdupq_n_u16(static_cast<uint16_t>(src_width))},
97 132 v_height_{vdupq_n_u16(static_cast<uint16_t>(src_height))},
98 132 v_border_{vdupq_n_u16(*border_value)} {}
99
100 void transform_pixels(uint32x4_t indices_low, uint32x4_t indices_high,
101 uint16x8_t in_range, Columns<ScalarType> dst);
102
103 180 void process_row(size_t width, Columns<const int16_t> mapxy,
104 Columns<ScalarType> dst) {
105 5140 auto vector_path = [&](size_t step) {
106 4960 MapVector2Type xy = vld2q_s16(&mapxy[0]);
107
108 4960 uint16x8_t x = vreinterpretq_u16_s16(xy.val[0]);
109 4960 uint16x8_t y = vreinterpretq_u16_s16(xy.val[1]);
110
111 // Find whether coordinates are within the image dimensions.
112 // Negative coordinates are interpreted as large values due to the
113 // s16->u16 reinterpretation.
114 9920 uint16x8_t in_range =
115 4960 vandq_u16(vcltq_u16(x, v_width_), vcltq_u16(y, v_height_));
116
117 // Zero out-of-range coordinates.
118 4960 x = vandq_u16(in_range, x);
119 4960 y = vandq_u16(in_range, y);
120
121 // Calculate offsets from coordinates (y * stride/sizeof(ScalarType) + x)
122 9920 uint32x4_t indices_low =
123 9920 vmlal_u16(vmovl_u16(vget_low_u16(x)), vget_low_u16(y),
124 4960 vget_low_u16(v_src_element_stride_));
125 9920 uint32x4_t indices_high =
126 4960 vmlal_high_u16(vmovl_high_u16(x), y, v_src_element_stride_);
127
128 4960 transform_pixels(indices_low, indices_high, in_range, dst);
129
130 4960 mapxy += ptrdiff_t(step);
131 4960 dst += ptrdiff_t(step);
132 4960 };
133
134 180 LoopUnroll loop{width, MapVecTraits::num_lanes()};
135 180 loop.unroll_once(vector_path);
136 360 ptrdiff_t back_step = static_cast<ptrdiff_t>(loop.step()) -
137 180 static_cast<ptrdiff_t>(loop.remaining_length());
138 180 mapxy -= back_step;
139 180 dst -= back_step;
140 278 loop.remaining([&](size_t, size_t step) { vector_path(step); });
141 180 }
142
143 private:
144 Rows<const ScalarType> src_rows_;
145 uint16x8_t v_src_element_stride_;
146 uint16x8_t v_width_;
147 uint16x8_t v_height_;
148 uint16x8_t v_border_;
149 }; // end of class RemapS16ConstantBorder<ScalarType>
150
151 template <>
152 2528 void RemapS16ConstantBorder<uint8_t>::transform_pixels(uint32x4_t indices_low,
153 uint32x4_t indices_high,
154 uint16x8_t in_range,
155 Columns<uint8_t> dst) {
156 22752 uint8x8_t pixels = {
157 2528 src_rows_[vgetq_lane_u32(indices_low, 0)],
158 2528 src_rows_[vgetq_lane_u32(indices_low, 1)],
159 2528 src_rows_[vgetq_lane_u32(indices_low, 2)],
160 2528 src_rows_[vgetq_lane_u32(indices_low, 3)],
161 2528 src_rows_[vgetq_lane_u32(indices_high, 0)],
162 2528 src_rows_[vgetq_lane_u32(indices_high, 1)],
163 2528 src_rows_[vgetq_lane_u32(indices_high, 2)],
164 2528 src_rows_[vgetq_lane_u32(indices_high, 3)],
165 };
166 // Select between source pixels and border colour
167 5056 uint8x8_t pixels_or_border =
168 2528 vbsl_u8(vmovn_u16(in_range), pixels, vmovn_u16(v_border_));
169
170 2528 vst1_u8(&dst[0], pixels_or_border);
171 2528 }
172
173 template <>
174 2432 void RemapS16ConstantBorder<uint16_t>::transform_pixels(uint32x4_t indices_low,
175 uint32x4_t indices_high,
176 uint16x8_t in_range,
177 Columns<uint16_t> dst) {
178 21888 uint16x8_t pixels = {
179 2432 src_rows_[vgetq_lane_u32(indices_low, 0)],
180 2432 src_rows_[vgetq_lane_u32(indices_low, 1)],
181 2432 src_rows_[vgetq_lane_u32(indices_low, 2)],
182 2432 src_rows_[vgetq_lane_u32(indices_low, 3)],
183 2432 src_rows_[vgetq_lane_u32(indices_high, 0)],
184 2432 src_rows_[vgetq_lane_u32(indices_high, 1)],
185 2432 src_rows_[vgetq_lane_u32(indices_high, 2)],
186 2432 src_rows_[vgetq_lane_u32(indices_high, 3)],
187 };
188
189 // Select between source pixels and border colour
190 2432 uint16x8_t pixels_or_border = vbslq_u16(in_range, pixels, v_border_);
191
192 2432 vst1q_u16(&dst[0], pixels_or_border);
193 2432 }
194
195 // Most of the complexity comes from parameter checking.
196 // NOLINTBEGIN(readability-function-cognitive-complexity)
197 template <typename T>
198 300 kleidicv_error_t remap_s16(const T *src, size_t src_stride, size_t src_width,
199 size_t src_height, T *dst, size_t dst_stride,
200 size_t dst_width, size_t dst_height, size_t channels,
201 const int16_t *mapxy, size_t mapxy_stride,
202 kleidicv_border_type_t border_type,
203 [[maybe_unused]] const T *border_value) {
204
8/8
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 149 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 149 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 149 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 149 times.
300 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
205
8/8
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 148 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 148 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 148 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 148 times.
298 CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height);
206
8/8
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 147 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 147 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 147 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 147 times.
296 CHECK_POINTER_AND_STRIDE(mapxy, mapxy_stride, dst_height);
207
12/12
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 146 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 144 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 144 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 146 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 144 times.
✓ Branch 10 taken 3 times.
✓ Branch 11 taken 144 times.
294 CHECK_IMAGE_SIZE(src_width, src_height);
208
12/12
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 143 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 142 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 142 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 143 times.
✓ Branch 8 taken 1 times.
✓ Branch 9 taken 142 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 142 times.
288 CHECK_IMAGE_SIZE(dst_width, dst_height);
209
8/8
✓ Branch 0 taken 67 times.
✓ Branch 1 taken 75 times.
✓ Branch 2 taken 66 times.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 67 times.
✓ Branch 5 taken 75 times.
✓ Branch 6 taken 66 times.
✓ Branch 7 taken 1 times.
284 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) {
210 2 return KLEIDICV_ERROR_NULL_POINTER;
211 }
212
213
8/8
✓ Branch 0 taken 133 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 133 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 133 times.
✓ Branch 5 taken 8 times.
✓ Branch 6 taken 133 times.
✓ Branch 7 taken 8 times.
564 if (!remap_s16_is_implemented<T>(src_stride, src_width, src_height, dst_width,
214 282 border_type, channels)) {
215 16 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
216 }
217
218 266 Rows<const T> src_rows{src, src_stride, channels};
219 266 Rows<const int16_t> mapxy_rows{mapxy, mapxy_stride, 2};
220 266 Rows<T> dst_rows{dst, dst_stride, channels};
221 266 Rectangle rect{dst_width, dst_height};
222
4/4
✓ Branch 0 taken 67 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 67 times.
✓ Branch 3 taken 66 times.
266 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT) {
223 264 RemapS16ConstantBorder<T> operation{src_rows, src_width, src_height,
224 132 border_value};
225 132 zip_rows(operation, rect, mapxy_rows, dst_rows);
226 132 } else {
227 assert(border_type == KLEIDICV_BORDER_TYPE_REPLICATE);
228 134 RemapS16Replicate<T> operation{src_rows, src_width, src_height};
229 134 zip_rows(operation, rect, mapxy_rows, dst_rows);
230 134 }
231 266 return KLEIDICV_OK;
232 300 }
233 // NOLINTEND(readability-function-cognitive-complexity)
234
235 #define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(type) \
236 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_s16<type>( \
237 const type *src, size_t src_stride, size_t src_width, size_t src_height, \
238 type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \
239 size_t channels, const int16_t *mapxy, size_t mapxy_stride, \
240 kleidicv_border_type_t border_type, const type *border_value)
241
242 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint8_t);
243 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint16_t);
244
245 } // namespace kleidicv::neon
246