KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/transform/remap_s16_neon.cpp
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 125 125 100.0%
Functions: 22 22 100.0%
Branches: 68 68 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <cassert>
6 #include <type_traits>
7
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/neon.h"
10 #include "kleidicv/transform/remap.h"
11
12 namespace kleidicv::neon {
13
14 template <typename ScalarType>
15 class RemapS16Replicate {
16 public:
17 using MapVecTraits = neon::VecTraits<int16_t>;
18 using MapVectorType = typename MapVecTraits::VectorType;
19 using MapVector2Type = typename MapVecTraits::Vector2Type;
20
21 268 RemapS16Replicate(Rows<const ScalarType> src_rows, size_t src_width,
22 size_t src_height)
23 268 : src_rows_{src_rows},
24 536 v_src_element_stride_{vdupq_n_u16(
25 268 static_cast<uint16_t>(src_rows_.stride() / sizeof(ScalarType)))},
26 268 v_xmax_{vdupq_n_s16(static_cast<int16_t>(src_width - 1))},
27 268 v_ymax_{vdupq_n_s16(static_cast<int16_t>(src_height - 1))} {}
28
29 10788 void transform_pixels(uint32x4_t indices_low, uint32x4_t indices_high,
30 Columns<ScalarType> dst) {
31 // Copy pixels from source
32 10788 dst[0] = src_rows_[vgetq_lane_u32(indices_low, 0)];
33 10788 dst[1] = src_rows_[vgetq_lane_u32(indices_low, 1)];
34 10788 dst[2] = src_rows_[vgetq_lane_u32(indices_low, 2)];
35 10788 dst[3] = src_rows_[vgetq_lane_u32(indices_low, 3)];
36
37 10788 dst[4] = src_rows_[vgetq_lane_u32(indices_high, 0)];
38 10788 dst[5] = src_rows_[vgetq_lane_u32(indices_high, 1)];
39 10788 dst[6] = src_rows_[vgetq_lane_u32(indices_high, 2)];
40 10788 dst[7] = src_rows_[vgetq_lane_u32(indices_high, 3)];
41 10788 }
42
43 364 void process_row(size_t width, Columns<const int16_t> mapxy,
44 Columns<ScalarType> dst) {
45 11152 auto vector_path = [&](size_t step) {
46 10788 MapVector2Type xy = vld2q_s16(&mapxy[0]);
47 // Clamp coordinates to within the dimensions of the source image
48 21576 uint16x8_t x = vreinterpretq_u16_s16(
49 10788 vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[0], v_xmax_)));
50 21576 uint16x8_t y = vreinterpretq_u16_s16(
51 10788 vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[1], v_ymax_)));
52 // Calculate offsets from coordinates (y * stride/sizeof(ScalarType) + x)
53 21576 uint32x4_t indices_low =
54 21576 vmlal_u16(vmovl_u16(vget_low_u16(x)), vget_low_u16(y),
55 10788 vget_low_u16(v_src_element_stride_));
56 21576 uint32x4_t indices_high =
57 10788 vmlal_high_u16(vmovl_high_u16(x), y, v_src_element_stride_);
58
59 10788 transform_pixels(indices_low, indices_high, dst);
60
61 10788 mapxy += ptrdiff_t(step);
62 10788 dst += ptrdiff_t(step);
63 10788 };
64
65 364 LoopUnroll loop{width, MapVecTraits::num_lanes()};
66 364 loop.unroll_once(vector_path);
67 728 ptrdiff_t back_step = static_cast<ptrdiff_t>(loop.step()) -
68 364 static_cast<ptrdiff_t>(loop.remaining_length());
69 364 mapxy -= back_step;
70 364 dst -= back_step;
71 560 loop.remaining([&](size_t, size_t step) { vector_path(step); });
72 364 }
73
74 private:
75 Rows<const ScalarType> src_rows_;
76 uint16x8_t v_src_element_stride_;
77 int16x8_t v_xmax_;
78 int16x8_t v_ymax_;
79 }; // end of class RemapS16Replicate<ScalarType>
80
81 template <typename ScalarType>
82 class RemapS16ConstantBorder {
83 public:
84 using SrcVecTraits = neon::VecTraits<ScalarType>;
85 using SrcVecType = typename SrcVecTraits::VectorType;
86
87 using MapVecTraits = neon::VecTraits<int16_t>;
88 using MapVectorType = typename MapVecTraits::VectorType;
89 using MapVector2Type = typename MapVecTraits::Vector2Type;
90
91 264 RemapS16ConstantBorder(Rows<const ScalarType> src_rows, size_t src_width,
92 size_t src_height, const ScalarType *border_value)
93 264 : src_rows_{src_rows},
94 528 v_src_element_stride_{vdupq_n_u16(
95 264 static_cast<uint16_t>(src_rows_.stride() / sizeof(ScalarType)))},
96 264 v_width_{vdupq_n_u16(static_cast<uint16_t>(src_width))},
97 264 v_height_{vdupq_n_u16(static_cast<uint16_t>(src_height))},
98 264 v_border_{vdupq_n_u16(*border_value)} {}
99
100 void transform_pixels(uint32x4_t indices_low, uint32x4_t indices_high,
101 uint16x8_t in_range, Columns<ScalarType> dst);
102
103 360 void process_row(size_t width, Columns<const int16_t> mapxy,
104 Columns<ScalarType> dst) {
105 11144 auto vector_path = [&](size_t step) {
106 10784 MapVector2Type xy = vld2q_s16(&mapxy[0]);
107
108 10784 uint16x8_t x = vreinterpretq_u16_s16(xy.val[0]);
109 10784 uint16x8_t y = vreinterpretq_u16_s16(xy.val[1]);
110
111 // Find whether coordinates are within the image dimensions.
112 // Negative coordinates are interpreted as large values due to the
113 // s16->u16 reinterpretation.
114 21568 uint16x8_t in_range =
115 10784 vandq_u16(vcltq_u16(x, v_width_), vcltq_u16(y, v_height_));
116
117 // Zero out-of-range coordinates.
118 10784 x = vandq_u16(in_range, x);
119 10784 y = vandq_u16(in_range, y);
120
121 // Calculate offsets from coordinates (y * stride/sizeof(ScalarType) + x)
122 21568 uint32x4_t indices_low =
123 21568 vmlal_u16(vmovl_u16(vget_low_u16(x)), vget_low_u16(y),
124 10784 vget_low_u16(v_src_element_stride_));
125 21568 uint32x4_t indices_high =
126 10784 vmlal_high_u16(vmovl_high_u16(x), y, v_src_element_stride_);
127
128 10784 transform_pixels(indices_low, indices_high, in_range, dst);
129
130 10784 mapxy += ptrdiff_t(step);
131 10784 dst += ptrdiff_t(step);
132 10784 };
133
134 360 LoopUnroll loop{width, MapVecTraits::num_lanes()};
135 360 loop.unroll_once(vector_path);
136 720 ptrdiff_t back_step = static_cast<ptrdiff_t>(loop.step()) -
137 360 static_cast<ptrdiff_t>(loop.remaining_length());
138 360 mapxy -= back_step;
139 360 dst -= back_step;
140 556 loop.remaining([&](size_t, size_t step) { vector_path(step); });
141 360 }
142
143 private:
144 Rows<const ScalarType> src_rows_;
145 uint16x8_t v_src_element_stride_;
146 uint16x8_t v_width_;
147 uint16x8_t v_height_;
148 uint16x8_t v_border_;
149 }; // end of class RemapS16ConstantBorder<ScalarType>
150
151 template <>
152 5632 void RemapS16ConstantBorder<uint8_t>::transform_pixels(uint32x4_t indices_low,
153 uint32x4_t indices_high,
154 uint16x8_t in_range,
155 Columns<uint8_t> dst) {
156 50688 uint8x8_t pixels = {
157 5632 src_rows_[vgetq_lane_u32(indices_low, 0)],
158 5632 src_rows_[vgetq_lane_u32(indices_low, 1)],
159 5632 src_rows_[vgetq_lane_u32(indices_low, 2)],
160 5632 src_rows_[vgetq_lane_u32(indices_low, 3)],
161 5632 src_rows_[vgetq_lane_u32(indices_high, 0)],
162 5632 src_rows_[vgetq_lane_u32(indices_high, 1)],
163 5632 src_rows_[vgetq_lane_u32(indices_high, 2)],
164 5632 src_rows_[vgetq_lane_u32(indices_high, 3)],
165 };
166 // Select between source pixels and border colour
167 11264 uint8x8_t pixels_or_border =
168 5632 vbsl_u8(vmovn_u16(in_range), pixels, vmovn_u16(v_border_));
169
170 5632 vst1_u8(&dst[0], pixels_or_border);
171 5632 }
172
173 template <>
174 5152 void RemapS16ConstantBorder<uint16_t>::transform_pixels(uint32x4_t indices_low,
175 uint32x4_t indices_high,
176 uint16x8_t in_range,
177 Columns<uint16_t> dst) {
178 46368 uint16x8_t pixels = {
179 5152 src_rows_[vgetq_lane_u32(indices_low, 0)],
180 5152 src_rows_[vgetq_lane_u32(indices_low, 1)],
181 5152 src_rows_[vgetq_lane_u32(indices_low, 2)],
182 5152 src_rows_[vgetq_lane_u32(indices_low, 3)],
183 5152 src_rows_[vgetq_lane_u32(indices_high, 0)],
184 5152 src_rows_[vgetq_lane_u32(indices_high, 1)],
185 5152 src_rows_[vgetq_lane_u32(indices_high, 2)],
186 5152 src_rows_[vgetq_lane_u32(indices_high, 3)],
187 };
188
189 // Select between source pixels and border colour
190 5152 uint16x8_t pixels_or_border = vbslq_u16(in_range, pixels, v_border_);
191
192 5152 vst1q_u16(&dst[0], pixels_or_border);
193 5152 }
194
195 // Most of the complexity comes from parameter checking.
196 // NOLINTBEGIN(readability-function-cognitive-complexity)
197 template <typename T>
198 600 kleidicv_error_t remap_s16(const T *src, size_t src_stride, size_t src_width,
199 size_t src_height, T *dst, size_t dst_stride,
200 size_t dst_width, size_t dst_height, size_t channels,
201 const int16_t *mapxy, size_t mapxy_stride,
202 kleidicv_border_type_t border_type,
203 [[maybe_unused]] const T *border_value) {
204
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 298 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 298 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 298 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 298 times.
600 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
205
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 296 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 296 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 296 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 296 times.
596 CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height);
206
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 294 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 294 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 294 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 294 times.
592 CHECK_POINTER_AND_STRIDE(mapxy, mapxy_stride, dst_height);
207
12/12
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 292 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 288 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 288 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 292 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 288 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 288 times.
588 CHECK_IMAGE_SIZE(src_width, src_height);
208
12/12
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 286 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 284 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 284 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 286 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 284 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 284 times.
576 CHECK_IMAGE_SIZE(dst_width, dst_height);
209
8/8
✓ Branch 0 taken 134 times.
✓ Branch 1 taken 150 times.
✓ Branch 2 taken 132 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 134 times.
✓ Branch 5 taken 150 times.
✓ Branch 6 taken 132 times.
✓ Branch 7 taken 2 times.
568 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) {
210 4 return KLEIDICV_ERROR_NULL_POINTER;
211 }
212
213
8/8
✓ Branch 0 taken 266 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 266 times.
✓ Branch 3 taken 16 times.
✓ Branch 4 taken 266 times.
✓ Branch 5 taken 16 times.
✓ Branch 6 taken 266 times.
✓ Branch 7 taken 16 times.
1128 if (!remap_s16_is_implemented<T>(src_stride, src_width, src_height, dst_width,
214 564 border_type, channels)) {
215 32 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
216 }
217
218 532 Rows<const T> src_rows{src, src_stride, channels};
219 532 Rows<const int16_t> mapxy_rows{mapxy, mapxy_stride, 2};
220 532 Rows<T> dst_rows{dst, dst_stride, channels};
221 532 Rectangle rect{dst_width, dst_height};
222
4/4
✓ Branch 0 taken 134 times.
✓ Branch 1 taken 132 times.
✓ Branch 2 taken 134 times.
✓ Branch 3 taken 132 times.
532 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT) {
223 528 RemapS16ConstantBorder<T> operation{src_rows, src_width, src_height,
224 264 border_value};
225 264 zip_rows(operation, rect, mapxy_rows, dst_rows);
226 264 } else {
227 assert(border_type == KLEIDICV_BORDER_TYPE_REPLICATE);
228 268 RemapS16Replicate<T> operation{src_rows, src_width, src_height};
229 268 zip_rows(operation, rect, mapxy_rows, dst_rows);
230 268 }
231 532 return KLEIDICV_OK;
232 600 }
233 // NOLINTEND(readability-function-cognitive-complexity)
234
235 #define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(type) \
236 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_s16<type>( \
237 const type *src, size_t src_stride, size_t src_width, size_t src_height, \
238 type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \
239 size_t channels, const int16_t *mapxy, size_t mapxy_stride, \
240 kleidicv_border_type_t border_type, const type *border_value)
241
242 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint8_t);
243 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint16_t);
244
245 } // namespace kleidicv::neon
246