KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/transform/remap_s16_neon.cpp
Date: 2026-01-20 20:58:59
Exec Total Coverage
Lines: 125 125 100.0%
Functions: 22 22 100.0%
Branches: 68 68 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <cassert>
6 #include <type_traits>
7
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/neon.h"
10 #include "kleidicv/transform/remap.h"
11
12 namespace kleidicv::neon {
13
14 template <typename ScalarType>
15 class RemapS16Replicate {
16 public:
17 using MapVecTraits = neon::VecTraits<int16_t>;
18 using MapVectorType = typename MapVecTraits::VectorType;
19 using MapVector2Type = typename MapVecTraits::Vector2Type;
20
21 340 RemapS16Replicate(Rows<const ScalarType> src_rows, size_t src_width,
22 size_t src_height)
23 340 : src_rows_{src_rows},
24 680 v_src_element_stride_{vdupq_n_u16(
25 340 static_cast<uint16_t>(src_rows_.stride() / sizeof(ScalarType)))},
26 340 v_xmax_{vdupq_n_s16(static_cast<int16_t>(src_width - 1))},
27 340 v_ymax_{vdupq_n_s16(static_cast<int16_t>(src_height - 1))} {}
28
29 10836 void transform_pixels(uint32x4_t indices_low, uint32x4_t indices_high,
30 Columns<ScalarType> dst) {
31 // Copy pixels from source
32 10836 dst[0] = src_rows_[vgetq_lane_u32(indices_low, 0)];
33 10836 dst[1] = src_rows_[vgetq_lane_u32(indices_low, 1)];
34 10836 dst[2] = src_rows_[vgetq_lane_u32(indices_low, 2)];
35 10836 dst[3] = src_rows_[vgetq_lane_u32(indices_low, 3)];
36
37 10836 dst[4] = src_rows_[vgetq_lane_u32(indices_high, 0)];
38 10836 dst[5] = src_rows_[vgetq_lane_u32(indices_high, 1)];
39 10836 dst[6] = src_rows_[vgetq_lane_u32(indices_high, 2)];
40 10836 dst[7] = src_rows_[vgetq_lane_u32(indices_high, 3)];
41 10836 }
42
43 436 void process_row(size_t width, Columns<const int16_t> mapxy,
44 Columns<ScalarType> dst) {
45 11272 auto vector_path = [&](size_t step) {
46 10836 MapVector2Type xy = vld2q_s16(&mapxy[0]);
47 // Clamp coordinates to within the dimensions of the source image
48 21672 uint16x8_t x = vreinterpretq_u16_s16(
49 10836 vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[0], v_xmax_)));
50 21672 uint16x8_t y = vreinterpretq_u16_s16(
51 10836 vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[1], v_ymax_)));
52 // Calculate offsets from coordinates (y * stride/sizeof(ScalarType) + x)
53 21672 uint32x4_t indices_low =
54 21672 vmlal_u16(vmovl_u16(vget_low_u16(x)), vget_low_u16(y),
55 10836 vget_low_u16(v_src_element_stride_));
56 21672 uint32x4_t indices_high =
57 10836 vmlal_high_u16(vmovl_high_u16(x), y, v_src_element_stride_);
58
59 10836 transform_pixels(indices_low, indices_high, dst);
60
61 10836 mapxy += ptrdiff_t(step);
62 10836 dst += ptrdiff_t(step);
63 10836 };
64
65 436 LoopUnroll loop{width, MapVecTraits::num_lanes()};
66 436 loop.unroll_once(vector_path);
67 872 ptrdiff_t back_step = static_cast<ptrdiff_t>(loop.step()) -
68 436 static_cast<ptrdiff_t>(loop.remaining_length());
69 436 mapxy -= back_step;
70 436 dst -= back_step;
71 708 loop.remaining([&](size_t, size_t step) { vector_path(step); });
72 436 }
73
74 private:
75 Rows<const ScalarType> src_rows_;
76 uint16x8_t v_src_element_stride_;
77 int16x8_t v_xmax_;
78 int16x8_t v_ymax_;
79 }; // end of class RemapS16Replicate<ScalarType>
80
81 template <typename ScalarType>
82 class RemapS16ConstantBorder {
83 public:
84 using SrcVecTraits = neon::VecTraits<ScalarType>;
85 using SrcVecType = typename SrcVecTraits::VectorType;
86
87 using MapVecTraits = neon::VecTraits<int16_t>;
88 using MapVectorType = typename MapVecTraits::VectorType;
89 using MapVector2Type = typename MapVecTraits::Vector2Type;
90
91 336 RemapS16ConstantBorder(Rows<const ScalarType> src_rows, size_t src_width,
92 size_t src_height, const ScalarType *border_value)
93 336 : src_rows_{src_rows},
94 672 v_src_element_stride_{vdupq_n_u16(
95 336 static_cast<uint16_t>(src_rows_.stride() / sizeof(ScalarType)))},
96 336 v_width_{vdupq_n_u16(static_cast<uint16_t>(src_width))},
97 336 v_height_{vdupq_n_u16(static_cast<uint16_t>(src_height))},
98 336 v_border_{vdupq_n_u16(*border_value)} {}
99
100 void transform_pixels(uint32x4_t indices_low, uint32x4_t indices_high,
101 uint16x8_t in_range, Columns<ScalarType> dst);
102
103 432 void process_row(size_t width, Columns<const int16_t> mapxy,
104 Columns<ScalarType> dst) {
105 11264 auto vector_path = [&](size_t step) {
106 10832 MapVector2Type xy = vld2q_s16(&mapxy[0]);
107
108 10832 uint16x8_t x = vreinterpretq_u16_s16(xy.val[0]);
109 10832 uint16x8_t y = vreinterpretq_u16_s16(xy.val[1]);
110
111 // Find whether coordinates are within the image dimensions.
112 // Negative coordinates are interpreted as large values due to the
113 // s16->u16 reinterpretation.
114 21664 uint16x8_t in_range =
115 10832 vandq_u16(vcltq_u16(x, v_width_), vcltq_u16(y, v_height_));
116
117 // Zero out-of-range coordinates.
118 10832 x = vandq_u16(in_range, x);
119 10832 y = vandq_u16(in_range, y);
120
121 // Calculate offsets from coordinates (y * stride/sizeof(ScalarType) + x)
122 21664 uint32x4_t indices_low =
123 21664 vmlal_u16(vmovl_u16(vget_low_u16(x)), vget_low_u16(y),
124 10832 vget_low_u16(v_src_element_stride_));
125 21664 uint32x4_t indices_high =
126 10832 vmlal_high_u16(vmovl_high_u16(x), y, v_src_element_stride_);
127
128 10832 transform_pixels(indices_low, indices_high, in_range, dst);
129
130 10832 mapxy += ptrdiff_t(step);
131 10832 dst += ptrdiff_t(step);
132 10832 };
133
134 432 LoopUnroll loop{width, MapVecTraits::num_lanes()};
135 432 loop.unroll_once(vector_path);
136 864 ptrdiff_t back_step = static_cast<ptrdiff_t>(loop.step()) -
137 432 static_cast<ptrdiff_t>(loop.remaining_length());
138 432 mapxy -= back_step;
139 432 dst -= back_step;
140 704 loop.remaining([&](size_t, size_t step) { vector_path(step); });
141 432 }
142
143 private:
144 Rows<const ScalarType> src_rows_;
145 uint16x8_t v_src_element_stride_;
146 uint16x8_t v_width_;
147 uint16x8_t v_height_;
148 uint16x8_t v_border_;
149 }; // end of class RemapS16ConstantBorder<ScalarType>
150
151 template <>
152 5656 void RemapS16ConstantBorder<uint8_t>::transform_pixels(uint32x4_t indices_low,
153 uint32x4_t indices_high,
154 uint16x8_t in_range,
155 Columns<uint8_t> dst) {
156 50904 uint8x8_t pixels = {
157 5656 src_rows_[vgetq_lane_u32(indices_low, 0)],
158 5656 src_rows_[vgetq_lane_u32(indices_low, 1)],
159 5656 src_rows_[vgetq_lane_u32(indices_low, 2)],
160 5656 src_rows_[vgetq_lane_u32(indices_low, 3)],
161 5656 src_rows_[vgetq_lane_u32(indices_high, 0)],
162 5656 src_rows_[vgetq_lane_u32(indices_high, 1)],
163 5656 src_rows_[vgetq_lane_u32(indices_high, 2)],
164 5656 src_rows_[vgetq_lane_u32(indices_high, 3)],
165 };
166 // Select between source pixels and border color
167 11312 uint8x8_t pixels_or_border =
168 5656 vbsl_u8(vmovn_u16(in_range), pixels, vmovn_u16(v_border_));
169
170 5656 vst1_u8(&dst[0], pixels_or_border);
171 5656 }
172
173 template <>
174 5176 void RemapS16ConstantBorder<uint16_t>::transform_pixels(uint32x4_t indices_low,
175 uint32x4_t indices_high,
176 uint16x8_t in_range,
177 Columns<uint16_t> dst) {
178 46584 uint16x8_t pixels = {
179 5176 src_rows_[vgetq_lane_u32(indices_low, 0)],
180 5176 src_rows_[vgetq_lane_u32(indices_low, 1)],
181 5176 src_rows_[vgetq_lane_u32(indices_low, 2)],
182 5176 src_rows_[vgetq_lane_u32(indices_low, 3)],
183 5176 src_rows_[vgetq_lane_u32(indices_high, 0)],
184 5176 src_rows_[vgetq_lane_u32(indices_high, 1)],
185 5176 src_rows_[vgetq_lane_u32(indices_high, 2)],
186 5176 src_rows_[vgetq_lane_u32(indices_high, 3)],
187 };
188
189 // Select between source pixels and border color
190 5176 uint16x8_t pixels_or_border = vbslq_u16(in_range, pixels, v_border_);
191
192 5176 vst1q_u16(&dst[0], pixels_or_border);
193 5176 }
194
195 // Most of the complexity comes from parameter checking.
196 // NOLINTBEGIN(readability-function-cognitive-complexity)
197 template <typename T>
198 744 kleidicv_error_t remap_s16(const T *src, size_t src_stride, size_t src_width,
199 size_t src_height, T *dst, size_t dst_stride,
200 size_t dst_width, size_t dst_height, size_t channels,
201 const int16_t *mapxy, size_t mapxy_stride,
202 kleidicv_border_type_t border_type,
203 [[maybe_unused]] const T *border_value) {
204
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 370 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 370 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 370 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 370 times.
744 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
205
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 368 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 368 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 368 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 368 times.
740 CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height);
206
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 366 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 366 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 366 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 366 times.
736 CHECK_POINTER_AND_STRIDE(mapxy, mapxy_stride, dst_height);
207
12/12
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 364 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 360 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 360 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 364 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 360 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 360 times.
732 CHECK_IMAGE_SIZE(src_width, src_height);
208
12/12
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 358 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 356 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 356 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 358 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 356 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 356 times.
720 CHECK_IMAGE_SIZE(dst_width, dst_height);
209
8/8
✓ Branch 0 taken 170 times.
✓ Branch 1 taken 186 times.
✓ Branch 2 taken 168 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 170 times.
✓ Branch 5 taken 186 times.
✓ Branch 6 taken 168 times.
✓ Branch 7 taken 2 times.
712 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) {
210 4 return KLEIDICV_ERROR_NULL_POINTER;
211 }
212
213
8/8
✓ Branch 0 taken 338 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 338 times.
✓ Branch 3 taken 16 times.
✓ Branch 4 taken 338 times.
✓ Branch 5 taken 16 times.
✓ Branch 6 taken 338 times.
✓ Branch 7 taken 16 times.
1416 if (!remap_s16_is_implemented<T>(src_stride, src_width, src_height, dst_width,
214 708 border_type, channels)) {
215 32 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
216 }
217
218 676 Rows<const T> src_rows{src, src_stride, channels};
219 676 Rows<const int16_t> mapxy_rows{mapxy, mapxy_stride, 2};
220 676 Rows<T> dst_rows{dst, dst_stride, channels};
221 676 Rectangle rect{dst_width, dst_height};
222
4/4
✓ Branch 0 taken 170 times.
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 170 times.
✓ Branch 3 taken 168 times.
676 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT) {
223 672 RemapS16ConstantBorder<T> operation{src_rows, src_width, src_height,
224 336 border_value};
225 336 zip_rows(operation, rect, mapxy_rows, dst_rows);
226 336 } else {
227 assert(border_type == KLEIDICV_BORDER_TYPE_REPLICATE);
228 340 RemapS16Replicate<T> operation{src_rows, src_width, src_height};
229 340 zip_rows(operation, rect, mapxy_rows, dst_rows);
230 340 }
231 676 return KLEIDICV_OK;
232 744 }
233 // NOLINTEND(readability-function-cognitive-complexity)
234
235 #define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(type) \
236 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_s16<type>( \
237 const type *src, size_t src_stride, size_t src_width, size_t src_height, \
238 type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \
239 size_t channels, const int16_t *mapxy, size_t mapxy_stride, \
240 kleidicv_border_type_t border_type, const type *border_value)
241
242 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint8_t);
243 KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint16_t);
244
245 } // namespace kleidicv::neon
246