KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/rgb_to_rgb_neon.cpp
Date: 2026-01-20 20:58:59
Exec Total Coverage
Lines: 173 173 100.0%
Functions: 21 21 100.0%
Branches: 86 86 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/conversions/rgb_to_rgb.h"
6 #include "kleidicv/kleidicv.h"
7 #include "kleidicv/neon.h"
8 #include "kleidicv/traits.h"
9 #include "kleidicv/types.h"
10
11 namespace kleidicv::neon {
12
13 template <typename ScalarType>
14 class RGBToBGR final {
15 public:
16 using VecTraits = neon::VecTraits<ScalarType>;
17
18 KLEIDICV_FORCE_INLINE
19 81 void process_row(size_t length, Columns<const uint8_t> src,
20 Columns<uint8_t> dst) {
21 81 LoopUnroll loop{length, 16};
22 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
23 81 uint8x16x3_t indices;
24 81 VecTraits::load(kRGBToBGRTableIndices, indices);
25 #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
26
27 520 loop.unroll_once([&](size_t step) {
28 439 KLEIDICV_PREFETCH(&src[0] + 1024);
29 439 uint8x16x3_t dst_vect;
30 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE || defined(__clang__)
31 439 uint8x16x3_t src_vect;
32 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE || defined(__clang__)
33
34 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
35 src_vect = vld3q(&src[0]);
36 std::swap(src_vect.val[0], src_vect.val[2]);
37 vst3q(&dst[0], dst_vect);
38 #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
39 #if defined(__clang__)
40 439 VecTraits::load(&src[0], src_vect);
41
42 439 dst_vect.val[0] =
43 439 vqtbl2q_u8({src_vect.val[0], src_vect.val[1]}, indices.val[0]);
44 439 dst_vect.val[1] = vqtbl3q_u8(
45 439 {src_vect.val[0], src_vect.val[1], src_vect.val[2]}, indices.val[1]);
46 439 dst_vect.val[2] =
47 439 vqtbl2q_u8({src_vect.val[1], src_vect.val[2]}, indices.val[2]);
48 #else // defined(__clang__)
49 asm volatile(
50 "ld1 { v16.16b, v17.16b, v18.16b }, [%[src_ptr]] \n\t"
51 "tbl %0.16b, { v16.16b, v17.16b }, %[idx0].16b \n\t"
52 "tbl %1.16b, { v16.16b, v17.16b, v18.16b }, %[idx1].16b \n\t"
53 "tbl %2.16b, { v17.16b, v18.16b }, %[idx2].16b \n\t"
54 : "=&w"(dst_vect.val[0]), "=&w"(dst_vect.val[1]),
55 "=&w"(dst_vect.val[2])
56 : [src_ptr] "r"(&src[0]), [idx0] "w"(indices.val[0]),
57 [idx1] "w"(indices.val[1]), [idx2] "w"(indices.val[2])
58 : "v16", "v17", "v18", "memory");
59 #endif // defined(__clang__)
60 439 VecTraits::store(dst_vect, &dst[0]);
61 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
62 439 src += static_cast<ptrdiff_t>(step);
63 439 dst += static_cast<ptrdiff_t>(step);
64 439 });
65
66 134 loop.remaining([&](size_t length, size_t /* step */) {
67
2/2
✓ Branch 0 taken 53 times.
✓ Branch 1 taken 291 times.
344 for (size_t index = 0; index < length; ++index) {
68 291 disable_loop_vectorization();
69 582 scalar_path(&src.at(static_cast<ptrdiff_t>(index))[0],
70 291 &dst.at(static_cast<ptrdiff_t>(index))[0]);
71 291 }
72 53 });
73 81 }
74
75 private:
76 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
77 static constexpr uint8_t kRGBToBGRTableIndices[48] = {
78 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17,
79 16, 15, 20, 19, 18, 23, 22, 21, 26, 25, 24, 29, 28, 27, 32, 31,
80 14, 19, 18, 17, 22, 21, 20, 25, 24, 23, 28, 27, 26, 31, 30, 29};
81 #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
82
83 291 void scalar_path(const ScalarType *src, ScalarType *dst) {
84 291 auto tmp = src[0];
85 291 dst[0] = src[2];
86 291 dst[1] = src[1];
87 291 dst[2] = tmp;
88 291 }
89 }; // end of class RGBToBGR<ScalarType>
90
91 template <typename ScalarType>
92 class RGBAToBGRA final : public UnrollOnce {
93 public:
94 using VecTraits = neon::VecTraits<ScalarType>;
95
96 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
97 82 RGBAToBGRA() : indices_{} {
98 82 VecTraits::load(kRGBAToBGRATableIndices, indices_);
99 82 }
100 #else
101 RGBAToBGRA() = default;
102 #endif
103
104 439 void vector_path(const ScalarType *src, ScalarType *dst) {
105 439 KLEIDICV_PREFETCH(&src[0] + 1024);
106 439 uint8x16x4_t src_vect, dst_vect;
107
108 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
109 src_vect = vld4q_u8(src);
110
111 dst_vect.val[0] = src_vect.val[2];
112 dst_vect.val[1] = src_vect.val[1];
113 dst_vect.val[2] = src_vect.val[0];
114 dst_vect.val[3] = src_vect.val[3];
115
116 vst4q_u8(dst, dst_vect);
117 #else
118 439 VecTraits::load(&src[0], src_vect);
119
120 439 dst_vect.val[0] = vqtbl1q_u8(src_vect.val[0], indices_);
121 439 dst_vect.val[1] = vqtbl1q_u8(src_vect.val[1], indices_);
122 439 dst_vect.val[2] = vqtbl1q_u8(src_vect.val[2], indices_);
123 439 dst_vect.val[3] = vqtbl1q_u8(src_vect.val[3], indices_);
124
125 439 VecTraits::store(dst_vect, &dst[0]);
126 #endif
127 439 }
128
129 291 void scalar_path(const ScalarType *src, ScalarType *dst) {
130 291 auto tmp = src[0];
131 291 dst[0] = src[2];
132 291 dst[1] = src[1];
133 291 dst[2] = tmp;
134 291 dst[3] = src[3];
135 291 }
136
137 private:
138 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
139 static constexpr uint8_t kRGBAToBGRATableIndices[16] = {
140 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
141 uint8x16_t indices_;
142 #endif
143 }; // end of class RGBAToBGRA<ScalarType>
144
145 template <typename ScalarType>
146 class RGBToBGRA final : public UnrollTwice {
147 public:
148 using VecTraits = neon::VecTraits<ScalarType>;
149
150 406 void vector_path(const ScalarType *src, ScalarType *dst) {
151 406 uint8x16x3_t src_vect = vld3q_u8(src);
152 406 uint8x16x4_t dst_vect;
153
154 406 dst_vect.val[0] = src_vect.val[2];
155 406 dst_vect.val[1] = src_vect.val[1];
156 406 dst_vect.val[2] = src_vect.val[0];
157 406 dst_vect.val[3] = vdupq_n_u8(0xff);
158
159 406 vst4q_u8(dst, dst_vect);
160 406 }
161
162 819 void scalar_path(const ScalarType *src, ScalarType *dst) {
163 819 auto tmp = src[0];
164 819 dst[0] = src[2];
165 819 dst[1] = src[1];
166 819 dst[2] = tmp;
167 819 dst[3] = 0xff;
168 819 }
169 }; // end of class RGBToBGRA<ScalarType>
170
171 template <typename ScalarType>
172 class RGBToRGBA final : public UnrollTwice {
173 public:
174 using VecTraits = neon::VecTraits<ScalarType>;
175
176 406 void vector_path(const ScalarType *src, ScalarType *dst) {
177 406 uint8x16x3_t src_vect = vld3q_u8(src);
178 406 uint8x16x4_t dst_vect;
179
180 406 dst_vect.val[0] = src_vect.val[0];
181 406 dst_vect.val[1] = src_vect.val[1];
182 406 dst_vect.val[2] = src_vect.val[2];
183 406 dst_vect.val[3] = vdupq_n_u8(0xff);
184
185 406 vst4q_u8(dst, dst_vect);
186 406 }
187
188 819 void scalar_path(const ScalarType *src, ScalarType *dst) {
189 819 memcpy(static_cast<void *>(dst), static_cast<const void *>(src), 3);
190 819 dst[3] = 0xff;
191 819 }
192 }; // end of class RGBToRGBA<ScalarType>
193
194 template <typename ScalarType>
195 class RGBAToBGR final : public UnrollTwice {
196 public:
197 using VecTraits = neon::VecTraits<ScalarType>;
198
199 406 void vector_path(const ScalarType *src, ScalarType *dst) {
200 406 uint8x16x4_t src_vect = vld4q_u8(src);
201 406 uint8x16x3_t dst_vect;
202
203 406 dst_vect.val[0] = src_vect.val[2];
204 406 dst_vect.val[1] = src_vect.val[1];
205 406 dst_vect.val[2] = src_vect.val[0];
206
207 406 vst3q_u8(dst, dst_vect);
208 406 }
209
210 819 void scalar_path(const ScalarType *src, ScalarType *dst) {
211 819 auto tmp = src[0];
212 819 dst[0] = src[2];
213 819 dst[1] = src[1];
214 819 dst[2] = tmp;
215 819 }
216 }; // end of class RGBAToBGR<ScalarType>
217
218 template <typename ScalarType>
219 class RGBAToRGB final : public UnrollTwice {
220 public:
221 using VecTraits = neon::VecTraits<ScalarType>;
222
223 406 void vector_path(const ScalarType *src, ScalarType *dst) {
224 406 uint8x16x4_t src_vect = vld4q_u8(src);
225 406 uint8x16x3_t dst_vect;
226
227 406 dst_vect.val[0] = src_vect.val[0];
228 406 dst_vect.val[1] = src_vect.val[1];
229 406 dst_vect.val[2] = src_vect.val[2];
230
231 406 vst3q_u8(dst, dst_vect);
232 406 }
233
234 819 void scalar_path(const ScalarType *src, ScalarType *dst) {
235 819 memcpy(static_cast<void *>(dst), static_cast<const void *>(src), 3);
236 819 }
237 }; // end of class RGBAToRGB<ScalarType>
238
239 KLEIDICV_TARGET_FN_ATTRS
240 86 kleidicv_error_t rgb_to_bgr_u8(const uint8_t *src, size_t src_stride,
241 uint8_t *dst, size_t dst_stride, size_t width,
242 size_t height) {
243
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
86 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
244
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 84 times.
85 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
245
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 83 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 82 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 82 times.
84 CHECK_IMAGE_SIZE(width, height);
246
247 82 Rectangle rect{width, height};
248 82 Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */};
249 82 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* BGR */};
250 82 RGBToBGR<uint8_t> operation;
251 82 zip_rows(operation, rect, src_rows, dst_rows);
252 82 return KLEIDICV_OK;
253 86 }
254
255 KLEIDICV_TARGET_FN_ATTRS
256 86 kleidicv_error_t rgba_to_bgra_u8(const uint8_t *src, size_t src_stride,
257 uint8_t *dst, size_t dst_stride, size_t width,
258 size_t height) {
259
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
86 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
260
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 84 times.
85 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
261
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 83 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 82 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 82 times.
84 CHECK_IMAGE_SIZE(width, height);
262
263 82 Rectangle rect{width, height};
264 82 Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */};
265 82 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* BGRA */};
266 82 RGBAToBGRA<uint8_t> operation;
267 82 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
268 82 return KLEIDICV_OK;
269 86 }
270
271 KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
272 86 rgb_to_bgra_u8(const uint8_t *src, size_t src_stride, uint8_t *dst,
273 size_t dst_stride, size_t width, size_t height) {
274
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
86 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
275
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 84 times.
85 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
276
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 83 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 82 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 82 times.
84 CHECK_IMAGE_SIZE(width, height);
277
278 82 Rectangle rect{width, height};
279 82 Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */};
280 82 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* BGRA */};
281 82 RGBToBGRA<uint8_t> operation;
282 82 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
283 82 return KLEIDICV_OK;
284 86 }
285
286 KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
287 86 rgb_to_rgba_u8(const uint8_t *src, size_t src_stride, uint8_t *dst,
288 size_t dst_stride, size_t width, size_t height) {
289
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
86 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
290
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 84 times.
85 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
291
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 83 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 82 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 82 times.
84 CHECK_IMAGE_SIZE(width, height);
292
293 82 Rectangle rect{width, height};
294 82 Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */};
295 82 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */};
296 82 RGBToRGBA<uint8_t> operation;
297 82 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
298 82 return KLEIDICV_OK;
299 86 }
300
301 KLEIDICV_TARGET_FN_ATTRS
302 86 kleidicv_error_t rgba_to_bgr_u8(const uint8_t *src, size_t src_stride,
303 uint8_t *dst, size_t dst_stride, size_t width,
304 size_t height) {
305
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
86 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
306
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 84 times.
85 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
307
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 83 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 82 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 82 times.
84 CHECK_IMAGE_SIZE(width, height);
308
309 82 Rectangle rect{width, height};
310 82 Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */};
311 82 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* BGR */};
312 82 RGBAToBGR<uint8_t> operation;
313 82 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
314 82 return KLEIDICV_OK;
315 86 }
316
317 KLEIDICV_TARGET_FN_ATTRS
318 86 kleidicv_error_t rgba_to_rgb_u8(const uint8_t *src, size_t src_stride,
319 uint8_t *dst, size_t dst_stride, size_t width,
320 size_t height) {
321
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
86 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
322
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 84 times.
85 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
323
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 83 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 82 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 82 times.
84 CHECK_IMAGE_SIZE(width, height);
324
325 82 Rectangle rect{width, height};
326 82 Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */};
327 82 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */};
328 82 RGBAToRGB<uint8_t> operation;
329 82 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
330 82 return KLEIDICV_OK;
331 86 }
332
333 } // namespace kleidicv::neon
334