KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/rgb_to_rgb_neon.cpp
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 173 173 100.0%
Functions: 22 22 100.0%
Branches: 86 86 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/conversions/rgb_to_rgb.h"
6 #include "kleidicv/kleidicv.h"
7 #include "kleidicv/neon.h"
8 #include "kleidicv/traits.h"
9 #include "kleidicv/types.h"
10
11 namespace kleidicv::neon {
12
// Row operation that exchanges the first and third channel of every
// 3-channel pixel and leaves the middle channel in place (see scalar_path).
// Two vector implementations are selected at compile time by
// KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE: de-interleaving load/store, or a
// plain load followed by three table lookups.
13 template <typename ScalarType>
14 class RGBToBGR final {
15 public:
16 using VecTraits = neon::VecTraits<ScalarType>;
17
// Converts one row. `src` and `dst` are advanced by `step` after each vector
// iteration; whatever LoopUnroll reports as remaining is converted element by
// element with scalar_path.
// NOTE(review): presumably `length` counts pixels and Columns advances in
// whole pixels - confirm against LoopUnroll/Columns.
18 63 void process_row(size_t length, Columns<const uint8_t> src,
19 Columns<uint8_t> dst) {
20 63 LoopUnroll loop{length, 16};
21 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// The lookup indices are loaded once per row, outside the vector loop.
22 63 uint8x16x3_t indices;
23 63 VecTraits::load(kRGBToBGRTableIndices, indices);
24 #endif
25
26 511 loop.unroll_once([&](size_t step) {
// Passes an address 1024 elements past the current source position to the
// prefetch macro.
27 448 KLEIDICV_PREFETCH(&src[0] + 1024);
28 448 uint8x16x3_t src_vect, dst_vect;
29
30 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
31 src_vect = vld3q(&src[0]);
32 dst_vect = vector_path(src_vect);
33 vst3q(&dst[0], dst_vect);
34 #else
35 448 VecTraits::load(&src[0], src_vect);
36 448 dst_vect = vector_path(src_vect, indices);
37 448 VecTraits::store(dst_vect, &dst[0]);
38 #endif
39 448 src += static_cast<ptrdiff_t>(step);
40 448 dst += static_cast<ptrdiff_t>(step);
41 448 });
42
// Tail handling: one scalar_path call per remaining element.
43 90 loop.remaining([&](size_t length, size_t /* step */) {
44
2/2
✓ Branch 0 taken 27 times.
✓ Branch 1 taken 147 times.
174 for (size_t index = 0; index < length; ++index) {
45 147 disable_loop_vectorization();
46 294 scalar_path(&src.at(static_cast<ptrdiff_t>(index))[0],
47 147 &dst.at(static_cast<ptrdiff_t>(index))[0]);
48 147 }
49 27 });
50 63 }
51
52 private:
53 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Interleaving variant: the three vectors hold one channel each, so swapping
// val[0] and val[2] exchanges the first and third channel.
54 uint8x16x3_t vector_path(uint8x16x3_t src) {
55 std::swap(src.val[0], src.val[2]);
56 return src;
57 }
58 #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Table-lookup variant: builds each output vector with one `tbl` instruction
// that picks bytes from two or three of the source vectors.
59 448 uint8x16x3_t vector_path(const uint8x16x3_t &src,
60 const uint8x16x3_t &indices) {
61 uint8x16x3_t dst;
62
// The outputs use the early-clobber modifier ("=&w") so they are not assigned
// to registers holding inputs; the later tbl instructions still read the
// source and index operands after the first output has been written.
63 448 asm volatile(
64 // dst0 = vqtbl2q_u8({src0, src1}, indices0)
65 "tbl %0.16b, { %3.16b, %4.16b }, %6.16b \n\t"
66 // dst1 = vqtbl3q_u8({src0, src1, src2}, indices1)
67 "tbl %1.16b, { %3.16b, %4.16b, %5.16b }, %7.16b \n\t"
68 // dst2 = vqtbl2q_u8({src1, src2}, indices2)
69 "tbl %2.16b, { %4.16b, %5.16b }, %8.16b \n\t"
70 448 : "=&w"(dst.val[0]), "=&w"(dst.val[1]), "=&w"(dst.val[2])
71 448 : "w"(src.val[0]), "w"(src.val[1]), "w"(src.val[2]),
72 448 "w"(indices.val[0]), "w"(indices.val[1]), "w"(indices.val[2])
73 :);
74
75 448 return dst;
76 }
// One 16-entry index row per output vector. Rows 1 and 2 index into tables
// that start at src.val[0]; row 3 indexes into {src.val[1], src.val[2]}, so
// its values are relative to the first byte of src.val[1].
77 static constexpr uint8_t kRGBToBGRTableIndices[48] = {
78 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17,
79 16, 15, 20, 19, 18, 23, 22, 21, 26, 25, 24, 29, 28, 27, 32, 31,
80 14, 19, 18, 17, 22, 21, 20, 25, 24, 23, 28, 27, 26, 31, 30, 29};
81 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
82
// Converts a single pixel. src[0] is saved in a temporary before dst[0] is
// written.
// NOTE(review): the temporary presumably keeps the result correct when src
// and dst refer to the same pixel - confirm whether in-place use is supported.
83 147 void scalar_path(const ScalarType *src, ScalarType *dst) {
84 147 auto tmp = src[0];
85 147 dst[0] = src[2];
86 147 dst[1] = src[1];
87 147 dst[2] = tmp;
88 147 }
89 }; // end of class RGBToBGR<ScalarType>
90
// Operation that exchanges the first and third channel of every 4-channel
// pixel and copies the second and fourth channel unchanged (see scalar_path).
// Derives from UnrollOnce and exposes vector_path/scalar_path for the row
// driver.
91 template <typename ScalarType>
92 class RGBAToBGRA final : public UnrollOnce {
93 public:
94 using VecTraits = neon::VecTraits<ScalarType>;
95
96 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Table-lookup variant: the byte permutation is loaded once at construction
// and reused by every vector_path call.
97 64 RGBAToBGRA() : indices_{} {
98 64 VecTraits::load(kRGBAToBGRATableIndices, indices_);
99 64 }
100 #else
101 RGBAToBGRA() = default;
102 #endif
103
// Converts one uint8x16x4_t worth of data read from `src` and writes the
// result to `dst`.
104 448 void vector_path(const ScalarType *src, ScalarType *dst) {
// Passes an address 1024 elements past `src` to the prefetch macro.
105 448 KLEIDICV_PREFETCH(&src[0] + 1024);
106 448 uint8x16x4_t src_vect, dst_vect;
107
108 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// De-interleaving load: each val[i] holds one channel, so the channels are
// reordered by assigning whole vectors.
109 src_vect = vld4q_u8(src);
110
111 dst_vect.val[0] = src_vect.val[2];
112 dst_vect.val[1] = src_vect.val[1];
113 dst_vect.val[2] = src_vect.val[0];
114 dst_vect.val[3] = src_vect.val[3];
115
116 vst4q_u8(dst, dst_vect);
117 #else
118 448 VecTraits::load(&src[0], src_vect);
119
// The same 16-byte permutation is applied independently to each of the four
// loaded vectors.
120 448 dst_vect.val[0] = vqtbl1q_u8(src_vect.val[0], indices_);
121 448 dst_vect.val[1] = vqtbl1q_u8(src_vect.val[1], indices_);
122 448 dst_vect.val[2] = vqtbl1q_u8(src_vect.val[2], indices_);
123 448 dst_vect.val[3] = vqtbl1q_u8(src_vect.val[3], indices_);
124
125 448 VecTraits::store(dst_vect, &dst[0]);
126 #endif
127 448 }
128
// Converts a single pixel; src[0] is saved in a temporary before dst[0] is
// written.
129 147 void scalar_path(const ScalarType *src, ScalarType *dst) {
130 147 auto tmp = src[0];
131 147 dst[0] = src[2];
132 147 dst[1] = src[1];
133 147 dst[2] = tmp;
134 147 dst[3] = src[3];
135 147 }
136
137 private:
138 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Within every group of four bytes, positions 0 and 2 are exchanged and
// positions 1 and 3 stay where they are.
139 static constexpr uint8_t kRGBAToBGRATableIndices[16] = {
140 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
141 uint8x16_t indices_;
142 #endif
143 }; // end of class RGBAToBGRA<ScalarType>
144
// Operation that turns 3-channel pixels into 4-channel pixels: the first and
// third source channel are exchanged and the added fourth channel is set to
// 0xff. Derives from UnrollTwice.
145 template <typename ScalarType>
146 class RGBToBGRA final : public UnrollTwice {
147 public:
148 using VecTraits = neon::VecTraits<ScalarType>;
149
// Loads three de-interleaved channel vectors with vld3q_u8, reorders them,
// appends a vector of 0xff bytes and stores the four channels interleaved
// with vst4q_u8.
150 430 void vector_path(const ScalarType *src, ScalarType *dst) {
151 430 uint8x16x3_t src_vect = vld3q_u8(src);
152 430 uint8x16x4_t dst_vect;
153
154 430 dst_vect.val[0] = src_vect.val[2];
155 430 dst_vect.val[1] = src_vect.val[1];
156 430 dst_vect.val[2] = src_vect.val[0];
157 430 dst_vect.val[3] = vdupq_n_u8(0xff);
158
159 430 vst4q_u8(dst, dst_vect);
160 430 }
161
// Single-pixel counterpart of vector_path: reads three values from `src` and
// writes four values to `dst`.
162 435 void scalar_path(const ScalarType *src, ScalarType *dst) {
163 435 auto tmp = src[0];
164 435 dst[0] = src[2];
165 435 dst[1] = src[1];
166 435 dst[2] = tmp;
167 435 dst[3] = 0xff;
168 435 }
169 }; // end of class RGBToBGRA<ScalarType>
170
// Operation that turns 3-channel pixels into 4-channel pixels: the three
// source channels keep their order and the added fourth channel is set to
// 0xff. Derives from UnrollTwice.
171 template <typename ScalarType>
172 class RGBToRGBA final : public UnrollTwice {
173 public:
174 using VecTraits = neon::VecTraits<ScalarType>;
175
// Loads three de-interleaved channel vectors with vld3q_u8, appends a vector
// of 0xff bytes and stores the four channels interleaved with vst4q_u8.
176 430 void vector_path(const ScalarType *src, ScalarType *dst) {
177 430 uint8x16x3_t src_vect = vld3q_u8(src);
178 430 uint8x16x4_t dst_vect;
179
180 430 dst_vect.val[0] = src_vect.val[0];
181 430 dst_vect.val[1] = src_vect.val[1];
182 430 dst_vect.val[2] = src_vect.val[2];
183 430 dst_vect.val[3] = vdupq_n_u8(0xff);
184
185 430 vst4q_u8(dst, dst_vect);
186 430 }
187
// Single-pixel counterpart of vector_path: copies 3 bytes from `src` and then
// writes 0xff as the fourth value.
188 435 void scalar_path(const ScalarType *src, ScalarType *dst) {
189 435 memcpy(static_cast<void *>(dst), static_cast<const void *>(src), 3);
190 435 dst[3] = 0xff;
191 435 }
192 }; // end of class RGBToRGBA<ScalarType>
193
// Operation that turns 4-channel pixels into 3-channel pixels: the fourth
// source channel is dropped and the first and third channel are exchanged.
// Derives from UnrollTwice.
194 template <typename ScalarType>
195 class RGBAToBGR final : public UnrollTwice {
196 public:
197 using VecTraits = neon::VecTraits<ScalarType>;
198
// Loads four de-interleaved channel vectors with vld4q_u8, keeps the first
// three in reversed order and stores them interleaved with vst3q_u8.
199 430 void vector_path(const ScalarType *src, ScalarType *dst) {
200 430 uint8x16x4_t src_vect = vld4q_u8(src);
201 430 uint8x16x3_t dst_vect;
202
203 430 dst_vect.val[0] = src_vect.val[2];
204 430 dst_vect.val[1] = src_vect.val[1];
205 430 dst_vect.val[2] = src_vect.val[0];
206
207 430 vst3q_u8(dst, dst_vect);
208 430 }
209
// Single-pixel counterpart of vector_path; src[3] is never read.
210 435 void scalar_path(const ScalarType *src, ScalarType *dst) {
211 435 auto tmp = src[0];
212 435 dst[0] = src[2];
213 435 dst[1] = src[1];
214 435 dst[2] = tmp;
215 435 }
216 }; // end of class RGBAToBGR<ScalarType>
217
// Operation that turns 4-channel pixels into 3-channel pixels: the first
// three source channels keep their order and the fourth is dropped. Derives
// from UnrollTwice.
218 template <typename ScalarType>
219 class RGBAToRGB final : public UnrollTwice {
220 public:
221 using VecTraits = neon::VecTraits<ScalarType>;
222
// Loads four de-interleaved channel vectors with vld4q_u8 and stores the
// first three interleaved with vst3q_u8.
223 430 void vector_path(const ScalarType *src, ScalarType *dst) {
224 430 uint8x16x4_t src_vect = vld4q_u8(src);
225 430 uint8x16x3_t dst_vect;
226
227 430 dst_vect.val[0] = src_vect.val[0];
228 430 dst_vect.val[1] = src_vect.val[1];
229 430 dst_vect.val[2] = src_vect.val[2];
230
231 430 vst3q_u8(dst, dst_vect);
232 430 }
233
// Single-pixel counterpart of vector_path: copies the first 3 bytes of the
// source pixel.
234 435 void scalar_path(const ScalarType *src, ScalarType *dst) {
235 435 memcpy(static_cast<void *>(dst), static_cast<const void *>(src), 3);
236 435 }
237 }; // end of class RGBAToRGB<ScalarType>
238
// Entry point for the 3-channel to 3-channel conversion with the first and
// third channel exchanged. Runs the argument-check macros, wraps both buffers
// in 3-channel Rows and drives RGBToBGR through zip_rows (RGBToBGR provides
// process_row rather than vector_path/scalar_path). Returns KLEIDICV_OK once
// the rows have been processed.
// NOTE(review): the CHECK_* macros presumably return an error code early on
// invalid arguments - confirm against their definitions.
239 KLEIDICV_TARGET_FN_ATTRS
240 68 kleidicv_error_t rgb_to_bgr_u8(const uint8_t *src, size_t src_stride,
241 uint8_t *dst, size_t dst_stride, size_t width,
242 size_t height) {
243
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 67 times.
68 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
244
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 66 times.
67 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
245
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 65 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 64 times.
66 CHECK_IMAGE_SIZE(width, height);
246
247 64 Rectangle rect{width, height};
248 64 Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */};
249 64 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* BGR */};
250 64 RGBToBGR<uint8_t> operation;
251 64 zip_rows(operation, rect, src_rows, dst_rows);
252 64 return KLEIDICV_OK;
253 68 }
254
// Entry point for the 4-channel to 4-channel conversion with the first and
// third channel exchanged. Runs the argument-check macros, wraps both buffers
// in 4-channel Rows and applies RGBAToBGRA row by row. Returns KLEIDICV_OK
// once the rows have been processed.
// NOTE(review): the CHECK_* macros presumably return an error code early on
// invalid arguments - confirm against their definitions.
255 KLEIDICV_TARGET_FN_ATTRS
256 68 kleidicv_error_t rgba_to_bgra_u8(const uint8_t *src, size_t src_stride,
257 uint8_t *dst, size_t dst_stride, size_t width,
258 size_t height) {
259
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 67 times.
68 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
260
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 66 times.
67 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
261
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 65 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 64 times.
66 CHECK_IMAGE_SIZE(width, height);
262
263 64 Rectangle rect{width, height};
264 64 Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */};
265 64 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* BGRA */};
266 64 RGBAToBGRA<uint8_t> operation;
267 64 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
268 64 return KLEIDICV_OK;
269 68 }
270
// Entry point for the 3-channel to 4-channel conversion with the first and
// third channel exchanged and the fourth channel filled with 0xff. Runs the
// argument-check macros, wraps the source in 3-channel Rows and the
// destination in 4-channel Rows, and applies RGBToBGRA row by row. Returns
// KLEIDICV_OK once the rows have been processed.
// NOTE(review): the CHECK_* macros presumably return an error code early on
// invalid arguments - confirm against their definitions.
271 KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
272 68 rgb_to_bgra_u8(const uint8_t *src, size_t src_stride, uint8_t *dst,
273 size_t dst_stride, size_t width, size_t height) {
274
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 67 times.
68 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
275
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 66 times.
67 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
276
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 65 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 64 times.
66 CHECK_IMAGE_SIZE(width, height);
277
278 64 Rectangle rect{width, height};
279 64 Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */};
280 64 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* BGRA */};
281 64 RGBToBGRA<uint8_t> operation;
282 64 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
283 64 return KLEIDICV_OK;
284 68 }
285
// Entry point for the 3-channel to 4-channel conversion that keeps the
// channel order and fills the fourth channel with 0xff. Runs the
// argument-check macros, wraps the source in 3-channel Rows and the
// destination in 4-channel Rows, and applies RGBToRGBA row by row. Returns
// KLEIDICV_OK once the rows have been processed.
// NOTE(review): the CHECK_* macros presumably return an error code early on
// invalid arguments - confirm against their definitions.
286 KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
287 68 rgb_to_rgba_u8(const uint8_t *src, size_t src_stride, uint8_t *dst,
288 size_t dst_stride, size_t width, size_t height) {
289
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 67 times.
68 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
290
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 66 times.
67 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
291
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 65 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 64 times.
66 CHECK_IMAGE_SIZE(width, height);
292
293 64 Rectangle rect{width, height};
294 64 Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */};
295 64 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */};
296 64 RGBToRGBA<uint8_t> operation;
297 64 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
298 64 return KLEIDICV_OK;
299 68 }
300
// Entry point for the 4-channel to 3-channel conversion that drops the fourth
// channel and exchanges the first and third. Runs the argument-check macros,
// wraps the source in 4-channel Rows and the destination in 3-channel Rows,
// and applies RGBAToBGR row by row. Returns KLEIDICV_OK once the rows have
// been processed.
// NOTE(review): the CHECK_* macros presumably return an error code early on
// invalid arguments - confirm against their definitions.
301 KLEIDICV_TARGET_FN_ATTRS
302 68 kleidicv_error_t rgba_to_bgr_u8(const uint8_t *src, size_t src_stride,
303 uint8_t *dst, size_t dst_stride, size_t width,
304 size_t height) {
305
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 67 times.
68 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
306
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 66 times.
67 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
307
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 65 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 64 times.
66 CHECK_IMAGE_SIZE(width, height);
308
309 64 Rectangle rect{width, height};
310 64 Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */};
311 64 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* BGR */};
312 64 RGBAToBGR<uint8_t> operation;
313 64 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
314 64 return KLEIDICV_OK;
315 68 }
316
// Entry point for the 4-channel to 3-channel conversion that drops the fourth
// channel and keeps the order of the other three. Runs the argument-check
// macros, wraps the source in 4-channel Rows and the destination in 3-channel
// Rows, and applies RGBAToRGB row by row. Returns KLEIDICV_OK once the rows
// have been processed.
// NOTE(review): the CHECK_* macros presumably return an error code early on
// invalid arguments - confirm against their definitions.
317 KLEIDICV_TARGET_FN_ATTRS
318 68 kleidicv_error_t rgba_to_rgb_u8(const uint8_t *src, size_t src_stride,
319 uint8_t *dst, size_t dst_stride, size_t width,
320 size_t height) {
321
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 67 times.
68 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
322
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 66 times.
67 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
323
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 65 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 64 times.
66 CHECK_IMAGE_SIZE(width, height);
324
325 64 Rectangle rect{width, height};
326 64 Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */};
327 64 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */};
328 64 RGBAToRGB<uint8_t> operation;
329 64 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
330 64 return KLEIDICV_OK;
331 68 }
332
333 } // namespace kleidicv::neon
334