KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/rgb_to_yuv422_sc.h
Date: 2026-01-20 20:58:59
Exec Total Coverage
Lines: 174 174 100.0%
Functions: 146 146 100.0%
Branches: 55 55 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_RGB_TO_YUV422_SC_H
6 #define KLEIDICV_RGB_TO_YUV422_SC_H
7
8 #include <utility>
9
10 #include "kleidicv/conversions/rgb_to_yuv.h"
11 #include "kleidicv/kleidicv.h"
12 #include "kleidicv/sve2.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
16 static const int kWeightScale = 14;
17
18 static const int16_t KR2Y422Weight =
19 4211; // 0.299077 * (236 - 16) / 256 * 16384
20 static const int16_t KG2Y422Weight =
21 8258; // 0.586506 * (236 - 16) / 256 * 16384
22 static const int16_t KB2Y422Weight =
23 1606; // 0.114062 * (236 - 16) / 256 * 16384
24
25 static const int16_t KR2U422Weight = -1212; // -0.148 * 8192
26 static const int16_t KG2U422Weight = -2384; // -0.291 * 8192
27 static const int16_t KB2U422Weight = 3596; // 0.439 * 8192
28 static const int16_t KG2V422Weight = -3015; // -0.368 * 8192
29 static const int16_t KB2V422Weight = -582; // -0.071 * 8192
30
31 template <size_t b_idx, size_t u_idx, size_t y_idx, size_t scn>
32 class RGBxOrBGRxToYUV422 {
33 public:
34 static constexpr size_t r_idx = 2 - b_idx;
35 static constexpr size_t v_idx = (u_idx + 2) % 4;
36 using ArrayOfTwo_svuint8 = ScalableVectorArray1D<svuint8_t, 2>;
37 using ArrayOfTwo_svint16 = ScalableVectorArray1D<svint16_t, 2>;
38
39 672 static kleidicv_error_t rgbx2yuv422_operation(
40 const uint8_t* src, size_t src_stride, uint8_t* dst, size_t dst_stride,
41 size_t width, size_t height) KLEIDICV_STREAMING {
42 // Destination channel count (dcn = 2) because YUV422 is interleaved with
43 // two channels per pixel on average: one luma (Y) and one shared
44 // chroma (U or V). Thus, dcn is set to 2 for this color format.
45 672 constexpr size_t dcn = 2;
46 672 auto kVectorLength = svcntb();
47
48 // Loop through rows along the image height.
49
24/24
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 32 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 32 times.
✓ Branch 6 taken 212 times.
✓ Branch 7 taken 9184 times.
✓ Branch 8 taken 212 times.
✓ Branch 9 taken 9184 times.
✓ Branch 10 taken 212 times.
✓ Branch 11 taken 9184 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 32 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 32 times.
✓ Branch 16 taken 4 times.
✓ Branch 17 taken 32 times.
✓ Branch 18 taken 4 times.
✓ Branch 19 taken 32 times.
✓ Branch 20 taken 4 times.
✓ Branch 21 taken 32 times.
✓ Branch 22 taken 4 times.
✓ Branch 23 taken 32 times.
28512 for (size_t h = 0; h < height; h++, src += src_stride) {
50 // Keep track of the current output row being written.
51 27840 Columns<uint8_t> dst_row{dst + dst_stride * h, dcn};
52 27840 LoopUnroll2 loop{width, kVectorLength};
53
54 // Unroll by 2: each iteration loads two vectors of source pixels.
55 // Every pair of adjacent pixels yields one 4-byte YUV422 group: one luma
56 // per pixel plus a single U and a single V shared by the pair, placed
57 // according to y_idx, u_idx and v_idx (e.g. [Y0, V0, Y1, U0] for YVYU).
58 28416 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
59 576 svbool_t pg = svptrue_b8();
60 576 svuint8_t r0, g0, b0, r1, g1, b1;
61 576 ArrayOfTwo_svuint8 r = {{std::ref(r0), std::ref(r1)}};
62 576 ArrayOfTwo_svuint8 g = {{std::ref(g0), std::ref(g1)}};
63 576 ArrayOfTwo_svuint8 b = {{std::ref(b0), std::ref(b1)}};
64 if constexpr (scn == 4) {
65 288 svuint8x4_t rgbx_0 = svld4(pg, src + index * scn);
66 576 svuint8x4_t rgbx_1 =
67 288 svld4(pg, src + index * scn + kVectorLength * scn);
68
69 288 r(0) = svget4(rgbx_0, r_idx), g(0) = svget4(rgbx_0, 1),
70 288 b(0) = svget4(rgbx_0, b_idx);
71 288 r(1) = svget4(rgbx_1, r_idx), g(1) = svget4(rgbx_1, 1),
72 288 b(1) = svget4(rgbx_1, b_idx);
73 288 } else {
74 288 svuint8x3_t rgbx_0 = svld3(pg, src + index * scn);
75 576 svuint8x3_t rgbx_1 =
76 288 svld3(pg, src + index * scn + kVectorLength * scn);
77
78 288 r(0) = svget3(rgbx_0, r_idx), g(0) = svget3(rgbx_0, 1),
79 288 b(0) = svget3(rgbx_0, b_idx);
80 288 r(1) = svget3(rgbx_1, r_idx), g(1) = svget3(rgbx_1, 1),
81 288 b(1) = svget3(rgbx_1, b_idx);
82 288 }
83
84 1152 rgb_to_yuv422(r, g, b, dst_row.ptr_at(static_cast<ptrdiff_t>(index)),
85 576 pg);
86 576 });
87
88 55680 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
89 27840 svbool_t pg1 = svwhilelt_b8_u64(index, length);
90 27840 svbool_t pg2 = svwhilelt_b8_u64(index + kVectorLength, length);
91 27840 svbool_t pg3 = svwhilelt_b8_u64(0, (length - index) / 2);
92 27840 svuint8_t r0, g0, b0, r1, g1, b1;
93 27840 ArrayOfTwo_svuint8 r = {{std::ref(r0), std::ref(r1)}};
94 27840 ArrayOfTwo_svuint8 g = {{std::ref(g0), std::ref(g1)}};
95 27840 ArrayOfTwo_svuint8 b = {{std::ref(b0), std::ref(b1)}};
96 if constexpr (scn == 4) {
97 192 svuint8x4_t rgbx_0 = svld4(pg1, src + index * scn);
98 384 svuint8x4_t rgbx_1 =
99 192 svld4(pg2, src + index * scn + kVectorLength * scn);
100
101 192 r(0) = svget4(rgbx_0, r_idx), g(0) = svget4(rgbx_0, 1),
102 192 b(0) = svget4(rgbx_0, b_idx);
103 192 r(1) = svget4(rgbx_1, r_idx), g(1) = svget4(rgbx_1, 1),
104 192 b(1) = svget4(rgbx_1, b_idx);
105 192 } else {
106 27648 svuint8x3_t rgbx_0 = svld3(pg1, src + index * scn);
107 55296 svuint8x3_t rgbx_1 =
108 27648 svld3(pg2, src + index * scn + kVectorLength * scn);
109
110 27648 r(0) = svget3(rgbx_0, r_idx), g(0) = svget3(rgbx_0, 1),
111 27648 b(0) = svget3(rgbx_0, b_idx);
112 27648 r(1) = svget3(rgbx_1, r_idx), g(1) = svget3(rgbx_1, 1),
113 27648 b(1) = svget3(rgbx_1, b_idx);
114 27648 }
115
116 55680 rgb_to_yuv422(r, g, b, dst_row.ptr_at(static_cast<ptrdiff_t>(index)),
117 27840 pg3);
118 27840 });
119 27840 }
120 672 return KLEIDICV_OK;
121 672 }
122
123 private:
124 28416 static inline void rgb_to_yuv422(ArrayOfTwo_svuint8 r, ArrayOfTwo_svuint8 g,
125 ArrayOfTwo_svuint8 b, uint8_t* dst_ptr,
126 svbool_t pg) KLEIDICV_STREAMING {
127 28416 int y_base = (1 << (kWeightScale - 1)) + (1 << kWeightScale) * 16;
128 28416 int uv_bias = (1 << (kWeightScale - 1)) + (1 << (kWeightScale - 1)) * 256;
129
130 // get the even elements to calculate the first luma of each pair (y1)
131 28416 svint16_t r0_even, g0_even, b0_even, r1_even, g1_even, b1_even;
132 28416 ArrayOfTwo_svint16 r_even = {{std::ref(r0_even), std::ref(r1_even)}};
133 28416 ArrayOfTwo_svint16 g_even = {{std::ref(g0_even), std::ref(g1_even)}};
134 28416 ArrayOfTwo_svint16 b_even = {{std::ref(b0_even), std::ref(b1_even)}};
135
136 28416 r_even(0) = svreinterpret_s16(svmovlb(r(0)));
137 28416 g_even(0) = svreinterpret_s16(svmovlb(g(0)));
138 28416 b_even(0) = svreinterpret_s16(svmovlb(b(0)));
139 28416 r_even(1) = svreinterpret_s16(svmovlb(r(1)));
140 28416 g_even(1) = svreinterpret_s16(svmovlb(g(1)));
141 28416 b_even(1) = svreinterpret_s16(svmovlb(b(1)));
142
143 56832 svuint8_t y1 =
144 56832 compute_weighted_channel_422(r_even, g_even, b_even, KR2Y422Weight,
145 28416 KG2Y422Weight, KB2Y422Weight, y_base);
146
147 // get the odd elements to calculate the second luma of each pair (y2)
148 28416 svint16_t r0_odd, g0_odd, b0_odd, r1_odd, g1_odd, b1_odd;
149 28416 ArrayOfTwo_svint16 r_odd = {{std::ref(r0_odd), std::ref(r1_odd)}};
150 28416 ArrayOfTwo_svint16 g_odd = {{std::ref(g0_odd), std::ref(g1_odd)}};
151 28416 ArrayOfTwo_svint16 b_odd = {{std::ref(b0_odd), std::ref(b1_odd)}};
152 28416 r_odd(0) = svreinterpret_s16(svmovlt(r(0)));
153 28416 g_odd(0) = svreinterpret_s16(svmovlt(g(0)));
154 28416 b_odd(0) = svreinterpret_s16(svmovlt(b(0)));
155 28416 r_odd(1) = svreinterpret_s16(svmovlt(r(1)));
156 28416 g_odd(1) = svreinterpret_s16(svmovlt(g(1)));
157 28416 b_odd(1) = svreinterpret_s16(svmovlt(b(1)));
158
159 56832 svuint8_t y2 =
160 56832 compute_weighted_channel_422(r_odd, g_odd, b_odd, KR2Y422Weight,
161 28416 KG2Y422Weight, KB2Y422Weight, y_base);
162
163 28416 svint16_t r0_avg, r1_avg, g0_avg, g1_avg, b0_avg, b1_avg;
164 28416 ArrayOfTwo_svint16 r_avg = {{std::ref(r0_avg), std::ref(r1_avg)}};
165 28416 ArrayOfTwo_svint16 g_avg = {{std::ref(g0_avg), std::ref(g1_avg)}};
166 28416 ArrayOfTwo_svint16 b_avg = {{std::ref(b0_avg), std::ref(b1_avg)}};
167 28416 r_avg(0) = svadd_x(svptrue_b16(), r_even(0), r_odd(0));
168 28416 r_avg(1) = svadd_x(svptrue_b16(), r_even(1), r_odd(1));
169 28416 g_avg(0) = svadd_x(svptrue_b16(), g_even(0), g_odd(0));
170 28416 g_avg(1) = svadd_x(svptrue_b16(), g_even(1), g_odd(1));
171 28416 b_avg(0) = svadd_x(svptrue_b16(), b_even(0), b_odd(0));
172 28416 b_avg(1) = svadd_x(svptrue_b16(), b_even(1), b_odd(1));
173
174 56832 svuint8_t u =
175 56832 compute_weighted_channel_422(r_avg, g_avg, b_avg, KR2U422Weight,
176 28416 KG2U422Weight, KB2U422Weight, uv_bias);
177 56832 svuint8_t v =
178 56832 compute_weighted_channel_422(r_avg, g_avg, b_avg, KB2U422Weight,
179 28416 KG2V422Weight, KB2V422Weight, uv_bias);
180
181 56832 svuint8x4_t yuv422 = svcreate4(svdup_n_u8(0xFF), svdup_n_u8(0xFF),
182 28416 svdup_n_u8(0xFF), svdup_n_u8(0xFF));
183 28416 yuv422 = svset4(yuv422, u_idx, u);
184 28416 yuv422 = svset4(yuv422, v_idx, v);
185 28416 yuv422 = svset4(yuv422, y_idx, y1);
186 28416 yuv422 = svset4(yuv422, y_idx + 2, y2);
187
188 28416 svst4_u8(pg, dst_ptr, yuv422);
189 28416 }
190
191 113664 static svuint8_t normalize_and_pack(svint32_t vec_0, svint32_t vec_1,
192 svint32_t vec_2,
193 svint32_t vec_3) KLEIDICV_STREAMING {
194 113664 svuint16_t y_b = svshrnb_n_u32(svreinterpret_u32(vec_0), kWeightScale);
195 113664 y_b = svshrnt_n_u32(y_b, svreinterpret_u32(vec_1), kWeightScale);
196 113664 svuint16_t y_t = svshrnb_n_u32(svreinterpret_u32(vec_2), kWeightScale);
197 113664 y_t = svshrnt_n_u32(y_t, svreinterpret_u32(vec_3), kWeightScale);
198
199 227328 return svuzp1_u8(svreinterpret_u8(y_b), svreinterpret_u8(y_t));
200 113664 }
201
202 // Common helper: apply RGB weights into 4x s32 accumulators and pack to u8.
203 113664 static inline svuint8_t compute_weighted_channel_422(
204 ArrayOfTwo_svint16 r, ArrayOfTwo_svint16 g, ArrayOfTwo_svint16 b,
205 int16_t r_coeff, int16_t g_coeff, int16_t b_coeff,
206 int fixed) KLEIDICV_STREAMING {
207 113664 svint32_t bias = svdup_s32(fixed);
208 113664 svint32_t acc_lo_lo = bias;
209 113664 svint32_t acc_lo_hi = bias;
210 113664 svint32_t acc_hi_lo = bias;
211 113664 svint32_t acc_hi_hi = bias;
212
213 // R contributions
214 113664 acc_lo_lo = svmlalb_n_s32(acc_lo_lo, r(0), r_coeff);
215 113664 acc_lo_hi = svmlalt_n_s32(acc_lo_hi, r(0), r_coeff);
216 113664 acc_hi_lo = svmlalb_n_s32(acc_hi_lo, r(1), r_coeff);
217 113664 acc_hi_hi = svmlalt_n_s32(acc_hi_hi, r(1), r_coeff);
218
219 // G contributions
220 113664 acc_lo_lo = svmlalb_n_s32(acc_lo_lo, g(0), g_coeff);
221 113664 acc_lo_hi = svmlalt_n_s32(acc_lo_hi, g(0), g_coeff);
222 113664 acc_hi_lo = svmlalb_n_s32(acc_hi_lo, g(1), g_coeff);
223 113664 acc_hi_hi = svmlalt_n_s32(acc_hi_hi, g(1), g_coeff);
224
225 // B contributions
226 113664 acc_lo_lo = svmlalb_n_s32(acc_lo_lo, b(0), b_coeff);
227 113664 acc_lo_hi = svmlalt_n_s32(acc_lo_hi, b(0), b_coeff);
228 113664 acc_hi_lo = svmlalb_n_s32(acc_hi_lo, b(1), b_coeff);
229 113664 acc_hi_hi = svmlalt_n_s32(acc_hi_hi, b(1), b_coeff);
230
231 227328 return normalize_and_pack(acc_lo_lo, acc_lo_hi, acc_hi_lo, acc_hi_hi);
232 113664 }
233 };
234
235 KLEIDICV_TARGET_FN_ATTRS
236 784 static kleidicv_error_t rgb_to_yuv422_u8_sc(
237 const uint8_t* src, size_t src_stride, uint8_t* dst, size_t dst_stride,
238 size_t width, size_t height,
239 kleidicv_color_conversion_t color_format) KLEIDICV_STREAMING {
240
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 780 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 780 times.
784 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
241
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 776 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 776 times.
780 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
242
6/6
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 748 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 732 times.
✓ Branch 4 taken 44 times.
✓ Branch 5 taken 732 times.
776 CHECK_IMAGE_SIZE(width, height);
243
244 // YUV422 packs pixels in pairs: (Y0, U, Y1, V).
245 // Therefore, the image width must be at least 2 and always even.
246
4/4
✓ Branch 0 taken 716 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 700 times.
732 if (width < 2 || (width % 2) != 0) {
247 32 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
248 }
249
250
13/13
✓ Branch 0 taken 212 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 4 times.
✓ Branch 6 taken 212 times.
✓ Branch 7 taken 212 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 4 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 4 times.
✓ Branch 12 taken 4 times.
700 switch (color_format) {
251 case KLEIDICV_BGR_TO_YUYV:
252 4 return RGBxOrBGRxToYUV422<0, 1, 0, 3>::rgbx2yuv422_operation(
253 4 src, src_stride, dst, dst_stride, width, height);
254 break;
255 case KLEIDICV_BGR_TO_UYVY:
256 4 return RGBxOrBGRxToYUV422<0, 0, 1, 3>::rgbx2yuv422_operation(
257 4 src, src_stride, dst, dst_stride, width, height);
258 break;
259 case KLEIDICV_BGR_TO_YVYU:
260 4 return RGBxOrBGRxToYUV422<0, 3, 0, 3>::rgbx2yuv422_operation(
261 4 src, src_stride, dst, dst_stride, width, height);
262 break;
263 case KLEIDICV_RGB_TO_YUYV:
264 212 return RGBxOrBGRxToYUV422<2, 1, 0, 3>::rgbx2yuv422_operation(
265 212 src, src_stride, dst, dst_stride, width, height);
266 break;
267 case KLEIDICV_RGB_TO_UYVY:
268 212 return RGBxOrBGRxToYUV422<2, 0, 1, 3>::rgbx2yuv422_operation(
269 212 src, src_stride, dst, dst_stride, width, height);
270 break;
271 case KLEIDICV_RGB_TO_YVYU:
272 212 return RGBxOrBGRxToYUV422<2, 3, 0, 3>::rgbx2yuv422_operation(
273 212 src, src_stride, dst, dst_stride, width, height);
274 break;
275 case KLEIDICV_BGRA_TO_YUYV:
276 4 return RGBxOrBGRxToYUV422<0, 1, 0, 4>::rgbx2yuv422_operation(
277 4 src, src_stride, dst, dst_stride, width, height);
278 break;
279 case KLEIDICV_BGRA_TO_UYVY:
280 4 return RGBxOrBGRxToYUV422<0, 0, 1, 4>::rgbx2yuv422_operation(
281 4 src, src_stride, dst, dst_stride, width, height);
282 break;
283 case KLEIDICV_BGRA_TO_YVYU:
284 4 return RGBxOrBGRxToYUV422<0, 3, 0, 4>::rgbx2yuv422_operation(
285 4 src, src_stride, dst, dst_stride, width, height);
286 break;
287 case KLEIDICV_RGBA_TO_YUYV:
288 4 return RGBxOrBGRxToYUV422<2, 1, 0, 4>::rgbx2yuv422_operation(
289 4 src, src_stride, dst, dst_stride, width, height);
290 break;
291 case KLEIDICV_RGBA_TO_UYVY:
292 4 return RGBxOrBGRxToYUV422<2, 0, 1, 4>::rgbx2yuv422_operation(
293 4 src, src_stride, dst, dst_stride, width, height);
294 break;
295 case KLEIDICV_RGBA_TO_YVYU:
296 4 return RGBxOrBGRxToYUV422<2, 3, 0, 4>::rgbx2yuv422_operation(
297 4 src, src_stride, dst, dst_stride, width, height);
298 break;
299 default:
300 28 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
301 break;
302 }
303
304 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
305 784 }
306
307 } // namespace KLEIDICV_TARGET_NAMESPACE
308
309 #endif // KLEIDICV_RGB_TO_YUV422_SC_H
310