KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/rgb_to_yuv420_sc.h
Date: 2026-01-20 20:58:59
Exec Total Coverage
Lines: 231 231 100.0%
Functions: 240 240 100.0%
Branches: 80 80 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_RGB_TO_YUV420_SC_H
6 #define KLEIDICV_RGB_TO_YUV420_SC_H
7
8 #include <algorithm>
9
10 #include "kleidicv/sve2.h"
11 #include "yuv42x_coefficients.h"
12
13 namespace KLEIDICV_TARGET_NAMESPACE {
14
15 template <bool kAlpha, bool RGB, bool kInterleave>
16 class RGBxorBGRxToYUV420 {
17 public:
18 using ArrayOfFour_svuint32 = ScalableVectorArray1D<svuint32_t, 4>;
19 using ArrayOfFour_svint32 = ScalableVectorArray1D<svint32_t, 4>;
20 using ArrayOfTwo_svint32 = ScalableVectorArray1D<svint32_t, 2>;
21
22 3520 static kleidicv_error_t rgb2yuv420_operation_sc(
23 const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride,
24 uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height,
25 bool v_first, size_t begin, size_t end) KLEIDICV_STREAMING {
26 3520 size_t row_begin = begin * 2;
27 3520 size_t row_end = std::min<size_t>(height, end * 2);
28 3520 const uint8_t *src_row = nullptr;
29 3520 uint8_t *y_row = nullptr;
30 3520 uint8_t *u_row = nullptr;
31 3520 uint8_t *v_row = nullptr;
32
8/8
✓ Branch 0 taken 880 times.
✓ Branch 1 taken 36968 times.
✓ Branch 2 taken 880 times.
✓ Branch 3 taken 36968 times.
✓ Branch 4 taken 880 times.
✓ Branch 5 taken 36968 times.
✓ Branch 6 taken 880 times.
✓ Branch 7 taken 36968 times.
151392 for (size_t h = row_begin; h < row_end; h++) {
33 147872 src_row = src + src_stride * h;
34 147872 y_row = y_dst + y_stride * h;
35 147872 bool evenRow = (h & 1) == 0;
36
8/8
✓ Branch 0 taken 18328 times.
✓ Branch 1 taken 18640 times.
✓ Branch 2 taken 18328 times.
✓ Branch 3 taken 18640 times.
✓ Branch 4 taken 18328 times.
✓ Branch 5 taken 18640 times.
✓ Branch 6 taken 18328 times.
✓ Branch 7 taken 18640 times.
147872 if (evenRow) {
37 if constexpr (kInterleave) {
38 37504 u_row = uv_dst + uv_stride * (h / 2);
39 } else {
40 37056 u_row =
41 37056 uv_dst + uv_stride * (h / 4) + ((h / 2) % 2) * ((width + 1) / 2);
42 // Pointer to the start of the V plane.
43 // The V plane follows the U plane. Both U and V planes are
44 // subsampled at a 2:1 vertical ratio (i.e., each has height / 2
45 // rows), and are often stored in a single contiguous chroma region in
46 // memory. Depending on image height and stride, the starting offset
47 // of V may require adjustment to maintain correct alignment. In
48 // particular, the chroma rows may not align perfectly, so a
49 // fractional offset (in rows) is applied to calculate the V plane
50 // position. The formula used here accounts for this by adjusting
51 // based on row parity, assuming consistent memory layout across the
52 // Y, U, and V planes.
53 74112 v_row = uv_dst + uv_stride * ((h + height + 1) / 4) +
54 37056 (((h + height + 1) / 2) % 2) * ((width + 1) / 2);
55 }
56 74560 }
57
58 147872 const size_t kVectorLength = svcntb();
59 147872 LoopUnroll2 loop{width, kVectorLength};
60
61 164288 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
62 16416 svbool_t pg = svptrue_b8();
63
64 32832 vector_path_2x(src_row, y_row, u_row, v_row, v_first, index, evenRow,
65 16416 pg, pg, pg);
66 16416 });
67
68 295184 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
69 147312 svbool_t pg = svwhilelt_b8_u64(index, length);
70 147312 svbool_t pg_half = svwhilelt_b8_u64((index + 1) / 2, (length + 1) / 2);
71
8/8
✓ Branch 0 taken 37044 times.
✓ Branch 1 taken 36828 times.
✓ Branch 2 taken 37044 times.
✓ Branch 3 taken 36828 times.
✓ Branch 4 taken 37044 times.
✓ Branch 5 taken 36828 times.
✓ Branch 6 taken 37044 times.
✓ Branch 7 taken 36828 times.
295488 while (svptest_first(svptrue_b8(), pg)) {
72 296352 vector_path(src_row, y_row, u_row, v_row, v_first, index, evenRow, pg,
73 148176 pg_half);
74 148176 index += kVectorLength;
75 148176 pg = svwhilelt_b8_u64(index, length);
76 148176 pg_half = svwhilelt_b8_u64((index + 1) / 2, (length + 1) / 2);
77 }
78 147312 });
79 147872 }
80 3520 return KLEIDICV_OK;
81 3520 }
82
83 private:
84 KLEIDICV_FORCE_INLINE
85 16416 static void vector_path_2x(const uint8_t *src_row, uint8_t *y_row,
86 uint8_t *u_row, uint8_t *v_row, bool v_first,
87 const size_t index, const bool evenRow,
88 const svbool_t pg0, const svbool_t pg1,
89 const svbool_t pg_half) KLEIDICV_STREAMING {
90 16416 svuint32_t r0_0, r0_1, r0_2, r0_3, g0_0, g0_1, g0_2, g0_3, b0_0, b0_1, b0_2,
91 b0_3, r1_0, r1_1, r1_2, r1_3, g1_0, g1_1, g1_2, g1_3, b1_0, b1_1, b1_2,
92 b1_3;
93
94 16416 ArrayOfFour_svuint32 r0 = {
95 16416 {std::ref(r0_0), std::ref(r0_1), std::ref(r0_2), std::ref(r0_3)}};
96 16416 ArrayOfFour_svuint32 g0 = {
97 16416 {std::ref(g0_0), std::ref(g0_1), std::ref(g0_2), std::ref(g0_3)}};
98 16416 ArrayOfFour_svuint32 b0 = {
99 16416 {std::ref(b0_0), std::ref(b0_1), std::ref(b0_2), std::ref(b0_3)}};
100 16416 ArrayOfFour_svuint32 r1 = {
101 16416 {std::ref(r1_0), std::ref(r1_1), std::ref(r1_2), std::ref(r1_3)}};
102 16416 ArrayOfFour_svuint32 g1 = {
103 16416 {std::ref(g1_0), std::ref(g1_1), std::ref(g1_2), std::ref(g1_3)}};
104 16416 ArrayOfFour_svuint32 b1 = {
105 16416 {std::ref(b1_0), std::ref(b1_1), std::ref(b1_2), std::ref(b1_3)}};
106
107 16416 load_rgb_2x(r0, g0, b0, r1, g1, b1, src_row, scn * index, pg0, pg1);
108
109 16416 svuint8_t y0 = rgb_to_y(r0, g0, b0);
110
111 16416 svuint8_t y1 = rgb_to_y(r1, g1, b1);
112
113 #if KLEIDICV_TARGET_SME2
114 // assume the predicates are full true
115 1296 svcount_t pg_counter = svptrue_c8();
116 1296 svst1(pg_counter, y_row + index, svcreate2(y0, y1));
117 #else
118 15120 svst1(pg0, y_row + index, y0);
119 15120 svst1(pg1, y_row + index + svcntb(), y1);
120 #endif // KLEIDICV_TARGET_SME2
121
122
8/8
✓ Branch 0 taken 1976 times.
✓ Branch 1 taken 2128 times.
✓ Branch 2 taken 1976 times.
✓ Branch 3 taken 2128 times.
✓ Branch 4 taken 1976 times.
✓ Branch 5 taken 2128 times.
✓ Branch 6 taken 1976 times.
✓ Branch 7 taken 2128 times.
16416 if (evenRow) {
123 8512 svuint8_t u, v;
124 8512 svint32_t r_even_0 = svreinterpret_s32(r0_0);
125 8512 svint32_t r_even_1 = svreinterpret_s32(r0_1);
126 8512 svint32_t r_even_2 = svreinterpret_s32(r1_0);
127 8512 svint32_t r_even_3 = svreinterpret_s32(r1_1);
128 8512 svint32_t g_even_0 = svreinterpret_s32(g0_0);
129 8512 svint32_t g_even_1 = svreinterpret_s32(g0_1);
130 8512 svint32_t g_even_2 = svreinterpret_s32(g1_0);
131 8512 svint32_t g_even_3 = svreinterpret_s32(g1_1);
132 8512 svint32_t b_even_0 = svreinterpret_s32(b0_0);
133 8512 svint32_t b_even_1 = svreinterpret_s32(b0_1);
134 8512 svint32_t b_even_2 = svreinterpret_s32(b1_0);
135 8512 svint32_t b_even_3 = svreinterpret_s32(b1_1);
136
137 17024 ArrayOfFour_svint32 r_even = {{std::ref(r_even_0), std::ref(r_even_1),
138 17024 std::ref(r_even_2), std::ref(r_even_3)}};
139 17024 ArrayOfFour_svint32 g_even = {{std::ref(g_even_0), std::ref(g_even_1),
140 17024 std::ref(g_even_2), std::ref(g_even_3)}};
141 17024 ArrayOfFour_svint32 b_even = {{std::ref(b_even_0), std::ref(b_even_1),
142 17024 std::ref(b_even_2), std::ref(b_even_3)}};
143
144 8512 rgb_to_uv_2x(r_even, g_even, b_even, u, v);
145
146
8/8
✓ Branch 0 taken 1064 times.
✓ Branch 1 taken 1064 times.
✓ Branch 2 taken 1064 times.
✓ Branch 3 taken 1064 times.
✓ Branch 4 taken 1064 times.
✓ Branch 5 taken 1064 times.
✓ Branch 6 taken 1064 times.
✓ Branch 7 taken 1064 times.
8512 if (v_first) {
147 4256 swap_scalable(u, v);
148 4256 }
149
150 if constexpr (kInterleave) {
151 4256 svuint8x2_t uv = svcreate2(u, v);
152 4256 svst2_u8(pg_half, u_row + index, uv);
153 4256 } else {
154 4256 svst1(pg_half, u_row + index / 2, u);
155 4256 svst1(pg_half, v_row + index / 2, v);
156 }
157 8512 }
158 16416 }
159
160 148176 static void vector_path(const uint8_t *src_row, uint8_t *y_row,
161 uint8_t *u_row, uint8_t *v_row, bool v_first,
162 const size_t index, const bool evenRow,
163 const svbool_t pg0,
164 const svbool_t pg_half) KLEIDICV_STREAMING {
165 148176 svuint32_t r0_0, r0_1, r0_2, r0_3, g0_0, g0_1, g0_2, g0_3, b0_0, b0_1, b0_2,
166 b0_3;
167
168 148176 ArrayOfFour_svuint32 r0 = {
169 148176 {std::ref(r0_0), std::ref(r0_1), std::ref(r0_2), std::ref(r0_3)}};
170 148176 ArrayOfFour_svuint32 g0 = {
171 148176 {std::ref(g0_0), std::ref(g0_1), std::ref(g0_2), std::ref(g0_3)}};
172 148176 ArrayOfFour_svuint32 b0 = {
173 148176 {std::ref(b0_0), std::ref(b0_1), std::ref(b0_2), std::ref(b0_3)}};
174
175 148176 load_rgb(r0, g0, b0, src_row, scn * index, pg0);
176
177 148176 svuint8_t y0 = rgb_to_y(r0, g0, b0);
178
179 148176 svst1(pg0, y_row + index, y0);
180
181
8/8
✓ Branch 0 taken 18380 times.
✓ Branch 1 taken 18664 times.
✓ Branch 2 taken 18380 times.
✓ Branch 3 taken 18664 times.
✓ Branch 4 taken 18380 times.
✓ Branch 5 taken 18664 times.
✓ Branch 6 taken 18380 times.
✓ Branch 7 taken 18664 times.
148176 if (evenRow) {
182 74656 svuint8_t u, v;
183 74656 svint32_t r_even_0 = svreinterpret_s32(r0_0);
184 74656 svint32_t r_even_1 = svreinterpret_s32(r0_1);
185 74656 svint32_t g_even_0 = svreinterpret_s32(g0_0);
186 74656 svint32_t g_even_1 = svreinterpret_s32(g0_1);
187 74656 svint32_t b_even_0 = svreinterpret_s32(b0_0);
188 74656 svint32_t b_even_1 = svreinterpret_s32(b0_1);
189
190 74656 ArrayOfTwo_svint32 r_even = {{std::ref(r_even_0), std::ref(r_even_1)}};
191 74656 ArrayOfTwo_svint32 g_even = {{std::ref(g_even_0), std::ref(g_even_1)}};
192 74656 ArrayOfTwo_svint32 b_even = {{std::ref(b_even_0), std::ref(b_even_1)}};
193
194 74656 rgb_to_uv(r_even, g_even, b_even, u, v);
195
196
8/8
✓ Branch 0 taken 9332 times.
✓ Branch 1 taken 9332 times.
✓ Branch 2 taken 9332 times.
✓ Branch 3 taken 9332 times.
✓ Branch 4 taken 9332 times.
✓ Branch 5 taken 9332 times.
✓ Branch 6 taken 9332 times.
✓ Branch 7 taken 9332 times.
74656 if (v_first) {
197 37328 swap_scalable(u, v);
198 37328 }
199
200 if constexpr (kInterleave) {
201 37552 svuint8x2_t uv = svcreate2(u, v);
202 37552 svst2_u8(pg_half, u_row + index, uv);
203 37552 } else {
204 37104 svst1(pg_half, u_row + index / 2, u);
205 37104 svst1(pg_half, v_row + index / 2, v);
206 }
207 74656 }
208 148176 }
209
210 181008 static svuint8_t rgb_to_y(ArrayOfFour_svuint32 r, ArrayOfFour_svuint32 g,
211 ArrayOfFour_svuint32 b) KLEIDICV_STREAMING {
212 181008 const uint32_t kShifted16 = (16 << kWeightScale);
213 181008 const uint32_t kHalfShift = (1 << (kWeightScale - 1));
214
215 181008 svbool_t pg = svptrue_b32();
216
217 // Y = kR*R + kG*G + kB*B + rounding bias
218 181008 svuint32_t bias = svdup_u32(kHalfShift + kShifted16);
219 181008 svuint32_t y_0 = bias;
220 181008 svuint32_t y_1 = bias;
221 181008 svuint32_t y_2 = bias;
222 181008 svuint32_t y_3 = bias;
223
224 181008 ArrayOfFour_svuint32 y = {
225 181008 {std::ref(y_0), std::ref(y_1), std::ref(y_2), std::ref(y_3)}};
226
227 KLEIDICV_FORCE_LOOP_UNROLL
228
8/8
✓ Branch 0 taken 45252 times.
✓ Branch 1 taken 181008 times.
✓ Branch 2 taken 45252 times.
✓ Branch 3 taken 181008 times.
✓ Branch 4 taken 45252 times.
✓ Branch 5 taken 181008 times.
✓ Branch 6 taken 45252 times.
✓ Branch 7 taken 181008 times.
905040 for (int i = 0; i < 4; i++) {
229 724032 y(i) = svmla_n_u32_x(pg, y(i), r(i), kRYWeight);
230 724032 y(i) = svmla_n_u32_x(pg, y(i), g(i), kGYWeight);
231 724032 y(i) = svmla_n_u32_x(pg, y(i), b(i), kBYWeight);
232 724032 }
233
234 181008 svuint16_t y_b = svshrnb_n_u32(y(0), kWeightScale - 8);
235 181008 y_b = svshrnt_n_u32(y_b, y(2), kWeightScale - 8); // 0, 1, 2, 3, 4, 5, 6, 7
236 181008 svuint16_t y_t = svshrnb_n_u32(y(1), kWeightScale - 8);
237 181008 y_t = svshrnt_n_u32(y_t, y(3),
238 kWeightScale - 8); // 8, 9, 10, 11, 12, 13, 14, 15
239
240 362016 return svuzp2_u8(svreinterpret_u8(y_b), svreinterpret_u8(y_t));
241 181008 }
242
243 17024 static svuint8_t compute_u_or_v_2x(ArrayOfFour_svint32 r,
244 ArrayOfFour_svint32 g,
245 ArrayOfFour_svint32 b, const int r_coeff,
246 const int g_coeff,
247 const int b_coeff) KLEIDICV_STREAMING {
248 17024 svbool_t pg = svptrue_b32();
249 17024 const int kHalfShift = (1 << (kWeightScale - 1));
250 17024 const int kShifted128 = (128 << kWeightScale);
251 17024 svint32_t bias = svdup_s32(kHalfShift + kShifted128);
252 17024 svint32_t uv0 = bias;
253 17024 svint32_t uv1 = bias;
254 17024 svint32_t uv2 = bias;
255 17024 svint32_t uv3 = bias;
256
257 17024 ArrayOfFour_svint32 uv = {
258 17024 {std::ref(uv0), std::ref(uv1), std::ref(uv2), std::ref(uv3)}};
259
260 KLEIDICV_FORCE_LOOP_UNROLL
261
8/8
✓ Branch 0 taken 4256 times.
✓ Branch 1 taken 17024 times.
✓ Branch 2 taken 4256 times.
✓ Branch 3 taken 17024 times.
✓ Branch 4 taken 4256 times.
✓ Branch 5 taken 17024 times.
✓ Branch 6 taken 4256 times.
✓ Branch 7 taken 17024 times.
85120 for (int i = 0; i < 4; i++) {
262 68096 uv(i) = svmla_n_s32_x(pg, uv(i), r(i), r_coeff);
263 68096 uv(i) = svmla_n_s32_x(pg, uv(i), g(i), g_coeff);
264 68096 uv(i) = svmla_n_s32_x(pg, uv(i), b(i), b_coeff);
265 68096 }
266
267 34048 svint16_t uv_b =
268 17024 svuzp2_s16(svreinterpret_s16(uv(0)), svreinterpret_s16(uv(1)));
269 34048 svint16_t uv_t =
270 17024 svuzp2_s16(svreinterpret_s16(uv(2)), svreinterpret_s16(uv(3)));
271
272 17024 uv_b = svasr_n_s16_x(pg, uv_b, kWeightScale - 16);
273 17024 uv_t = svasr_n_s16_x(pg, uv_t, kWeightScale - 16);
274
275 34048 return svuzp1_u8(svreinterpret_u8(uv_b), svreinterpret_u8(uv_t));
276 17024 }
277
278 8512 static void rgb_to_uv_2x(ArrayOfFour_svint32 r, ArrayOfFour_svint32 g,
279 ArrayOfFour_svint32 b, svuint8_t &u,
280 svuint8_t &v) KLEIDICV_STREAMING {
281 // ---------------- U (Cb) Component ----------------
282 // U = R * kRU + G * kGU + B * kBU + bias
283 8512 u = compute_u_or_v_2x(r, g, b, kRUWeight, kGUWeight, kBUWeight);
284
285 // ---------------- V (Cr) Component ----------------
286 // V = R * kBU + G * kGV + B * kBV + bias
287 8512 v = compute_u_or_v_2x(r, g, b, kBUWeight, kGVWeight, kBVWeight);
288 8512 }
289
290 149312 static svuint8_t compute_u_or_v(ArrayOfTwo_svint32 r, ArrayOfTwo_svint32 g,
291 ArrayOfTwo_svint32 b, const int r_coeff,
292 const int g_coeff,
293 const int b_coeff) KLEIDICV_STREAMING {
294 149312 svbool_t pg = svptrue_b32();
295 149312 const int kHalfShift = (1 << (kWeightScale - 1));
296 149312 const int kShifted128 = (128 << kWeightScale);
297
298 149312 svint32_t bias = svdup_s32(kHalfShift + kShifted128);
299 149312 svint32_t uv0 = bias;
300 149312 svint32_t uv1 = bias;
301
302 149312 ArrayOfTwo_svint32 uv = {{std::ref(uv0), std::ref(uv1)}};
303
304 KLEIDICV_FORCE_LOOP_UNROLL
305
8/8
✓ Branch 0 taken 37328 times.
✓ Branch 1 taken 74656 times.
✓ Branch 2 taken 37328 times.
✓ Branch 3 taken 74656 times.
✓ Branch 4 taken 37328 times.
✓ Branch 5 taken 74656 times.
✓ Branch 6 taken 37328 times.
✓ Branch 7 taken 74656 times.
447936 for (int i = 0; i < 2; i++) {
306 298624 uv(i) = svmla_n_s32_x(pg, uv(i), r(i), r_coeff);
307 298624 uv(i) = svmla_n_s32_x(pg, uv(i), g(i), g_coeff);
308 298624 uv(i) = svmla_n_s32_x(pg, uv(i), b(i), b_coeff);
309 298624 }
310
311 298624 svint16_t output =
312 149312 svuzp2_s16(svreinterpret_s16(uv(0)), svreinterpret_s16(uv(1)));
313
314 149312 output = svasr_n_s16_x(pg, output, kWeightScale - 16);
315
316 298624 return svuzp1_u8(svreinterpret_u8(output), svreinterpret_u8(output));
317 149312 }
318
319 74656 static void rgb_to_uv(ArrayOfTwo_svint32 r, ArrayOfTwo_svint32 g,
320 ArrayOfTwo_svint32 b, svuint8_t &u,
321 svuint8_t &v) KLEIDICV_STREAMING {
322 // ---------------- U (Cb) Component ----------------
323 // U = R * kRU + G * kGU + B * kBU + bias
324 74656 u = compute_u_or_v(r, g, b, kRUWeight, kGUWeight, kBUWeight);
325
326 // ---------------- V (Cr) Component ----------------
327 // V = R * kBU + G * kGV + B * kBV + bias
328 74656 v = compute_u_or_v(r, g, b, kBUWeight, kGVWeight, kBVWeight);
329 74656 }
330
331 181008 static void load_rgb(ArrayOfFour_svuint32 &r, ArrayOfFour_svuint32 &g,
332 ArrayOfFour_svuint32 &b, const uint8_t *src_row,
333 const size_t w, const svbool_t &pg0) KLEIDICV_STREAMING {
334 181008 svuint8_t b0, g0, r0;
335 if constexpr (kAlpha) {
336 // 4-channel input (RGBA or BGRA)
337 90504 svuint8x4_t vsrc0 = svld4(pg0, src_row + w);
338
339 90504 b0 = svget4(vsrc0, b_index);
340 90504 g0 = svget4(vsrc0, g_index);
341 90504 r0 = svget4(vsrc0, r_index);
342
343 90504 } else {
344 // 3-channel input (RGB or BGR)
345 90504 svuint8x3_t vsrc0 = svld3(pg0, src_row + w);
346
347 90504 b0 = svget3(vsrc0, b_index);
348 90504 g0 = svget3(vsrc0, g_index);
349 90504 r0 = svget3(vsrc0, r_index);
350 90504 }
351 181008 svuint16_t r0_lo = svmovlb(r0);
352 181008 svuint16_t r0_hi = svmovlt(r0);
353 181008 r(0) = svunpklo(r0_lo); // 0, 2, 4, 6
354 181008 r(1) = svunpkhi(r0_lo); // 8, 10, 12, 14
355 181008 r(2) = svunpklo(r0_hi); // 1, 3, 5, 7
356 181008 r(3) = svunpkhi(r0_hi); // 9, 11, 13, 15
357
358 181008 svuint16_t g0_lo = svmovlb(g0);
359 181008 svuint16_t g0_hi = svmovlt(g0);
360 181008 g(0) = svunpklo(g0_lo);
361 181008 g(1) = svunpkhi(g0_lo);
362 181008 g(2) = svunpklo(g0_hi);
363 181008 g(3) = svunpkhi(g0_hi);
364
365 181008 svuint16_t b0_lo = svmovlb(b0);
366 181008 svuint16_t b0_hi = svmovlt(b0);
367 181008 b(0) = svunpklo(b0_lo);
368 181008 b(1) = svunpkhi(b0_lo);
369 181008 b(2) = svunpklo(b0_hi);
370 181008 b(3) = svunpkhi(b0_hi);
371 181008 }
372
373 16416 static void load_rgb_2x(ArrayOfFour_svuint32 &r0, ArrayOfFour_svuint32 &g0,
374 ArrayOfFour_svuint32 &b0, ArrayOfFour_svuint32 &r1,
375 ArrayOfFour_svuint32 &g1, ArrayOfFour_svuint32 &b1,
376 const uint8_t *src_row, const size_t w,
377 const svbool_t pg0,
378 const svbool_t pg1) KLEIDICV_STREAMING {
379 16416 const size_t kVectorLength = svcntb();
380 16416 load_rgb(r0, g0, b0, src_row, w, pg0);
381
382 16416 load_rgb(r1, g1, b1, src_row, w + scn * kVectorLength, pg1);
383 16416 }
384
385 static constexpr int b_index = RGB ? 2 : 0;
386 static constexpr int g_index = 1;
387 static constexpr int r_index = RGB ? 0 : 2;
388 static constexpr size_t scn = kAlpha ? 4 : 3;
389 };
390
391 } // namespace KLEIDICV_TARGET_NAMESPACE
392
393 #endif // KLEIDICV_RGB_TO_YUV420_SC_H
394