KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/rgb_to_yuv420_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 231 231 100.0%
Functions: 240 240 100.0%
Branches: 80 80 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_RGB_TO_YUV420_SC_H
6 #define KLEIDICV_RGB_TO_YUV420_SC_H
7
8 #include <algorithm>
9
10 #include "kleidicv/sve2.h"
11 #include "yuv420_coefficients.h"
12
13 namespace KLEIDICV_TARGET_NAMESPACE {
14
15 template <bool kAlpha, bool RGB, bool kInterleave>
16 class RGBxorBGRxToYUV420 {
17 public:
18 using ArrayOfFour_svuint32 = ScalableVectorArray1D<svuint32_t, 4>;
19 using ArrayOfFour_svint32 = ScalableVectorArray1D<svint32_t, 4>;
20 using ArrayOfTwo_svint32 = ScalableVectorArray1D<svint32_t, 2>;
21
22 1504 static kleidicv_error_t rgb2yuv420_operation_sc(
23 const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride,
24 uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height,
25 bool v_first, size_t begin, size_t end) KLEIDICV_STREAMING {
26 1504 size_t row_begin = begin * 2;
27 1504 size_t row_end = std::min<size_t>(height, end * 2);
28 1504 const uint8_t *src_row = nullptr;
29 1504 uint8_t *y_row = nullptr;
30 1504 uint8_t *u_row = nullptr;
31 1504 uint8_t *v_row = nullptr;
32
8/8
✓ Branch 0 taken 376 times.
✓ Branch 1 taken 19240 times.
✓ Branch 2 taken 376 times.
✓ Branch 3 taken 19240 times.
✓ Branch 4 taken 376 times.
✓ Branch 5 taken 19240 times.
✓ Branch 6 taken 376 times.
✓ Branch 7 taken 19240 times.
78464 for (size_t h = row_begin; h < row_end; h++) {
33 76960 src_row = src + src_stride * h;
34 76960 y_row = y_dst + y_stride * h;
35 76960 bool evenRow = (h & 1) == 0;
36
8/8
✓ Branch 0 taken 9528 times.
✓ Branch 1 taken 9712 times.
✓ Branch 2 taken 9528 times.
✓ Branch 3 taken 9712 times.
✓ Branch 4 taken 9528 times.
✓ Branch 5 taken 9712 times.
✓ Branch 6 taken 9528 times.
✓ Branch 7 taken 9712 times.
76960 if (evenRow) {
37 if constexpr (kInterleave) {
38 19648 u_row = uv_dst + uv_stride * (h / 2);
39 } else {
40 19200 u_row =
41 19200 uv_dst + uv_stride * (h / 4) + ((h / 2) % 2) * ((width + 1) / 2);
42 // Pointer to the start of the V plane.
43 // The V plane follows the U plane. Both U and V planes are
44 // subsampled at a 2:1 vertical ratio (i.e., each has height / 2
45 // rows), and are often stored in a single contiguous chroma region in
46 // memory. Depending on image height and stride, the starting offset
47 // of V may require adjustment to maintain correct alignment. In
48 // particular, the chroma rows may not align perfectly, so a
49 // fractional offset (in rows) is applied to calculate the V plane
50 // position. The formula used here accounts for this by adjusting
51 // based on row parity, assuming consistent memory layout across the
52 // Y, U, and V planes.
53 38400 v_row = uv_dst + uv_stride * ((h + height + 1) / 4) +
54 19200 (((h + height + 1) / 2) % 2) * ((width + 1) / 2);
55 }
56 38848 }
57
58 76960 const size_t kVectorLength = svcntb();
59 76960 LoopUnroll2 loop{width, kVectorLength};
60
61 93376 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
62 16416 svbool_t pg = svptrue_b8();
63
64 32832 vector_path_2x(src_row, y_row, u_row, v_row, v_first, index, evenRow,
65 16416 pg, pg, pg);
66 16416 });
67
68 153424 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
69 76464 svbool_t pg = svwhilelt_b8_u64(index, length);
70 76464 svbool_t pg_half = svwhilelt_b8_u64((index + 1) / 2, (length + 1) / 2);
71
8/8
✓ Branch 0 taken 19332 times.
✓ Branch 1 taken 19116 times.
✓ Branch 2 taken 19332 times.
✓ Branch 3 taken 19116 times.
✓ Branch 4 taken 19332 times.
✓ Branch 5 taken 19116 times.
✓ Branch 6 taken 19332 times.
✓ Branch 7 taken 19116 times.
153792 while (svptest_first(svptrue_b8(), pg)) {
72 154656 vector_path(src_row, y_row, u_row, v_row, v_first, index, evenRow, pg,
73 77328 pg_half);
74 77328 index += kVectorLength;
75 77328 pg = svwhilelt_b8_u64(index, length);
76 77328 pg_half = svwhilelt_b8_u64((index + 1) / 2, (length + 1) / 2);
77 }
78 76464 });
79 76960 }
80 1504 return KLEIDICV_OK;
81 1504 }
82
83 private:
84 16416 static void vector_path_2x(const uint8_t *src_row, uint8_t *y_row,
85 uint8_t *u_row, uint8_t *v_row, bool v_first,
86 const size_t index, const bool evenRow,
87 const svbool_t pg0, const svbool_t pg1,
88 const svbool_t pg_half) KLEIDICV_STREAMING {
89 16416 svuint32_t r0_0, r0_1, r0_2, r0_3, g0_0, g0_1, g0_2, g0_3, b0_0, b0_1, b0_2,
90 b0_3, r1_0, r1_1, r1_2, r1_3, g1_0, g1_1, g1_2, g1_3, b1_0, b1_1, b1_2,
91 b1_3;
92
93 16416 ArrayOfFour_svuint32 r0 = {
94 16416 {std::ref(r0_0), std::ref(r0_1), std::ref(r0_2), std::ref(r0_3)}};
95 16416 ArrayOfFour_svuint32 g0 = {
96 16416 {std::ref(g0_0), std::ref(g0_1), std::ref(g0_2), std::ref(g0_3)}};
97 16416 ArrayOfFour_svuint32 b0 = {
98 16416 {std::ref(b0_0), std::ref(b0_1), std::ref(b0_2), std::ref(b0_3)}};
99 16416 ArrayOfFour_svuint32 r1 = {
100 16416 {std::ref(r1_0), std::ref(r1_1), std::ref(r1_2), std::ref(r1_3)}};
101 16416 ArrayOfFour_svuint32 g1 = {
102 16416 {std::ref(g1_0), std::ref(g1_1), std::ref(g1_2), std::ref(g1_3)}};
103 16416 ArrayOfFour_svuint32 b1 = {
104 16416 {std::ref(b1_0), std::ref(b1_1), std::ref(b1_2), std::ref(b1_3)}};
105
106 16416 load_rgb_2x(r0, g0, b0, r1, g1, b1, src_row, scn * index, pg0, pg1);
107
108 16416 svuint8_t y0 = rgb_to_y(r0, g0, b0);
109
110 16416 svuint8_t y1 = rgb_to_y(r1, g1, b1);
111
112 #if KLEIDICV_TARGET_SME2
113 // assume the predicates are full true
114 1296 svcount_t pg_counter = svptrue_c8();
115 1296 svst1(pg_counter, y_row + index, svcreate2(y0, y1));
116 #else
117 15120 svst1(pg0, y_row + index, y0);
118 15120 svst1(pg1, y_row + index + svcntb(), y1);
119 #endif // KLEIDICV_TARGET_SME2
120
121
8/8
✓ Branch 0 taken 1976 times.
✓ Branch 1 taken 2128 times.
✓ Branch 2 taken 1976 times.
✓ Branch 3 taken 2128 times.
✓ Branch 4 taken 1976 times.
✓ Branch 5 taken 2128 times.
✓ Branch 6 taken 1976 times.
✓ Branch 7 taken 2128 times.
16416 if (evenRow) {
122 8512 svuint8_t u, v;
123 8512 svint32_t r_even_0 = svreinterpret_s32(r0_0);
124 8512 svint32_t r_even_1 = svreinterpret_s32(r0_1);
125 8512 svint32_t r_even_2 = svreinterpret_s32(r1_0);
126 8512 svint32_t r_even_3 = svreinterpret_s32(r1_1);
127 8512 svint32_t g_even_0 = svreinterpret_s32(g0_0);
128 8512 svint32_t g_even_1 = svreinterpret_s32(g0_1);
129 8512 svint32_t g_even_2 = svreinterpret_s32(g1_0);
130 8512 svint32_t g_even_3 = svreinterpret_s32(g1_1);
131 8512 svint32_t b_even_0 = svreinterpret_s32(b0_0);
132 8512 svint32_t b_even_1 = svreinterpret_s32(b0_1);
133 8512 svint32_t b_even_2 = svreinterpret_s32(b1_0);
134 8512 svint32_t b_even_3 = svreinterpret_s32(b1_1);
135
136 17024 ArrayOfFour_svint32 r_even = {{std::ref(r_even_0), std::ref(r_even_1),
137 17024 std::ref(r_even_2), std::ref(r_even_3)}};
138 17024 ArrayOfFour_svint32 g_even = {{std::ref(g_even_0), std::ref(g_even_1),
139 17024 std::ref(g_even_2), std::ref(g_even_3)}};
140 17024 ArrayOfFour_svint32 b_even = {{std::ref(b_even_0), std::ref(b_even_1),
141 17024 std::ref(b_even_2), std::ref(b_even_3)}};
142
143 8512 rgb_to_uv_2x(r_even, g_even, b_even, u, v);
144
145
8/8
✓ Branch 0 taken 1064 times.
✓ Branch 1 taken 1064 times.
✓ Branch 2 taken 1064 times.
✓ Branch 3 taken 1064 times.
✓ Branch 4 taken 1064 times.
✓ Branch 5 taken 1064 times.
✓ Branch 6 taken 1064 times.
✓ Branch 7 taken 1064 times.
8512 if (v_first) {
146 4256 swap_scalable(u, v);
147 4256 }
148
149 if constexpr (kInterleave) {
150 4256 svuint8x2_t uv = svcreate2(u, v);
151 4256 svst2_u8(pg_half, u_row + index, uv);
152 4256 } else {
153 4256 svst1(pg_half, u_row + index / 2, u);
154 4256 svst1(pg_half, v_row + index / 2, v);
155 }
156 8512 }
157 16416 }
158
159 77328 static void vector_path(const uint8_t *src_row, uint8_t *y_row,
160 uint8_t *u_row, uint8_t *v_row, bool v_first,
161 const size_t index, const bool evenRow,
162 const svbool_t pg0,
163 const svbool_t pg_half) KLEIDICV_STREAMING {
164 77328 svuint32_t r0_0, r0_1, r0_2, r0_3, g0_0, g0_1, g0_2, g0_3, b0_0, b0_1, b0_2,
165 b0_3;
166
167 77328 ArrayOfFour_svuint32 r0 = {
168 77328 {std::ref(r0_0), std::ref(r0_1), std::ref(r0_2), std::ref(r0_3)}};
169 77328 ArrayOfFour_svuint32 g0 = {
170 77328 {std::ref(g0_0), std::ref(g0_1), std::ref(g0_2), std::ref(g0_3)}};
171 77328 ArrayOfFour_svuint32 b0 = {
172 77328 {std::ref(b0_0), std::ref(b0_1), std::ref(b0_2), std::ref(b0_3)}};
173
174 77328 load_rgb(r0, g0, b0, src_row, scn * index, pg0);
175
176 77328 svuint8_t y0 = rgb_to_y(r0, g0, b0);
177
178 77328 svst1(pg0, y_row + index, y0);
179
180
8/8
✓ Branch 0 taken 9580 times.
✓ Branch 1 taken 9752 times.
✓ Branch 2 taken 9580 times.
✓ Branch 3 taken 9752 times.
✓ Branch 4 taken 9580 times.
✓ Branch 5 taken 9752 times.
✓ Branch 6 taken 9580 times.
✓ Branch 7 taken 9752 times.
77328 if (evenRow) {
181 39008 svuint8_t u, v;
182 39008 svint32_t r_even_0 = svreinterpret_s32(r0_0);
183 39008 svint32_t r_even_1 = svreinterpret_s32(r0_1);
184 39008 svint32_t g_even_0 = svreinterpret_s32(g0_0);
185 39008 svint32_t g_even_1 = svreinterpret_s32(g0_1);
186 39008 svint32_t b_even_0 = svreinterpret_s32(b0_0);
187 39008 svint32_t b_even_1 = svreinterpret_s32(b0_1);
188
189 39008 ArrayOfTwo_svint32 r_even = {{std::ref(r_even_0), std::ref(r_even_1)}};
190 39008 ArrayOfTwo_svint32 g_even = {{std::ref(g_even_0), std::ref(g_even_1)}};
191 39008 ArrayOfTwo_svint32 b_even = {{std::ref(b_even_0), std::ref(b_even_1)}};
192
193 39008 rgb_to_uv(r_even, g_even, b_even, u, v);
194
195
8/8
✓ Branch 0 taken 9332 times.
✓ Branch 1 taken 420 times.
✓ Branch 2 taken 9332 times.
✓ Branch 3 taken 420 times.
✓ Branch 4 taken 9332 times.
✓ Branch 5 taken 420 times.
✓ Branch 6 taken 9332 times.
✓ Branch 7 taken 420 times.
39008 if (v_first) {
196 1680 swap_scalable(u, v);
197 1680 }
198
199 if constexpr (kInterleave) {
200 19728 svuint8x2_t uv = svcreate2(u, v);
201 19728 svst2_u8(pg_half, u_row + index, uv);
202 19728 } else {
203 19280 svst1(pg_half, u_row + index / 2, u);
204 19280 svst1(pg_half, v_row + index / 2, v);
205 }
206 39008 }
207 77328 }
208
209 110160 static svuint8_t rgb_to_y(ArrayOfFour_svuint32 r, ArrayOfFour_svuint32 g,
210 ArrayOfFour_svuint32 b) KLEIDICV_STREAMING {
211 110160 const uint32_t kShifted16 = (16 << kWeightScale);
212 110160 const uint32_t kHalfShift = (1 << (kWeightScale - 1));
213
214 110160 svbool_t pg = svptrue_b32();
215
216 // Y = kR*R + kG*G + kB*B + rounding bias
217 110160 svuint32_t bias = svdup_u32(kHalfShift + kShifted16);
218 110160 svuint32_t y_0 = bias;
219 110160 svuint32_t y_1 = bias;
220 110160 svuint32_t y_2 = bias;
221 110160 svuint32_t y_3 = bias;
222
223 110160 ArrayOfFour_svuint32 y = {
224 110160 {std::ref(y_0), std::ref(y_1), std::ref(y_2), std::ref(y_3)}};
225
226 KLEIDICV_FORCE_LOOP_UNROLL
227
8/8
✓ Branch 0 taken 27540 times.
✓ Branch 1 taken 110160 times.
✓ Branch 2 taken 27540 times.
✓ Branch 3 taken 110160 times.
✓ Branch 4 taken 27540 times.
✓ Branch 5 taken 110160 times.
✓ Branch 6 taken 27540 times.
✓ Branch 7 taken 110160 times.
550800 for (int i = 0; i < 4; i++) {
228 440640 y(i) = svmla_n_u32_x(pg, y(i), r(i), kRYWeight);
229 440640 y(i) = svmla_n_u32_x(pg, y(i), g(i), kGYWeight);
230 440640 y(i) = svmla_n_u32_x(pg, y(i), b(i), kBYWeight);
231 440640 }
232
233 110160 svuint16_t y_b = svshrnb_n_u32(y(0), kWeightScale - 8);
234 110160 y_b = svshrnt_n_u32(y_b, y(2), kWeightScale - 8); // 0, 1, 2, 3, 4, 5, 6, 7
235 110160 svuint16_t y_t = svshrnb_n_u32(y(1), kWeightScale - 8);
236 110160 y_t = svshrnt_n_u32(y_t, y(3),
237 kWeightScale - 8); // 8, 9, 10, 11, 12, 13, 14, 15
238
239 220320 return svuzp2_u8(svreinterpret_u8(y_b), svreinterpret_u8(y_t));
240 110160 }
241
242 17024 static svuint8_t compute_u_or_v_2x(ArrayOfFour_svint32 r,
243 ArrayOfFour_svint32 g,
244 ArrayOfFour_svint32 b, const int r_coeff,
245 const int g_coeff,
246 const int b_coeff) KLEIDICV_STREAMING {
247 17024 svbool_t pg = svptrue_b32();
248 17024 const int kHalfShift = (1 << (kWeightScale - 1));
249 17024 const int kShifted128 = (128 << kWeightScale);
250 17024 svint32_t bias = svdup_s32(kHalfShift + kShifted128);
251 17024 svint32_t uv0 = bias;
252 17024 svint32_t uv1 = bias;
253 17024 svint32_t uv2 = bias;
254 17024 svint32_t uv3 = bias;
255
256 17024 ArrayOfFour_svint32 uv = {
257 17024 {std::ref(uv0), std::ref(uv1), std::ref(uv2), std::ref(uv3)}};
258
259 KLEIDICV_FORCE_LOOP_UNROLL
260
8/8
✓ Branch 0 taken 4256 times.
✓ Branch 1 taken 17024 times.
✓ Branch 2 taken 4256 times.
✓ Branch 3 taken 17024 times.
✓ Branch 4 taken 4256 times.
✓ Branch 5 taken 17024 times.
✓ Branch 6 taken 4256 times.
✓ Branch 7 taken 17024 times.
85120 for (int i = 0; i < 4; i++) {
261 68096 uv(i) = svmla_n_s32_x(pg, uv(i), r(i), r_coeff);
262 68096 uv(i) = svmla_n_s32_x(pg, uv(i), g(i), g_coeff);
263 68096 uv(i) = svmla_n_s32_x(pg, uv(i), b(i), b_coeff);
264 68096 }
265
266 34048 svint16_t uv_b =
267 17024 svuzp2_s16(svreinterpret_s16(uv(0)), svreinterpret_s16(uv(1)));
268 34048 svint16_t uv_t =
269 17024 svuzp2_s16(svreinterpret_s16(uv(2)), svreinterpret_s16(uv(3)));
270
271 17024 uv_b = svasr_n_s16_x(pg, uv_b, kWeightScale - 16);
272 17024 uv_t = svasr_n_s16_x(pg, uv_t, kWeightScale - 16);
273
274 34048 return svuzp1_u8(svreinterpret_u8(uv_b), svreinterpret_u8(uv_t));
275 17024 }
276
277 8512 static void rgb_to_uv_2x(ArrayOfFour_svint32 r, ArrayOfFour_svint32 g,
278 ArrayOfFour_svint32 b, svuint8_t &u,
279 svuint8_t &v) KLEIDICV_STREAMING {
280 // ---------------- U (Cb) Component ----------------
281 // U = R * kRU + G * kGU + B * kBU + bias
282 8512 u = compute_u_or_v_2x(r, g, b, kRUWeight, kGUWeight, kBUWeight);
283
284 // ---------------- V (Cr) Component ----------------
285 // V = R * kBU + G * kGV + B * kBV + bias
286 8512 v = compute_u_or_v_2x(r, g, b, kBUWeight, kGVWeight, kBVWeight);
287 8512 }
288
289 78016 static svuint8_t compute_u_or_v(ArrayOfTwo_svint32 r, ArrayOfTwo_svint32 g,
290 ArrayOfTwo_svint32 b, const int r_coeff,
291 const int g_coeff,
292 const int b_coeff) KLEIDICV_STREAMING {
293 78016 svbool_t pg = svptrue_b32();
294 78016 const int kHalfShift = (1 << (kWeightScale - 1));
295 78016 const int kShifted128 = (128 << kWeightScale);
296
297 78016 svint32_t bias = svdup_s32(kHalfShift + kShifted128);
298 78016 svint32_t uv0 = bias;
299 78016 svint32_t uv1 = bias;
300
301 78016 ArrayOfTwo_svint32 uv = {{std::ref(uv0), std::ref(uv1)}};
302
303 KLEIDICV_FORCE_LOOP_UNROLL
304
8/8
✓ Branch 0 taken 19504 times.
✓ Branch 1 taken 39008 times.
✓ Branch 2 taken 19504 times.
✓ Branch 3 taken 39008 times.
✓ Branch 4 taken 19504 times.
✓ Branch 5 taken 39008 times.
✓ Branch 6 taken 19504 times.
✓ Branch 7 taken 39008 times.
234048 for (int i = 0; i < 2; i++) {
305 156032 uv(i) = svmla_n_s32_x(pg, uv(i), r(i), r_coeff);
306 156032 uv(i) = svmla_n_s32_x(pg, uv(i), g(i), g_coeff);
307 156032 uv(i) = svmla_n_s32_x(pg, uv(i), b(i), b_coeff);
308 156032 }
309
310 156032 svint16_t output =
311 78016 svuzp2_s16(svreinterpret_s16(uv(0)), svreinterpret_s16(uv(1)));
312
313 78016 output = svasr_n_s16_x(pg, output, kWeightScale - 16);
314
315 156032 return svuzp1_u8(svreinterpret_u8(output), svreinterpret_u8(output));
316 78016 }
317
318 39008 static void rgb_to_uv(ArrayOfTwo_svint32 r, ArrayOfTwo_svint32 g,
319 ArrayOfTwo_svint32 b, svuint8_t &u,
320 svuint8_t &v) KLEIDICV_STREAMING {
321 // ---------------- U (Cb) Component ----------------
322 // U = R * kRU + G * kGU + B * kBU + bias
323 39008 u = compute_u_or_v(r, g, b, kRUWeight, kGUWeight, kBUWeight);
324
325 // ---------------- V (Cr) Component ----------------
326 // V = R * kBU + G * kGV + B * kBV + bias
327 39008 v = compute_u_or_v(r, g, b, kBUWeight, kGVWeight, kBVWeight);
328 39008 }
329
330 110160 static void load_rgb(ArrayOfFour_svuint32 &r, ArrayOfFour_svuint32 &g,
331 ArrayOfFour_svuint32 &b, const uint8_t *src_row,
332 const size_t w, const svbool_t &pg0) KLEIDICV_STREAMING {
333 110160 svuint8_t b0, g0, r0;
334 if constexpr (kAlpha) {
335 // 4-channel input (RGBA or BGRA)
336 55080 svuint8x4_t vsrc0 = svld4(pg0, src_row + w);
337
338 55080 b0 = svget4(vsrc0, b_index);
339 55080 g0 = svget4(vsrc0, g_index);
340 55080 r0 = svget4(vsrc0, r_index);
341
342 55080 } else {
343 // 3-channel input (RGB or BGR)
344 55080 svuint8x3_t vsrc0 = svld3(pg0, src_row + w);
345
346 55080 b0 = svget3(vsrc0, b_index);
347 55080 g0 = svget3(vsrc0, g_index);
348 55080 r0 = svget3(vsrc0, r_index);
349 55080 }
350 110160 svuint16_t r0_lo = svmovlb(r0);
351 110160 svuint16_t r0_hi = svmovlt(r0);
352 110160 r(0) = svunpklo(r0_lo); // 0, 2, 4, 6
353 110160 r(1) = svunpkhi(r0_lo); // 8, 10, 12, 14
354 110160 r(2) = svunpklo(r0_hi); // 1, 3, 5, 7
355 110160 r(3) = svunpkhi(r0_hi); // 9, 11, 13, 15
356
357 110160 svuint16_t g0_lo = svmovlb(g0);
358 110160 svuint16_t g0_hi = svmovlt(g0);
359 110160 g(0) = svunpklo(g0_lo);
360 110160 g(1) = svunpkhi(g0_lo);
361 110160 g(2) = svunpklo(g0_hi);
362 110160 g(3) = svunpkhi(g0_hi);
363
364 110160 svuint16_t b0_lo = svmovlb(b0);
365 110160 svuint16_t b0_hi = svmovlt(b0);
366 110160 b(0) = svunpklo(b0_lo);
367 110160 b(1) = svunpkhi(b0_lo);
368 110160 b(2) = svunpklo(b0_hi);
369 110160 b(3) = svunpkhi(b0_hi);
370 110160 }
371
372 16416 static void load_rgb_2x(ArrayOfFour_svuint32 &r0, ArrayOfFour_svuint32 &g0,
373 ArrayOfFour_svuint32 &b0, ArrayOfFour_svuint32 &r1,
374 ArrayOfFour_svuint32 &g1, ArrayOfFour_svuint32 &b1,
375 const uint8_t *src_row, const size_t w,
376 const svbool_t pg0,
377 const svbool_t pg1) KLEIDICV_STREAMING {
378 16416 const size_t kVectorLength = svcntb();
379 16416 load_rgb(r0, g0, b0, src_row, w, pg0);
380
381 16416 load_rgb(r1, g1, b1, src_row, w + scn * kVectorLength, pg1);
382 16416 }
383
384 static constexpr int b_index = RGB ? 2 : 0;
385 static constexpr int g_index = 1;
386 static constexpr int r_index = RGB ? 0 : 2;
387 static constexpr size_t scn = kAlpha ? 4 : 3;
388 };
389
390 } // namespace KLEIDICV_TARGET_NAMESPACE
391
392 #endif // KLEIDICV_RGB_TO_YUV420_SC_H
393