KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/rgb_to_yuv420_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 230 230 100.0%
Functions: 192 192 100.0%
Branches: 80 80 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_RGB_TO_YUV420_SC_H
6 #define KLEIDICV_RGB_TO_YUV420_SC_H
7
8 #include <algorithm>
9 #include <functional>
10 #include <utility>
11
12 #include "kleidicv/kleidicv.h"
13 #include "kleidicv/sve2.h"
14 #include "yuv420_coefficients.h"
15
16 namespace KLEIDICV_TARGET_NAMESPACE {
17
18 template <bool kAlpha, bool RGB, bool kInterleave>
19 class RGBxorBGRxToYUV420 {
20 public:
21 using ArrayOfFour_svuint32 = ScalableVectorArray1D<svuint32_t, 4>;
22 using ArrayOfFour_svint32 = ScalableVectorArray1D<svint32_t, 4>;
23 using ArrayOfTwo_svint32 = ScalableVectorArray1D<svint32_t, 2>;
24
25 1128 static kleidicv_error_t rgb2yuv420_operation_sc(
26 const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride,
27 uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height,
28 bool v_first, size_t begin, size_t end) KLEIDICV_STREAMING {
29 1128 size_t row_begin = begin * 2;
30 1128 size_t row_end = std::min<size_t>(height, end * 2);
31 1128 const uint8_t *src_row = nullptr;
32 1128 uint8_t *y_row = nullptr;
33 1128 uint8_t *u_row = nullptr;
34 1128 uint8_t *v_row = nullptr;
35
8/8
✓ Branch 0 taken 282 times.
✓ Branch 1 taken 14430 times.
✓ Branch 2 taken 282 times.
✓ Branch 3 taken 14430 times.
✓ Branch 4 taken 282 times.
✓ Branch 5 taken 14430 times.
✓ Branch 6 taken 282 times.
✓ Branch 7 taken 14430 times.
58848 for (size_t h = row_begin; h < row_end; h++) {
36 57720 src_row = src + src_stride * h;
37 57720 y_row = y_dst + y_stride * h;
38 57720 bool evenRow = (h & 1) == 0;
39
8/8
✓ Branch 0 taken 7146 times.
✓ Branch 1 taken 7284 times.
✓ Branch 2 taken 7146 times.
✓ Branch 3 taken 7284 times.
✓ Branch 4 taken 7146 times.
✓ Branch 5 taken 7284 times.
✓ Branch 6 taken 7146 times.
✓ Branch 7 taken 7284 times.
57720 if (evenRow) {
40 if constexpr (kInterleave) {
41 14736 u_row = uv_dst + uv_stride * (h / 2);
42 } else {
43 14400 u_row =
44 14400 uv_dst + uv_stride * (h / 4) + ((h / 2) % 2) * ((width + 1) / 2);
45 // Pointer to the start of the V plane.
46 // The V plane follows the U plane. Both U and V planes are
47 // subsampled at a 2:1 vertical ratio (i.e., each has height / 2
48 // rows), and are often stored in a single contiguous chroma region in
49 // memory. Depending on image height and stride, the starting offset
50 // of V may require adjustment to maintain correct alignment. In
51 // particular, the chroma rows may not align perfectly, so a
52 // fractional offset (in rows) is applied to calculate the V plane
53 // position. The formula used here accounts for this by adjusting
54 // based on row parity, assuming consistent memory layout across the
55 // Y, U, and V planes.
56 28800 v_row = uv_dst + uv_stride * ((h + height + 1) / 4) +
57 14400 (((h + height + 1) / 2) % 2) * ((width + 1) / 2);
58 }
59 29136 }
60
61 57720 const size_t kVectorLength = svcntb();
62 57720 LoopUnroll2 loop{width, kVectorLength};
63
64 71544 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
65 13824 svbool_t pg = svptrue_b8();
66
67 27648 vector_path_2x(src_row, y_row, u_row, v_row, v_first, index, evenRow,
68 13824 pg, pg, pg);
69 13824 });
70
71 114960 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
72 57240 svbool_t pg = svwhilelt_b8_u64(index, length);
73 57240 svbool_t pg_half = svwhilelt_b8_u64((index + 1) / 2, (length + 1) / 2);
74
8/8
✓ Branch 0 taken 14526 times.
✓ Branch 1 taken 14310 times.
✓ Branch 2 taken 14526 times.
✓ Branch 3 taken 14310 times.
✓ Branch 4 taken 14526 times.
✓ Branch 5 taken 14310 times.
✓ Branch 6 taken 14526 times.
✓ Branch 7 taken 14310 times.
115344 while (svptest_first(svptrue_b8(), pg)) {
75 116208 vector_path(src_row, y_row, u_row, v_row, v_first, index, evenRow, pg,
76 58104 pg_half);
77 58104 index += kVectorLength;
78 58104 pg = svwhilelt_b8_u64(index, length);
79 58104 pg_half = svwhilelt_b8_u64((index + 1) / 2, (length + 1) / 2);
80 }
81 57240 });
82 57720 }
83 1128 return KLEIDICV_OK;
84 1128 }
85
86 private:
87 13824 static void vector_path_2x(const uint8_t *src_row, uint8_t *y_row,
88 uint8_t *u_row, uint8_t *v_row, bool v_first,
89 const size_t index, const bool evenRow,
90 const svbool_t pg0, const svbool_t pg1,
91 const svbool_t pg_half) KLEIDICV_STREAMING {
92 13824 const size_t kVectorLength = svcntb();
93 13824 svuint32_t r0_0, r0_1, r0_2, r0_3, g0_0, g0_1, g0_2, g0_3, b0_0, b0_1, b0_2,
94 b0_3, r1_0, r1_1, r1_2, r1_3, g1_0, g1_1, g1_2, g1_3, b1_0, b1_1, b1_2,
95 b1_3;
96
97 13824 ArrayOfFour_svuint32 r0 = {
98 13824 {std::ref(r0_0), std::ref(r0_1), std::ref(r0_2), std::ref(r0_3)}};
99 13824 ArrayOfFour_svuint32 g0 = {
100 13824 {std::ref(g0_0), std::ref(g0_1), std::ref(g0_2), std::ref(g0_3)}};
101 13824 ArrayOfFour_svuint32 b0 = {
102 13824 {std::ref(b0_0), std::ref(b0_1), std::ref(b0_2), std::ref(b0_3)}};
103 13824 ArrayOfFour_svuint32 r1 = {
104 13824 {std::ref(r1_0), std::ref(r1_1), std::ref(r1_2), std::ref(r1_3)}};
105 13824 ArrayOfFour_svuint32 g1 = {
106 13824 {std::ref(g1_0), std::ref(g1_1), std::ref(g1_2), std::ref(g1_3)}};
107 13824 ArrayOfFour_svuint32 b1 = {
108 13824 {std::ref(b1_0), std::ref(b1_1), std::ref(b1_2), std::ref(b1_3)}};
109
110 13824 load_rgb_2x(r0, g0, b0, r1, g1, b1, src_row, scn * index, pg0, pg1);
111
112 13824 svuint8_t y0 = rgb_to_y(r0, g0, b0);
113
114 13824 svuint8_t y1 = rgb_to_y(r1, g1, b1);
115
116 13824 svst1(pg0, y_row + index, y0);
117 13824 svst1(pg1, y_row + index + kVectorLength, y1);
118
119
8/8
✓ Branch 0 taken 1664 times.
✓ Branch 1 taken 1792 times.
✓ Branch 2 taken 1664 times.
✓ Branch 3 taken 1792 times.
✓ Branch 4 taken 1664 times.
✓ Branch 5 taken 1792 times.
✓ Branch 6 taken 1664 times.
✓ Branch 7 taken 1792 times.
13824 if (evenRow) {
120 7168 svuint8_t u, v;
121 7168 svint32_t r_even_0 = svreinterpret_s32(r0_0);
122 7168 svint32_t r_even_1 = svreinterpret_s32(r0_1);
123 7168 svint32_t r_even_2 = svreinterpret_s32(r1_0);
124 7168 svint32_t r_even_3 = svreinterpret_s32(r1_1);
125 7168 svint32_t g_even_0 = svreinterpret_s32(g0_0);
126 7168 svint32_t g_even_1 = svreinterpret_s32(g0_1);
127 7168 svint32_t g_even_2 = svreinterpret_s32(g1_0);
128 7168 svint32_t g_even_3 = svreinterpret_s32(g1_1);
129 7168 svint32_t b_even_0 = svreinterpret_s32(b0_0);
130 7168 svint32_t b_even_1 = svreinterpret_s32(b0_1);
131 7168 svint32_t b_even_2 = svreinterpret_s32(b1_0);
132 7168 svint32_t b_even_3 = svreinterpret_s32(b1_1);
133
134 14336 ArrayOfFour_svint32 r_even = {{std::ref(r_even_0), std::ref(r_even_1),
135 14336 std::ref(r_even_2), std::ref(r_even_3)}};
136 14336 ArrayOfFour_svint32 g_even = {{std::ref(g_even_0), std::ref(g_even_1),
137 14336 std::ref(g_even_2), std::ref(g_even_3)}};
138 14336 ArrayOfFour_svint32 b_even = {{std::ref(b_even_0), std::ref(b_even_1),
139 14336 std::ref(b_even_2), std::ref(b_even_3)}};
140
141 7168 rgb_to_uv_2x(r_even, g_even, b_even, u, v);
142
143
8/8
✓ Branch 0 taken 896 times.
✓ Branch 1 taken 896 times.
✓ Branch 2 taken 896 times.
✓ Branch 3 taken 896 times.
✓ Branch 4 taken 896 times.
✓ Branch 5 taken 896 times.
✓ Branch 6 taken 896 times.
✓ Branch 7 taken 896 times.
7168 if (v_first) {
144 3584 swap_scalable(u, v);
145 3584 }
146
147 if constexpr (kInterleave) {
148 3584 svuint8x2_t uv = svcreate2(u, v);
149 3584 svst2_u8(pg_half, u_row + index, uv);
150 3584 } else {
151 3584 svst1(pg_half, u_row + index / 2, u);
152 3584 svst1(pg_half, v_row + index / 2, v);
153 }
154 7168 }
155 13824 }
156
157 58104 static void vector_path(const uint8_t *src_row, uint8_t *y_row,
158 uint8_t *u_row, uint8_t *v_row, bool v_first,
159 const size_t index, const bool evenRow,
160 const svbool_t pg0,
161 const svbool_t pg_half) KLEIDICV_STREAMING {
162 58104 svuint32_t r0_0, r0_1, r0_2, r0_3, g0_0, g0_1, g0_2, g0_3, b0_0, b0_1, b0_2,
163 b0_3;
164
165 58104 ArrayOfFour_svuint32 r0 = {
166 58104 {std::ref(r0_0), std::ref(r0_1), std::ref(r0_2), std::ref(r0_3)}};
167 58104 ArrayOfFour_svuint32 g0 = {
168 58104 {std::ref(g0_0), std::ref(g0_1), std::ref(g0_2), std::ref(g0_3)}};
169 58104 ArrayOfFour_svuint32 b0 = {
170 58104 {std::ref(b0_0), std::ref(b0_1), std::ref(b0_2), std::ref(b0_3)}};
171
172 58104 load_rgb(r0, g0, b0, src_row, scn * index, pg0);
173
174 58104 svuint8_t y0 = rgb_to_y(r0, g0, b0);
175
176 58104 svst1(pg0, y_row + index, y0);
177
178
8/8
✓ Branch 0 taken 7198 times.
✓ Branch 1 taken 7328 times.
✓ Branch 2 taken 7198 times.
✓ Branch 3 taken 7328 times.
✓ Branch 4 taken 7198 times.
✓ Branch 5 taken 7328 times.
✓ Branch 6 taken 7198 times.
✓ Branch 7 taken 7328 times.
58104 if (evenRow) {
179 29312 svuint8_t u, v;
180 29312 svint32_t r_even_0 = svreinterpret_s32(r0_0);
181 29312 svint32_t r_even_1 = svreinterpret_s32(r0_1);
182 29312 svint32_t g_even_0 = svreinterpret_s32(g0_0);
183 29312 svint32_t g_even_1 = svreinterpret_s32(g0_1);
184 29312 svint32_t b_even_0 = svreinterpret_s32(b0_0);
185 29312 svint32_t b_even_1 = svreinterpret_s32(b0_1);
186
187 29312 ArrayOfTwo_svint32 r_even = {{std::ref(r_even_0), std::ref(r_even_1)}};
188 29312 ArrayOfTwo_svint32 g_even = {{std::ref(g_even_0), std::ref(g_even_1)}};
189 29312 ArrayOfTwo_svint32 b_even = {{std::ref(b_even_0), std::ref(b_even_1)}};
190
191 29312 rgb_to_uv(r_even, g_even, b_even, u, v);
192
193
8/8
✓ Branch 0 taken 7006 times.
✓ Branch 1 taken 322 times.
✓ Branch 2 taken 7006 times.
✓ Branch 3 taken 322 times.
✓ Branch 4 taken 7006 times.
✓ Branch 5 taken 322 times.
✓ Branch 6 taken 7006 times.
✓ Branch 7 taken 322 times.
29312 if (v_first) {
194 1288 swap_scalable(u, v);
195 1288 }
196
197 if constexpr (kInterleave) {
198 14824 svuint8x2_t uv = svcreate2(u, v);
199 14824 svst2_u8(pg_half, u_row + index, uv);
200 14824 } else {
201 14488 svst1(pg_half, u_row + index / 2, u);
202 14488 svst1(pg_half, v_row + index / 2, v);
203 }
204 29312 }
205 58104 }
206
207 85752 static svuint8_t rgb_to_y(ArrayOfFour_svuint32 r, ArrayOfFour_svuint32 g,
208 ArrayOfFour_svuint32 b) KLEIDICV_STREAMING {
209 85752 const uint32_t kShifted16 = (16 << kWeightScale);
210 85752 const uint32_t kHalfShift = (1 << (kWeightScale - 1));
211
212 85752 svbool_t pg = svptrue_b32();
213
214 // Y = kR*R + kG*G + kB*B + rounding bias
215 85752 svuint32_t bias = svdup_u32(kHalfShift + kShifted16);
216 85752 svuint32_t y_0 = bias;
217 85752 svuint32_t y_1 = bias;
218 85752 svuint32_t y_2 = bias;
219 85752 svuint32_t y_3 = bias;
220
221 85752 ArrayOfFour_svuint32 y = {
222 85752 {std::ref(y_0), std::ref(y_1), std::ref(y_2), std::ref(y_3)}};
223
224 KLEIDICV_FORCE_LOOP_UNROLL
225
8/8
✓ Branch 0 taken 21438 times.
✓ Branch 1 taken 85752 times.
✓ Branch 2 taken 21438 times.
✓ Branch 3 taken 85752 times.
✓ Branch 4 taken 21438 times.
✓ Branch 5 taken 85752 times.
✓ Branch 6 taken 21438 times.
✓ Branch 7 taken 85752 times.
428760 for (int i = 0; i < 4; i++) {
226 343008 y(i) = svmla_n_u32_x(pg, y(i), r(i), kRYWeight);
227 343008 y(i) = svmla_n_u32_x(pg, y(i), g(i), kGYWeight);
228 343008 y(i) = svmla_n_u32_x(pg, y(i), b(i), kBYWeight);
229 343008 }
230
231 85752 svuint16_t y_b = svshrnb_n_u32(y(0), kWeightScale - 8);
232 85752 y_b = svshrnt_n_u32(y_b, y(2), kWeightScale - 8); // 0, 1, 2, 3, 4, 5, 6, 7
233 85752 svuint16_t y_t = svshrnb_n_u32(y(1), kWeightScale - 8);
234 85752 y_t = svshrnt_n_u32(y_t, y(3),
235 kWeightScale - 8); // 8, 9, 10, 11, 12, 13, 14, 15
236
237 171504 return svuzp2_u8(svreinterpret_u8(y_b), svreinterpret_u8(y_t));
238 85752 }
239
240 14336 static svuint8_t compute_u_or_v_2x(ArrayOfFour_svint32 r,
241 ArrayOfFour_svint32 g,
242 ArrayOfFour_svint32 b, const int r_coeff,
243 const int g_coeff,
244 const int b_coeff) KLEIDICV_STREAMING {
245 14336 svbool_t pg = svptrue_b32();
246 14336 const int kHalfShift = (1 << (kWeightScale - 1));
247 14336 const int kShifted128 = (128 << kWeightScale);
248 14336 svint32_t bias = svdup_s32(kHalfShift + kShifted128);
249 14336 svint32_t uv0 = bias;
250 14336 svint32_t uv1 = bias;
251 14336 svint32_t uv2 = bias;
252 14336 svint32_t uv3 = bias;
253
254 14336 ArrayOfFour_svint32 uv = {
255 14336 {std::ref(uv0), std::ref(uv1), std::ref(uv2), std::ref(uv3)}};
256
257 KLEIDICV_FORCE_LOOP_UNROLL
258
8/8
✓ Branch 0 taken 3584 times.
✓ Branch 1 taken 14336 times.
✓ Branch 2 taken 3584 times.
✓ Branch 3 taken 14336 times.
✓ Branch 4 taken 3584 times.
✓ Branch 5 taken 14336 times.
✓ Branch 6 taken 3584 times.
✓ Branch 7 taken 14336 times.
71680 for (int i = 0; i < 4; i++) {
259 57344 uv(i) = svmla_n_s32_x(pg, uv(i), r(i), r_coeff);
260 57344 uv(i) = svmla_n_s32_x(pg, uv(i), g(i), g_coeff);
261 57344 uv(i) = svmla_n_s32_x(pg, uv(i), b(i), b_coeff);
262 57344 }
263
264 28672 svint16_t uv_b =
265 14336 svuzp2_s16(svreinterpret_s16(uv(0)), svreinterpret_s16(uv(1)));
266 28672 svint16_t uv_t =
267 14336 svuzp2_s16(svreinterpret_s16(uv(2)), svreinterpret_s16(uv(3)));
268
269 14336 uv_b = svasr_n_s16_x(pg, uv_b, kWeightScale - 16);
270 14336 uv_t = svasr_n_s16_x(pg, uv_t, kWeightScale - 16);
271
272 28672 return svuzp1_u8(svreinterpret_u8(uv_b), svreinterpret_u8(uv_t));
273 14336 }
274
275 7168 static void rgb_to_uv_2x(ArrayOfFour_svint32 r, ArrayOfFour_svint32 g,
276 ArrayOfFour_svint32 b, svuint8_t &u,
277 svuint8_t &v) KLEIDICV_STREAMING {
278 // ---------------- U (Cb) Component ----------------
279 // U = R * kRU + G * kGU + B * kBU + bias
280 7168 u = compute_u_or_v_2x(r, g, b, kRUWeight, kGUWeight, kBUWeight);
281
282 // ---------------- V (Cr) Component ----------------
283 // V = R * kBU + G * kGV + B * kBV + bias
284 7168 v = compute_u_or_v_2x(r, g, b, kBUWeight, kGVWeight, kBVWeight);
285 7168 }
286
287 58624 static svuint8_t compute_u_or_v(ArrayOfTwo_svint32 r, ArrayOfTwo_svint32 g,
288 ArrayOfTwo_svint32 b, const int r_coeff,
289 const int g_coeff,
290 const int b_coeff) KLEIDICV_STREAMING {
291 58624 svbool_t pg = svptrue_b32();
292 58624 const int kHalfShift = (1 << (kWeightScale - 1));
293 58624 const int kShifted128 = (128 << kWeightScale);
294
295 58624 svint32_t bias = svdup_s32(kHalfShift + kShifted128);
296 58624 svint32_t uv0 = bias;
297 58624 svint32_t uv1 = bias;
298
299 58624 ArrayOfTwo_svint32 uv = {{std::ref(uv0), std::ref(uv1)}};
300
301 KLEIDICV_FORCE_LOOP_UNROLL
302
8/8
✓ Branch 0 taken 14656 times.
✓ Branch 1 taken 29312 times.
✓ Branch 2 taken 14656 times.
✓ Branch 3 taken 29312 times.
✓ Branch 4 taken 14656 times.
✓ Branch 5 taken 29312 times.
✓ Branch 6 taken 14656 times.
✓ Branch 7 taken 29312 times.
175872 for (int i = 0; i < 2; i++) {
303 117248 uv(i) = svmla_n_s32_x(pg, uv(i), r(i), r_coeff);
304 117248 uv(i) = svmla_n_s32_x(pg, uv(i), g(i), g_coeff);
305 117248 uv(i) = svmla_n_s32_x(pg, uv(i), b(i), b_coeff);
306 117248 }
307
308 117248 svint16_t output =
309 58624 svuzp2_s16(svreinterpret_s16(uv(0)), svreinterpret_s16(uv(1)));
310
311 58624 output = svasr_n_s16_x(pg, output, kWeightScale - 16);
312
313 117248 return svuzp1_u8(svreinterpret_u8(output), svreinterpret_u8(output));
314 58624 }
315
316 29312 static void rgb_to_uv(ArrayOfTwo_svint32 r, ArrayOfTwo_svint32 g,
317 ArrayOfTwo_svint32 b, svuint8_t &u,
318 svuint8_t &v) KLEIDICV_STREAMING {
319 // ---------------- U (Cb) Component ----------------
320 // U = R * kRU + G * kGU + B * kBU + bias
321 29312 u = compute_u_or_v(r, g, b, kRUWeight, kGUWeight, kBUWeight);
322
323 // ---------------- V (Cr) Component ----------------
324 // V = R * kBU + G * kGV + B * kBV + bias
325 29312 v = compute_u_or_v(r, g, b, kBUWeight, kGVWeight, kBVWeight);
326 29312 }
327
328 85752 static void load_rgb(ArrayOfFour_svuint32 &r, ArrayOfFour_svuint32 &g,
329 ArrayOfFour_svuint32 &b, const uint8_t *src_row,
330 const size_t w, const svbool_t &pg0) KLEIDICV_STREAMING {
331 85752 svuint8_t b0, g0, r0;
332 if constexpr (kAlpha) {
333 // 4-channel input (RGBA or BGRA)
334 42876 svuint8x4_t vsrc0 = svld4(pg0, src_row + w);
335
336 42876 b0 = svget4(vsrc0, b_index);
337 42876 g0 = svget4(vsrc0, g_index);
338 42876 r0 = svget4(vsrc0, r_index);
339
340 42876 } else {
341 // 3-channel input (RGB or BGR)
342 42876 svuint8x3_t vsrc0 = svld3(pg0, src_row + w);
343
344 42876 b0 = svget3(vsrc0, b_index);
345 42876 g0 = svget3(vsrc0, g_index);
346 42876 r0 = svget3(vsrc0, r_index);
347 42876 }
348 85752 svuint16_t r0_lo = svmovlb(r0);
349 85752 svuint16_t r0_hi = svmovlt(r0);
350 85752 r(0) = svunpklo(r0_lo); // 0, 2, 4, 6
351 85752 r(1) = svunpkhi(r0_lo); // 8, 10, 12, 14
352 85752 r(2) = svunpklo(r0_hi); // 1, 3, 5, 7
353 85752 r(3) = svunpkhi(r0_hi); // 9, 11, 13, 15
354
355 85752 svuint16_t g0_lo = svmovlb(g0);
356 85752 svuint16_t g0_hi = svmovlt(g0);
357 85752 g(0) = svunpklo(g0_lo);
358 85752 g(1) = svunpkhi(g0_lo);
359 85752 g(2) = svunpklo(g0_hi);
360 85752 g(3) = svunpkhi(g0_hi);
361
362 85752 svuint16_t b0_lo = svmovlb(b0);
363 85752 svuint16_t b0_hi = svmovlt(b0);
364 85752 b(0) = svunpklo(b0_lo);
365 85752 b(1) = svunpkhi(b0_lo);
366 85752 b(2) = svunpklo(b0_hi);
367 85752 b(3) = svunpkhi(b0_hi);
368 85752 }
369
370 13824 static void load_rgb_2x(ArrayOfFour_svuint32 &r0, ArrayOfFour_svuint32 &g0,
371 ArrayOfFour_svuint32 &b0, ArrayOfFour_svuint32 &r1,
372 ArrayOfFour_svuint32 &g1, ArrayOfFour_svuint32 &b1,
373 const uint8_t *src_row, const size_t w,
374 const svbool_t pg0,
375 const svbool_t pg1) KLEIDICV_STREAMING {
376 13824 const size_t kVectorLength = svcntb();
377 13824 load_rgb(r0, g0, b0, src_row, w, pg0);
378
379 13824 load_rgb(r1, g1, b1, src_row, w + scn * kVectorLength, pg1);
380 13824 }
381
382 static constexpr int b_index = RGB ? 2 : 0;
383 static constexpr int g_index = 1;
384 static constexpr int r_index = RGB ? 0 : 2;
385 static constexpr size_t scn = kAlpha ? 4 : 3;
386 };
387
388 } // namespace KLEIDICV_TARGET_NAMESPACE
389
390 #endif // KLEIDICV_RGB_TO_YUV420_SC_H
391