KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/yuv_to_rgb_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 107 107 100.0%
Functions: 32 32 100.0%
Branches: 56 56 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_YUV_TO_RGB_SC_H
6 #define KLEIDICV_YUV_TO_RGB_SC_H
7
8 #include <limits>
9
10 #include "kleidicv/conversions/yuv_to_rgb.h"
11 #include "kleidicv/kleidicv.h"
12 #include "kleidicv/sve2.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
// Per-vector YUV -> RGB conversion operation written with SVE2 intrinsics.
//
// Loads three-channel interleaved 8-bit pixels (channel 0 is used as Y,
// channel 1 as U and channel 2 as V) and stores three or four interleaved
// 8-bit channels per pixel.
//
// Template parameters:
//   BGR    - when true the output channel order is B, G, R; otherwise R, G, B.
//   kAlpha - when true a fourth channel filled with alpha_value is appended.
//
// NOTE(review): Context, UnrollOnce, VecTraits, kPreShift and the k*Weight /
// k*Delta4 constants are not defined in this file; presumably they come from
// the included kleidicv headers - confirm there.
16 template <bool BGR, bool kAlpha>
17 class YUVToRGB : public UnrollOnce {
18 public:
19 using ContextType = Context;
20 using ScalarType = uint8_t;
21 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
22 using VectorType = VecTraits::VectorType;
23 using Vector3Type = VecTraits::Vector3Type;
// Tuple type handed to the store: four vectors with alpha, three without.
24 using RawDestinationVectorType =
25 typename std::conditional<kAlpha, svuint8x4_t, svuint8x3_t>::type;
26
27 // Returns the number of channels in the output image (4 with alpha, else 3).
28 924 static constexpr size_t output_channels() KLEIDICV_STREAMING {
29 924 return kAlpha ? /* RGBA */ 4 : /* RGB */ 3;
30 }
31
// Converts one vector's worth of pixels.
//   ctx - supplies the predicate used for the load and the store.
//   src - interleaved 3-channel source pixels.
//   dst - interleaved destination pixels (3 or 4 channels, see kAlpha).
32 3452 void vector_path(ContextType ctx, const ScalarType *src,
33 ScalarType *dst) KLEIDICV_STREAMING {
34 3452 auto pg = ctx.predicate();
// De-interleaving load: one vector per source channel.
35 3452 Vector3Type svsrc = svld3(pg, src);
// Widen every channel from 8 to 16 bits. The "b" (bottom) forms take the
// even-indexed bytes and the "t" (top) forms the odd-indexed bytes, hence
// the _0 / _1 pairs. Y is widened unshifted; U and V are also shifted left
// by kPreShift.
36 3452 svint16_t y_0 = svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 0), 0));
37 3452 svint16_t y_1 = svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 0), 0));
38 6904 svint16_t u4_0 =
39 3452 svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 1), kPreShift));
40 6904 svint16_t u4_1 =
41 3452 svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 1), kPreShift));
42 6904 svint16_t v4_0 =
43 3452 svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 2), kPreShift));
44 6904 svint16_t v4_1 =
45 3452 svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 2), kPreShift));
46 3452 svuint8_t r, g, b;
47
48 // Compute B value in 32-bit precision
49 {
50 // Multiplication is done with uint16_t because UBWeight only fits in
51 // unsigned 16-bit
// Widening 16 x 16 -> 32-bit multiplies of the even ("b") and odd ("t")
// 16-bit lanes of U.
52 6904 svint32_t b_00 = svreinterpret_s32_u32(
53 3452 svmullb(svreinterpret_u16_s16(u4_0), kUnsignedUBWeight));
54 6904 svint32_t b_01 = svreinterpret_s32_u32(
55 3452 svmullt(svreinterpret_u16_s16(u4_0), kUnsignedUBWeight));
56 6904 svint32_t b_10 = svreinterpret_s32_u32(
57 3452 svmullb(svreinterpret_u16_s16(u4_1), kUnsignedUBWeight));
58 6904 svint32_t b_11 = svreinterpret_s32_u32(
59 3452 svmullt(svreinterpret_u16_s16(u4_1), kUnsignedUBWeight));
60
61 3452 b_00 = svadd_n_s32_x(svptrue_b32(), b_00, kBDelta4);
62 3452 b_01 = svadd_n_s32_x(svptrue_b32(), b_01, kBDelta4);
63 3452 b_10 = svadd_n_s32_x(svptrue_b32(), b_10, kBDelta4);
64 3452 b_11 = svadd_n_s32_x(svptrue_b32(), b_11, kBDelta4);
65
// svtrn2 on the 16-bit view keeps the odd-indexed halfwords of both
// inputs, alternating between them - presumably the upper 16 bits of each
// 32-bit sum, back in the original lane order. Y is then added in 16 bits.
66 6904 svint16_t b_0 = svadd_x(
67 3452 svptrue_b16(), y_0,
68 3452 svtrn2_s16(svreinterpret_s16_s32(b_00), svreinterpret_s16_s32(b_01)));
69 6904 svint16_t b_1 = svadd_x(
70 3452 svptrue_b16(), y_1,
71 3452 svtrn2_s16(svreinterpret_s16_s32(b_10), svreinterpret_s16_s32(b_11)));
72
// Saturating narrow from signed 16-bit to unsigned 8-bit: the results for
// the even source bytes go to the bottom byte lanes, those for the odd
// source bytes to the top byte lanes, restoring the source byte order.
73 3452 b = svqxtunt(svqxtunb(b_0), b_1);
74 3452 }
75
76 // Compute G value in 32-bit precision
77 {
// Widening multiply-accumulate: start from kGDelta4, add U * kUGWeight and
// then V * kVGWeight, separately for even ("b") and odd ("t") lanes.
78 3452 svint32_t svg_delta4 = svdup_n_s32(kGDelta4);
79 3452 svint32_t g_00 = svmlalb(svg_delta4, u4_0, kUGWeight);
80 3452 svint32_t g_01 = svmlalt(svg_delta4, u4_0, kUGWeight);
81 3452 svint32_t g_10 = svmlalb(svg_delta4, u4_1, kUGWeight);
82 3452 svint32_t g_11 = svmlalt(svg_delta4, u4_1, kUGWeight);
83
84 3452 g_00 = svmlalb(g_00, v4_0, kVGWeight);
85 3452 g_01 = svmlalt(g_01, v4_0, kVGWeight);
86 3452 g_10 = svmlalb(g_10, v4_1, kVGWeight);
87 3452 g_11 = svmlalt(g_11, v4_1, kVGWeight);
88
// Same recombination as for B: odd halfwords via svtrn2, then add Y.
89 6904 svint16_t g_0 = svadd_x(
90 3452 svptrue_b16(), y_0,
91 3452 svtrn2_s16(svreinterpret_s16_s32(g_00), svreinterpret_s16_s32(g_01)));
92 6904 svint16_t g_1 = svadd_x(
93 3452 svptrue_b16(), y_1,
94 3452 svtrn2_s16(svreinterpret_s16_s32(g_10), svreinterpret_s16_s32(g_11)));
95
96 3452 g = svqxtunt(svqxtunb(g_0), g_1);
97 3452 }
98
99 // Compute R value in 32-bit precision
100 {
// Widening multiply-accumulate of V * kVRWeight onto kRDelta4.
101 3452 svint32_t svr_delta4 = svdup_n_s32(kRDelta4);
102 3452 svint32_t r_00 = svmlalb(svr_delta4, v4_0, kVRWeight);
103 3452 svint32_t r_01 = svmlalt(svr_delta4, v4_0, kVRWeight);
104 3452 svint32_t r_10 = svmlalb(svr_delta4, v4_1, kVRWeight);
105 3452 svint32_t r_11 = svmlalt(svr_delta4, v4_1, kVRWeight);
106
// Same recombination as for B: odd halfwords via svtrn2, then add Y.
107 6904 svint16_t r_0 = svadd_x(
108 3452 svptrue_b16(), y_0,
109 3452 svtrn2_s16(svreinterpret_s16_s32(r_00), svreinterpret_s16_s32(r_01)));
110 6904 svint16_t r_1 = svadd_x(
111 3452 svptrue_b16(), y_1,
112 3452 svtrn2_s16(svreinterpret_s16_s32(r_10), svreinterpret_s16_s32(r_11)));
113
114 3452 r = svqxtunt(svqxtunb(r_0), r_1);
115 3452 }
116
// Assemble the output tuple in the requested channel order and store it.
117 if constexpr (kAlpha) {
118 1726 RawDestinationVectorType rgb;
119 if constexpr (BGR) {
120 863 rgb = svcreate4(b, g, r, svdup_u8(alpha_value));
121 } else {
122 863 rgb = svcreate4(r, g, b, svdup_u8(alpha_value));
123 }
124
125 // Store the four channels interleaved per pixel (values are already 8-bit).
126 1726 svst4_u8(pg, dst, rgb);
127 1726 } else {
128 1726 RawDestinationVectorType rgb;
129 if constexpr (BGR) {
130 863 rgb = svcreate3(b, g, r);
131 } else {
132 863 rgb = svcreate3(r, g, b);
133 }
134
135 // Store the three channels interleaved per pixel (values are already 8-bit).
136 1726 svst3_u8(pg, dst, rgb);
137 1726 }
138 3452 }
// Value written to the alpha channel when kAlpha is true: 255.
139 static constexpr uint8_t alpha_value = std::numeric_limits<uint8_t>::max();
140 }; // end of class YUVToRGB<bool BGR, bool kAlpha>
141
// Validates the arguments and runs `operation` over every row of the image.
//
//   operation  - conversion object; its output_channels() gives the number of
//                destination channels.
//   src        - source image, treated as 3 channels per pixel.
//   src_stride - source row stride (presumably in bytes - confirm against Rows).
//   dst        - destination image.
//   dst_stride - destination row stride (presumably in bytes - confirm).
//   width      - image width passed to Rectangle (presumably in pixels).
//   height     - image height passed to Rectangle (presumably in rows).
//
// Returns KLEIDICV_OK once the rows have been processed.
// NOTE(review): the CHECK_* macros are defined elsewhere; they presumably
// return an error code early when a check fails - confirm in their definition.
142 template <typename OperationType, typename ScalarType>
143 1180 kleidicv_error_t yuv2rgb_operation(OperationType operation,
144 const ScalarType *src, size_t src_stride,
145 ScalarType *dst, size_t dst_stride,
146 size_t width,
147 size_t height) KLEIDICV_STREAMING {
148
16/16
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 279 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 279 times.
✓ Branch 4 taken 16 times.
✓ Branch 5 taken 279 times.
✓ Branch 6 taken 16 times.
✓ Branch 7 taken 279 times.
✓ Branch 8 taken 16 times.
✓ Branch 9 taken 279 times.
✓ Branch 10 taken 16 times.
✓ Branch 11 taken 279 times.
✓ Branch 12 taken 16 times.
✓ Branch 13 taken 279 times.
✓ Branch 14 taken 16 times.
✓ Branch 15 taken 279 times.
1180 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
149
16/16
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 263 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 263 times.
✓ Branch 4 taken 16 times.
✓ Branch 5 taken 263 times.
✓ Branch 6 taken 16 times.
✓ Branch 7 taken 263 times.
✓ Branch 8 taken 16 times.
✓ Branch 9 taken 263 times.
✓ Branch 10 taken 16 times.
✓ Branch 11 taken 263 times.
✓ Branch 12 taken 16 times.
✓ Branch 13 taken 263 times.
✓ Branch 14 taken 16 times.
✓ Branch 15 taken 263 times.
1116 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
150
24/24
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 247 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 231 times.
✓ Branch 4 taken 32 times.
✓ Branch 5 taken 231 times.
✓ Branch 6 taken 16 times.
✓ Branch 7 taken 247 times.
✓ Branch 8 taken 16 times.
✓ Branch 9 taken 231 times.
✓ Branch 10 taken 32 times.
✓ Branch 11 taken 231 times.
✓ Branch 12 taken 16 times.
✓ Branch 13 taken 247 times.
✓ Branch 14 taken 16 times.
✓ Branch 15 taken 231 times.
✓ Branch 16 taken 32 times.
✓ Branch 17 taken 231 times.
✓ Branch 18 taken 16 times.
✓ Branch 19 taken 247 times.
✓ Branch 20 taken 16 times.
✓ Branch 21 taken 231 times.
✓ Branch 22 taken 32 times.
✓ Branch 23 taken 231 times.
1052 CHECK_IMAGE_SIZE(width, height);
151
// The source always has 3 channels per pixel; the destination channel count
// is taken from the operation.
152 924 Rectangle rect{width, height};
153 924 Rows src_rows{src, src_stride, 3};
154 924 Rows dst_rows{dst, dst_stride, operation.output_channels()};
155
156 924 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
157 924 return KLEIDICV_OK;
158 1180 }
159
// Converts an interleaved 3-channel 8-bit YUV image to 3-channel RGB
// (YUVToRGB<BGR = false, kAlpha = false>).
//   src / src_stride - source image and its row stride (presumably bytes).
//   dst / dst_stride - destination image and its row stride (presumably bytes).
//   width / height   - image dimensions (presumably in pixels).
// Returns the status produced by yuv2rgb_operation.
160 KLEIDICV_TARGET_FN_ATTRS
161 295 static kleidicv_error_t yuv_to_rgb_u8_sc(const uint8_t *src, size_t src_stride,
162 uint8_t *dst, size_t dst_stride,
163 size_t width,
164 size_t height) KLEIDICV_STREAMING {
165 295 YUVToRGB<false, false> operation;
166 885 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
167 295 height);
168 295 }
169
// Converts an interleaved 3-channel 8-bit YUV image to 4-channel RGBA
// (YUVToRGB<BGR = false, kAlpha = true>); the alpha channel is filled with
// YUVToRGB::alpha_value.
//   src / src_stride - source image and its row stride (presumably bytes).
//   dst / dst_stride - destination image and its row stride (presumably bytes).
//   width / height   - image dimensions (presumably in pixels).
// Returns the status produced by yuv2rgb_operation.
170 KLEIDICV_TARGET_FN_ATTRS
171 295 static kleidicv_error_t yuv_to_rgba_u8_sc(const uint8_t *src, size_t src_stride,
172 uint8_t *dst, size_t dst_stride,
173 size_t width,
174 size_t height) KLEIDICV_STREAMING {
175 295 YUVToRGB<false, true> operation;
176 885 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
177 295 height);
178 295 }
179
// Converts an interleaved 3-channel 8-bit YUV image to 3-channel BGR
// (YUVToRGB<BGR = true, kAlpha = false>).
//   src / src_stride - source image and its row stride (presumably bytes).
//   dst / dst_stride - destination image and its row stride (presumably bytes).
//   width / height   - image dimensions (presumably in pixels).
// Returns the status produced by yuv2rgb_operation.
180 KLEIDICV_TARGET_FN_ATTRS
181 295 static kleidicv_error_t yuv_to_bgr_u8_sc(const uint8_t *src, size_t src_stride,
182 uint8_t *dst, size_t dst_stride,
183 size_t width,
184 size_t height) KLEIDICV_STREAMING {
185 295 YUVToRGB<true, false> operation;
186 885 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
187 295 height);
188 295 }
189
// Converts an interleaved 3-channel 8-bit YUV image to 4-channel BGRA
// (YUVToRGB<BGR = true, kAlpha = true>); the alpha channel is filled with
// YUVToRGB::alpha_value.
//   src / src_stride - source image and its row stride (presumably bytes).
//   dst / dst_stride - destination image and its row stride (presumably bytes).
//   width / height   - image dimensions (presumably in pixels).
// Returns the status produced by yuv2rgb_operation.
190 KLEIDICV_TARGET_FN_ATTRS
191 295 static kleidicv_error_t yuv_to_bgra_u8_sc(const uint8_t *src, size_t src_stride,
192 uint8_t *dst, size_t dst_stride,
193 size_t width,
194 size_t height) KLEIDICV_STREAMING {
195 295 YUVToRGB<true, true> operation;
196 885 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
197 295 height);
198 295 }
199
200 } // namespace KLEIDICV_TARGET_NAMESPACE
201
202 #endif // KLEIDICV_YUV_TO_RGB_SC_H
203