KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/yuv444_to_rgb_sc.h
Date: 2026-01-20 20:58:59
Exec Total Coverage
Lines: 107 107 100.0%
Functions: 26 26 100.0%
Branches: 61 61 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_YUV444_TO_RGB_SC_H
6 #define KLEIDICV_YUV444_TO_RGB_SC_H
7
8 #include <limits>
9
10 #include "kleidicv/conversions/yuv_to_rgb.h"
11 #include "kleidicv/kleidicv.h"
12 #include "kleidicv/sve2.h"
13 #include "yuv444_coefficients.h"
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
16 template <bool BGR, bool kAlpha>
17 class YUVToRGB : public UnrollOnce {
18 public:
19 using ContextType = Context;
20 using ScalarType = uint8_t;
21 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
22 using VectorType = VecTraits::VectorType;
23 using Vector3Type = VecTraits::Vector3Type;
24 using RawDestinationVectorType =
25 typename std::conditional<kAlpha, svuint8x4_t, svuint8x3_t>::type;
26
27 // Returns the number of channels in the output image.
28 1040 static constexpr size_t output_channels() KLEIDICV_STREAMING {
29 1040 return kAlpha ? /* RGBA */ 4 : /* RGB */ 3;
30 }
31
32 KLEIDICV_FORCE_INLINE
33 3760 void vector_path(ContextType ctx, const ScalarType *src,
34 ScalarType *dst) KLEIDICV_STREAMING {
35 3760 auto pg = ctx.predicate();
36 3760 Vector3Type svsrc = svld3(pg, src);
37 3760 svint16_t y_0 = svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 0), 0));
38 3760 svint16_t y_1 = svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 0), 0));
39 7520 svint16_t u4_0 =
40 3760 svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 1), kPreShift));
41 7520 svint16_t u4_1 =
42 3760 svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 1), kPreShift));
43 7520 svint16_t v4_0 =
44 3760 svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 2), kPreShift));
45 7520 svint16_t v4_1 =
46 3760 svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 2), kPreShift));
47 3760 svuint8_t r, g, b;
48
49 // Compute B value in 32-bit precision
50 {
51 // Multiplication is done with uint16_t because UBWeight only fits in
52 // unsigned 16-bit
53 7520 svint32_t b_00 = svreinterpret_s32_u32(
54 3760 svmullb(svreinterpret_u16_s16(u4_0), kUnsignedUBWeight));
55 7520 svint32_t b_01 = svreinterpret_s32_u32(
56 3760 svmullt(svreinterpret_u16_s16(u4_0), kUnsignedUBWeight));
57 7520 svint32_t b_10 = svreinterpret_s32_u32(
58 3760 svmullb(svreinterpret_u16_s16(u4_1), kUnsignedUBWeight));
59 7520 svint32_t b_11 = svreinterpret_s32_u32(
60 3760 svmullt(svreinterpret_u16_s16(u4_1), kUnsignedUBWeight));
61
62 3760 b_00 = svadd_n_s32_x(svptrue_b32(), b_00, kBDelta4);
63 3760 b_01 = svadd_n_s32_x(svptrue_b32(), b_01, kBDelta4);
64 3760 b_10 = svadd_n_s32_x(svptrue_b32(), b_10, kBDelta4);
65 3760 b_11 = svadd_n_s32_x(svptrue_b32(), b_11, kBDelta4);
66
67 7520 svint16_t b_0 = svadd_x(
68 3760 svptrue_b16(), y_0,
69 3760 svtrn2_s16(svreinterpret_s16_s32(b_00), svreinterpret_s16_s32(b_01)));
70 7520 svint16_t b_1 = svadd_x(
71 3760 svptrue_b16(), y_1,
72 3760 svtrn2_s16(svreinterpret_s16_s32(b_10), svreinterpret_s16_s32(b_11)));
73
74 3760 b = svqxtunt(svqxtunb(b_0), b_1);
75 3760 }
76
77 // Compute G value in 32-bit precision
78 {
79 3760 svint32_t svg_delta4 = svdup_n_s32(kGDelta4);
80 3760 svint32_t g_00 = svmlalb(svg_delta4, u4_0, kUGWeight);
81 3760 svint32_t g_01 = svmlalt(svg_delta4, u4_0, kUGWeight);
82 3760 svint32_t g_10 = svmlalb(svg_delta4, u4_1, kUGWeight);
83 3760 svint32_t g_11 = svmlalt(svg_delta4, u4_1, kUGWeight);
84
85 3760 g_00 = svmlalb(g_00, v4_0, kVGWeight);
86 3760 g_01 = svmlalt(g_01, v4_0, kVGWeight);
87 3760 g_10 = svmlalb(g_10, v4_1, kVGWeight);
88 3760 g_11 = svmlalt(g_11, v4_1, kVGWeight);
89
90 7520 svint16_t g_0 = svadd_x(
91 3760 svptrue_b16(), y_0,
92 3760 svtrn2_s16(svreinterpret_s16_s32(g_00), svreinterpret_s16_s32(g_01)));
93 7520 svint16_t g_1 = svadd_x(
94 3760 svptrue_b16(), y_1,
95 3760 svtrn2_s16(svreinterpret_s16_s32(g_10), svreinterpret_s16_s32(g_11)));
96
97 3760 g = svqxtunt(svqxtunb(g_0), g_1);
98 3760 }
99
100 // Compute R value in 32-bit precision
101 {
102 3760 svint32_t svr_delta4 = svdup_n_s32(kRDelta4);
103 3760 svint32_t r_00 = svmlalb(svr_delta4, v4_0, kVRWeight);
104 3760 svint32_t r_01 = svmlalt(svr_delta4, v4_0, kVRWeight);
105 3760 svint32_t r_10 = svmlalb(svr_delta4, v4_1, kVRWeight);
106 3760 svint32_t r_11 = svmlalt(svr_delta4, v4_1, kVRWeight);
107
108 7520 svint16_t r_0 = svadd_x(
109 3760 svptrue_b16(), y_0,
110 3760 svtrn2_s16(svreinterpret_s16_s32(r_00), svreinterpret_s16_s32(r_01)));
111 7520 svint16_t r_1 = svadd_x(
112 3760 svptrue_b16(), y_1,
113 3760 svtrn2_s16(svreinterpret_s16_s32(r_10), svreinterpret_s16_s32(r_11)));
114
115 3760 r = svqxtunt(svqxtunb(r_0), r_1);
116 3760 }
117
118 if constexpr (kAlpha) {
119 1880 RawDestinationVectorType rgb;
120 if constexpr (BGR) {
121 940 rgb = svcreate4(b, g, r, svdup_u8(alpha_value));
122 } else {
123 940 rgb = svcreate4(r, g, b, svdup_u8(alpha_value));
124 }
125
126 // Narrow to 8 bits and store the pixels with deinterleaving.
127 1880 svst4_u8(pg, dst, rgb);
128 1880 } else {
129 1880 RawDestinationVectorType rgb;
130 if constexpr (BGR) {
131 940 rgb = svcreate3(b, g, r);
132 } else {
133 940 rgb = svcreate3(r, g, b);
134 }
135
136 // Narrow to 8 bits and store the pixels with deinterleaving.
137 1880 svst3_u8(pg, dst, rgb);
138 1880 }
139 3760 }
140 static constexpr uint8_t alpha_value = std::numeric_limits<uint8_t>::max();
141 }; // end of class YUVToRGB<bool BGR>
142
143 template <typename OperationType, typename ScalarType>
144 1344 kleidicv_error_t yuv2rgb_operation(OperationType operation,
145 const ScalarType *src, size_t src_stride,
146 ScalarType *dst, size_t dst_stride,
147 size_t width,
148 size_t height) KLEIDICV_STREAMING {
149
16/16
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 320 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 320 times.
✓ Branch 4 taken 16 times.
✓ Branch 5 taken 320 times.
✓ Branch 6 taken 16 times.
✓ Branch 7 taken 320 times.
✓ Branch 8 taken 16 times.
✓ Branch 9 taken 320 times.
✓ Branch 10 taken 16 times.
✓ Branch 11 taken 320 times.
✓ Branch 12 taken 16 times.
✓ Branch 13 taken 320 times.
✓ Branch 14 taken 16 times.
✓ Branch 15 taken 320 times.
1344 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
150
16/16
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 304 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 304 times.
✓ Branch 4 taken 16 times.
✓ Branch 5 taken 304 times.
✓ Branch 6 taken 16 times.
✓ Branch 7 taken 304 times.
✓ Branch 8 taken 16 times.
✓ Branch 9 taken 304 times.
✓ Branch 10 taken 16 times.
✓ Branch 11 taken 304 times.
✓ Branch 12 taken 16 times.
✓ Branch 13 taken 304 times.
✓ Branch 14 taken 16 times.
✓ Branch 15 taken 304 times.
1280 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
151
24/24
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 280 times.
✓ Branch 2 taken 20 times.
✓ Branch 3 taken 260 times.
✓ Branch 4 taken 44 times.
✓ Branch 5 taken 260 times.
✓ Branch 6 taken 24 times.
✓ Branch 7 taken 280 times.
✓ Branch 8 taken 20 times.
✓ Branch 9 taken 260 times.
✓ Branch 10 taken 44 times.
✓ Branch 11 taken 260 times.
✓ Branch 12 taken 24 times.
✓ Branch 13 taken 280 times.
✓ Branch 14 taken 20 times.
✓ Branch 15 taken 260 times.
✓ Branch 16 taken 44 times.
✓ Branch 17 taken 260 times.
✓ Branch 18 taken 24 times.
✓ Branch 19 taken 280 times.
✓ Branch 20 taken 20 times.
✓ Branch 21 taken 260 times.
✓ Branch 22 taken 44 times.
✓ Branch 23 taken 260 times.
1216 CHECK_IMAGE_SIZE(width, height);
152
153 1040 Rectangle rect{width, height};
154 1040 Rows src_rows{src, src_stride, 3};
155 1040 Rows dst_rows{dst, dst_stride, operation.output_channels()};
156
157 1040 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
158 1040 return KLEIDICV_OK;
159 1344 }
160
161 KLEIDICV_TARGET_FN_ATTRS
162 1440 static kleidicv_error_t yuv444_to_rgb_u8_sc(
163 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
164 size_t width, size_t height,
165 kleidicv_color_conversion_t color_format) KLEIDICV_STREAMING {
166
5/5
✓ Branch 0 taken 336 times.
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 336 times.
✓ Branch 3 taken 336 times.
✓ Branch 4 taken 336 times.
1440 switch (color_format) {
167 case KLEIDICV_YUV444_TO_RGB: {
168 336 YUVToRGB<false, false> operation;
169 672 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
170 336 width, height);
171 336 }
172
173 case KLEIDICV_YUV444_TO_BGR: {
174 336 YUVToRGB<true, false> operation;
175 672 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
176 336 width, height);
177 336 }
178
179 case KLEIDICV_YUV444_TO_RGBA: {
180 336 YUVToRGB<false, true> operation;
181 672 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
182 336 width, height);
183 336 }
184
185 case KLEIDICV_YUV444_TO_BGRA: {
186 336 YUVToRGB<true, true> operation;
187 672 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
188 336 width, height);
189 336 }
190
191 default:
192 96 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
193 }
194
195 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
196 1440 }
197
198 } // namespace KLEIDICV_TARGET_NAMESPACE
199
200 #endif // KLEIDICV_YUV444_TO_RGB_SC_H
201