KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/yuv_to_rgb_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 107 107 100.0%
Functions: 32 32 100.0%
Branches: 56 56 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_YUV_TO_RGB_SC_H
6 #define KLEIDICV_YUV_TO_RGB_SC_H
7
8 #include <limits>
9
10 #include "kleidicv/conversions/yuv_to_rgb.h"
11 #include "kleidicv/kleidicv.h"
12 #include "kleidicv/sve2.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
16 template <bool BGR, bool kAlpha>
17 class YUVToRGB : public UnrollOnce {
18 public:
19 using ContextType = Context;
20 using ScalarType = uint8_t;
21 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
22 using VectorType = VecTraits::VectorType;
23 using Vector3Type = VecTraits::Vector3Type;
24 using RawDestinationVectorType =
25 typename std::conditional<kAlpha, svuint8x4_t, svuint8x3_t>::type;
26
27 // Returns the number of channels in the output image.
28 632 static constexpr size_t output_channels() KLEIDICV_STREAMING {
29 632 return kAlpha ? /* RGBA */ 4 : /* RGB */ 3;
30 }
31
32 2764 void vector_path(ContextType ctx, const ScalarType *src,
33 ScalarType *dst) KLEIDICV_STREAMING {
34 2764 auto pg = ctx.predicate();
35 2764 Vector3Type svsrc = svld3(pg, src);
36 2764 svint16_t y_0 = svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 0), 0));
37 2764 svint16_t y_1 = svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 0), 0));
38 5528 svint16_t u4_0 =
39 2764 svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 1), kPreShift));
40 5528 svint16_t u4_1 =
41 2764 svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 1), kPreShift));
42 5528 svint16_t v4_0 =
43 2764 svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 2), kPreShift));
44 5528 svint16_t v4_1 =
45 2764 svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 2), kPreShift));
46 2764 svuint8_t r, g, b;
47
48 // Compute B value in 32-bit precision
49 {
50 // The multiplication is done on uint16_t lanes because UBWeight only fits
51 // in an unsigned 16-bit value.
52 5528 svint32_t b_00 = svreinterpret_s32_u32(
53 2764 svmullb(svreinterpret_u16_s16(u4_0), kUnsignedUBWeight));
54 5528 svint32_t b_01 = svreinterpret_s32_u32(
55 2764 svmullt(svreinterpret_u16_s16(u4_0), kUnsignedUBWeight));
56 5528 svint32_t b_10 = svreinterpret_s32_u32(
57 2764 svmullb(svreinterpret_u16_s16(u4_1), kUnsignedUBWeight));
58 5528 svint32_t b_11 = svreinterpret_s32_u32(
59 2764 svmullt(svreinterpret_u16_s16(u4_1), kUnsignedUBWeight));
60
61 2764 b_00 = svadd_n_s32_x(svptrue_b32(), b_00, kBDelta4);
62 2764 b_01 = svadd_n_s32_x(svptrue_b32(), b_01, kBDelta4);
63 2764 b_10 = svadd_n_s32_x(svptrue_b32(), b_10, kBDelta4);
64 2764 b_11 = svadd_n_s32_x(svptrue_b32(), b_11, kBDelta4);
65
66 5528 svint16_t b_0 = svadd_x(
67 2764 svptrue_b16(), y_0,
68 2764 svtrn2_s16(svreinterpret_s16_s32(b_00), svreinterpret_s16_s32(b_01)));
69 5528 svint16_t b_1 = svadd_x(
70 2764 svptrue_b16(), y_1,
71 2764 svtrn2_s16(svreinterpret_s16_s32(b_10), svreinterpret_s16_s32(b_11)));
72
73 2764 b = svqxtunt(svqxtunb(b_0), b_1);
74 2764 }
75
76 // Compute G value in 32-bit precision
77 {
78 2764 svint32_t svg_delta4 = svdup_n_s32(kGDelta4);
79 2764 svint32_t g_00 = svmlalb(svg_delta4, u4_0, kUGWeight);
80 2764 svint32_t g_01 = svmlalt(svg_delta4, u4_0, kUGWeight);
81 2764 svint32_t g_10 = svmlalb(svg_delta4, u4_1, kUGWeight);
82 2764 svint32_t g_11 = svmlalt(svg_delta4, u4_1, kUGWeight);
83
84 2764 g_00 = svmlalb(g_00, v4_0, kVGWeight);
85 2764 g_01 = svmlalt(g_01, v4_0, kVGWeight);
86 2764 g_10 = svmlalb(g_10, v4_1, kVGWeight);
87 2764 g_11 = svmlalt(g_11, v4_1, kVGWeight);
88
89 5528 svint16_t g_0 = svadd_x(
90 2764 svptrue_b16(), y_0,
91 2764 svtrn2_s16(svreinterpret_s16_s32(g_00), svreinterpret_s16_s32(g_01)));
92 5528 svint16_t g_1 = svadd_x(
93 2764 svptrue_b16(), y_1,
94 2764 svtrn2_s16(svreinterpret_s16_s32(g_10), svreinterpret_s16_s32(g_11)));
95
96 2764 g = svqxtunt(svqxtunb(g_0), g_1);
97 2764 }
98
99 // Compute R value in 32-bit precision
100 {
101 2764 svint32_t svr_delta4 = svdup_n_s32(kRDelta4);
102 2764 svint32_t r_00 = svmlalb(svr_delta4, v4_0, kVRWeight);
103 2764 svint32_t r_01 = svmlalt(svr_delta4, v4_0, kVRWeight);
104 2764 svint32_t r_10 = svmlalb(svr_delta4, v4_1, kVRWeight);
105 2764 svint32_t r_11 = svmlalt(svr_delta4, v4_1, kVRWeight);
106
107 5528 svint16_t r_0 = svadd_x(
108 2764 svptrue_b16(), y_0,
109 2764 svtrn2_s16(svreinterpret_s16_s32(r_00), svreinterpret_s16_s32(r_01)));
110 5528 svint16_t r_1 = svadd_x(
111 2764 svptrue_b16(), y_1,
112 2764 svtrn2_s16(svreinterpret_s16_s32(r_10), svreinterpret_s16_s32(r_11)));
113
114 2764 r = svqxtunt(svqxtunb(r_0), r_1);
115 2764 }
116
117 if constexpr (kAlpha) {
118 1382 RawDestinationVectorType rgb;
119 if constexpr (BGR) {
120 691 rgb = svcreate4(b, g, r, svdup_u8(alpha_value));
121 } else {
122 691 rgb = svcreate4(r, g, b, svdup_u8(alpha_value));
123 }
124
125 // Store the four already-narrowed 8-bit channel vectors, interleaving them into packed pixels.
126 1382 svst4_u8(pg, dst, rgb);
127 1382 } else {
128 1382 RawDestinationVectorType rgb;
129 if constexpr (BGR) {
130 691 rgb = svcreate3(b, g, r);
131 } else {
132 691 rgb = svcreate3(r, g, b);
133 }
134
135 // Store the three already-narrowed 8-bit channel vectors, interleaving them into packed pixels.
136 1382 svst3_u8(pg, dst, rgb);
137 1382 }
138 2764 }
139 static constexpr uint8_t alpha_value = std::numeric_limits<uint8_t>::max();
140 }; // end of class YUVToRGB<bool BGR, bool kAlpha>
141
142 template <typename OperationType, typename ScalarType>
143 824 kleidicv_error_t yuv2rgb_operation(OperationType operation,
144 const ScalarType *src, size_t src_stride,
145 ScalarType *dst, size_t dst_stride,
146 size_t width,
147 size_t height) KLEIDICV_STREAMING {
148
16/16
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 194 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 194 times.
✓ Branch 4 taken 12 times.
✓ Branch 5 taken 194 times.
✓ Branch 6 taken 12 times.
✓ Branch 7 taken 194 times.
✓ Branch 8 taken 12 times.
✓ Branch 9 taken 194 times.
✓ Branch 10 taken 12 times.
✓ Branch 11 taken 194 times.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 194 times.
✓ Branch 14 taken 12 times.
✓ Branch 15 taken 194 times.
824 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
149
16/16
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 182 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 182 times.
✓ Branch 4 taken 12 times.
✓ Branch 5 taken 182 times.
✓ Branch 6 taken 12 times.
✓ Branch 7 taken 182 times.
✓ Branch 8 taken 12 times.
✓ Branch 9 taken 182 times.
✓ Branch 10 taken 12 times.
✓ Branch 11 taken 182 times.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 182 times.
✓ Branch 14 taken 12 times.
✓ Branch 15 taken 182 times.
776 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
150
24/24
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 170 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 158 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 158 times.
✓ Branch 6 taken 12 times.
✓ Branch 7 taken 170 times.
✓ Branch 8 taken 12 times.
✓ Branch 9 taken 158 times.
✓ Branch 10 taken 24 times.
✓ Branch 11 taken 158 times.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 170 times.
✓ Branch 14 taken 12 times.
✓ Branch 15 taken 158 times.
✓ Branch 16 taken 24 times.
✓ Branch 17 taken 158 times.
✓ Branch 18 taken 12 times.
✓ Branch 19 taken 170 times.
✓ Branch 20 taken 12 times.
✓ Branch 21 taken 158 times.
✓ Branch 22 taken 24 times.
✓ Branch 23 taken 158 times.
728 CHECK_IMAGE_SIZE(width, height);
151
152 632 Rectangle rect{width, height};
153 632 Rows src_rows{src, src_stride, 3};
154 632 Rows dst_rows{dst, dst_stride, operation.output_channels()};
155
156 632 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
157 632 return KLEIDICV_OK;
158 824 }
159
160 KLEIDICV_TARGET_FN_ATTRS
161 206 static kleidicv_error_t yuv_to_rgb_u8_sc(const uint8_t *src, size_t src_stride,
162 uint8_t *dst, size_t dst_stride,
163 size_t width,
164 size_t height) KLEIDICV_STREAMING {
165 206 YUVToRGB<false, false> operation;
166 618 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
167 206 height);
168 206 }
169
170 KLEIDICV_TARGET_FN_ATTRS
171 206 static kleidicv_error_t yuv_to_rgba_u8_sc(const uint8_t *src, size_t src_stride,
172 uint8_t *dst, size_t dst_stride,
173 size_t width,
174 size_t height) KLEIDICV_STREAMING {
175 206 YUVToRGB<false, true> operation;
176 618 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
177 206 height);
178 206 }
179
180 KLEIDICV_TARGET_FN_ATTRS
181 206 static kleidicv_error_t yuv_to_bgr_u8_sc(const uint8_t *src, size_t src_stride,
182 uint8_t *dst, size_t dst_stride,
183 size_t width,
184 size_t height) KLEIDICV_STREAMING {
185 206 YUVToRGB<true, false> operation;
186 618 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
187 206 height);
188 206 }
189
190 KLEIDICV_TARGET_FN_ATTRS
191 206 static kleidicv_error_t yuv_to_bgra_u8_sc(const uint8_t *src, size_t src_stride,
192 uint8_t *dst, size_t dst_stride,
193 size_t width,
194 size_t height) KLEIDICV_STREAMING {
195 206 YUVToRGB<true, true> operation;
196 618 return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
197 206 height);
198 206 }
199
200 } // namespace KLEIDICV_TARGET_NAMESPACE
201
202 #endif // KLEIDICV_YUV_TO_RGB_SC_H
203