Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_YUV_TO_RGB_SC_H | ||
6 | #define KLEIDICV_YUV_TO_RGB_SC_H | ||
7 | |||
8 | #include <limits> | ||
9 | |||
10 | #include "kleidicv/conversions/yuv_to_rgb.h" | ||
11 | #include "kleidicv/kleidicv.h" | ||
12 | #include "kleidicv/sve2.h" | ||
13 | |||
14 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
15 | |||
16 | template <bool BGR, bool kAlpha> | ||
17 | class YUVToRGB : public UnrollOnce { | ||
18 | public: | ||
19 | using ContextType = Context; | ||
20 | using ScalarType = uint8_t; | ||
21 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
22 | using VectorType = VecTraits::VectorType; | ||
23 | using Vector3Type = VecTraits::Vector3Type; | ||
24 | using RawDestinationVectorType = | ||
25 | typename std::conditional<kAlpha, svuint8x4_t, svuint8x3_t>::type; | ||
26 | |||
27 | // Returns the number of channels in the output image. | ||
28 | 632 | static constexpr size_t output_channels() KLEIDICV_STREAMING { | |
29 | 632 | return kAlpha ? /* RGBA */ 4 : /* RGB */ 3; | |
30 | } | ||
31 | |||
32 | 2764 | void vector_path(ContextType ctx, const ScalarType *src, | |
33 | ScalarType *dst) KLEIDICV_STREAMING { | ||
34 | 2764 | auto pg = ctx.predicate(); | |
35 | 2764 | Vector3Type svsrc = svld3(pg, src); | |
36 | 2764 | svint16_t y_0 = svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 0), 0)); | |
37 | 2764 | svint16_t y_1 = svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 0), 0)); | |
38 | 5528 | svint16_t u4_0 = | |
39 | 2764 | svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 1), kPreShift)); | |
40 | 5528 | svint16_t u4_1 = | |
41 | 2764 | svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 1), kPreShift)); | |
42 | 5528 | svint16_t v4_0 = | |
43 | 2764 | svreinterpret_s16_u16(svshllb_n_u16(svget3(svsrc, 2), kPreShift)); | |
44 | 5528 | svint16_t v4_1 = | |
45 | 2764 | svreinterpret_s16_u16(svshllt_n_u16(svget3(svsrc, 2), kPreShift)); | |
46 | 2764 | svuint8_t r, g, b; | |
47 | |||
48 | // Compute B value in 32-bit precision | ||
49 | { | ||
50 | // The multiplication uses uint16_t operands because UBWeight only fits | ||
51 | // in an unsigned 16-bit integer. | ||
52 | 5528 | svint32_t b_00 = svreinterpret_s32_u32( | |
53 | 2764 | svmullb(svreinterpret_u16_s16(u4_0), kUnsignedUBWeight)); | |
54 | 5528 | svint32_t b_01 = svreinterpret_s32_u32( | |
55 | 2764 | svmullt(svreinterpret_u16_s16(u4_0), kUnsignedUBWeight)); | |
56 | 5528 | svint32_t b_10 = svreinterpret_s32_u32( | |
57 | 2764 | svmullb(svreinterpret_u16_s16(u4_1), kUnsignedUBWeight)); | |
58 | 5528 | svint32_t b_11 = svreinterpret_s32_u32( | |
59 | 2764 | svmullt(svreinterpret_u16_s16(u4_1), kUnsignedUBWeight)); | |
60 | |||
61 | 2764 | b_00 = svadd_n_s32_x(svptrue_b32(), b_00, kBDelta4); | |
62 | 2764 | b_01 = svadd_n_s32_x(svptrue_b32(), b_01, kBDelta4); | |
63 | 2764 | b_10 = svadd_n_s32_x(svptrue_b32(), b_10, kBDelta4); | |
64 | 2764 | b_11 = svadd_n_s32_x(svptrue_b32(), b_11, kBDelta4); | |
65 | |||
66 | 5528 | svint16_t b_0 = svadd_x( | |
67 | 2764 | svptrue_b16(), y_0, | |
68 | 2764 | svtrn2_s16(svreinterpret_s16_s32(b_00), svreinterpret_s16_s32(b_01))); | |
69 | 5528 | svint16_t b_1 = svadd_x( | |
70 | 2764 | svptrue_b16(), y_1, | |
71 | 2764 | svtrn2_s16(svreinterpret_s16_s32(b_10), svreinterpret_s16_s32(b_11))); | |
72 | |||
73 | 2764 | b = svqxtunt(svqxtunb(b_0), b_1); | |
74 | 2764 | } | |
75 | |||
76 | // Compute G value in 32-bit precision | ||
77 | { | ||
78 | 2764 | svint32_t svg_delta4 = svdup_n_s32(kGDelta4); | |
79 | 2764 | svint32_t g_00 = svmlalb(svg_delta4, u4_0, kUGWeight); | |
80 | 2764 | svint32_t g_01 = svmlalt(svg_delta4, u4_0, kUGWeight); | |
81 | 2764 | svint32_t g_10 = svmlalb(svg_delta4, u4_1, kUGWeight); | |
82 | 2764 | svint32_t g_11 = svmlalt(svg_delta4, u4_1, kUGWeight); | |
83 | |||
84 | 2764 | g_00 = svmlalb(g_00, v4_0, kVGWeight); | |
85 | 2764 | g_01 = svmlalt(g_01, v4_0, kVGWeight); | |
86 | 2764 | g_10 = svmlalb(g_10, v4_1, kVGWeight); | |
87 | 2764 | g_11 = svmlalt(g_11, v4_1, kVGWeight); | |
88 | |||
89 | 5528 | svint16_t g_0 = svadd_x( | |
90 | 2764 | svptrue_b16(), y_0, | |
91 | 2764 | svtrn2_s16(svreinterpret_s16_s32(g_00), svreinterpret_s16_s32(g_01))); | |
92 | 5528 | svint16_t g_1 = svadd_x( | |
93 | 2764 | svptrue_b16(), y_1, | |
94 | 2764 | svtrn2_s16(svreinterpret_s16_s32(g_10), svreinterpret_s16_s32(g_11))); | |
95 | |||
96 | 2764 | g = svqxtunt(svqxtunb(g_0), g_1); | |
97 | 2764 | } | |
98 | |||
99 | // Compute R value in 32-bit precision | ||
100 | { | ||
101 | 2764 | svint32_t svr_delta4 = svdup_n_s32(kRDelta4); | |
102 | 2764 | svint32_t r_00 = svmlalb(svr_delta4, v4_0, kVRWeight); | |
103 | 2764 | svint32_t r_01 = svmlalt(svr_delta4, v4_0, kVRWeight); | |
104 | 2764 | svint32_t r_10 = svmlalb(svr_delta4, v4_1, kVRWeight); | |
105 | 2764 | svint32_t r_11 = svmlalt(svr_delta4, v4_1, kVRWeight); | |
106 | |||
107 | 5528 | svint16_t r_0 = svadd_x( | |
108 | 2764 | svptrue_b16(), y_0, | |
109 | 2764 | svtrn2_s16(svreinterpret_s16_s32(r_00), svreinterpret_s16_s32(r_01))); | |
110 | 5528 | svint16_t r_1 = svadd_x( | |
111 | 2764 | svptrue_b16(), y_1, | |
112 | 2764 | svtrn2_s16(svreinterpret_s16_s32(r_10), svreinterpret_s16_s32(r_11))); | |
113 | |||
114 | 2764 | r = svqxtunt(svqxtunb(r_0), r_1); | |
115 | 2764 | } | |
116 | |||
117 | if constexpr (kAlpha) { | ||
118 | 1382 | RawDestinationVectorType rgb; | |
119 | if constexpr (BGR) { | ||
120 | 691 | rgb = svcreate4(b, g, r, svdup_u8(alpha_value)); | |
121 | } else { | ||
122 | 691 | rgb = svcreate4(r, g, b, svdup_u8(alpha_value)); | |
123 | } | ||
124 | |||
125 | // Store the 8-bit channels (including alpha), interleaved per pixel. | ||
126 | 1382 | svst4_u8(pg, dst, rgb); | |
127 | 1382 | } else { | |
128 | 1382 | RawDestinationVectorType rgb; | |
129 | if constexpr (BGR) { | ||
130 | 691 | rgb = svcreate3(b, g, r); | |
131 | } else { | ||
132 | 691 | rgb = svcreate3(r, g, b); | |
133 | } | ||
134 | |||
135 | // Store the 8-bit channels, interleaved per pixel. | ||
136 | 1382 | svst3_u8(pg, dst, rgb); | |
137 | 1382 | } | |
138 | 2764 | } | |
139 | static constexpr uint8_t alpha_value = std::numeric_limits<uint8_t>::max(); | ||
140 | }; // end of class YUVToRGB<bool BGR, bool kAlpha> | ||
141 | |||
142 | template <typename OperationType, typename ScalarType> | ||
143 | 824 | kleidicv_error_t yuv2rgb_operation(OperationType operation, | |
144 | const ScalarType *src, size_t src_stride, | ||
145 | ScalarType *dst, size_t dst_stride, | ||
146 | size_t width, | ||
147 | size_t height) KLEIDICV_STREAMING { | ||
148 |
16/16✓ Branch 0 taken 12 times.
✓ Branch 1 taken 194 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 194 times.
✓ Branch 4 taken 12 times.
✓ Branch 5 taken 194 times.
✓ Branch 6 taken 12 times.
✓ Branch 7 taken 194 times.
✓ Branch 8 taken 12 times.
✓ Branch 9 taken 194 times.
✓ Branch 10 taken 12 times.
✓ Branch 11 taken 194 times.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 194 times.
✓ Branch 14 taken 12 times.
✓ Branch 15 taken 194 times.
|
824 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
149 |
16/16✓ Branch 0 taken 12 times.
✓ Branch 1 taken 182 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 182 times.
✓ Branch 4 taken 12 times.
✓ Branch 5 taken 182 times.
✓ Branch 6 taken 12 times.
✓ Branch 7 taken 182 times.
✓ Branch 8 taken 12 times.
✓ Branch 9 taken 182 times.
✓ Branch 10 taken 12 times.
✓ Branch 11 taken 182 times.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 182 times.
✓ Branch 14 taken 12 times.
✓ Branch 15 taken 182 times.
|
776 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
150 |
24/24✓ Branch 0 taken 12 times.
✓ Branch 1 taken 170 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 158 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 158 times.
✓ Branch 6 taken 12 times.
✓ Branch 7 taken 170 times.
✓ Branch 8 taken 12 times.
✓ Branch 9 taken 158 times.
✓ Branch 10 taken 24 times.
✓ Branch 11 taken 158 times.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 170 times.
✓ Branch 14 taken 12 times.
✓ Branch 15 taken 158 times.
✓ Branch 16 taken 24 times.
✓ Branch 17 taken 158 times.
✓ Branch 18 taken 12 times.
✓ Branch 19 taken 170 times.
✓ Branch 20 taken 12 times.
✓ Branch 21 taken 158 times.
✓ Branch 22 taken 24 times.
✓ Branch 23 taken 158 times.
|
728 | CHECK_IMAGE_SIZE(width, height); |
151 | |||
152 | 632 | Rectangle rect{width, height}; | |
153 | 632 | Rows src_rows{src, src_stride, 3}; | |
154 | 632 | Rows dst_rows{dst, dst_stride, operation.output_channels()}; | |
155 | |||
156 | 632 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
157 | 632 | return KLEIDICV_OK; | |
158 | 824 | } | |
159 | |||
160 | KLEIDICV_TARGET_FN_ATTRS | ||
161 | 206 | static kleidicv_error_t yuv_to_rgb_u8_sc(const uint8_t *src, size_t src_stride, | |
162 | uint8_t *dst, size_t dst_stride, | ||
163 | size_t width, | ||
164 | size_t height) KLEIDICV_STREAMING { | ||
165 | 206 | YUVToRGB<false, false> operation; | |
166 | 618 | return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, | |
167 | 206 | height); | |
168 | 206 | } | |
169 | |||
170 | KLEIDICV_TARGET_FN_ATTRS | ||
171 | 206 | static kleidicv_error_t yuv_to_rgba_u8_sc(const uint8_t *src, size_t src_stride, | |
172 | uint8_t *dst, size_t dst_stride, | ||
173 | size_t width, | ||
174 | size_t height) KLEIDICV_STREAMING { | ||
175 | 206 | YUVToRGB<false, true> operation; | |
176 | 618 | return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, | |
177 | 206 | height); | |
178 | 206 | } | |
179 | |||
180 | KLEIDICV_TARGET_FN_ATTRS | ||
181 | 206 | static kleidicv_error_t yuv_to_bgr_u8_sc(const uint8_t *src, size_t src_stride, | |
182 | uint8_t *dst, size_t dst_stride, | ||
183 | size_t width, | ||
184 | size_t height) KLEIDICV_STREAMING { | ||
185 | 206 | YUVToRGB<true, false> operation; | |
186 | 618 | return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, | |
187 | 206 | height); | |
188 | 206 | } | |
189 | |||
190 | KLEIDICV_TARGET_FN_ATTRS | ||
191 | 206 | static kleidicv_error_t yuv_to_bgra_u8_sc(const uint8_t *src, size_t src_stride, | |
192 | uint8_t *dst, size_t dst_stride, | ||
193 | size_t width, | ||
194 | size_t height) KLEIDICV_STREAMING { | ||
195 | 206 | YUVToRGB<true, true> operation; | |
196 | 618 | return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, | |
197 | 206 | height); | |
198 | 206 | } | |
199 | |||
200 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
201 | |||
202 | #endif // KLEIDICV_YUV_TO_RGB_SC_H | ||
203 |