Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_RGB_TO_RGB_SC_H | ||
6 | #define KLEIDICV_RGB_TO_RGB_SC_H | ||
7 | |||
8 | #include "kleidicv/conversions/rgb_to_rgb.h" | ||
9 | #include "kleidicv/kleidicv.h" | ||
10 | #include "kleidicv/sve2.h" | ||
11 | |||
12 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
13 | |||
14 | template <typename ScalarType> | ||
15 | class RGBToBGR final : | ||
16 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE && KLEIDICV_ASSUME_128BIT_SVE2 | ||
17 | public UsesTailPath, | ||
18 | #endif | ||
19 | public UnrollTwice { | ||
20 | public: | ||
21 | using ContextType = Context; | ||
22 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
23 | using VectorType = typename VecTraits::VectorType; | ||
24 | |||
25 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE || !KLEIDICV_ASSUME_128BIT_SVE2 | ||
26 | 639 | void vector_path(ContextType ctx, const ScalarType *src, | |
27 | ScalarType *dst) KLEIDICV_STREAMING { | ||
28 | 639 | auto pg = ctx.predicate(); | |
29 | 639 | svuint8x3_t src_vect = svld3(pg, src); | |
30 | 1278 | svuint8x3_t dst_vect = svcreate3(svget3(src_vect, 2), svget3(src_vect, 1), | |
31 | 639 | svget3(src_vect, 0)); | |
32 | |||
33 | 639 | svst3(pg, dst, dst_vect); | |
34 | 639 | } | |
35 | #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE || | ||
36 | // !KLEIDICV_ASSUME_128BIT_SVE2 | ||
37 | explicit RGBToBGR(svuint8x4_t &indices) KLEIDICV_STREAMING | ||
38 | : indices_{indices} { | ||
39 | initialize_indices(); | ||
40 | } | ||
41 | |||
42 | void vector_path(ContextType ctx, const ScalarType *src, | ||
43 | ScalarType *dst) KLEIDICV_STREAMING { | ||
44 | // Call the common vector path. | ||
45 | auto pg = ctx.predicate(); | ||
46 | common_vector_path(pg, pg, pg, src, dst); | ||
47 | } | ||
48 | |||
49 | void tail_path(ContextType ctx, const ScalarType *src, | ||
50 | ScalarType *dst) KLEIDICV_STREAMING { | ||
51 | auto pg = ctx.predicate(); | ||
52 | // Predicates for consecutive stores. | ||
53 | svbool_t pg_0, pg_1, pg_2; | ||
54 | VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2); | ||
55 | // Call the common vector path. | ||
56 | common_vector_path(pg_0, pg_1, pg_2, src, dst); | ||
57 | } | ||
58 | |||
59 | private: | ||
60 | void common_vector_path(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2, | ||
61 | const ScalarType *src, | ||
62 | ScalarType *dst) KLEIDICV_STREAMING { | ||
63 | VectorType src_0 = svld1(pg_0, &src[0]); | ||
64 | VectorType src_1 = svld1_vnum(pg_1, &src[0], 1); | ||
65 | VectorType src_2 = svld1_vnum(pg_2, &src[0], 2); | ||
66 | |||
67 | svuint8x2_t src_vect_0_1 = svcreate2(src_0, src_1); | ||
68 | svuint8x2_t src_vect_1_2 = svcreate2(src_1, src_2); | ||
69 | |||
70 | svuint8_t dst_vec_0 = svtbl2(src_vect_0_1, svget4(indices_, 0)); | ||
71 | svuint8_t dst_vec_2 = svtbl2(src_vect_1_2, svget4(indices_, 3)); | ||
72 | svuint8_t dst_vec_1 = svtbl2(src_vect_0_1, svget4(indices_, 1)); | ||
73 | src_vect_1_2 = svcreate2(dst_vec_1, src_2); | ||
74 | dst_vec_1 = svtbl2(src_vect_1_2, svget4(indices_, 2)); | ||
75 | |||
76 | svst1(pg_0, &dst[0], dst_vec_0); | ||
77 | svst1_vnum(pg_1, &dst[0], 1, dst_vec_1); | ||
78 | svst1_vnum(pg_2, &dst[0], 2, dst_vec_2); | ||
79 | } | ||
80 | |||
81 | void initialize_indices() KLEIDICV_STREAMING { | ||
82 | svbool_t pg = VecTraits::svptrue(); | ||
83 | indices_ = svcreate4(svld1(pg, &kTableIndices[0]), | ||
84 | svld1_vnum(pg, &kTableIndices[0], 1), | ||
85 | svld1_vnum(pg, &kTableIndices[0], 2), | ||
86 | svld1_vnum(pg, &kTableIndices[0], 3)); | ||
87 | } | ||
88 | |||
89 | static constexpr uint8_t kTableIndices[64] = { | ||
90 | 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17, | ||
91 | 16, 15, 20, 19, 18, 23, 22, 21, 26, 25, 24, 29, 28, 27, 32, 31, | ||
92 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 15, | ||
93 | 14, 19, 18, 17, 22, 21, 20, 25, 24, 23, 28, 27, 26, 31, 30, 29}; | ||
94 | |||
95 | // Hold a reference because sizeless types cannot be class members. | ||
96 | svuint8x4_t &indices_; | ||
97 | #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE || | ||
98 | // !KLEIDICV_ASSUME_128BIT_SVE2 | ||
99 | }; // end of class RGBToBGR<ScalarType> | ||
100 | |||
101 | template <typename ScalarType> | ||
102 | class RGBAToBGRA final : public UnrollTwice { | ||
103 | public: | ||
104 | using ContextType = Context; | ||
105 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
106 | |||
107 | 639 | void vector_path(ContextType ctx, const ScalarType *src, | |
108 | ScalarType *dst) KLEIDICV_STREAMING { | ||
109 | 639 | auto pg = ctx.predicate(); | |
110 | 639 | svuint8x4_t src_vect = svld4(pg, src); | |
111 | 1278 | svuint8x4_t dst_vect = svcreate4(svget4(src_vect, 2), svget4(src_vect, 1), | |
112 | 639 | svget4(src_vect, 0), svget4(src_vect, 3)); | |
113 | |||
114 | 639 | svst4(pg, dst, dst_vect); | |
115 | 639 | } | |
116 | }; // end of class RGBAToBGRA<ScalarType> | ||
117 | |||
118 | template <typename ScalarType> | ||
119 | class RGBToBGRA final : public UnrollTwice { | ||
120 | public: | ||
121 | using ContextType = Context; | ||
122 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
123 | |||
124 | 639 | void vector_path(ContextType ctx, const ScalarType *src, | |
125 | ScalarType *dst) KLEIDICV_STREAMING { | ||
126 | 639 | auto pg = ctx.predicate(); | |
127 | 639 | svuint8x3_t src_vect = svld3(pg, src); | |
128 | 1278 | svuint8x4_t dst_vect = svcreate4(svget3(src_vect, 2), svget3(src_vect, 1), | |
129 | 639 | svget3(src_vect, 0), svdup_u8(0xff)); | |
130 | |||
131 | 639 | svst4(pg, dst, dst_vect); | |
132 | 639 | } | |
133 | }; // end of class RGBToBGRA<ScalarType> | ||
134 | |||
135 | template <typename ScalarType> | ||
136 | class RGBToRGBA final : public UnrollTwice { | ||
137 | public: | ||
138 | using ContextType = Context; | ||
139 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
140 | |||
141 | 639 | void vector_path(ContextType ctx, const ScalarType *src, | |
142 | ScalarType *dst) KLEIDICV_STREAMING { | ||
143 | 639 | auto pg = ctx.predicate(); | |
144 | 639 | svuint8x3_t src_vect = svld3(pg, src); | |
145 | 1278 | svuint8x4_t dst_vect = svcreate4(svget3(src_vect, 0), svget3(src_vect, 1), | |
146 | 639 | svget3(src_vect, 2), svdup_u8(0xff)); | |
147 | |||
148 | 639 | svst4(pg, dst, dst_vect); | |
149 | 639 | } | |
150 | }; // end of class RGBToRGBA<ScalarType> | ||
151 | |||
152 | template <typename ScalarType> | ||
153 | class RGBAToBGR final : public UnrollTwice { | ||
154 | public: | ||
155 | using ContextType = Context; | ||
156 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
157 | |||
158 | 639 | void vector_path(ContextType ctx, const ScalarType *src, | |
159 | ScalarType *dst) KLEIDICV_STREAMING { | ||
160 | 639 | auto pg = ctx.predicate(); | |
161 | 639 | svuint8x4_t src_vect = svld4(pg, src); | |
162 | 1278 | svuint8x3_t dst_vect = svcreate3(svget4(src_vect, 2), svget4(src_vect, 1), | |
163 | 639 | svget4(src_vect, 0)); | |
164 | |||
165 | 639 | svst3(pg, dst, dst_vect); | |
166 | 639 | } | |
167 | }; // end of class RGBAToBGR<ScalarType> | ||
168 | |||
169 | template <typename ScalarType> | ||
170 | class RGBAToRGB final : public UnrollTwice { | ||
171 | public: | ||
172 | using ContextType = Context; | ||
173 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
174 | |||
175 | 639 | void vector_path(ContextType ctx, const ScalarType *src, | |
176 | ScalarType *dst) KLEIDICV_STREAMING { | ||
177 | 639 | auto pg = ctx.predicate(); | |
178 | 639 | svuint8x4_t src_vect = svld4(pg, src); | |
179 | 1278 | svuint8x3_t dst_vect = svcreate3(svget4(src_vect, 0), svget4(src_vect, 1), | |
180 | 639 | svget4(src_vect, 2)); | |
181 | |||
182 | 639 | svst3(pg, dst, dst_vect); | |
183 | 639 | } | |
184 | }; // end of class RGBAToRGB<ScalarType> | ||
185 | |||
186 | 143 | KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t rgb_to_bgr_u8_sc( | |
187 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
188 | size_t width, size_t height) KLEIDICV_STREAMING { | ||
189 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 140 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 140 times. | 143 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
190 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 137 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 137 times. | 140 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
191 | 6/6 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 134 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 131 times. ✓ Branch 4 taken 6 times. ✓ Branch 5 taken 131 times. | 137 | CHECK_IMAGE_SIZE(width, height); |
192 | |||
193 | 131 | Rectangle rect{width, height}; | |
194 | 131 | Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */}; | |
195 | 131 | Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* BGR */}; | |
196 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE || !KLEIDICV_ASSUME_128BIT_SVE2 | ||
197 | 131 | RGBToBGR<uint8_t> operation; | |
198 | #else | ||
199 | svuint8x4_t table_indices; | ||
200 | RGBToBGR<uint8_t> operation{table_indices}; | ||
201 | #endif | ||
202 | 131 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
203 | 131 | return KLEIDICV_OK; | |
204 | 143 | } | |
205 | |||
206 | KLEIDICV_TARGET_FN_ATTRS | ||
207 | 143 | static kleidicv_error_t rgba_to_bgra_u8_sc(const uint8_t *src, | |
208 | size_t src_stride, uint8_t *dst, | ||
209 | size_t dst_stride, size_t width, | ||
210 | size_t height) KLEIDICV_STREAMING { | ||
211 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 140 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 140 times. | 143 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
212 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 137 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 137 times. | 140 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
213 | 6/6 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 134 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 131 times. ✓ Branch 4 taken 6 times. ✓ Branch 5 taken 131 times. | 137 | CHECK_IMAGE_SIZE(width, height); |
214 | |||
215 | 131 | Rectangle rect{width, height}; | |
216 | 131 | Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */}; | |
217 | 131 | Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* BGRA */}; | |
218 | 131 | RGBAToBGRA<uint8_t> operation; | |
219 | 131 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
220 | 131 | return KLEIDICV_OK; | |
221 | 143 | } | |
222 | |||
223 | KLEIDICV_TARGET_FN_ATTRS | ||
224 | 143 | static kleidicv_error_t rgb_to_bgra_u8_sc(const uint8_t *src, size_t src_stride, | |
225 | uint8_t *dst, size_t dst_stride, | ||
226 | size_t width, | ||
227 | size_t height) KLEIDICV_STREAMING { | ||
228 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 140 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 140 times. | 143 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
229 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 137 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 137 times. | 140 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
230 | 6/6 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 134 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 131 times. ✓ Branch 4 taken 6 times. ✓ Branch 5 taken 131 times. | 137 | CHECK_IMAGE_SIZE(width, height); |
231 | |||
232 | 131 | Rectangle rect{width, height}; | |
233 | 131 | Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */}; | |
234 | 131 | Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* BGRA */}; | |
235 | 131 | RGBToBGRA<uint8_t> operation; | |
236 | 131 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
237 | 131 | return KLEIDICV_OK; | |
238 | 143 | } | |
239 | |||
240 | KLEIDICV_TARGET_FN_ATTRS | ||
241 | 143 | static kleidicv_error_t rgb_to_rgba_u8_sc(const uint8_t *src, size_t src_stride, | |
242 | uint8_t *dst, size_t dst_stride, | ||
243 | size_t width, | ||
244 | size_t height) KLEIDICV_STREAMING { | ||
245 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 140 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 140 times. | 143 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
246 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 137 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 137 times. | 140 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
247 | 6/6 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 134 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 131 times. ✓ Branch 4 taken 6 times. ✓ Branch 5 taken 131 times. | 137 | CHECK_IMAGE_SIZE(width, height); |
248 | |||
249 | 131 | Rectangle rect{width, height}; | |
250 | 131 | Rows<const uint8_t> src_rows{src, src_stride, 3 /* RGB */}; | |
251 | 131 | Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */}; | |
252 | 131 | RGBToRGBA<uint8_t> operation; | |
253 | 131 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
254 | 131 | return KLEIDICV_OK; | |
255 | 143 | } | |
256 | |||
257 | KLEIDICV_TARGET_FN_ATTRS | ||
258 | 143 | static kleidicv_error_t rgba_to_bgr_u8_sc(const uint8_t *src, size_t src_stride, | |
259 | uint8_t *dst, size_t dst_stride, | ||
260 | size_t width, | ||
261 | size_t height) KLEIDICV_STREAMING { | ||
262 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 140 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 140 times. | 143 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
263 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 137 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 137 times. | 140 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
264 | 6/6 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 134 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 131 times. ✓ Branch 4 taken 6 times. ✓ Branch 5 taken 131 times. | 137 | CHECK_IMAGE_SIZE(width, height); |
265 | |||
266 | 131 | Rectangle rect{width, height}; | |
267 | 131 | Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */}; | |
268 | 131 | Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* BGR */}; | |
269 | 131 | RGBAToBGR<uint8_t> operation; | |
270 | 131 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
271 | 131 | return KLEIDICV_OK; | |
272 | 143 | } | |
273 | |||
274 | KLEIDICV_TARGET_FN_ATTRS | ||
275 | 143 | static kleidicv_error_t rgba_to_rgb_u8_sc(const uint8_t *src, size_t src_stride, | |
276 | uint8_t *dst, size_t dst_stride, | ||
277 | size_t width, | ||
278 | size_t height) KLEIDICV_STREAMING { | ||
279 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 140 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 140 times. | 143 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
280 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 137 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 137 times. | 140 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
281 | 6/6 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 134 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 131 times. ✓ Branch 4 taken 6 times. ✓ Branch 5 taken 131 times. | 137 | CHECK_IMAGE_SIZE(width, height); |
282 | |||
283 | 131 | Rectangle rect{width, height}; | |
284 | 131 | Rows<const uint8_t> src_rows{src, src_stride, 4 /* RGBA */}; | |
285 | 131 | Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */}; | |
286 | 131 | RGBAToRGB<uint8_t> operation; | |
287 | 131 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
288 | 131 | return KLEIDICV_OK; | |
289 | 143 | } | |
290 | |||
291 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
292 | |||
293 | #endif // KLEIDICV_RGB_TO_RGB_SC_H | ||
294 |