Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_GRAY_TO_RGB_SC_H | ||
6 | #define KLEIDICV_GRAY_TO_RGB_SC_H | ||
7 | |||
8 | #include "kleidicv/conversions/gray_to_rgb.h" | ||
9 | #include "kleidicv/kleidicv.h" | ||
10 | #include "kleidicv/sve2.h" | ||
11 | |||
12 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
13 | |||
14 | template <typename ScalarType> | ||
15 | class GrayToRGB final : | ||
16 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
17 | public UsesTailPath, | ||
18 | #endif | ||
19 | public UnrollTwice { | ||
20 | public: | ||
21 | using ContextType = Context; | ||
22 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
23 | using VectorType = typename VecTraits::VectorType; | ||
24 | |||
25 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
26 | void vector_path(ContextType ctx, VectorType src_vect, | ||
27 | ScalarType *dst) KLEIDICV_STREAMING { | ||
28 | auto pg = ctx.predicate(); | ||
29 | svuint8x3_t dst_vect = svcreate3(src_vect, src_vect, src_vect); | ||
30 | svst3(pg, dst, dst_vect); | ||
31 | } | ||
32 | #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
33 | 131 | explicit GrayToRGB(svuint8x3_t &indices) KLEIDICV_STREAMING | |
34 | 131 | : indices_{indices} { | |
35 | 131 | initialize_indices(); | |
36 | 131 | } | |
37 | |||
38 | 564 | void vector_path(ContextType ctx, VectorType src_vect, | |
39 | ScalarType *dst) KLEIDICV_STREAMING { | ||
40 | // Call the common vector path. | ||
41 | 564 | auto pg = ctx.predicate(); | |
42 | 564 | common_vector_path(pg, pg, pg, src_vect, dst); | |
43 | 564 | } | |
44 | |||
45 | 120 | void tail_path(ContextType ctx, VectorType src_vect, | |
46 | ScalarType *dst) KLEIDICV_STREAMING { | ||
47 | 120 | auto pg = ctx.predicate(); | |
48 | // Predicates for consecutive stores. | ||
49 | 120 | svbool_t pg_0, pg_1, pg_2; | |
50 | 120 | VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2); | |
51 | // Call the common vector path. | ||
52 | 120 | common_vector_path(pg_0, pg_1, pg_2, src_vect, dst); | |
53 | 120 | } | |
54 | |||
55 | private: | ||
56 | 684 | void common_vector_path(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2, | |
57 | VectorType src_vect, | ||
58 | ScalarType *dst) KLEIDICV_STREAMING { | ||
59 | // Convert from gray to RGB using table-lookups. | ||
60 | 684 | VectorType dst_vec_0 = svtbl(src_vect, svget3(indices_, 0)); | |
61 | 684 | VectorType dst_vec_1 = svtbl(src_vect, svget3(indices_, 1)); | |
62 | 684 | VectorType dst_vec_2 = svtbl(src_vect, svget3(indices_, 2)); | |
63 | |||
64 | 684 | svst1(pg_0, &dst[0], dst_vec_0); | |
65 | 684 | svst1_vnum(pg_1, &dst[0], 1, dst_vec_1); | |
66 | 684 | svst1_vnum(pg_2, &dst[0], 2, dst_vec_2); | |
67 | 684 | } | |
68 | |||
69 | 131 | void initialize_indices() KLEIDICV_STREAMING { | |
70 | // All-true predicate to shorten code. | ||
71 | 131 | svbool_t pg_all = VecTraits::svptrue(); | |
72 | // Constant used for division by 3. | ||
73 | 131 | VectorType const_171 = VecTraits::svdup(171); | |
74 | // Generated indices. | ||
75 | 131 | VectorType indices_0, indices_1, indices_2; | |
76 | |||
77 | 131 | indices_0 = svindex_u8(0, 1); | |
78 | |||
79 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 128 times.
|
131 | if (KLEIDICV_UNLIKELY(svcntb() == 256)) { |
80 | 3 | indices_1 = svext( | |
81 | 3 | svdup_u8(0), | |
82 | 3 | svqadd(svindex_u8(svcntb() % 3, 1), static_cast<uint8_t>(2)), 254); | |
83 | 6 | indices_2 = svext(svdup_u8(0), | |
84 | 3 | svqadd(svindex_u8(0, 1), static_cast<uint8_t>(3)), 255); | |
85 | 3 | } else { | |
86 | 128 | indices_1 = svindex_u8(svcntb() % 3, 1); | |
87 | 128 | indices_2 = svindex_u8((svcntb() * 2) % 3, 1); | |
88 | } | ||
89 | |||
90 | 131 | indices_0 = svlsr_x(pg_all, svmulh_x(pg_all, indices_0, const_171), 1); | |
91 | 131 | indices_1 = svqadd_x( | |
92 | 131 | pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_1, const_171), 1), | |
93 | 131 | static_cast<ScalarType>(svcntb() / 3)); | |
94 | 131 | indices_2 = svqadd_x( | |
95 | 131 | pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_2, const_171), 1), | |
96 | 131 | static_cast<ScalarType>((svcntb() * 2) / 3)); | |
97 | |||
98 | 131 | indices_ = svcreate3(indices_0, indices_1, indices_2); | |
99 | 131 | } | |
100 | |||
101 | svuint8x3_t &indices_; | ||
102 | #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
103 | }; // end of class GrayToRGB<ScalarType> | ||
104 | |||
105 | template <typename ScalarType> | ||
106 | class GrayToRGBAWithInterleaving final : public UnrollTwice { | ||
107 | public: | ||
108 | using ContextType = Context; | ||
109 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
110 | using VectorType = typename VecTraits::VectorType; | ||
111 | 24 | void vector_path(ContextType ctx, VectorType src_vect, | |
112 | ScalarType *dst) KLEIDICV_STREAMING { | ||
113 | 24 | auto pg = ctx.predicate(); | |
114 | 24 | svuint8_t alpha = svdup_u8(0xff); | |
115 | 24 | svuint8x4_t dst_vect = svcreate4(src_vect, src_vect, src_vect, alpha); | |
116 | |||
117 | 24 | svst4(pg, dst, dst_vect); | |
118 | 24 | } | |
119 | }; // end of class GrayToRGBAWithInterleaving<ScalarType> | ||
120 | |||
121 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
122 | template <typename ScalarType> | ||
123 | class GrayToRGBAWithLookUpTable final : public UnrollTwice, | ||
124 | public UsesTailPath { | ||
125 | public: | ||
126 | using ContextType = Context; | ||
127 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
128 | using VectorType = typename VecTraits::VectorType; | ||
129 | 128 | explicit GrayToRGBAWithLookUpTable(svuint8x4_t &indices) KLEIDICV_STREAMING | |
130 | 128 | : indices_{indices} { | |
131 | 128 | initialize_indices(); | |
132 | 128 | } | |
133 | |||
134 | 544 | void vector_path(ContextType ctx, VectorType src_vect, | |
135 | ScalarType *dst) KLEIDICV_STREAMING { | ||
136 | // Call the common vector path. | ||
137 | 544 | auto pg = ctx.predicate(); | |
138 | 544 | common_vector_path(pg, pg, pg, pg, src_vect, dst); | |
139 | 544 | } | |
140 | |||
141 | 116 | void tail_path(ContextType ctx, VectorType src_vect, | |
142 | ScalarType *dst) KLEIDICV_STREAMING { | ||
143 | 116 | auto pg = ctx.predicate(); | |
144 | // Predicates for consecutive stores. | ||
145 | 116 | svbool_t pg_0, pg_1, pg_2, pg_3; | |
146 | 116 | VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2, pg_3); | |
147 | // Call the common vector path. | ||
148 | 116 | common_vector_path(pg_0, pg_1, pg_2, pg_3, src_vect, dst); | |
149 | 116 | } | |
150 | |||
151 | private: | ||
152 | 660 | void common_vector_path(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2, | |
153 | svbool_t pg_3, VectorType src_vect, | ||
154 | ScalarType *dst) KLEIDICV_STREAMING { | ||
155 | 660 | svuint8x2_t src_and_alpha = svcreate2(src_vect, VecTraits::svdup(-1)); | |
156 | |||
157 | // Convert from gray to RGBA using table-lookups. | ||
158 | 660 | VectorType dst_vec_0 = svtbl2(src_and_alpha, svget4(indices_, 0)); | |
159 | 660 | VectorType dst_vec_1 = svtbl2(src_and_alpha, svget4(indices_, 1)); | |
160 | 660 | VectorType dst_vec_2 = svtbl2(src_and_alpha, svget4(indices_, 2)); | |
161 | 660 | VectorType dst_vec_3 = svtbl2(src_and_alpha, svget4(indices_, 3)); | |
162 | |||
163 | 660 | svst1(pg_0, &dst[0], dst_vec_0); | |
164 | 660 | svst1_vnum(pg_1, &dst[0], 1, dst_vec_1); | |
165 | 660 | svst1_vnum(pg_2, &dst[0], 2, dst_vec_2); | |
166 | 660 | svst1_vnum(pg_3, &dst[0], 3, dst_vec_3); | |
167 | 660 | } | |
168 | |||
169 | 128 | void initialize_indices() KLEIDICV_STREAMING { | |
170 | // Number of four-tuple elements. | ||
171 | 128 | uint64_t num_four_tuples = VecTraits::num_lanes() / 4; | |
172 | // Index of alpha. | ||
173 | 128 | uint64_t idx_alpha = VecTraits::num_lanes(); | |
174 | // Start index. | ||
175 | 128 | uint64_t start_index = idx_alpha << 24; | |
176 | |||
177 | // Index generation is similar to that of GrayToRGB above. | ||
178 | 256 | VectorType indices_0 = | |
179 | 128 | svreinterpret_u8_u32(svindex_u32(start_index, 0x10101)); | |
180 | |||
181 | // Repeat for 'indices_1' but add number of 4-tuple elements. | ||
182 | 128 | start_index += 0x10101 * num_four_tuples; | |
183 | 256 | VectorType indices_1 = | |
184 | 128 | svreinterpret_u8_u32(svindex_u32(start_index, 0x10101)); | |
185 | |||
186 | // Similarly to 'indices_1', but add twice the number of 4-tuple elements. | ||
187 | 128 | start_index += 0x10101 * num_four_tuples; | |
188 | 256 | VectorType indices_2 = | |
189 | 128 | svreinterpret_u8_u32(svindex_u32(start_index, 0x10101)); | |
190 | |||
191 | // Similarly to 'indices_1', but add three times the number of 4-tuple | ||
192 | // elements. | ||
193 | 128 | start_index += 0x10101 * num_four_tuples; | |
194 | 256 | VectorType indices_3 = | |
195 | 128 | svreinterpret_u8_u32(svindex_u32(start_index, 0x10101)); | |
196 | |||
197 | 128 | indices_ = svcreate4(indices_0, indices_1, indices_2, indices_3); | |
198 | 128 | } | |
199 | |||
200 | svuint8x4_t &indices_; | ||
201 | }; // end of class GrayToRGBAWithLookUpTable<ScalarType> | ||
202 | #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
203 | |||
204 | 167 | KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t gray_to_rgb_u8_sc( | |
205 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
206 | size_t width, size_t height) KLEIDICV_STREAMING { | ||
207 |
4/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 158 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 158 times.
|
167 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
208 |
4/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 149 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 149 times.
|
158 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
209 |
6/6✓ Branch 0 taken 9 times.
✓ Branch 1 taken 140 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 131 times.
✓ Branch 4 taken 18 times.
✓ Branch 5 taken 131 times.
|
149 | CHECK_IMAGE_SIZE(width, height); |
210 | |||
211 | 131 | Rectangle rect{width, height}; | |
212 | 131 | Rows<const uint8_t> src_rows{src, src_stride}; | |
213 | 131 | Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */}; | |
214 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
215 | GrayToRGB<uint8_t> operation; | ||
216 | #else | ||
217 | 131 | svuint8x3_t table_indices; | |
218 | 131 | GrayToRGB<uint8_t> operation{table_indices}; | |
219 | #endif | ||
220 | 131 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
221 | 131 | return KLEIDICV_OK; | |
222 | 167 | } | |
223 | |||
224 | 167 | KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t gray_to_rgba_u8_sc( | |
225 | const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, | ||
226 | size_t width, size_t height) KLEIDICV_STREAMING { | ||
227 |
4/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 158 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 158 times.
|
167 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
228 |
4/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 149 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 149 times.
|
158 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
229 |
6/6✓ Branch 0 taken 9 times.
✓ Branch 1 taken 140 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 131 times.
✓ Branch 4 taken 18 times.
✓ Branch 5 taken 131 times.
|
149 | CHECK_IMAGE_SIZE(width, height); |
230 | |||
231 | 131 | Rectangle rect{width, height}; | |
232 | 131 | Rows<const uint8_t> src_rows{src, src_stride}; | |
233 | 131 | Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */}; | |
234 | |||
235 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
236 | GrayToRGBAWithInterleaving<int8_t> operation{}; | ||
237 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | ||
238 | #else | ||
239 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 128 times.
|
131 | if (svcntb() > 128) { |
240 | 3 | GrayToRGBAWithInterleaving<uint8_t> operation{}; | |
241 | 3 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
242 | 3 | } else { | |
243 | 128 | svuint8x4_t table_indices; | |
244 | 128 | GrayToRGBAWithLookUpTable<uint8_t> operation{table_indices}; | |
245 | 128 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
246 | 128 | } | |
247 | #endif | ||
248 | 131 | return KLEIDICV_OK; | |
249 | 167 | } | |
250 | |||
251 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
252 | |||
253 | #endif // KLEIDICV_GRAY_TO_RGB_SC_H | ||
254 |