KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/gray_to_rgb_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 137 137 100.0%
Functions: 44 46 95.7%
Branches: 32 32 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_GRAY_TO_RGB_SC_H
6 #define KLEIDICV_GRAY_TO_RGB_SC_H
7
8 #include "kleidicv/conversions/gray_to_rgb.h"
9 #include "kleidicv/sve2.h"
10
11 namespace KLEIDICV_TARGET_NAMESPACE {
12
// Row operation that expands every gray element into three identical
// channel values (R, G, B).
//
// Two build variants exist, selected by KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE:
//  * set:     the source vector is replicated three times and written with the
//             interleaving three-vector store svst3().
//  * not set: three table lookups (svtbl) rearrange the source vector into
//             three already-interleaved output vectors, which are written with
//             plain consecutive stores. Only this variant derives from
//             UsesTailPath and provides tail_path().
13 template <typename ScalarType>
14 class GrayToRGB final :
15 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
16 public UsesTailPath,
17 #endif
18 public UnrollTwice {
19 public:
20 using ContextType = Context;
21 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
22 using VectorType = typename VecTraits::VectorType;
23 using Vector3Type = typename VecTraits::Vector3Type;
24
25 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Interleaving variant: builds the tuple (src, src, src) and stores it with
// svst3() under the predicate supplied by the context.
26 void vector_path(ContextType ctx, VectorType src_vect,
27 ScalarType *dst) KLEIDICV_STREAMING {
28 auto pg = ctx.predicate();
29 svuint8x3_t dst_vect = svcreate3(src_vect, src_vect, src_vect);
30 svst3(pg, dst, dst_vect);
31 }
32 #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Binds the operation to caller-provided storage for the three lookup-index
// vectors and fills that storage immediately. The referenced object must
// outlive this operation.
// NOTE(review): the indices are presumably held by reference because
// sizeless SVE types cannot be stored as data members - confirm.
33 195 explicit GrayToRGB(svuint8x3_t &indices) KLEIDICV_STREAMING
34 195 : indices_{indices} {
35 195 initialize_indices();
36 195 }
37
// Full-vector path: converts one source vector into three output vectors.
// On SME2 targets the result is written by two_plus_one_store(); otherwise
// by common_store() with all-true predicates. The context is not used.
38 666 void vector_path(ContextType, VectorType src_vect,
39 ScalarType *dst) KLEIDICV_STREAMING {
40 666 Vector3Type dst_vect = common_vector_path(src_vect);
41 #if KLEIDICV_TARGET_SME2
42 102 two_plus_one_store(dst, dst_vect);
43 #else
44 564 svbool_t pg = VecTraits::svptrue();
45 564 common_store(pg, pg, pg, dst, dst_vect);
46 #endif
47 666 }
48
// Tail path: same conversion as vector_path(), but each of the three stores
// uses its own predicate derived from the context predicate.
// NOTE(review): make_consecutive_predicates() is defined elsewhere; it
// presumably spreads the active source-element count over the three
// consecutive output vectors - confirm against VecTraits.
49 188 void tail_path(ContextType ctx, VectorType src_vect,
50 ScalarType *dst) KLEIDICV_STREAMING {
51 188 auto pg = ctx.predicate();
52 // Predicates for consecutive stores.
53 188 svbool_t pg_0, pg_1, pg_2;
54 188 VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2);
55 // Call the common vector path.
56 188 Vector3Type dst_vect = common_vector_path(src_vect);
57 188 common_store(pg_0, pg_1, pg_2, dst, dst_vect);
58 188 }
59
60 private:
// Performs the three table lookups: output vector k is svtbl(src_vect,
// indices_[k]) for k = 0, 1, 2. Returns the three vectors as a tuple.
61 854 Vector3Type common_vector_path(VectorType src_vect) KLEIDICV_STREAMING {
62 // Convert from gray to RGB using table-lookups.
63 1708 return svcreate3(svtbl(src_vect, svget3(indices_, 0)),
64 854 svtbl(src_vect, svget3(indices_, 1)),
65 854 svtbl(src_vect, svget3(indices_, 2)));
66 }
67
68 #if KLEIDICV_TARGET_SME2
// SME2-only store: vectors 0 and 1 are written together as a two-vector
// tuple using an all-true predicate-as-counter, then vector 2 is written
// separately at a vector offset of 2 from 'dst'.
69 102 void two_plus_one_store(ScalarType *dst,
70 Vector3Type dst_vect) KLEIDICV_STREAMING {
71 102 svcount_t p_counter = VecTraits::svptrue_c();
72 102 svst1(p_counter, dst, svcreate2(svget3(dst_vect, 0), svget3(dst_vect, 1)));
73 102 svst1_vnum(VecTraits::svptrue(), dst, 2, svget3(dst_vect, 2));
74 102 }
75 #endif
76
// Writes the three output vectors to consecutive vector-sized slots starting
// at 'dst' (vector offsets 0, 1 and 2), each under its own predicate.
77 752 void common_store(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2,
78 ScalarType *dst, Vector3Type dst_vect) KLEIDICV_STREAMING {
79 752 svst1(pg_0, &dst[0], svget3(dst_vect, 0));
80 752 svst1_vnum(pg_1, &dst[0], 1, svget3(dst_vect, 1));
81 752 svst1_vnum(pg_2, &dst[0], 2, svget3(dst_vect, 2));
82 752 }
83
// Generates the three lookup-index vectors and writes them to 'indices_'.
//
// Output byte j of the interleaved result must read source element j / 3.
// For output vector k, lane i corresponds to j = k * svcntb() + i, so
//   index = ((k * svcntb()) % 3 + i) / 3 + (k * svcntb()) / 3.
// The code builds the sequence "(k * svcntb()) % 3 + i" with svindex_u8,
// divides it by 3, and then adds the constant (k * svcntb()) / 3.
// The division by 3 is done as a multiply-high by 171 followed by a logical
// shift right by 1, i.e. (x * 171) >> 9.
84 195 void initialize_indices() KLEIDICV_STREAMING {
85 // All-true predicate to shorten code.
86 195 svbool_t pg_all = VecTraits::svptrue();
87 // Constant used for division by 3.
88 195 VectorType const_171 = VecTraits::svdup(171);
89 // Generated indices.
90 195 VectorType indices_0, indices_1, indices_2;
91
92 195 indices_0 = svindex_u8(0, 1);
93
// Special case for a vector length of 256 bytes: the start sequences for
// vectors 1 and 2 are built from a saturating add and an svext() against a
// zero vector instead of a plain svindex_u8().
// NOTE(review): this appears to avoid the 8-bit wrap-around that
// "start + lane" would hit when there are 256 lanes - confirm.

2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 192 times.
195 if (KLEIDICV_UNLIKELY(svcntb() == 256)) {
95 3 indices_1 = svext(
96 3 svdup_u8(0),
97 3 svqadd(svindex_u8(svcntb() % 3, 1), static_cast<uint8_t>(2)), 254);
98 6 indices_2 = svext(svdup_u8(0),
99 3 svqadd(svindex_u8(0, 1), static_cast<uint8_t>(3)), 255);
100 3 } else {
101 192 indices_1 = svindex_u8(svcntb() % 3, 1);
102 192 indices_2 = svindex_u8((svcntb() * 2) % 3, 1);
103 }
104
// Divide each sequence by 3, then add the per-vector base offset using a
// saturating add.
105 195 indices_0 = svlsr_x(pg_all, svmulh_x(pg_all, indices_0, const_171), 1);
106 195 indices_1 = svqadd_x(
107 195 pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_1, const_171), 1),
108 195 static_cast<ScalarType>(svcntb() / 3));
109 195 indices_2 = svqadd_x(
110 195 pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_2, const_171), 1),
111 195 static_cast<ScalarType>((svcntb() * 2) / 3));
112
113 195 indices_ = svcreate3(indices_0, indices_1, indices_2);
114 195 }
115
// Caller-owned storage holding the three lookup-index vectors.
116 svuint8x3_t &indices_;
117 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
118 }; // end of class GrayToRGB<ScalarType>
119
// Row operation that expands every gray element into four channel values
// (gray, gray, gray, 0xff) using the interleaving four-vector store svst4().
// It provides only vector_path(); the store is governed by the predicate
// taken from the context.
120 template <typename ScalarType>
121 class GrayToRGBAWithInterleaving final : public UnrollTwice {
122 public:
123 using ContextType = Context;
124 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
125 using VectorType = typename VecTraits::VectorType;
// Builds the tuple (src, src, src, alpha) with alpha set to 0xff in every
// lane and writes it to 'dst' with svst4() under ctx.predicate().
126 24 void vector_path(ContextType ctx, VectorType src_vect,
127 ScalarType *dst) KLEIDICV_STREAMING {
128 24 auto pg = ctx.predicate();
129 24 svuint8_t alpha = svdup_u8(0xff);
130 24 svuint8x4_t dst_vect = svcreate4(src_vect, src_vect, src_vect, alpha);
131
132 24 svst4(pg, dst, dst_vect);
133 24 }
134 }; // end of class GrayToRGBAWithInterleaving<ScalarType>
135
136 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Row operation that expands every gray element into four channel values
// (gray, gray, gray, alpha) using table lookups instead of an interleaving
// store. The lookup table is the pair (source vector, all-ones vector), so
// an index equal to the lane count selects the alpha value from the second
// table vector. The four index vectors live in caller-owned storage.
137 template <typename ScalarType>
138 class GrayToRGBAWithLookUpTable final : public UnrollTwice,
139 public UsesTailPath {
140 public:
141 using ContextType = Context;
142 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
143 using VectorType = typename VecTraits::VectorType;
144 using Vector4Type = typename VecTraits::Vector4Type;
// Binds the operation to caller-provided storage for the four lookup-index
// vectors and fills that storage immediately. The referenced object must
// outlive this operation.
145 192 explicit GrayToRGBAWithLookUpTable(svuint8x4_t &indices) KLEIDICV_STREAMING
146 192 : indices_{indices} {
147 192 initialize_indices();
148 192 }
149
// Full-vector path: converts one source vector into four output vectors.
// On SME2 targets the whole four-vector tuple is written by a single svst1()
// with an all-true predicate-as-counter; otherwise common_store() is used
// with all-true predicates. The context is not used.
150 646 void vector_path(ContextType, VectorType src_vect,
151 ScalarType *dst) KLEIDICV_STREAMING {
152 // Call the common vector path.
153 646 Vector4Type dst_vect = common_vector_path(src_vect);
154 #if KLEIDICV_TARGET_SME2
155 102 svcount_t p_counter = VecTraits::svptrue_c();
156 102 svst1(p_counter, &dst[0], dst_vect);
157 #else
158 544 svbool_t pg = VecTraits::svptrue();
159 544 common_store(pg, pg, pg, pg, dst, dst_vect);
160 #endif
161 646 }
162
// Tail path: same conversion as vector_path(), but each of the four stores
// uses its own predicate derived from the context predicate.
// NOTE(review): make_consecutive_predicates() is defined elsewhere; it
// presumably spreads the active source-element count over the four
// consecutive output vectors - confirm against VecTraits.
163 184 void tail_path(ContextType ctx, VectorType src_vect,
164 ScalarType *dst) KLEIDICV_STREAMING {
165 184 auto pg = ctx.predicate();
166 // Predicates for consecutive stores.
167 184 svbool_t pg_0, pg_1, pg_2, pg_3;
168 184 VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2, pg_3);
169 // Call the common vector path.
170 184 Vector4Type dst_vect = common_vector_path(src_vect);
171 184 common_store(pg_0, pg_1, pg_2, pg_3, dst, dst_vect);
172 184 }
173
174 private:
// Builds the two-vector table (src_vect, svdup(-1)) and performs four
// svtbl2 lookups, one per index vector. Returns the four result vectors as
// a tuple.
175 830 Vector4Type common_vector_path(VectorType src_vect) KLEIDICV_STREAMING {
176 830 svuint8x2_t src_and_alpha = svcreate2(src_vect, VecTraits::svdup(-1));
177 // Convert from gray to RGBA using table-lookups.
178 2490 return svcreate4(svtbl2(src_and_alpha, svget4(indices_, 0)),
179 830 svtbl2(src_and_alpha, svget4(indices_, 1)),
180 830 svtbl2(src_and_alpha, svget4(indices_, 2)),
181 830 svtbl2(src_and_alpha, svget4(indices_, 3)));
182 830 }
183
// Writes the four output vectors to consecutive vector-sized slots starting
// at 'dst' (vector offsets 0 to 3), each under its own predicate.
184 728 void common_store(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2, svbool_t pg_3,
185 ScalarType *dst, Vector4Type dst_vect) KLEIDICV_STREAMING {
186 728 svst1(pg_0, &dst[0], svget4(dst_vect, 0));
187 728 svst1_vnum(pg_1, &dst[0], 1, svget4(dst_vect, 1));
188 728 svst1_vnum(pg_2, &dst[0], 2, svget4(dst_vect, 2));
189 728 svst1_vnum(pg_3, &dst[0], 3, svget4(dst_vect, 3));
190 728 }
191
// Generates the four lookup-index vectors and writes them to 'indices_'.
//
// Each index vector is produced as a vector of 32-bit values and then
// reinterpreted as bytes. A 32-bit lane starts at (idx_alpha << 24) and
// advances by 0x10101 per lane, so lane i of the first vector has the value
// i in its three low bytes and idx_alpha in its top byte.
// NOTE(review): assuming little-endian byte order within a lane, that is the
// byte sequence (i, i, i, idx_alpha), i.e. gray, gray, gray, alpha.
// Each further index vector continues the same sequence num_four_tuples
// lanes later, because 0x10101 * num_four_tuples is added to the start value.
192 192 void initialize_indices() KLEIDICV_STREAMING {
193 // Number of four-tuple elements.
194 192 uint64_t num_four_tuples = VecTraits::num_lanes() / 4;
195 // Index of alpha.
196 192 uint64_t idx_alpha = VecTraits::num_lanes();
197 // Start index.
198 192 uint64_t start_index = idx_alpha << 24;
199
200 // Index generation is similar to that of GrayToRGB above.
201 384 VectorType indices_0 =
202 192 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
203
204 // Repeat for 'indices_1' but add number of 4-tuple elements.
205 192 start_index += 0x10101 * num_four_tuples;
206 384 VectorType indices_1 =
207 192 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
208
209 // Similarly to 'indices_1', but add twice the number of 4-tuple elements.
210 192 start_index += 0x10101 * num_four_tuples;
211 384 VectorType indices_2 =
212 192 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
213
214 // Similarly to 'indices_1', but add three times the number of 4-tuple
215 // elements.
216 192 start_index += 0x10101 * num_four_tuples;
217 384 VectorType indices_3 =
218 192 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
219
220 192 indices_ = svcreate4(indices_0, indices_1, indices_2, indices_3);
221 192 }
222
// Caller-owned storage holding the four lookup-index vectors.
223 svuint8x4_t &indices_;
224 }; // end of class GrayToRGBAWithLookUpTable<ScalarType>
225 #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
226
// Converts an 8-bit single-channel (gray) image into an 8-bit three-channel
// image by applying GrayToRGB<uint8_t> row by row.
//
// Parameters:
//   src, src_stride - source image pointer and stride.
//   dst, dst_stride - destination image pointer and stride; destination rows
//                     are set up with 3 channels per pixel.
//   width, height   - image dimensions passed to Rectangle.
// Returns KLEIDICV_OK once the rows have been processed.
// NOTE(review): the CHECK_* macros are defined elsewhere; they presumably
// return an error code early when an argument is invalid - confirm. Stride
// units are not visible here either.
227 243 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t gray_to_rgb_u8_sc(
228 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
229 size_t width, size_t height) KLEIDICV_STREAMING {
230

4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 231 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 231 times.
243 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
231

4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 219 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 219 times.
231 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
232

6/6
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 207 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 195 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 195 times.
219 CHECK_IMAGE_SIZE(width, height);
233
234 195 Rectangle rect{width, height};
235 195 Rows<const uint8_t> src_rows{src, src_stride};
236 195 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */};
// In the lookup-table build the index vectors are a local variable here and
// the operation only holds a reference to them, so they stay alive for the
// whole apply_operation_by_rows() call.
237 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
238 GrayToRGB<uint8_t> operation;
239 #else
240 195 svuint8x3_t table_indices;
241 195 GrayToRGB<uint8_t> operation{table_indices};
242 #endif
243 195 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
244 195 return KLEIDICV_OK;
245 243 }
246
// Converts an 8-bit single-channel (gray) image into an 8-bit four-channel
// image, row by row.
//
// Parameters:
//   src, src_stride - source image pointer and stride.
//   dst, dst_stride - destination image pointer and stride; destination rows
//                     are set up with 4 channels per pixel.
//   width, height   - image dimensions passed to Rectangle.
// Returns KLEIDICV_OK once the rows have been processed.
// NOTE(review): the CHECK_* macros are defined elsewhere; they presumably
// return an error code early when an argument is invalid - confirm. Stride
// units are not visible here either.
247 243 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t gray_to_rgba_u8_sc(
248 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
249 size_t width, size_t height) KLEIDICV_STREAMING {
250

4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 231 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 231 times.
243 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
251

4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 219 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 219 times.
231 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
252

6/6
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 207 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 195 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 195 times.
219 CHECK_IMAGE_SIZE(width, height);
253
254 195 Rectangle rect{width, height};
255 195 Rows<const uint8_t> src_rows{src, src_stride};
256 195 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */};
257
// Operation selection: builds that prefer interleaving stores always use
// GrayToRGBAWithInterleaving. Other builds use the lookup-table operation
// unless the vector length exceeds 128 bytes, in which case they also fall
// back to the interleaving operation.
// NOTE(review): the fallback is presumably needed because the lookup indices
// are 8-bit and must address a two-vector table - confirm.
258 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
259 GrayToRGBAWithInterleaving<uint8_t> operation{};
260 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
261 #else
262

2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 192 times.
195 if (svcntb() > 128) {
263 3 GrayToRGBAWithInterleaving<uint8_t> operation{};
264 3 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
265 3 } else {
266 192 svuint8x4_t table_indices;
267 192 GrayToRGBAWithLookUpTable<uint8_t> operation{table_indices};
268 192 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
269 192 }
270 #endif
271 195 return KLEIDICV_OK;
272 243 }
273
274 } // namespace KLEIDICV_TARGET_NAMESPACE
275
276 #endif // KLEIDICV_GRAY_TO_RGB_SC_H
277