KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/gray_to_rgb_sc.h
Date: 2026-01-20 20:58:59
Exec Total Coverage
Lines: 137 137 100.0%
Functions: 44 46 95.7%
Branches: 32 32 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_GRAY_TO_RGB_SC_H
6 #define KLEIDICV_GRAY_TO_RGB_SC_H
7
8 #include "kleidicv/conversions/gray_to_rgb.h"
9 #include "kleidicv/sve2.h"
10
11 namespace KLEIDICV_TARGET_NAMESPACE {
12
// GrayToRGB: per-vector operation that expands one vector of gray samples
// into three vectors of RGB output (R = G = B = gray) and stores them at dst.
//
// Two build-time variants, selected by KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE:
//  - set:   three copies of the source vector are written with the
//           interleaving store svst3.
//  - unset: the output is produced with table lookups (svtbl) driven by
//           index vectors computed once in initialize_indices(), then written
//           with contiguous stores. This variant also derives from
//           UsesTailPath and provides a separate tail_path().
// The execution counts in this listing appear only on the non-interleaving
// variant.
13 template <typename ScalarType>
14 class GrayToRGB final :
15 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
16 public UsesTailPath,
17 #endif
18 public UnrollTwice {
19 public:
20 using ContextType = Context;
21 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
22 using VectorType = typename VecTraits::VectorType;
23 using Vector3Type = typename VecTraits::Vector3Type;
24
25 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Interleaving variant: store (src, src, src) as interleaved triples under
// the predicate taken from ctx.
// NOTE(review): the tuple type is spelled svuint8x3_t rather than Vector3Type,
// so this path looks usable only with ScalarType == uint8_t - confirm.
26 void vector_path(ContextType ctx, VectorType src_vect,
27 ScalarType *dst) KLEIDICV_STREAMING {
28 auto pg = ctx.predicate();
29 svuint8x3_t dst_vect = svcreate3(src_vect, src_vect, src_vect);
30 svst3(pg, dst, dst_vect);
31 }
32 #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// Binds indices_ to the caller-provided tuple and fills it with the lookup
// indices. indices_ is a reference member, so the tuple itself lives in the
// caller's variable, not in this object.
33 249 explicit GrayToRGB(svuint8x3_t &indices) KLEIDICV_STREAMING
34 249 : indices_{indices} {
35 249 initialize_indices();
36 249 }
37
// Full-vector path: the context argument is unused. On SME2 targets the first
// two result vectors are written by one multi-vector store and the third by a
// separate store; otherwise three stores with an all-true predicate are used.
38 622 void vector_path(ContextType, VectorType src_vect,
39 ScalarType *dst) KLEIDICV_STREAMING {
40 622 Vector3Type dst_vect = common_vector_path(src_vect);
41 #if KLEIDICV_TARGET_SME2
42 92 two_plus_one_store(dst, dst_vect);
43 #else
44 530 svbool_t pg = VecTraits::svptrue();
45 530 common_store(pg, pg, pg, dst, dst_vect);
46 #endif
47 622 }
48
// Tail path: same lookup as vector_path, but each of the three stores gets
// its own predicate derived from ctx.predicate().
// NOTE(review): make_consecutive_predicates is defined elsewhere; presumably
// it spreads the source-lane predicate over the three output vectors - confirm.
49 285 void tail_path(ContextType ctx, VectorType src_vect,
50 ScalarType *dst) KLEIDICV_STREAMING {
51 285 auto pg = ctx.predicate();
52 // Predicates for consecutive stores.
53 285 svbool_t pg_0, pg_1, pg_2;
54 285 VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2);
55 // Call the common vector path.
56 285 Vector3Type dst_vect = common_vector_path(src_vect);
57 285 common_store(pg_0, pg_1, pg_2, dst, dst_vect);
58 285 }
59
60 private:
// Builds the three output vectors by looking up src_vect with each of the
// three index vectors held in indices_.
61 907 Vector3Type common_vector_path(VectorType src_vect) KLEIDICV_STREAMING {
62 // Convert from gray to RGB using table-lookups.
63 1814 return svcreate3(svtbl(src_vect, svget3(indices_, 0)),
64 907 svtbl(src_vect, svget3(indices_, 1)),
65 907 svtbl(src_vect, svget3(indices_, 2)));
66 }
67
68 #if KLEIDICV_TARGET_SME2
// SME2 store: vectors 0 and 1 go out as a two-vector tuple under a
// predicate-as-counter; vector 2 is stored at vector offset 2 from dst with
// an all-true predicate.
69 92 void two_plus_one_store(ScalarType *dst,
70 Vector3Type dst_vect) KLEIDICV_STREAMING {
71 92 svcount_t p_counter = VecTraits::svptrue_c();
72 92 svst1(p_counter, dst, svcreate2(svget3(dst_vect, 0), svget3(dst_vect, 1)));
73 92 svst1_vnum(VecTraits::svptrue(), dst, 2, svget3(dst_vect, 2));
74 92 }
75 #endif
76
// Stores the three result vectors at dst, dst + 1 vector and dst + 2 vectors,
// each under its own predicate.
77 815 void common_store(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2,
78 ScalarType *dst, Vector3Type dst_vect) KLEIDICV_STREAMING {
79 815 svst1(pg_0, &dst[0], svget3(dst_vect, 0));
80 815 svst1_vnum(pg_1, &dst[0], 1, svget3(dst_vect, 1));
81 815 svst1_vnum(pg_2, &dst[0], 2, svget3(dst_vect, 2));
82 815 }
83
// Computes the three lookup index vectors and writes them to indices_.
// Each index is a lane number divided by 3, so that three consecutive output
// bytes select the same source lane. The division uses a multiply-high by 171
// followed by a right shift by 1, i.e. (x * 171) >> 9, which equals x / 3 for
// 8-bit x. Vectors 1 and 2 start from the remainder of svcntb() (resp.
// 2 * svcntb()) modulo 3 and add the corresponding quotient afterwards.
84 249 void initialize_indices() KLEIDICV_STREAMING {
85 // All-true predicate to shorten code.
86 249 svbool_t pg_all = VecTraits::svptrue();
87 // Constant used for division by 3.
88 249 VectorType const_171 = VecTraits::svdup(171);
89 // Generated indices.
90 249 VectorType indices_0, indices_1, indices_2;
91
92 249 indices_0 = svindex_u8(0, 1);
93

// NOTE(review): 256-byte vectors get a dedicated construction (svext over a
// zero vector combined with saturating adds); presumably the generic 8-bit
// index sequences below would wrap at that vector length - confirm.
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 246 times.
249 if (KLEIDICV_UNLIKELY(svcntb() == 256)) {
95 3 indices_1 = svext(
96 3 svdup_u8(0),
97 3 svqadd(svindex_u8(svcntb() % 3, 1), static_cast<uint8_t>(2)), 254);
98 6 indices_2 = svext(svdup_u8(0),
99 3 svqadd(svindex_u8(0, 1), static_cast<uint8_t>(3)), 255);
100 3 } else {
101 246 indices_1 = svindex_u8(svcntb() % 3, 1);
102 246 indices_2 = svindex_u8((svcntb() * 2) % 3, 1);
103 }
104
// Divide every lane by 3, then add the per-vector base offset with a
// saturating add.
105 249 indices_0 = svlsr_x(pg_all, svmulh_x(pg_all, indices_0, const_171), 1);
106 249 indices_1 = svqadd_x(
107 249 pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_1, const_171), 1),
108 249 static_cast<ScalarType>(svcntb() / 3));
109 249 indices_2 = svqadd_x(
110 249 pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_2, const_171), 1),
111 249 static_cast<ScalarType>((svcntb() * 2) / 3));
112
113 249 indices_ = svcreate3(indices_0, indices_1, indices_2);
114 249 }
115
// Reference to the caller-owned tuple holding the three lookup index vectors.
116 svuint8x3_t &indices_;
117 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
118 }; // end of class GrayToRGB<ScalarType>
119
// GrayToRGBAWithInterleaving: per-vector operation that writes RGBA output
// from gray input using the interleaving store svst4. The stored tuple is
// (gray, gray, gray, 0xff), so the alpha channel is always 0xff.
// The class has no tail_path and does not derive from UsesTailPath; the only
// predicate it uses is the one obtained from the context.
120 template <typename ScalarType>
121 class GrayToRGBAWithInterleaving final : public UnrollTwice {
122 public:
123 using ContextType = Context;
124 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
125 using VectorType = typename VecTraits::VectorType;
// Stores one vector of gray samples as interleaved RGBA at dst, under the
// predicate supplied by ctx.
// NOTE(review): alpha and the tuple are spelled with u8 types (svdup_u8,
// svuint8x4_t), so this looks usable only with ScalarType == uint8_t - confirm.
126 24 void vector_path(ContextType ctx, VectorType src_vect,
127 ScalarType *dst) KLEIDICV_STREAMING {
128 24 auto pg = ctx.predicate();
129 24 svuint8_t alpha = svdup_u8(0xff);
130 24 svuint8x4_t dst_vect = svcreate4(src_vect, src_vect, src_vect, alpha);
131
132 24 svst4(pg, dst, dst_vect);
133 24 }
134 }; // end of class GrayToRGBAWithInterleaving<ScalarType>
135
136 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
// GrayToRGBAWithLookUpTable: per-vector operation that produces RGBA output
// from gray input with two-vector table lookups (svtbl2) and contiguous
// stores. The lookup table is the pair (source vector, all-ones vector); the
// four index vectors are computed once by initialize_indices() into the
// caller-provided tuple referenced by indices_.
// In this file it is only instantiated by gray_to_rgba_u8_sc, on the branch
// taken when svcntb() is not greater than 128.
137 template <typename ScalarType>
138 class GrayToRGBAWithLookUpTable final : public UnrollTwice,
139 public UsesTailPath {
140 public:
141 using ContextType = Context;
142 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
143 using VectorType = typename VecTraits::VectorType;
144 using Vector4Type = typename VecTraits::Vector4Type;
// Binds indices_ to the caller-provided tuple and fills it with the lookup
// indices. indices_ is a reference member, so the tuple itself lives in the
// caller's variable.
145 246 explicit GrayToRGBAWithLookUpTable(svuint8x4_t &indices) KLEIDICV_STREAMING
146 246 : indices_{indices} {
147 246 initialize_indices();
148 246 }
149
// Full-vector path: the context argument is unused. On SME2 targets all four
// result vectors are written by a single store under a predicate-as-counter;
// otherwise four stores with an all-true predicate are used.
150 602 void vector_path(ContextType, VectorType src_vect,
151 ScalarType *dst) KLEIDICV_STREAMING {
152 // Call the common vector path.
153 602 Vector4Type dst_vect = common_vector_path(src_vect);
154 #if KLEIDICV_TARGET_SME2
155 92 svcount_t p_counter = VecTraits::svptrue_c();
156 92 svst1(p_counter, &dst[0], dst_vect);
157 #else
158 510 svbool_t pg = VecTraits::svptrue();
159 510 common_store(pg, pg, pg, pg, dst, dst_vect);
160 #endif
161 602 }
162
// Tail path: same lookup as vector_path, but each of the four stores gets its
// own predicate derived from ctx.predicate().
// NOTE(review): make_consecutive_predicates is defined elsewhere; presumably
// it spreads the source-lane predicate over the four output vectors - confirm.
163 281 void tail_path(ContextType ctx, VectorType src_vect,
164 ScalarType *dst) KLEIDICV_STREAMING {
165 281 auto pg = ctx.predicate();
166 // Predicates for consecutive stores.
167 281 svbool_t pg_0, pg_1, pg_2, pg_3;
168 281 VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2, pg_3);
169 // Call the common vector path.
170 281 Vector4Type dst_vect = common_vector_path(src_vect);
171 281 common_store(pg_0, pg_1, pg_2, pg_3, dst, dst_vect);
172 281 }
173
174 private:
// Builds the four output vectors. The table passed to svtbl2 is
// (src_vect, svdup(-1)): indices below the lane count select a gray sample,
// and the index equal to the lane count selects the first lane of the
// all-ones vector, which supplies alpha.
175 883 Vector4Type common_vector_path(VectorType src_vect) KLEIDICV_STREAMING {
176 883 svuint8x2_t src_and_alpha = svcreate2(src_vect, VecTraits::svdup(-1));
177 // Convert from gray to RGBA using table-lookups.
178 2649 return svcreate4(svtbl2(src_and_alpha, svget4(indices_, 0)),
179 883 svtbl2(src_and_alpha, svget4(indices_, 1)),
180 883 svtbl2(src_and_alpha, svget4(indices_, 2)),
181 883 svtbl2(src_and_alpha, svget4(indices_, 3)));
182 883 }
183
// Stores the four result vectors at dst, dst + 1, dst + 2 and dst + 3
// vectors, each under its own predicate.
184 791 void common_store(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2, svbool_t pg_3,
185 ScalarType *dst, Vector4Type dst_vect) KLEIDICV_STREAMING {
186 791 svst1(pg_0, &dst[0], svget4(dst_vect, 0));
187 791 svst1_vnum(pg_1, &dst[0], 1, svget4(dst_vect, 1));
188 791 svst1_vnum(pg_2, &dst[0], 2, svget4(dst_vect, 2));
189 791 svst1_vnum(pg_3, &dst[0], 3, svget4(dst_vect, 3));
190 791 }
191
// Computes the four lookup index vectors and writes them to indices_.
// The indices are generated as 32-bit lanes and reinterpreted as bytes: the
// start value carries idx_alpha in bits 24..31 and zero in the three lower
// bytes, and the step 0x10101 adds 1 to each of the three lower bytes per
// 32-bit lane. Each 32-bit lane therefore encodes one RGBA pixel as
// (k, k, k, idx_alpha).
// NOTE(review): this relies on the low byte of a 32-bit lane being the first
// byte after the reinterpret (little-endian lane layout) - confirm.
// Every following index vector starts num_four_tuples pixels later.
192 246 void initialize_indices() KLEIDICV_STREAMING {
193 // Number of four-tuple elements.
194 246 uint64_t num_four_tuples = VecTraits::num_lanes() / 4;
195 // Index of alpha.
196 246 uint64_t idx_alpha = VecTraits::num_lanes();
197 // Start index.
198 246 uint64_t start_index = idx_alpha << 24;
199
200 // Index generation is similar to that of GrayToRGB above.
201 492 VectorType indices_0 =
202 246 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
203
204 // Repeat for 'indices_1' but add number of 4-tuple elements.
205 246 start_index += 0x10101 * num_four_tuples;
206 492 VectorType indices_1 =
207 246 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
208
209 // Similarly to 'indices_1', but add twice the number of 4-tuple elements.
210 246 start_index += 0x10101 * num_four_tuples;
211 492 VectorType indices_2 =
212 246 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
213
214 // Similarly to 'indices_1', but add three times the number of 4-tuple
215 // elements.
216 246 start_index += 0x10101 * num_four_tuples;
217 492 VectorType indices_3 =
218 246 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
219
220 246 indices_ = svcreate4(indices_0, indices_1, indices_2, indices_3);
221 246 }
222
// Reference to the caller-owned tuple holding the four lookup index vectors.
223 svuint8x4_t &indices_;
224 }; // end of class GrayToRGBAWithLookUpTable<ScalarType>
225 #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
226
// Converts an 8-bit single-channel (gray) image to 8-bit three-channel RGB.
//
// Parameters:
//   src, src_stride - source image pointer and row stride.
//   dst, dst_stride - destination image pointer and row stride; destination
//                     rows are set up with 3 channels per pixel.
//   width, height   - image size, used to build the processed Rectangle.
// Returns KLEIDICV_OK after all rows have been processed.
// NOTE(review): the CHECK_* macros are defined elsewhere; the execution counts
// in this listing (297 entries, 249 reaching the row setup) suggest they
// return early on invalid arguments - confirm the returned error codes there.
227 297 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t gray_to_rgb_u8_sc(
228 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
229 size_t width, size_t height) KLEIDICV_STREAMING {
230
4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 285 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 285 times.
297 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
231
4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 273 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 273 times.
285 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
232
6/6
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 261 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 249 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 249 times.
273 CHECK_IMAGE_SIZE(width, height);
233
234 249 Rectangle rect{width, height};
235 249 Rows<const uint8_t> src_rows{src, src_stride};
236 249 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */};
237 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
238 GrayToRGB<uint8_t> operation;
239 #else
// The lookup-index tuple is declared here because GrayToRGB only keeps a
// reference to it; the constructor fills it in.
240 249 svuint8x3_t table_indices;
241 249 GrayToRGB<uint8_t> operation{table_indices};
242 #endif
243 249 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
244 249 return KLEIDICV_OK;
245 297 }
246
// Converts an 8-bit single-channel (gray) image to 8-bit four-channel RGBA.
// Both operations used here produce an alpha value of 0xff.
//
// Parameters:
//   src, src_stride - source image pointer and row stride.
//   dst, dst_stride - destination image pointer and row stride; destination
//                     rows are set up with 4 channels per pixel.
//   width, height   - image size, used to build the processed Rectangle.
// Returns KLEIDICV_OK after all rows have been processed.
// NOTE(review): the CHECK_* macros are defined elsewhere; the execution counts
// in this listing (297 entries, 249 reaching the row setup) suggest they
// return early on invalid arguments - confirm the returned error codes there.
247 297 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t gray_to_rgba_u8_sc(
248 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
249 size_t width, size_t height) KLEIDICV_STREAMING {
250
4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 285 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 285 times.
297 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
251
4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 273 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 273 times.
285 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
252
6/6
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 261 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 249 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 249 times.
273 CHECK_IMAGE_SIZE(width, height);
253
254 249 Rectangle rect{width, height};
255 249 Rows<const uint8_t> src_rows{src, src_stride};
256 249 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */};
257
258 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
259 GrayToRGBAWithInterleaving<uint8_t> operation{};
260 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
261 #else
262
// Runtime dispatch on the vector length in bytes: vectors longer than 128
// bytes use the interleaving-store operation, all others use the lookup-table
// operation.
// NOTE(review): presumably the lookup-table indices (8-bit, with the alpha
// index equal to the lane count) stop working beyond 128-byte vectors -
// confirm the exact reason for the threshold.
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 246 times.
249 if (svcntb() > 128) {
263 3 GrayToRGBAWithInterleaving<uint8_t> operation{};
264 3 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
265 3 } else {
266 246 svuint8x4_t table_indices;
267 246 GrayToRGBAWithLookUpTable<uint8_t> operation{table_indices};
268 246 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
269 246 }
270 #endif
271 249 return KLEIDICV_OK;
272 297 }
273
274 } // namespace KLEIDICV_TARGET_NAMESPACE
275
276 #endif // KLEIDICV_GRAY_TO_RGB_SC_H
277