KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/gray_to_rgb_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 122 122 100.0%
Functions: 25 26 96.2%
Branches: 32 32 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_GRAY_TO_RGB_SC_H
6 #define KLEIDICV_GRAY_TO_RGB_SC_H
7
8 #include "kleidicv/conversions/gray_to_rgb.h"
9 #include "kleidicv/kleidicv.h"
10 #include "kleidicv/sve2.h"
11
12 namespace KLEIDICV_TARGET_NAMESPACE {
13
14 template <typename ScalarType>
15 class GrayToRGB final :
16 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
17 public UsesTailPath,
18 #endif
19 public UnrollTwice {
20 public:
21 using ContextType = Context;
22 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
23 using VectorType = typename VecTraits::VectorType;
24
25 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
26 void vector_path(ContextType ctx, VectorType src_vect,
27 ScalarType *dst) KLEIDICV_STREAMING {
28 auto pg = ctx.predicate();
29 svuint8x3_t dst_vect = svcreate3(src_vect, src_vect, src_vect);
30 svst3(pg, dst, dst_vect);
31 }
32 #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
33 131 explicit GrayToRGB(svuint8x3_t &indices) KLEIDICV_STREAMING
34 131 : indices_{indices} {
35 131 initialize_indices();
36 131 }
37
38 564 void vector_path(ContextType ctx, VectorType src_vect,
39 ScalarType *dst) KLEIDICV_STREAMING {
40 // Call the common vector path.
41 564 auto pg = ctx.predicate();
42 564 common_vector_path(pg, pg, pg, src_vect, dst);
43 564 }
44
45 120 void tail_path(ContextType ctx, VectorType src_vect,
46 ScalarType *dst) KLEIDICV_STREAMING {
47 120 auto pg = ctx.predicate();
48 // Predicates for consecutive stores.
49 120 svbool_t pg_0, pg_1, pg_2;
50 120 VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2);
51 // Call the common vector path.
52 120 common_vector_path(pg_0, pg_1, pg_2, src_vect, dst);
53 120 }
54
55 private:
56 684 void common_vector_path(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2,
57 VectorType src_vect,
58 ScalarType *dst) KLEIDICV_STREAMING {
59 // Convert from gray to RGB using table-lookups.
60 684 VectorType dst_vec_0 = svtbl(src_vect, svget3(indices_, 0));
61 684 VectorType dst_vec_1 = svtbl(src_vect, svget3(indices_, 1));
62 684 VectorType dst_vec_2 = svtbl(src_vect, svget3(indices_, 2));
63
64 684 svst1(pg_0, &dst[0], dst_vec_0);
65 684 svst1_vnum(pg_1, &dst[0], 1, dst_vec_1);
66 684 svst1_vnum(pg_2, &dst[0], 2, dst_vec_2);
67 684 }
68
69 131 void initialize_indices() KLEIDICV_STREAMING {
70 // All-true predicate to shorten code.
71 131 svbool_t pg_all = VecTraits::svptrue();
72 // Constant used for division by 3.
73 131 VectorType const_171 = VecTraits::svdup(171);
74 // Generated indices.
75 131 VectorType indices_0, indices_1, indices_2;
76
77 131 indices_0 = svindex_u8(0, 1);
78
79
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 128 times.
131 if (KLEIDICV_UNLIKELY(svcntb() == 256)) {
80 3 indices_1 = svext(
81 3 svdup_u8(0),
82 3 svqadd(svindex_u8(svcntb() % 3, 1), static_cast<uint8_t>(2)), 254);
83 6 indices_2 = svext(svdup_u8(0),
84 3 svqadd(svindex_u8(0, 1), static_cast<uint8_t>(3)), 255);
85 3 } else {
86 128 indices_1 = svindex_u8(svcntb() % 3, 1);
87 128 indices_2 = svindex_u8((svcntb() * 2) % 3, 1);
88 }
89
90 131 indices_0 = svlsr_x(pg_all, svmulh_x(pg_all, indices_0, const_171), 1);
91 131 indices_1 = svqadd_x(
92 131 pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_1, const_171), 1),
93 131 static_cast<ScalarType>(svcntb() / 3));
94 131 indices_2 = svqadd_x(
95 131 pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_2, const_171), 1),
96 131 static_cast<ScalarType>((svcntb() * 2) / 3));
97
98 131 indices_ = svcreate3(indices_0, indices_1, indices_2);
99 131 }
100
101 svuint8x3_t &indices_;
102 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
103 }; // end of class GrayToRGB<ScalarType>
104
105 template <typename ScalarType>
106 class GrayToRGBAWithInterleaving final : public UnrollTwice {
107 public:
108 using ContextType = Context;
109 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
110 using VectorType = typename VecTraits::VectorType;
111 24 void vector_path(ContextType ctx, VectorType src_vect,
112 ScalarType *dst) KLEIDICV_STREAMING {
113 24 auto pg = ctx.predicate();
114 24 svuint8_t alpha = svdup_u8(0xff);
115 24 svuint8x4_t dst_vect = svcreate4(src_vect, src_vect, src_vect, alpha);
116
117 24 svst4(pg, dst, dst_vect);
118 24 }
119 }; // end of class GrayToRGBAWithInterleaving<ScalarType>
120
121 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
122 template <typename ScalarType>
123 class GrayToRGBAWithLookUpTable final : public UnrollTwice,
124 public UsesTailPath {
125 public:
126 using ContextType = Context;
127 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
128 using VectorType = typename VecTraits::VectorType;
129 128 explicit GrayToRGBAWithLookUpTable(svuint8x4_t &indices) KLEIDICV_STREAMING
130 128 : indices_{indices} {
131 128 initialize_indices();
132 128 }
133
134 544 void vector_path(ContextType ctx, VectorType src_vect,
135 ScalarType *dst) KLEIDICV_STREAMING {
136 // Call the common vector path.
137 544 auto pg = ctx.predicate();
138 544 common_vector_path(pg, pg, pg, pg, src_vect, dst);
139 544 }
140
141 116 void tail_path(ContextType ctx, VectorType src_vect,
142 ScalarType *dst) KLEIDICV_STREAMING {
143 116 auto pg = ctx.predicate();
144 // Predicates for consecutive stores.
145 116 svbool_t pg_0, pg_1, pg_2, pg_3;
146 116 VecTraits::make_consecutive_predicates(pg, pg_0, pg_1, pg_2, pg_3);
147 // Call the common vector path.
148 116 common_vector_path(pg_0, pg_1, pg_2, pg_3, src_vect, dst);
149 116 }
150
151 private:
152 660 void common_vector_path(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2,
153 svbool_t pg_3, VectorType src_vect,
154 ScalarType *dst) KLEIDICV_STREAMING {
155 660 svuint8x2_t src_and_alpha = svcreate2(src_vect, VecTraits::svdup(-1));
156
157 // Convert from gray to RGBA using table-lookups.
158 660 VectorType dst_vec_0 = svtbl2(src_and_alpha, svget4(indices_, 0));
159 660 VectorType dst_vec_1 = svtbl2(src_and_alpha, svget4(indices_, 1));
160 660 VectorType dst_vec_2 = svtbl2(src_and_alpha, svget4(indices_, 2));
161 660 VectorType dst_vec_3 = svtbl2(src_and_alpha, svget4(indices_, 3));
162
163 660 svst1(pg_0, &dst[0], dst_vec_0);
164 660 svst1_vnum(pg_1, &dst[0], 1, dst_vec_1);
165 660 svst1_vnum(pg_2, &dst[0], 2, dst_vec_2);
166 660 svst1_vnum(pg_3, &dst[0], 3, dst_vec_3);
167 660 }
168
169 128 void initialize_indices() KLEIDICV_STREAMING {
170 // Number of four-tuple elements.
171 128 uint64_t num_four_tuples = VecTraits::num_lanes() / 4;
172 // Index of alpha.
173 128 uint64_t idx_alpha = VecTraits::num_lanes();
174 // Start index.
175 128 uint64_t start_index = idx_alpha << 24;
176
177 // Index generation is similar to that of GrayToRGB above.
178 256 VectorType indices_0 =
179 128 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
180
181 // Repeat for 'indices_1' but add number of 4-tuple elements.
182 128 start_index += 0x10101 * num_four_tuples;
183 256 VectorType indices_1 =
184 128 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
185
186 // Similarly to 'indices_1', but add twice the number of 4-tuple elements.
187 128 start_index += 0x10101 * num_four_tuples;
188 256 VectorType indices_2 =
189 128 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
190
191 // Similarly to 'indices_1', but add three times the number of 4-tuple
192 // elements.
193 128 start_index += 0x10101 * num_four_tuples;
194 256 VectorType indices_3 =
195 128 svreinterpret_u8_u32(svindex_u32(start_index, 0x10101));
196
197 128 indices_ = svcreate4(indices_0, indices_1, indices_2, indices_3);
198 128 }
199
200 svuint8x4_t &indices_;
201 }; // end of class GrayToRGBAWithLookUpTable<ScalarType>
202 #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
203
204 167 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t gray_to_rgb_u8_sc(
205 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
206 size_t width, size_t height) KLEIDICV_STREAMING {
207
4/4
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 158 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 158 times.
167 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
208
4/4
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 149 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 149 times.
158 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
209
6/6
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 140 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 131 times.
✓ Branch 4 taken 18 times.
✓ Branch 5 taken 131 times.
149 CHECK_IMAGE_SIZE(width, height);
210
211 131 Rectangle rect{width, height};
212 131 Rows<const uint8_t> src_rows{src, src_stride};
213 131 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */};
214 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
215 GrayToRGB<uint8_t> operation;
216 #else
217 131 svuint8x3_t table_indices;
218 131 GrayToRGB<uint8_t> operation{table_indices};
219 #endif
220 131 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
221 131 return KLEIDICV_OK;
222 167 }
223
224 167 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t gray_to_rgba_u8_sc(
225 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
226 size_t width, size_t height) KLEIDICV_STREAMING {
227
4/4
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 158 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 158 times.
167 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
228
4/4
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 149 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 149 times.
158 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
229
6/6
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 140 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 131 times.
✓ Branch 4 taken 18 times.
✓ Branch 5 taken 131 times.
149 CHECK_IMAGE_SIZE(width, height);
230
231 131 Rectangle rect{width, height};
232 131 Rows<const uint8_t> src_rows{src, src_stride};
233 131 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */};
234
235 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
236 GrayToRGBAWithInterleaving<int8_t> operation{};
237 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
238 #else
239
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 128 times.
131 if (svcntb() > 128) {
240 3 GrayToRGBAWithInterleaving<uint8_t> operation{};
241 3 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
242 3 } else {
243 128 svuint8x4_t table_indices;
244 128 GrayToRGBAWithLookUpTable<uint8_t> operation{table_indices};
245 128 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
246 128 }
247 #endif
248 131 return KLEIDICV_OK;
249 167 }
250
251 } // namespace KLEIDICV_TARGET_NAMESPACE
252
253 #endif // KLEIDICV_GRAY_TO_RGB_SC_H
254