KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/float_conv_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 126 126 100.0%
Functions: 48 48 100.0%
Branches: 64 64 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_FLOAT_CONV_SC_H
6 #define KLEIDICV_FLOAT_CONV_SC_H
7
8 #include <limits>
9 #include <type_traits>
10
11 #include "kleidicv/kleidicv.h"
12 #include "kleidicv/sve2.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
16 template <typename InputType, typename OutputType>
17 class float_conversion_operation;
18
19 template <typename OutputType>
20 class float_conversion_operation<float, OutputType> {
21 public:
22 using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
23 using SrcVectorType = typename SrcVecTraits::VectorType;
24 using IntermediateVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<
25 std::conditional_t<std::is_signed_v<OutputType>, int32_t, uint32_t>>;
26 using IntermediateVectorType = typename IntermediateVecTraits::VectorType;
27 using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<OutputType>;
28 using DstVectorType = typename DstVecTraits::VectorType;
29
30 300 explicit float_conversion_operation(svuint8_t& index) KLEIDICV_STREAMING
31 300 : index_(index) {
32 // Index generation to reorder converted values by tbl instruction
33 300 auto index0 = svindex_u8(0, 4);
34 300 auto index1 = svindex_u8(1, 4);
35 300 auto index2 = svindex_u8(2, 4);
36 300 auto index3 = svindex_u8(3, 4);
37
38 300 svbool_t pg = svwhilelt_b8(uint64_t(0), svcntb() / 4);
39
40 300 index_ = svsplice(pg, index3, svdup_u8(0));
41 300 index_ = svsplice(pg, index2, index_);
42 300 index_ = svsplice(pg, index1, index_);
43 300 index_ = svsplice(pg, index0, index_);
44 300 }
45
46 474 void process_row(size_t width, Columns<const float> src,
47 Columns<OutputType> dst) KLEIDICV_STREAMING {
48 948 LoopUnroll{width, SrcVecTraits::num_lanes()}
49 1604 .unroll_n_times<4>([&](size_t step) KLEIDICV_STREAMING {
50 1130 svbool_t pg = DstVecTraits::svptrue();
51 1130 SrcVectorType src_v0 = svld1(pg, &src[0]);
52 1130 SrcVectorType src_v1 = svld1_vnum(pg, &src[0], 1);
53 1130 SrcVectorType src_v2 = svld1_vnum(pg, &src[0], 2);
54 1130 SrcVectorType src_v3 = svld1_vnum(pg, &src[0], 3);
55 1130 DstVectorType res0 = vector_path(pg, src_v0, src_v1, src_v2, src_v3);
56 1130 svst1(pg, &dst[0], res0);
57 1130 src += ptrdiff_t(step);
58 1130 dst += ptrdiff_t(step);
59 1130 })
60 824 .remaining([&](size_t length, size_t) KLEIDICV_STREAMING {
61 350 size_t index = 0;
62 350 svbool_t pg = SrcVecTraits::svwhilelt(index, length);
63
4/4
✓ Branch 0 taken 298 times.
✓ Branch 1 taken 175 times.
✓ Branch 2 taken 298 times.
✓ Branch 3 taken 175 times.
946 while (svptest_first(SrcVecTraits::svptrue(), pg)) {
64 596 SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
65 1192 IntermediateVectorType result_vector =
66 596 remaining_path<OutputType>(pg, src_vector);
67 596 svst1b(pg, &dst[ptrdiff_t(index)], result_vector);
68 // Update loop counter and calculate the next governing predicate.
69 596 index += SrcVecTraits::num_lanes();
70 596 pg = SrcVecTraits::svwhilelt(index, length);
71 596 }
72 350 });
73 474 }
74
75 private:
76 template <
77 typename O,
78 std::enable_if_t<std::is_integral_v<O> && std::is_signed_v<O>, int> = 0>
79 2260 decltype(auto) convert(svbool_t full_pg,
80 SrcVectorType in) KLEIDICV_STREAMING {
81 2260 return svcvt_s32_f32_x(full_pg, in);
82 }
83
84 template <
85 typename O,
86 std::enable_if_t<std::is_integral_v<O> && !std::is_signed_v<O>, int> = 0>
87 2260 decltype(auto) convert(svbool_t full_pg,
88 SrcVectorType in) KLEIDICV_STREAMING {
89 2260 return svcvt_u32_f32_x(full_pg, in);
90 }
91
92 1130 DstVectorType vector_path(svbool_t full_pg, SrcVectorType fsrc0,
93 SrcVectorType fsrc1, SrcVectorType fsrc2,
94 SrcVectorType fsrc3) KLEIDICV_STREAMING {
95 1130 fsrc0 = svrinti_f32_x(full_pg, fsrc0);
96 1130 fsrc1 = svrinti_f32_x(full_pg, fsrc1);
97 1130 fsrc2 = svrinti_f32_x(full_pg, fsrc2);
98 1130 fsrc3 = svrinti_f32_x(full_pg, fsrc3);
99
100 1130 auto _32bit_res0 = convert<OutputType>(full_pg, fsrc0);
101 1130 auto _32bit_res1 = convert<OutputType>(full_pg, fsrc1);
102 1130 auto _32bit_res2 = convert<OutputType>(full_pg, fsrc2);
103 1130 auto _32bit_res3 = convert<OutputType>(full_pg, fsrc3);
104
105 1130 auto _16bit_res0 = svqxtnb(_32bit_res0);
106 1130 _16bit_res0 = svqxtnt(_16bit_res0, _32bit_res2);
107
108 1130 auto _16bit_res1 = svqxtnb(_32bit_res1);
109 1130 _16bit_res1 = svqxtnt(_16bit_res1, _32bit_res3);
110
111 1130 auto _8bit_res = svqxtnb(_16bit_res0);
112 1130 _8bit_res = svqxtnt(_8bit_res, _16bit_res1);
113
114 2260 return svtbl(_8bit_res, index_);
115 1130 }
116
117 template <
118 typename O,
119 std::enable_if_t<std::is_integral_v<O> && std::is_signed_v<O>, int> = 0>
120 298 IntermediateVectorType remaining_path(svbool_t& pg,
121 SrcVectorType src) KLEIDICV_STREAMING {
122 298 constexpr float min_val = std::numeric_limits<O>::lowest();
123 298 constexpr float max_val = std::numeric_limits<O>::max();
124
125 298 src = svrinti_f32_x(pg, src);
126
127 298 svbool_t less = svcmplt_n_f32(pg, src, min_val);
128 298 src = svdup_n_f32_m(src, less, min_val);
129
130 298 svbool_t greater = svcmpgt_n_f32(pg, src, max_val);
131 298 src = svdup_n_f32_m(src, greater, max_val);
132
133 596 return svcvt_s32_f32_x(pg, src);
134 298 }
135
136 template <
137 typename O,
138 std::enable_if_t<std::is_integral_v<O> && !std::is_signed_v<O>, int> = 0>
139 298 IntermediateVectorType remaining_path(svbool_t& pg,
140 SrcVectorType src) KLEIDICV_STREAMING {
141 298 constexpr float max_val = std::numeric_limits<O>::max();
142
143 298 src = svrinti_f32_x(pg, src);
144
145 298 svbool_t greater = svcmpgt_n_f32(pg, src, max_val);
146 298 src = svdup_n_f32_m(src, greater, max_val);
147
148 596 return svcvt_u32_f32_x(pg, src);
149 298 }
150
151 svuint8_t& index_;
152 }; // end of class float_conversion_operation<float, OutputType>
153
154 template <typename InputType>
155 class float_conversion_operation<InputType, float> {
156 public:
157 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
158 using VectorType = typename VecTraits::VectorType;
159
160 300 explicit float_conversion_operation(svuint8_t&) {}
161
162 474 void process_row(size_t width, Columns<const InputType> src,
163 Columns<float> dst) KLEIDICV_STREAMING {
164 948 LoopUnroll{width, VecTraits::num_lanes()}
165 2818 .unroll_twice([&](size_t step) KLEIDICV_STREAMING {
166 2344 svbool_t pg = VecTraits::svptrue();
167 2344 auto src_vect1 = load_src(pg, &src[0], 0);
168 2344 auto src_vect2 = load_src(pg, &src[0], 1);
169
170 2344 VectorType dst_vector1 = vector_path(pg, src_vect1);
171 2344 VectorType dst_vector2 = vector_path(pg, src_vect2);
172 2344 svst1(pg, &dst[0], dst_vector1);
173 2344 svst1_vnum(pg, &dst[0], 1, dst_vector2);
174 2344 src += ptrdiff_t(step);
175 2344 dst += ptrdiff_t(step);
176 2344 })
177 792 .remaining([&](size_t length, size_t) KLEIDICV_STREAMING {
178 318 size_t index = 0;
179 318 svbool_t pg = VecTraits::svwhilelt(index, length);
180
4/4
✓ Branch 0 taken 207 times.
✓ Branch 1 taken 159 times.
✓ Branch 2 taken 207 times.
✓ Branch 3 taken 159 times.
732 while (svptest_first(VecTraits::svptrue(), pg)) {
181 414 auto src_vect = load_src(pg, &src[ptrdiff_t(index)], 0);
182 414 VectorType dst_vector = vector_path(pg, src_vect);
183 414 svst1(pg, &dst[ptrdiff_t(index)], dst_vector);
184 // Update loop counter and calculate the next governing predicate.
185 414 index += VecTraits::num_lanes();
186 414 pg = VecTraits::svwhilelt(index, length);
187 414 }
188 318 });
189 474 }
190
191 private:
192 template <typename I, std::enable_if_t<std::is_same_v<I, svint32_t>, int> = 0>
193 2551 VectorType vector_path(svbool_t& pg, I src_vector) KLEIDICV_STREAMING {
194 2551 return svcvt_f32_s32_x(pg, src_vector);
195 }
196 template <typename I,
197 std::enable_if_t<std::is_same_v<I, svuint32_t>, int> = 0>
198 2551 VectorType vector_path(svbool_t& pg, I src_vector) KLEIDICV_STREAMING {
199 2551 return svcvt_f32_u32_x(pg, src_vector);
200 }
201
202 template <
203 typename I,
204 std::enable_if_t<std::is_integral_v<I> && std::is_signed_v<I>, int> = 0>
205 2551 svint32_t load_src(svbool_t& pg, const I* src,
206 size_t vnum) KLEIDICV_STREAMING {
207 2551 svint32_t src_vect = svld1sb_vnum_s32(pg, src, vnum);
208 5102 return src_vect;
209 2551 }
210
211 template <
212 typename I,
213 std::enable_if_t<std::is_integral_v<I> && !std::is_signed_v<I>, int> = 0>
214 2551 svuint32_t load_src(svbool_t& pg, const I* src,
215 size_t vnum) KLEIDICV_STREAMING {
216 2551 svuint32_t src_vect = svld1ub_vnum_u32(pg, src, vnum);
217 5102 return src_vect;
218 2551 }
219 }; // end of class float_conversion_operation<InputType, float>
220
221 template <typename InputType, typename OutputType>
222 640 static kleidicv_error_t float_conversion_sc(const InputType* src,
223 size_t src_stride, OutputType* dst,
224 size_t dst_stride, size_t width,
225 size_t height) KLEIDICV_STREAMING {
226
16/16
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 158 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 158 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 158 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 158 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 158 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 158 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 158 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 158 times.
640 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
227
16/16
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 156 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 156 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 156 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 156 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 156 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 156 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 156 times.
632 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
228
24/24
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 154 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 150 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 150 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 154 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 150 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 150 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 154 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 150 times.
✓ Branch 16 taken 6 times.
✓ Branch 17 taken 150 times.
✓ Branch 18 taken 2 times.
✓ Branch 19 taken 154 times.
✓ Branch 20 taken 4 times.
✓ Branch 21 taken 150 times.
✓ Branch 22 taken 6 times.
✓ Branch 23 taken 150 times.
624 CHECK_IMAGE_SIZE(width, height);
229
230 600 svuint8_t index;
231 600 float_conversion_operation<InputType, OutputType> operation{index};
232 600 Rectangle rect{width, height};
233 600 Rows<const InputType> src_rows{src, src_stride};
234 600 Rows<OutputType> dst_rows{dst, dst_stride};
235 600 zip_rows(operation, rect, src_rows, dst_rows);
236
237 600 return KLEIDICV_OK;
238 640 }
239
240 } // namespace KLEIDICV_TARGET_NAMESPACE
241
242 #endif // KLEIDICV_FLOAT_CONV_SC_H
243