KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/float_conv_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 120 120 100.0%
Functions: 72 72 100.0%
Branches: 64 64 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_FLOAT_CONV_SC_H
6 #define KLEIDICV_FLOAT_CONV_SC_H
7
8 #include <limits>
9 #include <type_traits>
10
11 #include "kleidicv/kleidicv.h"
12 #include "kleidicv/sve2.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
// Primary template: only declared here. The two partial specialisations that
// follow in this listing (float source, and float destination) provide the
// actual implementations.
16 template <typename InputType, typename OutputType>
17 class float_conversion_operation;
18
// Partial specialisation: converts a row of float values to an integral
// OutputType.
// process_row() consumes four source vectors per unrolled iteration through
// vector_path(), and handles the leftover elements one predicated vector at a
// time through remaining_path().
// NOTE(review): the tail path stores with svst1b (byte store), so OutputType is
// presumably an 8-bit integer type - confirm against the instantiations.
19 template <typename OutputType>
20 class float_conversion_operation<float, OutputType> {
21 public:
22 using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
23 using SrcVectorType = typename SrcVecTraits::VectorType;
24 using SrcVector4Type = typename SrcVecTraits::Vector4Type;
// 32-bit lane type that holds the converted values: int32_t when OutputType is
// signed, uint32_t otherwise.
25 using IntermediateVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<
26 std::conditional_t<std::is_signed_v<OutputType>, int32_t, uint32_t>>;
27 using IntermediateVectorType = typename IntermediateVecTraits::VectorType;
28 using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<OutputType>;
29 using DstVectorType = typename DstVecTraits::VectorType;
30
// Passes `index` on to the SvqvctWrapper member.
// NOTE(review): SvqvctWrapper is declared outside this listing, so how it uses
// `index` cannot be seen here.
31 450 explicit float_conversion_operation(svuint8_t& index) KLEIDICV_STREAMING
32 450 : svqvct_wrapper(index) {}
33
// Converts `width` floats read through `src` and writes the results through
// `dst`.
//  - Unrolled part: four source vectors per iteration -> one destination vector.
//  - Remaining part: predicated loop over the `length` leftover elements,
//    SrcVecTraits::num_lanes() elements per pass.
// NOTE(review): how LoopUnroll splits `width` between the two callbacks is
// defined elsewhere - confirm before relying on exact iteration counts.
34 692 void process_row(size_t width, Columns<const float> src,
35 Columns<OutputType> dst) KLEIDICV_STREAMING {
36 1384 LoopUnroll{width, SrcVecTraits::num_lanes()}
37 2028 .unroll_n_times<4>([&](size_t step) KLEIDICV_STREAMING {
// One all-true predicate (taken from the destination traits) is shared by the
// non-SME2 loads, the conversion and the store of this unrolled step.
38 1336 svbool_t pg = DstVecTraits::svptrue();
// SME2 build: fetch the four source vectors with one multi-vector load and
// unpack the tuple. Otherwise: four single-vector loads at vector offsets 0..3.
39 #if KLEIDICV_TARGET_SME2
40 206 svcount_t pg_counter = DstVecTraits::svptrue_c();
41 206 SrcVector4Type src4 = svld1_x4(pg_counter, &src[0]);
42 206 SrcVectorType src_v0 = svget4(src4, 0);
43 206 SrcVectorType src_v1 = svget4(src4, 1);
44 206 SrcVectorType src_v2 = svget4(src4, 2);
45 206 SrcVectorType src_v3 = svget4(src4, 3);
46 #else
47 1130 SrcVectorType src_v0 = svld1(pg, &src[0]);
48 1130 SrcVectorType src_v1 = svld1_vnum(pg, &src[0], 1);
49 1130 SrcVectorType src_v2 = svld1_vnum(pg, &src[0], 2);
50 1130 SrcVectorType src_v3 = svld1_vnum(pg, &src[0], 3);
51 #endif // KLEIDICV_TARGET_SME2
// Four float vectors in, one destination vector out.
52 1336 DstVectorType res0 = vector_path(pg, src_v0, src_v1, src_v2, src_v3);
53 1336 svst1(pg, &dst[0], res0);
// Advance both column cursors by the amount LoopUnroll reports for this step.
54 1336 src += ptrdiff_t(step);
55 1336 dst += ptrdiff_t(step);
56 1336 })
57 1228 .remaining([&](size_t length, size_t) KLEIDICV_STREAMING {
58 536 size_t index = 0;
// Predicate with lanes active only for element positions index..length-1.
59 536 svbool_t pg = SrcVecTraits::svwhilelt(index, length);
// Keep looping while the first lane of pg is active, i.e. while index < length.
60
4/4
✓ Branch 0 taken 432 times.
✓ Branch 1 taken 268 times.
✓ Branch 2 taken 432 times.
✓ Branch 3 taken 268 times.
1400 while (svptest_first(SrcVecTraits::svptrue(), pg)) {
61 864 SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
62 1728 IntermediateVectorType result_vector =
63 864 remaining_path<OutputType>(pg, src_vector);
// svst1b writes one byte per active 32-bit lane; remaining_path() has already
// clamped the values in the float domain before the integer conversion.
64 864 svst1b(pg, &dst[ptrdiff_t(index)], result_vector);
65 // Update loop counter and calculate the next governing predicate.
66 864 index += SrcVecTraits::num_lanes();
67 864 pg = SrcVecTraits::svwhilelt(index, length);
68 864 }
69 536 });
70 692 }
71
72 private:
// convert<O>(): float lanes -> 32-bit integer lanes.
// This overload is selected for signed integral O and yields signed 32-bit
// lanes (svcvt_s32_f32_x).
73 template <
74 typename O,
75 std::enable_if_t<std::is_integral_v<O> && std::is_signed_v<O>, int> = 0>
76 2672 decltype(auto) convert(svbool_t full_pg,
77 SrcVectorType in) KLEIDICV_STREAMING {
78 2672 return svcvt_s32_f32_x(full_pg, in);
79 }
80
// Overload selected for unsigned integral O; yields unsigned 32-bit lanes
// (svcvt_u32_f32_x).
81 template <
82 typename O,
83 std::enable_if_t<std::is_integral_v<O> && !std::is_signed_v<O>, int> = 0>
84 2672 decltype(auto) convert(svbool_t full_pg,
85 SrcVectorType in) KLEIDICV_STREAMING {
86 2672 return svcvt_u32_f32_x(full_pg, in);
87 }
88
// Full-vector path used by the unrolled loop.
// Each of the four inputs is rounded to an integral float value with svrinti
// (which rounds according to the current rounding mode), converted to 32-bit
// integers with convert<OutputType>(), and the four results are handed as one
// tuple to svqvct_wrapper, whose result is returned as the destination vector.
// NOTE(review): no explicit clamping happens here; the narrowing from 32-bit
// lanes to OutputType is done inside SvqvctWrapper (not visible in this
// listing) - presumably a saturating narrow, confirm.
89 1336 DstVectorType vector_path(svbool_t full_pg, SrcVectorType fsrc0,
90 SrcVectorType fsrc1, SrcVectorType fsrc2,
91 SrcVectorType fsrc3) KLEIDICV_STREAMING {
92 1336 fsrc0 = svrinti_f32_x(full_pg, fsrc0);
93 1336 fsrc1 = svrinti_f32_x(full_pg, fsrc1);
94 1336 fsrc2 = svrinti_f32_x(full_pg, fsrc2);
95 1336 fsrc3 = svrinti_f32_x(full_pg, fsrc3);
96
97 1336 auto _32bit_res0 = convert<OutputType>(full_pg, fsrc0);
98 1336 auto _32bit_res1 = convert<OutputType>(full_pg, fsrc1);
99 1336 auto _32bit_res2 = convert<OutputType>(full_pg, fsrc2);
100 1336 auto _32bit_res3 = convert<OutputType>(full_pg, fsrc3);
101
102 3802 return svqvct_wrapper(
103 1336 svcreate4(_32bit_res0, _32bit_res1, _32bit_res2, _32bit_res3));
104 1336 }
105
// Tail path for signed O: returns 32-bit signed lanes already limited to the
// range of O.
// After rounding, active lanes below numeric_limits<O>::lowest() are replaced
// by that bound and active lanes above numeric_limits<O>::max() by that bound
// (compare + merging svdup, so all other lanes keep their value); only then
// are the floats converted to int32.
// `pg` is taken by reference but is not modified here.
106 template <
107 typename O,
108 std::enable_if_t<std::is_integral_v<O> && std::is_signed_v<O>, int> = 0>
109 432 IntermediateVectorType remaining_path(svbool_t& pg,
110 SrcVectorType src) KLEIDICV_STREAMING {
111 432 constexpr float min_val = std::numeric_limits<O>::lowest();
112 432 constexpr float max_val = std::numeric_limits<O>::max();
113
114 432 src = svrinti_f32_x(pg, src);
115
116 432 svbool_t less = svcmplt_n_f32(pg, src, min_val);
117 432 src = svdup_n_f32_m(src, less, min_val);
118
119 432 svbool_t greater = svcmpgt_n_f32(pg, src, max_val);
120 432 src = svdup_n_f32_m(src, greater, max_val);
121
122 864 return svcvt_s32_f32_x(pg, src);
123 432 }
124
// Tail path for unsigned O: same scheme, but only the upper bound
// (numeric_limits<O>::max()) is clamped explicitly before converting to uint32.
// NOTE(review): there is no explicit lower clamp; negative inputs presumably
// rely on the float -> uint32 conversion producing 0 - confirm against the
// instruction's documentation.
125 template <
126 typename O,
127 std::enable_if_t<std::is_integral_v<O> && !std::is_signed_v<O>, int> = 0>
128 432 IntermediateVectorType remaining_path(svbool_t& pg,
129 SrcVectorType src) KLEIDICV_STREAMING {
130 432 constexpr float max_val = std::numeric_limits<O>::max();
131
132 432 src = svrinti_f32_x(pg, src);
133
134 432 svbool_t greater = svcmpgt_n_f32(pg, src, max_val);
135 432 src = svdup_n_f32_m(src, greater, max_val);
136
137 864 return svcvt_u32_f32_x(pg, src);
138 432 }
139
// Constructed from the caller's `index` vector; called in vector_path() with
// the tuple of four 32-bit results.
140 SvqvctWrapper svqvct_wrapper;
141 }; // end of class float_conversion_operation<float, OutputType>
142
// Partial specialisation: converts a row of integral InputType values to
// float.
// process_row() handles two vectors per unrolled iteration and finishes the
// leftover elements with a predicated loop; load_src() widens the source to
// 32-bit lanes and vector_path() converts those lanes to float.
// NOTE(review): load_src() uses byte loads (svld1sb / svld1ub), so InputType is
// presumably an 8-bit integer type - confirm against the instantiations.
143 template <typename InputType>
144 class float_conversion_operation<InputType, float> {
145 public:
146 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
147 using VectorType = typename VecTraits::VectorType;
148 using Vector2Type = typename VecTraits::Vector2Type;
149
// The svuint8_t& argument is ignored. It is accepted so that
// float_conversion_sc() can construct either specialisation with the same
// `operation{index}` expression.
150 450 explicit float_conversion_operation(svuint8_t&) {}
151
// Converts `width` source elements read through `src` into floats written
// through `dst`.
//  - Unrolled part: two float vectors produced per iteration.
//  - Remaining part: predicated loop over the `length` leftover elements,
//    VecTraits::num_lanes() elements per pass.
// NOTE(review): how LoopUnroll splits `width` between the two callbacks is
// defined elsewhere - confirm before relying on exact iteration counts.
152 692 void process_row(size_t width, Columns<const InputType> src,
153 Columns<float> dst) KLEIDICV_STREAMING {
154 1384 LoopUnroll{width, VecTraits::num_lanes()}
155 3486 .unroll_twice([&](size_t step) KLEIDICV_STREAMING {
156 2794 svbool_t pg = VecTraits::svptrue();
// Load two consecutive vectors' worth of source elements (vector offsets 0 and
// 1), each already widened to 32-bit lanes by load_src().
157 2794 auto src_vect1 = load_src(pg, &src[0], 0);
158 2794 auto src_vect2 = load_src(pg, &src[0], 1);
159
160 2794 VectorType dst_vector1 = vector_path(pg, src_vect1);
161 2794 VectorType dst_vector2 = vector_path(pg, src_vect2);
// SME2 build: pack both results into a tuple and store them with one
// multi-vector store. Otherwise: two single-vector stores at offsets 0 and 1.
162 #if KLEIDICV_TARGET_SME2
163 450 svcount_t pg_counter = VecTraits::svptrue_c();
164 450 Vector2Type res2 = svcreate2(dst_vector1, dst_vector2);
165 450 svst1(pg_counter, &dst[0], res2);
166 #else
167 2344 svst1(pg, &dst[0], dst_vector1);
168 2344 svst1_vnum(pg, &dst[0], 1, dst_vector2);
169 #endif // KLEIDICV_TARGET_SME2
// Advance both column cursors by the amount LoopUnroll reports for this step.
170 2794 src += ptrdiff_t(step);
171 2794 dst += ptrdiff_t(step);
172 2794 })
173 1174 .remaining([&](size_t length, size_t) KLEIDICV_STREAMING {
174 482 size_t index = 0;
// Predicate with lanes active only for element positions index..length-1.
175 482 svbool_t pg = VecTraits::svwhilelt(index, length);
// Keep looping while the first lane of pg is active, i.e. while index < length.
176
4/4
✓ Branch 0 taken 302 times.
✓ Branch 1 taken 241 times.
✓ Branch 2 taken 302 times.
✓ Branch 3 taken 241 times.
1086 while (svptest_first(VecTraits::svptrue(), pg)) {
177 604 auto src_vect = load_src(pg, &src[ptrdiff_t(index)], 0);
178 604 VectorType dst_vector = vector_path(pg, src_vect);
179 604 svst1(pg, &dst[ptrdiff_t(index)], dst_vector);
180 // Update loop counter and calculate the next governing predicate.
181 604 index += VecTraits::num_lanes();
182 604 pg = VecTraits::svwhilelt(index, length);
183 604 }
184 482 });
185 692 }
186
187 private:
// vector_path(): 32-bit integer lanes -> float lanes. The overload is chosen by
// the vector type returned from load_src().
// Signed lanes (svint32_t) use svcvt_f32_s32_x.
188 template <typename I, std::enable_if_t<std::is_same_v<I, svint32_t>, int> = 0>
189 3096 VectorType vector_path(svbool_t& pg, I src_vector) KLEIDICV_STREAMING {
190 3096 return svcvt_f32_s32_x(pg, src_vector);
191 }
// Unsigned lanes (svuint32_t) use svcvt_f32_u32_x.
192 template <typename I,
193 std::enable_if_t<std::is_same_v<I, svuint32_t>, int> = 0>
194 3096 VectorType vector_path(svbool_t& pg, I src_vector) KLEIDICV_STREAMING {
195 3096 return svcvt_f32_u32_x(pg, src_vector);
196 }
197
// load_src(): loads source elements starting at `src`, offset by `vnum` whole
// vectors, and widens each one to a 32-bit lane.
// Signed I: byte load with sign extension (svld1sb_vnum_s32).
// `pg` is taken by reference but is not modified here.
198 template <
199 typename I,
200 std::enable_if_t<std::is_integral_v<I> && std::is_signed_v<I>, int> = 0>
201 3096 svint32_t load_src(svbool_t& pg, const I* src,
202 size_t vnum) KLEIDICV_STREAMING {
203 3096 svint32_t src_vect = svld1sb_vnum_s32(pg, src, vnum);
204 6192 return src_vect;
205 3096 }
206
// Unsigned I: byte load with zero extension (svld1ub_vnum_u32).
207 template <
208 typename I,
209 std::enable_if_t<std::is_integral_v<I> && !std::is_signed_v<I>, int> = 0>
210 3096 svuint32_t load_src(svbool_t& pg, const I* src,
211 size_t vnum) KLEIDICV_STREAMING {
212 3096 svuint32_t src_vect = svld1ub_vnum_u32(pg, src, vnum);
213 6192 return src_vect;
214 3096 }
215 }; // end of class float_conversion_operation<InputType, float>
216
// Entry point: checks the arguments, then converts a width x height image from
// InputType to OutputType one row at a time.
//   src, src_stride  - source image base pointer and row stride
//   dst, dst_stride  - destination image base pointer and row stride
//   width, height    - image dimensions
// (NOTE(review): the unit of the strides is decided by Rows, which is declared
// elsewhere - presumably bytes, confirm.)
// Returns KLEIDICV_OK after zip_rows() has run.
// NOTE(review): the CHECK_* macros are defined elsewhere. The exec counts in
// this listing drop after each check (960 -> 948 -> 936 -> 900), which is
// consistent with the macros returning early on invalid arguments.
217 template <typename InputType, typename OutputType>
218 960 static kleidicv_error_t float_conversion_sc(const InputType* src,
219 size_t src_stride, OutputType* dst,
220 size_t dst_stride, size_t width,
221 size_t height) KLEIDICV_STREAMING {
222
16/16
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 237 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 237 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 237 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 237 times.
✓ Branch 8 taken 3 times.
✓ Branch 9 taken 237 times.
✓ Branch 10 taken 3 times.
✓ Branch 11 taken 237 times.
✓ Branch 12 taken 3 times.
✓ Branch 13 taken 237 times.
✓ Branch 14 taken 3 times.
✓ Branch 15 taken 237 times.
960 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
223
16/16
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 234 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 234 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 234 times.
✓ Branch 8 taken 3 times.
✓ Branch 9 taken 234 times.
✓ Branch 10 taken 3 times.
✓ Branch 11 taken 234 times.
✓ Branch 12 taken 3 times.
✓ Branch 13 taken 234 times.
✓ Branch 14 taken 3 times.
✓ Branch 15 taken 234 times.
948 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
224
24/24
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 231 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 225 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 225 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 231 times.
✓ Branch 8 taken 6 times.
✓ Branch 9 taken 225 times.
✓ Branch 10 taken 9 times.
✓ Branch 11 taken 225 times.
✓ Branch 12 taken 3 times.
✓ Branch 13 taken 231 times.
✓ Branch 14 taken 6 times.
✓ Branch 15 taken 225 times.
✓ Branch 16 taken 9 times.
✓ Branch 17 taken 225 times.
✓ Branch 18 taken 3 times.
✓ Branch 19 taken 231 times.
✓ Branch 20 taken 6 times.
✓ Branch 21 taken 225 times.
✓ Branch 22 taken 9 times.
✓ Branch 23 taken 225 times.
936 CHECK_IMAGE_SIZE(width, height);
225
// `index` is declared without an initialiser and passed by reference to the
// operation's constructor: the float-source specialisation forwards it to
// SvqvctWrapper, the float-destination specialisation ignores it.
// NOTE(review): presumably SvqvctWrapper fills it in - confirm.
226 900 svuint8_t index;
227 900 float_conversion_operation<InputType, OutputType> operation{index};
228 900 Rectangle rect{width, height};
229 900 Rows<const InputType> src_rows{src, src_stride};
230 900 Rows<OutputType> dst_rows{dst, dst_stride};
// zip_rows() is handed the operation together with the rectangle and both row
// views. NOTE(review): presumably it calls operation.process_row() once per
// row - zip_rows is declared elsewhere, confirm.
231 900 zip_rows(operation, rect, src_rows, dst_rows);
232
233 900 return KLEIDICV_OK;
234 960 }
235
236 } // namespace KLEIDICV_TARGET_NAMESPACE
237
238 #endif // KLEIDICV_FLOAT_CONV_SC_H
239