KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/arithmetics/scale_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 172 172 100.0%
Functions: 48 48 100.0%
Branches: 38 38 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_SCALE_SC_H
6 #define KLEIDICV_SCALE_SC_H
7
8 #include <algorithm>
9
10 #include "kleidicv/sve2.h"
11
12 namespace KLEIDICV_TARGET_NAMESPACE {
13
14 class AddFloat final : public UnrollTwice {
15 public:
16 using ContextType = Context;
17 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
18 using VectorType = typename VecTraits::VectorType;
19
20 18 explicit AddFloat(const svfloat32_t &svshift) KLEIDICV_STREAMING
21 18 : svshift_{svshift} {}
22
23 // NOLINTBEGIN(readability-make-member-function-const)
24 7611 VectorType vector_path(ContextType ctx, VectorType src) KLEIDICV_STREAMING {
25 7611 return svadd_x(ctx.predicate(), src, svshift_);
26 }
27 // NOLINTEND(readability-make-member-function-const)
28
29 private:
30 const svfloat32_t &svshift_;
31 }; // end of class AddFloat
32
33 class ScaleFloat final : public UnrollTwice {
34 public:
35 using ContextType = Context;
36 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
37 using VectorType = typename VecTraits::VectorType;
38
39 255 ScaleFloat(const svfloat32_t &svscale,
40 const svfloat32_t &svshift) KLEIDICV_STREAMING
41 255 : svscale_{svscale},
42 255 svshift_{svshift} {}
43
44 // NOLINTBEGIN(readability-make-member-function-const)
45 10444 VectorType vector_path(ContextType ctx, VectorType src) KLEIDICV_STREAMING {
46 10444 return svmla_x(ctx.predicate(), svshift_, src, svscale_);
47 }
48 // NOLINTEND(readability-make-member-function-const)
49
50 private:
51 const svfloat32_t &svscale_, &svshift_;
52 }; // end of class ScaleFloat
53
54 template <typename T, typename U>
55 static kleidicv_error_t scale_sc(const T *src, size_t src_stride, U *dst,
56 size_t dst_stride, size_t width, size_t height,
57 double scale, double shift) KLEIDICV_STREAMING;
58
59 // Specialization for float
60 template <>
61 291 kleidicv_error_t scale_sc(const float *src, size_t src_stride, float *dst,
62 size_t dst_stride, size_t width, size_t height,
63 double scale, double shift) KLEIDICV_STREAMING {
64
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 285 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 285 times.
291 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
65
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 279 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 279 times.
285 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
66
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 276 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 273 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 273 times.
279 CHECK_IMAGE_SIZE(width, height);
67
68 273 Rectangle rect{width, height};
69 273 Rows<const float> src_rows{src, src_stride};
70 273 Rows<float> dst_rows{dst, dst_stride};
71 273 svfloat32_t svscale = svdup_f32(static_cast<float>(scale));
72 273 svfloat32_t svshift = svdup_f32(static_cast<float>(shift));
73
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 255 times.
273 if (scale == 1.0) {
74 18 AddFloat operation(svshift);
75 18 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
76 18 } else {
77 255 ScaleFloat operation(svscale, svshift);
78 255 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
79 255 }
80 273 return KLEIDICV_OK;
81 291 }
82
83 // -----------------------------------------------------------------------
84 // Scale uint8 to float16
85 // -----------------------------------------------------------------------
86
87 class ScaleUint8ToFloat16Calc16 {
88 public:
89 using SrcType = uint8_t;
90 using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<SrcType>;
91 using SrcVectorType = typename SrcVecTraits::VectorType;
92 using SrcVector2Type = typename SrcVecTraits::Vector2Type;
93 using DstType = float16_t;
94 using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<DstType>;
95 using DstVectorType = typename DstVecTraits::VectorType;
96 using DstVector2Type = typename DstVecTraits::Vector2Type;
97 using DstVector4Type = typename DstVecTraits::Vector4Type;
98
99 63 ScaleUint8ToFloat16Calc16(double scale, double shift, svfloat16_t &svscale,
100 svfloat16_t &svshift) KLEIDICV_STREAMING
101 63 : svscale_{svscale},
102 63 svshift_{svshift} {
103 63 svscale_ = svdup_n_f16(static_cast<float16_t>(scale));
104 63 svshift_ = svdup_n_f16(static_cast<float16_t>(shift));
105 63 }
106
107 205 void process_row(size_t width, Columns<const SrcType> src,
108 Columns<DstType> dst) const KLEIDICV_STREAMING {
109 205 svbool_t p16 = svptrue_b16();
110 205 svuint8_t svzero = svdup_n_u8(0);
111 #if KLEIDICV_TARGET_SME2
112 45 svcount_t pc8 = SrcVecTraits::svptrue_c();
113 45 svcount_t pc16 = DstVecTraits::svptrue_c();
114 #else
115 160 svbool_t p8 = svptrue_b8();
116 #endif
117 1376 auto vector_path = [&](svuint16_t src) KLEIDICV_STREAMING {
118 1171 svfloat16_t fsrc = svcvt_f16_x(p16, src);
119 2342 return svmla_f16_x(p16, svshift_, fsrc, svscale_);
120 1171 };
121
122 410 LoopUnroll{width, SrcVecTraits::num_lanes()}
123 416 .unroll_twice([&](size_t step) KLEIDICV_STREAMING {
124 #if KLEIDICV_TARGET_SME2
125 48 SrcVector2Type src_2vec = svld1_x2(pc8, &src[0]);
126 48 svuint8_t src0 = svget2(src_2vec, 0);
127 48 svuint8_t src1 = svget2(src_2vec, 1);
128 #else
129 163 svuint8_t src0 = svld1(p8, &src[0]);
130 163 svuint8_t src1 = svld1_vnum(p8, &src[0], 1);
131 #endif // KLEIDICV_TARGET_SME2
132 422 DstVectorType dst0 =
133 211 vector_path(svreinterpret_u16_u8(svzip1(src0, svzero)));
134 422 DstVectorType dst1 =
135 211 vector_path(svreinterpret_u16_u8(svzip2(src0, svzero)));
136 422 DstVectorType dst2 =
137 211 vector_path(svreinterpret_u16_u8(svzip1(src1, svzero)));
138 422 DstVectorType dst3 =
139 211 vector_path(svreinterpret_u16_u8(svzip2(src1, svzero)));
140 #if KLEIDICV_TARGET_SME2
141 48 DstVector4Type dst4 = svcreate4(dst0, dst1, dst2, dst3);
142 48 svst1(pc16, &dst[0], dst4);
143 #else
144 163 svst1(p16, &dst[0], dst0);
145 163 svst1_vnum(p16, &dst[0], 1, dst1);
146 163 svst1_vnum(p16, &dst[0], 2, dst2);
147 163 svst1_vnum(p16, &dst[0], 3, dst3);
148 #endif // KLEIDICV_TARGET_SME2
149 211 src += ptrdiff_t(step);
150 211 dst += ptrdiff_t(step);
151 211 })
152 352 .remaining([&](size_t length, size_t) KLEIDICV_STREAMING {
153 147 disable_loop_vectorization();
154 147 size_t step = svcnth();
155
2/2
✓ Branch 0 taken 327 times.
✓ Branch 1 taken 147 times.
474 while (length > 0) {
156 327 svbool_t p16 = svwhilelt_b16_u64(0, length);
157 327 svuint16_t src0 = svld1ub_u16(p16, &src[0]);
158 327 DstVectorType dst0 = vector_path(src0);
159 327 svst1(p16, &dst[0], dst0);
160 327 src += ptrdiff_t(step);
161 327 dst += ptrdiff_t(step);
162 327 length -= std::min<size_t>(length, step);
163 327 }
164 147 });
165 205 }
166
167 svfloat16_t &svscale_, &svshift_;
168 }; // end of class ScaleUint8ToFloat16Calc16
169
170 class ScaleUint8ToFloat16Calc32 {
171 public:
172 using SrcType = uint8_t;
173 using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<SrcType>;
174 using SrcVectorType = typename SrcVecTraits::VectorType;
175 using SrcVector2Type = typename SrcVecTraits::Vector2Type;
176 using DstType = float16_t;
177 using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<DstType>;
178 using DstVectorType = typename DstVecTraits::VectorType;
179 using DstVector2Type = typename DstVecTraits::Vector2Type;
180 using DstVector4Type = typename DstVecTraits::Vector4Type;
181
182 225 ScaleUint8ToFloat16Calc32(double scale, double shift, svfloat32_t &svscale,
183 svfloat32_t &svshift) KLEIDICV_STREAMING
184 225 : svscale_{svscale},
185 225 svshift_{svshift} {
186 225 svscale_ = svdup_n_f32(static_cast<float>(scale));
187 225 svshift_ = svdup_n_f32(static_cast<float>(shift));
188 225 }
189
190 228 void process_row(size_t width, Columns<const SrcType> src,
191 Columns<DstType> dst) const KLEIDICV_STREAMING {
192 228 svbool_t p16 = svptrue_b16();
193 228 svbool_t p32 = svptrue_b32();
194 228 svuint8_t svzero = svdup_n_u8(0);
195 #if KLEIDICV_TARGET_SME2
196 76 svcount_t pc8 = SrcVecTraits::svptrue_c();
197 76 svcount_t pc16 = DstVecTraits::svptrue_c();
198 #else
199 152 svbool_t p8 = svptrue_b8();
200 #endif
201 1282 auto vector_path = [&](svuint8_t src) KLEIDICV_STREAMING {
202 // First Transpose them, to have even and odd elements separated
203 // so the final narrowing puts them into the right order.
204 1054 svuint8_t src_b = svtrn1(src, svzero);
205 1054 svuint8_t src_t = svtrn2(src, svzero);
206 1054 svuint8_t src_b0 = svzip1(src_b, svzero);
207 1054 svuint8_t src_b1 = svzip2(src_b, svzero);
208 1054 svuint8_t src_t0 = svzip1(src_t, svzero);
209 1054 svuint8_t src_t1 = svzip2(src_t, svzero);
210 1054 svfloat32_t fsrc_b0 = svcvt_f32_x(p32, svreinterpret_u32_u8(src_b0));
211 1054 svfloat32_t fsrc_b1 = svcvt_f32_x(p32, svreinterpret_u32_u8(src_b1));
212 1054 svfloat32_t fsrc_t0 = svcvt_f32_x(p32, svreinterpret_u32_u8(src_t0));
213 1054 svfloat32_t fsrc_t1 = svcvt_f32_x(p32, svreinterpret_u32_u8(src_t1));
214 1054 svfloat32_t res_b0 = svmla_f32_x(p32, svshift_, fsrc_b0, svscale_);
215 1054 svfloat32_t res_b1 = svmla_f32_x(p32, svshift_, fsrc_b1, svscale_);
216 1054 svfloat32_t res_t0 = svmla_f32_x(p32, svshift_, fsrc_t0, svscale_);
217 1054 svfloat32_t res_t1 = svmla_f32_x(p32, svshift_, fsrc_t1, svscale_);
218 1054 svfloat16_t res0 = svcvtnt_f16_x(svcvt_f16_x(p16, res_b0), p16, res_t0);
219 1054 svfloat16_t res1 = svcvtnt_f16_x(svcvt_f16_x(p16, res_b1), p16, res_t1);
220 2108 return svcreate2(res0, res1);
221 1054 };
222
223 456 LoopUnroll{width, SrcVecTraits::num_lanes()}
224 755 .unroll_twice([&](size_t step) KLEIDICV_STREAMING {
225 #if KLEIDICV_TARGET_SME2
226 81 SrcVector2Type src_2vec = svld1_x2(pc8, &src[0]);
227 81 svuint8_t src0 = svget2(src_2vec, 0);
228 81 svuint8_t src1 = svget2(src_2vec, 1);
229 #else
230 446 svuint8_t src0 = svld1(p8, &src[0]);
231 446 svuint8_t src1 = svld1_vnum(p8, &src[0], 1);
232 #endif // KLEIDICV_TARGET_SME2
233 527 DstVector2Type dst0 = vector_path(src0);
234 527 DstVector2Type dst1 = vector_path(src1);
235 #if KLEIDICV_TARGET_SME2
236 162 DstVector4Type dst4 = svcreate4(svget2(dst0, 0), svget2(dst0, 1),
237 81 svget2(dst1, 0), svget2(dst1, 1));
238 81 svst1(pc16, &dst[0], dst4);
239 #else
240 446 svst1(p16, &dst[0], svget2(dst0, 0));
241 446 svst1_vnum(p16, &dst[0], 1, svget2(dst0, 1));
242 446 svst1_vnum(p16, &dst[0], 2, svget2(dst1, 0));
243 446 svst1_vnum(p16, &dst[0], 3, svget2(dst1, 1));
244 #endif // KLEIDICV_TARGET_SME2
245 527 src += ptrdiff_t(step);
246 527 dst += ptrdiff_t(step);
247 527 })
248 405 .remaining([&](size_t length, size_t) KLEIDICV_STREAMING {
249 177 size_t step = svcnth();
250
2/2
✓ Branch 0 taken 296 times.
✓ Branch 1 taken 177 times.
473 while (length > 0) {
251 296 disable_loop_vectorization();
252 296 svbool_t p16 = svwhilelt_b16_u64(0, length);
253 296 svuint16_t src0 = svld1ub_u16(p16, &src[0]);
254 296 svuint16_t src_b = svtrn1(src0, svreinterpret_u16_u8(svzero));
255 296 svuint16_t src_t = svtrn2(src0, svreinterpret_u16_u8(svzero));
256 296 svfloat32_t fsrc_b = svcvt_f32_x(p32, svreinterpret_u32_u16(src_b));
257 296 svfloat32_t fsrc_t = svcvt_f32_x(p32, svreinterpret_u32_u16(src_t));
258 296 svfloat32_t res_b = svmla_f32_x(p32, svshift_, fsrc_b, svscale_);
259 296 svfloat32_t res_t = svmla_f32_x(p32, svshift_, fsrc_t, svscale_);
260 592 svfloat16_t res =
261 296 svcvtnt_f16_x(svcvt_f16_x(p16, res_b), p16, res_t);
262 296 svst1(p16, &dst[0], res);
263 296 src += ptrdiff_t(step);
264 296 dst += ptrdiff_t(step);
265 296 length -= std::min<size_t>(length, step);
266 296 }
267 177 });
268 228 }
269
270 svfloat32_t &svscale_, &svshift_;
271 }; // end of class ScaleUint8ToFloat16Calc32
272
273 // Specialization for uint8_t to float16_t
274 template <>
275 300 kleidicv_error_t scale_sc(const uint8_t *src, size_t src_stride, float16_t *dst,
276 size_t dst_stride, size_t width, size_t height,
277 double scale, double shift) KLEIDICV_STREAMING {
278
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 297 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 297 times.
300 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
279
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 294 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 294 times.
297 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
280
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 291 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 288 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 288 times.
294 CHECK_IMAGE_SIZE(width, height);
281
282 288 Rectangle rect{width, height};
283 288 Rows<const uint8_t> src_rows{src, src_stride};
284 288 Rows<float16_t> dst_rows{dst, dst_stride};
285
4/4
✓ Branch 0 taken 69 times.
✓ Branch 1 taken 219 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 63 times.
288 if (static_cast<double>(static_cast<float16_t>(scale)) == scale &&
286 69 static_cast<double>(static_cast<float16_t>(shift)) == shift) {
287 63 svfloat16_t s0, s1;
288 63 ScaleUint8ToFloat16Calc16 operation(scale, shift, s0, s1);
289 63 zip_rows(operation, rect, src_rows, dst_rows);
290 63 } else {
291 225 svfloat32_t s0, s1;
292 225 ScaleUint8ToFloat16Calc32 operation(scale, shift, s0, s1);
293 225 zip_rows(operation, rect, src_rows, dst_rows);
294 225 }
295
296 288 return KLEIDICV_OK;
297 300 }
298
299 } // namespace KLEIDICV_TARGET_NAMESPACE
300
301 #endif // KLEIDICV_SCALE_SC_H
302