KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/arithmetics/exp_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 45 45 100.0%
Functions: 6 6 100.0%
Branches: 16 16 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_EXP_SC_H
6 #define KLEIDICV_EXP_SC_H
7
8 #include "kleidicv/arithmetics/exp_constants.h"
9 #include "kleidicv/kleidicv.h"
10 #include "kleidicv/sve2.h"
11
12 namespace KLEIDICV_TARGET_NAMESPACE {
// Primary template of the exponential operation. It is only declared here;
// this header defines the <float, TryShortPath> partial specialization.
// ScalarType is the element type, TryShortPath a compile-time switch.
13 template <typename ScalarType, bool TryShortPath>
14 class Exp;
15
// Partial specialization of Exp for float elements, built on SVE intrinsics.
// TryShortPath selects at compile time whether vector_path() first tries a
// single-multiply scaling before falling back to specialcase().
16 template <bool TryShortPath>
17 class Exp<float, TryShortPath> final : public UnrollOnce {
18 public:
19 using ContextType = Context;
20 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
21 using VectorType = typename VecTraits::VectorType;
22
// Evaluates exp() on one vector of floats.
//   ctx - supplies the governing predicate via ctx.predicate().
//   src - input values.
// Returns the vector of results. All arithmetic uses the _x intrinsic forms
// under ctx.predicate().
23 4517 VectorType vector_path(ContextType ctx, VectorType src) KLEIDICV_STREAMING {
24 4517 svfloat32_t n, r, poly, z;
25 4517 svuint32_t e;
26
27 /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)]
28 x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
// z = kShift + src * kInvLn2.
29 4517 z = svmla_x(ctx.predicate(), svdup_f32(exp_f32::kShift), src,
30 exp_f32::kInvLn2);
// n = z - kShift. NOTE(review): presumably the add/subtract of kShift rounds
// src / ln2 to an integer; the constants live in exp_constants.h (not shown).
31 4517 n = svsub_x(ctx.predicate(), z, exp_f32::kShift);
// r = src - n * kLn2Hi - n * kLn2Lo, done as two multiply-add steps
// (presumably the high and low parts of ln2 - confirm in exp_constants.h).
32 4517 r = svmla_x(ctx.predicate(), src, n, -exp_f32::kLn2Hi);
33 4517 r = svmla_x(ctx.predicate(), r, n, -exp_f32::kLn2Lo);
// e = raw bits of z shifted left by 23, i.e. moved up to where the exponent
// field of a 32-bit float starts.
34 4517 e = svlsl_x(ctx.predicate(), svreinterpret_u32(z), 23);
// Horner evaluation of the polynomial in r: kPoly[0]..kPoly[4] are the
// higher-order coefficients, and the last two steps add the coefficient 1.0
// for the r^1 and r^0 terms.
35 9034 poly = svmla_x(ctx.predicate(), svdup_f32(exp_f32::kPoly[1]),
36 4517 svdup_f32(exp_f32::kPoly[0]), r);
37 4517 poly = svmla_x(ctx.predicate(), svdup_f32(exp_f32::kPoly[2]), poly, r);
38 4517 poly = svmla_x(ctx.predicate(), svdup_f32(exp_f32::kPoly[3]), poly, r);
39 4517 poly = svmla_x(ctx.predicate(), svdup_f32(exp_f32::kPoly[4]), poly, r);
40 4517 poly = svmla_x(ctx.predicate(), svdup_f32(1.0F), poly, r);
41 4517 poly = svmla_x(ctx.predicate(), svdup_f32(1.0F), poly, r);
42
43 if constexpr (TryShortPath) {
// svacgt compares absolute values: cmp marks the lanes where |n| > 126.
// If any active lane is marked, the whole vector is handled by specialcase().
44 2937 svbool_t cmp = svacgt(ctx.predicate(), n, 126.0F);
45
2/2
✓ Branch 0 taken 2638 times.
✓ Branch 1 taken 299 times.
2937 if (KLEIDICV_UNLIKELY(svptest_any(ctx.predicate(), cmp))) {
46 2638 return specialcase(ctx.predicate(), poly, n, e);
47 }
// Short path: 0x3f800000 is the bit pattern of 1.0F, so adding e offsets the
// exponent field of 1.0; the result is used as the scale multiplying poly
// (the 2^n factor of the formula above).
48 598 svfloat32_t scale =
49 299 svreinterpret_f32(svadd_x(ctx.predicate(), e, 0x3f800000U));
50 299 return svmul_x(ctx.predicate(), scale, poly);
51 2937 }
52
// Executed only when TryShortPath is false; with TryShortPath true both
// branches above have already returned.
53 3160 return specialcase(ctx.predicate(), poly, n, e);
54 4517 }
55
56 private:
// Applies the scale to poly in two multiplications.
//   pg   - governing predicate.
//   poly - polynomial value computed by vector_path().
//   n    - per-lane value n from vector_path().
//   e    - per-lane exponent bits (z << 23) from vector_path().
// Returns s1 * s1 in lanes where |n| > 192, and s2 * (poly * s1) elsewhere.
57 4218 static svfloat32_t specialcase(svbool_t pg, svfloat32_t poly, svfloat32_t n,
58 svuint32_t e) KLEIDICV_STREAMING {
59 /* 2^n may overflow, break it up into s1*s2. */
// b is 0x83000000 in lanes where n <= 0 and 0 in all other lanes.
60 8436 svuint32_t b = svsel(svcmple(pg, n, svdup_f32(0.0F)),
61 4218 svdup_u32(0x83000000U), svdup_u32(0U));
// s1 and s2 are built directly from bit patterns: s1 = bits(b + 0x7f000000),
// s2 = bits(e - b). The offset b added to s1 is removed again from s2.
62 4218 svfloat32_t s1 = svreinterpret_f32(svadd_x(pg, b, 0x7f000000U));
63 4218 svfloat32_t s2 = svreinterpret_f32(svsub_x(pg, e, b));
// cmp marks lanes with |n| > 192; those lanes take r1 = s1 * s1
// (presumably the saturated overflow/underflow result - TODO confirm).
64 4218 svbool_t cmp = svacgt(pg, n, 192.0F);
65 4218 svfloat32_t r1 = svmul_x(pg, s1, s1);
66 4218 svfloat32_t r0 = svmul_x(pg, s2, svmul_x(pg, poly, s1));
67
68 8436 return svsel(cmp, r1, r0);
69 4218 }
70 }; // end of class Exp<float>
71
// Alias for Exp with TryShortPath = false: vector_path() always finishes
// through specialcase().
72 template <typename T>
73 using ExpNoShortPath = Exp<T, false>;
74
// Alias for Exp with TryShortPath = true: vector_path() first checks whether
// the single-multiply scaling can be used and only otherwise calls
// specialcase().
75 template <typename T>
76 using ExpTryShortPath = Exp<T, true>;
77
// Runs `Operation` over a width x height image, reading from src and writing
// to dst.
//   src, src_stride - source buffer and its row stride.
//   dst, dst_stride - destination buffer and its row stride.
//   width, height   - image dimensions passed to Rectangle.
// Returns KLEIDICV_OK once apply_operation_by_rows() has been called.
// NOTE(review): the CHECK_* macros are defined elsewhere; the execution counts
// below (162 -> 158 -> 154 -> 150) show that each can leave the function early,
// presumably returning an error code - confirm against their definitions.
// NOTE(review): stride units (bytes vs. elements) are not visible here.
78 template <typename T, typename Operation>
79 162 static kleidicv_error_t exp_sc(const T* src, size_t src_stride, T* dst,
80 size_t dst_stride, size_t width,
81 size_t height) KLEIDICV_STREAMING {
82
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 158 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 158 times.
162 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
83
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 154 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 154 times.
158 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
84
6/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 152 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 150 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 150 times.
154 CHECK_IMAGE_SIZE(width, height);
85
// All arguments accepted: wrap the raw buffers in row accessors and let
// apply_operation_by_rows() drive a default-constructed Operation over them.
86 150 Operation operation;
87 150 Rectangle rect{width, height};
88 150 Rows<const T> src_rows{src, src_stride};
89 150 Rows<T> dst_rows{dst, dst_stride};
90 150 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
91 150 return KLEIDICV_OK;
92 162 }
93
94 } // namespace KLEIDICV_TARGET_NAMESPACE
95
96 #endif // KLEIDICV_EXP_SC_H
97