KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/arithmetics/exp_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 44 44 100.0%
Functions: 5 5 100.0%
Branches: 16 16 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <cmath>
6
7 #include "kleidicv/arithmetics/exp_constants.h"
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/neon.h"
10
11 namespace kleidicv::neon {
12
13 template <typename ScalarType>
14 class Exp;
15
16 template <>
17 class Exp<float> final : public UnrollOnce {
18 public:
19 using VecTraits = neon::VecTraits<float>;
20 using VectorType = typename VecTraits::VectorType;
21
22 2903 VectorType vector_path(VectorType src) {
23 2903 float32x4_t n, r, scale, poly, z;
24 2903 uint32x4_t cmp, e;
25
26 /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)]
27 x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
28 2903 z = vfmaq_f32(vdupq_n(exp_f32::kShift), src, vdupq_n(exp_f32::kInvLn2));
29 2903 n = z - vdupq_n(exp_f32::kShift);
30 2903 r = vfmaq_f32(src, n, vdupq_n(-exp_f32::kLn2Hi));
31 2903 r = vfmaq_f32(r, n, vdupq_n(-exp_f32::kLn2Lo));
32 2903 e = vreinterpretq_u32_f32(z) << 23;
33 2903 scale = vreinterpretq_f32_u32(e + vdupq_n(0x3f800000));
34 2903 cmp = vcagtq_f32(n, vdupq_n(126.0F));
35 2903 poly = vfmaq_f32(vdupq_n(exp_f32::kPoly[1]), vdupq_n(exp_f32::kPoly[0]), r);
36 2903 poly = vfmaq_f32(vdupq_n(exp_f32::kPoly[2]), poly, r);
37 2903 poly = vfmaq_f32(vdupq_n(exp_f32::kPoly[3]), poly, r);
38 2903 poly = vfmaq_f32(vdupq_n(exp_f32::kPoly[4]), poly, r);
39 2903 poly = vfmaq_f32(vdupq_n(1.0F), poly, r);
40 2903 poly = vfmaq_f32(vdupq_n(1.0F), poly, r);
41
2/2
✓ Branch 0 taken 2624 times.
✓ Branch 1 taken 279 times.
2903 if (KLEIDICV_UNLIKELY(v_any_u32(cmp))) {
42 2624 return specialcase(poly, n, e);
43 }
44 279 return scale * poly;
45 2903 }
46
47 80 float scalar_path(float src) { return expf(src); }
48
49 private:
50 2903 static int v_any_u32(uint32x4_t x) {
51 /* assume elements in x are either 0 or -1u. */
52 2903 return vpaddd_u64(vreinterpretq_u64_u32(x)) != 0;
53 }
54
55 2624 static float32x4_t specialcase(float32x4_t poly, float32x4_t n,
56 uint32x4_t e) {
57 /* 2^n may overflow, break it up into s1*s2. */
58 2624 uint32x4_t b = (n <= vdupq_n(0.0F)) & vdupq_n(0x83000000);
59 2624 float32x4_t s1 = vreinterpretq_f32_u32(vdupq_n(0x7f000000) + b);
60 2624 float32x4_t s2 = vreinterpretq_f32_u32(e - b);
61 2624 uint32x4_t cmp = vcagtq_f32(n, vdupq_n(192.0F));
62 2624 float32x4_t r1 = s1 * s1;
63 2624 float32x4_t r0 = (poly * s1) * s2;
64 7872 return vreinterpretq_f32_u32((cmp & vreinterpretq_u32_f32(r1)) |
65 2624 (~cmp & vreinterpretq_u32_f32(r0)));
66 2624 }
67 }; // end of class Exp<float>
68
69 template <typename T>
70 81 KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t exp(const T* src, size_t src_stride,
71 T* dst, size_t dst_stride,
72 size_t width, size_t height) {
73
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 79 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 79 times.
81 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
74
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 77 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 77 times.
79 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
75
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 76 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 75 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 75 times.
77 CHECK_IMAGE_SIZE(width, height);
76
77 75 Exp<T> operation;
78 75 Rectangle rect{width, height};
79 75 Rows<const T> src_rows{src, src_stride};
80 75 Rows<T> dst_rows{dst, dst_stride};
81 75 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
82 75 return KLEIDICV_OK;
83 81 }
84
85 #define KLEIDICV_INSTANTIATE_TEMPLATE(type) \
86 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t exp<type>( \
87 const type* src, size_t src_stride, type* dst, size_t dst_stride, \
88 size_t width, size_t height)
89
90 KLEIDICV_INSTANTIATE_TEMPLATE(float);
91
92 } // namespace kleidicv::neon
93