Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #include <limits> | ||
6 | |||
7 | #include "kleidicv/kleidicv.h" | ||
8 | #include "kleidicv/neon.h" | ||
9 | |||
10 | namespace kleidicv::neon { | ||
11 | |||
12 | template <typename ScalarType> | ||
13 | class SaturatingAddAbsWithThreshold final : public UnrollOnce, | ||
14 | public UnrollTwice, | ||
15 | public TryToAvoidTailLoop { | ||
16 | public: | ||
17 | using VecTraits = neon::VecTraits<ScalarType>; | ||
18 | using VectorType = typename VecTraits::VectorType; | ||
19 | |||
20 | 78 | explicit SaturatingAddAbsWithThreshold(ScalarType threshold) | |
21 | 78 | : threshold_{threshold}, threshold_vec_{vdupq_n_s16(threshold)} {} | |
22 | |||
23 | 988 | VectorType vector_path(VectorType src_a, VectorType src_b) { | |
24 | 988 | VectorType add_abs = vqaddq_s16(vqabsq_s16(src_a), vqabsq_s16(src_b)); | |
25 | 1976 | return vandq_s16(add_abs, vcgtq_s16(add_abs, threshold_vec_)); | |
26 | 988 | } | |
27 | |||
28 | 166 | ScalarType scalar_path(ScalarType src_a, ScalarType src_b) { | |
29 | 166 | ScalarType add_abs = 0; | |
30 | |||
31 |
2/2✓ Branch 0 taken 145 times.
✓ Branch 1 taken 21 times.
|
166 | if (__builtin_add_overflow(saturate_abs(src_a), saturate_abs(src_b), |
32 | &add_abs)) { | ||
33 | 21 | add_abs = std::numeric_limits<ScalarType>::max(); | |
34 | 21 | } | |
35 |
2/2✓ Branch 0 taken 86 times.
✓ Branch 1 taken 80 times.
|
166 | return add_abs > threshold_ ? add_abs : 0; |
36 | 166 | } | |
37 | |||
38 | private: | ||
39 | 332 | ScalarType saturate_abs(ScalarType input) { | |
40 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 327 times.
|
332 | if (std::numeric_limits<ScalarType>::is_signed && |
41 | 332 | input == std::numeric_limits<ScalarType>::lowest()) { | |
42 | 5 | return std::numeric_limits<ScalarType>::max(); | |
43 | } | ||
44 | 327 | return std::abs(input); | |
45 | 332 | } | |
46 | |||
47 | ScalarType threshold_; | ||
48 | VectorType threshold_vec_; | ||
49 | }; // end of class SaturatingAddAbsWithThreshold<ScalarType> | ||
50 | |||
51 | template <typename T> | ||
52 | 86 | kleidicv_error_t saturating_add_abs_with_threshold( | |
53 | const T *src_a, size_t src_a_stride, const T *src_b, size_t src_b_stride, | ||
54 | T *dst, size_t dst_stride, size_t width, size_t height, T threshold) { | ||
55 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 84 times.
|
86 | CHECK_POINTER_AND_STRIDE(src_a, src_a_stride, height); |
56 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 82 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 82 times.
|
84 | CHECK_POINTER_AND_STRIDE(src_b, src_b_stride, height); |
57 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 80 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 80 times.
|
82 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
58 |
6/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 79 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 78 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 78 times.
|
80 | CHECK_IMAGE_SIZE(width, height); |
59 | |||
60 | 78 | SaturatingAddAbsWithThreshold<T> operation{threshold}; | |
61 | 78 | Rectangle rect{width, height}; | |
62 | 78 | Rows<const T> src_a_rows{src_a, src_a_stride}; | |
63 | 78 | Rows<const T> src_b_rows{src_b, src_b_stride}; | |
64 | 78 | Rows<T> dst_rows{dst, dst_stride}; | |
65 | 78 | apply_operation_by_rows(operation, rect, src_a_rows, src_b_rows, dst_rows); | |
66 | 78 | return KLEIDICV_OK; | |
67 | 86 | } | |
68 | |||
69 | #define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ | ||
70 | template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ | ||
71 | saturating_add_abs_with_threshold<type>( \ | ||
72 | const type *src_a, size_t src_a_stride, const type *src_b, \ | ||
73 | size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \ | ||
74 | size_t height, type threshold) | ||
75 | |||
76 | KLEIDICV_INSTANTIATE_TEMPLATE(int16_t); | ||
77 | |||
78 | } // namespace kleidicv::neon | ||
79 |