| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | |||
| 7 | #include "kleidicv/kleidicv.h" | ||
| 8 | #include "kleidicv/neon.h" | ||
| 9 | |||
| 10 | namespace kleidicv::neon { | ||
| 11 | |||
| 12 | template <typename ScalarType> | ||
| 13 | class SaturatingAddAbsWithThreshold final : public UnrollOnce, | ||
| 14 | public UnrollTwice, | ||
| 15 | public TryToAvoidTailLoop { | ||
| 16 | public: | ||
| 17 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 18 | using VectorType = typename VecTraits::VectorType; | ||
| 19 | |||
| 20 | 78 | explicit SaturatingAddAbsWithThreshold(ScalarType threshold) | |
| 21 | 78 | : threshold_{threshold}, threshold_vec_{vdupq_n_s16(threshold)} {} | |
| 22 | |||
| 23 | 988 | VectorType vector_path(VectorType src_a, VectorType src_b) { | |
| 24 | 988 | VectorType add_abs = vqaddq_s16(vqabsq_s16(src_a), vqabsq_s16(src_b)); | |
| 25 | 1976 | return vandq_s16(add_abs, vcgtq_s16(add_abs, threshold_vec_)); | |
| 26 | 988 | } | |
| 27 | |||
| 28 | 166 | ScalarType scalar_path(ScalarType src_a, ScalarType src_b) { | |
| 29 | 166 | ScalarType add_abs = 0; | |
| 30 | |||
| 31 |
2/2✓ Branch 0 taken 137 times.
✓ Branch 1 taken 29 times.
|
166 | if (__builtin_add_overflow(saturate_abs(src_a), saturate_abs(src_b), |
| 32 | &add_abs)) { | ||
| 33 | 29 | add_abs = std::numeric_limits<ScalarType>::max(); | |
| 34 | 29 | } | |
| 35 |
2/2✓ Branch 0 taken 86 times.
✓ Branch 1 taken 80 times.
|
166 | return add_abs > threshold_ ? add_abs : 0; |
| 36 | 166 | } | |
| 37 | |||
| 38 | private: | ||
| 39 | 332 | ScalarType saturate_abs(ScalarType input) { | |
| 40 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 327 times.
|
332 | if (std::numeric_limits<ScalarType>::is_signed && |
| 41 | 332 | input == std::numeric_limits<ScalarType>::lowest()) { | |
| 42 | 5 | return std::numeric_limits<ScalarType>::max(); | |
| 43 | } | ||
| 44 | 327 | return std::abs(input); | |
| 45 | 332 | } | |
| 46 | |||
| 47 | ScalarType threshold_; | ||
| 48 | VectorType threshold_vec_; | ||
| 49 | }; // end of class SaturatingAddAbsWithThreshold<ScalarType> | ||
| 50 | |||
| 51 | template <typename T> | ||
| 52 | 86 | kleidicv_error_t saturating_add_abs_with_threshold( | |
| 53 | const T *src_a, size_t src_a_stride, const T *src_b, size_t src_b_stride, | ||
| 54 | T *dst, size_t dst_stride, size_t width, size_t height, T threshold) { | ||
| 55 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 84 times.
|
86 | CHECK_POINTER_AND_STRIDE(src_a, src_a_stride, height); |
| 56 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 82 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 82 times.
|
84 | CHECK_POINTER_AND_STRIDE(src_b, src_b_stride, height); |
| 57 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 80 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 80 times.
|
82 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
| 58 |
6/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 79 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 78 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 78 times.
|
80 | CHECK_IMAGE_SIZE(width, height); |
| 59 | |||
| 60 | 78 | SaturatingAddAbsWithThreshold<T> operation{threshold}; | |
| 61 | 78 | Rectangle rect{width, height}; | |
| 62 | 78 | Rows<const T> src_a_rows{src_a, src_a_stride}; | |
| 63 | 78 | Rows<const T> src_b_rows{src_b, src_b_stride}; | |
| 64 | 78 | Rows<T> dst_rows{dst, dst_stride}; | |
| 65 | 78 | apply_operation_by_rows(operation, rect, src_a_rows, src_b_rows, dst_rows); | |
| 66 | 78 | return KLEIDICV_OK; | |
| 67 | 86 | } | |
| 68 | |||
| 69 | #define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ | ||
| 70 | template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ | ||
| 71 | saturating_add_abs_with_threshold<type>( \ | ||
| 72 | const type *src_a, size_t src_a_stride, const type *src_b, \ | ||
| 73 | size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \ | ||
| 74 | size_t height, type threshold) | ||
| 75 | |||
| 76 | KLEIDICV_INSTANTIATE_TEMPLATE(int16_t); | ||
| 77 | |||
| 78 | } // namespace kleidicv::neon | ||
| 79 |