| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include <type_traits> | ||
| 6 | |||
| 7 | #include "kleidicv/kleidicv.h" | ||
| 8 | #include "kleidicv/neon.h" | ||
| 9 | |||
| 10 | namespace kleidicv::neon { | ||
| 11 | |||
| 12 | template <typename ScalarType> | ||
| 13 | class SaturatingAbsDiff final : public UnrollTwice { | ||
| 14 | public: | ||
| 15 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 16 | using VectorType = typename VecTraits::VectorType; | ||
| 17 | |||
| 18 | 4590 | VectorType vector_path(VectorType src_a, VectorType src_b) { | |
| 19 | if constexpr (std::numeric_limits<ScalarType>::is_signed) { | ||
| 20 | // Results of VABD may be outside the signed range so use two | ||
| 21 | // saturating instructions instead. | ||
| 22 | 3254 | return vqabsq(vqsubq(src_a, src_b)); | |
| 23 | } | ||
| 24 | 1336 | return vabdq(src_a, src_b); | |
| 25 | } | ||
| 26 | |||
| 27 | 2918 | ScalarType scalar_path(ScalarType src_a, ScalarType src_b) { | |
| 28 | using UnsignedScalarType = std::make_unsigned_t<ScalarType>; | ||
| 29 | // Calculate unsigned difference and then apply saturating cast. | ||
| 30 | 2918 | UnsignedScalarType u_src_a = static_cast<UnsignedScalarType>(src_a); | |
| 31 | 2918 | UnsignedScalarType u_src_b = static_cast<UnsignedScalarType>(src_b); | |
| 32 | 5836 | UnsignedScalarType difference = | |
| 33 |
10/10✓ Branch 0 taken 357 times.
✓ Branch 1 taken 593 times.
✓ Branch 2 taken 360 times.
✓ Branch 3 taken 702 times.
✓ Branch 4 taken 138 times.
✓ Branch 5 taken 204 times.
✓ Branch 6 taken 140 times.
✓ Branch 7 taken 250 times.
✓ Branch 8 taken 73 times.
✓ Branch 9 taken 101 times.
|
2918 | src_a > src_b ? u_src_a - u_src_b : u_src_b - u_src_a; |
| 34 | 5836 | return saturating_cast<UnsignedScalarType, ScalarType>(difference); | |
| 35 | 2918 | } | |
| 36 | }; // end of class SaturatingAbsDiff<ScalarType> | ||
| 37 | |||
| 38 | template <typename T> | ||
| 39 | 449 | kleidicv_error_t saturating_absdiff(const T *src_a, size_t src_a_stride, | |
| 40 | const T *src_b, size_t src_b_stride, T *dst, | ||
| 41 | size_t dst_stride, size_t width, | ||
| 42 | size_t height) { | ||
| 43 |
20/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 87 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 87 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 87 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 87 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 89 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 89 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 89 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 89 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 89 times.
✓ Branch 18 taken 2 times.
✓ Branch 19 taken 89 times.
|
449 | CHECK_POINTER_AND_STRIDE(src_a, src_a_stride, height); |
| 44 |
20/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 86 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 86 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 86 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 86 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 87 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 87 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 87 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 87 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 87 times.
✓ Branch 18 taken 2 times.
✓ Branch 19 taken 87 times.
|
441 | CHECK_POINTER_AND_STRIDE(src_b, src_b_stride, height); |
| 45 |
20/20✓ Branch 0 taken 1 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 85 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 85 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 85 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 85 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 85 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 85 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 85 times.
✓ Branch 18 taken 2 times.
✓ Branch 19 taken 85 times.
|
433 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
| 46 |
30/30✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 83 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 83 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 84 times.
✓ Branch 8 taken 1 times.
✓ Branch 9 taken 83 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 83 times.
✓ Branch 12 taken 1 times.
✓ Branch 13 taken 84 times.
✓ Branch 14 taken 1 times.
✓ Branch 15 taken 83 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 83 times.
✓ Branch 18 taken 1 times.
✓ Branch 19 taken 84 times.
✓ Branch 20 taken 1 times.
✓ Branch 21 taken 83 times.
✓ Branch 22 taken 2 times.
✓ Branch 23 taken 83 times.
✓ Branch 24 taken 1 times.
✓ Branch 25 taken 84 times.
✓ Branch 26 taken 1 times.
✓ Branch 27 taken 83 times.
✓ Branch 28 taken 2 times.
✓ Branch 29 taken 83 times.
|
425 | CHECK_IMAGE_SIZE(width, height); |
| 47 | |||
| 48 | 415 | SaturatingAbsDiff<T> operation; | |
| 49 | 415 | Rectangle rect{width, height}; | |
| 50 | 415 | Rows<const T> src_a_rows{src_a, src_a_stride}; | |
| 51 | 415 | Rows<const T> src_b_rows{src_b, src_b_stride}; | |
| 52 | 415 | Rows<T> dst_rows{dst, dst_stride}; | |
| 53 | 415 | neon::apply_operation_by_rows(operation, rect, src_a_rows, src_b_rows, | |
| 54 | dst_rows); | ||
| 55 | 415 | return KLEIDICV_OK; | |
| 56 | 449 | } | |
| 57 | |||
| 58 | #define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ | ||
| 59 | template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t saturating_absdiff<type>( \ | ||
| 60 | const type *src_a, size_t src_a_stride, const type *src_b, \ | ||
| 61 | size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \ | ||
| 62 | size_t height) | ||
| 63 | |||
| 64 | KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t); | ||
| 65 | KLEIDICV_INSTANTIATE_TEMPLATE(int8_t); | ||
| 66 | KLEIDICV_INSTANTIATE_TEMPLATE(uint16_t); | ||
| 67 | KLEIDICV_INSTANTIATE_TEMPLATE(int16_t); | ||
| 68 | KLEIDICV_INSTANTIATE_TEMPLATE(int32_t); | ||
| 69 | |||
| 70 | } // namespace kleidicv::neon | ||
| 71 |