| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include "kleidicv/kleidicv.h" | ||
| 6 | #include "kleidicv/neon.h" | ||
| 7 | #include "kleidicv/utils.h" | ||
| 8 | |||
| 9 | namespace kleidicv::neon { | ||
| 10 | |||
| 11 | template <typename ScalarType, typename ScalarTypeInternal> | ||
| 12 | class Sum; | ||
| 13 | |||
| 14 | template <> | ||
| 15 | class Sum<float, double> final : public UnrollTwice { | ||
| 16 | public: | ||
| 17 | using ScalarType = float; | ||
| 18 | using ScalarTypeInternal = double; | ||
| 19 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 20 | using VectorType = typename VecTraits::VectorType; | ||
| 21 | using VecTraitsInternal = | ||
| 22 | KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarTypeInternal>; | ||
| 23 | using VectorTypeInternal = typename VecTraitsInternal::VectorType; | ||
| 24 | |||
| 25 | VectorTypeInternal vector_sum; | ||
| 26 | ScalarTypeInternal scalar_sum; | ||
| 27 | |||
| 28 | 14 | Sum() : vector_sum(VectorTypeInternal{0}), scalar_sum(0) {} | |
| 29 | |||
| 30 | 276 | void vector_path(VectorType src) { | |
| 31 | 276 | VectorTypeInternal src_low = vcvt_f64(vget_low(src)); | |
| 32 | 276 | VectorTypeInternal src_high = vcvt_f64(vget_high(src)); | |
| 33 | 276 | vector_sum = vaddq(vector_sum, vaddq(src_low, src_high)); | |
| 34 | 276 | } | |
| 35 | |||
| 36 | 57 | void scalar_path(ScalarType src) { | |
| 37 | 57 | scalar_sum += static_cast<ScalarTypeInternal>(src); | |
| 38 | 57 | } | |
| 39 | |||
| 40 | 14 | ScalarType get_sum() const { | |
| 41 | 14 | ScalarTypeInternal sum = vaddvq(vector_sum) + scalar_sum; | |
| 42 | 28 | return static_cast<ScalarType>(sum); | |
| 43 | 14 | } | |
| 44 | }; | ||
| 45 | |||
| 46 | template <typename T, typename TInternal> | ||
| 47 | 20 | kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, | |
| 48 | size_t height, T *sum) { | ||
| 49 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 19 times.
|
20 | CHECK_POINTERS(sum); |
| 50 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 17 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 17 times.
|
19 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
| 51 |
6/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 14 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 14 times.
|
17 | CHECK_IMAGE_SIZE(width, height); |
| 52 | |||
| 53 | 14 | Rectangle rect{width, height}; | |
| 54 | 14 | Rows<const T> src_rows{src, src_stride}; | |
| 55 | 14 | Sum<T, TInternal> operation; | |
| 56 | 14 | apply_operation_by_rows(operation, rect, src_rows); | |
| 57 | |||
| 58 | 14 | *sum = operation.get_sum(); | |
| 59 | |||
| 60 | 14 | return KLEIDICV_OK; | |
| 61 | 20 | } | |
| 62 | |||
| 63 | #define KLEIDICV_INSTANTIATE_TEMPLATE(type, type_internal) \ | ||
| 64 | template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum<type, type_internal>( \ | ||
| 65 | const type *src, size_t src_stride, size_t width, size_t height, \ | ||
| 66 | type *sum) | ||
| 67 | |||
| 68 | KLEIDICV_INSTANTIATE_TEMPLATE(float, double); | ||
| 69 | |||
| 70 | } // namespace kleidicv::neon | ||
| 71 |