| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #ifndef KLEIDICV_SUM_SC_H | ||
| 6 | #define KLEIDICV_SUM_SC_H | ||
| 7 | |||
| 8 | #include "kleidicv/kleidicv.h" | ||
| 9 | #include "kleidicv/sve2.h" | ||
| 10 | #include "kleidicv/utils.h" | ||
| 11 | |||
| 12 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
| 13 | |||
| 14 | template <typename ScalarType, typename ScalarTypeInternal> | ||
| 15 | class Sum; | ||
| 16 | |||
| 17 | template <> | ||
| 18 | class Sum<float, double> final : public UnrollTwice { | ||
| 19 | public: | ||
| 20 | using ScalarType = float; | ||
| 21 | using ScalarTypeInternal = double; | ||
| 22 | using ContextType = Context; | ||
| 23 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
| 24 | using VectorType = typename VecTraits::VectorType; | ||
| 25 | using VecTraitsInternal = | ||
| 26 | KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarTypeInternal>; | ||
| 27 | using VectorTypeInternal = typename VecTraitsInternal::VectorType; | ||
| 28 | |||
| 29 | 42 | explicit Sum(VectorTypeInternal &accumulator) KLEIDICV_STREAMING | |
| 30 | 42 | : accumulator_{accumulator} { | |
| 31 | 42 | accumulator_ = VecTraitsInternal::svdup(0); | |
| 32 | 42 | } | |
| 33 | |||
| 34 | 469 | void vector_path(ContextType ctx, VectorType src) KLEIDICV_STREAMING { | |
| 35 | 938 | VectorTypeInternal src_widened_evens = | |
| 36 | 469 | svcvt_f64_f32_x(VecTraits::svptrue(), src); | |
| 37 | 938 | VectorTypeInternal src_widened_odds = | |
| 38 | 469 | svcvtlt_f64_f32_x(VecTraits::svptrue(), src); | |
| 39 | 469 | accumulator_ = | |
| 40 | 938 | svadd_m(ctx.predicate(), accumulator_, | |
| 41 | 469 | svadd_m(ctx.predicate(), src_widened_evens, src_widened_odds)); | |
| 42 | 469 | } | |
| 43 | |||
| 44 | 42 | ScalarType get_sum() const KLEIDICV_STREAMING { | |
| 45 | 42 | ScalarTypeInternal accumulator_final[VecTraitsInternal::max_num_lanes()] = { | |
| 46 | 0}; | ||
| 47 | 42 | svst1(VecTraitsInternal::svptrue(), accumulator_final, accumulator_); | |
| 48 | |||
| 49 | 42 | ScalarTypeInternal sum = 0; | |
| 50 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 252 times.
|
294 | for (size_t i = 0; i != VecTraitsInternal::num_lanes(); ++i) { |
| 51 | 252 | sum += accumulator_final[i]; | |
| 52 | 252 | } | |
| 53 | 84 | return static_cast<ScalarType>(sum); | |
| 54 | 42 | } | |
| 55 | |||
| 56 | private: | ||
| 57 | VectorTypeInternal &accumulator_; | ||
| 58 | }; | ||
| 59 | |||
| 60 | template <typename T, typename TInternal> | ||
| 61 | 60 | kleidicv_error_t sum_sc(const T *src, size_t src_stride, size_t width, | |
| 62 | size_t height, T *sum) KLEIDICV_STREAMING { | ||
| 63 | using VecTraitsInternal = KLEIDICV_TARGET_NAMESPACE::VecTraits<TInternal>; | ||
| 64 | using VectorTypeInternal = typename VecTraitsInternal::VectorType; | ||
| 65 | |||
| 66 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 57 times.
|
60 | CHECK_POINTERS(sum); |
| 67 |
4/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 51 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 51 times.
|
57 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
| 68 |
6/6✓ Branch 0 taken 3 times.
✓ Branch 1 taken 48 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 42 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 42 times.
|
51 | CHECK_IMAGE_SIZE(width, height); |
| 69 | |||
| 70 | 42 | Rectangle rect{width, height}; | |
| 71 | 42 | Rows<const T> src_rows{src, src_stride}; | |
| 72 | |||
| 73 | 42 | VectorTypeInternal accumulator; | |
| 74 | 42 | Sum<T, TInternal> operation{accumulator}; | |
| 75 | |||
| 76 | 42 | apply_operation_by_rows(operation, rect, src_rows); | |
| 77 | |||
| 78 | 42 | *sum = operation.get_sum(); | |
| 79 | |||
| 80 | 42 | return KLEIDICV_OK; | |
| 81 | 60 | } | |
| 82 | |||
| 83 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
| 84 | |||
| 85 | #endif // KLEIDICV_SUM_SC_H | ||
| 86 |