Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #include "kleidicv/kleidicv.h" | ||
6 | #include "kleidicv/neon.h" | ||
7 | #include "kleidicv/utils.h" | ||
8 | |||
9 | namespace kleidicv::neon { | ||
10 | |||
11 | template <typename ScalarType, typename ScalarTypeInternal> | ||
12 | class Sum; | ||
13 | |||
14 | template <> | ||
15 | class Sum<float, double> final : public UnrollTwice { | ||
16 | public: | ||
17 | using ScalarType = float; | ||
18 | using ScalarTypeInternal = double; | ||
19 | using VecTraits = neon::VecTraits<ScalarType>; | ||
20 | using VectorType = typename VecTraits::VectorType; | ||
21 | using VecTraitsInternal = | ||
22 | KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarTypeInternal>; | ||
23 | using VectorTypeInternal = typename VecTraitsInternal::VectorType; | ||
24 | |||
25 | VectorTypeInternal vector_sum; | ||
26 | ScalarTypeInternal scalar_sum; | ||
27 | |||
28 | 14 | Sum() : vector_sum(VectorTypeInternal{0}), scalar_sum(0) {} | |
29 | |||
30 | 276 | void vector_path(VectorType src) { | |
31 | 276 | VectorTypeInternal src_low = vcvt_f64(vget_low(src)); | |
32 | 276 | VectorTypeInternal src_high = vcvt_f64(vget_high(src)); | |
33 | 276 | vector_sum = vaddq(vector_sum, vaddq(src_low, src_high)); | |
34 | 276 | } | |
35 | |||
36 | 57 | void scalar_path(ScalarType src) { | |
37 | 57 | scalar_sum += static_cast<ScalarTypeInternal>(src); | |
38 | 57 | } | |
39 | |||
40 | 14 | ScalarType get_sum() const { | |
41 | 14 | ScalarTypeInternal sum = vaddvq(vector_sum) + scalar_sum; | |
42 | 28 | return static_cast<ScalarType>(sum); | |
43 | 14 | } | |
44 | }; | ||
45 | |||
46 | template <typename T, typename TInternal> | ||
47 | 20 | kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, | |
48 | size_t height, T *sum) { | ||
49 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 19 times.
|
20 | CHECK_POINTERS(sum); |
50 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 17 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 17 times.
|
19 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
51 |
6/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 14 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 14 times.
|
17 | CHECK_IMAGE_SIZE(width, height); |
52 | |||
53 | 14 | Rectangle rect{width, height}; | |
54 | 14 | Rows<const T> src_rows{src, src_stride}; | |
55 | 14 | Sum<T, TInternal> operation; | |
56 | 14 | apply_operation_by_rows(operation, rect, src_rows); | |
57 | |||
58 | 14 | *sum = operation.get_sum(); | |
59 | |||
60 | 14 | return KLEIDICV_OK; | |
61 | 20 | } | |
62 | |||
63 | #define KLEIDICV_INSTANTIATE_TEMPLATE(type, type_internal) \ | ||
64 | template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum<type, type_internal>( \ | ||
65 | const type *src, size_t src_stride, size_t width, size_t height, \ | ||
66 | type *sum) | ||
67 | |||
68 | KLEIDICV_INSTANTIATE_TEMPLATE(float, double); | ||
69 | |||
70 | } // namespace kleidicv::neon | ||
71 |