Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_SUM_SC_H | ||
6 | #define KLEIDICV_SUM_SC_H | ||
7 | |||
8 | #include "kleidicv/kleidicv.h" | ||
9 | #include "kleidicv/sve2.h" | ||
10 | #include "kleidicv/utils.h" | ||
11 | |||
12 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
13 | |||
// Row-wise sum operation, declared here without a definition; the
// implementation comes from explicit specializations.
// ScalarType is the element type of the input, ScalarTypeInternal the
// (wider) type in which the running sum is accumulated.
14 | template <typename ScalarType, typename ScalarTypeInternal> | ||
15 | class Sum; | ||
16 | |||
// Specialization that sums float input while accumulating in double.
// The running total lives in a vector accumulator owned by the caller;
// this class only keeps a reference to it.
17 | template <> | ||
18 | class Sum<float, double> final : public UnrollTwice { | ||
19 | public: | ||
20 | using ScalarType = float; | ||
21 | using ScalarTypeInternal = double; | ||
22 | using ContextType = Context; | ||
23 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
24 | using VectorType = typename VecTraits::VectorType; | ||
25 | using VecTraitsInternal = | ||
26 | KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarTypeInternal>; | ||
27 | using VectorTypeInternal = typename VecTraitsInternal::VectorType; | ||
28 | |||
// Binds to the caller-provided accumulator and overwrites it with
// VecTraitsInternal::svdup(0), so the caller does not need to initialise it.
// The referenced vector must outlive this object.
29 | 28 | explicit Sum(VectorTypeInternal &accumulator) KLEIDICV_STREAMING | |
30 | 28 | : accumulator_{accumulator} { | |
31 | 28 | accumulator_ = VecTraitsInternal::svdup(0); | |
32 | 28 | } | |
33 | |||
// Adds one vector of float input to the accumulator.
// A double vector has half as many lanes as a float vector, so src is widened
// in two halves: svcvt_f64_f32_x converts the even-indexed float lanes and
// svcvtlt_f64_f32_x the odd-indexed ones. Both conversions use an all-true
// predicate. The two halves are added together and then into the accumulator
// with merging adds (svadd_m), where inactive lanes keep the first operand.
// NOTE(review): ctx.predicate() presumably describes the active float lanes,
// yet it governs the double-lane adds here - confirm that inactive float
// lanes of src are zero so a partial (tail) vector is summed correctly.
34 | 384 | void vector_path(ContextType ctx, VectorType src) KLEIDICV_STREAMING { | |
35 | 768 | VectorTypeInternal src_widened_evens = | |
36 | 384 | svcvt_f64_f32_x(VecTraits::svptrue(), src); | |
37 | 768 | VectorTypeInternal src_widened_odds = | |
38 | 384 | svcvtlt_f64_f32_x(VecTraits::svptrue(), src); | |
39 | 384 | accumulator_ = | |
40 | 768 | svadd_m(ctx.predicate(), accumulator_, | |
41 | 384 | svadd_m(ctx.predicate(), src_widened_evens, src_widened_odds)); | |
42 | 384 | } | |
43 | |||
// Reduces the vector accumulator to a single value: the lanes are stored to a
// zero-initialised stack array, summed as doubles, and the total is narrowed
// to float on return.
// The array is sized with max_num_lanes() while the loop runs over
// num_lanes(); presumably the former is a compile-time upper bound and the
// latter the lane count at run time.
44 | 28 | ScalarType get_sum() const KLEIDICV_STREAMING { | |
45 | 28 | ScalarTypeInternal accumulator_final[VecTraitsInternal::max_num_lanes()] = { | |
46 | 0}; | ||
47 | 28 | svst1(VecTraitsInternal::svptrue(), accumulator_final, accumulator_); | |
48 | |||
// Scalar reduction over the stored lanes.
49 | 28 | ScalarTypeInternal sum = 0; | |
50 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 140 times.
|
168 | for (size_t i = 0; i != VecTraitsInternal::num_lanes(); ++i) { |
51 | 140 | sum += accumulator_final[i]; | |
52 | 140 | } | |
53 | 56 | return static_cast<ScalarType>(sum); | |
54 | 28 | } | |
55 | |||
56 | private: | ||
// Caller-owned running sum, one partial total per double lane.
57 | VectorTypeInternal &accumulator_; | ||
58 | }; | ||
59 | |||
// Computes the sum of all elements of a width x height image and writes it
// to *sum.
//   T          element type of the image and of the result
//   TInternal  type used for accumulation inside Sum<T, TInternal>
//   src        pointer to the first row of the image
//   src_stride distance between consecutive rows, passed on to Rows
//              (presumably in bytes - confirm against Rows)
//   width      image width, validated together with height by CHECK_IMAGE_SIZE
//   height     image height
//   sum        output location for the result
// Returns KLEIDICV_OK once the sum has been written.
60 | template <typename T, typename TInternal> | ||
61 | 40 | kleidicv_error_t sum_sc(const T *src, size_t src_stride, size_t width, | |
62 | size_t height, T *sum) KLEIDICV_STREAMING { | ||
63 | using VecTraitsInternal = KLEIDICV_TARGET_NAMESPACE::VecTraits<TInternal>; | ||
64 | using VectorTypeInternal = typename VecTraitsInternal::VectorType; | ||
65 | |||
// Argument validation. The CHECK_* macros are defined elsewhere; presumably
// each returns an error code from this function when its check fails.
66 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 38 times.
|
40 | CHECK_POINTERS(sum); |
67 |
4/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 34 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 34 times.
|
38 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
68 |
6/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 28 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 28 times.
|
34 | CHECK_IMAGE_SIZE(width, height); |
69 | |||
70 | 28 | Rectangle rect{width, height}; | |
71 | 28 | Rows<const T> src_rows{src, src_stride}; | |
72 | |||
// The accumulator is deliberately left uninitialised here: it is handed to the
// operation by reference, and the Sum<float, double> constructor assigns it
// before any use.
73 | 28 | VectorTypeInternal accumulator; | |
74 | 28 | Sum<T, TInternal> operation{accumulator}; | |
75 | |||
// Runs the operation over the image rows; the helper is defined elsewhere.
76 | 28 | apply_operation_by_rows(operation, rect, src_rows); | |
77 | |||
78 | 28 | *sum = operation.get_sum(); | |
79 | |||
80 | 28 | return KLEIDICV_OK; | |
81 | 40 | } | |
82 | |||
83 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
84 | |||
85 | #endif // KLEIDICV_SUM_SC_H | ||
86 |