Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #include <limits> | ||
6 | #include <type_traits> | ||
7 | |||
8 | #include "kleidicv/kleidicv.h" | ||
9 | #include "kleidicv/neon.h" | ||
10 | |||
11 | namespace kleidicv::neon { | ||
12 | |||
13 | template <typename ScalarType> | ||
14 | class CountNonZeros final : public UnrollTwice { | ||
15 | public: | ||
16 | using VecTraits = neon::VecTraits<ScalarType>; | ||
17 | using VectorType = typename VecTraits::VectorType; | ||
18 | |||
19 | 24 | CountNonZeros() : accumulator_(0), v_accumulator_(vdupq_n_u8(0)) {} | |
20 | |||
21 | 444000 | void vector_path(VectorType src) { | |
22 | 444000 | v_accumulator_ = vaddq_u8(v_accumulator_, vtstq_u8(src, src)); | |
23 | 444000 | } | |
24 | |||
25 | 1536 | void scalar_path(ScalarType src) { accumulator_ += !!src; } | |
26 | |||
27 | 2034 | void on_block_finished(size_t) { | |
28 | 2034 | accumulator_ += vaddlvq_u8(vnegq_s8(v_accumulator_)); | |
29 | 2034 | v_accumulator_ = vdupq_n_u8(0); | |
30 | 2034 | } | |
31 | |||
32 | 303 | size_t max_vectors_per_block() const { | |
33 | 303 | return std::numeric_limits<std::make_unsigned_t<ScalarType>>::max(); | |
34 | } | ||
35 | |||
36 | 24 | size_t result() { return accumulator_; } | |
37 | |||
38 | private: | ||
39 | size_t accumulator_; | ||
40 | VectorType v_accumulator_; | ||
41 | }; // end of class CountNonZeros<ScalarType> | ||
42 | |||
43 | template <typename T> | ||
44 | 36 | KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t count_nonzeros( | |
45 | const T *src, size_t src_stride, size_t width, size_t height, | ||
46 | size_t *count) { | ||
47 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 33 times. | 36 | CHECK_POINTERS(count); |
48 | 4/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 30 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 30 times. | 33 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
49 | 6/6 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 27 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 24 times. ✓ Branch 4 taken 6 times. ✓ Branch 5 taken 24 times. | 30 | CHECK_IMAGE_SIZE(width, height); |
50 | |||
51 | 24 | Rectangle rect{width, height}; | |
52 | 24 | Rows<const T> src_rows{src, src_stride}; | |
53 | |||
54 | 24 | CountNonZeros<T> operation; | |
55 | 24 | apply_block_operation_by_rows(operation, rect, src_rows); | |
56 | 24 | *count = operation.result(); | |
57 | |||
58 | 24 | return KLEIDICV_OK; | |
59 | 36 | } | |
60 | |||
61 | } // namespace kleidicv::neon | ||
62 | |||
63 | extern "C" { | ||
64 | |||
65 | decltype(kleidicv::neon::count_nonzeros<uint8_t>) *kleidicv_count_nonzeros_u8 = | ||
66 | kleidicv::neon::count_nonzeros<uint8_t>; | ||
67 | |||
68 | } // extern "C" | ||
69 |