| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | #include <type_traits> | ||
| 7 | |||
| 8 | #include "kleidicv/kleidicv.h" | ||
| 9 | #include "kleidicv/neon.h" | ||
| 10 | |||
| 11 | namespace kleidicv::neon { | ||
| 12 | |||
| 13 | template <typename ScalarType> | ||
| 14 | class CountNonZeros final : public UnrollTwice { | ||
| 15 | public: | ||
| 16 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 17 | using VectorType = typename VecTraits::VectorType; | ||
| 18 | |||
| 19 | 32 | CountNonZeros() : accumulator_(0), v_accumulator_(vdupq_n_u8(0)) {} | |
| 20 | |||
| 21 | 838752 | void vector_path(VectorType src) { | |
| 22 | 838752 | v_accumulator_ = vaddq_u8(v_accumulator_, vtstq_u8(src, src)); | |
| 23 | 838752 | } | |
| 24 | |||
| 25 | 1536 | void scalar_path(ScalarType src) { accumulator_ += !!src; } | |
| 26 | |||
| 27 | 3774 | void on_block_finished(size_t) { | |
| 28 | 3774 | accumulator_ += vaddlvq_u8(vnegq_s8(v_accumulator_)); | |
| 29 | 3774 | v_accumulator_ = vdupq_n_u8(0); | |
| 30 | 3774 | } | |
| 31 | |||
| 32 | 500 | size_t max_vectors_per_block() const { | |
| 33 | 500 | return std::numeric_limits<std::make_unsigned_t<ScalarType>>::max(); | |
| 34 | } | ||
| 35 | |||
| 36 | 32 | size_t result() { return accumulator_; } | |
| 37 | |||
| 38 | private: | ||
| 39 | size_t accumulator_; | ||
| 40 | VectorType v_accumulator_; | ||
| 41 | }; // end of class CountNonZeros<ScalarType> | ||
| 42 | |||
| 43 | template <typename T> | ||
| 44 | 48 | KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t count_nonzeros( | |
| 45 | const T *src, size_t src_stride, size_t width, size_t height, | ||
| 46 | size_t *count) { | ||
| 47 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 44 times. | 48 | CHECK_POINTERS(count); |
| 48 | 4/4 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 40 times. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 40 times. | 44 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
| 49 | 6/6 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 36 times. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 32 times. ✓ Branch 4 taken 8 times. ✓ Branch 5 taken 32 times. | 40 | CHECK_IMAGE_SIZE(width, height); |
| 50 | |||
| 51 | 32 | Rectangle rect{width, height}; | |
| 52 | 32 | Rows<const T> src_rows{src, src_stride}; | |
| 53 | |||
| 54 | 32 | CountNonZeros<T> operation; | |
| 55 | 32 | apply_block_operation_by_rows(operation, rect, src_rows); | |
| 56 | 32 | *count = operation.result(); | |
| 57 | |||
| 58 | 32 | return KLEIDICV_OK; | |
| 59 | 48 | } | |
| 60 | |||
| 61 | } // namespace kleidicv::neon | ||
| 62 | |||
| 63 | extern "C" { | ||
| 64 | |||
| 65 | decltype(kleidicv::neon::count_nonzeros<uint8_t>) *kleidicv_count_nonzeros_u8 = | ||
| 66 | kleidicv::neon::count_nonzeros<uint8_t>; | ||
| 67 | |||
| 68 | } // extern "C" | ||
| 69 |