KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/arithmetics/in_range_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 64 64 100.0%
Functions: 10 10 100.0%
Branches: 34 34 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/kleidicv.h"
6 #include "kleidicv/neon.h"
7
8 namespace kleidicv::neon {
9
// Primary template: only declared here. The element types handled in this
// listing are provided by the explicit specializations for uint8_t and float.
10 template <typename ScalarType>
11 class InRange;
12
// Specialization for 8-bit unsigned input: an element maps to 0xFF when it
// lies inside the inclusive range [lower_bound, upper_bound] and to 0
// otherwise (see scalar_path).
// Reading this listing: each line is prefixed with its source line number
// and, where instrumented, its execution count (report columns
// "Line Branch Exec Source").
// NOTE(review): UnrollTwice is declared elsewhere; presumably it tells the
// row driver how to unroll vector_path/scalar_path - confirm in the headers.
13 template <>
14 class InRange<uint8_t> : public UnrollTwice {
15 public:
16 using VecTraits = neon::VecTraits<uint8_t>;
17 using VectorType = typename VecTraits::VectorType;
18
// Stores each bound twice: broadcast into a vector with vdupq_n for
// vector_path, and as a plain scalar for scalar_path.
19 75 InRange(uint8_t lower_bound, uint8_t upper_bound)
20 75 : lower_bound_vect_{vdupq_n(lower_bound)},
21 75 upper_bound_vect_{vdupq_n(upper_bound)},
22 75 lower_bound_{lower_bound},
23 75 upper_bound_{upper_bound} {}
24
// Vector variant: ANDs the ">= lower bound" compare result with the
// "<= upper bound" compare result.
// NOTE(review): vcgeq/vcleq/vandq are wrappers defined elsewhere; presumably
// each lane becomes all ones when in range and zero otherwise, matching
// scalar_path - confirm.
25 454 VectorType vector_path(VectorType src) {
26 454 return vandq(vcgeq(src, lower_bound_vect_), vcleq(src, upper_bound_vect_));
27 }
28
// The NOLINT pair suppresses clang-tidy's
// readability-make-member-function-const check, so scalar_path stays
// non-const even though it only reads members.
29 // NOLINTBEGIN(readability-make-member-function-const)
// Scalar variant: returns 0xFF when lower_bound_ <= src <= upper_bound_
// (both ends inclusive), otherwise 0.
30 722 uint8_t scalar_path(uint8_t src) {
31

2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 650 times.
722 return (src >= lower_bound_ && src <= upper_bound_) ? 0xFF : 0;
32 }
33 // NOLINTEND(readability-make-member-function-const)
34
35 private:
// Bounds broadcast to vectors (used by vector_path).
36 VectorType lower_bound_vect_;
37 VectorType upper_bound_vect_;
// Bounds as scalars (used by scalar_path).
38 uint8_t lower_bound_;
39 uint8_t upper_bound_;
40 }; // end of class InRange<uint8_t>
41
// Specialization for float input with uint8_t output: an element maps to 0xFF
// when lower_bound <= value <= upper_bound (inclusive) and to 0 otherwise.
// Unlike the uint8_t specialization, this class exposes a whole-row entry
// point (process_row) because source and destination element sizes differ.
// Reading this listing: each line is prefixed with its source line number
// and, where instrumented, its execution count.
42 template <>
43 class InRange<float> {
44 public:
45 using SrcVecTraits = neon::VecTraits<float>;
46 using SrcVectorType = typename SrcVecTraits::VectorType;
47 using SrcVector4Type = typename SrcVecTraits::Vector4Type;
48 using DstVecTraits = neon::VecTraits<uint8_t>;
49 using DstVectorType = typename DstVecTraits::VectorType;
50
// Stores each bound twice: broadcast into a float vector with vdupq_n for
// vector_path, and as a plain scalar for the scalar tail in process_row.
51 75 InRange(float lower_bound, float upper_bound)
52 75 : lower_bound_vect_{vdupq_n(lower_bound)},
53 75 upper_bound_vect_{vdupq_n(upper_bound)},
54 75 lower_bound_{lower_bound},
55 75 upper_bound_{upper_bound} {}
56
// Processes one row of `width` elements.
// - unroll_n_times<4> callback: loads a SrcVector4Type (four float vectors)
//   from src, reduces it to a single uint8_t vector via vector_path, stores
//   that with vst1q, then advances src and dst by `step`.
// - remaining() callback: handles the leftover `length` elements one by one
//   with the same inclusive comparison, writing 0xFF or 0.
// NOTE(review): LoopUnroll and disable_loop_vectorization() are defined
// elsewhere. Presumably `step` is the number of elements consumed per
// unrolled iteration and the helper keeps the compiler from auto-vectorizing
// the scalar tail - confirm.
57 99 void process_row(size_t width, Columns<const float> src,
58 Columns<uint8_t> dst) {
59 198 LoopUnroll{width, SrcVecTraits::num_lanes()}
60 556 .unroll_n_times<4>([&](size_t step) {
61 457 SrcVector4Type src_vector;
62 457 SrcVecTraits::load(&src[0], src_vector);
63
64 457 DstVectorType result_vector = vector_path(src_vector);
65 457 vst1q(&dst[0], result_vector);
66 457 src += ptrdiff_t(step);
67 457 dst += ptrdiff_t(step);
68 457 })
69 153 .remaining([&](size_t length, size_t) {
70

2/2
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 442 times.
496 for (size_t index = 0; index < length; ++index) {
71 442 disable_loop_vectorization();
72 442 float f = src[ptrdiff_t(index)];
73 442 dst[ptrdiff_t(index)] =
74

2/2
✓ Branch 0 taken 152 times.
✓ Branch 1 taken 290 times.
442 (f >= lower_bound_ && f <= upper_bound_) ? 0xFF : 0;
75 442 }
76 54 });
77 99 }
78
79 private:
// Converts four float vectors into one uint8_t mask vector.
// res00, res01, res11 and res12 are the 32-bit-lane masks for src0, src1,
// src2 and src3 respectively. The masks are then narrowed 32 -> 16 -> 8 bits
// per element with vuzp1q_u16 followed by vuzp1q_u8.
// Per Arm's intrinsic documentation vuzp1q keeps the even-indexed elements of
// its two inputs; since every mask lane is uniformly all zeros or all ones,
// the kept part carries the full result.
80 457 DstVectorType vector_path(SrcVector4Type src) {
81 457 SrcVectorType src0 = src.val[0];
82 457 SrcVectorType src1 = src.val[1];
83 457 SrcVectorType src2 = src.val[2];
84 457 SrcVectorType src3 = src.val[3];
85 914 uint32x4_t res00 =
86 457 vandq(vcgeq(src0, lower_bound_vect_), vcleq(src0, upper_bound_vect_));
87 914 uint32x4_t res01 =
88 457 vandq(vcgeq(src1, lower_bound_vect_), vcleq(src1, upper_bound_vect_));
89 914 uint32x4_t res11 =
90 457 vandq(vcgeq(src2, lower_bound_vect_), vcleq(src2, upper_bound_vect_));
91 914 uint32x4_t res12 =
92 457 vandq(vcgeq(src3, lower_bound_vect_), vcleq(src3, upper_bound_vect_));
93 // AND-ing the results of the compare ops sets all 32 bits to all 0's or all
94 // 1's. Unzipping them twice chooses 8 bits from those 32.
95 914 uint16x8_t res0 =
96 457 vuzp1q_u16(vreinterpretq_u16_u32(res00), vreinterpretq_u16_u32(res01));
97 914 uint16x8_t res1 =
98 457 vuzp1q_u16(vreinterpretq_u16_u32(res11), vreinterpretq_u16_u32(res12));
99 914 return vuzp1q_u8(vreinterpretq_u8_u16(res0), vreinterpretq_u8_u16(res1));
100 457 }
101
// Bounds broadcast to float vectors (used by vector_path).
102 SrcVectorType lower_bound_vect_;
103 SrcVectorType upper_bound_vect_;
// Bounds as scalars (used by the scalar tail of process_row).
104 float lower_bound_;
105 float upper_bound_;
106 }; // end of class InRange<float>
107
// Entry point: writes an in-range mask for a width x height image of T into a
// uint8_t destination image.
//
// Parameters:
//   src, src_stride  - source image pointer and its row stride.
//   dst, dst_stride  - destination image pointer and its row stride.
//   width, height    - image size, passed to CHECK_IMAGE_SIZE and Rectangle.
//   lower_bound, upper_bound - range limits handed to InRange<T>.
// Returns KLEIDICV_OK once all rows have been processed.
//
// NOTE(review): CHECK_POINTER_AND_STRIDE and CHECK_IMAGE_SIZE are macros
// defined elsewhere. The execution counts in this listing (159 entries, but
// only 150 reaching the InRange construction) indicate they return early when
// a check fails; the returned error codes are not visible here.
// NOTE(review): strides are presumably in bytes - confirm against Rows<>.
108 template <typename T>
109 159 kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst,
110 size_t dst_stride, size_t width, size_t height,
111 T lower_bound, T upper_bound) {
112

8/8
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 78 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 78 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 78 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 78 times.
159 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
113

8/8
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 77 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 77 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 77 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 77 times.
156 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
114

12/12
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 76 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 75 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 75 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 76 times.
✓ Branch 8 taken 1 times.
✓ Branch 9 taken 75 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 75 times.
154 CHECK_IMAGE_SIZE(width, height);
115
// Build the per-type operation and the row views over both images.
116 150 InRange<T> operation{lower_bound, upper_bound};
117 150 Rectangle rect{width, height};
118 150 Rows<const T> src_rows{src, src_stride};
119 150 Rows<uint8_t> dst_rows{dst, dst_stride};
120
// Compile-time dispatch on T: uint8_t goes through apply_operation_by_rows,
// every other instantiated type (float here) goes through zip_rows.
// NOTE(review): presumably apply_operation_by_rows drives
// vector_path/scalar_path while zip_rows calls process_row per row - confirm
// against the helpers' definitions.
121 if constexpr (std::is_same_v<T, uint8_t>) {
122 75 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
123 } else {
124 75 zip_rows(operation, rect, src_rows, dst_rows);
125 }
126
127 150 return KLEIDICV_OK;
128 159 }
129
// Explicitly instantiates in_range<type> with the KLEIDICV_TARGET_FN_ATTRS
// attributes applied. The macro is used below for the two element types this
// file supports: uint8_t and float.
130 #define KLEIDICV_INSTANTIATE_TEMPLATE(type) \
131 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t in_range<type>( \
132 const type *src, size_t src_stride, uint8_t *dst, size_t dst_stride, \
133 size_t width, size_t height, type lower_bound, type upper_bound)
134
135 KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t);
136 KLEIDICV_INSTANTIATE_TEMPLATE(float);
137
138 } // namespace kleidicv::neon
139