Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_IN_RANGE_SC_H | ||
6 | #define KLEIDICV_IN_RANGE_SC_H | ||
7 | |||
8 | #include "kleidicv/kleidicv.h" | ||
9 | #include "kleidicv/sve2.h" | ||
10 | |||
11 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
12 | |||
13 | template <typename ScalarType> | ||
14 | class InRange; | ||
15 | |||
16 | template <> | ||
17 | class InRange<uint8_t> : public UnrollTwice { | ||
18 | public: | ||
19 | using ContextType = Context; | ||
20 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<uint8_t>; | ||
21 | using VectorType = typename VecTraits::VectorType; | ||
22 | using SignedScalarType = typename std::make_signed<uint8_t>::type; | ||
23 | using SignedVecTraits = | ||
24 | KLEIDICV_TARGET_NAMESPACE::VecTraits<SignedScalarType>; | ||
25 | using SignedVectorType = typename SignedVecTraits::VectorType; | ||
26 | |||
27 | 150 | InRange(VectorType &vec_lower_bound, | |
28 | VectorType &vec_upper_bound) KLEIDICV_STREAMING | ||
29 | 150 | : vec_lower_bound_(vec_lower_bound), | |
30 | 150 | vec_upper_bound_(vec_upper_bound) {} | |
31 | |||
32 | // NOLINTBEGIN(readability-make-member-function-const) | ||
33 | 693 | VectorType vector_path(ContextType ctx, VectorType src) KLEIDICV_STREAMING { | |
34 | 693 | svbool_t pg = ctx.predicate(); | |
35 | |||
36 | 693 | VectorType diff_low = svsub_x(pg, src, vec_lower_bound_); | |
37 | // Shift subtraction result 7 bits to the right, i.e. divide by 2^7 to keep | ||
38 | // sign bit only. | ||
39 | 1386 | VectorType result_within_low = | |
40 | 693 | VecTraits::svreinterpret(SignedVecTraits::svasr_n( | |
41 | 693 | pg, SignedVecTraits::svreinterpret(diff_low), 7)); | |
42 | |||
43 | 693 | VectorType diff_up = svsub_x(pg, vec_upper_bound_, src); | |
44 | 1386 | VectorType result_within_up = | |
45 | 693 | VecTraits::svreinterpret(SignedVecTraits::svasr_n( | |
46 | 693 | pg, SignedVecTraits::svreinterpret(diff_up), 7)); | |
47 | |||
48 | // src[i] < lower_bound OR src[i] > upper_bound | ||
49 | 693 | VectorType out_of_range = svorr_x(pg, result_within_low, result_within_up); | |
50 | // NOT(out_of_range) to set within elements to 1 and the rest to 0. | ||
51 | 693 | VectorType within_range = svcnot_x(pg, out_of_range); | |
52 | // Negate to set elements within to 0xFF (all 1s). | ||
53 | 1386 | return VecTraits::svreinterpret( | |
54 | 693 | svqneg_x(pg, SignedVecTraits::svreinterpret(within_range))); | |
55 | 693 | } | |
56 | // NOLINTEND(readability-make-member-function-const) | ||
57 | |||
58 | private: | ||
59 | VectorType &vec_lower_bound_; | ||
60 | VectorType &vec_upper_bound_; | ||
61 | }; // end of class InRange<uint8_t> | ||
62 | |||
63 | template <> | ||
64 | class InRange<float> { | ||
65 | public: | ||
66 | using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>; | ||
67 | using SrcVectorType = typename SrcVecTraits::VectorType; | ||
68 | using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<uint8_t>; | ||
69 | using DstVectorType = typename DstVecTraits::VectorType; | ||
70 | |||
71 | 150 | InRange(float lower_bound, float upper_bound) KLEIDICV_STREAMING | |
72 | 150 | : lower_bound_(lower_bound), | |
73 | 150 | upper_bound_(upper_bound) {} | |
74 | |||
75 | 179 | void process_row(size_t width, Columns<const float> src, | |
76 | Columns<uint8_t> dst) KLEIDICV_STREAMING { | ||
77 | 358 | LoopUnroll{width, SrcVecTraits::num_lanes()} | |
78 | 739 | .unroll_n_times<4>([&](size_t step) KLEIDICV_STREAMING { | |
79 | 560 | svbool_t pg_src = SrcVecTraits::svptrue(); | |
80 | 560 | SrcVectorType src_v0 = svld1(pg_src, &src[0]); | |
81 | 560 | SrcVectorType src_v1 = svld1_vnum(pg_src, &src[0], 1); | |
82 | 560 | SrcVectorType src_v2 = svld1_vnum(pg_src, &src[0], 2); | |
83 | 560 | SrcVectorType src_v3 = svld1_vnum(pg_src, &src[0], 3); | |
84 | 1120 | DstVectorType res0 = | |
85 | 560 | vector_path(pg_src, src_v0, src_v1, src_v2, src_v3); | |
86 | 560 | svbool_t pg_dst = DstVecTraits::svptrue(); | |
87 | 560 | svst1(pg_dst, &dst[0], res0); | |
88 | 560 | src += ptrdiff_t(step); | |
89 | 560 | dst += ptrdiff_t(step); | |
90 | 560 | }) | |
91 | 295 | .remaining([&](size_t length, size_t) KLEIDICV_STREAMING { | |
92 | 116 | size_t index = 0; | |
93 | 116 | svbool_t pg = SrcVecTraits::svwhilelt(index, length); | |
94 |
2/2✓ Branch 0 taken 225 times.
✓ Branch 1 taken 116 times.
|
341 | while (svptest_first(SrcVecTraits::svptrue(), pg)) { |
95 | 225 | SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]); | |
96 | 225 | DstVectorType result_vector = remaining_path(pg, src_vector); | |
97 | 450 | svst1b(pg, &dst[ptrdiff_t(index)], | |
98 | 225 | svreinterpret_u32(result_vector)); | |
99 | // Update loop counter and calculate the next governing predicate. | ||
100 | 225 | index += SrcVecTraits::num_lanes(); | |
101 | 225 | pg = SrcVecTraits::svwhilelt(index, length); | |
102 | 225 | } | |
103 | 116 | }); | |
104 | 179 | } | |
105 | |||
106 | private: | ||
107 | // NOLINTBEGIN(readability-make-member-function-const) | ||
108 | 560 | DstVectorType vector_path(svbool_t full_pg, SrcVectorType fsrc0, | |
109 | SrcVectorType fsrc1, SrcVectorType fsrc2, | ||
110 | SrcVectorType fsrc3) KLEIDICV_STREAMING { | ||
111 | 1120 | svbool_t pred0 = svand_z(full_pg, svcmpge(full_pg, fsrc0, lower_bound_), | |
112 | 560 | svcmple(full_pg, fsrc0, upper_bound_)); | |
113 | 560 | auto res00 = svsel(pred0, svdup_u32(0xFF), svdup_u32(0)); | |
114 | |||
115 | 1120 | svbool_t pred1 = svand_z(full_pg, svcmpge(full_pg, fsrc1, lower_bound_), | |
116 | 560 | svcmple(full_pg, fsrc1, upper_bound_)); | |
117 | 560 | auto res01 = svsel(pred1, svdup_u32(0xFF), svdup_u32(0)); | |
118 | |||
119 | 1120 | svbool_t pred2 = svand_z(full_pg, svcmpge(full_pg, fsrc2, lower_bound_), | |
120 | 560 | svcmple(full_pg, fsrc2, upper_bound_)); | |
121 | 560 | auto res10 = svsel(pred2, svdup_u32(0xFF), svdup_u32(0)); | |
122 | |||
123 | 1120 | svbool_t pred3 = svand_z(full_pg, svcmpge(full_pg, fsrc3, lower_bound_), | |
124 | 560 | svcmple(full_pg, fsrc3, upper_bound_)); | |
125 | 560 | auto res11 = svsel(pred3, svdup_u32(0xFF), svdup_u32(0)); | |
126 | |||
127 | 1120 | auto res0 = | |
128 | 560 | svuzp1(svreinterpret_u16_u32(res00), svreinterpret_u16_u32(res01)); | |
129 | 1120 | auto res1 = | |
130 | 560 | svuzp1(svreinterpret_u16_u32(res10), svreinterpret_u16_u32(res11)); | |
131 | 1120 | return svuzp1(svreinterpret_u8_u16(res0), svreinterpret_u8_u16(res1)); | |
132 | 560 | } | |
133 | // NOLINTEND(readability-make-member-function-const) | ||
134 | |||
135 | // NOLINTBEGIN(readability-make-member-function-const) | ||
136 | 225 | DstVectorType remaining_path(svbool_t &pg, | |
137 | SrcVectorType src) KLEIDICV_STREAMING { | ||
138 | 450 | svbool_t predicate = svand_z(pg, svcmpge(pg, src, lower_bound_), | |
139 | 225 | svcmple(pg, src, upper_bound_)); | |
140 | 450 | return svsel(predicate, DstVecTraits::svdup(0xFF), DstVecTraits::svdup(0)); | |
141 | 225 | } | |
142 | // NOLINTEND(readability-make-member-function-const) | ||
143 | |||
144 | float lower_bound_; | ||
145 | float upper_bound_; | ||
146 | }; // end of class InRange<float> | ||
147 | |||
148 | template <typename T> | ||
149 | 318 | kleidicv_error_t in_range_sc(const T *src, size_t src_stride, uint8_t *dst, | |
150 | size_t dst_stride, size_t width, size_t height, | ||
151 | T lower_bound, T upper_bound) KLEIDICV_STREAMING { | ||
152 |
8/8✓ Branch 0 taken 2 times.
✓ Branch 1 taken 156 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 156 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 156 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 156 times.
|
318 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
153 |
8/8✓ Branch 0 taken 2 times.
✓ Branch 1 taken 154 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 154 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 154 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 154 times.
|
312 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
154 |
12/12✓ Branch 0 taken 2 times.
✓ Branch 1 taken 152 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 150 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 150 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 152 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 150 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 150 times.
|
308 | CHECK_IMAGE_SIZE(width, height); |
155 | |||
156 | 300 | Rectangle rect{width, height}; | |
157 | 300 | Rows<const T> src_rows{src, src_stride}; | |
158 | 300 | Rows<uint8_t> dst_rows{dst, dst_stride}; | |
159 | |||
160 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<T>; | ||
161 | using VectorType = typename VecTraits::VectorType; | ||
162 | |||
163 | if constexpr (std::is_same_v<T, uint8_t>) { | ||
164 | 150 | VectorType vec_lower_bound = VecTraits::svdup(lower_bound); | |
165 | 150 | VectorType vec_upper_bound = VecTraits::svdup(upper_bound); | |
166 | 150 | InRange<T> operation{vec_lower_bound, vec_upper_bound}; | |
167 | 150 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
168 | 150 | } else { | |
169 | 150 | InRange<T> operation{lower_bound, upper_bound}; | |
170 | 150 | zip_rows(operation, rect, src_rows, dst_rows); | |
171 | 150 | } | |
172 | |||
173 | 300 | return KLEIDICV_OK; | |
174 | 318 | } | |
175 | |||
176 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
177 | |||
178 | #endif // KLEIDICV_IN_RANGE_SC_H | ||
179 |