Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_UTILS_H | ||
6 | #define KLEIDICV_UTILS_H | ||
7 | |||
8 | #include <algorithm> | ||
9 | #include <limits> | ||
10 | #include <type_traits> | ||
11 | |||
12 | #include "kleidicv/config.h" | ||
13 | #include "kleidicv/ctypes.h" | ||
14 | #include "kleidicv/kleidicv.h" | ||
15 | #include "kleidicv/traits.h" | ||
16 | |||
17 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
18 | |||
// Saturating cast from a signed source type to an unsigned destination type.
//
// Values greater than the largest value representable in U clamp to that
// maximum, negative values clamp to zero, and every other value is converted
// unchanged.
template <typename S, typename U,
          std::enable_if_t<std::numeric_limits<S>::is_signed, bool> = true,
          std::enable_if_t<not std::numeric_limits<U>::is_signed, bool> = true>
static U saturating_cast(S value) KLEIDICV_STREAMING {
  const U upper_bound = std::numeric_limits<U>::max();

  // Too large for the destination: clamp to the destination maximum.
  if (value > upper_bound) {
    return upper_bound;
  }

  // Negative inputs have no unsigned representation: clamp to zero.
  return (value < 0) ? U{0} : static_cast<U>(value);
}
33 | |||
// Saturating cast from an unsigned source type to an unsigned destination
// type.
//
// When SrcType has more value bits than DstType, inputs larger than the
// destination maximum clamp to that maximum instead of wrapping around.
// When the destination is at least as wide as the source every input is
// representable, so the clamp is compiled out and this is a plain conversion.
template <
    typename SrcType, typename DstType,
    std::enable_if_t<std::is_unsigned_v<DstType> && std::is_unsigned_v<SrcType>,
                     bool> = true>
static DstType saturating_cast(SrcType value) KLEIDICV_STREAMING {
  if constexpr (std::numeric_limits<SrcType>::digits >
                std::numeric_limits<DstType>::digits) {
    constexpr DstType kMaxValue = std::numeric_limits<DstType>::max();
    // The source is wider here, so widening the maximum is lossless.
    if (value > static_cast<SrcType>(kMaxValue)) {
      return kMaxValue;
    }
  }

  return static_cast<DstType>(value);
}
42 | |||
// Saturating cast from an unsigned source type to a signed destination type.
//
// Inputs greater than the destination maximum (as seen after converting that
// maximum to SrcType) clamp to the destination maximum; all other inputs are
// converted unchanged.
template <
    typename SrcType, typename DstType,
    std::enable_if_t<std::is_signed_v<DstType> && std::is_unsigned_v<SrcType>,
                     bool> = true>
static DstType saturating_cast(SrcType value) KLEIDICV_STREAMING {
  const DstType upper_bound = std::numeric_limits<DstType>::max();
  const bool overflows = value > static_cast<SrcType>(upper_bound);

  return overflows ? upper_bound : static_cast<DstType>(value);
}
57 | |||
// Rounding shift right.
//
// Adds half of the divisor (1 << (shift - 1)) to `value` before shifting it
// right by `shift` bits, so the result is rounded rather than truncated.
//
// A shift of zero returns `value` unchanged: there is nothing to round, and
// evaluating `shift - 1` on an unsigned zero would wrap around and produce an
// out-of-range (undefined) left shift.
template <typename T>
static T rounding_shift_right(T value, size_t shift) KLEIDICV_STREAMING {
  if (shift == 0) {
    return value;
  }

  return (value + (T{1} << (shift - 1))) >> shift;
}
63 | |||
// When placed in a loop, it effectively disables loop vectorization.
//
// The body is a single empty inline-assembly statement and emits no
// instructions of its own.
// NOTE(review): presumably the vectorizer gives up on loops containing an asm
// statement it cannot analyse - confirm against the supported compilers.
static inline void disable_loop_vectorization() KLEIDICV_STREAMING {
  __asm__("");
}
68 | |||
// Helper class to unroll a loop as needed.
//
// The object tracks how many elements are still unprocessed (`length_`) and
// the size of one processing step (`step_`). Each unroll_* call invokes its
// callback once per whole chunk of `factor * step_` elements that still fits
// and then subtracts the processed amount, so calls can be chained from the
// largest unroll factor down to the tail.
class LoopUnroll final {
 public:
  // `length` is the number of elements to process, `step` the number of
  // elements handled by one non-unrolled iteration. Records whether at least
  // one whole step fits in the input (see can_avoid_tail()).
  explicit LoopUnroll(size_t length, size_t step) KLEIDICV_STREAMING
      : length_{length},
        step_{step},
        index_{0},
        can_avoid_tail_{length >= step} {}

  // Loop unrolled four times: callback(4 * step) per whole chunk.
  template <typename CallbackType>
  LoopUnroll &unroll_four_times(CallbackType callback) KLEIDICV_STREAMING {
    return unroll_n_times<4>(callback);
  }

  // Loop unrolled twice: callback(2 * step) per whole chunk.
  template <typename CallbackType>
  LoopUnroll &unroll_twice(CallbackType callback) KLEIDICV_STREAMING {
    return unroll_n_times<2>(callback);
  }

  // Unrolls the loop twice when Enable is true; otherwise does nothing.
  template <bool Enable, typename CallbackType>
  LoopUnroll &unroll_twice_if([[maybe_unused]] CallbackType callback)
      KLEIDICV_STREAMING {
    if constexpr (Enable) {
      return unroll_twice(callback);
    } else {
      return *this;
    }
  }

  // Loop unrolled once: callback(step) per whole step.
  template <typename CallbackType>
  LoopUnroll &unroll_once(CallbackType callback) KLEIDICV_STREAMING {
    return unroll_n_times<1>(callback);
  }

  // Unrolls the loop once when Enable is true; otherwise does nothing.
  template <bool Enable, typename CallbackType>
  LoopUnroll &unroll_once_if([[maybe_unused]] CallbackType callback)
      KLEIDICV_STREAMING {
    if constexpr (Enable) {
      return unroll_once(callback);
    } else {
      return *this;
    }
  }

  // Processes trailing data one element at a time: callback(i) is invoked for
  // i = 0 .. remaining_length() - 1, after which nothing is left to process.
  template <typename CallbackType>
  LoopUnroll &tail(CallbackType callback) KLEIDICV_STREAMING {
    index_ = 0;
    while (index_ < remaining_length()) {
      // Keep this loop scalar.
      disable_loop_vectorization();
      callback(index_);
      ++index_;
    }

    length_ = 0;
    return *this;
  }

  // Processes all remaining data at once: if anything is left,
  // callback(remaining_length, step) is invoked exactly once.
  template <typename CallbackType>
  LoopUnroll &remaining(CallbackType callback) KLEIDICV_STREAMING {
    if (length_ == 0) {
      return *this;
    }

    callback(length_, step_);
    length_ = 0;
    return *this;
  }

  // Returns true if there is nothing left to process.
  bool empty() const KLEIDICV_STREAMING { return remaining_length() == 0; }

  // Returns the step value.
  size_t step() const KLEIDICV_STREAMING { return step_; }

  // Returns the remaining length.
  size_t remaining_length() const KLEIDICV_STREAMING { return length_; }

  // Returns true if it is possible to avoid the tail loop, i.e. the length
  // given at construction was at least one step.
  bool can_avoid_tail() const KLEIDICV_STREAMING { return can_avoid_tail_; }

  // Instructs the loop logic to prepare to avoid the tail loop by pretending
  // that exactly one step of data remains.
  void avoid_tail() KLEIDICV_STREAMING { length_ = step_; }

  // Invokes callback(UnrollFactor * step) once for every whole chunk of that
  // size which fits into the remaining length, then reduces the remaining
  // length by the amount processed.
  template <const size_t UnrollFactor, typename CallbackType>
  LoopUnroll &unroll_n_times(CallbackType callback) KLEIDICV_STREAMING {
    const size_t chunk = UnrollFactor * step_;
    // In practice the chunk size will never be zero and we don't want to
    // spend instructions on checking that.
    // NOLINTBEGIN(clang-analyzer-core.DivideZero)
    const size_t whole_chunks = remaining_length() / chunk;
    // NOLINTEND(clang-analyzer-core.DivideZero)

    index_ = 0;
    while (index_ < whole_chunks) {
      callback(chunk);
      ++index_;
    }

    // Adjust length to reflect the processed data.
    length_ -= chunk * index_;
    return *this;
  }

  // Instructs the loop logic to avoid the tail loop.
  //
  // Returns false without calling the callback when the input was shorter
  // than one step or when nothing remains. Otherwise calls
  // callback(step - remaining_length), sets the remaining length to one full
  // step and returns true.
  template <typename CallbackType>
  bool try_avoid_tail_loop(CallbackType callback) KLEIDICV_STREAMING {
    if (KLEIDICV_UNLIKELY(!can_avoid_tail_) ||
        KLEIDICV_UNLIKELY(!remaining_length())) {
      return false;
    }

    const size_t shortfall = step_ - length_;
    callback(shortfall);
    length_ = step_;
    return true;
  }

 private:
  size_t length_;        // Number of elements still to be processed.
  size_t step_;          // Elements handled by one non-unrolled iteration.
  size_t index_;         // Loop counter used by tail() and unroll_n_times().
  bool can_avoid_tail_;  // True if the initial length was at least one step.
};  // end of class LoopUnroll
196 | |||
197 | // This is the same as LoopUnroll, except that it passes indices to callbacks. | ||
198 | template <class Tail = UsesTailPath> | ||
199 | class LoopUnroll2 final { | ||
200 | public: | ||
201 | 292592 | explicit LoopUnroll2(size_t length, size_t step) KLEIDICV_STREAMING | |
202 | 292592 | : length_(length), | |
203 | 292592 | step_(step), | |
204 | 292592 | index_(0) {} | |
205 | |||
206 | 216 | explicit LoopUnroll2(size_t start_index, size_t length, | |
207 | size_t step) KLEIDICV_STREAMING | ||
208 | 216 | : length_(length), | |
209 | 216 | step_(step), | |
210 | 216 | index_(std::min(start_index, length)) {} | |
211 | |||
212 | // Loop unrolled four times. | ||
213 | template <typename CallbackType> | ||
214 | 44555 | LoopUnroll2 &unroll_four_times(CallbackType callback) KLEIDICV_STREAMING { | |
215 | 44555 | return unroll_n_times<4>(callback); | |
216 | } | ||
217 | |||
218 | // Loop unrolled twice. | ||
219 | template <typename CallbackType> | ||
220 | 161075 | LoopUnroll2 &unroll_twice(CallbackType callback) KLEIDICV_STREAMING { | |
221 | 161075 | return unroll_n_times<2>(callback); | |
222 | } | ||
223 | |||
224 | // Unrolls the loop twice, if enabled. | ||
225 | template <bool Enable, typename CallbackType> | ||
226 | LoopUnroll2 &unroll_twice_if(CallbackType callback) KLEIDICV_STREAMING { | ||
227 | if constexpr (Enable) { | ||
228 | return unroll_twice(callback); | ||
229 | } | ||
230 | |||
231 | return *this; | ||
232 | } | ||
233 | |||
234 | // Loop unrolled once. | ||
235 | template <typename CallbackType> | ||
236 | 200309 | LoopUnroll2 &unroll_once(CallbackType callback) KLEIDICV_STREAMING { | |
237 | 200309 | return unroll_n_times<1>(callback); | |
238 | } | ||
239 | |||
240 | // Unrolls the loop once, if enabled. | ||
241 | template <bool Enable, typename CallbackType> | ||
242 | LoopUnroll2 &unroll_once_if(CallbackType callback) KLEIDICV_STREAMING { | ||
243 | if constexpr (Enable) { | ||
244 | return unroll_once(callback); | ||
245 | } | ||
246 | |||
247 | return *this; | ||
248 | } | ||
249 | |||
250 | // Processes trailing data. | ||
251 | template <typename CallbackType> | ||
252 | 53454 | LoopUnroll2 &tail(CallbackType callback) KLEIDICV_STREAMING { | |
253 |
47/56✓ Branch 0 taken 42289 times.
✓ Branch 1 taken 11290 times.
✓ Branch 2 taken 21966 times.
✓ Branch 3 taken 9902 times.
✓ Branch 4 taken 32704 times.
✓ Branch 5 taken 9590 times.
✓ Branch 6 taken 18363 times.
✓ Branch 7 taken 8804 times.
✓ Branch 8 taken 3485 times.
✓ Branch 9 taken 1394 times.
✓ Branch 10 taken 329 times.
✓ Branch 11 taken 910 times.
✓ Branch 12 taken 3220 times.
✓ Branch 13 taken 1274 times.
✓ Branch 14 taken 1496 times.
✓ Branch 15 taken 1204 times.
✓ Branch 16 taken 1408 times.
✓ Branch 17 taken 1168 times.
✓ Branch 18 taken 164 times.
✓ Branch 19 taken 74 times.
✓ Branch 20 taken 384 times.
✓ Branch 21 taken 856 times.
✓ Branch 22 taken 1332 times.
✓ Branch 23 taken 820 times.
✓ Branch 24 taken 540 times.
✓ Branch 25 taken 592 times.
✓ Branch 26 taken 572 times.
✓ Branch 27 taken 142 times.
✓ Branch 28 taken 480 times.
✓ Branch 29 taken 924 times.
✓ Branch 30 taken 816 times.
✓ Branch 31 taken 748 times.
✗ Branch 32 not taken.
✓ Branch 33 taken 64 times.
✗ Branch 34 not taken.
✓ Branch 35 taken 32 times.
✗ Branch 36 not taken.
✓ Branch 37 taken 308 times.
✗ Branch 38 not taken.
✓ Branch 39 taken 416 times.
✗ Branch 40 not taken.
✓ Branch 41 taken 64 times.
✗ Branch 42 not taken.
✓ Branch 43 taken 32 times.
✗ Branch 44 not taken.
✓ Branch 45 taken 308 times.
✗ Branch 46 not taken.
✓ Branch 47 taken 416 times.
✓ Branch 48 taken 168 times.
✓ Branch 49 taken 520 times.
✓ Branch 50 taken 4 times.
✓ Branch 51 taken 36 times.
✓ Branch 52 taken 272 times.
✓ Branch 53 taken 818 times.
✗ Branch 54 not taken.
✓ Branch 55 taken 748 times.
|
183446 | while (index_ < length_) { |
254 | 129992 | disable_loop_vectorization(); | |
255 | 129992 | callback(index_++); | |
256 | } | ||
257 | |||
258 | 53454 | return *this; | |
259 | } | ||
260 | |||
261 | // Processes all remaining data at once. | ||
262 | template <typename CallbackType> | ||
263 | 217856 | LoopUnroll2 &remaining(CallbackType callback) KLEIDICV_STREAMING { | |
264 |
64/64✓ Branch 0 taken 617 times.
✓ Branch 1 taken 33005 times.
✓ Branch 2 taken 5114 times.
✓ Branch 3 taken 27322 times.
✓ Branch 4 taken 816 times.
✓ Branch 5 taken 29049 times.
✓ Branch 6 taken 6934 times.
✓ Branch 7 taken 24196 times.
✓ Branch 8 taken 674 times.
✓ Branch 9 taken 2344 times.
✓ Branch 10 taken 6833 times.
✓ Branch 11 taken 971 times.
✓ Branch 12 taken 768 times.
✓ Branch 13 taken 1989 times.
✓ Branch 14 taken 12854 times.
✓ Branch 15 taken 1528 times.
✓ Branch 16 taken 1664 times.
✓ Branch 17 taken 5866 times.
✓ Branch 18 taken 612 times.
✓ Branch 19 taken 1076 times.
✓ Branch 20 taken 1139 times.
✓ Branch 21 taken 1890 times.
✓ Branch 22 taken 1062 times.
✓ Branch 23 taken 916 times.
✓ Branch 24 taken 2220 times.
✓ Branch 25 taken 10252 times.
✓ Branch 26 taken 1080 times.
✓ Branch 27 taken 2312 times.
✓ Branch 28 taken 1791 times.
✓ Branch 29 taken 5844 times.
✓ Branch 30 taken 1850 times.
✓ Branch 31 taken 3120 times.
✓ Branch 32 taken 72 times.
✓ Branch 33 taken 64 times.
✓ Branch 34 taken 36 times.
✓ Branch 35 taken 36 times.
✓ Branch 36 taken 334 times.
✓ Branch 37 taken 290 times.
✓ Branch 38 taken 468 times.
✓ Branch 39 taken 372 times.
✓ Branch 40 taken 72 times.
✓ Branch 41 taken 64 times.
✓ Branch 42 taken 36 times.
✓ Branch 43 taken 36 times.
✓ Branch 44 taken 334 times.
✓ Branch 45 taken 290 times.
✓ Branch 46 taken 468 times.
✓ Branch 47 taken 372 times.
✓ Branch 48 taken 662 times.
✓ Branch 49 taken 516 times.
✓ Branch 50 taken 158 times.
✓ Branch 51 taken 60 times.
✓ Branch 52 taken 996 times.
✓ Branch 53 taken 778 times.
✓ Branch 54 taken 842 times.
✓ Branch 55 taken 800 times.
✓ Branch 56 taken 866 times.
✓ Branch 57 taken 2128 times.
✓ Branch 58 taken 866 times.
✓ Branch 59 taken 2136 times.
✓ Branch 60 taken 866 times.
✓ Branch 61 taken 2128 times.
✓ Branch 62 taken 866 times.
✓ Branch 63 taken 2136 times.
|
217856 | if (remaining_length()) { |
265 | 163886 | callback(index_, length_); | |
266 | 163886 | index_ = length_; | |
267 | 163886 | } | |
268 | |||
269 | 217856 | return *this; | |
270 | } | ||
271 | |||
272 | // Returns true if there is nothing left to process. | ||
273 | bool empty() const KLEIDICV_STREAMING { return length_ == index_; } | ||
274 | |||
275 | // Returns the step value. | ||
276 | 405939 | size_t step() const KLEIDICV_STREAMING { return step_; } | |
277 | |||
278 | // Returns the remaining length. | ||
279 | 715503 | size_t remaining_length() const KLEIDICV_STREAMING { | |
280 | 715503 | return length_ - index_; | |
281 | } | ||
282 | |||
283 | private: | ||
284 | template <const size_t UnrollFactor, typename CallbackType> | ||
285 | 405939 | LoopUnroll2 &unroll_n_times(CallbackType callback) KLEIDICV_STREAMING { | |
286 | 405939 | const size_t n_step = UnrollFactor * step(); | |
287 | 405939 | size_t max_index = index_ + (remaining_length() / n_step) * n_step; | |
288 | |||
289 | // A tail mechanism is built into the single vector processing loop, if | ||
290 | // enabled. The single vector path is executed iteratively, and at the end | ||
291 | // it rewinds the loop to one vector before the end of the data, and | ||
292 | // executes one final vector path, so the scalar path can be omitted. | ||
293 | if constexpr (try_to_avoid_tail_loop<Tail> && (UnrollFactor == 1)) { | ||
294 | // Enter this loop only if there's enough data for a full vector | ||
295 |
60/64✓ Branch 0 taken 3536 times.
✓ Branch 1 taken 1678 times.
✓ Branch 2 taken 2271 times.
✓ Branch 3 taken 2985 times.
✓ Branch 4 taken 3292 times.
✓ Branch 5 taken 1672 times.
✓ Branch 6 taken 2581 times.
✓ Branch 7 taken 4527 times.
✓ Branch 8 taken 912 times.
✓ Branch 9 taken 687 times.
✓ Branch 10 taken 517 times.
✓ Branch 11 taken 6459 times.
✓ Branch 12 taken 741 times.
✓ Branch 13 taken 717 times.
✓ Branch 14 taken 824 times.
✓ Branch 15 taken 12350 times.
✓ Branch 16 taken 386 times.
✓ Branch 17 taken 3298 times.
✓ Branch 18 taken 508 times.
✓ Branch 19 taken 237 times.
✓ Branch 20 taken 490 times.
✓ Branch 21 taken 933 times.
✓ Branch 22 taken 502 times.
✓ Branch 23 taken 345 times.
✓ Branch 24 taken 442 times.
✓ Branch 25 taken 5713 times.
✓ Branch 26 taken 76 times.
✓ Branch 27 taken 1521 times.
✓ Branch 28 taken 518 times.
✓ Branch 29 taken 3208 times.
✓ Branch 30 taken 554 times.
✓ Branch 31 taken 1789 times.
✓ Branch 32 taken 32 times.
✓ Branch 33 taken 36 times.
✓ Branch 34 taken 16 times.
✓ Branch 35 taken 20 times.
✓ Branch 36 taken 144 times.
✓ Branch 37 taken 168 times.
✓ Branch 38 taken 208 times.
✓ Branch 39 taken 212 times.
✓ Branch 40 taken 32 times.
✓ Branch 41 taken 36 times.
✓ Branch 42 taken 16 times.
✓ Branch 43 taken 20 times.
✓ Branch 44 taken 144 times.
✓ Branch 45 taken 168 times.
✓ Branch 46 taken 208 times.
✓ Branch 47 taken 212 times.
✓ Branch 48 taken 318 times.
✓ Branch 49 taken 235 times.
✓ Branch 50 taken 20 times.
✓ Branch 51 taken 53 times.
✓ Branch 52 taken 450 times.
✓ Branch 53 taken 401 times.
✓ Branch 54 taken 336 times.
✓ Branch 55 taken 449 times.
✗ Branch 56 not taken.
✓ Branch 57 taken 1461 times.
✗ Branch 58 not taken.
✓ Branch 59 taken 1465 times.
✗ Branch 60 not taken.
✓ Branch 61 taken 1461 times.
✗ Branch 62 not taken.
✓ Branch 63 taken 1465 times.
|
76055 | if (length_ >= n_step) { |
296 | // External loop only ends when all data has been processed | ||
297 |
64/64✓ Branch 0 taken 3294 times.
✓ Branch 1 taken 1678 times.
✓ Branch 2 taken 4990 times.
✓ Branch 3 taken 2985 times.
✓ Branch 4 taken 3214 times.
✓ Branch 5 taken 1672 times.
✓ Branch 6 taken 6914 times.
✓ Branch 7 taken 4527 times.
✓ Branch 8 taken 1255 times.
✓ Branch 9 taken 687 times.
✓ Branch 10 taken 9449 times.
✓ Branch 11 taken 6459 times.
✓ Branch 12 taken 1404 times.
✓ Branch 13 taken 717 times.
✓ Branch 14 taken 18504 times.
✓ Branch 15 taken 12350 times.
✓ Branch 16 taken 6073 times.
✓ Branch 17 taken 3298 times.
✓ Branch 18 taken 267 times.
✓ Branch 19 taken 237 times.
✓ Branch 20 taken 1624 times.
✓ Branch 21 taken 933 times.
✓ Branch 22 taken 635 times.
✓ Branch 23 taken 345 times.
✓ Branch 24 taken 10418 times.
✓ Branch 25 taken 5713 times.
✓ Branch 26 taken 2299 times.
✓ Branch 27 taken 1521 times.
✓ Branch 28 taken 5711 times.
✓ Branch 29 taken 3208 times.
✓ Branch 30 taken 2827 times.
✓ Branch 31 taken 1789 times.
✓ Branch 32 taken 64 times.
✓ Branch 33 taken 36 times.
✓ Branch 34 taken 36 times.
✓ Branch 35 taken 20 times.
✓ Branch 36 taken 290 times.
✓ Branch 37 taken 168 times.
✓ Branch 38 taken 372 times.
✓ Branch 39 taken 212 times.
✓ Branch 40 taken 64 times.
✓ Branch 41 taken 36 times.
✓ Branch 42 taken 36 times.
✓ Branch 43 taken 20 times.
✓ Branch 44 taken 290 times.
✓ Branch 45 taken 168 times.
✓ Branch 46 taken 372 times.
✓ Branch 47 taken 212 times.
✓ Branch 48 taken 371 times.
✓ Branch 49 taken 235 times.
✓ Branch 50 taken 79 times.
✓ Branch 51 taken 53 times.
✓ Branch 52 taken 529 times.
✓ Branch 53 taken 401 times.
✓ Branch 54 taken 823 times.
✓ Branch 55 taken 449 times.
✓ Branch 56 taken 2219 times.
✓ Branch 57 taken 1461 times.
✓ Branch 58 taken 2533 times.
✓ Branch 59 taken 1465 times.
✓ Branch 60 taken 2219 times.
✓ Branch 61 taken 1461 times.
✓ Branch 62 taken 2533 times.
✓ Branch 63 taken 1465 times.
|
147689 | while (index_ < length_) { |
298 | // Internal loop checks if the vector path can be executed | ||
299 |
64/64✓ Branch 0 taken 5438 times.
✓ Branch 1 taken 3294 times.
✓ Branch 2 taken 32124 times.
✓ Branch 3 taken 4990 times.
✓ Branch 4 taken 6705 times.
✓ Branch 5 taken 3214 times.
✓ Branch 6 taken 118408 times.
✓ Branch 7 taken 6914 times.
✓ Branch 8 taken 7505 times.
✓ Branch 9 taken 1255 times.
✓ Branch 10 taken 461329 times.
✓ Branch 11 taken 9449 times.
✓ Branch 12 taken 13796 times.
✓ Branch 13 taken 1404 times.
✓ Branch 14 taken 1841236 times.
✓ Branch 15 taken 18504 times.
✓ Branch 16 taken 85169 times.
✓ Branch 17 taken 6073 times.
✓ Branch 18 taken 495 times.
✓ Branch 19 taken 267 times.
✓ Branch 20 taken 40122 times.
✓ Branch 21 taken 1624 times.
✓ Branch 22 taken 1915 times.
✓ Branch 23 taken 635 times.
✓ Branch 24 taken 12624 times.
✓ Branch 25 taken 10418 times.
✓ Branch 26 taken 2251 times.
✓ Branch 27 taken 2299 times.
✓ Branch 28 taken 7115 times.
✓ Branch 29 taken 5711 times.
✓ Branch 30 taken 3831 times.
✓ Branch 31 taken 2827 times.
✓ Branch 32 taken 496 times.
✓ Branch 33 taken 64 times.
✓ Branch 34 taken 260 times.
✓ Branch 35 taken 36 times.
✓ Branch 36 taken 2048 times.
✓ Branch 37 taken 290 times.
✓ Branch 38 taken 2572 times.
✓ Branch 39 taken 372 times.
✓ Branch 40 taken 480 times.
✓ Branch 41 taken 64 times.
✓ Branch 42 taken 260 times.
✓ Branch 43 taken 36 times.
✓ Branch 44 taken 2032 times.
✓ Branch 45 taken 290 times.
✓ Branch 46 taken 2572 times.
✓ Branch 47 taken 372 times.
✓ Branch 48 taken 1167 times.
✓ Branch 49 taken 371 times.
✓ Branch 50 taken 343 times.
✓ Branch 51 taken 79 times.
✓ Branch 52 taken 2991 times.
✓ Branch 53 taken 529 times.
✓ Branch 54 taken 3539 times.
✓ Branch 55 taken 823 times.
✓ Branch 56 taken 2083 times.
✓ Branch 57 taken 2219 times.
✓ Branch 58 taken 3983 times.
✓ Branch 59 taken 2533 times.
✓ Branch 60 taken 2083 times.
✓ Branch 61 taken 2219 times.
✓ Branch 62 taken 3983 times.
✓ Branch 63 taken 2533 times.
|
2762663 | while (index_ < max_index) { |
300 | 2670955 | callback(index_); | |
301 | 2670955 | index_ += n_step; | |
302 | } | ||
303 | // Check if a final iteration is needed. The double loop is needed to | ||
304 | // avoid the repetition of the callback function, which is usually | ||
305 | // inlined into the binary. (Save some code space) | ||
306 |
64/64✓ Branch 0 taken 1670 times.
✓ Branch 1 taken 1624 times.
✓ Branch 2 taken 2969 times.
✓ Branch 3 taken 2021 times.
✓ Branch 4 taken 1672 times.
✓ Branch 5 taken 1542 times.
✓ Branch 6 taken 4343 times.
✓ Branch 7 taken 2571 times.
✓ Branch 8 taken 687 times.
✓ Branch 9 taken 568 times.
✓ Branch 10 taken 6291 times.
✓ Branch 11 taken 3158 times.
✓ Branch 12 taken 717 times.
✓ Branch 13 taken 687 times.
✓ Branch 14 taken 12350 times.
✓ Branch 15 taken 6154 times.
✓ Branch 16 taken 3298 times.
✓ Branch 17 taken 2775 times.
✓ Branch 18 taken 237 times.
✓ Branch 19 taken 30 times.
✓ Branch 20 taken 933 times.
✓ Branch 21 taken 691 times.
✓ Branch 22 taken 345 times.
✓ Branch 23 taken 290 times.
✓ Branch 24 taken 5506 times.
✓ Branch 25 taken 4912 times.
✓ Branch 26 taken 1215 times.
✓ Branch 27 taken 1084 times.
✓ Branch 28 taken 3071 times.
✓ Branch 29 taken 2640 times.
✓ Branch 30 taken 1483 times.
✓ Branch 31 taken 1344 times.
✓ Branch 32 taken 36 times.
✓ Branch 33 taken 28 times.
✓ Branch 34 taken 20 times.
✓ Branch 35 taken 16 times.
✓ Branch 36 taken 168 times.
✓ Branch 37 taken 122 times.
✓ Branch 38 taken 212 times.
✓ Branch 39 taken 160 times.
✓ Branch 40 taken 36 times.
✓ Branch 41 taken 28 times.
✓ Branch 42 taken 20 times.
✓ Branch 43 taken 16 times.
✓ Branch 44 taken 168 times.
✓ Branch 45 taken 122 times.
✓ Branch 46 taken 212 times.
✓ Branch 47 taken 160 times.
✓ Branch 48 taken 235 times.
✓ Branch 49 taken 136 times.
✓ Branch 50 taken 53 times.
✓ Branch 51 taken 26 times.
✓ Branch 52 taken 401 times.
✓ Branch 53 taken 128 times.
✓ Branch 54 taken 449 times.
✓ Branch 55 taken 374 times.
✓ Branch 56 taken 1155 times.
✓ Branch 57 taken 1064 times.
✓ Branch 58 taken 1465 times.
✓ Branch 59 taken 1068 times.
✓ Branch 60 taken 1155 times.
✓ Branch 61 taken 1064 times.
✓ Branch 62 taken 1465 times.
✓ Branch 63 taken 1068 times.
|
91708 | if (remaining_length()) { |
307 | 37671 | index_ = length_ - n_step; | |
308 | 37671 | max_index = length_; | |
309 | 37671 | } | |
310 | } | ||
311 | 55981 | } | |
312 | } else { | ||
313 |
112/112✓ Branch 0 taken 10145 times.
✓ Branch 1 taken 44609 times.
✓ Branch 2 taken 10635 times.
✓ Branch 3 taken 44383 times.
✓ Branch 4 taken 9199 times.
✓ Branch 5 taken 36967 times.
✓ Branch 6 taken 10185 times.
✓ Branch 7 taken 35723 times.
✓ Branch 8 taken 22998 times.
✓ Branch 9 taken 21499 times.
✓ Branch 10 taken 2639 times.
✓ Branch 11 taken 16634 times.
✓ Branch 12 taken 12141 times.
✓ Branch 13 taken 11483 times.
✓ Branch 14 taken 2768 times.
✓ Branch 15 taken 10066 times.
✓ Branch 16 taken 871 times.
✓ Branch 17 taken 2956 times.
✓ Branch 18 taken 1272 times.
✓ Branch 19 taken 1186 times.
✓ Branch 20 taken 2788 times.
✓ Branch 21 taken 4017 times.
✓ Branch 22 taken 2862 times.
✓ Branch 23 taken 3877 times.
✓ Branch 24 taken 2668 times.
✓ Branch 25 taken 2386 times.
✓ Branch 26 taken 510 times.
✓ Branch 27 taken 1418 times.
✓ Branch 28 taken 1946 times.
✓ Branch 29 taken 2940 times.
✓ Branch 30 taken 1748 times.
✓ Branch 31 taken 1580 times.
✓ Branch 32 taken 45992 times.
✓ Branch 33 taken 5380 times.
✓ Branch 34 taken 11492 times.
✓ Branch 35 taken 5316 times.
✓ Branch 36 taken 2800 times.
✓ Branch 37 taken 1026 times.
✓ Branch 38 taken 3210 times.
✓ Branch 39 taken 1242 times.
✓ Branch 40 taken 23012 times.
✓ Branch 41 taken 1550 times.
✓ Branch 42 taken 3282 times.
✓ Branch 43 taken 1554 times.
✓ Branch 44 taken 2752 times.
✓ Branch 45 taken 1094 times.
✓ Branch 46 taken 3226 times.
✓ Branch 47 taken 1310 times.
✓ Branch 48 taken 50372 times.
✓ Branch 49 taken 12310 times.
✓ Branch 50 taken 14920 times.
✓ Branch 51 taken 11342 times.
✓ Branch 52 taken 5560 times.
✓ Branch 53 taken 4758 times.
✓ Branch 54 taken 5966 times.
✓ Branch 55 taken 4618 times.
✓ Branch 56 taken 24612 times.
✓ Branch 57 taken 5748 times.
✓ Branch 58 taken 6298 times.
✓ Branch 59 taken 5748 times.
✓ Branch 60 taken 2228 times.
✓ Branch 61 taken 3122 times.
✓ Branch 62 taken 2238 times.
✓ Branch 63 taken 3122 times.
✓ Branch 64 taken 52 times.
✓ Branch 65 taken 8 times.
✓ Branch 66 taken 8 times.
✓ Branch 67 taken 8 times.
✓ Branch 68 taken 112 times.
✓ Branch 69 taken 8 times.
✓ Branch 70 taken 52 times.
✓ Branch 71 taken 8 times.
✓ Branch 72 taken 8 times.
✓ Branch 73 taken 8 times.
✓ Branch 74 taken 112 times.
✓ Branch 75 taken 8 times.
✓ Branch 76 taken 52 times.
✓ Branch 77 taken 8 times.
✓ Branch 78 taken 8 times.
✓ Branch 79 taken 8 times.
✓ Branch 80 taken 112 times.
✓ Branch 81 taken 8 times.
✓ Branch 82 taken 52 times.
✓ Branch 83 taken 8 times.
✓ Branch 84 taken 8 times.
✓ Branch 85 taken 8 times.
✓ Branch 86 taken 112 times.
✓ Branch 87 taken 8 times.
✓ Branch 88 taken 130 times.
✓ Branch 89 taken 138 times.
✓ Branch 90 taken 138 times.
✓ Branch 91 taken 138 times.
✓ Branch 92 taken 510 times.
✓ Branch 93 taken 146 times.
✓ Branch 94 taken 130 times.
✓ Branch 95 taken 138 times.
✓ Branch 96 taken 138 times.
✓ Branch 97 taken 138 times.
✓ Branch 98 taken 510 times.
✓ Branch 99 taken 146 times.
✓ Branch 100 taken 1966 times.
✓ Branch 101 taken 2994 times.
✓ Branch 102 taken 2110 times.
✓ Branch 103 taken 2994 times.
✓ Branch 104 taken 6154 times.
✓ Branch 105 taken 3002 times.
✓ Branch 106 taken 1966 times.
✓ Branch 107 taken 2994 times.
✓ Branch 108 taken 2110 times.
✓ Branch 109 taken 2994 times.
✓ Branch 110 taken 6154 times.
✓ Branch 111 taken 3002 times.
|
655923 | while (index_ < max_index) { |
314 | 326039 | callback(index_); | |
315 | 326039 | index_ += n_step; | |
316 | } | ||
317 | } | ||
318 | |||
319 | 405939 | return *this; | |
320 | 405939 | } | |
321 | |||
322 | size_t length_; | ||
323 | size_t step_; | ||
324 | size_t index_; | ||
325 | }; // end of class LoopUnroll2 | ||
326 | |||
// Check whether any of the arguments are null pointers.
//
// Arguments are compared against nullptr from left to right and evaluation
// stops at the first null one. Returns false when called with no arguments.
template <typename... Pointers>
bool any_null(Pointers... pointers) KLEIDICV_STREAMING {
  return ((pointers == nullptr) || ...);
}
332 | |||
// Returns KLEIDICV_ERROR_NULL_POINTER from the enclosing function if any of
// the given pointers is null (as determined by any_null()). The do/while
// wrapper makes the macro usable as a single statement followed by a
// semicolon.
#define CHECK_POINTERS(...)                                 \
  do {                                                      \
    if (KLEIDICV_TARGET_NAMESPACE::any_null(__VA_ARGS__)) { \
      return KLEIDICV_ERROR_NULL_POINTER;                   \
    }                                                       \
  } while (false)
339 | |||
// Returns true if `v` is not a multiple of alignof(AlignType), i.e. if any of
// its low bits below that alignment are set.
//
// Only alignments of 2, 4, 8 and 16 bytes are accepted; any other AlignType
// fails to compile.
template <typename AlignType, typename Value>
bool is_misaligned(Value v) KLEIDICV_STREAMING {
  constexpr size_t kAlignment = alignof(AlignType);
  static_assert(kAlignment == 2 || kAlignment == 4 || kAlignment == 8 ||
                kAlignment == 16);

  constexpr size_t kLowBits = kAlignment - 1;
  return (v & kLowBits) != 0;
}
347 | |||
// Return value aligned up to the next multiple of alignment.
// Values that are already a multiple of alignment are returned unchanged.
// Assumes alignment is a power of two.
template <typename T>
T align_up(T value, size_t alignment) KLEIDICV_STREAMING {
  // For a power-of-two alignment these are exactly the bits that must be
  // zero in an aligned value.
  const size_t low_bits = alignment - 1;
  return (value + alignment - 1) & ~low_bits;
}

// Pointer overload: rounds the address held by `value` up to the next
// multiple of alignment (a power of two) and returns it as a pointer of the
// same type.
template <typename T>
T *align_up(T *value, size_t alignment) KLEIDICV_STREAMING {
  const auto address = reinterpret_cast<uintptr_t>(value);
  const uintptr_t aligned_address = align_up(address, alignment);
  // NOLINTBEGIN(performance-no-int-to-ptr)
  return reinterpret_cast<T *>(aligned_address);
  // NOLINTEND(performance-no-int-to-ptr)
}
362 | |||
363 | // Specialisation for when stride misalignment is possible. | ||
364 | template <typename T> | ||
365 | 50513 | std::enable_if_t<alignof(T) != 1, kleidicv_error_t> check_pointer_and_stride( | |
366 | T *pointer, size_t stride, size_t height) KLEIDICV_STREAMING { | ||
367 |
24/24✓ Branch 0 taken 14439 times.
✓ Branch 1 taken 229 times.
✓ Branch 2 taken 9612 times.
✓ Branch 3 taken 181 times.
✓ Branch 4 taken 7278 times.
✓ Branch 5 taken 168 times.
✓ Branch 6 taken 3891 times.
✓ Branch 7 taken 144 times.
✓ Branch 8 taken 4023 times.
✓ Branch 9 taken 63 times.
✓ Branch 10 taken 2868 times.
✓ Branch 11 taken 51 times.
✓ Branch 12 taken 1584 times.
✓ Branch 13 taken 15 times.
✓ Branch 14 taken 1089 times.
✓ Branch 15 taken 9 times.
✓ Branch 16 taken 1959 times.
✓ Branch 17 taken 15 times.
✓ Branch 18 taken 1464 times.
✓ Branch 19 taken 9 times.
✓ Branch 20 taken 948 times.
✓ Branch 21 taken 12 times.
✓ Branch 22 taken 456 times.
✓ Branch 23 taken 6 times.
|
50513 | if (pointer == nullptr) { |
368 | 902 | return KLEIDICV_ERROR_NULL_POINTER; | |
369 | } | ||
370 |
48/48✓ Branch 0 taken 12929 times.
✓ Branch 1 taken 1510 times.
✓ Branch 2 taken 12824 times.
✓ Branch 3 taken 105 times.
✓ Branch 4 taken 8905 times.
✓ Branch 5 taken 707 times.
✓ Branch 6 taken 8833 times.
✓ Branch 7 taken 72 times.
✓ Branch 8 taken 6567 times.
✓ Branch 9 taken 711 times.
✓ Branch 10 taken 6495 times.
✓ Branch 11 taken 72 times.
✓ Branch 12 taken 3624 times.
✓ Branch 13 taken 267 times.
✓ Branch 14 taken 3567 times.
✓ Branch 15 taken 57 times.
✓ Branch 16 taken 3640 times.
✓ Branch 17 taken 383 times.
✓ Branch 18 taken 3574 times.
✓ Branch 19 taken 66 times.
✓ Branch 20 taken 2644 times.
✓ Branch 21 taken 224 times.
✓ Branch 22 taken 2590 times.
✓ Branch 23 taken 54 times.
✓ Branch 24 taken 1482 times.
✓ Branch 25 taken 102 times.
✓ Branch 26 taken 1470 times.
✓ Branch 27 taken 12 times.
✓ Branch 28 taken 1047 times.
✓ Branch 29 taken 42 times.
✓ Branch 30 taken 1041 times.
✓ Branch 31 taken 6 times.
✓ Branch 32 taken 1827 times.
✓ Branch 33 taken 132 times.
✓ Branch 34 taken 1815 times.
✓ Branch 35 taken 12 times.
✓ Branch 36 taken 1392 times.
✓ Branch 37 taken 72 times.
✓ Branch 38 taken 1386 times.
✓ Branch 39 taken 6 times.
✓ Branch 40 taken 846 times.
✓ Branch 41 taken 102 times.
✓ Branch 42 taken 834 times.
✓ Branch 43 taken 12 times.
✓ Branch 44 taken 414 times.
✓ Branch 45 taken 42 times.
✓ Branch 46 taken 408 times.
✓ Branch 47 taken 6 times.
|
49611 | if (height > 1 && is_misaligned<T>(stride)) { |
371 | 480 | return KLEIDICV_ERROR_ALIGNMENT; | |
372 | } | ||
373 | 49131 | return KLEIDICV_OK; | |
374 | 50513 | } | |
375 | |||
376 | // Specialisation for when stride misalignment is impossible. | ||
377 | template <typename T> | ||
378 | 55680 | std::enable_if_t<alignof(T) == 1, kleidicv_error_t> check_pointer_and_stride( | |
379 | T *pointer, size_t /*stride*/, size_t /*height*/) KLEIDICV_STREAMING { | ||
380 |
8/8✓ Branch 0 taken 26438 times.
✓ Branch 1 taken 959 times.
✓ Branch 2 taken 22159 times.
✓ Branch 3 taken 711 times.
✓ Branch 4 taken 3128 times.
✓ Branch 5 taken 30 times.
✓ Branch 6 taken 2237 times.
✓ Branch 7 taken 18 times.
|
55680 | if (pointer == nullptr) { |
381 | 1718 | return KLEIDICV_ERROR_NULL_POINTER; | |
382 | } | ||
383 | 53962 | return KLEIDICV_OK; | |
384 | 55680 | } | |
385 | |||
// Calls check_pointer_and_stride(pointer, stride, height) and, if the result
// converts to true, returns that result from the enclosing function.
// NOTE(review): this relies on KLEIDICV_OK converting to false (presumably
// zero) - confirm against the kleidicv_error_t definition.
#define CHECK_POINTER_AND_STRIDE(pointer, stride, height)         \
  do {                                                            \
    if (kleidicv_error_t ptr_stride_err =                         \
            KLEIDICV_TARGET_NAMESPACE::check_pointer_and_stride(  \
                pointer, stride, height)) {                       \
      return ptr_stride_err;                                      \
    }                                                             \
  } while (false)
394 | |||
// Declares `ElementType *name`, initialised from `from` reinterpreted as a
// pointer to ElementType. When alignof(ElementType) > 1, it first returns
// KLEIDICV_ERROR_ALIGNMENT from the enclosing function if the address in
// `from` is misaligned for ElementType.
// The expansion is an `if constexpr` statement followed by a declaration with
// no trailing semicolon: the caller supplies the semicolon, and `name` is
// introduced into the caller's scope. Because it is more than one statement,
// do not use it as the unbraced body of an `if` or loop.
#define MAKE_POINTER_CHECK_ALIGNMENT(ElementType, name, from)   \
  if constexpr (alignof(ElementType) > 1) {                     \
    if (KLEIDICV_TARGET_NAMESPACE::is_misaligned<ElementType>(  \
            reinterpret_cast<uintptr_t>(from))) {               \
      return KLEIDICV_ERROR_ALIGNMENT;                          \
    }                                                           \
  }                                                             \
  ElementType *name = reinterpret_cast<ElementType *>(from)
403 | |||
// Returns KLEIDICV_ERROR_RANGE from the enclosing function if the pixel count
// width * height overflows size_t or exceeds KLEIDICV_MAX_IMAGE_PIXELS.
// The overflow check uses the compiler builtin __builtin_mul_overflow, which
// stores the product in image_size and reports whether it overflowed.
#define CHECK_IMAGE_SIZE(width, height)                       \
  do {                                                        \
    size_t image_size = 0;                                    \
    if (__builtin_mul_overflow(width, height, &image_size)) { \
      return KLEIDICV_ERROR_RANGE;                            \
    }                                                         \
                                                              \
    if (image_size > KLEIDICV_MAX_IMAGE_PIXELS) {             \
      return KLEIDICV_ERROR_RANGE;                            \
    }                                                         \
  } while (false)
416 | |||
// Applies CHECK_IMAGE_SIZE to the `width` and `height` members of `rect`.
// `rect` is parenthesised so that expression arguments such as `*ptr` expand
// to `(*ptr).width` rather than `*ptr.width`.
#define CHECK_RECTANGLE_SIZE(rect) \
  CHECK_IMAGE_SIZE((rect).width, (rect).height)
419 | |||
420 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
421 | |||
422 | #endif // KLEIDICV_UTILS_H | ||
423 |