Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_TYPES_H | ||
6 | #define KLEIDICV_TYPES_H | ||
7 | |||
8 | #include <cstring> | ||
9 | #include <memory> | ||
10 | #include <utility> | ||
11 | |||
12 | #include "kleidicv/config.h" | ||
13 | #include "kleidicv/ctypes.h" | ||
14 | #include "kleidicv/utils.h" | ||
15 | |||
16 | #if KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2 | ||
17 | #include <arm_sme.h> | ||
18 | #endif | ||
19 | |||
20 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
21 | |||
22 | // Represents a point on a 2D plane. | ||
23 | class Point final { | ||
24 | public: | ||
25 | 1080 | explicit Point(size_t x, size_t y) KLEIDICV_STREAMING : x_{x}, y_{y} {} | |
26 | |||
27 | 3120 | size_t x() const KLEIDICV_STREAMING { return x_; } | |
28 | 446403 | size_t y() const KLEIDICV_STREAMING { return y_; } | |
29 | |||
30 | private: | ||
31 | size_t x_; | ||
32 | size_t y_; | ||
33 | }; // end of class Point | ||
34 | |||
35 | // Represents an area given by its width and height. | ||
36 | class Rectangle final { | ||
37 | public: | ||
38 | 51934 | explicit Rectangle(size_t width, size_t height) KLEIDICV_STREAMING | |
39 | 51934 | : width_(width), | |
40 | 51934 | height_(height) {} | |
41 | |||
42 | explicit Rectangle(int width, int height) KLEIDICV_STREAMING | ||
43 | : Rectangle(static_cast<size_t>(width), static_cast<size_t>(height)) {} | ||
44 | |||
45 | 2010 | explicit Rectangle(kleidicv_rectangle_t rect) KLEIDICV_STREAMING | |
46 | 2010 | : Rectangle(rect.width, rect.height) {} | |
47 | |||
48 | 1334464 | size_t width() const KLEIDICV_STREAMING { return width_; } | |
49 | 3092753 | size_t height() const KLEIDICV_STREAMING { return height_; } | |
50 | 15516 | size_t area() const KLEIDICV_STREAMING { return width() * height(); } | |
51 | |||
52 | 15516 | void flatten() KLEIDICV_STREAMING { | |
53 | 15516 | width_ = area(); | |
54 | 15516 | height_ = 1; | |
55 | 15516 | } | |
56 | |||
57 | 1032 | bool operator==(const Rectangle &rhs) const KLEIDICV_STREAMING { | |
58 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1020 times.
|
1032 | return width() == rhs.width() && height() == rhs.height(); |
59 | } | ||
60 | |||
61 | 1032 | bool operator!=(const Rectangle &rhs) const KLEIDICV_STREAMING { | |
62 | 1032 | return !operator==(rhs); | |
63 | } | ||
64 | |||
65 | private: | ||
66 | size_t width_; | ||
67 | size_t height_; | ||
68 | }; // end of class Rectangle | ||
69 | |||
70 | // Represents margins around a two dimensional area. | ||
71 | class Margin final { | ||
72 | public: | ||
73 | 1974 | explicit constexpr Margin(size_t left, size_t top, size_t right, | |
74 | size_t bottom) KLEIDICV_STREAMING | ||
75 | 1974 | : left_(left), | |
76 | 1974 | top_(top), | |
77 | 1974 | right_(right), | |
78 | 1974 | bottom_(bottom) {} | |
79 | |||
80 | explicit constexpr Margin(size_t margin) KLEIDICV_STREAMING | ||
81 | : left_(margin), | ||
82 | top_(margin), | ||
83 | right_(margin), | ||
84 | bottom_(margin) {} | ||
85 | |||
86 | 1974 | explicit Margin(kleidicv_rectangle_t kernel, | |
87 | kleidicv_point_t anchor) KLEIDICV_STREAMING | ||
88 | 3948 | : Margin(anchor.x, anchor.y, kernel.width - anchor.x - 1, | |
89 | 3948 | kernel.height - anchor.y - 1) {} | |
90 | |||
91 | explicit Margin(Rectangle kernel, Point anchor) KLEIDICV_STREAMING | ||
92 | : Margin(anchor.x(), anchor.y(), kernel.width() - anchor.x() - 1, | ||
93 | kernel.height() - anchor.y() - 1) {} | ||
94 | |||
95 | 29784 | size_t left() const KLEIDICV_STREAMING { return left_; } | |
96 | 16604 | size_t top() const KLEIDICV_STREAMING { return top_; } | |
97 | 9792 | size_t right() const KLEIDICV_STREAMING { return right_; } | |
98 | 1116 | size_t bottom() const KLEIDICV_STREAMING { return bottom_; } | |
99 | |||
100 | private: | ||
101 | size_t left_; | ||
102 | size_t top_; | ||
103 | size_t right_; | ||
104 | size_t bottom_; | ||
105 | }; // end of class Margin | ||
106 | |||
107 | // Describes the layout of one row given by a base pointer and channel count. | ||
108 | template <typename T> | ||
109 | class Columns final { | ||
110 | public: | ||
111 | 643165 | explicit Columns(T *ptr, size_t channels) KLEIDICV_STREAMING | |
112 | 643165 | : ptr_{ptr}, | |
113 | 643165 | channels_{channels} {} | |
114 | |||
115 | // Subscript operator to return an arbitrary column at an index. To account | ||
116 | // for channel count use at() method. | ||
117 | 2097463 | T &operator[](ptrdiff_t index) KLEIDICV_STREAMING { return ptr_[index]; } | |
118 | |||
119 | // Addition assignment operator to step across the columns. | ||
120 | 794038 | Columns &operator+=(ptrdiff_t diff) KLEIDICV_STREAMING { | |
121 | 794038 | ptr_ += static_cast<ptrdiff_t>(channels()) * diff; | |
122 | 794038 | return *this; | |
123 | } | ||
124 | |||
125 | // Subtraction assignment operator to step across the columns. | ||
126 | 2965 | Columns &operator-=(ptrdiff_t diff) KLEIDICV_STREAMING { | |
127 | 2965 | ptr_ -= static_cast<ptrdiff_t>(channels()) * diff; | |
128 | 2965 | return *this; | |
129 | } | ||
130 | |||
131 | // Prefix increment operator to advance to the next column. | ||
132 | Columns &operator++() KLEIDICV_STREAMING { return operator+=(1); } | ||
133 | |||
134 | // NOLINTBEGIN(hicpp-explicit-conversions) | ||
135 | // Implicit conversion operator from Columns<T> to Columns<const T>. | ||
136 | 3276 | [[nodiscard]] operator Columns<const T>() const KLEIDICV_STREAMING { | |
137 | 3276 | return Columns<const T>{ptr_, channels()}; | |
138 | } | ||
139 | // NOLINTEND(hicpp-explicit-conversions) | ||
140 | |||
141 | // Returns a new instance at a given column. | ||
142 | 292082 | [[nodiscard]] Columns<T> at(ptrdiff_t column) KLEIDICV_STREAMING { | |
143 | 584164 | return Columns<T>{&ptr_[column * static_cast<ptrdiff_t>(channels())], | |
144 | 292082 | channels()}; | |
145 | } | ||
146 | |||
147 | // Returns a pointer to a given column. | ||
148 | 56726 | [[nodiscard]] T *ptr_at(ptrdiff_t column) KLEIDICV_STREAMING { | |
149 | 56726 | return ptr_ + column * static_cast<ptrdiff_t>(channels()); | |
150 | } | ||
151 | |||
152 | // Returns the number of channels in a row. | ||
153 | 1447059 | size_t channels() const KLEIDICV_STREAMING { return channels_; } | |
154 | |||
155 | private: | ||
156 | // Pointer to the current position. | ||
157 | T *ptr_; | ||
158 | // Number of channels within a row. | ||
159 | size_t channels_; | ||
160 | }; // end of class Columns<T> | ||
161 | |||
162 | // Describes the columns of two parallel rows, one Columns<T> view per row. | ||
163 | template <typename T> | ||
164 | class ParallelColumns final { | ||
165 | public: | ||
166 | 5952 | explicit ParallelColumns(Columns<T> columns_0, | |
167 | Columns<T> columns_1) KLEIDICV_STREAMING | ||
168 | 5952 | : columns_{columns_0, columns_1} {} | |
169 | |||
170 | // Addition assignment operator to step across the columns. | ||
171 | 288 | ParallelColumns &operator+=(ptrdiff_t diff) KLEIDICV_STREAMING { | |
172 | 288 | columns_[0] += diff; | |
173 | 288 | columns_[1] += diff; | |
174 | 288 | return *this; | |
175 | } | ||
176 | |||
177 | // Subtraction assignment operator to step across the columns. | ||
178 | 96 | ParallelColumns &operator-=(ptrdiff_t diff) KLEIDICV_STREAMING { | |
179 | 96 | return operator+=(-1 * diff); | |
180 | } | ||
181 | |||
182 | // Prefix increment operator to advance to the next column. | ||
183 | ParallelColumns &operator++() KLEIDICV_STREAMING { return operator+=(1); } | ||
184 | |||
185 | // Returns the columns belonging to the first row. | ||
186 | 5728 | [[nodiscard]] Columns<T> first() const KLEIDICV_STREAMING { | |
187 | 5728 | return columns_[0]; | |
188 | } | ||
189 | |||
190 | // Returns the columns belonging to the second row. | ||
191 | 5728 | [[nodiscard]] Columns<T> second() const KLEIDICV_STREAMING { | |
192 | 5728 | return columns_[1]; | |
193 | } | ||
194 | |||
195 | private: | ||
196 | // The columns this instance handles. | ||
197 | Columns<T> columns_[2]; | ||
198 | }; // end of class ParallelColumns<T> | ||
199 | |||
200 | // Base class of different row implementations. | ||
201 | template <typename T> | ||
202 | class RowBase { | ||
203 | public: | ||
204 | // Returns the distance in bytes between two consecutive rows. | ||
205 | 81053435 | size_t stride() const KLEIDICV_STREAMING { return stride_; } | |
206 | |||
207 | // Returns the number of channels in a row. | ||
208 | 82323452 | size_t channels() const KLEIDICV_STREAMING { return channels_; } | |
209 | |||
210 | // Returns true if rows are continuous for a given length, otherwise false. | ||
211 | 48546 | bool is_continuous(size_t length) const KLEIDICV_STREAMING { | |
212 | 48546 | return stride() == (length * channels() * sizeof(T)); | |
213 | } | ||
214 | |||
215 | // When handling multiple rows this switches to a single row in an | ||
216 | // implementation defined way. | ||
217 | 672 | void make_single_row() const KLEIDICV_STREAMING {} | |
218 | |||
219 | // Returns false if is_continuous() always returns false, otherwise true. | ||
220 | static constexpr bool maybe_continuous() KLEIDICV_STREAMING { return true; } | ||
221 | |||
222 | protected: | ||
223 | // TODO: default initialise members. | ||
224 | // NOLINTBEGIN(hicpp-member-init) | ||
225 | // The default constructor creates an uninitialized instance. | ||
226 | RowBase() KLEIDICV_STREAMING = default; | ||
227 | // NOLINTEND(hicpp-member-init) | ||
228 | |||
229 | 40466026 | RowBase(size_t stride, size_t channels) KLEIDICV_STREAMING | |
230 | 40466026 | : stride_(stride), | |
231 | 40466026 | channels_(channels) {} | |
232 | |||
233 | // Adds a stride to a pointer, and returns the new pointer. | ||
234 | template <typename P> | ||
235 | 40459104 | [[nodiscard]] static P *add_stride(P *ptr, | |
236 | ptrdiff_t stride) KLEIDICV_STREAMING { | ||
237 | 40459104 | uintptr_t intptr = reinterpret_cast<uintptr_t>(ptr); | |
238 | 40459104 | intptr += stride; | |
239 | // NOLINTBEGIN(performance-no-int-to-ptr) | ||
240 | 80918208 | return reinterpret_cast<P *>(intptr); | |
241 | // NOLINTEND(performance-no-int-to-ptr) | ||
242 | 40459104 | } | |
243 | |||
244 | // Subtracts a stride from a pointer, and returns the new pointer. | ||
245 | template <typename P> | ||
246 | [[nodiscard]] static P *subtract_stride(P *ptr, | ||
247 | ptrdiff_t stride) KLEIDICV_STREAMING { | ||
248 | uintptr_t intptr = reinterpret_cast<uintptr_t>(ptr); | ||
249 | intptr -= stride; | ||
250 | // NOLINTBEGIN(performance-no-int-to-ptr) | ||
251 | return reinterpret_cast<P *>(intptr); | ||
252 | // NOLINTEND(performance-no-int-to-ptr) | ||
253 | } | ||
254 | |||
255 | private: | ||
256 | // Distance in bytes between two consecutive rows. | ||
257 | size_t stride_; | ||
258 | // Number of channels within a row. | ||
259 | size_t channels_; | ||
260 | }; // end of class RowBase<T> | ||
261 | |||
262 | // Describes the layout of rows given by a base pointer, channel count and a | ||
263 | // stride in bytes. | ||
264 | template <typename T> | ||
265 | class Rows final : public RowBase<T> { | ||
266 | public: | ||
267 | // Shorten code: no need for 'this->'. | ||
268 | using RowBase<T>::channels; | ||
269 | using RowBase<T>::stride; | ||
270 | |||
271 | // The default constructor creates an uninitialized instance. | ||
272 | 6900 | Rows() KLEIDICV_STREAMING : RowBase<T>() {} | |
273 | |||
274 | 40452080 | explicit Rows(T *ptr, size_t stride, size_t channels) KLEIDICV_STREAMING | |
275 | 40452080 | : RowBase<T>(stride, channels), | |
276 | 40452080 | ptr_{ptr} {} | |
277 | |||
278 | 35611 | explicit Rows(T *ptr, size_t stride) KLEIDICV_STREAMING | |
279 | 35611 | : Rows(ptr, stride, 1) {} | |
280 | |||
281 | explicit Rows(T *ptr) KLEIDICV_STREAMING : Rows(ptr, 0, 0) {} | ||
282 | |||
283 | // Subscript operator to return an arbitrary position within the current row. | ||
284 | // To account for stride and channel count use at() method. | ||
285 | 41273752 | T &operator[](ptrdiff_t index) KLEIDICV_STREAMING { return ptr_[index]; } | |
286 | |||
287 | // Addition assignment operator to navigate among rows. | ||
288 | 272618 | Rows<T> &operator+=(ptrdiff_t diff) KLEIDICV_STREAMING { | |
289 | 272618 | ptr_ = get_pointer_at(diff); | |
290 | 272618 | return *this; | |
291 | } | ||
292 | |||
293 | // Prefix increment operator to advance to the next row. | ||
294 | 265496 | Rows<T> &operator++() KLEIDICV_STREAMING { return operator+=(1); } | |
295 | |||
296 | // NOLINTBEGIN(hicpp-explicit-conversions) | ||
297 | // Returns a const variant of this instance. | ||
298 | 166926 | [[nodiscard]] operator Rows<const T>() KLEIDICV_STREAMING { | |
299 | 166926 | return Rows<const T>{ptr_, stride(), channels()}; | |
300 | } | ||
301 | // NOLINTEND(hicpp-explicit-conversions) | ||
302 | |||
303 | // Returns a new instance at a given row and column. | ||
304 | 40172150 | [[nodiscard]] Rows<T> at(ptrdiff_t row, | |
305 | ptrdiff_t column = 0) KLEIDICV_STREAMING { | ||
306 | 40172150 | return Rows<T>{get_pointer_at(row, column), stride(), channels()}; | |
307 | } | ||
308 | |||
309 | // Returns a view on columns within the current row. | ||
310 | 335795 | [[nodiscard]] Columns<T> as_columns() const KLEIDICV_STREAMING { | |
311 | 335795 | return Columns{ptr_, channels()}; | |
312 | } | ||
313 | |||
314 | // Translates a logical one-dimensional element index into physical byte | ||
315 | // offset for that element with a given row width. | ||
316 | 516 | [[nodiscard]] size_t offset_for_index(size_t index, | |
317 | size_t width) const KLEIDICV_STREAMING { | ||
318 | 516 | size_t row = index / width; | |
319 | 516 | size_t column = index % width; | |
320 | 1032 | return row * stride() + column * sizeof(T); | |
321 | 516 | } | |
322 | |||
323 | private: | ||
324 | // Returns a pointer to the element at a given row and column, taking stride | ||
325 | // and channels into account. | ||
326 | 40444768 | [[nodiscard]] T *get_pointer_at(ptrdiff_t row, | |
327 | ptrdiff_t column = 0) KLEIDICV_STREAMING { | ||
328 | 80889536 | T *ptr = | |
329 | 40444768 | RowBase<T>::add_stride(ptr_, row * static_cast<ptrdiff_t>(stride())); | |
330 | 80889536 | return &ptr[column * static_cast<ptrdiff_t>(channels())]; | |
331 | 40444768 | } | |
332 | |||
333 | // Pointer to the first row. | ||
334 | T *ptr_; | ||
335 | }; // end of class Rows<T> | ||
336 | |||
337 | // Similar to Rows<T>, but in this case rows are indirectly addressed. | ||
338 | template <typename T> | ||
339 | class IndirectRows : public RowBase<T> { | ||
340 | public: | ||
341 | // Shorten code: no need for 'this->'. | ||
342 | using RowBase<T>::channels; | ||
343 | using RowBase<T>::stride; | ||
344 | |||
345 | // The default constructor creates an uninitialized instance. | ||
346 | IndirectRows() KLEIDICV_STREAMING : RowBase<T>() {} | ||
347 | |||
348 | 10398 | explicit IndirectRows(T **ptr_storage, size_t stride, | |
349 | size_t channels) KLEIDICV_STREAMING | ||
350 | 10398 | : RowBase<T>(stride, channels), | |
351 | 10398 | ptr_storage_(ptr_storage) {} | |
352 | |||
353 | 1116 | explicit IndirectRows(T **ptr_storage, size_t depth, | |
354 | Rows<T> rows) KLEIDICV_STREAMING | ||
355 | 1116 | : RowBase<T>(rows.stride(), rows.channels()), | |
356 | 1116 | ptr_storage_(ptr_storage) { | |
357 |
2/2✓ Branch 0 taken 1116 times.
✓ Branch 1 taken 71424 times.
|
72540 | for (size_t index = 0; index < depth; ++index) { |
358 | 71424 | ptr_storage_[index] = &rows.at(index, 0)[0]; | |
359 | 71424 | } | |
360 | 1116 | } | |
361 | |||
362 | // Subscript operator to return a position within the current row. To account | ||
363 | // for stride and channel count use at() method. | ||
364 | 21116 | T &operator[](ptrdiff_t index) KLEIDICV_STREAMING { | |
365 | 21116 | return ptr_storage_[0][index]; | |
366 | } | ||
367 | |||
368 | // Addition assignment operator to navigate among rows. | ||
369 | 24097 | IndirectRows<T> &operator+=(ptrdiff_t diff) KLEIDICV_STREAMING { | |
370 | 24097 | ptr_storage_ += diff; | |
371 | 24097 | return *this; | |
372 | } | ||
373 | |||
374 | // Prefix increment operator to advance to the next row. | ||
375 | 16119 | IndirectRows<T> &operator++() KLEIDICV_STREAMING { | |
376 | 16119 | return this->operator+=(1); | |
377 | } | ||
378 | |||
379 | // Returns a new instance at a given row and column. | ||
380 | 18896 | [[nodiscard]] Rows<T> at(ptrdiff_t row, | |
381 | ptrdiff_t column = 0) KLEIDICV_STREAMING { | ||
382 | 18896 | auto rows = Rows<T>{ptr_storage_[row], stride(), channels()}; | |
383 | 18896 | return rows.at(0, column); | |
384 | 18896 | } | |
385 | |||
386 | // Returns a view on columns within the current row. | ||
387 | 108 | [[nodiscard]] Columns<T> as_columns() const KLEIDICV_STREAMING { | |
388 | 108 | return Columns{ptr_storage_[0], channels()}; | |
389 | } | ||
390 | |||
391 | protected: | ||
392 | // Pointer to the pointer storage. | ||
393 | T **ptr_storage_; | ||
394 | }; // end of class IndirectRows<T> | ||
395 | |||
396 | // Same as IndirectRows<T> but with double buffering. Requires 3 times the depth | ||
397 | // of pointers. NOTE(review): constructor writes 4 * depth entries - confirm. | ||
398 | template <typename T> | ||
399 | class DoubleBufferedIndirectRows final : public IndirectRows<T> { | ||
400 | public: | ||
401 | // Shorten code: no need for 'this->'. | ||
402 | using IndirectRows<T>::channels; | ||
403 | using IndirectRows<T>::stride; | ||
404 | |||
405 | 1116 | explicit DoubleBufferedIndirectRows(T **ptr_storage, size_t depth, | |
406 | Rows<T> rows) KLEIDICV_STREAMING | ||
407 | 1116 | : IndirectRows<T>(ptr_storage, 2 * depth, rows) { | |
408 | // Fill the second half of the pointer storage. | ||
409 |
2/2✓ Branch 0 taken 1116 times.
✓ Branch 1 taken 71424 times.
|
72540 | for (size_t index = 0; index < 2 * depth; ++index) { |
410 | 71424 | this->ptr_storage_[2 * depth + index] = this->ptr_storage_[index]; | |
411 | 71424 | } | |
412 | |||
413 | 1116 | db_ptr_storage_[0] = &this->ptr_storage_[0]; | |
414 | 1116 | db_ptr_storage_[1] = &this->ptr_storage_[depth]; | |
415 | 1116 | } | |
416 | |||
417 | // Swaps the double buffered indirect rows. | ||
418 | 2292 | void swap() KLEIDICV_STREAMING { | |
419 | 2292 | std::swap(db_ptr_storage_[0], db_ptr_storage_[1]); | |
420 | 2292 | } | |
421 | |||
422 | // Returns indirect rows where write is allowed. | ||
423 | 9222 | [[nodiscard]] IndirectRows<T> write_at() KLEIDICV_STREAMING { | |
424 | 9222 | return IndirectRows<T>{db_ptr_storage_[0], stride(), channels()}; | |
425 | } | ||
426 | |||
427 | // Returns indirect rows where read is allowed. | ||
428 | 1176 | [[nodiscard]] IndirectRows<T> read_at() KLEIDICV_STREAMING { | |
429 | 1176 | return IndirectRows<T>{db_ptr_storage_[1], stride(), channels()}; | |
430 | } | ||
431 | |||
432 | private: | ||
433 | // The double buffer. | ||
434 | T **db_ptr_storage_[2]; | ||
435 | }; // end of class DoubleBufferedIndirectRows<T> | ||
436 | |||
437 | // Describes the layout of two parallel rows. | ||
438 | template <typename T> | ||
439 | class ParallelRows final : public RowBase<T> { | ||
440 | public: | ||
441 | // Shorten code: no need for 'this->'. | ||
442 | using RowBase<T>::channels; | ||
443 | using RowBase<T>::stride; | ||
444 | |||
445 | 2432 | explicit ParallelRows(T *ptr, size_t stride, | |
446 | size_t channels) KLEIDICV_STREAMING | ||
447 | 2432 | : RowBase<T>(2 * stride, channels), | |
448 | 2432 | ptrs_{ptr, RowBase<T>::add_stride(ptr, stride)} {} | |
449 | |||
450 | 1216 | explicit ParallelRows(T *ptr, size_t stride) KLEIDICV_STREAMING | |
451 | 1216 | : ParallelRows(ptr, stride, 1) {} | |
452 | |||
453 | // Addition assignment operator to navigate among rows. | ||
454 | 5952 | ParallelRows<T> &operator+=(ptrdiff_t diff) KLEIDICV_STREAMING { | |
455 | 5952 | ptrs_[0] = RowBase<T>::add_stride(ptrs_[0], diff * stride()); | |
456 | 5952 | ptrs_[1] = RowBase<T>::add_stride(ptrs_[1], diff * stride()); | |
457 | 5952 | return *this; | |
458 | } | ||
459 | |||
460 | // Prefix increment operator to advance to the next row. | ||
461 | 5952 | ParallelRows<T> &operator++() KLEIDICV_STREAMING { return operator+=(1); } | |
462 | |||
463 | // Returns views on columns within the current rows. | ||
464 | 5952 | [[nodiscard]] ParallelColumns<T> as_columns() const KLEIDICV_STREAMING { | |
465 | 5952 | Columns columns_0{ptrs_[0], channels()}; | |
466 | 5952 | Columns columns_1{ptrs_[1], channels()}; | |
467 | 5952 | return ParallelColumns{columns_0, columns_1}; | |
468 | 5952 | } | |
469 | |||
470 | // Instructs the logic to drop the second row. | ||
471 | 1344 | void make_single_row() KLEIDICV_STREAMING { ptrs_[1] = ptrs_[0]; } | |
472 | |||
473 | private: | ||
474 | // Pointers to the two parallel rows. | ||
475 | T *ptrs_[2]; | ||
476 | }; // end of class ParallelRows<T> | ||
477 | |||
478 | template <typename OperationType, typename... RowTypes> | ||
479 | 23751 | void zip_rows(OperationType &operation, Rectangle rect, | |
480 | RowTypes... rows) KLEIDICV_STREAMING { | ||
481 | // Unary left fold. Evaluates the expression for every part of the unexpanded | ||
482 | // parameter pack 'rows'. | ||
483 |
96/96✓ Branch 0 taken 6258 times.
✓ Branch 1 taken 2883 times.
✓ Branch 2 taken 3741 times.
✓ Branch 3 taken 2583 times.
✓ Branch 4 taken 1916 times.
✓ Branch 5 taken 2049 times.
✓ Branch 6 taken 1590 times.
✓ Branch 7 taken 1922 times.
✓ Branch 8 taken 2564 times.
✓ Branch 9 taken 429 times.
✓ Branch 10 taken 387 times.
✓ Branch 11 taken 2447 times.
✓ Branch 12 taken 2124 times.
✓ Branch 13 taken 212 times.
✓ Branch 14 taken 1212 times.
✓ Branch 15 taken 1056 times.
✓ Branch 16 taken 247 times.
✓ Branch 17 taken 980 times.
✓ Branch 18 taken 996 times.
✓ Branch 19 taken 271 times.
✓ Branch 20 taken 1159 times.
✓ Branch 21 taken 32 times.
✓ Branch 22 taken 12 times.
✓ Branch 23 taken 1147 times.
✓ Branch 24 taken 1002 times.
✓ Branch 25 taken 90 times.
✓ Branch 26 taken 978 times.
✓ Branch 27 taken 24 times.
✓ Branch 28 taken 12 times.
✓ Branch 29 taken 966 times.
✓ Branch 30 taken 642 times.
✓ Branch 31 taken 96 times.
✓ Branch 32 taken 606 times.
✓ Branch 33 taken 36 times.
✓ Branch 34 taken 24 times.
✓ Branch 35 taken 582 times.
✓ Branch 36 taken 594 times.
✓ Branch 37 taken 63 times.
✓ Branch 38 taken 654 times.
✓ Branch 39 taken 84 times.
✓ Branch 40 taken 60 times.
✓ Branch 41 taken 594 times.
✓ Branch 42 taken 618 times.
✓ Branch 43 taken 72 times.
✓ Branch 44 taken 594 times.
✓ Branch 45 taken 24 times.
✓ Branch 46 taken 12 times.
✓ Branch 47 taken 582 times.
✓ Branch 48 taken 36 times.
✓ Branch 49 taken 30 times.
✓ Branch 50 taken 24 times.
✓ Branch 51 taken 12 times.
✓ Branch 52 taken 6 times.
✓ Branch 53 taken 18 times.
✓ Branch 54 taken 54 times.
✓ Branch 55 taken 48 times.
✓ Branch 56 taken 30 times.
✓ Branch 57 taken 24 times.
✓ Branch 58 taken 18 times.
✓ Branch 59 taken 12 times.
✓ Branch 60 taken 6 times.
✓ Branch 61 taken 12 times.
✓ Branch 62 taken 78 times.
✓ Branch 63 taken 72 times.
✓ Branch 64 taken 54 times.
✓ Branch 65 taken 24 times.
✓ Branch 66 taken 30 times.
✓ Branch 67 taken 24 times.
✓ Branch 68 taken 18 times.
✓ Branch 69 taken 12 times.
✓ Branch 70 taken 6 times.
✓ Branch 71 taken 12 times.
✓ Branch 72 taken 36 times.
✓ Branch 73 taken 30 times.
✓ Branch 74 taken 24 times.
✓ Branch 75 taken 12 times.
✓ Branch 76 taken 6 times.
✓ Branch 77 taken 18 times.
✓ Branch 78 taken 54 times.
✓ Branch 79 taken 48 times.
✓ Branch 80 taken 30 times.
✓ Branch 81 taken 24 times.
✓ Branch 82 taken 18 times.
✓ Branch 83 taken 12 times.
✓ Branch 84 taken 6 times.
✓ Branch 85 taken 12 times.
✓ Branch 86 taken 78 times.
✓ Branch 87 taken 72 times.
✓ Branch 88 taken 54 times.
✓ Branch 89 taken 24 times.
✓ Branch 90 taken 30 times.
✓ Branch 91 taken 24 times.
✓ Branch 92 taken 18 times.
✓ Branch 93 taken 12 times.
✓ Branch 94 taken 6 times.
✓ Branch 95 taken 12 times.
|
23751 | if ((... && (rows.is_continuous(rect.width())))) { |
484 | 15516 | rect.flatten(); | |
485 | 15516 | } | |
486 | |||
487 |
24/24✓ Branch 0 taken 13668 times.
✓ Branch 1 taken 9141 times.
✓ Branch 2 taken 8397 times.
✓ Branch 3 taken 3734 times.
✓ Branch 4 taken 4499 times.
✓ Branch 5 taken 2942 times.
✓ Branch 6 taken 5688 times.
✓ Branch 7 taken 2825 times.
✓ Branch 8 taken 2588 times.
✓ Branch 9 taken 1767 times.
✓ Branch 10 taken 1973 times.
✓ Branch 11 taken 1431 times.
✓ Branch 12 taken 1215 times.
✓ Branch 13 taken 705 times.
✓ Branch 14 taken 1056 times.
✓ Branch 15 taken 738 times.
✓ Branch 16 taken 288 times.
✓ Branch 17 taken 150 times.
✓ Branch 18 taken 108 times.
✓ Branch 19 taken 66 times.
✓ Branch 20 taken 192 times.
✓ Branch 21 taken 102 times.
✓ Branch 22 taken 288 times.
✓ Branch 23 taken 150 times.
|
63711 | for (size_t row_index = 0; row_index < rect.height(); ++row_index) { |
488 | 39960 | operation.process_row(rect.width(), rows.as_columns()...); | |
489 | // Call pre-increment operator on all elements in the parameter pack. | ||
490 | 39960 | ((++rows), ...); | |
491 | 39960 | } | |
492 | 23751 | } | |
493 | |||
494 | template <typename OperationType, typename... RowTypes> | ||
495 | 1216 | void zip_parallel_rows(OperationType &operation, Rectangle rect, | |
496 | RowTypes... rows) KLEIDICV_STREAMING { | ||
497 |
8/8✓ Branch 0 taken 304 times.
✓ Branch 1 taken 744 times.
✓ Branch 2 taken 304 times.
✓ Branch 3 taken 744 times.
✓ Branch 4 taken 304 times.
✓ Branch 5 taken 744 times.
✓ Branch 6 taken 304 times.
✓ Branch 7 taken 744 times.
|
4192 | for (size_t row_index = 0; row_index < rect.height(); row_index += 2) { |
498 | // Handle the last odd row in a special way. | ||
499 |
8/8✓ Branch 0 taken 576 times.
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 576 times.
✓ Branch 3 taken 168 times.
✓ Branch 4 taken 576 times.
✓ Branch 5 taken 168 times.
✓ Branch 6 taken 576 times.
✓ Branch 7 taken 168 times.
|
2976 | if (KLEIDICV_UNLIKELY(row_index == (rect.height() - 1))) { |
500 | 672 | ((rows.make_single_row(), ...)); | |
501 | 672 | } | |
502 | |||
503 | 2976 | operation.process_row(rect.width(), rows.as_columns()...); | |
504 | // Call pre-increment operator on all elements in the parameter pack. | ||
505 | 2976 | ((++rows), ...); | |
506 | 2976 | } | |
507 | 1216 | } | |
508 | |||
509 | // Copy rows with support for overlapping memory. | ||
510 | template <typename T> | ||
511 | class CopyRows final { | ||
512 | public: | ||
513 | 490 | void process_row(size_t length, Columns<const T> src, | |
514 | Columns<T> dst) KLEIDICV_STREAMING { | ||
515 | #if (KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2) && defined(__ANDROID__) | ||
516 | __arm_sc_memmove(static_cast<void *>(&dst[0]), | ||
517 | static_cast<const void *>(&src[0]), | ||
518 | length * sizeof(T) * dst.channels()); | ||
519 | #else | ||
520 | 980 | std::memmove(static_cast<void *>(&dst[0]), | |
521 | 490 | static_cast<const void *>(&src[0]), | |
522 | 490 | length * sizeof(T) * dst.channels()); | |
523 | #endif | ||
524 | 490 | } | |
525 | |||
526 | template <typename S, typename D> | ||
527 | 486 | static void copy_rows(Rectangle rect, S src, D dst) KLEIDICV_STREAMING { | |
528 | 486 | CopyRows<T> operation; | |
529 | 486 | zip_rows(operation, rect, src, dst); | |
530 | 486 | } | |
531 | }; // end of class CopyRows<T> | ||
532 | |||
533 | // Copy non-overlapping rows. | ||
534 | template <typename T> | ||
535 | class CopyNonOverlappingRows final { | ||
536 | public: | ||
537 | 5400 | void process_row(size_t length, Columns<const T> src, | |
538 | Columns<T> dst) KLEIDICV_STREAMING { | ||
539 | #if (KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2) && defined(__ANDROID__) | ||
540 | __arm_sc_memcpy(static_cast<void *>(&dst[0]), | ||
541 | static_cast<const void *>(&src[0]), | ||
542 | length * sizeof(T) * dst.channels()); | ||
543 | #else | ||
544 | 10800 | std::memcpy(static_cast<void *>(&dst[0]), | |
545 | 5400 | static_cast<const void *>(&src[0]), | |
546 | 5400 | length * sizeof(T) * dst.channels()); | |
547 | #endif | ||
548 | 5400 | } | |
549 | |||
550 | 720 | static void copy_rows(Rectangle rect, Rows<const T> src, | |
551 | Rows<T> dst) KLEIDICV_STREAMING { | ||
552 | 720 | CopyNonOverlappingRows<T> operation; | |
553 | 720 | zip_rows(operation, rect, src, dst); | |
554 | 720 | } | |
555 | }; // end of class CopyNonOverlappingRows<T> | ||
556 | |||
557 | // Sets the margins to zero. It takes both channel count and element size into | ||
558 | // account. For example, margin.left() = 1 means that one pixel worth of space, | ||
559 | // that is sizeof(T) * channels, will be set to zero. The first argument, rect, | ||
560 | // describes the total available memory, including all margins. | ||
561 | template <typename T> | ||
562 | void make_zero_border_border(Rectangle rect, Rows<T> rows, Margin margin) { | ||
563 | if (margin.left()) { | ||
564 | size_t margin_width_in_bytes = margin.left() * sizeof(T) * rows.channels(); | ||
565 | for (size_t index = 0; index < rect.height(); ++index) { | ||
566 | #if (KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2) && defined(__ANDROID__) | ||
567 | __arm_sc_memset(&rows.at(index)[0], 0, margin_width_in_bytes); | ||
568 | #else | ||
569 | std::memset(&rows.at(index)[0], 0, margin_width_in_bytes); | ||
570 | #endif | ||
571 | } | ||
572 | } | ||
573 | |||
574 | if (margin.top()) { | ||
575 | size_t top_width = rect.width() - margin.left() - margin.right(); | ||
576 | size_t top_width_in_bytes = top_width * sizeof(T) * rows.channels(); | ||
577 | for (size_t index = 0; index < margin.top(); ++index) { | ||
578 | #if (KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2) && defined(__ANDROID__) | ||
579 | __arm_sc_memset(&rows.at(index, margin.left())[0], 0, top_width_in_bytes); | ||
580 | #else | ||
581 | std::memset(&rows.at(index, margin.left())[0], 0, top_width_in_bytes); | ||
582 | #endif | ||
583 | } | ||
584 | } | ||
585 | |||
586 | if (margin.right()) { | ||
587 | size_t margin_width_in_bytes = margin.right() * sizeof(T) * rows.channels(); | ||
588 | for (size_t index = 0; index < rect.height(); ++index) { | ||
589 | #if (KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2) && defined(__ANDROID__) | ||
590 | __arm_sc_memset(&rows.at(index, rect.width() - margin.right())[0], 0, | ||
591 | margin_width_in_bytes); | ||
592 | #else | ||
593 | std::memset(&rows.at(index, rect.width() - margin.right())[0], 0, | ||
594 | margin_width_in_bytes); | ||
595 | #endif | ||
596 | } | ||
597 | } | ||
598 | |||
599 | if (margin.bottom()) { | ||
600 | size_t bottom_width = rect.width() - margin.left() - margin.right(); | ||
601 | size_t bottom_width_in_bytes = bottom_width * sizeof(T) * rows.channels(); | ||
602 | for (size_t index = rect.height() - margin.bottom(); index < rect.height(); | ||
603 | ++index) { | ||
604 | #if (KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2) && defined(__ANDROID__) | ||
605 | __arm_sc_memset(&rows.at(index, margin.left())[0], 0, | ||
606 | bottom_width_in_bytes); | ||
607 | #else | ||
608 | std::memset(&rows.at(index, margin.left())[0], 0, bottom_width_in_bytes); | ||
609 | #endif | ||
610 | } | ||
611 | } | ||
612 | } | ||
613 | |||
614 | // Class for providing a Rows object over memory managed by std::unique_ptr. | ||
615 | template <typename T> | ||
616 | class RowsOverUniquePtr { | ||
617 | public: | ||
618 | // Returns a rectangle which describes the layout of the allocated memory. | ||
619 | Rectangle rect() const { return rect_; } | ||
620 | |||
621 | // Returns a raw pointer to the allocated memory. | ||
622 | T *data() const { return data_.get(); } | ||
623 | |||
624 | // Returns a Rows instance over the allocated memory. | ||
625 | Rows<T> rows() const { return rows_; } | ||
626 | |||
627 | // Like rows() but without margins. | ||
628 | Rows<T> rows_without_margin() const { return rows_without_margin_; } | ||
629 | |||
630 | protected: | ||
631 | RowsOverUniquePtr(Rectangle rect, Margin margin) | ||
632 | : rect_{get_rectangle(rect, margin)}, | ||
633 | data_{std::unique_ptr<T[]>(new(std::nothrow) T[rect_.area()])} { | ||
634 | if (!data_) { | ||
635 | // Code that uses this class is required to check that data() is valid. | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | rows_ = Rows<T>{&data_[0], rect_.width() * sizeof(T)}; | ||
640 | rows_without_margin_ = rows_.at(margin.top(), margin.left()); | ||
641 | make_zero_border_border<T>(rect_, rows_, margin); | ||
642 | } | ||
643 | |||
644 | private: | ||
645 | static Rectangle get_rectangle(Rectangle rect, Margin margin) { | ||
646 | return Rectangle{margin.left() + rect.width() + margin.right(), | ||
647 | margin.top() + rect.height() + margin.bottom()}; | ||
648 | } | ||
649 | |||
650 | Rectangle rect_; | ||
651 | Rows<T> rows_; | ||
652 | Rows<T> rows_without_margin_; | ||
653 | std::unique_ptr<T[]> data_; | ||
654 | }; | ||
655 | |||
656 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
657 | |||
658 | #endif // KLEIDICV_TYPES_H | ||
659 |