KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/morphology/workspace.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 198 198 100.0%
Functions: 44 44 100.0%
Branches: 79 83 95.2%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_MORPHOLOGY_WORKSPACE_H
6 #define KLEIDICV_MORPHOLOGY_WORKSPACE_H
7
8 #include <algorithm>
9 #include <array>
10 #include <cstdlib>
11 #include <memory>
12 #include <optional>
13
14 #include "kleidicv/kleidicv.h"
15 #include "kleidicv/types.h"
16
17 #if KLEIDICV_TARGET_SME
18 #include <arm_sme.h>
19 #endif
20
21 namespace KLEIDICV_TARGET_NAMESPACE {
22
23 // Forward declarations.
24 class MorphologyWorkspace;
25
26 // Deleter for MorphologyWorkspace instances.
27 class MorphologyWorkspaceDeleter {
28 public:
29 984 void operator()(MorphologyWorkspace *ptr) const KLEIDICV_STREAMING {
30 984 std::free(ptr);
31 984 };
32 };
33
34 // Workspace for morphological operations.
35 class MorphologyWorkspace final {
36 public:
37 // Shorthand for std::unique_ptr<> holding a workspace.
38 using Pointer =
39 std::unique_ptr<MorphologyWorkspace, MorphologyWorkspaceDeleter>;
40
41 enum class BorderType {
42 CONSTANT,
43 REPLICATE,
44 };
45
46 1014 static std::optional<BorderType> get_border_type(
47 kleidicv_border_type_t border_type) KLEIDICV_STREAMING {
48
3/3
✓ Branch 0 taken 291 times.
✓ Branch 1 taken 15 times.
✓ Branch 2 taken 708 times.
1014 switch (border_type) {
49 case KLEIDICV_BORDER_TYPE_REPLICATE:
50 708 return BorderType::REPLICATE;
51 case KLEIDICV_BORDER_TYPE_CONSTANT:
52 291 return BorderType::CONSTANT;
53 default:
54 15 return std::optional<BorderType>();
55 }
56 1014 }
57
58 template <typename T>
59 class CopyDataMemcpy {
60 public:
61 4740 constexpr void operator()(Rows<const T> src_rows, Rows<T> dst_rows,
62 size_t length) const KLEIDICV_STREAMING {
63 #if KLEIDICV_TARGET_SME
64 __arm_sc_memcpy(static_cast<void *>(&dst_rows[0]),
65 static_cast<const void *>(&src_rows[0]),
66 length * sizeof(T) * dst_rows.channels());
67 #else
68 9480 std::memcpy(static_cast<void *>(&dst_rows[0]),
69 4740 static_cast<const void *>(&src_rows[0]),
70 4740 length * sizeof(T) * dst_rows.channels());
71 #endif
72 4740 }
73 };
74
75 // MorphologyWorkspace is only constructible with create().
76 MorphologyWorkspace() = delete;
77
78 // Creates a workspace on the heap.
79 999 static kleidicv_error_t create(
80 Pointer &workspace, kleidicv_rectangle_t kernel, kleidicv_point_t anchor,
81 BorderType border_type, const uint8_t *border_value, size_t channels,
82 size_t iterations, size_t type_size,
83 kleidicv_rectangle_t image) KLEIDICV_STREAMING {
84 // These values are arbitrarily chosen.
85 1998 const size_t rows_per_iteration =
86 999 std::max(2 * kernel.height, static_cast<size_t>(32ULL));
87 // To avoid load/store penalties.
88 999 const size_t kAlignment = 16;
89
90
4/4
✓ Branch 0 taken 993 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 990 times.
999 if (anchor.x >= kernel.width || anchor.y >= kernel.height) {
91 9 return KLEIDICV_ERROR_RANGE;
92 }
93
94 990 Rectangle image_size{image};
95 990 Margin margin{kernel, anchor};
96
97 // A single wide row which can hold one row worth of data in addition
98 // to left and right margins.
99 1980 size_t wide_rows_width =
100 990 margin.left() + image_size.width() + margin.right();
101 990 size_t wide_rows_stride = wide_rows_width * channels;
102 990 wide_rows_stride = align_up(wide_rows_stride, kAlignment);
103 990 size_t wide_rows_height = 1UL; // There is only one wide row.
104 990 size_t wide_rows_size = wide_rows_stride * wide_rows_height;
105 990 wide_rows_size += kAlignment - 1;
106
107 // Multiple buffer rows to hold rows without any borders.
108 990 size_t buffer_rows_width = type_size * image_size.width();
109 990 size_t buffer_rows_stride = buffer_rows_width * channels;
110 990 buffer_rows_stride = align_up(buffer_rows_stride, kAlignment);
111 990 size_t buffer_rows_height = 2 * rows_per_iteration;
112 990 size_t buffer_rows_size = 0UL;
113
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 987 times.
990 if (__builtin_mul_overflow(buffer_rows_stride, buffer_rows_height,
114 &buffer_rows_size)) {
115 3 return KLEIDICV_ERROR_RANGE;
116 }
117 987 buffer_rows_size += kAlignment - 1;
118
119 // Storage for indirect row access.
120 987 size_t indirect_row_storage_size = 3 * rows_per_iteration * sizeof(void *);
121
122 // Try to allocate workspace at once.
123 2961 size_t allocation_size = sizeof(MorphologyWorkspace) +
124 2961 indirect_row_storage_size + buffer_rows_size +
125 987 wide_rows_size;
126 987 void *allocation = std::malloc(allocation_size);
127 1974 workspace = MorphologyWorkspace::Pointer{
128 987 reinterpret_cast<MorphologyWorkspace *>(allocation)};
129
2/2
✓ Branch 0 taken 984 times.
✓ Branch 1 taken 3 times.
987 if (!workspace) {
130 3 return KLEIDICV_ERROR_ALLOCATION;
131 }
132
133 984 workspace->rows_per_iteration_ = rows_per_iteration;
134 984 workspace->wide_rows_src_width_ = image_size.width();
135 984 workspace->channels_ = channels;
136
137 984 auto *buffer_rows_address = &workspace->data_[indirect_row_storage_size];
138 984 buffer_rows_address = align_up(buffer_rows_address, kAlignment);
139 984 workspace->buffer_rows_offset_ = buffer_rows_address - &workspace->data_[0];
140 984 workspace->buffer_rows_stride_ = buffer_rows_stride;
141
142 1968 auto *wide_rows_address =
143 984 &workspace->data_[indirect_row_storage_size + buffer_rows_size];
144 984 wide_rows_address += margin.left() * channels;
145 984 wide_rows_address = align_up(wide_rows_address, kAlignment);
146 984 wide_rows_address -= margin.left() * channels;
147 984 workspace->wide_rows_offset_ = wide_rows_address - &workspace->data_[0];
148 984 workspace->wide_rows_stride_ = wide_rows_stride;
149
150 984 workspace->kernel_ = kernel;
151 984 workspace->anchor_ = anchor;
152 984 workspace->border_type_ = border_type;
153
2/2
✓ Branch 0 taken 693 times.
✓ Branch 1 taken 291 times.
984 if (border_type == BorderType::CONSTANT) {
154
2/2
✓ Branch 0 taken 288 times.
✓ Branch 1 taken 3 times.
291 if (border_value == nullptr) {
155 3 return KLEIDICV_ERROR_NULL_POINTER;
156 }
157
2/2
✓ Branch 0 taken 414 times.
✓ Branch 1 taken 288 times.
702 for (size_t i = 0; i < channels; ++i) {
158 414 workspace->border_value_[i] = border_value[i];
159 414 }
160 288 }
161 981 workspace->channels_ = channels;
162 981 workspace->iterations_ = iterations;
163 981 workspace->type_size_ = type_size;
164 981 workspace->image_size_ = image_size;
165
166 981 return KLEIDICV_OK;
167 999 }
168
169 2004 kleidicv_rectangle_t kernel() const { return kernel_; }
170 984 kleidicv_point_t anchor() const { return anchor_; }
171 1128 BorderType border_type() const { return border_type_; }
172 1968 size_t channels() const { return channels_; }
173 2112 size_t iterations() const { return iterations_; }
174 1038 size_t type_size() const { return type_size_; }
175 1032 Rectangle image_size() const { return image_size_; }
176
177 // This function is too complex, but disable the warning for now.
178 // NOLINTBEGIN(readability-function-cognitive-complexity)
179 template <typename O>
180 1128 void process(Rectangle rect, Rows<const typename O::SourceType> src_rows,
181 Rows<typename O::DestinationType> dst_rows, Margin margin,
182 BorderType border_type, O operation) KLEIDICV_STREAMING {
183 using S = typename O::SourceType;
184 using B = typename O::BufferType;
185 1128 typename O::CopyData copy_data{};
186
187
8/8
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 609 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 606 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 513 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 510 times.
1128 if (KLEIDICV_UNLIKELY(rect.width() == 0 || rect.height() == 0)) {
188 12 return;
189 }
190
191 // Wide rows which can hold data with left and right margins.
192 2232 auto wide_rows = Rows{reinterpret_cast<S *>(&data_[wide_rows_offset_]),
193 1116 wide_rows_stride_, channels_};
194
195 // Double buffered indirect rows to access the buffer rows.
196 2232 auto db_indirect_rows = DoubleBufferedIndirectRows{
197 1116 reinterpret_cast<B **>(&data_[0]), rows_per_iteration_,
198 2232 Rows{reinterpret_cast<B *>(&data_[buffer_rows_offset_]),
199 1116 buffer_rows_stride_, channels_}};
200
201 // [Step 1] Initialize workspace.
202 1116 horizontal_height_ = margin.top() + rect.height() + margin.bottom();
203 1116 vertical_height_ = rect.height();
204 1116 row_index_ = 0;
205
206 // Used by replicate border type.
207 1116 auto first_src_rows = src_rows;
208 1116 auto last_src_rows = src_rows.at(rect.height() - 1);
209
210 1116 size_t horizontal_height = get_next_horizontal_height();
211
4/4
✓ Branch 0 taken 4639 times.
✓ Branch 1 taken 606 times.
✓ Branch 2 taken 3331 times.
✓ Branch 3 taken 510 times.
9086 for (size_t index = 0; index < horizontal_height; ++index) {
212
4/6
✗ Branch 0 not taken.
✓ Branch 1 taken 951 times.
✓ Branch 2 taken 3688 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 951 times.
✓ Branch 5 taken 2380 times.
7970 switch (border_type) {
213 case BorderType::CONSTANT: {
214 1902 make_constant_border(wide_rows, 0, margin.left());
215
216
8/8
✓ Branch 0 taken 807 times.
✓ Branch 1 taken 144 times.
✓ Branch 2 taken 234 times.
✓ Branch 3 taken 573 times.
✓ Branch 4 taken 711 times.
✓ Branch 5 taken 240 times.
✓ Branch 6 taken 138 times.
✓ Branch 7 taken 573 times.
1902 if (row_index_ < margin.top() ||
217 1518 row_index_ >= margin.top() + rect.height()) {
218 1512 make_constant_border(wide_rows, margin.left(),
219 756 wide_rows_src_width_);
220 756 } else {
221 2292 copy_data(src_rows, wide_rows.at(0, margin.left()),
222 1146 wide_rows_src_width_);
223 // Advance source rows.
224 1146 ++src_rows;
225 }
226
227 3804 make_constant_border(wide_rows, margin.left() + wide_rows_src_width_,
228 1902 margin.right());
229
230 // Advance counters.
231 1902 ++row_index_;
232 1902 } break;
233
234 case BorderType::REPLICATE: {
235 6068 Rows<const S> current_src_row;
236
237
4/4
✓ Branch 0 taken 864 times.
✓ Branch 1 taken 2824 times.
✓ Branch 2 taken 456 times.
✓ Branch 3 taken 1924 times.
6068 if (row_index_ < margin.top()) {
238 1320 current_src_row = first_src_rows;
239
4/4
✓ Branch 0 taken 2182 times.
✓ Branch 1 taken 642 times.
✓ Branch 2 taken 1522 times.
✓ Branch 3 taken 402 times.
6068 } else if (row_index_ < (margin.top() + rect.height())) {
240 3704 current_src_row = src_rows;
241 // Advance source rows.
242 3704 ++src_rows;
243 3704 } else {
244 1044 current_src_row = last_src_rows;
245 }
246
247 6068 replicate_border(current_src_row, wide_rows, 0, 0, margin.left());
248 12136 copy_data(current_src_row, wide_rows.at(0, margin.left()),
249 6068 wide_rows_src_width_);
250 12136 replicate_border(current_src_row, wide_rows, wide_rows_src_width_ - 1,
251 6068 margin.left() + wide_rows_src_width_,
252 6068 margin.right());
253
254 // Advance counters.
255 6068 ++row_index_;
256 6068 } break;
257 } // switch (border_type)
258
259 // [Step 2] Process the preloaded data.
260 15940 operation.process_horizontal(Rectangle{rect.width(), 1UL}, wide_rows,
261 7970 db_indirect_rows.write_at().at(index));
262 7970 } // for (...; index < horizontal_height; ...)
263
264 1116 db_indirect_rows.swap();
265
266 // [Step 3] Process any remaining data.
267
4/4
✓ Branch 0 taken 636 times.
✓ Branch 1 taken 606 times.
✓ Branch 2 taken 540 times.
✓ Branch 3 taken 510 times.
2292 while (vertical_height_) {
268 1176 size_t horizontal_height = get_next_horizontal_height();
269
4/4
✓ Branch 0 taken 626 times.
✓ Branch 1 taken 636 times.
✓ Branch 2 taken 626 times.
✓ Branch 3 taken 540 times.
2428 for (size_t index = 0; index < horizontal_height; ++index) {
270
4/6
✗ Branch 0 not taken.
✓ Branch 1 taken 210 times.
✓ Branch 2 taken 416 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 210 times.
✓ Branch 5 taken 416 times.
1252 switch (border_type) {
271 case BorderType::CONSTANT: {
272
4/4
✓ Branch 0 taken 204 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 204 times.
✓ Branch 3 taken 6 times.
420 if (row_index_ < (margin.top() + rect.height())) {
273 // The constant left and right borders written in step 1 are still in place in the wide row; only refresh the source data between them.
274 816 copy_data(src_rows, wide_rows.at(0, margin.left()),
275 408 wide_rows_src_width_);
276 // Advance source rows.
277 408 ++src_rows;
278 408 } else {
279 24 make_constant_border(wide_rows, margin.left(),
280 12 wide_rows_src_width_);
281 }
282
283 // Advance row counter.
284 420 ++row_index_;
285 420 } break;
286
287 case BorderType::REPLICATE: {
288 832 Rows<const S> current_src_row;
289
290
4/4
✓ Branch 0 taken 404 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 404 times.
✓ Branch 3 taken 12 times.
832 if (row_index_ < (margin.top() + rect.height())) {
291 808 current_src_row = src_rows;
292 // Advance source rows.
293 808 ++src_rows;
294 808 } else {
295 24 current_src_row = last_src_rows;
296 }
297
298 832 replicate_border(current_src_row, wide_rows, 0, 0, margin.left());
299 1664 copy_data(current_src_row, wide_rows.at(0, margin.left()),
300 832 wide_rows_src_width_);
301 832 replicate_border(
302 832 current_src_row, wide_rows, wide_rows_src_width_ - 1,
303 832 margin.left() + wide_rows_src_width_, margin.right());
304
305 // Advance counters.
306 832 ++row_index_;
307 832 } break;
308 } // switch (border_type)
309
310 2504 operation.process_horizontal(Rectangle{rect.width(), 1UL}, wide_rows,
311 1252 db_indirect_rows.write_at().at(index));
312 1252 } // for (...; index < horizontal_height; ...)
313
314 1176 size_t next_vertical_height = get_next_vertical_height();
315 2352 operation.process_vertical(Rectangle{rect.width(), next_vertical_height},
316 1176 db_indirect_rows.read_at(), dst_rows);
317 1176 dst_rows += next_vertical_height;
318
319 1176 db_indirect_rows.swap();
320 1176 }
321 1128 }
322 // NOLINTEND(readability-function-cognitive-complexity)
323
324 private:
325 // The number of wide rows to process in the next iteration.
326 2292 [[nodiscard]] size_t get_next_horizontal_height() KLEIDICV_STREAMING {
327 2292 size_t height = std::min(horizontal_height_, rows_per_iteration_);
328 2292 horizontal_height_ -= height;
329 4584 return height;
330 2292 }
331
332 // The number of indirect rows to process in the next iteration.
333 1176 [[nodiscard]] size_t get_next_vertical_height() KLEIDICV_STREAMING {
334 1176 size_t height = std::min(vertical_height_, rows_per_iteration_);
335 1176 vertical_height_ -= height;
336 2352 return height;
337 1176 }
338
339 template <typename T>
340 4572 void make_constant_border(Rows<T> dst_rows, size_t dst_index,
341 size_t count) KLEIDICV_STREAMING {
342 4572 auto dst = &dst_rows.at(0, dst_index)[0];
343
2/2
✓ Branch 0 taken 4572 times.
✓ Branch 1 taken 13668 times.
18240 for (size_t index = 0; index < count; ++index) {
344
2/2
✓ Branch 0 taken 15396 times.
✓ Branch 1 taken 13668 times.
29064 for (size_t channel = 0; channel < dst_rows.channels(); ++channel) {
345 15396 dst[index * dst_rows.channels() + channel] = border_value_[channel];
346 15396 }
347 13668 }
348 4572 }
349
350 template <typename T>
351 13800 void replicate_border(Rows<const T> src_rows, Rows<T> dst_rows,
352 size_t src_index, size_t dst_index,
353 size_t count) KLEIDICV_STREAMING {
354
2/2
✓ Branch 0 taken 10674 times.
✓ Branch 1 taken 3126 times.
13800 if (!count) {
355 3126 return;
356 }
357
358
2/2
✓ Branch 0 taken 10674 times.
✓ Branch 1 taken 15666 times.
26340 for (size_t channel = 0; channel < src_rows.channels(); ++channel) {
359
2/2
✓ Branch 0 taken 28515 times.
✓ Branch 1 taken 15666 times.
44181 for (size_t index = dst_index; index < dst_index + count; ++index) {
360 28515 dst_rows.at(0, index)[channel] = src_rows.at(0, src_index)[channel];
361 28515 }
362 15666 }
363 13800 }
364
365 static_assert(sizeof(Pointer) == sizeof(void *), "Unexpected type size");
366
367 kleidicv_rectangle_t kernel_;
368 kleidicv_point_t anchor_;
369 BorderType border_type_;
370 std::array<uint8_t, KLEIDICV_MAXIMUM_CHANNEL_COUNT> border_value_;
371 size_t iterations_;
372 size_t type_size_;
373 Rectangle image_size_;
374
375 // Maximum number of rows processed per iteration.
376 size_t rows_per_iteration_;
377 // Width of the source data within a wide row, in pixels (not bytes).
378 size_t wide_rows_src_width_;
379 // The number of channels.
380 size_t channels_;
381 // Remaining height to process in horizontal direction.
382 size_t horizontal_height_;
383 // Remaining height to process in vertical direction.
384 size_t vertical_height_;
385 // Index of the processed row.
386 size_t row_index_;
387 // Offset in bytes to the buffer rows from &data_[0].
388 size_t buffer_rows_offset_;
389 // Stride of the buffer rows.
390 size_t buffer_rows_stride_;
391 // Offset in bytes to the wide rows from &data_[0].
392 size_t wide_rows_offset_;
393 // Stride of the wide rows.
394 size_t wide_rows_stride_;
395 // Workspace area begins here.
396 uint8_t data_[0] KLEIDICV_ATTR_ALIGNED(sizeof(void *));
397 }; // end of class MorphologyWorkspace
398
399 } // namespace KLEIDICV_TARGET_NAMESPACE
400
401 #endif // KLEIDICV_MORPHOLOGY_WORKSPACE_H
402