KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/morphology/morphology_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 357 357 100.0%
Functions: 68 68 100.0%
Branches: 96 96 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <algorithm>
6 #include <limits>
7
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/morphology/workspace.h"
10 #include "kleidicv/neon.h"
11 #include "kleidicv/types.h"
12
13 namespace kleidicv::neon {
14
15 template <typename ScalarType, typename O>
16 class VerticalOp final {
17 public:
18 using VecTraits = neon::VecTraits<ScalarType>;
19
20 380 VerticalOp(Rectangle rect, Rectangle kernel) : rect_(rect), kernel_(kernel) {}
21
22 380 void process_rows(IndirectRows<ScalarType> src_rows,
23 Rows<ScalarType> dst_rows) {
24
4/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 190 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 158 times.
380 if (KLEIDICV_UNLIKELY(kernel_.height()) == 1) {
25 32 CopyRows<ScalarType>::copy_rows(rect_, src_rows, dst_rows);
26 32 return;
27 }
28
29 // Iterate across the rows from top to bottom. This implementation can
30 // handle two rows at once.
31
4/4
✓ Branch 0 taken 466 times.
✓ Branch 1 taken 190 times.
✓ Branch 2 taken 357 times.
✓ Branch 3 taken 158 times.
1171 for (size_t height = 0; height < rect_.height(); height += 2) {
32 // Iterate across the columns from left to right.
33 1646 LoopUnroll2<TryToAvoidTailLoop> loop{rect_.width() * src_rows.channels(),
34 823 VecTraits::num_lanes()};
35 // clang-format off
36 loop
37 847 .unroll_four_times([&](size_t index) {
38 24 vector_path_4x(src_rows, dst_rows, index, height);
39 24 })
40 855 .unroll_twice([&](size_t index) {
41 32 vector_path_2x(src_rows, dst_rows, index, height);
42 32 })
43 1249 .unroll_once([&](size_t index) {
44 426 vector_path(src_rows, dst_rows, index, height);
45 426 })
46 3427 .tail([&](size_t index) {
47 2604 scalar_path(src_rows, dst_rows, index, height);
48 2604 });
49 // clang-format on
50 823 src_rows += 2;
51 823 dst_rows += 2;
52 823 }
53 380 }
54
55 private:
56 24 void vector_path_4x(IndirectRows<ScalarType> src_rows,
57 Rows<ScalarType> dst_rows, const size_t index,
58 const size_t height) {
59 24 const ScalarType *src_row = &src_rows[index];
60 24 auto first_row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
61 24 auto first_row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
62 24 auto first_row2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
63 24 auto first_row3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
64 24 ++src_rows;
65
66 24 src_row = &src_rows[index];
67 24 auto acc0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
68 24 auto acc1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
69 24 auto acc2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
70 24 auto acc3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
71 24 ++src_rows;
72
73 24 LoopUnroll loop{kernel_.height() - 2, 2};
74
75 36 loop.unroll_once([&](size_t step) {
76 12 const ScalarType *src_row0 = &src_rows.at(0)[index];
77 12 const ScalarType *src_row1 = &src_rows.at(1)[index];
78 12 auto row00 = vld1q(&src_row0[0 * VecTraits::num_lanes()]);
79 12 auto row01 = vld1q(&src_row0[1 * VecTraits::num_lanes()]);
80 12 auto row02 = vld1q(&src_row0[2 * VecTraits::num_lanes()]);
81 12 auto row03 = vld1q(&src_row0[3 * VecTraits::num_lanes()]);
82 12 auto row10 = vld1q(&src_row1[0 * VecTraits::num_lanes()]);
83 12 auto row11 = vld1q(&src_row1[1 * VecTraits::num_lanes()]);
84 12 auto row12 = vld1q(&src_row1[2 * VecTraits::num_lanes()]);
85 12 auto row13 = vld1q(&src_row1[3 * VecTraits::num_lanes()]);
86 12 acc0 = O::operation(acc0, O::operation(row00, row10));
87 12 acc1 = O::operation(acc1, O::operation(row01, row11));
88 12 acc2 = O::operation(acc2, O::operation(row02, row12));
89 12 acc3 = O::operation(acc3, O::operation(row03, row13));
90 12 src_rows += step;
91 12 });
92
93 44 loop.tail([&](size_t /* index */) {
94 20 const ScalarType *src_row = &src_rows[index];
95 20 auto row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
96 20 auto row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
97 20 auto row2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
98 20 auto row3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
99 20 acc0 = O::operation(acc0, row0);
100 20 acc1 = O::operation(acc1, row1);
101 20 acc2 = O::operation(acc2, row2);
102 20 acc3 = O::operation(acc3, row3);
103 20 ++src_rows;
104 20 });
105
106 // Save partial results which do not contain the first row.
107 24 auto partial_acc0 = acc0;
108 24 auto partial_acc1 = acc1;
109 24 auto partial_acc2 = acc2;
110 24 auto partial_acc3 = acc3;
111
112 // Take the first row into account.
113 24 acc0 = O::operation(acc0, first_row0);
114 24 acc1 = O::operation(acc1, first_row1);
115 24 acc2 = O::operation(acc2, first_row2);
116 24 acc3 = O::operation(acc3, first_row3);
117
118 // Store the results.
119 24 ScalarType *dst_row = &dst_rows[index];
120 24 vst1q(&dst_row[0 * VecTraits::num_lanes()], acc0);
121 24 vst1q(&dst_row[1 * VecTraits::num_lanes()], acc1);
122 24 vst1q(&dst_row[2 * VecTraits::num_lanes()], acc2);
123 24 vst1q(&dst_row[3 * VecTraits::num_lanes()], acc3);
124
125 // Try to process one more row, because it is relatively cheap to do so.
126
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 8 times.
24 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
127 8 return;
128 }
129
130 16 ++dst_rows;
131
132 16 src_row = &src_rows[index];
133 16 auto next_row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
134 16 auto next_row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
135 16 auto next_row2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
136 16 auto next_row3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
137
138 16 acc0 = O::operation(partial_acc0, next_row0);
139 16 acc1 = O::operation(partial_acc1, next_row1);
140 16 acc2 = O::operation(partial_acc2, next_row2);
141 16 acc3 = O::operation(partial_acc3, next_row3);
142
143 16 dst_row = &dst_rows[index];
144 16 vst1q(&dst_row[0 * VecTraits::num_lanes()], acc0);
145 16 vst1q(&dst_row[1 * VecTraits::num_lanes()], acc1);
146 16 vst1q(&dst_row[2 * VecTraits::num_lanes()], acc2);
147 16 vst1q(&dst_row[3 * VecTraits::num_lanes()], acc3);
148 24 }
149
150 32 void vector_path_2x(IndirectRows<ScalarType> src_rows,
151 Rows<ScalarType> dst_rows, const size_t index,
152 const size_t height) {
153 32 const ScalarType *src_row = &src_rows[index];
154 32 auto first_row0 = vld1q(&src_row[0]);
155 32 auto first_row1 = vld1q(&src_row[VecTraits::num_lanes()]);
156 32 ++src_rows;
157
158 32 src_row = &src_rows[index];
159 32 auto acc0 = vld1q(&src_row[0]);
160 32 auto acc1 = vld1q(&src_row[VecTraits::num_lanes()]);
161 32 ++src_rows;
162
163 32 LoopUnroll loop{kernel_.height() - 2, 2};
164
165 48 loop.unroll_once([&](size_t step) {
166 16 const ScalarType *src_row0 = &src_rows.at(0)[index];
167 16 const ScalarType *src_row1 = &src_rows.at(1)[index];
168 16 auto row00 = vld1q(&src_row0[0]);
169 16 auto row01 = vld1q(&src_row0[VecTraits::num_lanes()]);
170 16 auto row10 = vld1q(&src_row1[0]);
171 16 auto row11 = vld1q(&src_row1[VecTraits::num_lanes()]);
172 16 acc0 = O::operation(acc0, O::operation(row00, row10));
173 16 acc1 = O::operation(acc1, O::operation(row01, row11));
174 16 src_rows += step;
175 16 });
176
177 52 loop.tail([&](size_t /* index */) {
178 20 const ScalarType *src_row = &src_rows[index];
179 20 auto row0 = vld1q(&src_row[0]);
180 20 auto row1 = vld1q(&src_row[VecTraits::num_lanes()]);
181 20 acc0 = O::operation(acc0, row0);
182 20 acc1 = O::operation(acc1, row1);
183 20 ++src_rows;
184 20 });
185
186 // Save partial results which do not contain the first row.
187 32 auto partial_acc0 = acc0;
188 32 auto partial_acc1 = acc1;
189
190 // Take the first row into account.
191 32 acc0 = O::operation(acc0, first_row0);
192 32 acc1 = O::operation(acc1, first_row1);
193
194 // Store the results.
195 32 ScalarType *dst_row = &dst_rows[index];
196 32 vst1q(&dst_row[0], acc0);
197 32 vst1q(&dst_row[VecTraits::num_lanes()], acc1);
198
199 // Try to process one more row, because it is relatively cheap to do so.
200
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 14 times.
32 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
201 6 return;
202 }
203
204 26 ++dst_rows;
205
206 26 src_row = &src_rows[index];
207 26 auto next_row0 = vld1q(&src_row[0]);
208 26 auto next_row1 = vld1q(&src_row[VecTraits::num_lanes()]);
209
210 26 acc0 = O::operation(partial_acc0, next_row0);
211 26 acc1 = O::operation(partial_acc1, next_row1);
212
213 26 dst_row = &dst_rows[index];
214 26 vst1q(&dst_row[0], acc0);
215 26 vst1q(&dst_row[VecTraits::num_lanes()], acc1);
216 32 }
217
218 426 void vector_path(IndirectRows<ScalarType> src_rows, Rows<ScalarType> dst_rows,
219 const size_t index, const size_t height) {
220 426 auto first_row = vld1q(&src_rows[index]);
221 426 ++src_rows;
222
223 426 auto acc = vld1q(&src_rows[index]);
224 426 ++src_rows;
225
226 426 LoopUnroll loop{kernel_.height() - 2, 2};
227
228 902 loop.unroll_once([&](size_t step) {
229 476 auto row0 = vld1q(&src_rows.at(0)[index]);
230 476 auto row1 = vld1q(&src_rows.at(1)[index]);
231 476 acc = O::operation(acc, O::operation(row0, row1));
232 476 src_rows += step;
233 476 });
234
235 714 loop.tail([&](size_t /* index */) {
236 288 auto row = vld1q(&src_rows[index]);
237 288 acc = O::operation(acc, row);
238 288 ++src_rows;
239 288 });
240
241 // Save partial result which does not contain the first row.
242 426 auto partial_acc = acc;
243
244 // Take the first row into account.
245 426 acc = O::operation(acc, first_row);
246
247 // Store the results.
248 426 vst1q(&dst_rows[index], acc);
249
250 // Try to process one more row, because it is relatively cheap to do so.
251
4/4
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 212 times.
✓ Branch 2 taken 42 times.
✓ Branch 3 taken 136 times.
426 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
252 78 return;
253 }
254
255 348 ++dst_rows;
256
257 348 auto next_row = vld1q(&src_rows[index]);
258 348 acc = O::operation(partial_acc, next_row);
259 348 vst1q(&dst_rows[index], acc);
260 426 }
261
262 2604 void scalar_path(IndirectRows<ScalarType> src_rows, Rows<ScalarType> dst_rows,
263 const size_t index, const size_t height) {
264 2604 disable_loop_vectorization();
265
266 2604 ScalarType first_row = src_rows[index];
267 2604 ++src_rows;
268
269 2604 ScalarType acc = src_rows[index];
270 2604 ++src_rows;
271
272 2604 LoopUnroll loop{kernel_.height() - 2, 2};
273
274 5029 loop.unroll_once([&](size_t step) {
275 2425 auto row0 = src_rows.at(0)[index];
276 2425 auto row1 = src_rows.at(1)[index];
277 2425 acc = O::operation(acc, O::operation(row0, row1));
278 2425 src_rows += step;
279 2425 });
280
281 4413 loop.tail([&](size_t /* index */) {
282 1809 auto row = src_rows[index];
283 1809 acc = O::operation(acc, row);
284 1809 ++src_rows;
285 1809 });
286
287 // Save partial result which does not contain the first row.
288 2604 auto partial_acc = acc;
289
290 // Take the first row into account.
291 2604 acc = O::operation(acc, first_row);
292
293 // Store the results.
294 2604 dst_rows[index] = acc;
295
296 // Try to process one more row, because it is relatively cheap to do so.
297
4/4
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 1220 times.
✓ Branch 2 taken 205 times.
✓ Branch 3 taken 1020 times.
2604 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
298 364 return;
299 }
300
301 2240 ++dst_rows;
302
303 2240 auto next_row = src_rows[index];
304 2240 acc = O::operation(partial_acc, next_row);
305 2240 dst_rows[index] = acc;
306 2604 }
307
308 Rectangle rect_;
309 Rectangle kernel_;
310 }; // end of class VerticalOp<ScalarType, O>
311
312 template <typename ScalarType, typename O>
313 class HorizontalOp final {
314 public:
315 using VecTraits = neon::VecTraits<ScalarType>;
316
317 2626 HorizontalOp(Rectangle rect, Rectangle kernel)
318 2626 : rect_(rect), kernel_(kernel) {}
319
320 2626 void process_rows(Rows<const ScalarType> src_rows,
321 Rows<ScalarType> dst_rows) {
322 // Iterate across the rows from top to bottom.
323
4/4
✓ Branch 0 taken 1531 times.
✓ Branch 1 taken 1531 times.
✓ Branch 2 taken 1095 times.
✓ Branch 3 taken 1095 times.
5252 for (size_t height = 0; height < rect_.height(); ++height) {
324 // Iterate across the columns from left to right.
325 5252 LoopUnroll2<TryToAvoidTailLoop> loop{rect_.width() * src_rows.channels(),
326 2626 VecTraits::num_lanes()};
327 // clang-format off
328 loop
329 2694 .unroll_four_times([&](size_t index) {
330 68 vector_path_4x(src_rows, dst_rows, index);
331 68 })
332 2718 .unroll_twice([&](size_t index) {
333 92 vector_path_2x(src_rows, dst_rows, index);
334 92 })
335 4138 .unroll_once([&](size_t index) {
336 1512 vector_path(src_rows, dst_rows, index);
337 1512 })
338 10927 .tail([&](size_t index) {
339 8301 scalar_path(src_rows, dst_rows, index);
340 8301 });
341 // clang-format on
342 2626 ++src_rows;
343 2626 ++dst_rows;
344 2626 }
345 2626 }
346
347 private:
348 68 void vector_path_4x(Rows<const ScalarType> src_rows,
349 Rows<ScalarType> dst_rows, const size_t index) {
350 68 const auto *src_row = &src_rows[index];
351 68 auto acc0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
352 68 auto acc1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
353 68 auto acc2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
354 68 auto acc3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
355
356
4/4
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 92 times.
✓ Branch 2 taken 34 times.
✓ Branch 3 taken 92 times.
252 for (size_t width = 1; width < kernel_.width(); ++width) {
357 184 src_row = &src_rows[index + width * src_rows.channels()];
358 184 auto row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
359 184 auto row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
360 184 auto row2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
361 184 auto row3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
362 184 acc0 = O::operation(acc0, row0);
363 184 acc1 = O::operation(acc1, row1);
364 184 acc2 = O::operation(acc2, row2);
365 184 acc3 = O::operation(acc3, row3);
366 184 }
367
368 68 auto dst_row = &dst_rows[index];
369 68 vst1q(&dst_row[0 * VecTraits::num_lanes()], acc0);
370 68 vst1q(&dst_row[1 * VecTraits::num_lanes()], acc1);
371 68 vst1q(&dst_row[2 * VecTraits::num_lanes()], acc2);
372 68 vst1q(&dst_row[3 * VecTraits::num_lanes()], acc3);
373 68 }
374
375 92 void vector_path_2x(Rows<const ScalarType> src_rows,
376 Rows<ScalarType> dst_rows, const size_t index) {
377 92 const auto *src_row = &src_rows[index];
378 92 auto acc0 = vld1q(&src_row[0]);
379 92 auto acc1 = vld1q(&src_row[VecTraits::num_lanes()]);
380
381
4/4
✓ Branch 0 taken 40 times.
✓ Branch 1 taken 104 times.
✓ Branch 2 taken 52 times.
✓ Branch 3 taken 236 times.
432 for (size_t width = 1; width < kernel_.width(); ++width) {
382 340 src_row = &src_rows[index + width * src_rows.channels()];
383 340 auto row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
384 340 auto row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
385 340 acc0 = O::operation(acc0, row0);
386 340 acc1 = O::operation(acc1, row1);
387 340 }
388
389 92 auto dst_row = &dst_rows[index];
390 92 vst1q(&dst_row[0], acc0);
391 92 vst1q(&dst_row[VecTraits::num_lanes()], acc1);
392 92 }
393
394 1512 void vector_path(Rows<const ScalarType> src_rows, Rows<ScalarType> dst_rows,
395 const size_t index) {
396 1512 auto acc = vld1q(&src_rows[index]);
397
398
4/4
✓ Branch 0 taken 902 times.
✓ Branch 1 taken 2292 times.
✓ Branch 2 taken 610 times.
✓ Branch 3 taken 2520 times.
6324 for (size_t width = 1; width < kernel_.width(); ++width) {
399 // TODO: Check if EXT was any faster.
400 4812 const auto *src_row = &src_rows[index + width * src_rows.channels()];
401 4812 acc = O::operation(acc, vld1q(&src_row[0]));
402 4812 }
403
404 1512 vst1q(&dst_rows[index], acc);
405 1512 }
406
407 8301 void scalar_path(Rows<const ScalarType> src_rows, Rows<ScalarType> dst_rows,
408 const size_t index) {
409 8301 auto acc = src_rows[index];
410
411
4/4
✓ Branch 0 taken 4507 times.
✓ Branch 1 taken 13656 times.
✓ Branch 2 taken 3794 times.
✓ Branch 3 taken 13880 times.
35837 for (size_t width = 1; width < kernel_.width(); ++width) {
412 27536 disable_loop_vectorization();
413 27536 acc = O::operation(acc, src_rows[index + width * src_rows.channels()]);
414 27536 }
415
416 8301 dst_rows[index] = acc;
417 8301 }
418
419 Rectangle rect_;
420 Rectangle kernel_;
421 }; // end of class HorizontalOp<ScalarType, O>
422
423 template <typename ScalarType>
424 class Min final {
425 public:
426 using VecTraits = neon::VecTraits<ScalarType>;
427 using VectorType = typename VecTraits::VectorType;
428
429 4334 static VectorType operation(VectorType lhs, VectorType rhs) {
430 4334 return vminq_u8(lhs, rhs);
431 }
432
433 18378 static ScalarType operation(ScalarType lhs, ScalarType rhs) {
434 18378 return std::min(lhs, rhs);
435 }
436 }; // end of class Min<ScalarType>
437
438 template <typename ScalarType>
439 class Max final {
440 public:
441 using VecTraits = neon::VecTraits<ScalarType>;
442 using VectorType = typename VecTraits::VectorType;
443
444 4464 static VectorType operation(VectorType lhs, VectorType rhs) {
445 4464 return vmaxq_u8(lhs, rhs);
446 }
447
448 20661 static ScalarType operation(ScalarType lhs, ScalarType rhs) {
449 20661 return std::max(lhs, rhs);
450 }
451 }; // end of class Max<ScalarType>
452
453 template <typename T>
454 using VerticalMin = VerticalOp<T, Min<T>>;
455 template <typename T>
456 using VerticalMax = VerticalOp<T, Max<T>>;
457
458 template <typename T>
459 using HorizontalMin = HorizontalOp<T, Min<T>>;
460 template <typename T>
461 using HorizontalMax = HorizontalOp<T, Max<T>>;
462
463 // Helper structure for dilate.
464 template <typename ScalarType>
465 class DilateOperation final {
466 public:
467 using SourceType = ScalarType;
468 using BufferType = ScalarType;
469 using DestinationType = ScalarType;
470 using CopyData = MorphologyWorkspace::CopyDataMemcpy<ScalarType>;
471
472 204 explicit DilateOperation(Rectangle kernel) : kernel_{kernel} {}
473
474 1531 void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows,
475 Rows<BufferType> dst_rows) {
476 3062 neon::HorizontalMax<ScalarType>{rect, kernel_}.process_rows(src_rows,
477 1531 dst_rows);
478 1531 }
479
480 206 void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows,
481 Rows<DestinationType> dst_rows) {
482 412 neon::VerticalMax<ScalarType>{rect, kernel_}.process_rows(src_rows,
483 206 dst_rows);
484 206 }
485
486 private:
487 Rectangle kernel_;
488 }; // end of class DilateOperation<ScalarType>
489
490 template <typename T>
491 186 kleidicv_error_t dilate(const T *src, size_t src_stride, T *dst,
492 size_t dst_stride, size_t width, size_t height,
493 kleidicv_morphology_context_t *context) {
494
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 185 times.
186 CHECK_POINTERS(context);
495
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 184 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 184 times.
185 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
496
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 183 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 183 times.
184 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
497
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 182 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 181 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 181 times.
183 CHECK_IMAGE_SIZE(width, height);
498
499 181 auto *workspace = reinterpret_cast<MorphologyWorkspace *>(context);
500
501
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 180 times.
181 if (workspace->type_size() != sizeof(T)) {
502 1 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
503 }
504
505 180 Rectangle rect{width, height};
506
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 178 times.
180 if (workspace->image_size() != rect) {
507 2 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
508 }
509
510 // Currently valid, will need to be changed if morphology supports more border
511 // types, like KLEIDICV_BORDER_TYPE_REVERSE.
512 178 Rectangle kernel{workspace->kernel()};
513
4/4
✓ Branch 0 taken 174 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 172 times.
178 if (width < kernel.width() - 1 || height < kernel.height() - 1) {
514 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
515 }
516
517 172 Rows<const T> src_rows{src, src_stride, workspace->channels()};
518 172 Rows<T> dst_rows{dst, dst_stride, workspace->channels()};
519 172 Margin margin{workspace->kernel(), workspace->anchor()};
520
521 172 Rows<const T> current_src_rows = src_rows;
522 172 Rows<T> current_dst_rows = dst_rows;
523
2/2
✓ Branch 0 taken 204 times.
✓ Branch 1 taken 172 times.
376 for (size_t iteration = 0; iteration < workspace->iterations(); ++iteration) {
524 204 DilateOperation<T> operation{kernel};
525 408 workspace->process(rect, current_src_rows, current_dst_rows, margin,
526 204 workspace->border_type(), operation);
527 // Update source for the next iteration.
528 204 current_src_rows = dst_rows;
529 204 }
530 172 return KLEIDICV_OK;
531 186 }
532
533 // Helper structure for erode.
534 template <typename ScalarType>
535 class ErodeOperation final {
536 public:
537 using SourceType = ScalarType;
538 using BufferType = ScalarType;
539 using DestinationType = ScalarType;
540 using CopyData = MorphologyWorkspace::CopyDataMemcpy<ScalarType>;
541
542 172 explicit ErodeOperation(Rectangle kernel) : kernel_{kernel} {}
543
544 1095 void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows,
545 Rows<BufferType> dst_rows) {
546 2190 neon::HorizontalMin<ScalarType>{rect, kernel_}.process_rows(src_rows,
547 1095 dst_rows);
548 1095 }
549
550 174 void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows,
551 Rows<DestinationType> dst_rows) {
552 348 neon::VerticalMin<ScalarType>{rect, kernel_}.process_rows(src_rows,
553 174 dst_rows);
554 174 }
555
556 private:
557 Rectangle kernel_;
558 }; // end of class ErodeOperation<ScalarType>
559
560 template <typename T>
561 170 kleidicv_error_t erode(const T *src, size_t src_stride, T *dst,
562 size_t dst_stride, size_t width, size_t height,
563 kleidicv_morphology_context_t *context) {
564
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 169 times.
170 CHECK_POINTERS(context);
565
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 168 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 168 times.
169 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
566
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 167 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 167 times.
168 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
567
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 166 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 165 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 165 times.
167 CHECK_IMAGE_SIZE(width, height);
568
569 165 auto *workspace = reinterpret_cast<MorphologyWorkspace *>(context);
570
571
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 164 times.
165 if (workspace->type_size() != sizeof(T)) {
572 1 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
573 }
574
575 164 Rectangle rect{width, height};
576
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 162 times.
164 if (workspace->image_size() != rect) {
577 2 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
578 }
579
580 // Currently valid, will need to be changed if morphology supports more border
581 // types, like KLEIDICV_BORDER_TYPE_REVERSE.
582 162 Rectangle kernel{workspace->kernel()};
583
4/4
✓ Branch 0 taken 158 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 156 times.
162 if (width < kernel.width() - 1 || height < kernel.height() - 1) {
584 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
585 }
586
587 156 Rows<const T> src_rows{src, src_stride, workspace->channels()};
588 156 Rows<T> dst_rows{dst, dst_stride, workspace->channels()};
589 156 Margin margin{workspace->kernel(), workspace->anchor()};
590
591 156 Rows<const T> current_src_rows = src_rows;
592 156 Rows<T> current_dst_rows = dst_rows;
593
2/2
✓ Branch 0 taken 172 times.
✓ Branch 1 taken 156 times.
328 for (size_t iteration = 0; iteration < workspace->iterations(); ++iteration) {
594 172 ErodeOperation<T> operation{kernel};
595 344 workspace->process(rect, current_src_rows, current_dst_rows, margin,
596 172 workspace->border_type(), operation);
597 // Update source for the next iteration.
598 172 current_src_rows = dst_rows;
599 172 }
600 156 return KLEIDICV_OK;
601 170 }
602
603 #define KLEIDICV_INSTANTIATE_TEMPLATE(name, type) \
604 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t name<type>( \
605 const type *src, size_t src_stride, type *dst, size_t dst_stride, \
606 size_t width, size_t height, kleidicv_morphology_context_t *context)
607
608 KLEIDICV_INSTANTIATE_TEMPLATE(dilate, uint8_t);
609 KLEIDICV_INSTANTIATE_TEMPLATE(erode, uint8_t);
610
611 } // namespace kleidicv::neon
612