KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/morphology/morphology_neon.cpp
Date: 2026-03-05 15:57:40
Exec Total Coverage
Lines: 367 367 100.0%
Functions: 68 68 100.0%
Branches: 108 108 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2026 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <algorithm>
6 #include <limits>
7
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/morphology/morphology.h"
10 #include "kleidicv/morphology/workspace.h"
11 #include "kleidicv/neon.h"
12 #include "kleidicv/types.h"
13
14 namespace kleidicv::neon {
15
16 template <typename ScalarType, typename O>
17 class VerticalOp final {
18 public:
19 using VecTraits = neon::VecTraits<ScalarType>;
20
21 382 VerticalOp(Rectangle rect, Rectangle kernel) : rect_(rect), kernel_(kernel) {}
22
23 382 void process_rows(IndirectRows<ScalarType> src_rows,
24 Rows<ScalarType> dst_rows) {
25
4/4
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 190 times.
✓ Branch 2 taken 17 times.
✓ Branch 3 taken 158 times.
382 if (KLEIDICV_UNLIKELY(kernel_.height()) == 1) {
26 34 CopyRows<ScalarType>::copy_rows(rect_, src_rows, dst_rows);
27 34 return;
28 }
29
30 // Iterate across the rows from top to bottom. This implementation can
31 // handle two rows at once.
32
4/4
✓ Branch 0 taken 466 times.
✓ Branch 1 taken 190 times.
✓ Branch 2 taken 357 times.
✓ Branch 3 taken 158 times.
1171 for (size_t height = 0; height < rect_.height(); height += 2) {
33 // Iterate across the columns from left to right.
34 1646 LoopUnroll2<TryToAvoidTailLoop> loop{rect_.width() * src_rows.channels(),
35 823 VecTraits::num_lanes()};
36 // clang-format off
37 loop
38 847 .unroll_four_times([&](size_t index) {
39 24 vector_path_4x(src_rows, dst_rows, index, height);
40 24 })
41 855 .unroll_twice([&](size_t index) {
42 32 vector_path_2x(src_rows, dst_rows, index, height);
43 32 })
44 1249 .unroll_once([&](size_t index) {
45 426 vector_path(src_rows, dst_rows, index, height);
46 426 })
47 3427 .tail([&](size_t index) {
48 2604 scalar_path(src_rows, dst_rows, index, height);
49 2604 });
50 // clang-format on
51 823 src_rows += 2;
52 823 dst_rows += 2;
53 823 }
54 382 }
55
56 private:
57 24 void vector_path_4x(IndirectRows<ScalarType> src_rows,
58 Rows<ScalarType> dst_rows, const size_t index,
59 const size_t height) {
60 24 const ScalarType *src_row = &src_rows[index];
61 24 auto first_row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
62 24 auto first_row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
63 24 auto first_row2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
64 24 auto first_row3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
65 24 ++src_rows;
66
67 24 src_row = &src_rows[index];
68 24 auto acc0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
69 24 auto acc1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
70 24 auto acc2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
71 24 auto acc3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
72 24 ++src_rows;
73
74 24 LoopUnroll loop{kernel_.height() - 2, 2};
75
76 36 loop.unroll_once([&](size_t step) {
77 12 const ScalarType *src_row0 = &src_rows.at(0)[index];
78 12 const ScalarType *src_row1 = &src_rows.at(1)[index];
79 12 auto row00 = vld1q(&src_row0[0 * VecTraits::num_lanes()]);
80 12 auto row01 = vld1q(&src_row0[1 * VecTraits::num_lanes()]);
81 12 auto row02 = vld1q(&src_row0[2 * VecTraits::num_lanes()]);
82 12 auto row03 = vld1q(&src_row0[3 * VecTraits::num_lanes()]);
83 12 auto row10 = vld1q(&src_row1[0 * VecTraits::num_lanes()]);
84 12 auto row11 = vld1q(&src_row1[1 * VecTraits::num_lanes()]);
85 12 auto row12 = vld1q(&src_row1[2 * VecTraits::num_lanes()]);
86 12 auto row13 = vld1q(&src_row1[3 * VecTraits::num_lanes()]);
87 12 acc0 = O::operation(acc0, O::operation(row00, row10));
88 12 acc1 = O::operation(acc1, O::operation(row01, row11));
89 12 acc2 = O::operation(acc2, O::operation(row02, row12));
90 12 acc3 = O::operation(acc3, O::operation(row03, row13));
91 12 src_rows += step;
92 12 });
93
94 44 loop.tail([&](size_t /* index */) {
95 20 const ScalarType *src_row = &src_rows[index];
96 20 auto row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
97 20 auto row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
98 20 auto row2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
99 20 auto row3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
100 20 acc0 = O::operation(acc0, row0);
101 20 acc1 = O::operation(acc1, row1);
102 20 acc2 = O::operation(acc2, row2);
103 20 acc3 = O::operation(acc3, row3);
104 20 ++src_rows;
105 20 });
106
107 // Save partial results which do not contain the first row.
108 24 auto partial_acc0 = acc0;
109 24 auto partial_acc1 = acc1;
110 24 auto partial_acc2 = acc2;
111 24 auto partial_acc3 = acc3;
112
113 // Take the first row into account.
114 24 acc0 = O::operation(acc0, first_row0);
115 24 acc1 = O::operation(acc1, first_row1);
116 24 acc2 = O::operation(acc2, first_row2);
117 24 acc3 = O::operation(acc3, first_row3);
118
119 // Store the results.
120 24 ScalarType *dst_row = &dst_rows[index];
121 24 vst1q(&dst_row[0 * VecTraits::num_lanes()], acc0);
122 24 vst1q(&dst_row[1 * VecTraits::num_lanes()], acc1);
123 24 vst1q(&dst_row[2 * VecTraits::num_lanes()], acc2);
124 24 vst1q(&dst_row[3 * VecTraits::num_lanes()], acc3);
125
126 // Try to process one more row, because it is relatively cheap to do so.
127
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 8 times.
24 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
128 8 return;
129 }
130
131 16 ++dst_rows;
132
133 16 src_row = &src_rows[index];
134 16 auto next_row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
135 16 auto next_row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
136 16 auto next_row2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
137 16 auto next_row3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
138
139 16 acc0 = O::operation(partial_acc0, next_row0);
140 16 acc1 = O::operation(partial_acc1, next_row1);
141 16 acc2 = O::operation(partial_acc2, next_row2);
142 16 acc3 = O::operation(partial_acc3, next_row3);
143
144 16 dst_row = &dst_rows[index];
145 16 vst1q(&dst_row[0 * VecTraits::num_lanes()], acc0);
146 16 vst1q(&dst_row[1 * VecTraits::num_lanes()], acc1);
147 16 vst1q(&dst_row[2 * VecTraits::num_lanes()], acc2);
148 16 vst1q(&dst_row[3 * VecTraits::num_lanes()], acc3);
149 24 }
150
151 32 void vector_path_2x(IndirectRows<ScalarType> src_rows,
152 Rows<ScalarType> dst_rows, const size_t index,
153 const size_t height) {
154 32 const ScalarType *src_row = &src_rows[index];
155 32 auto first_row0 = vld1q(&src_row[0]);
156 32 auto first_row1 = vld1q(&src_row[VecTraits::num_lanes()]);
157 32 ++src_rows;
158
159 32 src_row = &src_rows[index];
160 32 auto acc0 = vld1q(&src_row[0]);
161 32 auto acc1 = vld1q(&src_row[VecTraits::num_lanes()]);
162 32 ++src_rows;
163
164 32 LoopUnroll loop{kernel_.height() - 2, 2};
165
166 48 loop.unroll_once([&](size_t step) {
167 16 const ScalarType *src_row0 = &src_rows.at(0)[index];
168 16 const ScalarType *src_row1 = &src_rows.at(1)[index];
169 16 auto row00 = vld1q(&src_row0[0]);
170 16 auto row01 = vld1q(&src_row0[VecTraits::num_lanes()]);
171 16 auto row10 = vld1q(&src_row1[0]);
172 16 auto row11 = vld1q(&src_row1[VecTraits::num_lanes()]);
173 16 acc0 = O::operation(acc0, O::operation(row00, row10));
174 16 acc1 = O::operation(acc1, O::operation(row01, row11));
175 16 src_rows += step;
176 16 });
177
178 52 loop.tail([&](size_t /* index */) {
179 20 const ScalarType *src_row = &src_rows[index];
180 20 auto row0 = vld1q(&src_row[0]);
181 20 auto row1 = vld1q(&src_row[VecTraits::num_lanes()]);
182 20 acc0 = O::operation(acc0, row0);
183 20 acc1 = O::operation(acc1, row1);
184 20 ++src_rows;
185 20 });
186
187 // Save partial results which do not contain the first row.
188 32 auto partial_acc0 = acc0;
189 32 auto partial_acc1 = acc1;
190
191 // Take the first row into account.
192 32 acc0 = O::operation(acc0, first_row0);
193 32 acc1 = O::operation(acc1, first_row1);
194
195 // Store the results.
196 32 ScalarType *dst_row = &dst_rows[index];
197 32 vst1q(&dst_row[0], acc0);
198 32 vst1q(&dst_row[VecTraits::num_lanes()], acc1);
199
200 // Try to process one more row, because it is relatively cheap to do so.
201
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 14 times.
32 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
202 6 return;
203 }
204
205 26 ++dst_rows;
206
207 26 src_row = &src_rows[index];
208 26 auto next_row0 = vld1q(&src_row[0]);
209 26 auto next_row1 = vld1q(&src_row[VecTraits::num_lanes()]);
210
211 26 acc0 = O::operation(partial_acc0, next_row0);
212 26 acc1 = O::operation(partial_acc1, next_row1);
213
214 26 dst_row = &dst_rows[index];
215 26 vst1q(&dst_row[0], acc0);
216 26 vst1q(&dst_row[VecTraits::num_lanes()], acc1);
217 32 }
218
219 426 void vector_path(IndirectRows<ScalarType> src_rows, Rows<ScalarType> dst_rows,
220 const size_t index, const size_t height) {
221 426 auto first_row = vld1q(&src_rows[index]);
222 426 ++src_rows;
223
224 426 auto acc = vld1q(&src_rows[index]);
225 426 ++src_rows;
226
227 426 LoopUnroll loop{kernel_.height() - 2, 2};
228
229 902 loop.unroll_once([&](size_t step) {
230 476 auto row0 = vld1q(&src_rows.at(0)[index]);
231 476 auto row1 = vld1q(&src_rows.at(1)[index]);
232 476 acc = O::operation(acc, O::operation(row0, row1));
233 476 src_rows += step;
234 476 });
235
236 714 loop.tail([&](size_t /* index */) {
237 288 auto row = vld1q(&src_rows[index]);
238 288 acc = O::operation(acc, row);
239 288 ++src_rows;
240 288 });
241
242 // Save partial result which does not contain the first row.
243 426 auto partial_acc = acc;
244
245 // Take the first row into account.
246 426 acc = O::operation(acc, first_row);
247
248 // Store the results.
249 426 vst1q(&dst_rows[index], acc);
250
251 // Try to process one more row, because it is relatively cheap to do so.
252
4/4
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 212 times.
✓ Branch 2 taken 42 times.
✓ Branch 3 taken 136 times.
426 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
253 78 return;
254 }
255
256 348 ++dst_rows;
257
258 348 auto next_row = vld1q(&src_rows[index]);
259 348 acc = O::operation(partial_acc, next_row);
260 348 vst1q(&dst_rows[index], acc);
261 426 }
262
263 2604 void scalar_path(IndirectRows<ScalarType> src_rows, Rows<ScalarType> dst_rows,
264 const size_t index, const size_t height) {
265 2604 disable_loop_vectorization();
266
267 2604 ScalarType first_row = src_rows[index];
268 2604 ++src_rows;
269
270 2604 ScalarType acc = src_rows[index];
271 2604 ++src_rows;
272
273 2604 LoopUnroll loop{kernel_.height() - 2, 2};
274
275 5029 loop.unroll_once([&](size_t step) {
276 2425 auto row0 = src_rows.at(0)[index];
277 2425 auto row1 = src_rows.at(1)[index];
278 2425 acc = O::operation(acc, O::operation(row0, row1));
279 2425 src_rows += step;
280 2425 });
281
282 4413 loop.tail([&](size_t /* index */) {
283 1809 auto row = src_rows[index];
284 1809 acc = O::operation(acc, row);
285 1809 ++src_rows;
286 1809 });
287
288 // Save partial result which does not contain the first row.
289 2604 auto partial_acc = acc;
290
291 // Take the first row into account.
292 2604 acc = O::operation(acc, first_row);
293
294 // Store the results.
295 2604 dst_rows[index] = acc;
296
297 // Try to process one more row, because it is relatively cheap to do so.
298
4/4
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 1220 times.
✓ Branch 2 taken 205 times.
✓ Branch 3 taken 1020 times.
2604 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
299 364 return;
300 }
301
302 2240 ++dst_rows;
303
304 2240 auto next_row = src_rows[index];
305 2240 acc = O::operation(partial_acc, next_row);
306 2240 dst_rows[index] = acc;
307 2604 }
308
309 Rectangle rect_;
310 Rectangle kernel_;
311 }; // end of class VerticalOp<ScalarType, O>
312
313 template <typename ScalarType, typename O>
314 class HorizontalOp final {
315 public:
316 using VecTraits = neon::VecTraits<ScalarType>;
317
318 2628 HorizontalOp(Rectangle rect, Rectangle kernel)
319 2628 : rect_(rect), kernel_(kernel) {}
320
321 2628 void process_rows(Rows<const ScalarType> src_rows,
322 Rows<ScalarType> dst_rows) {
323 // Iterate across the rows from top to bottom.
324
4/4
✓ Branch 0 taken 1532 times.
✓ Branch 1 taken 1532 times.
✓ Branch 2 taken 1096 times.
✓ Branch 3 taken 1096 times.
5256 for (size_t height = 0; height < rect_.height(); ++height) {
325 // Iterate across the columns from left to right.
326 5256 LoopUnroll2<TryToAvoidTailLoop> loop{rect_.width() * src_rows.channels(),
327 2628 VecTraits::num_lanes()};
328 // clang-format off
329 loop
330 2696 .unroll_four_times([&](size_t index) {
331 68 vector_path_4x(src_rows, dst_rows, index);
332 68 })
333 2720 .unroll_twice([&](size_t index) {
334 92 vector_path_2x(src_rows, dst_rows, index);
335 92 })
336 4140 .unroll_once([&](size_t index) {
337 1512 vector_path(src_rows, dst_rows, index);
338 1512 })
339 10931 .tail([&](size_t index) {
340 8303 scalar_path(src_rows, dst_rows, index);
341 8303 });
342 // clang-format on
343 2628 ++src_rows;
344 2628 ++dst_rows;
345 2628 }
346 2628 }
347
348 private:
349 68 void vector_path_4x(Rows<const ScalarType> src_rows,
350 Rows<ScalarType> dst_rows, const size_t index) {
351 68 const auto *src_row = &src_rows[index];
352 68 auto acc0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
353 68 auto acc1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
354 68 auto acc2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
355 68 auto acc3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
356
357
4/4
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 92 times.
✓ Branch 2 taken 34 times.
✓ Branch 3 taken 92 times.
252 for (size_t width = 1; width < kernel_.width(); ++width) {
358 184 src_row = &src_rows[index + width * src_rows.channels()];
359 184 auto row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
360 184 auto row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
361 184 auto row2 = vld1q(&src_row[2 * VecTraits::num_lanes()]);
362 184 auto row3 = vld1q(&src_row[3 * VecTraits::num_lanes()]);
363 184 acc0 = O::operation(acc0, row0);
364 184 acc1 = O::operation(acc1, row1);
365 184 acc2 = O::operation(acc2, row2);
366 184 acc3 = O::operation(acc3, row3);
367 184 }
368
369 68 auto dst_row = &dst_rows[index];
370 68 vst1q(&dst_row[0 * VecTraits::num_lanes()], acc0);
371 68 vst1q(&dst_row[1 * VecTraits::num_lanes()], acc1);
372 68 vst1q(&dst_row[2 * VecTraits::num_lanes()], acc2);
373 68 vst1q(&dst_row[3 * VecTraits::num_lanes()], acc3);
374 68 }
375
376 92 void vector_path_2x(Rows<const ScalarType> src_rows,
377 Rows<ScalarType> dst_rows, const size_t index) {
378 92 const auto *src_row = &src_rows[index];
379 92 auto acc0 = vld1q(&src_row[0]);
380 92 auto acc1 = vld1q(&src_row[VecTraits::num_lanes()]);
381
382
4/4
✓ Branch 0 taken 40 times.
✓ Branch 1 taken 104 times.
✓ Branch 2 taken 52 times.
✓ Branch 3 taken 236 times.
432 for (size_t width = 1; width < kernel_.width(); ++width) {
383 340 src_row = &src_rows[index + width * src_rows.channels()];
384 340 auto row0 = vld1q(&src_row[0 * VecTraits::num_lanes()]);
385 340 auto row1 = vld1q(&src_row[1 * VecTraits::num_lanes()]);
386 340 acc0 = O::operation(acc0, row0);
387 340 acc1 = O::operation(acc1, row1);
388 340 }
389
390 92 auto dst_row = &dst_rows[index];
391 92 vst1q(&dst_row[0], acc0);
392 92 vst1q(&dst_row[VecTraits::num_lanes()], acc1);
393 92 }
394
395 1512 void vector_path(Rows<const ScalarType> src_rows, Rows<ScalarType> dst_rows,
396 const size_t index) {
397 1512 auto acc = vld1q(&src_rows[index]);
398
399
4/4
✓ Branch 0 taken 902 times.
✓ Branch 1 taken 2292 times.
✓ Branch 2 taken 610 times.
✓ Branch 3 taken 2520 times.
6324 for (size_t width = 1; width < kernel_.width(); ++width) {
400 // TODO: Check if EXT was any faster.
401 4812 const auto *src_row = &src_rows[index + width * src_rows.channels()];
402 4812 acc = O::operation(acc, vld1q(&src_row[0]));
403 4812 }
404
405 1512 vst1q(&dst_rows[index], acc);
406 1512 }
407
408 8303 void scalar_path(Rows<const ScalarType> src_rows, Rows<ScalarType> dst_rows,
409 const size_t index) {
410 8303 auto acc = src_rows[index];
411
412
4/4
✓ Branch 0 taken 4508 times.
✓ Branch 1 taken 13656 times.
✓ Branch 2 taken 3795 times.
✓ Branch 3 taken 13880 times.
35839 for (size_t width = 1; width < kernel_.width(); ++width) {
413 27536 disable_loop_vectorization();
414 27536 acc = O::operation(acc, src_rows[index + width * src_rows.channels()]);
415 27536 }
416
417 8303 dst_rows[index] = acc;
418 8303 }
419
420 Rectangle rect_;
421 Rectangle kernel_;
422 }; // end of class HorizontalOp<ScalarType, O>
423
424 template <typename ScalarType>
425 class Min final {
426 public:
427 using VecTraits = neon::VecTraits<ScalarType>;
428 using VectorType = typename VecTraits::VectorType;
429
430 4334 static VectorType operation(VectorType lhs, VectorType rhs) {
431 4334 return vminq_u8(lhs, rhs);
432 }
433
434 18378 static ScalarType operation(ScalarType lhs, ScalarType rhs) {
435 18378 return std::min(lhs, rhs);
436 }
437 }; // end of class Min<ScalarType>
438
439 template <typename ScalarType>
440 class Max final {
441 public:
442 using VecTraits = neon::VecTraits<ScalarType>;
443 using VectorType = typename VecTraits::VectorType;
444
445 4464 static VectorType operation(VectorType lhs, VectorType rhs) {
446 4464 return vmaxq_u8(lhs, rhs);
447 }
448
449 20661 static ScalarType operation(ScalarType lhs, ScalarType rhs) {
450 20661 return std::max(lhs, rhs);
451 }
452 }; // end of class Max<ScalarType>
453
454 template <typename T>
455 using VerticalMin = VerticalOp<T, Min<T>>;
456 template <typename T>
457 using VerticalMax = VerticalOp<T, Max<T>>;
458
459 template <typename T>
460 using HorizontalMin = HorizontalOp<T, Min<T>>;
461 template <typename T>
462 using HorizontalMax = HorizontalOp<T, Max<T>>;
463
464 // Helper structure for dilate.
465 template <typename ScalarType>
466 class DilateOperation final {
467 public:
468 using SourceType = ScalarType;
469 using BufferType = ScalarType;
470 using DestinationType = ScalarType;
471 using CopyData = MorphologyWorkspace::CopyDataMemcpy<ScalarType>;
472
473 205 explicit DilateOperation(Rectangle kernel) : kernel_{kernel} {}
474
475 1532 void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows,
476 Rows<BufferType> dst_rows) {
477 3064 neon::HorizontalMax<ScalarType>{rect, kernel_}.process_rows(src_rows,
478 1532 dst_rows);
479 1532 }
480
481 207 void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows,
482 Rows<DestinationType> dst_rows) {
483 414 neon::VerticalMax<ScalarType>{rect, kernel_}.process_rows(src_rows,
484 207 dst_rows);
485 207 }
486
487 private:
488 Rectangle kernel_;
489 }; // end of class DilateOperation<ScalarType>
490
491 template <typename T>
492 202 kleidicv_error_t dilate(const T *src, size_t src_stride, T *dst,
493 size_t dst_stride, size_t width, size_t height,
494 size_t channels, size_t kernel_width,
495 size_t kernel_height, size_t anchor_x, size_t anchor_y,
496 kleidicv_border_type_t border_type,
497 const uint8_t *border_value, size_t iterations) {
498
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 201 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 201 times.
202 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
499
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 200 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 200 times.
201 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
500
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 197 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 195 times.
✓ Branch 4 taken 5 times.
✓ Branch 5 taken 195 times.
200 CHECK_IMAGE_SIZE(width, height);
501
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 194 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 192 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 192 times.
195 CHECK_IMAGE_SIZE(kernel_width, kernel_height);
502 192 auto morphology_border_type =
503 192 MorphologyWorkspace::get_border_type(border_type);
504
2/2
✓ Branch 0 taken 187 times.
✓ Branch 1 taken 5 times.
192 if (!morphology_border_type) {
505 5 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
506 }
507
4/4
✓ Branch 0 taken 178 times.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 178 times.
✓ Branch 3 taken 9 times.
374 if (!morphology_is_implemented(width, height, kernel_width, kernel_height,
508 187 channels)) {
509 9 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
510 }
511
512 178 Rectangle rect{width, height};
513 178 Rectangle kernel_rect{kernel_width, kernel_height};
514 178 Point anchor{anchor_x, anchor_y};
515
516 356 auto workspace_variant = MorphologyWorkspace::create(
517 178 kernel_rect, anchor, *morphology_border_type, border_value, channels,
518 178 sizeof(uint8_t), rect);
519
4/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 173 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 173 times.
183 if (auto *err = std::get_if<kleidicv_error_t>(&workspace_variant)) {
520 5 return *err;
521 }
522 173 auto &workspace = *std::get_if<MorphologyWorkspace>(&workspace_variant);
523
524 173 Rows<const T> src_rows{src, src_stride, channels};
525 173 Rows<T> dst_rows{dst, dst_stride, channels};
526
527 173 Rows<const T> current_src_rows = src_rows;
528 173 Rows<T> current_dst_rows = dst_rows;
529
2/2
✓ Branch 0 taken 205 times.
✓ Branch 1 taken 173 times.
378 for (size_t i = 0; i < iterations; ++i) {
530 205 DilateOperation<T> operation{kernel_rect};
531 205 workspace.process(current_src_rows, current_dst_rows, operation);
532 // Update source for the next iteration.
533 205 current_src_rows = dst_rows;
534 205 }
535 173 return KLEIDICV_OK;
536 202 }
537
538 // Helper structure for erode.
539 template <typename ScalarType>
540 class ErodeOperation final {
541 public:
542 using SourceType = ScalarType;
543 using BufferType = ScalarType;
544 using DestinationType = ScalarType;
545 using CopyData = MorphologyWorkspace::CopyDataMemcpy<ScalarType>;
546
547 173 explicit ErodeOperation(Rectangle kernel) : kernel_{kernel} {}
548
549 1096 void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows,
550 Rows<BufferType> dst_rows) {
551 2192 neon::HorizontalMin<ScalarType>{rect, kernel_}.process_rows(src_rows,
552 1096 dst_rows);
553 1096 }
554
555 175 void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows,
556 Rows<DestinationType> dst_rows) {
557 350 neon::VerticalMin<ScalarType>{rect, kernel_}.process_rows(src_rows,
558 175 dst_rows);
559 175 }
560
561 private:
562 Rectangle kernel_;
563 }; // end of class ErodeOperation<ScalarType>
564
565 template <typename T>
566 186 kleidicv_error_t erode(const T *src, size_t src_stride, T *dst,
567 size_t dst_stride, size_t width, size_t height,
568 size_t channels, size_t kernel_width,
569 size_t kernel_height, size_t anchor_x, size_t anchor_y,
570 kleidicv_border_type_t border_type,
571 const uint8_t *border_value, size_t iterations) {
572
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 185 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 185 times.
186 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
573
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 184 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 184 times.
185 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
574
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 181 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 179 times.
✓ Branch 4 taken 5 times.
✓ Branch 5 taken 179 times.
184 CHECK_IMAGE_SIZE(width, height);
575
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 178 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 176 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 176 times.
179 CHECK_IMAGE_SIZE(kernel_width, kernel_height);
576 176 auto morphology_border_type =
577 176 MorphologyWorkspace::get_border_type(border_type);
578
2/2
✓ Branch 0 taken 171 times.
✓ Branch 1 taken 5 times.
176 if (!morphology_border_type) {
579 5 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
580 }
581
4/4
✓ Branch 0 taken 162 times.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 162 times.
✓ Branch 3 taken 9 times.
342 if (!morphology_is_implemented(width, height, kernel_width, kernel_height,
582 171 channels)) {
583 9 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
584 }
585
586 162 Rectangle rect{width, height};
587 162 Rectangle kernel_rect{kernel_width, kernel_height};
588 162 Point anchor{anchor_x, anchor_y};
589
590 324 auto workspace_variant = MorphologyWorkspace::create(
591 162 kernel_rect, anchor, *morphology_border_type, border_value, channels,
592 162 sizeof(uint8_t), rect);
593
4/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 157 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 157 times.
167 if (auto *err = std::get_if<kleidicv_error_t>(&workspace_variant)) {
594 5 return *err;
595 }
596 157 auto &workspace = *std::get_if<MorphologyWorkspace>(&workspace_variant);
597
598 157 Rows<const T> src_rows{src, src_stride, channels};
599 157 Rows<T> dst_rows{dst, dst_stride, channels};
600
601 157 Rows<const T> current_src_rows = src_rows;
602 157 Rows<T> current_dst_rows = dst_rows;
603
2/2
✓ Branch 0 taken 173 times.
✓ Branch 1 taken 157 times.
330 for (size_t i = 0; i < iterations; ++i) {
604 173 ErodeOperation<T> operation{kernel_rect};
605 173 workspace.process(current_src_rows, current_dst_rows, operation);
606 // Update source for the next iteration.
607 173 current_src_rows = dst_rows;
608 173 }
609 157 return KLEIDICV_OK;
610 186 }
611
612 #define KLEIDICV_INSTANTIATE_TEMPLATE(name, type) \
613 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t name<type>( \
614 const type *src, size_t src_stride, type *dst, size_t dst_stride, \
615 size_t width, size_t height, size_t channels, size_t kernel_width, \
616 size_t kernel_height, size_t anchor_x, size_t anchor_y, \
617 kleidicv_border_type_t border_type, const uint8_t *border_value, \
618 size_t iterations)
619
620 KLEIDICV_INSTANTIATE_TEMPLATE(dilate, uint8_t);
621 KLEIDICV_INSTANTIATE_TEMPLATE(erode, uint8_t);
622
623 } // namespace kleidicv::neon
624