KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/morphology/morphology_sc.h
Date: 2026-03-05 15:57:40
Exec Total Coverage
Lines: 425 425 100.0%
Functions: 166 166 100.0%
Branches: 108 108 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2026 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_MORPHOLOGY_SC_H
6 #define KLEIDICV_MORPHOLOGY_SC_H
7
8 #include <cstdio>
9
10 #include "kleidicv/morphology/morphology.h"
11 #include "kleidicv/morphology/workspace.h"
12 #include "kleidicv/sve2.h"
13 #include "kleidicv/types.h"
14
15 namespace KLEIDICV_TARGET_NAMESPACE {
16
17 template <typename T>
18 class CopyDataSVE2 {
19 class CopyOperation final : public UnrollTwice {
20 public:
21 using ContextType = Context;
22 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<T>;
23 using VectorType = typename VecTraits::VectorType;
24
25 9404 VectorType vector_path(ContextType, VectorType src) KLEIDICV_STREAMING {
26 9404 return src;
27 }
28 }; // end of class CopyOperation
29
30 public:
31 7432 void operator()(Rows<const T> src_rows, Rows<T> dst_rows,
32 size_t length) const KLEIDICV_STREAMING {
33 // 'apply_operation_by_rows' can only handle one channel well
34 // so width must be multiplied in order to copy all the data
35 7432 Rectangle rect{length * dst_rows.channels(), std::size_t{1}};
36 7432 Rows<const T> src_1ch{&src_rows[0], src_rows.stride(), 1};
37 7432 Rows<T> dst_1ch{&dst_rows[0], dst_rows.stride(), 1};
38 7432 CopyOperation op{};
39 7432 apply_operation_by_rows(op, rect, src_1ch, dst_1ch);
40 7432 }
41 };
42
43 template <typename ScalarType, typename O>
44 class VerticalOp final {
45 public:
46 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
47 using Vector4Type = typename VecTraits::Vector4Type;
48 using Vector2Type = typename VecTraits::Vector2Type;
49
50 1218 VerticalOp(Rectangle rect, Rectangle kernel) KLEIDICV_STREAMING
51 1218 : rect_(rect),
52 1218 kernel_(kernel) {}
53
54 1218 void process_rows(IndirectRows<ScalarType> src_rows,
55 Rows<ScalarType> dst_rows) KLEIDICV_STREAMING {
56
4/4
✓ Branch 0 taken 51 times.
✓ Branch 1 taken 606 times.
✓ Branch 2 taken 51 times.
✓ Branch 3 taken 510 times.
1218 if (KLEIDICV_UNLIKELY(kernel_.height()) == 1) {
57 102 CopyRows<ScalarType>::copy_rows(rect_, src_rows, dst_rows);
58 102 return;
59 }
60
61 // Iterate across the rows from top to bottom. This implementation can
62 // handle two rows at once.
63
4/4
✓ Branch 0 taken 2070 times.
✓ Branch 1 taken 606 times.
✓ Branch 2 taken 1743 times.
✓ Branch 3 taken 510 times.
4929 for (size_t height = 0; height < rect_.height(); height += 2) {
64 // Iterate across the columns from left to right.
65 7626 LoopUnroll2 loop{rect_.width() * src_rows.channels(),
66 3813 VecTraits::num_lanes()};
67 // clang-format off
68 3885 loop.unroll_four_times([&](size_t index) KLEIDICV_STREAMING {
69 72 vector_path_4x(src_rows, dst_rows, index, height);
70 72 })
71 3901 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
72 88 vector_path_2x(src_rows, dst_rows, index, height);
73 88 })
74 7626 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
75 3813 svbool_t pg = VecTraits::svwhilelt(index, length);
76
4/4
✓ Branch 0 taken 2389 times.
✓ Branch 1 taken 2070 times.
✓ Branch 2 taken 1947 times.
✓ Branch 3 taken 1743 times.
8149 while (svptest_first(VecTraits::svptrue(), pg)) {
77 4336 vector_path(pg, src_rows, dst_rows, index, height);
78 4336 index += VecTraits::num_lanes();
79 4336 pg = VecTraits::svwhilelt(index, length);
80 }
81 3813 });
82 // clang-format on
83 3813 src_rows += 2;
84 3813 dst_rows += 2;
85 3813 }
86 1218 }
87
88 private:
89 72 void vector_path_4x(IndirectRows<ScalarType> src_rows,
90 Rows<ScalarType> dst_rows, const size_t index,
91 const size_t height) KLEIDICV_STREAMING {
92 72 const ScalarType *src_row = &src_rows[index];
93 #if KLEIDICV_TARGET_SME2
94 24 svcount_t p_counter = VecTraits::svptrue_c();
95 24 Vector4Type v = svld1_x4(p_counter, &src_row[0]);
96 24 auto first_row0 = svget4(v, 0);
97 24 auto first_row1 = svget4(v, 1);
98 24 auto first_row2 = svget4(v, 2);
99 24 auto first_row3 = svget4(v, 3);
100 #else
101 48 auto first_row0 = svld1(VecTraits::svptrue(), &src_row[0]);
102 48 auto first_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
103 48 auto first_row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
104 48 auto first_row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
105 #endif // KLEIDICV_TARGET_SME2
106 72 ++src_rows;
107
108 72 src_row = &src_rows[index];
109 #if KLEIDICV_TARGET_SME2
110 24 v = svld1_x4(p_counter, &src_row[0]);
111 24 auto acc0 = svget4(v, 0);
112 24 auto acc1 = svget4(v, 1);
113 24 auto acc2 = svget4(v, 2);
114 24 auto acc3 = svget4(v, 3);
115 #else
116 48 auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]);
117 48 auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
118 48 auto acc2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
119 48 auto acc3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
120 #endif // KLEIDICV_TARGET_SME2
121 72 ++src_rows;
122
123 72 LoopUnroll loop{kernel_.height() - 2, 2};
124
125 108 loop.unroll_once([&](size_t step) KLEIDICV_STREAMING {
126 36 const ScalarType *src_row0 = &src_rows.at(0)[index];
127 36 const ScalarType *src_row1 = &src_rows.at(1)[index];
128 #if KLEIDICV_TARGET_SME2
129 12 v = svld1_x4(p_counter, src_row0);
130 12 auto row00 = svget4(v, 0);
131 12 auto row01 = svget4(v, 1);
132 12 auto row02 = svget4(v, 2);
133 12 auto row03 = svget4(v, 3);
134 12 v = svld1_x4(p_counter, src_row1);
135 12 auto row10 = svget4(v, 0);
136 12 auto row11 = svget4(v, 1);
137 12 auto row12 = svget4(v, 2);
138 12 auto row13 = svget4(v, 3);
139 #else
140 24 auto row00 = svld1(VecTraits::svptrue(), src_row0);
141 24 auto row01 = svld1_vnum(VecTraits::svptrue(), src_row0, 1);
142 24 auto row02 = svld1_vnum(VecTraits::svptrue(), src_row0, 2);
143 24 auto row03 = svld1_vnum(VecTraits::svptrue(), src_row0, 3);
144 24 auto row10 = svld1(VecTraits::svptrue(), src_row1);
145 24 auto row11 = svld1_vnum(VecTraits::svptrue(), src_row1, 1);
146 24 auto row12 = svld1_vnum(VecTraits::svptrue(), src_row1, 2);
147 24 auto row13 = svld1_vnum(VecTraits::svptrue(), src_row1, 3);
148 #endif // KLEIDICV_TARGET_SME2
149 72 acc0 = O::operation(VecTraits::svptrue(), acc0,
150 36 O::operation(VecTraits::svptrue(), row00, row10));
151 72 acc1 = O::operation(VecTraits::svptrue(), acc1,
152 36 O::operation(VecTraits::svptrue(), row01, row11));
153 72 acc2 = O::operation(VecTraits::svptrue(), acc2,
154 36 O::operation(VecTraits::svptrue(), row02, row12));
155 72 acc3 = O::operation(VecTraits::svptrue(), acc3,
156 36 O::operation(VecTraits::svptrue(), row03, row13));
157 36 src_rows += step;
158 36 });
159
160 132 loop.tail([&](size_t /* index */) // NOLINT(readability/casting)
161 KLEIDICV_STREAMING {
162 60 const ScalarType *src_row = &src_rows[index];
163 #if KLEIDICV_TARGET_SME2
164 20 v = svld1_x4(p_counter, &src_row[0]);
165 20 auto row0 = svget4(v, 0);
166 20 auto row1 = svget4(v, 1);
167 20 auto row2 = svget4(v, 2);
168 20 auto row3 = svget4(v, 3);
169 #else
170 40 auto row0 = svld1(VecTraits::svptrue(), &src_row[0]);
171 40 auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
172 40 auto row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
173 40 auto row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
174 #endif // KLEIDICV_TARGET_SME2
175 60 acc0 = O::operation(VecTraits::svptrue(), acc0, row0);
176 60 acc1 = O::operation(VecTraits::svptrue(), acc1, row1);
177 60 acc2 = O::operation(VecTraits::svptrue(), acc2, row2);
178 60 acc3 = O::operation(VecTraits::svptrue(), acc3, row3);
179 60 ++src_rows;
180 60 });
181
182 // Save partial results which do not contain the first row.
183 72 auto partial_acc0 = acc0;
184 72 auto partial_acc1 = acc1;
185 72 auto partial_acc2 = acc2;
186 72 auto partial_acc3 = acc3;
187
188 // Take the first row into account.
189 72 acc0 = O::operation(VecTraits::svptrue(), acc0, first_row0);
190 72 acc1 = O::operation(VecTraits::svptrue(), acc1, first_row1);
191 72 acc2 = O::operation(VecTraits::svptrue(), acc2, first_row2);
192 72 acc3 = O::operation(VecTraits::svptrue(), acc3, first_row3);
193
194 // Store the results.
195 72 ScalarType *dst_row = &dst_rows[index];
196 #if KLEIDICV_TARGET_SME2
197 24 Vector4Type res4 = svcreate4(acc0, acc1, acc2, acc3);
198 24 svst1(p_counter, &dst_row[0], res4);
199 #else
200 48 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
201 48 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
202 48 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2);
203 48 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3);
204 #endif // KLEIDICV_TARGET_SME2
205
206 // Try to process one more row, because it is relatively cheap to do so.
207
4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 24 times.
72 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
208 24 return;
209 }
210
211 48 ++dst_rows;
212
213 48 src_row = &src_rows[index];
214 #if KLEIDICV_TARGET_SME2
215 16 v = svld1_x4(p_counter, &src_row[0]);
216 16 auto next_row0 = svget4(v, 0);
217 16 auto next_row1 = svget4(v, 1);
218 16 auto next_row2 = svget4(v, 2);
219 16 auto next_row3 = svget4(v, 3);
220 #else
221 32 auto next_row0 = svld1(VecTraits::svptrue(), &src_row[0]);
222 32 auto next_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
223 32 auto next_row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
224 32 auto next_row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
225 #endif // KLEIDICV_TARGET_SME2
226
227 48 acc0 = O::operation(VecTraits::svptrue(), partial_acc0, next_row0);
228 48 acc1 = O::operation(VecTraits::svptrue(), partial_acc1, next_row1);
229 48 acc2 = O::operation(VecTraits::svptrue(), partial_acc2, next_row2);
230 48 acc3 = O::operation(VecTraits::svptrue(), partial_acc3, next_row3);
231
232 // Store the results.
233 48 dst_row = &dst_rows[index];
234 #if KLEIDICV_TARGET_SME2
235 16 res4 = svcreate4(acc0, acc1, acc2, acc3);
236 16 svst1(p_counter, &dst_row[0], res4);
237 #else
238 32 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
239 32 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
240 32 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2);
241 32 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3);
242 #endif // KLEIDICV_TARGET_SME2
243 72 }
244
245 88 void vector_path_2x(IndirectRows<ScalarType> src_rows,
246 Rows<ScalarType> dst_rows, const size_t index,
247 const size_t height) KLEIDICV_STREAMING {
248 88 const ScalarType *src_row = &src_rows[index];
249 #if KLEIDICV_TARGET_SME2
250 28 svcount_t p_counter = VecTraits::svptrue_c();
251 28 Vector2Type v = svld1_x2(p_counter, &src_row[0]);
252 28 auto first_row0 = svget2(v, 0);
253 28 auto first_row1 = svget2(v, 1);
254 #else
255 60 auto first_row0 = svld1(VecTraits::svptrue(), &src_row[0]);
256 60 auto first_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
257 #endif // KLEIDICV_TARGET_SME2
258
259 88 ++src_rows;
260
261 88 src_row = &src_rows[index];
262 #if KLEIDICV_TARGET_SME2
263 28 v = svld1_x2(p_counter, &src_row[0]);
264 28 auto acc0 = svget2(v, 0);
265 28 auto acc1 = svget2(v, 1);
266 #else
267 60 auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]);
268 60 auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
269 #endif // KLEIDICV_TARGET_SME2
270 88 ++src_rows;
271
272 88 LoopUnroll loop{kernel_.height() - 2, 2};
273
274 128 loop.unroll_once([&](size_t step) KLEIDICV_STREAMING {
275 40 const ScalarType *src_row0 = &src_rows.at(0)[index];
276 40 const ScalarType *src_row1 = &src_rows.at(1)[index];
277 #if KLEIDICV_TARGET_SME2
278 12 Vector2Type v0 = svld1_x2(p_counter, src_row0);
279 12 Vector2Type v1 = svld1_x2(p_counter, src_row1);
280 12 auto row00 = svget2(v0, 0);
281 12 auto row01 = svget2(v0, 1);
282 12 auto row10 = svget2(v1, 0);
283 12 auto row11 = svget2(v1, 1);
284 #else
285 28 auto row00 = svld1(VecTraits::svptrue(), src_row0);
286 28 auto row01 = svld1_vnum(VecTraits::svptrue(), src_row0, 1);
287 28 auto row10 = svld1(VecTraits::svptrue(), src_row1);
288 28 auto row11 = svld1_vnum(VecTraits::svptrue(), src_row1, 1);
289 #endif // KLEIDICV_TARGET_SME2
290 80 acc0 = O::operation(VecTraits::svptrue(), acc0,
291 40 O::operation(VecTraits::svptrue(), row00, row10));
292 80 acc1 = O::operation(VecTraits::svptrue(), acc1,
293 40 O::operation(VecTraits::svptrue(), row01, row11));
294 40 src_rows += step;
295 40 });
296
297 148 loop.tail([&](size_t /* index */) // NOLINT(readability/casting)
298 KLEIDICV_STREAMING {
299 60 const ScalarType *src_row = &src_rows[index];
300 #if KLEIDICV_TARGET_SME2
301 20 v = svld1_x2(p_counter, &src_row[0]);
302 20 auto row0 = svget2(v, 0);
303 20 auto row1 = svget2(v, 1);
304 #else
305 40 auto row0 = svld1(VecTraits::svptrue(), &src_row[0]);
306 40 auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
307 #endif // KLEIDICV_TARGET_SME2
308 60 acc0 = O::operation(VecTraits::svptrue(), acc0, row0);
309 60 acc1 = O::operation(VecTraits::svptrue(), acc1, row1);
310 60 ++src_rows;
311 60 });
312
313 // Save partial results which do not contain the first row.
314 88 auto partial_acc0 = acc0;
315 88 auto partial_acc1 = acc1;
316
317 // Take the first row into account.
318 88 acc0 = O::operation(VecTraits::svptrue(), acc0, first_row0);
319 88 acc1 = O::operation(VecTraits::svptrue(), acc1, first_row1);
320
321 // Store the results.
322 88 ScalarType *dst_row = &dst_rows[index];
323 #if KLEIDICV_TARGET_SME2
324 28 Vector2Type res2 = svcreate2(acc0, acc1);
325 28 svst1(p_counter, &dst_row[0], res2);
326 #else
327 60 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
328 60 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
329 #endif // KLEIDICV_TARGET_SME2
330
331 // Try to process one more row, because it is relatively cheap to do so.
332
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 38 times.
88 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
333 14 return;
334 }
335
336 74 ++dst_rows;
337
338 74 src_row = &src_rows[index];
339 #if KLEIDICV_TARGET_SME2
340 24 v = svld1_x2(p_counter, &src_row[0]);
341 24 auto next_row0 = svget2(v, 0);
342 24 auto next_row1 = svget2(v, 1);
343 #else
344 50 auto next_row0 = svld1(VecTraits::svptrue(), &src_row[0]);
345 50 auto next_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
346 #endif // KLEIDICV_TARGET_SME2
347
348 74 acc0 = O::operation(VecTraits::svptrue(), partial_acc0, next_row0);
349 74 acc1 = O::operation(VecTraits::svptrue(), partial_acc1, next_row1);
350
351 74 dst_row = &dst_rows[index];
352 #if KLEIDICV_TARGET_SME2
353 24 res2 = svcreate2(acc0, acc1);
354 24 svst1(p_counter, &dst_row[0], res2);
355 #else
356 50 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
357 50 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
358 #endif // KLEIDICV_TARGET_SME2
359 88 }
360
361 4336 void vector_path(svbool_t pg, IndirectRows<ScalarType> src_rows,
362 Rows<ScalarType> dst_rows, const size_t index,
363 const size_t height) KLEIDICV_STREAMING {
364 4336 auto first_row = svld1(pg, &src_rows[index]);
365 4336 ++src_rows;
366
367 4336 auto acc = svld1(pg, &src_rows[index]);
368 4336 ++src_rows;
369
370 4336 LoopUnroll loop{kernel_.height() - 2, 2};
371
372 7163 loop.unroll_once([&](size_t step) KLEIDICV_STREAMING {
373 2827 auto row0 = svld1(pg, &src_rows.at(0)[index]);
374 2827 auto row1 = svld1(pg, &src_rows.at(1)[index]);
375 2827 acc = O::operation(pg, acc, O::operation(pg, row0, row1));
376 2827 src_rows += step;
377 2827 });
378
379 7721 loop.tail([&](size_t /* index */) // NOLINT(readability/casting)
380 KLEIDICV_STREAMING {
381 3385 auto row = svld1(pg, &src_rows[index]);
382 3385 acc = O::operation(pg, acc, row);
383 3385 ++src_rows;
384 3385 });
385
386 // Save partial result which does not contain the first row.
387 4336 auto partial_acc = acc;
388
389 // Take the first row into account.
390 4336 acc = O::operation(pg, acc, first_row);
391
392 // Store the results.
393 4336 svst1(pg, &dst_rows[index], acc);
394
395 // Try to process one more row, because it is relatively cheap to do so.
396
4/4
✓ Branch 0 taken 200 times.
✓ Branch 1 taken 2189 times.
✓ Branch 2 taken 214 times.
✓ Branch 3 taken 1733 times.
4336 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
397 414 return;
398 }
399
400 3922 ++dst_rows;
401
402 3922 auto next_row = svld1(pg, &src_rows[index]);
403 3922 acc = O::operation(pg, partial_acc, next_row);
404 3922 svst1(pg, &dst_rows[index], acc);
405 4336 }
406
407 Rectangle rect_;
408 Rectangle kernel_;
409 }; // end of class VerticalOp<ScalarType, )>
410
411 template <typename ScalarType, typename O>
412 class HorizontalOp final {
413 public:
414 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
415 using Vector4Type = typename VecTraits::Vector4Type;
416 using Vector2Type = typename VecTraits::Vector2Type;
417
418 10572 HorizontalOp(Rectangle rect, Rectangle kernel) KLEIDICV_STREAMING
419 10572 : rect_(rect),
420 10572 kernel_(kernel) {}
421
422 10572 void process_rows(Rows<const ScalarType> src_rows,
423 Rows<ScalarType> dst_rows) KLEIDICV_STREAMING {
424 // Iterate across the rows from top to bottom.
425
4/4
✓ Branch 0 taken 5940 times.
✓ Branch 1 taken 5940 times.
✓ Branch 2 taken 4632 times.
✓ Branch 3 taken 4632 times.
21144 for (size_t height = 0; height < rect_.height(); ++height) {
426 // Iterate across the columns from left to right.
427 21144 LoopUnroll2 loop{rect_.width() * src_rows.channels(),
428 10572 VecTraits::num_lanes()};
429 // clang-format off
430 10776 loop.unroll_four_times([&](size_t index) KLEIDICV_STREAMING {
431 204 vector_path_4x(src_rows, dst_rows, index);
432 204 })
433 10824 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
434 252 vector_path_2x(src_rows, dst_rows, index);
435 252 })
436 21144 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
437 10572 svbool_t pg = VecTraits::svwhilelt(index, length);
438
4/4
✓ Branch 0 taken 7134 times.
✓ Branch 1 taken 5940 times.
✓ Branch 2 taken 5350 times.
✓ Branch 3 taken 4632 times.
23056 while (svptest_first(VecTraits::svptrue(), pg)) {
439 12484 vector_path(pg, src_rows, dst_rows, index);
440 12484 index += VecTraits::num_lanes();
441 12484 pg = VecTraits::svwhilelt(index, length);
442 }
443 10572 });
444 // clang-format on
445 10572 ++src_rows;
446 10572 ++dst_rows;
447 10572 }
448 10572 }
449
450 private:
451 204 void vector_path_4x(Rows<const ScalarType> src_rows,
452 Rows<ScalarType> dst_rows,
453 const size_t index) KLEIDICV_STREAMING {
454 204 const auto *src_row = &src_rows[index];
455 #if KLEIDICV_TARGET_SME2
456 68 svcount_t p_counter = VecTraits::svptrue_c();
457 68 Vector4Type v = svld1_x4(p_counter, &src_row[0]);
458 68 auto acc0 = svget4(v, 0);
459 68 auto acc1 = svget4(v, 1);
460 68 auto acc2 = svget4(v, 2);
461 68 auto acc3 = svget4(v, 3);
462 #else
463 136 auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]);
464 136 auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
465 136 auto acc2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
466 136 auto acc3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
467 #endif // KLEIDICV_TARGET_SME2
468
469
4/4
✓ Branch 0 taken 102 times.
✓ Branch 1 taken 276 times.
✓ Branch 2 taken 102 times.
✓ Branch 3 taken 276 times.
756 for (size_t width = 1; width < kernel_.width(); ++width) {
470 552 src_row = &src_rows[index + width * src_rows.channels()];
471 #if KLEIDICV_TARGET_SME2
472 184 Vector4Type v = svld1_x4(p_counter, &src_row[0]);
473 184 auto row0 = svget4(v, 0);
474 184 auto row1 = svget4(v, 1);
475 184 auto row2 = svget4(v, 2);
476 184 auto row3 = svget4(v, 3);
477 #else
478 368 auto row0 = svld1(VecTraits::svptrue(), &src_row[0]);
479 368 auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
480 368 auto row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
481 368 auto row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
482 #endif // KLEIDICV_TARGET_SME2
483 552 acc0 = O::operation(VecTraits::svptrue(), acc0, row0);
484 552 acc1 = O::operation(VecTraits::svptrue(), acc1, row1);
485 552 acc2 = O::operation(VecTraits::svptrue(), acc2, row2);
486 552 acc3 = O::operation(VecTraits::svptrue(), acc3, row3);
487 552 }
488
489 204 auto dst_row = &dst_rows[index];
490 #if KLEIDICV_TARGET_SME2
491 68 Vector4Type res4 = svcreate4(acc0, acc1, acc2, acc3);
492 68 svst1(p_counter, &dst_row[0], res4);
493 #else
494 136 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
495 136 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
496 136 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2);
497 136 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3);
498 #endif // KLEIDICV_TARGET_SME2
499 204 }
500
501 252 void vector_path_2x(Rows<const ScalarType> src_rows,
502 Rows<ScalarType> dst_rows,
503 const size_t index) KLEIDICV_STREAMING {
504 252 const auto *src_row = &src_rows[index];
505 #if KLEIDICV_TARGET_SME2
506 80 svcount_t p_counter = VecTraits::svptrue_c();
507 80 Vector2Type v = svld1_x2(p_counter, &src_row[0]);
508 80 auto acc0 = svget2(v, 0);
509 80 auto acc1 = svget2(v, 1);
510 #else
511 172 auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]);
512 172 auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
513 #endif // KLEIDICV_TARGET_SME2
514
515
4/4
✓ Branch 0 taken 120 times.
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 132 times.
✓ Branch 3 taken 444 times.
1008 for (size_t width = 1; width < kernel_.width(); ++width) {
516 756 src_row = &src_rows[index + width * src_rows.channels()];
517 #if KLEIDICV_TARGET_SME2
518 208 v = svld1_x2(p_counter, &src_row[0]);
519 208 auto row0 = svget2(v, 0);
520 208 auto row1 = svget2(v, 1);
521 #else
522 548 auto row0 = svld1(VecTraits::svptrue(), &src_row[0]);
523 548 auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
524 #endif // KLEIDICV_TARGET_SME2
525 756 acc0 = O::operation(VecTraits::svptrue(), acc0, row0);
526 756 acc1 = O::operation(VecTraits::svptrue(), acc1, row1);
527 756 }
528
529 252 auto dst_row = &dst_rows[index];
530 #if KLEIDICV_TARGET_SME2
531 80 Vector2Type res2 = svcreate2(acc0, acc1);
532 80 svst1(p_counter, &dst_row[0], res2);
533 #else
534 172 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
535 172 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
536 #endif // KLEIDICV_TARGET_SME2
537 252 }
538
539 12484 void vector_path(svbool_t pg, Rows<const ScalarType> src_rows,
540 Rows<ScalarType> dst_rows,
541 const size_t index) KLEIDICV_STREAMING {
542 12484 auto acc = svld1(pg, &src_rows[index]);
543
544
4/4
✓ Branch 0 taken 7134 times.
✓ Branch 1 taken 16575 times.
✓ Branch 2 taken 5350 times.
✓ Branch 3 taken 16350 times.
45409 for (size_t width = 1; width < kernel_.width(); ++width) {
545 32925 const auto *src_row = &src_rows[index + width * src_rows.channels()];
546 32925 acc = O::operation(pg, acc, svld1(pg, &src_row[0]));
547 32925 }
548
549 12484 svst1(pg, &dst_rows[index], acc);
550 12484 }
551
552 Rectangle rect_;
553 Rectangle kernel_;
554 }; // end of class HorizontalOp<ScalarType>
555
556 template <typename ScalarType>
557 class Min final {
558 public:
559 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
560 using VectorType = typename VecTraits::VectorType;
561
562 25885 static VectorType operation(svbool_t pg, VectorType lhs,
563 VectorType rhs) KLEIDICV_STREAMING {
564 25885 return svmin_x(pg, lhs, rhs);
565 }
566 }; // end of class Min<ScalarType>
567
568 template <typename ScalarType>
569 class Max final {
570 public:
571 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
572 using VectorType = typename VecTraits::VectorType;
573
574 29669 static VectorType operation(svbool_t pg, VectorType lhs,
575 VectorType rhs) KLEIDICV_STREAMING {
576 29669 return svmax_x(pg, lhs, rhs);
577 }
578 }; // end of class Max<ScalarType>
579
580 template <typename T>
581 using VerticalMin = VerticalOp<T, Min<T>>;
582 template <typename T>
583 using VerticalMax = VerticalOp<T, Max<T>>;
584
585 template <typename T>
586 using HorizontalMin = HorizontalOp<T, Min<T>>;
587 template <typename T>
588 using HorizontalMax = HorizontalOp<T, Max<T>>;
589
590 template <typename ScalarType, typename CopyDataOperation>
591 class DilateOperation final {
592 public:
593 using SourceType = ScalarType;
594 using BufferType = ScalarType;
595 using DestinationType = ScalarType;
596 using CopyData = CopyDataOperation;
597
598 615 explicit DilateOperation(Rectangle kernel) KLEIDICV_STREAMING
599 615 : kernel_{kernel} {}
600
601 5940 void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows,
602 Rows<BufferType> dst_rows) KLEIDICV_STREAMING {
603 5940 HorizontalMax<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows);
604 5940 }
605
606 657 void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows,
607 Rows<DestinationType> dst_rows) KLEIDICV_STREAMING {
608 657 VerticalMax<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows);
609 657 }
610
611 private:
612 Rectangle kernel_;
613 }; // end of class DilateOperation<ScalarType>
614
615 template <typename T, typename CopyOperation>
616 606 static kleidicv_error_t dilate_sc(
617 const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width,
618 size_t height, size_t channels, size_t kernel_width, size_t kernel_height,
619 size_t anchor_x, size_t anchor_y, kleidicv_border_type_t border_type,
620 const uint8_t *border_value, size_t iterations) KLEIDICV_STREAMING {
621
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 603 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 603 times.
606 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
622
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 600 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 600 times.
603 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
623
6/6
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 591 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 585 times.
✓ Branch 4 taken 15 times.
✓ Branch 5 taken 585 times.
600 CHECK_IMAGE_SIZE(width, height);
624
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 582 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 576 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 576 times.
585 CHECK_IMAGE_SIZE(kernel_width, kernel_height);
625 576 auto morphology_border_type =
626 576 MorphologyWorkspace::get_border_type(border_type);
627
2/2
✓ Branch 0 taken 561 times.
✓ Branch 1 taken 15 times.
576 if (!morphology_border_type) {
628 15 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
629 }
630
4/4
✓ Branch 0 taken 534 times.
✓ Branch 1 taken 27 times.
✓ Branch 2 taken 534 times.
✓ Branch 3 taken 27 times.
1122 if (!morphology_is_implemented(width, height, kernel_width, kernel_height,
631 561 channels)) {
632 27 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
633 }
634
635 534 Rectangle rect{width, height};
636 534 Rectangle kernel_rect{kernel_width, kernel_height};
637 534 Point anchor{anchor_x, anchor_y};
638
639 1068 auto workspace_variant = MorphologyWorkspace::create(
640 534 kernel_rect, anchor, *morphology_border_type, border_value, channels,
641 534 sizeof(uint8_t), rect);
642
4/4
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 519 times.
✓ Branch 2 taken 15 times.
✓ Branch 3 taken 519 times.
549 if (auto *err = std::get_if<kleidicv_error_t>(&workspace_variant)) {
643 15 return *err;
644 }
645 519 auto &workspace = *std::get_if<MorphologyWorkspace>(&workspace_variant);
646
647 519 Rows<const T> src_rows{src, src_stride, channels};
648 519 Rows<T> dst_rows{dst, dst_stride, channels};
649
650 519 Rows<const T> current_src_rows = src_rows;
651 519 Rows<T> current_dst_rows = dst_rows;
652
2/2
✓ Branch 0 taken 615 times.
✓ Branch 1 taken 519 times.
1134 for (size_t i = 0; i < iterations; ++i) {
653 615 DilateOperation<T, CopyOperation> operation{kernel_rect};
654 615 workspace.process(current_src_rows, current_dst_rows, operation);
655 // Update source for the next iteration.
656 615 current_src_rows = dst_rows;
657 615 }
658 519 return KLEIDICV_OK;
659 606 }
660
661 // Helper structure for erode.
662 template <typename ScalarType, typename CopyDataOperation>
663 class ErodeOperation final {
664 public:
665 using SourceType = ScalarType;
666 using BufferType = ScalarType;
667 using DestinationType = ScalarType;
668 using CopyData = CopyDataOperation;
669
670 519 explicit ErodeOperation(Rectangle kernel) KLEIDICV_STREAMING
671 519 : kernel_{kernel} {}
672
673 4632 void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows,
674 Rows<BufferType> dst_rows) KLEIDICV_STREAMING {
675 4632 HorizontalMin<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows);
676 4632 }
677
678 561 void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows,
679 Rows<DestinationType> dst_rows) KLEIDICV_STREAMING {
680 561 VerticalMin<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows);
681 561 }
682
683 private:
684 Rectangle kernel_;
685 }; // end of class ErodeOperation<ScalarType>
686
687 template <typename T, typename CopyOperation>
688 558 static kleidicv_error_t erode_sc(
689 const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width,
690 size_t height, size_t channels, size_t kernel_width, size_t kernel_height,
691 size_t anchor_x, size_t anchor_y, kleidicv_border_type_t border_type,
692 const uint8_t *border_value, size_t iterations) KLEIDICV_STREAMING {
693
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 555 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 555 times.
558 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
694
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 552 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 552 times.
555 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
695
6/6
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 543 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 537 times.
✓ Branch 4 taken 15 times.
✓ Branch 5 taken 537 times.
552 CHECK_IMAGE_SIZE(width, height);
696
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 534 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 528 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 528 times.
537 CHECK_IMAGE_SIZE(kernel_width, kernel_height);
697 528 auto morphology_border_type =
698 528 MorphologyWorkspace::get_border_type(border_type);
699
2/2
✓ Branch 0 taken 513 times.
✓ Branch 1 taken 15 times.
528 if (!morphology_border_type) {
700 15 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
701 }
702
4/4
✓ Branch 0 taken 486 times.
✓ Branch 1 taken 27 times.
✓ Branch 2 taken 486 times.
✓ Branch 3 taken 27 times.
1026 if (!morphology_is_implemented(width, height, kernel_width, kernel_height,
703 513 channels)) {
704 27 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
705 }
706
707 486 Rectangle rect{width, height};
708 486 Rectangle kernel_rect{kernel_width, kernel_height};
709 486 Point anchor{anchor_x, anchor_y};
710
711 972 auto workspace_variant = MorphologyWorkspace::create(
712 486 kernel_rect, anchor, *morphology_border_type, border_value, channels,
713 486 sizeof(uint8_t), rect);
714
4/4
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 471 times.
✓ Branch 2 taken 15 times.
✓ Branch 3 taken 471 times.
501 if (auto *err = std::get_if<kleidicv_error_t>(&workspace_variant)) {
715 15 return *err;
716 }
717 471 auto &workspace = *std::get_if<MorphologyWorkspace>(&workspace_variant);
718
719 471 Rows<const T> src_rows{src, src_stride, channels};
720 471 Rows<T> dst_rows{dst, dst_stride, channels};
721
722 471 Rows<const T> current_src_rows = src_rows;
723 471 Rows<T> current_dst_rows = dst_rows;
724
2/2
✓ Branch 0 taken 519 times.
✓ Branch 1 taken 471 times.
990 for (size_t i = 0; i < iterations; ++i) {
725 519 ErodeOperation<T, CopyOperation> operation{kernel_rect};
726 519 workspace.process(current_src_rows, current_dst_rows, operation);
727 // Update source for the next iteration.
728 519 current_src_rows = dst_rows;
729 519 }
730 471 return KLEIDICV_OK;
731 558 }
732
733 } // namespace KLEIDICV_TARGET_NAMESPACE
734
735 #endif // KLEIDICV_MORPHOLOGY_SC_H
736