KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/morphology/morphology_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 415 415 100.0%
Functions: 166 166 100.0%
Branches: 96 96 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_MORPHOLOGY_SC_H
6 #define KLEIDICV_MORPHOLOGY_SC_H
7
8 #include <cstdio>
9
10 #include "kleidicv/morphology/workspace.h"
11 #include "kleidicv/sve2.h"
12 #include "kleidicv/types.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
16 template <typename T>
17 class CopyDataSVE2 {
18 class CopyOperation final : public UnrollTwice {
19 public:
20 using ContextType = Context;
21 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<T>;
22 using VectorType = typename VecTraits::VectorType;
23
24 9400 VectorType vector_path(ContextType, VectorType src) KLEIDICV_STREAMING {
25 9400 return src;
26 }
27 }; // end of class CopyOperation
28
29 public:
30 7428 void operator()(Rows<const T> src_rows, Rows<T> dst_rows,
31 size_t length) const KLEIDICV_STREAMING {
32 // 'apply_operation_by_rows' can only handle one channel well
33 // so width must be multiplied in order to copy all the data
34 7428 Rectangle rect{length * dst_rows.channels(), std::size_t{1}};
35 7428 Rows<const T> src_1ch{&src_rows[0], src_rows.stride(), 1};
36 7428 Rows<T> dst_1ch{&dst_rows[0], dst_rows.stride(), 1};
37 7428 CopyOperation op{};
38 7428 apply_operation_by_rows(op, rect, src_1ch, dst_1ch);
39 7428 }
40 };
41
42 template <typename ScalarType, typename O>
43 class VerticalOp final {
44 public:
45 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
46 using Vector4Type = typename VecTraits::Vector4Type;
47 using Vector2Type = typename VecTraits::Vector2Type;
48
49 1212 VerticalOp(Rectangle rect, Rectangle kernel) KLEIDICV_STREAMING
50 1212 : rect_(rect),
51 1212 kernel_(kernel) {}
52
53 1212 void process_rows(IndirectRows<ScalarType> src_rows,
54 Rows<ScalarType> dst_rows) KLEIDICV_STREAMING {
55
4/4
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 606 times.
✓ Branch 2 taken 48 times.
✓ Branch 3 taken 510 times.
1212 if (KLEIDICV_UNLIKELY(kernel_.height()) == 1) {
56 96 CopyRows<ScalarType>::copy_rows(rect_, src_rows, dst_rows);
57 96 return;
58 }
59
60 // Iterate across the rows from top to bottom. This implementation can
61 // handle two rows at once.
62
4/4
✓ Branch 0 taken 2070 times.
✓ Branch 1 taken 606 times.
✓ Branch 2 taken 1743 times.
✓ Branch 3 taken 510 times.
4929 for (size_t height = 0; height < rect_.height(); height += 2) {
63 // Iterate across the columns from left to right.
64 7626 LoopUnroll2 loop{rect_.width() * src_rows.channels(),
65 3813 VecTraits::num_lanes()};
66 // clang-format off
67 3885 loop.unroll_four_times([&](size_t index) KLEIDICV_STREAMING {
68 72 vector_path_4x(src_rows, dst_rows, index, height);
69 72 })
70 3901 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
71 88 vector_path_2x(src_rows, dst_rows, index, height);
72 88 })
73 7626 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
74 3813 svbool_t pg = VecTraits::svwhilelt(index, length);
75
4/4
✓ Branch 0 taken 2389 times.
✓ Branch 1 taken 2070 times.
✓ Branch 2 taken 1947 times.
✓ Branch 3 taken 1743 times.
8149 while (svptest_first(VecTraits::svptrue(), pg)) {
76 4336 vector_path(pg, src_rows, dst_rows, index, height);
77 4336 index += VecTraits::num_lanes();
78 4336 pg = VecTraits::svwhilelt(index, length);
79 }
80 3813 });
81 // clang-format on
82 3813 src_rows += 2;
83 3813 dst_rows += 2;
84 3813 }
85 1212 }
86
87 private:
88 72 void vector_path_4x(IndirectRows<ScalarType> src_rows,
89 Rows<ScalarType> dst_rows, const size_t index,
90 const size_t height) KLEIDICV_STREAMING {
91 72 const ScalarType *src_row = &src_rows[index];
92 #if KLEIDICV_TARGET_SME2
93 24 svcount_t p_counter = VecTraits::svptrue_c();
94 24 Vector4Type v = svld1_x4(p_counter, &src_row[0]);
95 24 auto first_row0 = svget4(v, 0);
96 24 auto first_row1 = svget4(v, 1);
97 24 auto first_row2 = svget4(v, 2);
98 24 auto first_row3 = svget4(v, 3);
99 #else
100 48 auto first_row0 = svld1(VecTraits::svptrue(), &src_row[0]);
101 48 auto first_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
102 48 auto first_row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
103 48 auto first_row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
104 #endif // KLEIDICV_TARGET_SME2
105 72 ++src_rows;
106
107 72 src_row = &src_rows[index];
108 #if KLEIDICV_TARGET_SME2
109 24 v = svld1_x4(p_counter, &src_row[0]);
110 24 auto acc0 = svget4(v, 0);
111 24 auto acc1 = svget4(v, 1);
112 24 auto acc2 = svget4(v, 2);
113 24 auto acc3 = svget4(v, 3);
114 #else
115 48 auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]);
116 48 auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
117 48 auto acc2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
118 48 auto acc3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
119 #endif // KLEIDICV_TARGET_SME2
120 72 ++src_rows;
121
122 72 LoopUnroll loop{kernel_.height() - 2, 2};
123
124 108 loop.unroll_once([&](size_t step) KLEIDICV_STREAMING {
125 36 const ScalarType *src_row0 = &src_rows.at(0)[index];
126 36 const ScalarType *src_row1 = &src_rows.at(1)[index];
127 #if KLEIDICV_TARGET_SME2
128 12 v = svld1_x4(p_counter, src_row0);
129 12 auto row00 = svget4(v, 0);
130 12 auto row01 = svget4(v, 1);
131 12 auto row02 = svget4(v, 2);
132 12 auto row03 = svget4(v, 3);
133 12 v = svld1_x4(p_counter, src_row1);
134 12 auto row10 = svget4(v, 0);
135 12 auto row11 = svget4(v, 1);
136 12 auto row12 = svget4(v, 2);
137 12 auto row13 = svget4(v, 3);
138 #else
139 24 auto row00 = svld1(VecTraits::svptrue(), src_row0);
140 24 auto row01 = svld1_vnum(VecTraits::svptrue(), src_row0, 1);
141 24 auto row02 = svld1_vnum(VecTraits::svptrue(), src_row0, 2);
142 24 auto row03 = svld1_vnum(VecTraits::svptrue(), src_row0, 3);
143 24 auto row10 = svld1(VecTraits::svptrue(), src_row1);
144 24 auto row11 = svld1_vnum(VecTraits::svptrue(), src_row1, 1);
145 24 auto row12 = svld1_vnum(VecTraits::svptrue(), src_row1, 2);
146 24 auto row13 = svld1_vnum(VecTraits::svptrue(), src_row1, 3);
147 #endif // KLEIDICV_TARGET_SME2
148 72 acc0 = O::operation(VecTraits::svptrue(), acc0,
149 36 O::operation(VecTraits::svptrue(), row00, row10));
150 72 acc1 = O::operation(VecTraits::svptrue(), acc1,
151 36 O::operation(VecTraits::svptrue(), row01, row11));
152 72 acc2 = O::operation(VecTraits::svptrue(), acc2,
153 36 O::operation(VecTraits::svptrue(), row02, row12));
154 72 acc3 = O::operation(VecTraits::svptrue(), acc3,
155 36 O::operation(VecTraits::svptrue(), row03, row13));
156 36 src_rows += step;
157 36 });
158
159 132 loop.tail([&](size_t /* index */) // NOLINT(readability/casting)
160 KLEIDICV_STREAMING {
161 60 const ScalarType *src_row = &src_rows[index];
162 #if KLEIDICV_TARGET_SME2
163 20 v = svld1_x4(p_counter, &src_row[0]);
164 20 auto row0 = svget4(v, 0);
165 20 auto row1 = svget4(v, 1);
166 20 auto row2 = svget4(v, 2);
167 20 auto row3 = svget4(v, 3);
168 #else
169 40 auto row0 = svld1(VecTraits::svptrue(), &src_row[0]);
170 40 auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
171 40 auto row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
172 40 auto row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
173 #endif // KLEIDICV_TARGET_SME2
174 60 acc0 = O::operation(VecTraits::svptrue(), acc0, row0);
175 60 acc1 = O::operation(VecTraits::svptrue(), acc1, row1);
176 60 acc2 = O::operation(VecTraits::svptrue(), acc2, row2);
177 60 acc3 = O::operation(VecTraits::svptrue(), acc3, row3);
178 60 ++src_rows;
179 60 });
180
181 // Save partial results which do not contain the first row.
182 72 auto partial_acc0 = acc0;
183 72 auto partial_acc1 = acc1;
184 72 auto partial_acc2 = acc2;
185 72 auto partial_acc3 = acc3;
186
187 // Take the first row into account.
188 72 acc0 = O::operation(VecTraits::svptrue(), acc0, first_row0);
189 72 acc1 = O::operation(VecTraits::svptrue(), acc1, first_row1);
190 72 acc2 = O::operation(VecTraits::svptrue(), acc2, first_row2);
191 72 acc3 = O::operation(VecTraits::svptrue(), acc3, first_row3);
192
193 // Store the results.
194 72 ScalarType *dst_row = &dst_rows[index];
195 #if KLEIDICV_TARGET_SME2
196 24 Vector4Type res4 = svcreate4(acc0, acc1, acc2, acc3);
197 24 svst1(p_counter, &dst_row[0], res4);
198 #else
199 48 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
200 48 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
201 48 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2);
202 48 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3);
203 #endif // KLEIDICV_TARGET_SME2
204
205 // Try to process one more row, because it is relatively cheap to do so.
206
4/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 24 times.
72 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
207 24 return;
208 }
209
210 48 ++dst_rows;
211
212 48 src_row = &src_rows[index];
213 #if KLEIDICV_TARGET_SME2
214 16 v = svld1_x4(p_counter, &src_row[0]);
215 16 auto next_row0 = svget4(v, 0);
216 16 auto next_row1 = svget4(v, 1);
217 16 auto next_row2 = svget4(v, 2);
218 16 auto next_row3 = svget4(v, 3);
219 #else
220 32 auto next_row0 = svld1(VecTraits::svptrue(), &src_row[0]);
221 32 auto next_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
222 32 auto next_row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
223 32 auto next_row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
224 #endif // KLEIDICV_TARGET_SME2
225
226 48 acc0 = O::operation(VecTraits::svptrue(), partial_acc0, next_row0);
227 48 acc1 = O::operation(VecTraits::svptrue(), partial_acc1, next_row1);
228 48 acc2 = O::operation(VecTraits::svptrue(), partial_acc2, next_row2);
229 48 acc3 = O::operation(VecTraits::svptrue(), partial_acc3, next_row3);
230
231 // Store the results.
232 48 dst_row = &dst_rows[index];
233 #if KLEIDICV_TARGET_SME2
234 16 res4 = svcreate4(acc0, acc1, acc2, acc3);
235 16 svst1(p_counter, &dst_row[0], res4);
236 #else
237 32 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
238 32 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
239 32 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2);
240 32 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3);
241 #endif // KLEIDICV_TARGET_SME2
242 72 }
243
244 88 void vector_path_2x(IndirectRows<ScalarType> src_rows,
245 Rows<ScalarType> dst_rows, const size_t index,
246 const size_t height) KLEIDICV_STREAMING {
247 88 const ScalarType *src_row = &src_rows[index];
248 #if KLEIDICV_TARGET_SME2
249 28 svcount_t p_counter = VecTraits::svptrue_c();
250 28 Vector2Type v = svld1_x2(p_counter, &src_row[0]);
251 28 auto first_row0 = svget2(v, 0);
252 28 auto first_row1 = svget2(v, 1);
253 #else
254 60 auto first_row0 = svld1(VecTraits::svptrue(), &src_row[0]);
255 60 auto first_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
256 #endif // KLEIDICV_TARGET_SME2
257
258 88 ++src_rows;
259
260 88 src_row = &src_rows[index];
261 #if KLEIDICV_TARGET_SME2
262 28 v = svld1_x2(p_counter, &src_row[0]);
263 28 auto acc0 = svget2(v, 0);
264 28 auto acc1 = svget2(v, 1);
265 #else
266 60 auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]);
267 60 auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
268 #endif // KLEIDICV_TARGET_SME2
269 88 ++src_rows;
270
271 88 LoopUnroll loop{kernel_.height() - 2, 2};
272
273 128 loop.unroll_once([&](size_t step) KLEIDICV_STREAMING {
274 40 const ScalarType *src_row0 = &src_rows.at(0)[index];
275 40 const ScalarType *src_row1 = &src_rows.at(1)[index];
276 #if KLEIDICV_TARGET_SME2
277 12 Vector2Type v0 = svld1_x2(p_counter, src_row0);
278 12 Vector2Type v1 = svld1_x2(p_counter, src_row1);
279 12 auto row00 = svget2(v0, 0);
280 12 auto row01 = svget2(v0, 1);
281 12 auto row10 = svget2(v1, 0);
282 12 auto row11 = svget2(v1, 1);
283 #else
284 28 auto row00 = svld1(VecTraits::svptrue(), src_row0);
285 28 auto row01 = svld1_vnum(VecTraits::svptrue(), src_row0, 1);
286 28 auto row10 = svld1(VecTraits::svptrue(), src_row1);
287 28 auto row11 = svld1_vnum(VecTraits::svptrue(), src_row1, 1);
288 #endif // KLEIDICV_TARGET_SME2
289 80 acc0 = O::operation(VecTraits::svptrue(), acc0,
290 40 O::operation(VecTraits::svptrue(), row00, row10));
291 80 acc1 = O::operation(VecTraits::svptrue(), acc1,
292 40 O::operation(VecTraits::svptrue(), row01, row11));
293 40 src_rows += step;
294 40 });
295
296 148 loop.tail([&](size_t /* index */) // NOLINT(readability/casting)
297 KLEIDICV_STREAMING {
298 60 const ScalarType *src_row = &src_rows[index];
299 #if KLEIDICV_TARGET_SME2
300 20 v = svld1_x2(p_counter, &src_row[0]);
301 20 auto row0 = svget2(v, 0);
302 20 auto row1 = svget2(v, 1);
303 #else
304 40 auto row0 = svld1(VecTraits::svptrue(), &src_row[0]);
305 40 auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
306 #endif // KLEIDICV_TARGET_SME2
307 60 acc0 = O::operation(VecTraits::svptrue(), acc0, row0);
308 60 acc1 = O::operation(VecTraits::svptrue(), acc1, row1);
309 60 ++src_rows;
310 60 });
311
312 // Save partial results which do not contain the first row.
313 88 auto partial_acc0 = acc0;
314 88 auto partial_acc1 = acc1;
315
316 // Take the first row into account.
317 88 acc0 = O::operation(VecTraits::svptrue(), acc0, first_row0);
318 88 acc1 = O::operation(VecTraits::svptrue(), acc1, first_row1);
319
320 // Store the results.
321 88 ScalarType *dst_row = &dst_rows[index];
322 #if KLEIDICV_TARGET_SME2
323 28 Vector2Type res2 = svcreate2(acc0, acc1);
324 28 svst1(p_counter, &dst_row[0], res2);
325 #else
326 60 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
327 60 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
328 #endif // KLEIDICV_TARGET_SME2
329
330 // Try to process one more row, because it is relatively cheap to do so.
331
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 38 times.
88 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
332 14 return;
333 }
334
335 74 ++dst_rows;
336
337 74 src_row = &src_rows[index];
338 #if KLEIDICV_TARGET_SME2
339 24 v = svld1_x2(p_counter, &src_row[0]);
340 24 auto next_row0 = svget2(v, 0);
341 24 auto next_row1 = svget2(v, 1);
342 #else
343 50 auto next_row0 = svld1(VecTraits::svptrue(), &src_row[0]);
344 50 auto next_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
345 #endif // KLEIDICV_TARGET_SME2
346
347 74 acc0 = O::operation(VecTraits::svptrue(), partial_acc0, next_row0);
348 74 acc1 = O::operation(VecTraits::svptrue(), partial_acc1, next_row1);
349
350 74 dst_row = &dst_rows[index];
351 #if KLEIDICV_TARGET_SME2
352 24 res2 = svcreate2(acc0, acc1);
353 24 svst1(p_counter, &dst_row[0], res2);
354 #else
355 50 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
356 50 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
357 #endif // KLEIDICV_TARGET_SME2
358 88 }
359
360 4336 void vector_path(svbool_t pg, IndirectRows<ScalarType> src_rows,
361 Rows<ScalarType> dst_rows, const size_t index,
362 const size_t height) KLEIDICV_STREAMING {
363 4336 auto first_row = svld1(pg, &src_rows[index]);
364 4336 ++src_rows;
365
366 4336 auto acc = svld1(pg, &src_rows[index]);
367 4336 ++src_rows;
368
369 4336 LoopUnroll loop{kernel_.height() - 2, 2};
370
371 7163 loop.unroll_once([&](size_t step) KLEIDICV_STREAMING {
372 2827 auto row0 = svld1(pg, &src_rows.at(0)[index]);
373 2827 auto row1 = svld1(pg, &src_rows.at(1)[index]);
374 2827 acc = O::operation(pg, acc, O::operation(pg, row0, row1));
375 2827 src_rows += step;
376 2827 });
377
378 7721 loop.tail([&](size_t /* index */) // NOLINT(readability/casting)
379 KLEIDICV_STREAMING {
380 3385 auto row = svld1(pg, &src_rows[index]);
381 3385 acc = O::operation(pg, acc, row);
382 3385 ++src_rows;
383 3385 });
384
385 // Save partial result which does not contain the first row.
386 4336 auto partial_acc = acc;
387
388 // Take the first row into account.
389 4336 acc = O::operation(pg, acc, first_row);
390
391 // Store the results.
392 4336 svst1(pg, &dst_rows[index], acc);
393
394 // Try to process one more row, because it is relatively cheap to do so.
395
4/4
✓ Branch 0 taken 200 times.
✓ Branch 1 taken 2189 times.
✓ Branch 2 taken 214 times.
✓ Branch 3 taken 1733 times.
4336 if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) {
396 414 return;
397 }
398
399 3922 ++dst_rows;
400
401 3922 auto next_row = svld1(pg, &src_rows[index]);
402 3922 acc = O::operation(pg, partial_acc, next_row);
403 3922 svst1(pg, &dst_rows[index], acc);
404 4336 }
405
406 Rectangle rect_;
407 Rectangle kernel_;
408 }; // end of class VerticalOp<ScalarType, )>
409
410 template <typename ScalarType, typename O>
411 class HorizontalOp final {
412 public:
413 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
414 using Vector4Type = typename VecTraits::Vector4Type;
415 using Vector2Type = typename VecTraits::Vector2Type;
416
417 10566 HorizontalOp(Rectangle rect, Rectangle kernel) KLEIDICV_STREAMING
418 10566 : rect_(rect),
419 10566 kernel_(kernel) {}
420
421 10566 void process_rows(Rows<const ScalarType> src_rows,
422 Rows<ScalarType> dst_rows) KLEIDICV_STREAMING {
423 // Iterate across the rows from top to bottom.
424
4/4
✓ Branch 0 taken 5937 times.
✓ Branch 1 taken 5937 times.
✓ Branch 2 taken 4629 times.
✓ Branch 3 taken 4629 times.
21132 for (size_t height = 0; height < rect_.height(); ++height) {
425 // Iterate across the columns from left to right.
426 21132 LoopUnroll2 loop{rect_.width() * src_rows.channels(),
427 10566 VecTraits::num_lanes()};
428 // clang-format off
429 10770 loop.unroll_four_times([&](size_t index) KLEIDICV_STREAMING {
430 204 vector_path_4x(src_rows, dst_rows, index);
431 204 })
432 10818 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
433 252 vector_path_2x(src_rows, dst_rows, index);
434 252 })
435 21132 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
436 10566 svbool_t pg = VecTraits::svwhilelt(index, length);
437
4/4
✓ Branch 0 taken 7131 times.
✓ Branch 1 taken 5937 times.
✓ Branch 2 taken 5347 times.
✓ Branch 3 taken 4629 times.
23044 while (svptest_first(VecTraits::svptrue(), pg)) {
438 12478 vector_path(pg, src_rows, dst_rows, index);
439 12478 index += VecTraits::num_lanes();
440 12478 pg = VecTraits::svwhilelt(index, length);
441 }
442 10566 });
443 // clang-format on
444 10566 ++src_rows;
445 10566 ++dst_rows;
446 10566 }
447 10566 }
448
449 private:
450 204 void vector_path_4x(Rows<const ScalarType> src_rows,
451 Rows<ScalarType> dst_rows,
452 const size_t index) KLEIDICV_STREAMING {
453 204 const auto *src_row = &src_rows[index];
454 #if KLEIDICV_TARGET_SME2
455 68 svcount_t p_counter = VecTraits::svptrue_c();
456 68 Vector4Type v = svld1_x4(p_counter, &src_row[0]);
457 68 auto acc0 = svget4(v, 0);
458 68 auto acc1 = svget4(v, 1);
459 68 auto acc2 = svget4(v, 2);
460 68 auto acc3 = svget4(v, 3);
461 #else
462 136 auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]);
463 136 auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
464 136 auto acc2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
465 136 auto acc3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
466 #endif // KLEIDICV_TARGET_SME2
467
468
4/4
✓ Branch 0 taken 102 times.
✓ Branch 1 taken 276 times.
✓ Branch 2 taken 102 times.
✓ Branch 3 taken 276 times.
756 for (size_t width = 1; width < kernel_.width(); ++width) {
469 552 src_row = &src_rows[index + width * src_rows.channels()];
470 #if KLEIDICV_TARGET_SME2
471 184 Vector4Type v = svld1_x4(p_counter, &src_row[0]);
472 184 auto row0 = svget4(v, 0);
473 184 auto row1 = svget4(v, 1);
474 184 auto row2 = svget4(v, 2);
475 184 auto row3 = svget4(v, 3);
476 #else
477 368 auto row0 = svld1(VecTraits::svptrue(), &src_row[0]);
478 368 auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
479 368 auto row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2);
480 368 auto row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3);
481 #endif // KLEIDICV_TARGET_SME2
482 552 acc0 = O::operation(VecTraits::svptrue(), acc0, row0);
483 552 acc1 = O::operation(VecTraits::svptrue(), acc1, row1);
484 552 acc2 = O::operation(VecTraits::svptrue(), acc2, row2);
485 552 acc3 = O::operation(VecTraits::svptrue(), acc3, row3);
486 552 }
487
488 204 auto dst_row = &dst_rows[index];
489 #if KLEIDICV_TARGET_SME2
490 68 Vector4Type res4 = svcreate4(acc0, acc1, acc2, acc3);
491 68 svst1(p_counter, &dst_row[0], res4);
492 #else
493 136 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
494 136 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
495 136 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2);
496 136 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3);
497 #endif // KLEIDICV_TARGET_SME2
498 204 }
499
500 252 void vector_path_2x(Rows<const ScalarType> src_rows,
501 Rows<ScalarType> dst_rows,
502 const size_t index) KLEIDICV_STREAMING {
503 252 const auto *src_row = &src_rows[index];
504 #if KLEIDICV_TARGET_SME2
505 80 svcount_t p_counter = VecTraits::svptrue_c();
506 80 Vector2Type v = svld1_x2(p_counter, &src_row[0]);
507 80 auto acc0 = svget2(v, 0);
508 80 auto acc1 = svget2(v, 1);
509 #else
510 172 auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]);
511 172 auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
512 #endif // KLEIDICV_TARGET_SME2
513
514
4/4
✓ Branch 0 taken 120 times.
✓ Branch 1 taken 312 times.
✓ Branch 2 taken 132 times.
✓ Branch 3 taken 444 times.
1008 for (size_t width = 1; width < kernel_.width(); ++width) {
515 756 src_row = &src_rows[index + width * src_rows.channels()];
516 #if KLEIDICV_TARGET_SME2
517 208 v = svld1_x2(p_counter, &src_row[0]);
518 208 auto row0 = svget2(v, 0);
519 208 auto row1 = svget2(v, 1);
520 #else
521 548 auto row0 = svld1(VecTraits::svptrue(), &src_row[0]);
522 548 auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1);
523 #endif // KLEIDICV_TARGET_SME2
524 756 acc0 = O::operation(VecTraits::svptrue(), acc0, row0);
525 756 acc1 = O::operation(VecTraits::svptrue(), acc1, row1);
526 756 }
527
528 252 auto dst_row = &dst_rows[index];
529 #if KLEIDICV_TARGET_SME2
530 80 Vector2Type res2 = svcreate2(acc0, acc1);
531 80 svst1(p_counter, &dst_row[0], res2);
532 #else
533 172 svst1(VecTraits::svptrue(), &dst_row[0], acc0);
534 172 svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1);
535 #endif // KLEIDICV_TARGET_SME2
536 252 }
537
538 12478 void vector_path(svbool_t pg, Rows<const ScalarType> src_rows,
539 Rows<ScalarType> dst_rows,
540 const size_t index) KLEIDICV_STREAMING {
541 12478 auto acc = svld1(pg, &src_rows[index]);
542
543
4/4
✓ Branch 0 taken 7131 times.
✓ Branch 1 taken 16575 times.
✓ Branch 2 taken 5347 times.
✓ Branch 3 taken 16350 times.
45403 for (size_t width = 1; width < kernel_.width(); ++width) {
544 32925 const auto *src_row = &src_rows[index + width * src_rows.channels()];
545 32925 acc = O::operation(pg, acc, svld1(pg, &src_row[0]));
546 32925 }
547
548 12478 svst1(pg, &dst_rows[index], acc);
549 12478 }
550
551 Rectangle rect_;
552 Rectangle kernel_;
553 }; // end of class HorizontalOp<ScalarType>
554
555 template <typename ScalarType>
556 class Min final {
557 public:
558 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
559 using VectorType = typename VecTraits::VectorType;
560
561 25885 static VectorType operation(svbool_t pg, VectorType lhs,
562 VectorType rhs) KLEIDICV_STREAMING {
563 25885 return svmin_x(pg, lhs, rhs);
564 }
565 }; // end of class Min<ScalarType>
566
567 template <typename ScalarType>
568 class Max final {
569 public:
570 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
571 using VectorType = typename VecTraits::VectorType;
572
573 29669 static VectorType operation(svbool_t pg, VectorType lhs,
574 VectorType rhs) KLEIDICV_STREAMING {
575 29669 return svmax_x(pg, lhs, rhs);
576 }
577 }; // end of class Max<ScalarType>
578
579 template <typename T>
580 using VerticalMin = VerticalOp<T, Min<T>>;
581 template <typename T>
582 using VerticalMax = VerticalOp<T, Max<T>>;
583
584 template <typename T>
585 using HorizontalMin = HorizontalOp<T, Min<T>>;
586 template <typename T>
587 using HorizontalMax = HorizontalOp<T, Max<T>>;
588
589 template <typename ScalarType, typename CopyDataOperation>
590 class DilateOperation final {
591 public:
592 using SourceType = ScalarType;
593 using BufferType = ScalarType;
594 using DestinationType = ScalarType;
595 using CopyData = CopyDataOperation;
596
597 612 explicit DilateOperation(Rectangle kernel) KLEIDICV_STREAMING
598 612 : kernel_{kernel} {}
599
600 5937 void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows,
601 Rows<BufferType> dst_rows) KLEIDICV_STREAMING {
602 5937 HorizontalMax<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows);
603 5937 }
604
605 654 void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows,
606 Rows<DestinationType> dst_rows) KLEIDICV_STREAMING {
607 654 VerticalMax<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows);
608 654 }
609
610 private:
611 Rectangle kernel_;
612 }; // end of class DilateOperation<ScalarType>
613
614 template <typename T, typename CopyOperation>
615 558 static kleidicv_error_t dilate_sc(
616 const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width,
617 size_t height, kleidicv_morphology_context_t *context) KLEIDICV_STREAMING {
618
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 555 times.
558 CHECK_POINTERS(context);
619
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 552 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 552 times.
555 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
620
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 549 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 549 times.
552 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
621
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 546 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 543 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 543 times.
549 CHECK_IMAGE_SIZE(width, height);
622
623 543 auto *workspace = reinterpret_cast<MorphologyWorkspace *>(context);
624
625
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 540 times.
543 if (workspace->type_size() != sizeof(T)) {
626 3 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
627 }
628
629 540 Rectangle rect{width, height};
630
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 534 times.
540 if (workspace->image_size() != rect) {
631 6 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
632 }
633
634 // Currently valid, will need to be changed if morphology supports more border
635 // types, like KLEIDICV_BORDER_TYPE_REVERSE.
636 534 Rectangle kernel{workspace->kernel()};
637
4/4
✓ Branch 0 taken 522 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 516 times.
534 if (width < kernel.width() - 1 || height < kernel.height() - 1) {
638 18 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
639 }
640
641 516 Rows<const T> src_rows{src, src_stride, workspace->channels()};
642 516 Rows<T> dst_rows{dst, dst_stride, workspace->channels()};
643 516 Margin margin{workspace->kernel(), workspace->anchor()};
644
645 516 Rows<const T> current_src_rows = src_rows;
646 516 Rows<T> current_dst_rows = dst_rows;
647
2/2
✓ Branch 0 taken 612 times.
✓ Branch 1 taken 516 times.
1128 for (size_t iteration = 0; iteration < workspace->iterations(); ++iteration) {
648 612 DilateOperation<T, CopyOperation> operation{kernel};
649 1224 workspace->process(rect, current_src_rows, current_dst_rows, margin,
650 612 workspace->border_type(), operation);
651 // Update source for the next iteration.
652 612 current_src_rows = dst_rows;
653 612 }
654 516 return KLEIDICV_OK;
655 558 }
656
657 // Helper structure for erode.
658 template <typename ScalarType, typename CopyDataOperation>
659 class ErodeOperation final {
660 public:
661 using SourceType = ScalarType;
662 using BufferType = ScalarType;
663 using DestinationType = ScalarType;
664 using CopyData = CopyDataOperation;
665
666 516 explicit ErodeOperation(Rectangle kernel) KLEIDICV_STREAMING
667 516 : kernel_{kernel} {}
668
669 4629 void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows,
670 Rows<BufferType> dst_rows) KLEIDICV_STREAMING {
671 4629 HorizontalMin<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows);
672 4629 }
673
674 558 void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows,
675 Rows<DestinationType> dst_rows) KLEIDICV_STREAMING {
676 558 VerticalMin<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows);
677 558 }
678
679 private:
680 Rectangle kernel_;
681 }; // end of class ErodeOperation<ScalarType>
682
683 template <typename T, typename CopyOperation>
684 510 static kleidicv_error_t erode_sc(
685 const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width,
686 size_t height, kleidicv_morphology_context_t *context) KLEIDICV_STREAMING {
687
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 507 times.
510 CHECK_POINTERS(context);
688
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 504 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 504 times.
507 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
689
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 501 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 501 times.
504 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
690
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 498 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 495 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 495 times.
501 CHECK_IMAGE_SIZE(width, height);
691
692 495 auto *workspace = reinterpret_cast<MorphologyWorkspace *>(context);
693
694
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 492 times.
495 if (workspace->type_size() != sizeof(T)) {
695 3 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
696 }
697
698 492 Rectangle rect{width, height};
699
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 486 times.
492 if (workspace->image_size() != rect) {
700 6 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
701 }
702
703 // Currently valid, will need to be changed if morphology supports more border
704 // types, like KLEIDICV_BORDER_TYPE_REVERSE.
705 486 Rectangle kernel{workspace->kernel()};
706
4/4
✓ Branch 0 taken 474 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 468 times.
486 if (width < kernel.width() - 1 || height < kernel.height() - 1) {
707 18 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
708 }
709
710 468 Rows<const T> src_rows{src, src_stride, workspace->channels()};
711 468 Rows<T> dst_rows{dst, dst_stride, workspace->channels()};
712 468 Margin margin{workspace->kernel(), workspace->anchor()};
713
714 468 Rows<const T> current_src_rows = src_rows;
715 468 Rows<T> current_dst_rows = dst_rows;
716
2/2
✓ Branch 0 taken 516 times.
✓ Branch 1 taken 468 times.
984 for (size_t iteration = 0; iteration < workspace->iterations(); ++iteration) {
717 516 ErodeOperation<T, CopyOperation> operation{kernel};
718 1032 workspace->process(rect, current_src_rows, current_dst_rows, margin,
719 516 workspace->border_type(), operation);
720 // Update source for the next iteration.
721 516 current_src_rows = dst_rows;
722 516 }
723 468 return KLEIDICV_OK;
724 510 }
725
726 } // namespace KLEIDICV_TARGET_NAMESPACE
727
728 #endif // KLEIDICV_MORPHOLOGY_SC_H
729