Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_MORPHOLOGY_SC_H | ||
6 | #define KLEIDICV_MORPHOLOGY_SC_H | ||
7 | |||
8 | #include <algorithm> | ||
9 | #include <limits> | ||
10 | |||
11 | #include "kleidicv/kleidicv.h" | ||
12 | #include "kleidicv/morphology/workspace.h" | ||
13 | #include "kleidicv/sve2.h" | ||
14 | #include "kleidicv/types.h" | ||
15 | |||
16 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
17 | |||
18 | template <typename T> | ||
19 | class CopyDataSVE2 { | ||
20 | class CopyOperation final : public UnrollTwice { | ||
21 | public: | ||
22 | using ContextType = Context; | ||
23 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<T>; | ||
24 | using VectorType = typename VecTraits::VectorType; | ||
25 | |||
26 | 4700 | VectorType vector_path(ContextType, VectorType src) KLEIDICV_STREAMING { | |
27 | 4700 | return src; | |
28 | } | ||
29 | }; // end of class CopyOperation | ||
30 | |||
31 | public: | ||
32 | 3714 | void operator()(Rows<const T> src_rows, Rows<T> dst_rows, | |
33 | size_t length) const KLEIDICV_STREAMING { | ||
34 | // 'apply_operation_by_rows' can only handle one channel well | ||
35 | // so width must be multiplied in order to copy all the data | ||
36 | 3714 | Rectangle rect{length * dst_rows.channels(), std::size_t{1}}; | |
37 | 3714 | Rows<const T> src_1ch{&src_rows[0], src_rows.stride(), 1}; | |
38 | 3714 | Rows<T> dst_1ch{&dst_rows[0], dst_rows.stride(), 1}; | |
39 | 3714 | CopyOperation op{}; | |
40 | 3714 | apply_operation_by_rows(op, rect, src_1ch, dst_1ch); | |
41 | 3714 | } | |
42 | }; | ||
43 | |||
44 | template <typename ScalarType, typename O> | ||
45 | class VerticalOp final { | ||
46 | public: | ||
47 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
48 | |||
49 | 796 | VerticalOp(Rectangle rect, Rectangle kernel) KLEIDICV_STREAMING | |
50 | 796 | : rect_(rect), | |
51 | 796 | kernel_(kernel) {} | |
52 | |||
53 | 796 | void process_rows(IndirectRows<ScalarType> src_rows, | |
54 | Rows<ScalarType> dst_rows) KLEIDICV_STREAMING { | ||
55 |
4/4✓ Branch 0 taken 32 times.
✓ Branch 1 taken 398 times.
✓ Branch 2 taken 32 times.
✓ Branch 3 taken 334 times.
|
796 | if (KLEIDICV_UNLIKELY(kernel_.height()) == 1) { |
56 | 64 | CopyRows<ScalarType>::copy_rows(rect_, src_rows, dst_rows); | |
57 | 64 | return; | |
58 | } | ||
59 | |||
60 | // Iterate across the rows from top to bottom. This implementation can | ||
61 | // handle two rows at once. | ||
62 |
4/4✓ Branch 0 taken 1268 times.
✓ Branch 1 taken 398 times.
✓ Branch 2 taken 1050 times.
✓ Branch 3 taken 334 times.
|
3050 | for (size_t height = 0; height < rect_.height(); height += 2) { |
63 | // Iterate across the columns from left to right. | ||
64 | 4636 | LoopUnroll2 loop{rect_.width() * src_rows.channels(), | |
65 | 2318 | VecTraits::num_lanes()}; | |
66 | // clang-format off | ||
67 | 2366 | loop.unroll_four_times([&](size_t index) KLEIDICV_STREAMING { | |
68 | 48 | vector_path_4x(src_rows, dst_rows, index, height); | |
69 | 48 | }) | |
70 | 2378 | .unroll_twice([&](size_t index) KLEIDICV_STREAMING { | |
71 | 60 | vector_path_2x(src_rows, dst_rows, index, height); | |
72 | 60 | }) | |
73 | 4636 | .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING { | |
74 | 2318 | svbool_t pg = VecTraits::svwhilelt(index, length); | |
75 |
4/4✓ Branch 0 taken 1482 times.
✓ Branch 1 taken 1268 times.
✓ Branch 2 taken 1188 times.
✓ Branch 3 taken 1050 times.
|
4988 | while (svptest_first(VecTraits::svptrue(), pg)) { |
76 | 2670 | vector_path(pg, src_rows, dst_rows, index, height); | |
77 | 2670 | index += VecTraits::num_lanes(); | |
78 | 2670 | pg = VecTraits::svwhilelt(index, length); | |
79 | } | ||
80 | 2318 | }); | |
81 | // clang-format on | ||
82 | 2318 | src_rows += 2; | |
83 | 2318 | dst_rows += 2; | |
84 | 2318 | } | |
85 | 796 | } | |
86 | |||
87 | private: | ||
88 | 48 | void vector_path_4x(IndirectRows<ScalarType> src_rows, | |
89 | Rows<ScalarType> dst_rows, const size_t index, | ||
90 | const size_t height) KLEIDICV_STREAMING { | ||
91 | 48 | const ScalarType *src_row = &src_rows[index]; | |
92 | 48 | auto first_row0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
93 | 48 | auto first_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
94 | 48 | auto first_row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2); | |
95 | 48 | auto first_row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3); | |
96 | 48 | ++src_rows; | |
97 | |||
98 | 48 | src_row = &src_rows[index]; | |
99 | 48 | auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
100 | 48 | auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
101 | 48 | auto acc2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2); | |
102 | 48 | auto acc3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3); | |
103 | 48 | ++src_rows; | |
104 | |||
105 | 48 | LoopUnroll loop{kernel_.height() - 2, 2}; | |
106 | |||
107 | 72 | loop.unroll_once([&](size_t step) KLEIDICV_STREAMING { | |
108 | 24 | const ScalarType *src_row0 = &src_rows.at(0)[index]; | |
109 | 24 | const ScalarType *src_row1 = &src_rows.at(1)[index]; | |
110 | 24 | auto row00 = svld1(VecTraits::svptrue(), src_row0); | |
111 | 24 | auto row01 = svld1_vnum(VecTraits::svptrue(), src_row0, 1); | |
112 | 24 | auto row02 = svld1_vnum(VecTraits::svptrue(), src_row0, 2); | |
113 | 24 | auto row03 = svld1_vnum(VecTraits::svptrue(), src_row0, 3); | |
114 | 24 | auto row10 = svld1(VecTraits::svptrue(), src_row1); | |
115 | 24 | auto row11 = svld1_vnum(VecTraits::svptrue(), src_row1, 1); | |
116 | 24 | auto row12 = svld1_vnum(VecTraits::svptrue(), src_row1, 2); | |
117 | 24 | auto row13 = svld1_vnum(VecTraits::svptrue(), src_row1, 3); | |
118 | 48 | acc0 = O::operation(VecTraits::svptrue(), acc0, | |
119 | 24 | O::operation(VecTraits::svptrue(), row00, row10)); | |
120 | 48 | acc1 = O::operation(VecTraits::svptrue(), acc1, | |
121 | 24 | O::operation(VecTraits::svptrue(), row01, row11)); | |
122 | 48 | acc2 = O::operation(VecTraits::svptrue(), acc2, | |
123 | 24 | O::operation(VecTraits::svptrue(), row02, row12)); | |
124 | 48 | acc3 = O::operation(VecTraits::svptrue(), acc3, | |
125 | 24 | O::operation(VecTraits::svptrue(), row03, row13)); | |
126 | 24 | src_rows += step; | |
127 | 24 | }); | |
128 | |||
129 | 88 | loop.tail([&](size_t /* index */) // NOLINT(readability/casting) | |
130 | KLEIDICV_STREAMING { | ||
131 | 40 | const ScalarType *src_row = &src_rows[index]; | |
132 | 40 | auto row0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
133 | 40 | auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
134 | 40 | auto row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2); | |
135 | 40 | auto row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3); | |
136 | 40 | acc0 = O::operation(VecTraits::svptrue(), acc0, row0); | |
137 | 40 | acc1 = O::operation(VecTraits::svptrue(), acc1, row1); | |
138 | 40 | acc2 = O::operation(VecTraits::svptrue(), acc2, row2); | |
139 | 40 | acc3 = O::operation(VecTraits::svptrue(), acc3, row3); | |
140 | 40 | ++src_rows; | |
141 | 40 | }); | |
142 | |||
143 | // Save partial results which do not contain the first row. | ||
144 | 48 | auto partial_acc0 = acc0; | |
145 | 48 | auto partial_acc1 = acc1; | |
146 | 48 | auto partial_acc2 = acc2; | |
147 | 48 | auto partial_acc3 = acc3; | |
148 | |||
149 | // Take the first row into account. | ||
150 | 48 | acc0 = O::operation(VecTraits::svptrue(), acc0, first_row0); | |
151 | 48 | acc1 = O::operation(VecTraits::svptrue(), acc1, first_row1); | |
152 | 48 | acc2 = O::operation(VecTraits::svptrue(), acc2, first_row2); | |
153 | 48 | acc3 = O::operation(VecTraits::svptrue(), acc3, first_row3); | |
154 | |||
155 | // Store the results. | ||
156 | 48 | ScalarType *dst_row = &dst_rows[index]; | |
157 | 48 | svst1(VecTraits::svptrue(), &dst_row[0], acc0); | |
158 | 48 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1); | |
159 | 48 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2); | |
160 | 48 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3); | |
161 | |||
162 | // Try to process one more row, because it is relatively cheap to do so. | ||
163 |
4/4✓ Branch 0 taken 8 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 16 times.
|
48 | if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) { |
164 | 16 | return; | |
165 | } | ||
166 | |||
167 | 32 | ++dst_rows; | |
168 | |||
169 | 32 | src_row = &src_rows[index]; | |
170 | 32 | auto next_row0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
171 | 32 | auto next_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
172 | 32 | auto next_row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2); | |
173 | 32 | auto next_row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3); | |
174 | |||
175 | 32 | acc0 = O::operation(VecTraits::svptrue(), partial_acc0, next_row0); | |
176 | 32 | acc1 = O::operation(VecTraits::svptrue(), partial_acc1, next_row1); | |
177 | 32 | acc2 = O::operation(VecTraits::svptrue(), partial_acc2, next_row2); | |
178 | 32 | acc3 = O::operation(VecTraits::svptrue(), partial_acc3, next_row3); | |
179 | |||
180 | // Store the results. | ||
181 | 32 | dst_row = &dst_rows[index]; | |
182 | 32 | svst1(VecTraits::svptrue(), &dst_row[0], acc0); | |
183 | 32 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1); | |
184 | 32 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2); | |
185 | 32 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3); | |
186 | 48 | } | |
187 | |||
188 | 60 | void vector_path_2x(IndirectRows<ScalarType> src_rows, | |
189 | Rows<ScalarType> dst_rows, const size_t index, | ||
190 | const size_t height) KLEIDICV_STREAMING { | ||
191 | 60 | const ScalarType *src_row = &src_rows[index]; | |
192 | 60 | auto first_row0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
193 | 60 | auto first_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
194 | 60 | ++src_rows; | |
195 | |||
196 | 60 | src_row = &src_rows[index]; | |
197 | 60 | auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
198 | 60 | auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
199 | 60 | ++src_rows; | |
200 | |||
201 | 60 | LoopUnroll loop{kernel_.height() - 2, 2}; | |
202 | |||
203 | 88 | loop.unroll_once([&](size_t step) KLEIDICV_STREAMING { | |
204 | 28 | const ScalarType *src_row0 = &src_rows.at(0)[index]; | |
205 | 28 | const ScalarType *src_row1 = &src_rows.at(1)[index]; | |
206 | 28 | auto row00 = svld1(VecTraits::svptrue(), src_row0); | |
207 | 28 | auto row01 = svld1_vnum(VecTraits::svptrue(), src_row0, 1); | |
208 | 28 | auto row10 = svld1(VecTraits::svptrue(), src_row1); | |
209 | 28 | auto row11 = svld1_vnum(VecTraits::svptrue(), src_row1, 1); | |
210 | 56 | acc0 = O::operation(VecTraits::svptrue(), acc0, | |
211 | 28 | O::operation(VecTraits::svptrue(), row00, row10)); | |
212 | 56 | acc1 = O::operation(VecTraits::svptrue(), acc1, | |
213 | 28 | O::operation(VecTraits::svptrue(), row01, row11)); | |
214 | 28 | src_rows += step; | |
215 | 28 | }); | |
216 | |||
217 | 100 | loop.tail([&](size_t /* index */) // NOLINT(readability/casting) | |
218 | KLEIDICV_STREAMING { | ||
219 | 40 | const ScalarType *src_row = &src_rows[index]; | |
220 | 40 | auto row0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
221 | 40 | auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
222 | 40 | acc0 = O::operation(VecTraits::svptrue(), acc0, row0); | |
223 | 40 | acc1 = O::operation(VecTraits::svptrue(), acc1, row1); | |
224 | 40 | ++src_rows; | |
225 | 40 | }); | |
226 | |||
227 | // Save partial results which do not contain the first row. | ||
228 | 60 | auto partial_acc0 = acc0; | |
229 | 60 | auto partial_acc1 = acc1; | |
230 | |||
231 | // Take the first row into account. | ||
232 | 60 | acc0 = O::operation(VecTraits::svptrue(), acc0, first_row0); | |
233 | 60 | acc1 = O::operation(VecTraits::svptrue(), acc1, first_row1); | |
234 | |||
235 | // Store the results. | ||
236 | 60 | ScalarType *dst_row = &dst_rows[index]; | |
237 | 60 | svst1(VecTraits::svptrue(), &dst_row[0], acc0); | |
238 | 60 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1); | |
239 | |||
240 | // Try to process one more row, because it is relatively cheap to do so. | ||
241 |
4/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 26 times.
|
60 | if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) { |
242 | 10 | return; | |
243 | } | ||
244 | |||
245 | 50 | ++dst_rows; | |
246 | |||
247 | 50 | src_row = &src_rows[index]; | |
248 | 50 | auto next_row0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
249 | 50 | auto next_row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
250 | |||
251 | 50 | acc0 = O::operation(VecTraits::svptrue(), partial_acc0, next_row0); | |
252 | 50 | acc1 = O::operation(VecTraits::svptrue(), partial_acc1, next_row1); | |
253 | |||
254 | 50 | dst_row = &dst_rows[index]; | |
255 | 50 | svst1(VecTraits::svptrue(), &dst_row[0], acc0); | |
256 | 50 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1); | |
257 | 60 | } | |
258 | |||
259 | 2670 | void vector_path(svbool_t pg, IndirectRows<ScalarType> src_rows, | |
260 | Rows<ScalarType> dst_rows, const size_t index, | ||
261 | const size_t height) KLEIDICV_STREAMING { | ||
262 | 2670 | auto first_row = svld1(pg, &src_rows[index]); | |
263 | 2670 | ++src_rows; | |
264 | |||
265 | 2670 | auto acc = svld1(pg, &src_rows[index]); | |
266 | 2670 | ++src_rows; | |
267 | |||
268 | 2670 | LoopUnroll loop{kernel_.height() - 2, 2}; | |
269 | |||
270 | 4526 | loop.unroll_once([&](size_t step) KLEIDICV_STREAMING { | |
271 | 1856 | auto row0 = svld1(pg, &src_rows.at(0)[index]); | |
272 | 1856 | auto row1 = svld1(pg, &src_rows.at(1)[index]); | |
273 | 1856 | acc = O::operation(pg, acc, O::operation(pg, row0, row1)); | |
274 | 1856 | src_rows += step; | |
275 | 1856 | }); | |
276 | |||
277 | 4736 | loop.tail([&](size_t /* index */) // NOLINT(readability/casting) | |
278 | KLEIDICV_STREAMING { | ||
279 | 2066 | auto row = svld1(pg, &src_rows[index]); | |
280 | 2066 | acc = O::operation(pg, acc, row); | |
281 | 2066 | ++src_rows; | |
282 | 2066 | }); | |
283 | |||
284 | // Save partial result which does not contain the first row. | ||
285 | 2670 | auto partial_acc = acc; | |
286 | |||
287 | // Take the first row into account. | ||
288 | 2670 | acc = O::operation(pg, acc, first_row); | |
289 | |||
290 | // Store the results. | ||
291 | 2670 | svst1(pg, &dst_rows[index], acc); | |
292 | |||
293 | // Try to process one more row, because it is relatively cheap to do so. | ||
294 |
4/4✓ Branch 0 taken 134 times.
✓ Branch 1 taken 1348 times.
✓ Branch 2 taken 143 times.
✓ Branch 3 taken 1045 times.
|
2670 | if (KLEIDICV_UNLIKELY((height + 1) >= rect_.height())) { |
295 | 277 | return; | |
296 | } | ||
297 | |||
298 | 2393 | ++dst_rows; | |
299 | |||
300 | 2393 | auto next_row = svld1(pg, &src_rows[index]); | |
301 | 2393 | acc = O::operation(pg, partial_acc, next_row); | |
302 | 2393 | svst1(pg, &dst_rows[index], acc); | |
303 | 2670 | } | |
304 | |||
305 | Rectangle rect_; | ||
306 | Rectangle kernel_; | ||
307 | }; // end of class VerticalOp<ScalarType, )> | ||
308 | |||
309 | template <typename ScalarType, typename O> | ||
310 | class HorizontalOp final { | ||
311 | public: | ||
312 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
313 | |||
314 | 6596 | HorizontalOp(Rectangle rect, Rectangle kernel) KLEIDICV_STREAMING | |
315 | 6596 | : rect_(rect), | |
316 | 6596 | kernel_(kernel) {} | |
317 | |||
318 | 6596 | void process_rows(Rows<const ScalarType> src_rows, | |
319 | Rows<ScalarType> dst_rows) KLEIDICV_STREAMING { | ||
320 | // Iterate across the rows from top to bottom. | ||
321 |
4/4✓ Branch 0 taken 3734 times.
✓ Branch 1 taken 3734 times.
✓ Branch 2 taken 2862 times.
✓ Branch 3 taken 2862 times.
|
13192 | for (size_t height = 0; height < rect_.height(); ++height) { |
322 | // Iterate across the columns from left to right. | ||
323 | 13192 | LoopUnroll2 loop{rect_.width() * src_rows.channels(), | |
324 | 6596 | VecTraits::num_lanes()}; | |
325 | // clang-format off | ||
326 | 6732 | loop.unroll_four_times([&](size_t index) KLEIDICV_STREAMING { | |
327 | 136 | vector_path_4x(src_rows, dst_rows, index); | |
328 | 136 | }) | |
329 | 6768 | .unroll_twice([&](size_t index) KLEIDICV_STREAMING { | |
330 | 172 | vector_path_2x(src_rows, dst_rows, index); | |
331 | 172 | }) | |
332 | 13192 | .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING { | |
333 | 6596 | svbool_t pg = VecTraits::svwhilelt(index, length); | |
334 |
4/4✓ Branch 0 taken 4534 times.
✓ Branch 1 taken 3734 times.
✓ Branch 2 taken 3348 times.
✓ Branch 3 taken 2862 times.
|
14478 | while (svptest_first(VecTraits::svptrue(), pg)) { |
335 | 7882 | vector_path(pg, src_rows, dst_rows, index); | |
336 | 7882 | index += VecTraits::num_lanes(); | |
337 | 7882 | pg = VecTraits::svwhilelt(index, length); | |
338 | } | ||
339 | 6596 | }); | |
340 | // clang-format on | ||
341 | 6596 | ++src_rows; | |
342 | 6596 | ++dst_rows; | |
343 | 6596 | } | |
344 | 6596 | } | |
345 | |||
346 | private: | ||
347 | 136 | void vector_path_4x(Rows<const ScalarType> src_rows, | |
348 | Rows<ScalarType> dst_rows, | ||
349 | const size_t index) KLEIDICV_STREAMING { | ||
350 | 136 | const auto *src_row = &src_rows[index]; | |
351 | 136 | auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
352 | 136 | auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
353 | 136 | auto acc2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2); | |
354 | 136 | auto acc3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3); | |
355 | |||
356 |
4/4✓ Branch 0 taken 68 times.
✓ Branch 1 taken 184 times.
✓ Branch 2 taken 68 times.
✓ Branch 3 taken 184 times.
|
504 | for (size_t width = 1; width < kernel_.width(); ++width) { |
357 | 368 | src_row = &src_rows[index + width * src_rows.channels()]; | |
358 | 368 | auto row0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
359 | 368 | auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
360 | 368 | auto row2 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 2); | |
361 | 368 | auto row3 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 3); | |
362 | 368 | acc0 = O::operation(VecTraits::svptrue(), acc0, row0); | |
363 | 368 | acc1 = O::operation(VecTraits::svptrue(), acc1, row1); | |
364 | 368 | acc2 = O::operation(VecTraits::svptrue(), acc2, row2); | |
365 | 368 | acc3 = O::operation(VecTraits::svptrue(), acc3, row3); | |
366 | 368 | } | |
367 | |||
368 | 136 | auto dst_row = &dst_rows[index]; | |
369 | 136 | svst1(VecTraits::svptrue(), &dst_row[0], acc0); | |
370 | 136 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1); | |
371 | 136 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 2, acc2); | |
372 | 136 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 3, acc3); | |
373 | 136 | } | |
374 | |||
375 | 172 | void vector_path_2x(Rows<const ScalarType> src_rows, | |
376 | Rows<ScalarType> dst_rows, | ||
377 | const size_t index) KLEIDICV_STREAMING { | ||
378 | 172 | const auto *src_row = &src_rows[index]; | |
379 | 172 | auto acc0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
380 | 172 | auto acc1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
381 | |||
382 |
4/4✓ Branch 0 taken 80 times.
✓ Branch 1 taken 208 times.
✓ Branch 2 taken 92 times.
✓ Branch 3 taken 340 times.
|
720 | for (size_t width = 1; width < kernel_.width(); ++width) { |
383 | 548 | src_row = &src_rows[index + width * src_rows.channels()]; | |
384 | 548 | auto row0 = svld1(VecTraits::svptrue(), &src_row[0]); | |
385 | 548 | auto row1 = svld1_vnum(VecTraits::svptrue(), &src_row[0], 1); | |
386 | 548 | acc0 = O::operation(VecTraits::svptrue(), acc0, row0); | |
387 | 548 | acc1 = O::operation(VecTraits::svptrue(), acc1, row1); | |
388 | 548 | } | |
389 | |||
390 | 172 | auto dst_row = &dst_rows[index]; | |
391 | 172 | svst1(VecTraits::svptrue(), &dst_row[0], acc0); | |
392 | 172 | svst1_vnum(VecTraits::svptrue(), &dst_row[0], 1, acc1); | |
393 | 172 | } | |
394 | |||
395 | 7882 | void vector_path(svbool_t pg, Rows<const ScalarType> src_rows, | |
396 | Rows<ScalarType> dst_rows, | ||
397 | const size_t index) KLEIDICV_STREAMING { | ||
398 | 7882 | auto acc = svld1(pg, &src_rows[index]); | |
399 | |||
400 |
4/4✓ Branch 0 taken 4534 times.
✓ Branch 1 taken 10630 times.
✓ Branch 2 taken 3348 times.
✓ Branch 3 taken 10506 times.
|
29018 | for (size_t width = 1; width < kernel_.width(); ++width) { |
401 | 21136 | const auto *src_row = &src_rows[index + width * src_rows.channels()]; | |
402 | 21136 | acc = O::operation(pg, acc, svld1(pg, &src_row[0])); | |
403 | 21136 | } | |
404 | |||
405 | 7882 | svst1(pg, &dst_rows[index], acc); | |
406 | 7882 | } | |
407 | |||
408 | Rectangle rect_; | ||
409 | Rectangle kernel_; | ||
410 | }; // end of class HorizontalOp<ScalarType> | ||
411 | |||
412 | template <typename ScalarType> | ||
413 | class Min final { | ||
414 | public: | ||
415 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
416 | using VectorType = typename VecTraits::VectorType; | ||
417 | |||
418 | 16617 | static VectorType operation(svbool_t pg, VectorType lhs, | |
419 | VectorType rhs) KLEIDICV_STREAMING { | ||
420 | 16617 | return svmin_x(pg, lhs, rhs); | |
421 | } | ||
422 | }; // end of class Min<ScalarType> | ||
423 | |||
424 | template <typename ScalarType> | ||
425 | class Max final { | ||
426 | public: | ||
427 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
428 | using VectorType = typename VecTraits::VectorType; | ||
429 | |||
430 | 19012 | static VectorType operation(svbool_t pg, VectorType lhs, | |
431 | VectorType rhs) KLEIDICV_STREAMING { | ||
432 | 19012 | return svmax_x(pg, lhs, rhs); | |
433 | } | ||
434 | }; // end of class Max<ScalarType> | ||
435 | |||
436 | template <typename T> | ||
437 | using VerticalMin = VerticalOp<T, Min<T>>; | ||
438 | template <typename T> | ||
439 | using VerticalMax = VerticalOp<T, Max<T>>; | ||
440 | |||
441 | template <typename T> | ||
442 | using HorizontalMin = HorizontalOp<T, Min<T>>; | ||
443 | template <typename T> | ||
444 | using HorizontalMax = HorizontalOp<T, Max<T>>; | ||
445 | |||
446 | template <typename ScalarType, typename CopyDataOperation> | ||
447 | class DilateOperation final { | ||
448 | public: | ||
449 | using SourceType = ScalarType; | ||
450 | using BufferType = ScalarType; | ||
451 | using DestinationType = ScalarType; | ||
452 | using CopyData = CopyDataOperation; | ||
453 | |||
454 | 408 | explicit DilateOperation(Rectangle kernel) KLEIDICV_STREAMING | |
455 | 408 | : kernel_{kernel} {} | |
456 | |||
457 | 3734 | void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows, | |
458 | Rows<BufferType> dst_rows) KLEIDICV_STREAMING { | ||
459 | 3734 | HorizontalMax<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows); | |
460 | 3734 | } | |
461 | |||
462 | 430 | void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows, | |
463 | Rows<DestinationType> dst_rows) KLEIDICV_STREAMING { | ||
464 | 430 | VerticalMax<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows); | |
465 | 430 | } | |
466 | |||
467 | private: | ||
468 | Rectangle kernel_; | ||
469 | }; // end of class DilateOperation<ScalarType> | ||
470 | |||
471 | template <typename T, typename CopyOperation> | ||
472 | 372 | static kleidicv_error_t dilate_sc( | |
473 | const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, | ||
474 | size_t height, kleidicv_morphology_context_t *context) KLEIDICV_STREAMING { | ||
475 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 370 times.
|
372 | CHECK_POINTERS(context); |
476 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 368 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 368 times.
|
370 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
477 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 366 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 366 times.
|
368 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
478 |
6/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 364 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 362 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 362 times.
|
366 | CHECK_IMAGE_SIZE(width, height); |
479 | |||
480 | 362 | auto *workspace = reinterpret_cast<MorphologyWorkspace *>(context); | |
481 | |||
482 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 360 times.
|
362 | if (workspace->type_size() != sizeof(T)) { |
483 | 2 | return KLEIDICV_ERROR_CONTEXT_MISMATCH; | |
484 | } | ||
485 | |||
486 | 360 | Rectangle rect{width, height}; | |
487 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 356 times.
|
360 | if (workspace->image_size() != rect) { |
488 | 4 | return KLEIDICV_ERROR_CONTEXT_MISMATCH; | |
489 | } | ||
490 | |||
491 | // Currently valid, will need to be changed if morphology supports more border | ||
492 | // types, like KLEIDICV_BORDER_TYPE_REVERSE. | ||
493 | 356 | Rectangle kernel{workspace->kernel()}; | |
494 |
4/4✓ Branch 0 taken 348 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 344 times.
|
356 | if (width < kernel.width() - 1 || height < kernel.height() - 1) { |
495 | 12 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
496 | } | ||
497 | |||
498 | 344 | Rows<const T> src_rows{src, src_stride, workspace->channels()}; | |
499 | 344 | Rows<T> dst_rows{dst, dst_stride, workspace->channels()}; | |
500 | 344 | Margin margin{workspace->kernel(), workspace->anchor()}; | |
501 | |||
502 | 344 | Rows<const T> current_src_rows = src_rows; | |
503 | 344 | Rows<T> current_dst_rows = dst_rows; | |
504 |
2/2✓ Branch 0 taken 408 times.
✓ Branch 1 taken 344 times.
|
752 | for (size_t iteration = 0; iteration < workspace->iterations(); ++iteration) { |
505 | 408 | DilateOperation<T, CopyOperation> operation{kernel}; | |
506 | 816 | workspace->process(rect, current_src_rows, current_dst_rows, margin, | |
507 | 408 | workspace->border_type(), operation); | |
508 | // Update source for the next iteration. | ||
509 | 408 | current_src_rows = dst_rows; | |
510 | 408 | } | |
511 | 344 | return KLEIDICV_OK; | |
512 | 372 | } | |
513 | |||
514 | // Helper structure for erode. | ||
515 | template <typename ScalarType, typename CopyDataOperation> | ||
516 | class ErodeOperation final { | ||
517 | public: | ||
518 | using SourceType = ScalarType; | ||
519 | using BufferType = ScalarType; | ||
520 | using DestinationType = ScalarType; | ||
521 | using CopyData = CopyDataOperation; | ||
522 | |||
523 | 344 | explicit ErodeOperation(Rectangle kernel) KLEIDICV_STREAMING | |
524 | 344 | : kernel_{kernel} {} | |
525 | |||
526 | 2862 | void process_horizontal(Rectangle rect, Rows<const SourceType> src_rows, | |
527 | Rows<BufferType> dst_rows) KLEIDICV_STREAMING { | ||
528 | 2862 | HorizontalMin<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows); | |
529 | 2862 | } | |
530 | |||
531 | 366 | void process_vertical(Rectangle rect, IndirectRows<BufferType> src_rows, | |
532 | Rows<DestinationType> dst_rows) KLEIDICV_STREAMING { | ||
533 | 366 | VerticalMin<ScalarType>{rect, kernel_}.process_rows(src_rows, dst_rows); | |
534 | 366 | } | |
535 | |||
536 | private: | ||
537 | Rectangle kernel_; | ||
538 | }; // end of class ErodeOperation<ScalarType> | ||
539 | |||
540 | template <typename T, typename CopyOperation> | ||
541 | 340 | static kleidicv_error_t erode_sc( | |
542 | const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, | ||
543 | size_t height, kleidicv_morphology_context_t *context) KLEIDICV_STREAMING { | ||
544 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 338 times.
|
340 | CHECK_POINTERS(context); |
545 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 336 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 336 times.
|
338 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
546 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 334 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 334 times.
|
336 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
547 |
6/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 332 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 330 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 330 times.
|
334 | CHECK_IMAGE_SIZE(width, height); |
548 | |||
549 | 330 | auto *workspace = reinterpret_cast<MorphologyWorkspace *>(context); | |
550 | |||
551 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 328 times.
|
330 | if (workspace->type_size() != sizeof(T)) { |
552 | 2 | return KLEIDICV_ERROR_CONTEXT_MISMATCH; | |
553 | } | ||
554 | |||
555 | 328 | Rectangle rect{width, height}; | |
556 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 324 times.
|
328 | if (workspace->image_size() != rect) { |
557 | 4 | return KLEIDICV_ERROR_CONTEXT_MISMATCH; | |
558 | } | ||
559 | |||
560 | // Currently valid, will need to be changed if morphology supports more border | ||
561 | // types, like KLEIDICV_BORDER_TYPE_REVERSE. | ||
562 | 324 | Rectangle kernel{workspace->kernel()}; | |
563 |
4/4✓ Branch 0 taken 316 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 312 times.
|
324 | if (width < kernel.width() - 1 || height < kernel.height() - 1) { |
564 | 12 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
565 | } | ||
566 | |||
567 | 312 | Rows<const T> src_rows{src, src_stride, workspace->channels()}; | |
568 | 312 | Rows<T> dst_rows{dst, dst_stride, workspace->channels()}; | |
569 | 312 | Margin margin{workspace->kernel(), workspace->anchor()}; | |
570 | |||
571 | 312 | Rows<const T> current_src_rows = src_rows; | |
572 | 312 | Rows<T> current_dst_rows = dst_rows; | |
573 |
2/2✓ Branch 0 taken 344 times.
✓ Branch 1 taken 312 times.
|
656 | for (size_t iteration = 0; iteration < workspace->iterations(); ++iteration) { |
574 | 344 | ErodeOperation<T, CopyOperation> operation{kernel}; | |
575 | 688 | workspace->process(rect, current_src_rows, current_dst_rows, margin, | |
576 | 344 | workspace->border_type(), operation); | |
577 | // Update source for the next iteration. | ||
578 | 344 | current_src_rows = dst_rows; | |
579 | 344 | } | |
580 | 312 | return KLEIDICV_OK; | |
581 | 340 | } | |
582 | |||
583 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
584 | |||
585 | #endif // KLEIDICV_MORPHOLOGY_SC_H | ||
586 |