KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/filters/filter_2d_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 134 134 100.0%
Functions: 378 378 100.0%
Branches: 56 56 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_FILTER_2D_SC_H
6 #define KLEIDICV_FILTER_2D_SC_H
7
8 #include "filter_2d_window_loader_3x3.h"
9 #include "filter_2d_window_loader_5x5.h"
10 #include "filter_2d_window_loader_7x7.h"
11 #include "kleidicv/sve2.h"
12 #include "process_filter_2d.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
16 template <typename SourceType, typename DestinationType,
17 typename WindowLoaderType>
18 class Filter2D3x3VectorOperations {
19 public:
20 using BorderInfoType =
21 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, 3>;
22 using BorderOffsets = typename BorderInfoType::Offsets;
23
24 template <typename InnerFilterType, typename SourceVectorType,
25 typename DestinationVectorType>
26 5180 static void process_one_element_with_vector_operation(
27 svbool_t pg, Rows<const SourceType> src_rows,
28 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
29 BorderOffsets window_col_offsets, size_t index,
30 const InnerFilterType& filter_) KLEIDICV_STREAMING {
31 5180 SourceVectorType src_0_0, src_0_1, src_0_2, src_1_0, src_1_1, src_1_2,
32 src_2_0, src_2_1, src_2_2;
33 5180 DestinationVectorType dst_vec;
34 15540 ScalableVectorArray2D<SourceVectorType, 3, 3> KernelWindow = {{
35 5180 {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2)},
36 5180 {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2)},
37 5180 {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2)},
38 }};
39
40 51800 auto load_array_element = [&](const SourceType& x)
41 46620 KLEIDICV_STREAMING { return svld1(pg, &x); };
42
43 10360 WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows,
44 5180 window_row_offsets, window_col_offsets,
45 5180 index);
46 5180 filter_.vector_path(pg, KernelWindow, dst_vec);
47
48 5180 svst1(pg, &dst_rows[index], dst_vec);
49 5180 }
50
51 template <typename InnerFilterType, typename SourceVectorType,
52 typename DestinationVectorType>
53 25302 static void process_two_elements_with_vector_operation(
54 svbool_t pg, Rows<const SourceType> src_rows,
55 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets_0,
56 BorderOffsets window_row_offsets_1, BorderOffsets window_col_offsets,
57 size_t index, const InnerFilterType& filter_) KLEIDICV_STREAMING {
58 25302 SourceVectorType src_0_0, src_0_1, src_0_2, src_1_0, src_1_1, src_1_2,
59 src_2_0, src_2_1, src_2_2, src_3_0, src_3_1, src_3_2;
60 25302 DestinationVectorType dst_vec_0, dst_vec_1;
61 101208 ScalableVectorArray2D<SourceVectorType, 4, 3> KernelWindow = {{
62 25302 {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2)},
63 25302 {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2)},
64 25302 {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2)},
65 25302 {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2)},
66 }};
67
68 328926 auto load_array_element = [&](const SourceType& x)
69 303624 KLEIDICV_STREAMING { return svld1(pg, &x); };
70
71 25302 WindowLoaderType::load_window_to_handle_dual_rows(
72 25302 KernelWindow, load_array_element, src_rows, window_row_offsets_0,
73 25302 window_row_offsets_1, window_col_offsets, index);
74
75 25302 filter_.vector_path_for_dual_row_handling(pg, KernelWindow, dst_vec_0,
76 dst_vec_1);
77 25302 svst1(pg, &dst_rows.at(0, 0)[index], dst_vec_0);
78 25302 svst1(pg, &dst_rows.at(1, 0)[index], dst_vec_1);
79 25302 }
80 };
81
82 template <typename SourceType, typename DestinationType,
83 typename WindowLoaderType>
84 class Filter2D5x5VectorOperations {
85 public:
86 using BorderInfoType =
87 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, 5>;
88 using BorderOffsets = typename BorderInfoType::Offsets;
89
90 template <typename InnerFilterType, typename SourceVectorType,
91 typename DestinationVectorType>
92 109904 static void process_one_element_with_vector_operation(
93 svbool_t pg, Rows<const SourceType> src_rows,
94 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
95 BorderOffsets window_col_offsets, size_t index,
96 const InnerFilterType& filter_) KLEIDICV_STREAMING {
97 109904 SourceVectorType src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_1_0,
98 src_1_1, src_1_2, src_1_3, src_1_4, src_2_0, src_2_1, src_2_2, src_2_3,
99 src_2_4, src_3_0, src_3_1, src_3_2, src_3_3, src_3_4, src_4_0, src_4_1,
100 src_4_2, src_4_3, src_4_4;
101 109904 DestinationVectorType output_vector;
102 // Initialization
103 549520 ScalableVectorArray2D<SourceVectorType, 5, 5> KernelWindow = {{
104 219808 {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2),
105 219808 std::ref(src_0_3), std::ref(src_0_4)},
106 219808 {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2),
107 219808 std::ref(src_1_3), std::ref(src_1_4)},
108 219808 {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2),
109 219808 std::ref(src_2_3), std::ref(src_2_4)},
110 219808 {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2),
111 219808 std::ref(src_3_3), std::ref(src_3_4)},
112 219808 {std::ref(src_4_0), std::ref(src_4_1), std::ref(src_4_2),
113 219808 std::ref(src_4_3), std::ref(src_4_4)},
114 }};
115
116 2857504 auto load_array_element = [&](const SourceType& x)
117 2747600 KLEIDICV_STREAMING { return svld1(pg, &x); };
118
119 219808 WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows,
120 109904 window_row_offsets, window_col_offsets,
121 109904 index);
122 109904 filter_.vector_path(pg, KernelWindow, output_vector);
123 109904 svst1(pg, &dst_rows[index], output_vector);
124 109904 }
125 };
126
127 template <typename SourceType, typename DestinationType,
128 typename WindowLoaderType>
129 class Filter2D7x7VectorOperations {
130 public:
131 using BorderInfoType =
132 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, 7>;
133 using BorderOffsets = typename BorderInfoType::Offsets;
134
135 template <typename InnerFilterType, typename SourceVectorType,
136 typename DestinationVectorType>
137 178172 static void process_one_element_with_vector_operation(
138 svbool_t pg, Rows<const SourceType> src_rows,
139 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
140 BorderOffsets window_col_offsets, size_t index,
141 const InnerFilterType& filter_) KLEIDICV_STREAMING {
142 178172 SourceVectorType src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_0_5,
143 src_0_6, src_1_0, src_1_1, src_1_2, src_1_3, src_1_4, src_1_5, src_1_6,
144 src_2_0, src_2_1, src_2_2, src_2_3, src_2_4, src_2_5, src_2_6, src_3_0,
145 src_3_1, src_3_2, src_3_3, src_3_4, src_3_5, src_3_6, src_4_0, src_4_1,
146 src_4_2, src_4_3, src_4_4, src_4_5, src_4_6, src_5_0, src_5_1, src_5_2,
147 src_5_3, src_5_4, src_5_5, src_5_6, src_6_0, src_6_1, src_6_2, src_6_3,
148 src_6_4, src_6_5, src_6_6;
149 178172 DestinationVectorType output_vector;
150
151 // Initialization
152 1247204 ScalableVectorArray2D<SourceVectorType, 7, 7> KernelWindow = {{
153 712688 {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2),
154 534516 std::ref(src_0_3), std::ref(src_0_4), std::ref(src_0_5),
155 178172 std::ref(src_0_6)},
156 712688 {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2),
157 534516 std::ref(src_1_3), std::ref(src_1_4), std::ref(src_1_5),
158 178172 std::ref(src_1_6)},
159 712688 {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2),
160 534516 std::ref(src_2_3), std::ref(src_2_4), std::ref(src_2_5),
161 178172 std::ref(src_2_6)},
162 712688 {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2),
163 534516 std::ref(src_3_3), std::ref(src_3_4), std::ref(src_3_5),
164 178172 std::ref(src_3_6)},
165 712688 {std::ref(src_4_0), std::ref(src_4_1), std::ref(src_4_2),
166 534516 std::ref(src_4_3), std::ref(src_4_4), std::ref(src_4_5),
167 178172 std::ref(src_4_6)},
168 712688 {std::ref(src_5_0), std::ref(src_5_1), std::ref(src_5_2),
169 534516 std::ref(src_5_3), std::ref(src_5_4), std::ref(src_5_5),
170 178172 std::ref(src_5_6)},
171 712688 {std::ref(src_6_0), std::ref(src_6_1), std::ref(src_6_2),
172 534516 std::ref(src_6_3), std::ref(src_6_4), std::ref(src_6_5),
173 178172 std::ref(src_6_6)},
174 }};
175
176 8908600 auto load_array_element = [&](const SourceType& x)
177 8730428 KLEIDICV_STREAMING { return svld1(pg, &x); };
178
179 356344 WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows,
180 178172 window_row_offsets, window_col_offsets,
181 178172 index);
182 178172 filter_.vector_path(pg, KernelWindow, output_vector);
183 178172 svst1(pg, &dst_rows[index], output_vector);
184 178172 }
185 };
186
187 template <typename InnerFilterType, size_t KSize,
188 typename VectorOperationProviderType>
189 class Filter2d {
190 public:
191 using SourceType = typename InnerFilterType::SourceType;
192 using DestinationType = typename InnerFilterType::DestinationType;
193 using SourceVecTraits =
194 typename KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
195 using DestinationVecTraits =
196 typename KLEIDICV_TARGET_NAMESPACE::VecTraits<DestinationType>;
197 using SourceVectorType = typename SourceVecTraits::VectorType;
198 using DestinationVectorType = typename DestinationVecTraits::VectorType;
199 using BorderInfoType =
200 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, KSize>;
201 using BorderType = FixedBorderType;
202 using BorderOffsets = typename BorderInfoType::Offsets;
203 static constexpr size_t kMargin = KSize / 2UL;
204 4830 explicit Filter2d(InnerFilterType filter) KLEIDICV_STREAMING
205 4830 : filter_{filter} {}
206
207 26028 void process_pixels_without_horizontal_borders(
208 size_t width, Rows<const SourceType> src_rows,
209 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
210 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
211 26028 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
212
213 57826 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
214 31798 svbool_t pg = SourceVecTraits::svptrue();
215 31798 VectorOperationProviderType::
216 template process_one_element_with_vector_operation<
217 InnerFilterType, SourceVectorType, DestinationVectorType>(
218 31798 pg, src_rows, dst_rows, window_row_offsets, window_col_offsets,
219 31798 index, filter_);
220 31798 });
221
222 39926 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
223 13898 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
224 13898 VectorOperationProviderType::
225 template process_one_element_with_vector_operation<
226 InnerFilterType, SourceVectorType, DestinationVectorType>(
227 13898 pg, src_rows, dst_rows, window_row_offsets, window_col_offsets,
228 13898 index, filter_);
229 13898 });
230 26028 }
231
232 128112 void process_one_pixel_with_horizontal_borders(
233 Rows<const SourceType> src_rows, Rows<DestinationType> dst_rows,
234 BorderOffsets window_row_offsets,
235 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
236
42/42
✓ Branch 0 taken 192 times.
✓ Branch 1 taken 480 times.
✓ Branch 2 taken 3696 times.
✓ Branch 3 taken 9240 times.
✓ Branch 4 taken 7488 times.
✓ Branch 5 taken 18720 times.
✓ Branch 6 taken 216 times.
✓ Branch 7 taken 504 times.
✓ Branch 8 taken 9816 times.
✓ Branch 9 taken 15360 times.
✓ Branch 10 taken 13464 times.
✓ Branch 11 taken 24696 times.
✓ Branch 12 taken 216 times.
✓ Branch 13 taken 504 times.
✓ Branch 14 taken 9816 times.
✓ Branch 15 taken 15360 times.
✓ Branch 16 taken 13464 times.
✓ Branch 17 taken 24696 times.
✓ Branch 18 taken 216 times.
✓ Branch 19 taken 504 times.
✓ Branch 20 taken 9816 times.
✓ Branch 21 taken 15360 times.
✓ Branch 22 taken 13464 times.
✓ Branch 23 taken 24696 times.
✓ Branch 24 taken 192 times.
✓ Branch 25 taken 480 times.
✓ Branch 26 taken 3696 times.
✓ Branch 27 taken 9240 times.
✓ Branch 28 taken 7488 times.
✓ Branch 29 taken 18720 times.
✓ Branch 30 taken 192 times.
✓ Branch 31 taken 480 times.
✓ Branch 32 taken 3696 times.
✓ Branch 33 taken 9240 times.
✓ Branch 34 taken 7488 times.
✓ Branch 35 taken 18720 times.
✓ Branch 36 taken 216 times.
✓ Branch 37 taken 504 times.
✓ Branch 38 taken 9816 times.
✓ Branch 39 taken 15360 times.
✓ Branch 40 taken 13464 times.
✓ Branch 41 taken 24696 times.
375672 for (size_t index = 0; index < src_rows.channels(); ++index) {
237 247560 VectorOperationProviderType::
238 template process_one_element_with_vector_operation<
239 InnerFilterType, SourceVectorType, DestinationVectorType>(
240 247560 SourceVecTraits::template svptrue_pat<SV_VL1>(), src_rows,
241 247560 dst_rows, window_row_offsets, window_col_offsets, index, filter_);
242 247560 }
243 128112 }
244
245 6816 void process_pixels_of_dual_rows_without_horizontal_borders(
246 size_t width, Rows<const SourceType> src_rows,
247 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets_0,
248 BorderOffsets window_row_offsets_1,
249 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
250 6816 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
251 10826 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
252 4010 svbool_t pg = SourceVecTraits::svptrue();
253 4010 VectorOperationProviderType::
254 template process_two_elements_with_vector_operation<
255 InnerFilterType, SourceVectorType, DestinationVectorType>(
256 4010 pg, src_rows, dst_rows, window_row_offsets_0,
257 4010 window_row_offsets_1, window_col_offsets, index, filter_);
258 4010 });
259
260 10444 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
261 3628 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
262 3628 VectorOperationProviderType::
263 template process_two_elements_with_vector_operation<
264 InnerFilterType, SourceVectorType, DestinationVectorType>(
265 3628 pg, src_rows, dst_rows, window_row_offsets_0,
266 3628 window_row_offsets_1, window_col_offsets, index, filter_);
267 3628 });
268 6816 }
269
270 // Processes two vertically adjacent pixels in a single column
271 13632 void process_two_pixels_with_horizontal_borders(
272 Rows<const SourceType> src_rows, Rows<DestinationType> dst_rows,
273 BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1,
274 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
275
14/14
✓ Branch 0 taken 384 times.
✓ Branch 1 taken 960 times.
✓ Branch 2 taken 3120 times.
✓ Branch 3 taken 3696 times.
✓ Branch 4 taken 3120 times.
✓ Branch 5 taken 3696 times.
✓ Branch 6 taken 3120 times.
✓ Branch 7 taken 3696 times.
✓ Branch 8 taken 384 times.
✓ Branch 9 taken 960 times.
✓ Branch 10 taken 384 times.
✓ Branch 11 taken 960 times.
✓ Branch 12 taken 3120 times.
✓ Branch 13 taken 3696 times.
31296 for (size_t index = 0; index < src_rows.channels(); ++index) {
276 17664 VectorOperationProviderType::
277 template process_two_elements_with_vector_operation<
278 InnerFilterType, SourceVectorType, DestinationVectorType>(
279 17664 SourceVecTraits::template svptrue_pat<SV_VL1>(), src_rows,
280 17664 dst_rows, window_row_offsets_0, window_row_offsets_1,
281 17664 window_col_offsets, index, filter_);
282 17664 }
283 13632 }
284
285 private:
286 InnerFilterType filter_;
287 };
288
289 // Shorthand for 3x3 2D filters driver type.
290 template <class InnerFilterType>
291 using Filter2D3x3 = Filter2d<
292 InnerFilterType, 3UL,
293 Filter2D3x3VectorOperations<
294 typename InnerFilterType::SourceType,
295 typename InnerFilterType::DestinationType,
296 Filter2dWindowLoader3x3<typename InnerFilterType::SourceType>>>;
297
298 template <typename InnerFilterType>
299 using Filter2D5x5 = Filter2d<
300 InnerFilterType, 5UL,
301 Filter2D5x5VectorOperations<
302 typename InnerFilterType::SourceType,
303 typename InnerFilterType::DestinationType,
304 Filter2dWindowLoader5x5<typename InnerFilterType::SourceType>>>;
305
306 template <typename InnerFilterType>
307 using Filter2D7x7 = Filter2d<
308 InnerFilterType, 7UL,
309 Filter2D7x7VectorOperations<
310 typename InnerFilterType::SourceType,
311 typename InnerFilterType::DestinationType,
312 Filter2dWindowLoader7x7<typename InnerFilterType::SourceType>>>;
313
314 } // namespace KLEIDICV_TARGET_NAMESPACE
315
316 #endif // KLEIDICV_FILTER_2D_SC_H
317