KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/filters/filter_2d_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 134 134 100.0%
Functions: 378 378 100.0%
Branches: 56 56 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_FILTER_2D_SC_H
6 #define KLEIDICV_FILTER_2D_SC_H
7
8 #include "filter_2d_window_loader_3x3.h"
9 #include "filter_2d_window_loader_5x5.h"
10 #include "filter_2d_window_loader_7x7.h"
11 #include "kleidicv/sve2.h"
12 #include "process_filter_2d.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
16 template <typename SourceType, typename DestinationType,
17 typename WindowLoaderType>
18 class Filter2D3x3VectorOperations {
19 public:
20 using BorderInfoType =
21 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, 3>;
22 using BorderOffsets = typename BorderInfoType::Offsets;
23
24 template <typename InnerFilterType, typename SourceVectorType,
25 typename DestinationVectorType>
26 3708 static void process_one_element_with_vector_operation(
27 svbool_t pg, Rows<const SourceType> src_rows,
28 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
29 BorderOffsets window_col_offsets, size_t index,
30 const InnerFilterType& filter_) KLEIDICV_STREAMING {
31 3708 SourceVectorType src_0_0, src_0_1, src_0_2, src_1_0, src_1_1, src_1_2,
32 src_2_0, src_2_1, src_2_2;
33 3708 DestinationVectorType dst_vec;
34 11124 ScalableVectorArray2D<SourceVectorType, 3, 3> KernelWindow = {{
35 3708 {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2)},
36 3708 {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2)},
37 3708 {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2)},
38 }};
39
40 37080 auto load_array_element = [&](const SourceType& x)
41 33372 KLEIDICV_STREAMING { return svld1(pg, &x); };
42
43 7416 WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows,
44 3708 window_row_offsets, window_col_offsets,
45 3708 index);
46 3708 filter_.vector_path(pg, KernelWindow, dst_vec);
47
48 3708 svst1(pg, &dst_rows[index], dst_vec);
49 3708 }
50
51 template <typename InnerFilterType, typename SourceVectorType,
52 typename DestinationVectorType>
53 17638 static void process_two_elements_with_vector_operation(
54 svbool_t pg, Rows<const SourceType> src_rows,
55 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets_0,
56 BorderOffsets window_row_offsets_1, BorderOffsets window_col_offsets,
57 size_t index, const InnerFilterType& filter_) KLEIDICV_STREAMING {
58 17638 SourceVectorType src_0_0, src_0_1, src_0_2, src_1_0, src_1_1, src_1_2,
59 src_2_0, src_2_1, src_2_2, src_3_0, src_3_1, src_3_2;
60 17638 DestinationVectorType dst_vec_0, dst_vec_1;
61 70552 ScalableVectorArray2D<SourceVectorType, 4, 3> KernelWindow = {{
62 17638 {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2)},
63 17638 {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2)},
64 17638 {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2)},
65 17638 {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2)},
66 }};
67
68 229294 auto load_array_element = [&](const SourceType& x)
69 211656 KLEIDICV_STREAMING { return svld1(pg, &x); };
70
71 17638 WindowLoaderType::load_window_to_handle_dual_rows(
72 17638 KernelWindow, load_array_element, src_rows, window_row_offsets_0,
73 17638 window_row_offsets_1, window_col_offsets, index);
74
75 17638 filter_.vector_path_for_dual_row_handling(pg, KernelWindow, dst_vec_0,
76 dst_vec_1);
77 17638 svst1(pg, &dst_rows.at(0, 0)[index], dst_vec_0);
78 17638 svst1(pg, &dst_rows.at(1, 0)[index], dst_vec_1);
79 17638 }
80 };
81
82 template <typename SourceType, typename DestinationType,
83 typename WindowLoaderType>
84 class Filter2D5x5VectorOperations {
85 public:
86 using BorderInfoType =
87 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, 5>;
88 using BorderOffsets = typename BorderInfoType::Offsets;
89
90 template <typename InnerFilterType, typename SourceVectorType,
91 typename DestinationVectorType>
92 75826 static void process_one_element_with_vector_operation(
93 svbool_t pg, Rows<const SourceType> src_rows,
94 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
95 BorderOffsets window_col_offsets, size_t index,
96 const InnerFilterType& filter_) KLEIDICV_STREAMING {
97 75826 SourceVectorType src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_1_0,
98 src_1_1, src_1_2, src_1_3, src_1_4, src_2_0, src_2_1, src_2_2, src_2_3,
99 src_2_4, src_3_0, src_3_1, src_3_2, src_3_3, src_3_4, src_4_0, src_4_1,
100 src_4_2, src_4_3, src_4_4;
101 75826 DestinationVectorType output_vector;
102 // Initialization
103 379130 ScalableVectorArray2D<SourceVectorType, 5, 5> KernelWindow = {{
104 151652 {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2),
105 151652 std::ref(src_0_3), std::ref(src_0_4)},
106 151652 {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2),
107 151652 std::ref(src_1_3), std::ref(src_1_4)},
108 151652 {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2),
109 151652 std::ref(src_2_3), std::ref(src_2_4)},
110 151652 {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2),
111 151652 std::ref(src_3_3), std::ref(src_3_4)},
112 151652 {std::ref(src_4_0), std::ref(src_4_1), std::ref(src_4_2),
113 151652 std::ref(src_4_3), std::ref(src_4_4)},
114 }};
115
116 1971476 auto load_array_element = [&](const SourceType& x)
117 1895650 KLEIDICV_STREAMING { return svld1(pg, &x); };
118
119 151652 WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows,
120 75826 window_row_offsets, window_col_offsets,
121 75826 index);
122 75826 filter_.vector_path(pg, KernelWindow, output_vector);
123 75826 svst1(pg, &dst_rows[index], output_vector);
124 75826 }
125 };
126
127 template <typename SourceType, typename DestinationType,
128 typename WindowLoaderType>
129 class Filter2D7x7VectorOperations {
130 public:
131 using BorderInfoType =
132 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, 7>;
133 using BorderOffsets = typename BorderInfoType::Offsets;
134
135 template <typename InnerFilterType, typename SourceVectorType,
136 typename DestinationVectorType>
137 121900 static void process_one_element_with_vector_operation(
138 svbool_t pg, Rows<const SourceType> src_rows,
139 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
140 BorderOffsets window_col_offsets, size_t index,
141 const InnerFilterType& filter_) KLEIDICV_STREAMING {
142 121900 SourceVectorType src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_0_5,
143 src_0_6, src_1_0, src_1_1, src_1_2, src_1_3, src_1_4, src_1_5, src_1_6,
144 src_2_0, src_2_1, src_2_2, src_2_3, src_2_4, src_2_5, src_2_6, src_3_0,
145 src_3_1, src_3_2, src_3_3, src_3_4, src_3_5, src_3_6, src_4_0, src_4_1,
146 src_4_2, src_4_3, src_4_4, src_4_5, src_4_6, src_5_0, src_5_1, src_5_2,
147 src_5_3, src_5_4, src_5_5, src_5_6, src_6_0, src_6_1, src_6_2, src_6_3,
148 src_6_4, src_6_5, src_6_6;
149 121900 DestinationVectorType output_vector;
150
151 // Initialization
152 853300 ScalableVectorArray2D<SourceVectorType, 7, 7> KernelWindow = {{
153 487600 {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2),
154 365700 std::ref(src_0_3), std::ref(src_0_4), std::ref(src_0_5),
155 121900 std::ref(src_0_6)},
156 487600 {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2),
157 365700 std::ref(src_1_3), std::ref(src_1_4), std::ref(src_1_5),
158 121900 std::ref(src_1_6)},
159 487600 {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2),
160 365700 std::ref(src_2_3), std::ref(src_2_4), std::ref(src_2_5),
161 121900 std::ref(src_2_6)},
162 487600 {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2),
163 365700 std::ref(src_3_3), std::ref(src_3_4), std::ref(src_3_5),
164 121900 std::ref(src_3_6)},
165 487600 {std::ref(src_4_0), std::ref(src_4_1), std::ref(src_4_2),
166 365700 std::ref(src_4_3), std::ref(src_4_4), std::ref(src_4_5),
167 121900 std::ref(src_4_6)},
168 487600 {std::ref(src_5_0), std::ref(src_5_1), std::ref(src_5_2),
169 365700 std::ref(src_5_3), std::ref(src_5_4), std::ref(src_5_5),
170 121900 std::ref(src_5_6)},
171 487600 {std::ref(src_6_0), std::ref(src_6_1), std::ref(src_6_2),
172 365700 std::ref(src_6_3), std::ref(src_6_4), std::ref(src_6_5),
173 121900 std::ref(src_6_6)},
174 }};
175
176 6095000 auto load_array_element = [&](const SourceType& x)
177 5973100 KLEIDICV_STREAMING { return svld1(pg, &x); };
178
179 243800 WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows,
180 121900 window_row_offsets, window_col_offsets,
181 121900 index);
182 121900 filter_.vector_path(pg, KernelWindow, output_vector);
183 121900 svst1(pg, &dst_rows[index], output_vector);
184 121900 }
185 };
186
187 template <typename InnerFilterType, size_t KSize,
188 typename VectorOperationProviderType>
189 class Filter2d {
190 public:
191 using SourceType = typename InnerFilterType::SourceType;
192 using DestinationType = typename InnerFilterType::DestinationType;
193 using SourceVecTraits =
194 typename KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
195 using DestinationVecTraits =
196 typename KLEIDICV_TARGET_NAMESPACE::VecTraits<DestinationType>;
197 using SourceVectorType = typename SourceVecTraits::VectorType;
198 using DestinationVectorType = typename DestinationVecTraits::VectorType;
199 using BorderInfoType =
200 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, KSize>;
201 using BorderType = FixedBorderType;
202 using BorderOffsets = typename BorderInfoType::Offsets;
203 static constexpr size_t kMargin = KSize / 2UL;
204 3220 explicit Filter2d(InnerFilterType filter) KLEIDICV_STREAMING
205 3220 : filter_{filter} {}
206
207 17352 void process_pixels_without_horizontal_borders(
208 size_t width, Rows<const SourceType> src_rows,
209 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
210 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
211 17352 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
212
213 44748 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
214 27396 svbool_t pg = SourceVecTraits::svptrue();
215 27396 VectorOperationProviderType::
216 template process_one_element_with_vector_operation<
217 InnerFilterType, SourceVectorType, DestinationVectorType>(
218 27396 pg, src_rows, dst_rows, window_row_offsets, window_col_offsets,
219 27396 index, filter_);
220 27396 });
221
222 26350 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
223 8998 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
224 8998 VectorOperationProviderType::
225 template process_one_element_with_vector_operation<
226 InnerFilterType, SourceVectorType, DestinationVectorType>(
227 8998 pg, src_rows, dst_rows, window_row_offsets, window_col_offsets,
228 8998 index, filter_);
229 8998 });
230 17352 }
231
232 85408 void process_one_pixel_with_horizontal_borders(
233 Rows<const SourceType> src_rows, Rows<DestinationType> dst_rows,
234 BorderOffsets window_row_offsets,
235 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
236
42/42
✓ Branch 0 taken 128 times.
✓ Branch 1 taken 320 times.
✓ Branch 2 taken 2464 times.
✓ Branch 3 taken 6160 times.
✓ Branch 4 taken 4992 times.
✓ Branch 5 taken 12480 times.
✓ Branch 6 taken 144 times.
✓ Branch 7 taken 336 times.
✓ Branch 8 taken 6544 times.
✓ Branch 9 taken 10240 times.
✓ Branch 10 taken 8976 times.
✓ Branch 11 taken 16464 times.
✓ Branch 12 taken 144 times.
✓ Branch 13 taken 336 times.
✓ Branch 14 taken 6544 times.
✓ Branch 15 taken 10240 times.
✓ Branch 16 taken 8976 times.
✓ Branch 17 taken 16464 times.
✓ Branch 18 taken 144 times.
✓ Branch 19 taken 336 times.
✓ Branch 20 taken 6544 times.
✓ Branch 21 taken 10240 times.
✓ Branch 22 taken 8976 times.
✓ Branch 23 taken 16464 times.
✓ Branch 24 taken 128 times.
✓ Branch 25 taken 320 times.
✓ Branch 26 taken 2464 times.
✓ Branch 27 taken 6160 times.
✓ Branch 28 taken 4992 times.
✓ Branch 29 taken 12480 times.
✓ Branch 30 taken 128 times.
✓ Branch 31 taken 320 times.
✓ Branch 32 taken 2464 times.
✓ Branch 33 taken 6160 times.
✓ Branch 34 taken 4992 times.
✓ Branch 35 taken 12480 times.
✓ Branch 36 taken 144 times.
✓ Branch 37 taken 336 times.
✓ Branch 38 taken 6544 times.
✓ Branch 39 taken 10240 times.
✓ Branch 40 taken 8976 times.
✓ Branch 41 taken 16464 times.
250448 for (size_t index = 0; index < src_rows.channels(); ++index) {
237 165040 VectorOperationProviderType::
238 template process_one_element_with_vector_operation<
239 InnerFilterType, SourceVectorType, DestinationVectorType>(
240 165040 SourceVecTraits::template svptrue_pat<SV_VL1>(), src_rows,
241 165040 dst_rows, window_row_offsets, window_col_offsets, index, filter_);
242 165040 }
243 85408 }
244
245 4544 void process_pixels_of_dual_rows_without_horizontal_borders(
246 size_t width, Rows<const SourceType> src_rows,
247 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets_0,
248 BorderOffsets window_row_offsets_1,
249 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
250 4544 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
251 8018 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
252 3474 svbool_t pg = SourceVecTraits::svptrue();
253 3474 VectorOperationProviderType::
254 template process_two_elements_with_vector_operation<
255 InnerFilterType, SourceVectorType, DestinationVectorType>(
256 3474 pg, src_rows, dst_rows, window_row_offsets_0,
257 3474 window_row_offsets_1, window_col_offsets, index, filter_);
258 3474 });
259
260 6932 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
261 2388 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
262 2388 VectorOperationProviderType::
263 template process_two_elements_with_vector_operation<
264 InnerFilterType, SourceVectorType, DestinationVectorType>(
265 2388 pg, src_rows, dst_rows, window_row_offsets_0,
266 2388 window_row_offsets_1, window_col_offsets, index, filter_);
267 2388 });
268 4544 }
269
270 // Processes two vertically adjacent pixels in a single column
271 9088 void process_two_pixels_with_horizontal_borders(
272 Rows<const SourceType> src_rows, Rows<DestinationType> dst_rows,
273 BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1,
274 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
275
14/14
✓ Branch 0 taken 256 times.
✓ Branch 1 taken 640 times.
✓ Branch 2 taken 2080 times.
✓ Branch 3 taken 2464 times.
✓ Branch 4 taken 2080 times.
✓ Branch 5 taken 2464 times.
✓ Branch 6 taken 2080 times.
✓ Branch 7 taken 2464 times.
✓ Branch 8 taken 256 times.
✓ Branch 9 taken 640 times.
✓ Branch 10 taken 256 times.
✓ Branch 11 taken 640 times.
✓ Branch 12 taken 2080 times.
✓ Branch 13 taken 2464 times.
20864 for (size_t index = 0; index < src_rows.channels(); ++index) {
276 11776 VectorOperationProviderType::
277 template process_two_elements_with_vector_operation<
278 InnerFilterType, SourceVectorType, DestinationVectorType>(
279 11776 SourceVecTraits::template svptrue_pat<SV_VL1>(), src_rows,
280 11776 dst_rows, window_row_offsets_0, window_row_offsets_1,
281 11776 window_col_offsets, index, filter_);
282 11776 }
283 9088 }
284
285 private:
286 InnerFilterType filter_;
287 };
288
289 // Shorthand for 3x3 2D filters driver type.
290 template <class InnerFilterType>
291 using Filter2D3x3 = Filter2d<
292 InnerFilterType, 3UL,
293 Filter2D3x3VectorOperations<
294 typename InnerFilterType::SourceType,
295 typename InnerFilterType::DestinationType,
296 Filter2dWindowLoader3x3<typename InnerFilterType::SourceType>>>;
297
298 template <typename InnerFilterType>
299 using Filter2D5x5 = Filter2d<
300 InnerFilterType, 5UL,
301 Filter2D5x5VectorOperations<
302 typename InnerFilterType::SourceType,
303 typename InnerFilterType::DestinationType,
304 Filter2dWindowLoader5x5<typename InnerFilterType::SourceType>>>;
305
306 template <typename InnerFilterType>
307 using Filter2D7x7 = Filter2d<
308 InnerFilterType, 7UL,
309 Filter2D7x7VectorOperations<
310 typename InnerFilterType::SourceType,
311 typename InnerFilterType::DestinationType,
312 Filter2dWindowLoader7x7<typename InnerFilterType::SourceType>>>;
313
314 } // namespace KLEIDICV_TARGET_NAMESPACE
315
316 #endif // KLEIDICV_FILTER_2D_SC_H
317