KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/filters/separable_filter_15x15_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 183 183 100.0%
Functions: 26 26 100.0%
Branches: 2 2 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_SEPARABLE_FILTER_15X15_SC_H
6 #define KLEIDICV_SEPARABLE_FILTER_15X15_SC_H
7
8 #include "kleidicv/sve2.h"
9 #include "kleidicv/workspace/border_15x15.h"
10
11 // It is used by SVE2 and SME, the actual namespace will reflect it.
12 namespace KLEIDICV_TARGET_NAMESPACE {
13
14 // Template for drivers of separable NxM filters.
15 template <typename FilterType, const size_t S>
16 class SeparableFilter;
17
18 // Driver for a separable 15x15 filter.
19 template <typename FilterType>
20 class SeparableFilter<FilterType, 15UL> {
21 public:
22 using SourceType = typename FilterType::SourceType;
23 using BufferType = typename FilterType::BufferType;
24 using DestinationType = typename FilterType::DestinationType;
25 using SourceVecTraits =
26 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
27 using SourceVectorType = typename SourceVecTraits::VectorType;
28 using BufferVecTraits =
29 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<BufferType>;
30 using BufferVectorType = typename BufferVecTraits::VectorType;
31 using BorderInfoType =
32 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo15x15<SourceType>;
33 using BorderType = FixedBorderType;
34 using BorderOffsets = typename BorderInfoType::Offsets;
35
36 96 explicit SeparableFilter(FilterType filter) KLEIDICV_STREAMING
37 96 : filter_{filter} {}
38
39 static constexpr size_t margin = 7UL;
40
41 1368 void process_vertical(size_t width, Rows<const SourceType> src_rows,
42 Rows<BufferType> dst_rows,
43 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
44 1368 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
45
46 2712 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
47 1344 svbool_t pg_all = SourceVecTraits::svptrue();
48 1344 vertical_vector_path(pg_all, src_rows, dst_rows, border_offsets, index);
49 1344 });
50
51 2736 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
52 1368 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
53 1368 vertical_vector_path(pg, src_rows, dst_rows, border_offsets, index);
54 1368 });
55 1368 }
56
57 1368 void process_horizontal(size_t width, Rows<const BufferType> src_rows,
58 Rows<DestinationType> dst_rows,
59 BorderOffsets border_offsets) const
60 KLEIDICV_STREAMING {
61 1368 svbool_t pg_all = BufferVecTraits::svptrue();
62 1368 LoopUnroll2 loop{width * src_rows.channels(), BufferVecTraits::num_lanes()};
63
64 1704 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
65 672 horizontal_vector_path_2x(pg_all, src_rows, dst_rows, border_offsets,
66 336 index);
67 336 });
68
69 1704 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
70 336 horizontal_vector_path(pg_all, src_rows, dst_rows, border_offsets, index);
71 336 });
72
73 2400 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
74 1032 svbool_t pg = BufferVecTraits::svwhilelt(index, length);
75 1032 horizontal_vector_path(pg, src_rows, dst_rows, border_offsets, index);
76 1032 });
77 1368 }
78
79 // Processing of horizontal borders is always scalar because border offsets
80 // change for each and every element in the border.
81 19152 void process_horizontal_borders(
82 Rows<const BufferType> src_rows, Rows<DestinationType> dst_rows,
83 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
84
2/2
✓ Branch 0 taken 19152 times.
✓ Branch 1 taken 33264 times.
52416 for (size_t index = 0; index < src_rows.channels(); ++index) {
85 33264 disable_loop_vectorization();
86 33264 process_horizontal_border(src_rows, dst_rows, border_offsets, index);
87 33264 }
88 19152 }
89
90 private:
91 2712 void vertical_vector_path(svbool_t pg, Rows<const SourceType> src_rows,
92 Rows<BufferType> dst_rows,
93 BorderOffsets border_offsets,
94 size_t index) const KLEIDICV_STREAMING {
95 5424 SourceVectorType src_0 =
96 2712 svld1(pg, &src_rows.at(border_offsets.c0())[index]);
97 5424 SourceVectorType src_1 =
98 2712 svld1(pg, &src_rows.at(border_offsets.c1())[index]);
99 5424 SourceVectorType src_2 =
100 2712 svld1(pg, &src_rows.at(border_offsets.c2())[index]);
101 5424 SourceVectorType src_3 =
102 2712 svld1(pg, &src_rows.at(border_offsets.c3())[index]);
103 5424 SourceVectorType src_4 =
104 2712 svld1(pg, &src_rows.at(border_offsets.c4())[index]);
105 5424 SourceVectorType src_5 =
106 2712 svld1(pg, &src_rows.at(border_offsets.c5())[index]);
107 5424 SourceVectorType src_6 =
108 2712 svld1(pg, &src_rows.at(border_offsets.c6())[index]);
109 5424 SourceVectorType src_7 =
110 2712 svld1(pg, &src_rows.at(border_offsets.c7())[index]);
111 5424 SourceVectorType src_8 =
112 2712 svld1(pg, &src_rows.at(border_offsets.c8())[index]);
113 5424 SourceVectorType src_9 =
114 2712 svld1(pg, &src_rows.at(border_offsets.c9())[index]);
115 5424 SourceVectorType src_10 =
116 2712 svld1(pg, &src_rows.at(border_offsets.c10())[index]);
117 5424 SourceVectorType src_11 =
118 2712 svld1(pg, &src_rows.at(border_offsets.c11())[index]);
119 5424 SourceVectorType src_12 =
120 2712 svld1(pg, &src_rows.at(border_offsets.c12())[index]);
121 5424 SourceVectorType src_13 =
122 2712 svld1(pg, &src_rows.at(border_offsets.c13())[index]);
123 5424 SourceVectorType src_14 =
124 2712 svld1(pg, &src_rows.at(border_offsets.c14())[index]);
125
126 40680 std::reference_wrapper<SourceVectorType> sources[15] = {
127 21696 src_0, src_1, src_2, src_3, src_4, src_5, src_6, src_7,
128 18984 src_8, src_9, src_10, src_11, src_12, src_13, src_14};
129 2712 filter_.vertical_vector_path(pg, sources, &dst_rows[index]);
130 2712 }
131
132 336 void horizontal_vector_path_2x(svbool_t pg, Rows<const BufferType> src_rows,
133 Rows<DestinationType> dst_rows,
134 BorderOffsets border_offsets,
135 size_t index) const KLEIDICV_STREAMING {
136 336 auto src_0 = &src_rows.at(0, border_offsets.c0())[index];
137 336 auto src_1 = &src_rows.at(0, border_offsets.c1())[index];
138 336 auto src_2 = &src_rows.at(0, border_offsets.c2())[index];
139 336 auto src_3 = &src_rows.at(0, border_offsets.c3())[index];
140 336 auto src_4 = &src_rows.at(0, border_offsets.c4())[index];
141 336 auto src_5 = &src_rows.at(0, border_offsets.c5())[index];
142 336 auto src_6 = &src_rows.at(0, border_offsets.c6())[index];
143 336 auto src_7 = &src_rows.at(0, border_offsets.c7())[index];
144 336 auto src_8 = &src_rows.at(0, border_offsets.c8())[index];
145 336 auto src_9 = &src_rows.at(0, border_offsets.c9())[index];
146 336 auto src_10 = &src_rows.at(0, border_offsets.c10())[index];
147 336 auto src_11 = &src_rows.at(0, border_offsets.c11())[index];
148 336 auto src_12 = &src_rows.at(0, border_offsets.c12())[index];
149 336 auto src_13 = &src_rows.at(0, border_offsets.c13())[index];
150 336 auto src_14 = &src_rows.at(0, border_offsets.c14())[index];
151
152 336 BufferVectorType src_0_0 = svld1(pg, &src_0[0]);
153 336 BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1);
154 336 BufferVectorType src_0_1 = svld1(pg, &src_1[0]);
155 336 BufferVectorType src_1_1 = svld1_vnum(pg, &src_1[0], 1);
156 336 BufferVectorType src_0_2 = svld1(pg, &src_2[0]);
157 336 BufferVectorType src_1_2 = svld1_vnum(pg, &src_2[0], 1);
158 336 BufferVectorType src_0_3 = svld1(pg, &src_3[0]);
159 336 BufferVectorType src_1_3 = svld1_vnum(pg, &src_3[0], 1);
160 336 BufferVectorType src_0_4 = svld1(pg, &src_4[0]);
161 336 BufferVectorType src_1_4 = svld1_vnum(pg, &src_4[0], 1);
162 336 BufferVectorType src_0_5 = svld1(pg, &src_5[0]);
163 336 BufferVectorType src_1_5 = svld1_vnum(pg, &src_5[0], 1);
164 336 BufferVectorType src_0_6 = svld1(pg, &src_6[0]);
165 336 BufferVectorType src_1_6 = svld1_vnum(pg, &src_6[0], 1);
166 336 BufferVectorType src_0_7 = svld1(pg, &src_7[0]);
167 336 BufferVectorType src_1_7 = svld1_vnum(pg, &src_7[0], 1);
168 336 BufferVectorType src_0_8 = svld1(pg, &src_8[0]);
169 336 BufferVectorType src_1_8 = svld1_vnum(pg, &src_8[0], 1);
170 336 BufferVectorType src_0_9 = svld1(pg, &src_9[0]);
171 336 BufferVectorType src_1_9 = svld1_vnum(pg, &src_9[0], 1);
172 336 BufferVectorType src_0_10 = svld1(pg, &src_10[0]);
173 336 BufferVectorType src_1_10 = svld1_vnum(pg, &src_10[0], 1);
174 336 BufferVectorType src_0_11 = svld1(pg, &src_11[0]);
175 336 BufferVectorType src_1_11 = svld1_vnum(pg, &src_11[0], 1);
176 336 BufferVectorType src_0_12 = svld1(pg, &src_12[0]);
177 336 BufferVectorType src_1_12 = svld1_vnum(pg, &src_12[0], 1);
178 336 BufferVectorType src_0_13 = svld1(pg, &src_13[0]);
179 336 BufferVectorType src_1_13 = svld1_vnum(pg, &src_13[0], 1);
180 336 BufferVectorType src_0_14 = svld1(pg, &src_14[0]);
181 336 BufferVectorType src_1_14 = svld1_vnum(pg, &src_14[0], 1);
182
183 5040 std::reference_wrapper<BufferVectorType> sources_0[15] = {
184 1680 src_0_0, src_0_1, src_0_2, src_0_3, src_0_4,
185 1680 src_0_5, src_0_6, src_0_7, src_0_8, src_0_9,
186 1680 src_0_10, src_0_11, src_0_12, src_0_13, src_0_14};
187 336 filter_.horizontal_vector_path(pg, sources_0, &dst_rows[index]);
188 5040 std::reference_wrapper<BufferVectorType> sources_1[15] = {
189 1680 src_1_0, src_1_1, src_1_2, src_1_3, src_1_4,
190 1680 src_1_5, src_1_6, src_1_7, src_1_8, src_1_9,
191 1680 src_1_10, src_1_11, src_1_12, src_1_13, src_1_14};
192 672 filter_.horizontal_vector_path(
193 336 pg, sources_1, &dst_rows[index + BufferVecTraits::num_lanes()]);
194 336 }
195
196 1368 void horizontal_vector_path(svbool_t pg, Rows<const BufferType> src_rows,
197 Rows<DestinationType> dst_rows,
198 BorderOffsets border_offsets,
199 size_t index) const KLEIDICV_STREAMING {
200 2736 BufferVectorType src_0 =
201 1368 svld1(pg, &src_rows.at(0, border_offsets.c0())[index]);
202 2736 BufferVectorType src_1 =
203 1368 svld1(pg, &src_rows.at(0, border_offsets.c1())[index]);
204 2736 BufferVectorType src_2 =
205 1368 svld1(pg, &src_rows.at(0, border_offsets.c2())[index]);
206 2736 BufferVectorType src_3 =
207 1368 svld1(pg, &src_rows.at(0, border_offsets.c3())[index]);
208 2736 BufferVectorType src_4 =
209 1368 svld1(pg, &src_rows.at(0, border_offsets.c4())[index]);
210 2736 BufferVectorType src_5 =
211 1368 svld1(pg, &src_rows.at(0, border_offsets.c5())[index]);
212 2736 BufferVectorType src_6 =
213 1368 svld1(pg, &src_rows.at(0, border_offsets.c6())[index]);
214 2736 BufferVectorType src_7 =
215 1368 svld1(pg, &src_rows.at(0, border_offsets.c7())[index]);
216 2736 BufferVectorType src_8 =
217 1368 svld1(pg, &src_rows.at(0, border_offsets.c8())[index]);
218 2736 BufferVectorType src_9 =
219 1368 svld1(pg, &src_rows.at(0, border_offsets.c9())[index]);
220 2736 BufferVectorType src_10 =
221 1368 svld1(pg, &src_rows.at(0, border_offsets.c10())[index]);
222 2736 BufferVectorType src_11 =
223 1368 svld1(pg, &src_rows.at(0, border_offsets.c11())[index]);
224 2736 BufferVectorType src_12 =
225 1368 svld1(pg, &src_rows.at(0, border_offsets.c12())[index]);
226 2736 BufferVectorType src_13 =
227 1368 svld1(pg, &src_rows.at(0, border_offsets.c13())[index]);
228 2736 BufferVectorType src_14 =
229 1368 svld1(pg, &src_rows.at(0, border_offsets.c14())[index]);
230 20520 std::reference_wrapper<BufferVectorType> sources[15] = {
231 10944 src_0, src_1, src_2, src_3, src_4, src_5, src_6, src_7,
232 9576 src_8, src_9, src_10, src_11, src_12, src_13, src_14};
233 1368 filter_.horizontal_vector_path(pg, sources, &dst_rows[index]);
234 1368 }
235
236 33264 void process_horizontal_border(Rows<const BufferType> src_rows,
237 Rows<DestinationType> dst_rows,
238 BorderOffsets border_offsets,
239 size_t index) const KLEIDICV_STREAMING {
240 33264 BufferType src[15];
241 33264 src[0] = src_rows.at(0, border_offsets.c0())[index];
242 33264 src[1] = src_rows.at(0, border_offsets.c1())[index];
243 33264 src[2] = src_rows.at(0, border_offsets.c2())[index];
244 33264 src[3] = src_rows.at(0, border_offsets.c3())[index];
245 33264 src[4] = src_rows.at(0, border_offsets.c4())[index];
246 33264 src[5] = src_rows.at(0, border_offsets.c5())[index];
247 33264 src[6] = src_rows.at(0, border_offsets.c6())[index];
248 33264 src[7] = src_rows.at(0, border_offsets.c7())[index];
249 33264 src[8] = src_rows.at(0, border_offsets.c8())[index];
250 33264 src[9] = src_rows.at(0, border_offsets.c9())[index];
251 33264 src[10] = src_rows.at(0, border_offsets.c10())[index];
252 33264 src[11] = src_rows.at(0, border_offsets.c11())[index];
253 33264 src[12] = src_rows.at(0, border_offsets.c12())[index];
254 33264 src[13] = src_rows.at(0, border_offsets.c13())[index];
255 33264 src[14] = src_rows.at(0, border_offsets.c14())[index];
256 33264 filter_.horizontal_scalar_path(src, &dst_rows[index]);
257 33264 }
258
259 FilterType filter_;
260 }; // end of class SeparableFilter<FilterType, 15UL>
261
262 // Shorthand for 15x15 separable filters driver type.
263 template <class FilterType>
264 using SeparableFilter15x15 = SeparableFilter<FilterType, 15UL>;
265
266 } // namespace KLEIDICV_TARGET_NAMESPACE
267
268 #endif // KLEIDICV_SEPARABLE_FILTER_15X15_SC_H
269