KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/filters/separable_filter_9x9_sc.h
Date: 2026-03-05 15:57:40
Exec Total Coverage
Lines: 131 131 100.0%
Functions: 50 52 96.2%
Branches: 4 4 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2026 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_SEPARABLE_FILTER_9X9_SC_H
6 #define KLEIDICV_SEPARABLE_FILTER_9X9_SC_H
7
8 #include "kleidicv/sve2.h"
9 #include "kleidicv/workspace/border_9x9.h"
10
11 // It is used by SVE2 and SME, the actual namespace will reflect it.
12 namespace KLEIDICV_TARGET_NAMESPACE {
13
14 // Template for drivers of separable NxM filters.
15 template <typename FilterType, const size_t S>
16 class SeparableFilter;
17
18 // Driver for a separable 9x9 filter.
19 template <typename FilterType>
20 class SeparableFilter<FilterType, 9UL> {
21 public:
22 using SourceType = typename FilterType::SourceType;
23 using BufferType = typename FilterType::BufferType;
24 using DestinationType = typename FilterType::DestinationType;
25 using SourceVecTraits =
26 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
27 using SourceVectorType = typename SourceVecTraits::VectorType;
28 using BufferVecTraits =
29 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<BufferType>;
30 using BufferVectorType = typename BufferVecTraits::VectorType;
31 using BorderInfoType =
32 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo9x9<SourceType>;
33 using BorderType = FixedBorderType;
34 using BorderOffsets = typename BorderInfoType::Offsets;
35
36 204 explicit SeparableFilter(FilterType filter) KLEIDICV_STREAMING
37 204 : filter_{filter} {}
38
39 static constexpr size_t margin = 4UL;
40
41 1836 void process_vertical(size_t width, Rows<const SourceType> src_rows,
42 Rows<BufferType> dst_rows,
43 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
44 1836 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
45
46 2708 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
47 872 svbool_t pg_all = SourceVecTraits::svptrue();
48 872 vertical_vector_path(pg_all, src_rows, dst_rows, border_offsets, index);
49 872 });
50
51 3640 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
52 1804 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
53 1804 vertical_vector_path(pg, src_rows, dst_rows, border_offsets, index);
54 1804 });
55 1836 }
56
57 1836 void process_horizontal(size_t width, Rows<const BufferType> src_rows,
58 Rows<DestinationType> dst_rows,
59 BorderOffsets border_offsets) const
60 KLEIDICV_STREAMING {
61 1836 svbool_t pg_all = BufferVecTraits::svptrue();
62 1836 LoopUnroll2 loop{width * src_rows.channels(), BufferVecTraits::num_lanes()};
63
64 2100 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
65 528 horizontal_vector_path_2x(pg_all, src_rows, dst_rows, border_offsets,
66 264 index);
67 264 });
68
69 2356 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
70 520 horizontal_vector_path(pg_all, src_rows, dst_rows, border_offsets, index);
71 520 });
72
73 3004 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
74 1168 svbool_t pg = BufferVecTraits::svwhilelt(index, length);
75 1168 horizontal_vector_path(pg, src_rows, dst_rows, border_offsets, index);
76 1168 });
77 1836 }
78
79 // Processing of horizontal borders is always scalar because border offsets
80 // change for each and every element in the border.
81 14688 void process_horizontal_borders(
82 Rows<const BufferType> src_rows, Rows<DestinationType> dst_rows,
83 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
84
4/4
✓ Branch 0 taken 11328 times.
✓ Branch 1 taken 13632 times.
✓ Branch 2 taken 3360 times.
✓ Branch 3 taken 5664 times.
33984 for (size_t index = 0; index < src_rows.channels(); ++index) {
85 19296 disable_loop_vectorization();
86 19296 process_horizontal_border(src_rows, dst_rows, border_offsets, index);
87 19296 }
88 14688 }
89
90 private:
91 2676 void vertical_vector_path(svbool_t pg, Rows<const SourceType> src_rows,
92 Rows<BufferType> dst_rows,
93 BorderOffsets border_offsets,
94 size_t index) const KLEIDICV_STREAMING {
95 5352 SourceVectorType src_0 =
96 2676 svld1(pg, &src_rows.at(border_offsets.c0())[index]);
97 5352 SourceVectorType src_1 =
98 2676 svld1(pg, &src_rows.at(border_offsets.c1())[index]);
99 5352 SourceVectorType src_2 =
100 2676 svld1(pg, &src_rows.at(border_offsets.c2())[index]);
101 5352 SourceVectorType src_3 =
102 2676 svld1(pg, &src_rows.at(border_offsets.c3())[index]);
103 5352 SourceVectorType src_4 =
104 2676 svld1(pg, &src_rows.at(border_offsets.c4())[index]);
105 5352 SourceVectorType src_5 =
106 2676 svld1(pg, &src_rows.at(border_offsets.c5())[index]);
107 5352 SourceVectorType src_6 =
108 2676 svld1(pg, &src_rows.at(border_offsets.c6())[index]);
109 5352 SourceVectorType src_7 =
110 2676 svld1(pg, &src_rows.at(border_offsets.c7())[index]);
111 5352 SourceVectorType src_8 =
112 2676 svld1(pg, &src_rows.at(border_offsets.c8())[index]);
113 24084 std::reference_wrapper<SourceVectorType> sources[9] = {
114 24084 src_0, src_1, src_2, src_3, src_4, src_5, src_6, src_7, src_8};
115 2676 filter_.vertical_vector_path(pg, sources, &dst_rows[index]);
116 2676 }
117
118 264 void horizontal_vector_path_2x(svbool_t pg, Rows<const BufferType> src_rows,
119 Rows<DestinationType> dst_rows,
120 BorderOffsets border_offsets,
121 size_t index) const KLEIDICV_STREAMING {
122 264 auto src_0 = &src_rows.at(0, border_offsets.c0())[index];
123 264 auto src_1 = &src_rows.at(0, border_offsets.c1())[index];
124 264 auto src_2 = &src_rows.at(0, border_offsets.c2())[index];
125 264 auto src_3 = &src_rows.at(0, border_offsets.c3())[index];
126 264 auto src_4 = &src_rows.at(0, border_offsets.c4())[index];
127 264 auto src_5 = &src_rows.at(0, border_offsets.c5())[index];
128 264 auto src_6 = &src_rows.at(0, border_offsets.c6())[index];
129 264 auto src_7 = &src_rows.at(0, border_offsets.c7())[index];
130 264 auto src_8 = &src_rows.at(0, border_offsets.c8())[index];
131
132 264 BufferVectorType src_0_0 = svld1(pg, &src_0[0]);
133 264 BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1);
134 264 BufferVectorType src_0_1 = svld1(pg, &src_1[0]);
135 264 BufferVectorType src_1_1 = svld1_vnum(pg, &src_1[0], 1);
136 264 BufferVectorType src_0_2 = svld1(pg, &src_2[0]);
137 264 BufferVectorType src_1_2 = svld1_vnum(pg, &src_2[0], 1);
138 264 BufferVectorType src_0_3 = svld1(pg, &src_3[0]);
139 264 BufferVectorType src_1_3 = svld1_vnum(pg, &src_3[0], 1);
140 264 BufferVectorType src_0_4 = svld1(pg, &src_4[0]);
141 264 BufferVectorType src_1_4 = svld1_vnum(pg, &src_4[0], 1);
142 264 BufferVectorType src_0_5 = svld1(pg, &src_5[0]);
143 264 BufferVectorType src_1_5 = svld1_vnum(pg, &src_5[0], 1);
144 264 BufferVectorType src_0_6 = svld1(pg, &src_6[0]);
145 264 BufferVectorType src_1_6 = svld1_vnum(pg, &src_6[0], 1);
146 264 BufferVectorType src_0_7 = svld1(pg, &src_7[0]);
147 264 BufferVectorType src_1_7 = svld1_vnum(pg, &src_7[0], 1);
148 264 BufferVectorType src_0_8 = svld1(pg, &src_8[0]);
149 264 BufferVectorType src_1_8 = svld1_vnum(pg, &src_8[0], 1);
150 2376 std::reference_wrapper<BufferVectorType> sources_0[9] = {
151 1320 src_0_0, src_0_1, src_0_2, src_0_3, src_0_4,
152 1056 src_0_5, src_0_6, src_0_7, src_0_8};
153 264 filter_.horizontal_vector_path(pg, sources_0, &dst_rows[index]);
154 2376 std::reference_wrapper<BufferVectorType> sources_1[9] = {
155 1320 src_1_0, src_1_1, src_1_2, src_1_3, src_1_4,
156 1056 src_1_5, src_1_6, src_1_7, src_1_8};
157 360 filter_.horizontal_vector_path(
158 264 pg, sources_1, &dst_rows[index + BufferVecTraits::num_lanes()]);
159 264 }
160
161 1688 void horizontal_vector_path(svbool_t pg, Rows<const BufferType> src_rows,
162 Rows<DestinationType> dst_rows,
163 BorderOffsets border_offsets,
164 size_t index) const KLEIDICV_STREAMING {
165 3376 BufferVectorType src_0 =
166 1688 svld1(pg, &src_rows.at(0, border_offsets.c0())[index]);
167 3376 BufferVectorType src_1 =
168 1688 svld1(pg, &src_rows.at(0, border_offsets.c1())[index]);
169 3376 BufferVectorType src_2 =
170 1688 svld1(pg, &src_rows.at(0, border_offsets.c2())[index]);
171 3376 BufferVectorType src_3 =
172 1688 svld1(pg, &src_rows.at(0, border_offsets.c3())[index]);
173 3376 BufferVectorType src_4 =
174 1688 svld1(pg, &src_rows.at(0, border_offsets.c4())[index]);
175 3376 BufferVectorType src_5 =
176 1688 svld1(pg, &src_rows.at(0, border_offsets.c5())[index]);
177 3376 BufferVectorType src_6 =
178 1688 svld1(pg, &src_rows.at(0, border_offsets.c6())[index]);
179 3376 BufferVectorType src_7 =
180 1688 svld1(pg, &src_rows.at(0, border_offsets.c7())[index]);
181 3376 BufferVectorType src_8 =
182 1688 svld1(pg, &src_rows.at(0, border_offsets.c8())[index]);
183
184 15192 std::reference_wrapper<BufferVectorType> sources[9] = {
185 15192 src_0, src_1, src_2, src_3, src_4, src_5, src_6, src_7, src_8};
186 1688 filter_.horizontal_vector_path(pg, sources, &dst_rows[index]);
187 1688 }
188
189 19296 void process_horizontal_border(Rows<const BufferType> src_rows,
190 Rows<DestinationType> dst_rows,
191 BorderOffsets border_offsets,
192 size_t index) const KLEIDICV_STREAMING {
193 19296 BufferType src[9];
194 19296 src[0] = src_rows.at(0, border_offsets.c0())[index];
195 19296 src[1] = src_rows.at(0, border_offsets.c1())[index];
196 19296 src[2] = src_rows.at(0, border_offsets.c2())[index];
197 19296 src[3] = src_rows.at(0, border_offsets.c3())[index];
198 19296 src[4] = src_rows.at(0, border_offsets.c4())[index];
199 19296 src[5] = src_rows.at(0, border_offsets.c5())[index];
200 19296 src[6] = src_rows.at(0, border_offsets.c6())[index];
201 19296 src[7] = src_rows.at(0, border_offsets.c7())[index];
202 19296 src[8] = src_rows.at(0, border_offsets.c8())[index];
203 19296 filter_.horizontal_scalar_path(src, &dst_rows[index]);
204 19296 }
205
206 FilterType filter_;
207 }; // end of class SeparableFilter<FilterType, 9UL>
208
209 // Shorthand for 9x9 separable filters driver type.
210 template <class FilterType>
211 using SeparableFilter9x9 = SeparableFilter<FilterType, 9UL>;
212
213 } // namespace KLEIDICV_TARGET_NAMESPACE
214
215 #endif // KLEIDICV_SEPARABLE_FILTER_9X9_SC_H
216