KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/filters/separable_filter_5x5_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 97 97 100.0%
Functions: 130 130 100.0%
Branches: 6 6 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_SEPARABLE_FILTER_5X5_SC_H
6 #define KLEIDICV_SEPARABLE_FILTER_5X5_SC_H
7
8 #include "kleidicv/sve2.h"
9 #include "kleidicv/workspace/border_5x5.h"
10
11 // It is used by SVE2 and SME, the actual namespace will reflect it.
12 namespace KLEIDICV_TARGET_NAMESPACE {
13
14 // Template for drivers of separable NxM filters.
15 template <typename FilterType, const size_t S>
16 class SeparableFilter;
17
18 // Driver for a separable 5x5 filter.
19 template <typename FilterType>
20 class SeparableFilter<FilterType, 5UL> {
21 public:
22 using SourceType = typename FilterType::SourceType;
23 using BufferType = typename FilterType::BufferType;
24 using DestinationType = typename FilterType::DestinationType;
25 using SourceVecTraits =
26 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
27 using SourceVectorType = typename SourceVecTraits::VectorType;
28 using BufferVecTraits =
29 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<BufferType>;
30 using BufferVectorType = typename BufferVecTraits::VectorType;
31 using BorderInfoType =
32 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5<SourceType>;
33 using BorderType = FixedBorderType;
34 using BorderOffsets = typename BorderInfoType::Offsets;
35
36 464 explicit SeparableFilter(FilterType filter) KLEIDICV_STREAMING
37 464 : filter_{filter} {}
38
39 static constexpr size_t margin = 2UL;
40
41 5470 void process_vertical(size_t width, Rows<const SourceType> src_rows,
42 Rows<BufferType> dst_rows,
43 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
44 5470 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
45
46 7633 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
47 2163 svbool_t pg_all = SourceVecTraits::svptrue();
48 2163 vertical_vector_path(pg_all, src_rows, dst_rows, border_offsets, index);
49 2163 });
50
51 10767 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
52 5297 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
53 5297 vertical_vector_path(pg, src_rows, dst_rows, border_offsets, index);
54 5297 });
55 5470 }
56
57 5470 void process_horizontal(size_t width, Rows<const BufferType> src_rows,
58 Rows<DestinationType> dst_rows,
59 BorderOffsets border_offsets) const
60 KLEIDICV_STREAMING {
61 5470 svbool_t pg_all = BufferVecTraits::svptrue();
62 5470 LoopUnroll2 loop{width * src_rows.channels(), BufferVecTraits::num_lanes()};
63
64 7229 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
65 3518 horizontal_vector_path_2x(pg_all, src_rows, dst_rows, border_offsets,
66 1759 index);
67 1759 });
68
69 6230 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
70 760 horizontal_vector_path(pg_all, src_rows, dst_rows, border_offsets, index);
71 760 });
72
73 8247 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
74 2777 svbool_t pg = BufferVecTraits::svwhilelt(index, length);
75 2777 horizontal_vector_path(pg, src_rows, dst_rows, border_offsets, index);
76 2777 });
77 5470 }
78
79 // Processing of horizontal borders is always scalar because border offsets
80 // change for each and every element in the border.
81 21880 void process_horizontal_borders(
82 Rows<const BufferType> src_rows, Rows<DestinationType> dst_rows,
83 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
84
6/6
✓ Branch 0 taken 10512 times.
✓ Branch 1 taken 11280 times.
✓ Branch 2 taken 5992 times.
✓ Branch 3 taken 6760 times.
✓ Branch 4 taken 5376 times.
✓ Branch 5 taken 5760 times.
45680 for (size_t index = 0; index < src_rows.channels(); ++index) {
85 23800 disable_loop_vectorization();
86 23800 process_horizontal_border(src_rows, dst_rows, border_offsets, index);
87 23800 }
88 21880 }
89
90 private:
91 7460 void vertical_vector_path(svbool_t pg, Rows<const SourceType> src_rows,
92 Rows<BufferType> dst_rows,
93 BorderOffsets border_offsets,
94 size_t index) const KLEIDICV_STREAMING {
95 14920 SourceVectorType src_0 =
96 7460 svld1(pg, &src_rows.at(border_offsets.c0())[index]);
97 14920 SourceVectorType src_1 =
98 7460 svld1(pg, &src_rows.at(border_offsets.c1())[index]);
99 14920 SourceVectorType src_2 =
100 7460 svld1(pg, &src_rows.at(border_offsets.c2())[index]);
101 14920 SourceVectorType src_3 =
102 7460 svld1(pg, &src_rows.at(border_offsets.c3())[index]);
103 14920 SourceVectorType src_4 =
104 7460 svld1(pg, &src_rows.at(border_offsets.c4())[index]);
105 14920 std::reference_wrapper<SourceVectorType> sources[5] = {src_0, src_1, src_2,
106 14920 src_3, src_4};
107 7460 filter_.vertical_vector_path(pg, sources, &dst_rows[index]);
108 7460 }
109
110 1759 void horizontal_vector_path_2x(svbool_t pg, Rows<const BufferType> src_rows,
111 Rows<DestinationType> dst_rows,
112 BorderOffsets border_offsets,
113 size_t index) const KLEIDICV_STREAMING {
114 1759 auto src_0 = &src_rows.at(0, border_offsets.c0())[index];
115 1759 auto src_1 = &src_rows.at(0, border_offsets.c1())[index];
116 1759 auto src_2 = &src_rows.at(0, border_offsets.c2())[index];
117 1759 auto src_3 = &src_rows.at(0, border_offsets.c3())[index];
118 1759 auto src_4 = &src_rows.at(0, border_offsets.c4())[index];
119
120 1759 BufferVectorType src_0_0 = svld1(pg, &src_0[0]);
121 1759 BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1);
122 1759 BufferVectorType src_0_1 = svld1(pg, &src_1[0]);
123 1759 BufferVectorType src_1_1 = svld1_vnum(pg, &src_1[0], 1);
124 1759 BufferVectorType src_0_2 = svld1(pg, &src_2[0]);
125 1759 BufferVectorType src_1_2 = svld1_vnum(pg, &src_2[0], 1);
126 1759 BufferVectorType src_0_3 = svld1(pg, &src_3[0]);
127 1759 BufferVectorType src_1_3 = svld1_vnum(pg, &src_3[0], 1);
128 1759 BufferVectorType src_0_4 = svld1(pg, &src_4[0]);
129 1759 BufferVectorType src_1_4 = svld1_vnum(pg, &src_4[0], 1);
130 8795 std::reference_wrapper<BufferVectorType> sources_0[5] = {
131 8795 src_0_0, src_0_1, src_0_2, src_0_3, src_0_4};
132 1759 filter_.horizontal_vector_path(pg, sources_0, &dst_rows[index]);
133 8795 std::reference_wrapper<BufferVectorType> sources_1[5] = {
134 8795 src_1_0, src_1_1, src_1_2, src_1_3, src_1_4};
135 3318 filter_.horizontal_vector_path(
136 1759 pg, sources_1, &dst_rows[index + BufferVecTraits::num_lanes()]);
137 1759 }
138
139 3537 void horizontal_vector_path(svbool_t pg, Rows<const BufferType> src_rows,
140 Rows<DestinationType> dst_rows,
141 BorderOffsets border_offsets,
142 size_t index) const KLEIDICV_STREAMING {
143 7074 BufferVectorType src_0 =
144 3537 svld1(pg, &src_rows.at(0, border_offsets.c0())[index]);
145 7074 BufferVectorType src_1 =
146 3537 svld1(pg, &src_rows.at(0, border_offsets.c1())[index]);
147 7074 BufferVectorType src_2 =
148 3537 svld1(pg, &src_rows.at(0, border_offsets.c2())[index]);
149 7074 BufferVectorType src_3 =
150 3537 svld1(pg, &src_rows.at(0, border_offsets.c3())[index]);
151 7074 BufferVectorType src_4 =
152 3537 svld1(pg, &src_rows.at(0, border_offsets.c4())[index]);
153 7074 std::reference_wrapper<BufferVectorType> sources[5] = {src_0, src_1, src_2,
154 7074 src_3, src_4};
155 3537 filter_.horizontal_vector_path(pg, sources, &dst_rows[index]);
156 3537 }
157
158 23800 void process_horizontal_border(Rows<const BufferType> src_rows,
159 Rows<DestinationType> dst_rows,
160 BorderOffsets border_offsets,
161 size_t index) const KLEIDICV_STREAMING {
162 23800 BufferType src[5];
163 23800 src[0] = src_rows.at(0, border_offsets.c0())[index];
164 23800 src[1] = src_rows.at(0, border_offsets.c1())[index];
165 23800 src[2] = src_rows.at(0, border_offsets.c2())[index];
166 23800 src[3] = src_rows.at(0, border_offsets.c3())[index];
167 23800 src[4] = src_rows.at(0, border_offsets.c4())[index];
168 23800 filter_.horizontal_scalar_path(src, &dst_rows[index]);
169 23800 }
170
171 FilterType filter_;
172 }; // end of class SeparableFilter<FilterType, 5UL>
173
174 // Shorthand for 5x5 separable filters driver type.
175 template <class FilterType>
176 using SeparableFilter5x5 = SeparableFilter<FilterType, 5UL>;
177
178 } // namespace KLEIDICV_TARGET_NAMESPACE
179
180 #endif // KLEIDICV_SEPARABLE_FILTER_5X5_SC_H
181