KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/filters/separable_filter_3x3_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 79 79 100.0%
Functions: 104 104 100.0%
Branches: 4 4 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_SEPARABLE_FILTER_3X3_SC_H
6 #define KLEIDICV_SEPARABLE_FILTER_3X3_SC_H
7
8 #include "kleidicv/sve2.h"
9 #include "kleidicv/workspace/border_3x3.h"
10
11 // It is used by SVE2 and SME, the actual namespace will reflect it.
12 namespace KLEIDICV_TARGET_NAMESPACE {
13
14 // Template for drivers of separable NxM filters.
15 template <typename FilterType, const size_t S>
16 class SeparableFilter;
17
18 // Driver for a separable 3x3 filter.
19 template <typename FilterType>
20 class SeparableFilter<FilterType, 3UL> {
21 public:
22 using SourceType = typename FilterType::SourceType;
23 using BufferType = typename FilterType::BufferType;
24 using DestinationType = typename FilterType::DestinationType;
25 using SourceVecTraits =
26 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
27 using SourceVectorType = typename SourceVecTraits::VectorType;
28 using BufferVecTraits =
29 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<BufferType>;
30 using BufferVectorType = typename BufferVecTraits::VectorType;
31 using BorderInfoType =
32 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo3x3<SourceType>;
33 using BorderType = FixedBorderType;
34 using BorderOffsets = typename BorderInfoType::Offsets;
35
36 690 explicit SeparableFilter(FilterType filter) KLEIDICV_STREAMING
37 690 : filter_{filter} {}
38
39 static constexpr size_t margin = 1UL;
40
41 12480 void process_vertical(size_t width, Rows<const SourceType> src_rows,
42 Rows<BufferType> dst_rows,
43 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
44 12480 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
45
46 15350 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
47 2870 svbool_t pg_all = SourceVecTraits::svptrue();
48 2870 vertical_vector_path(pg_all, src_rows, dst_rows, border_offsets, index);
49 2870 });
50
51 24960 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
52 12480 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
53 12480 vertical_vector_path(pg, src_rows, dst_rows, border_offsets, index);
54 12480 });
55 12480 }
56
57 12480 void process_horizontal(size_t width, Rows<const BufferType> src_rows,
58 Rows<DestinationType> dst_rows,
59 BorderOffsets border_offsets) const
60 KLEIDICV_STREAMING {
61 12480 svbool_t pg_all = BufferVecTraits::svptrue();
62 12480 LoopUnroll2 loop{width * src_rows.channels(), BufferVecTraits::num_lanes()};
63
64 14680 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
65 4400 horizontal_vector_path_2x(pg_all, src_rows, dst_rows, border_offsets,
66 2200 index);
67 2200 });
68
69 14044 loop.unroll_once([&](size_t index) KLEIDICV_STREAMING {
70 1564 horizontal_vector_path(pg_all, src_rows, dst_rows, border_offsets, index);
71 1564 });
72
73 18996 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
74 6516 svbool_t pg = BufferVecTraits::svwhilelt(index, length);
75 6516 horizontal_vector_path(pg, src_rows, dst_rows, border_offsets, index);
76 6516 });
77 12480 }
78
79 // Processing of horizontal borders is always scalar because border offsets
80 // change for each and every element in the border.
81 24960 void process_horizontal_borders(
82 Rows<const BufferType> src_rows, Rows<DestinationType> dst_rows,
83 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
84
4/4
✓ Branch 0 taken 12960 times.
✓ Branch 1 taken 24648 times.
✓ Branch 2 taken 12000 times.
✓ Branch 3 taken 23412 times.
73020 for (size_t index = 0; index < src_rows.channels(); ++index) {
85 48060 disable_loop_vectorization();
86 48060 process_horizontal_border(src_rows, dst_rows, border_offsets, index);
87 48060 }
88 24960 }
89
90 private:
91 15350 void vertical_vector_path(svbool_t pg, Rows<const SourceType> src_rows,
92 Rows<BufferType> dst_rows,
93 BorderOffsets border_offsets,
94 size_t index) const KLEIDICV_STREAMING {
95 30700 SourceVectorType src_0 =
96 15350 svld1(pg, &src_rows.at(border_offsets.c0())[index]);
97 30700 SourceVectorType src_1 =
98 15350 svld1(pg, &src_rows.at(border_offsets.c1())[index]);
99 30700 SourceVectorType src_2 =
100 15350 svld1(pg, &src_rows.at(border_offsets.c2())[index]);
101 15350 std::reference_wrapper<SourceVectorType> sources[3] = {src_0, src_1, src_2};
102 15350 filter_.vertical_vector_path(pg, sources, &dst_rows[index]);
103 15350 }
104
105 2200 void horizontal_vector_path_2x(svbool_t pg, Rows<const BufferType> src_rows,
106 Rows<DestinationType> dst_rows,
107 BorderOffsets border_offsets,
108 size_t index) const KLEIDICV_STREAMING {
109 2200 auto src_0 = &src_rows.at(0, border_offsets.c0())[index];
110 2200 auto src_1 = &src_rows.at(0, border_offsets.c1())[index];
111 2200 auto src_2 = &src_rows.at(0, border_offsets.c2())[index];
112
113 2200 BufferVectorType src_0_0 = svld1(pg, &src_0[0]);
114 2200 BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1);
115 2200 BufferVectorType src_0_1 = svld1(pg, &src_1[0]);
116 2200 BufferVectorType src_1_1 = svld1_vnum(pg, &src_1[0], 1);
117 2200 BufferVectorType src_0_2 = svld1(pg, &src_2[0]);
118 2200 BufferVectorType src_1_2 = svld1_vnum(pg, &src_2[0], 1);
119
120 2200 std::reference_wrapper<BufferVectorType> sources_0[3] = {src_0_0, src_0_1,
121 2200 src_0_2};
122 2200 filter_.horizontal_vector_path(pg, sources_0, &dst_rows[index]);
123 2200 std::reference_wrapper<BufferVectorType> sources_1[3] = {src_1_0, src_1_1,
124 2200 src_1_2};
125 2224 filter_.horizontal_vector_path(
126 2200 pg, sources_1, &dst_rows[index + BufferVecTraits::num_lanes()]);
127 2200 }
128
129 8080 void horizontal_vector_path(svbool_t pg, Rows<const BufferType> src_rows,
130 Rows<DestinationType> dst_rows,
131 BorderOffsets border_offsets,
132 size_t index) const KLEIDICV_STREAMING {
133 16160 BufferVectorType src_0 =
134 8080 svld1(pg, &src_rows.at(0, border_offsets.c0())[index]);
135 16160 BufferVectorType src_1 =
136 8080 svld1(pg, &src_rows.at(0, border_offsets.c1())[index]);
137 16160 BufferVectorType src_2 =
138 8080 svld1(pg, &src_rows.at(0, border_offsets.c2())[index]);
139
140 8080 std::reference_wrapper<BufferVectorType> sources[3] = {src_0, src_1, src_2};
141 8080 filter_.horizontal_vector_path(pg, sources, &dst_rows[index]);
142 8080 }
143
144 48060 void process_horizontal_border(Rows<const BufferType> src_rows,
145 Rows<DestinationType> dst_rows,
146 BorderOffsets border_offsets,
147 size_t index) const KLEIDICV_STREAMING {
148 48060 BufferType src[3];
149 48060 src[0] = src_rows.at(0, border_offsets.c0())[index];
150 48060 src[1] = src_rows.at(0, border_offsets.c1())[index];
151 48060 src[2] = src_rows.at(0, border_offsets.c2())[index];
152 48060 filter_.horizontal_scalar_path(src, &dst_rows[index]);
153 48060 }
154
155 FilterType filter_;
156 }; // end of class SeparableFilter<FilterType, 3UL>
157
158 // Shorthand for 3x3 separable filters driver type.
159 template <class FilterType>
160 using SeparableFilter3x3 = SeparableFilter<FilterType, 3UL>;
161
162 } // namespace KLEIDICV_TARGET_NAMESPACE
163
164 #endif // KLEIDICV_SEPARABLE_FILTER_3X3_SC_H
165