KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/filters/filter_2d_neon.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 82 82 100.0%
Functions: 261 273 95.6%
Branches: 56 56 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_FILTER_2D_NEON_H
6 #define KLEIDICV_FILTER_2D_NEON_H
7
8 #include "filter_2d_window_loader_3x3.h"
9 #include "filter_2d_window_loader_5x5.h"
10 #include "filter_2d_window_loader_7x7.h"
11 #include "kleidicv/neon.h"
12 #include "process_filter_2d.h"
13
14 namespace KLEIDICV_TARGET_NAMESPACE {
15
16 template <typename InnerFilterType, size_t KSize, typename WindowLoaderType>
17 class Filter2d {
18 public:
19 using SourceType = typename InnerFilterType::SourceType;
20 using DestinationType = typename InnerFilterType::DestinationType;
21 using SourceVecTraits = typename neon::VecTraits<SourceType>;
22 using DestinationVecTraits = typename neon::VecTraits<DestinationType>;
23 using SourceVectorType = typename SourceVecTraits::VectorType;
24 using DestinationVectorType = typename DestinationVecTraits::VectorType;
25 using BorderType = FixedBorderType;
26 static constexpr size_t kMargin = KSize / 2;
27 using BorderInfoType =
28 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo<SourceType, KSize>;
29 using BorderOffsets = typename BorderInfoType::Offsets;
30
31 1610 explicit Filter2d(InnerFilterType filter) : filter_{filter} {}
32
33 8676 void process_pixels_without_horizontal_borders(
34 size_t width, Rows<const SourceType> src_rows,
35 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets,
36 BorderOffsets window_col_offsets) const {
37 17352 LoopUnroll2<TryToAvoidTailLoop> loop{width * src_rows.channels(),
38 8676 SourceVecTraits::num_lanes()};
39
40 34632 loop.unroll_once([&](size_t index) {
41 25956 SourceVectorType src[KSize][KSize];
42 25956 DestinationVectorType dst_vec;
43
44 11224924 auto KernelWindow = [&](size_t row, size_t col) -> SourceVectorType& {
45 11198968 return src[row][col];
46 };
47
48 983368 auto load_array_element = [](const SourceType& x) { return vld1q(&x); };
49 51912 WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows,
50 25956 window_row_offsets, window_col_offsets,
51 25956 index);
52
53 25956 filter_.vector_path(KernelWindow, dst_vec);
54 25956 vst1q(&dst_rows[index], dst_vec);
55 25956 });
56
57 13556 loop.tail([&](size_t index) {
58 4880 process_one_element_with_horizontal_borders(
59 4880 src_rows, dst_rows, window_row_offsets, window_col_offsets, index);
60 4880 });
61 8676 }
62
63 2272 void process_pixels_of_dual_rows_without_horizontal_borders(
64 size_t width, Rows<const SourceType> src_rows,
65 Rows<DestinationType> dst_rows, BorderOffsets window_row_offsets_0,
66 BorderOffsets window_row_offsets_1,
67 BorderOffsets window_col_offsets) const {
68 4544 LoopUnroll2<TryToAvoidTailLoop> loop{width * src_rows.channels(),
69 2272 SourceVecTraits::num_lanes()};
70
71 5750 loop.unroll_once([&](size_t index) {
72 3478 SourceVectorType src[KSize + 1][KSize];
73 3478 DestinationVectorType dst_vec_0;
74 3478 DestinationVectorType dst_vec_1;
75 198246 auto KernelWindow = [&](size_t row, size_t col) -> SourceVectorType& {
76 194768 return src[row][col];
77 };
78
79 45214 auto load_array_element = [](const SourceType& x) { return vld1q(&x); };
80 3478 WindowLoaderType::load_window_to_handle_dual_rows(
81 3478 KernelWindow, load_array_element, src_rows, window_row_offsets_0,
82 3478 window_row_offsets_1, window_col_offsets, index);
83
84 3478 filter_.vector_path_for_dual_row_handling(KernelWindow, dst_vec_0,
85 dst_vec_1);
86 3478 vst1q(&dst_rows.at(0, 0)[index], dst_vec_0);
87 3478 vst1q(&dst_rows.at(1, 0)[index], dst_vec_1);
88 3478 });
89
90 4440 loop.tail([&](size_t index) {
91 2168 process_two_element_vertically_with_or_without_horizontal_borders(
92 2168 src_rows, dst_rows, window_row_offsets_0, window_row_offsets_1,
93 2168 window_col_offsets, index);
94 2168 });
95 2272 }
96
97 42704 void process_one_pixel_with_horizontal_borders(
98 Rows<const SourceType> src_rows, Rows<DestinationType> dst_rows,
99 BorderOffsets window_row_offsets,
100 BorderOffsets window_col_offsets) const KLEIDICV_STREAMING {
101
42/42
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 160 times.
✓ Branch 2 taken 1232 times.
✓ Branch 3 taken 3080 times.
✓ Branch 4 taken 2496 times.
✓ Branch 5 taken 6240 times.
✓ Branch 6 taken 72 times.
✓ Branch 7 taken 168 times.
✓ Branch 8 taken 3272 times.
✓ Branch 9 taken 5120 times.
✓ Branch 10 taken 4488 times.
✓ Branch 11 taken 8232 times.
✓ Branch 12 taken 72 times.
✓ Branch 13 taken 168 times.
✓ Branch 14 taken 3272 times.
✓ Branch 15 taken 5120 times.
✓ Branch 16 taken 4488 times.
✓ Branch 17 taken 8232 times.
✓ Branch 18 taken 72 times.
✓ Branch 19 taken 168 times.
✓ Branch 20 taken 3272 times.
✓ Branch 21 taken 5120 times.
✓ Branch 22 taken 4488 times.
✓ Branch 23 taken 8232 times.
✓ Branch 24 taken 64 times.
✓ Branch 25 taken 160 times.
✓ Branch 26 taken 1232 times.
✓ Branch 27 taken 3080 times.
✓ Branch 28 taken 2496 times.
✓ Branch 29 taken 6240 times.
✓ Branch 30 taken 64 times.
✓ Branch 31 taken 160 times.
✓ Branch 32 taken 1232 times.
✓ Branch 33 taken 3080 times.
✓ Branch 34 taken 2496 times.
✓ Branch 35 taken 6240 times.
✓ Branch 36 taken 72 times.
✓ Branch 37 taken 168 times.
✓ Branch 38 taken 3272 times.
✓ Branch 39 taken 5120 times.
✓ Branch 40 taken 4488 times.
✓ Branch 41 taken 8232 times.
125224 for (size_t index = 0; index < src_rows.channels(); ++index) {
102 82520 disable_loop_vectorization();
103 82520 process_one_element_with_horizontal_borders(
104 82520 src_rows, dst_rows, window_row_offsets, window_col_offsets, index);
105 82520 }
106 42704 }
107
108 // Processes two vertically adjacent pixels in a single column
109 4544 void process_two_pixels_with_horizontal_borders(
110 Rows<const SourceType> src_rows, Rows<DestinationType> dst_rows,
111 BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1,
112 BorderOffsets window_col_offsets) const {
113
14/14
✓ Branch 0 taken 128 times.
✓ Branch 1 taken 320 times.
✓ Branch 2 taken 1040 times.
✓ Branch 3 taken 1232 times.
✓ Branch 4 taken 1040 times.
✓ Branch 5 taken 1232 times.
✓ Branch 6 taken 1040 times.
✓ Branch 7 taken 1232 times.
✓ Branch 8 taken 128 times.
✓ Branch 9 taken 320 times.
✓ Branch 10 taken 128 times.
✓ Branch 11 taken 320 times.
✓ Branch 12 taken 1040 times.
✓ Branch 13 taken 1232 times.
10432 for (size_t index = 0; index < src_rows.channels(); ++index) {
114 5888 disable_loop_vectorization();
115 5888 process_two_element_vertically_with_or_without_horizontal_borders(
116 5888 src_rows, dst_rows, window_row_offsets_0, window_row_offsets_1,
117 5888 window_col_offsets, index);
118 5888 }
119 4544 }
120
121 private:
122 87400 void process_one_element_with_horizontal_borders(
123 Rows<const SourceType> src_rows, Rows<DestinationType> dst_rows,
124 BorderOffsets window_row_offsets, BorderOffsets window_col_offsets,
125 size_t index) const KLEIDICV_STREAMING {
126 87400 SourceType src[KSize][KSize];
127
128 87400 auto KernelWindow =
129 41533720 [&](size_t row, size_t col)
130 41446320 KLEIDICV_STREAMING -> SourceType& { return src[row][col]; };
131
132 3552016 auto load_array_element = [&](const SourceType& x)
133 3464616 KLEIDICV_STREAMING { return x; };
134
135 174800 WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows,
136 87400 window_row_offsets, window_col_offsets,
137 87400 index);
138
139 87400 filter_.scalar_path(KernelWindow, dst_rows[index]);
140 87400 }
141
142 8056 void process_two_element_vertically_with_or_without_horizontal_borders(
143 Rows<const SourceType> src_rows, Rows<DestinationType> dst_rows,
144 BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1,
145 BorderOffsets window_col_offsets, size_t index) const {
146 8056 SourceType src[KSize + 1][KSize];
147 459192 auto KernelWindow = [&](size_t row, size_t col) -> SourceType& {
148 451136 return src[row][col];
149 };
150 104728 auto load_array_element = [](const SourceType& x) { return x; };
151 8056 WindowLoaderType::load_window_to_handle_dual_rows(
152 8056 KernelWindow, load_array_element, src_rows, window_row_offsets_0,
153 8056 window_row_offsets_1, window_col_offsets, index);
154
155 8056 filter_.scalar_path_for_dual_row_handling(
156 8056 KernelWindow, dst_rows.at(0, 0)[index], dst_rows.at(1, 0)[index]);
157 8056 }
158
159 InnerFilterType filter_;
160 };
161
162 template <typename InnerFilterType>
163 using Filter2D3x3 =
164 Filter2d<InnerFilterType, 3UL,
165 Filter2dWindowLoader3x3<typename InnerFilterType::SourceType>>;
166
167 template <typename InnerFilterType>
168 using Filter2D5x5 =
169 Filter2d<InnerFilterType, 5UL,
170 Filter2dWindowLoader5x5<typename InnerFilterType::SourceType>>;
171
172 template <typename InnerFilterType>
173 using Filter2D7x7 =
174 Filter2d<InnerFilterType, 7UL,
175 Filter2dWindowLoader7x7<typename InnerFilterType::SourceType>>;
176
177 } // namespace KLEIDICV_TARGET_NAMESPACE
178
179 #endif // KLEIDICV_FILTER_2D_NEON_H
180