KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/workspace/separable.h
Date: 2026-03-05 15:57:40
Exec Total Coverage
Lines: 91 91 100.0%
Functions: 100 100 100.0%
Branches: 68 68 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2026 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_WORKSPACE_SEPARABLE_H
6 #define KLEIDICV_WORKSPACE_SEPARABLE_H
7
8 #include <algorithm>
9 #include <cstddef>
10 #include <cstdint>
11 #include <cstdlib>
12 #include <memory>
13 #include <utility>
14 #include <variant>
15
16 #include "kleidicv/types.h"
17
18 namespace KLEIDICV_TARGET_NAMESPACE {
19
20 // Workspace for separable fixed-size filters.
21 //
22 // Theory of operation
23 //
24 // Given an NxM input matrix and a separable filter AxB = V x H, this workspace
25 // processes one row at a time: the vertical operation writes a single row of
26 // intermediate results into a separate buffer, then the horizontal operation
27 // is applied to that buffer and the result is written to the destination.
28 //
29 // Limitations
30 //
31 // 1. In-place operations are not supported.
32 // 2. The input's width and height have to be at least `filter's width - 1` and
33 // `filter's height - 1`, respectively.
34 //
35 // Example
36 //
37 // N = 3, M = 3, A = B = 3, border type replicate and 'x' is multiplication.
38 //
39 // Input:               Separated filters:
40 // [ M00, M01, M02 ]    V = [ V0 ]    H = [ H0, H1, H2 ]
41 // [ M10, M11, M12 ]        [ V1 ]
42 // [ M20, M21, M22 ]        [ V2 ]
43 //
44 // Buffer contents in iteration 0 after applying the vertical operation
45 // taking "replicate" border type into account:
46 //
47 // [ B0, B1, B2 ] =
48 // [ M{0, 0, 1}0 x V, M{0, 0, 1}1 x V, M{0, 0, 1}2 x V ]
49 //
50 // The horizontal operation is then semantically performed on the following
51 // input taking "replicate" border type into account:
52 //
53 // [ B0, B0, B1, B2, B2 ]
54 //
55 // The destination contents after the 0th iteration is then:
56 //
57 // [ D00, D01, D02 ] =
58 // [ B{0, 0, 1} x H, B{0, 1, 2} x H, B{1, 2, 2} x H]
59 //
60 // Handling of borders is calculated based on offsets rather than setting up
61 // suitably-sized buffers which could hold both borders and data.
62 class SeparableFilterWorkspace {
63 public:
64 // Workspace is only constructible with create().
65 SeparableFilterWorkspace() = delete;
66
// Factory. Calls allocate() for the intermediate row buffer and returns
// KLEIDICV_ERROR_ALLOCATION when the returned pointer is null; otherwise
// returns a workspace constructed from that pointer and the row stride.
67 3216 static std::variant<SeparableFilterWorkspace, kleidicv_error_t> create(
68 Rectangle rect, size_t channels,
69 size_t intermediate_size) KLEIDICV_STREAMING {
70 6432 auto [allocation, buffer_rows_stride] =
71 3216 allocate(rect, channels, intermediate_size);
72
73
2/2
✓ Branch 0 taken 3156 times.
✓ Branch 1 taken 60 times.
3216 if (!allocation) {
74 60 return KLEIDICV_ERROR_ALLOCATION;
75 }
76
77 6312 return SeparableFilterWorkspace{rect, channels, allocation,
78 3156 buffer_rows_stride};
79 3216 }
80
81 protected:
// Computes the byte size of one intermediate row as
// (rect.width() * channels + 3) * intermediate_size and requests that many
// bytes from std::malloc. Returns {pointer, byte size}; the pointer is
// returned unchecked, so the caller has to test it for null.
// NOTE(review): the size arithmetic has no overflow check; presumably callers
// bound the image dimensions beforehand - confirm.
82 3768 static std::pair<uint8_t *, size_t> allocate(Rectangle rect, size_t channels,
83 size_t intermediate_size)
84 KLEIDICV_STREAMING {
85 3768 size_t buffer_rows_number_of_elements = rect.width() * channels;
86 // Adding more elements because of SVE, where interleaving stores are
87 // governed by one predicate. For example, if a predicate requires 7 uint8_t
88 // elements and an algorithm performs widening to 16 bits, the resulting
89 // interleaving store will still be governed by the same predicate, thus
90 // storing 8 elements. Choosing '3' to account for svst4().
91 3768 buffer_rows_number_of_elements += 3;
92
93 7536 size_t buffer_rows_stride =
94 3768 buffer_rows_number_of_elements * intermediate_size;
95
96 7536 uint8_t *allocation =
97 3768 reinterpret_cast<uint8_t *>(std::malloc(buffer_rows_stride));
98
99 3768 return {allocation, buffer_rows_stride};
100 3768 }
101
// Takes ownership of `allocation`: it is stored in buffer_ with std::free as
// the deleter, so it must be a pointer that std::free accepts.
102 3704 SeparableFilterWorkspace(Rectangle rect, size_t channels, uint8_t *allocation,
103 size_t buffer_rows_stride) KLEIDICV_STREAMING
104 3704 : rect_{rect},
105 3704 channels_{channels},
106 3704 buffer_{allocation, &std::free},
107 3704 buffer_rows_stride_{buffer_rows_stride} {}
108
109 public:
110 // Processes rows vertically first along the full width
// For every row index in [y_begin, y_end): computes that row's vertical
// border offsets, calls filter.process_vertical (passing rect_.width()) to
// fill the intermediate buffer, then calls process_horizontal to go from the
// buffer to the matching destination row. The same border_type is used to
// build both the vertical (rect_.height()) and the horizontal (rect_.width())
// border helpers.
111 template <typename FilterType>
112 2688 void process(size_t y_begin, size_t y_end,
113 Rows<const typename FilterType::SourceType> src_rows,
114 Rows<typename FilterType::DestinationType> dst_rows,
115 typename FilterType::BorderType border_type,
116 FilterType filter) KLEIDICV_STREAMING {
117 // Border helper which calculates border offsets.
118 5376 typename FilterType::BorderInfoType vertical_border{rect_.height(),
119 2688 border_type};
120 5376 typename FilterType::BorderInfoType horizontal_border{rect_.width(),
121 2688 border_type};
122
123 // Buffer rows which hold intermediate widened data.
124 2688 auto buffer_rows =
125 5376 Rows{reinterpret_cast<typename FilterType::BufferType *>(buffer_.get()),
126 2688 buffer_rows_stride_, channels_};
127
128 // Vertical processing loop.
129
20/20
✓ Branch 0 taken 884 times.
✓ Branch 1 taken 11184 times.
✓ Branch 2 taken 940 times.
✓ Branch 3 taken 13076 times.
✓ Branch 4 taken 132 times.
✓ Branch 5 taken 808 times.
✓ Branch 6 taken 204 times.
✓ Branch 7 taken 1888 times.
✓ Branch 8 taken 128 times.
✓ Branch 9 taken 1824 times.
✓ Branch 10 taken 128 times.
✓ Branch 11 taken 2592 times.
✓ Branch 12 taken 68 times.
✓ Branch 13 taken 152 times.
✓ Branch 14 taken 68 times.
✓ Branch 15 taken 288 times.
✓ Branch 16 taken 68 times.
✓ Branch 17 taken 424 times.
✓ Branch 18 taken 68 times.
✓ Branch 19 taken 560 times.
35484 for (size_t vertical_index = y_begin; vertical_index < y_end;
130 32796 ++vertical_index) {
131 // Recalculate vertical border offsets.
132 32796 auto offsets = vertical_border.offsets_with_border(vertical_index);
133 // Process in the vertical direction first.
134 65592 filter.process_vertical(rect_.width(), src_rows.at(vertical_index),
135 32796 buffer_rows, offsets);
136 // Process in the horizontal direction last.
137 65592 process_horizontal(rect_.width(), buffer_rows,
138 32796 dst_rows.at(vertical_index), filter,
139 32796 horizontal_border);
140 32796 }
141 2688 }
142
143 // Variant of process() that takes the kernel size as a run-time argument.
// margin = kernel_size / 2. Rows in [y_begin, max(y_begin, margin)) and in
// [min(y_end, rect_.height() - margin), y_end) go through
// process_arbitrary_border_vertical; the rows in between go through
// process_arbitrary_vertical. Every row is then finished with
// process_arbitrary_horizontal. The border type argument is unused here.
// NOTE(review): the top-range end is not clamped to y_end and the bottom-range
// start is not clamped to y_begin, so a slice that ends inside the top band or
// starts inside the bottom band would process rows outside [y_begin, y_end);
// rect_.height() - margin is also unchecked for height < margin. Presumably
// callers only pass ranges for which this cannot happen - confirm.
144 template <typename FilterType>
145 64 void process_arbitrary(size_t kernel_size, size_t y_begin, size_t y_end,
146 Rows<const typename FilterType::SourceType> src_rows,
147 Rows<typename FilterType::DestinationType> dst_rows,
148 typename FilterType::BorderType /* border_type */,
149 FilterType filter) KLEIDICV_STREAMING {
150 // Buffer rows which hold intermediate widened data.
151 64 auto buffer_rows =
152 128 Rows{reinterpret_cast<typename FilterType::BufferType *>(buffer_.get()),
153 64 buffer_rows_stride_, channels_};
154 64 size_t margin = kernel_size / 2;
155
156 // Process top rows, affected by border
157
2/2
✓ Branch 0 taken 240 times.
✓ Branch 1 taken 64 times.
304 for (size_t row_index = y_begin; row_index < std::max(y_begin, margin);
158 240 ++row_index) {
159 480 filter.process_arbitrary_border_vertical(rect_.width(), src_rows,
160 240 row_index, buffer_rows);
161 480 filter.process_arbitrary_horizontal(rect_.width(), kernel_size,
162 240 buffer_rows, dst_rows.at(row_index));
163 240 }
164
165 // Process middle rows that are not affected by any borders
166
2/2
✓ Branch 0 taken 208 times.
✓ Branch 1 taken 64 times.
272 for (size_t row_index = std::max(y_begin, margin);
167 272 row_index < std::min(y_end, rect_.height() - margin); ++row_index) {
168 416 filter.process_arbitrary_vertical(rect_.width(), src_rows.at(row_index),
169 208 buffer_rows);
170 416 filter.process_arbitrary_horizontal(rect_.width(), kernel_size,
171 208 buffer_rows, dst_rows.at(row_index));
172 208 }
173
174 // Process bottom rows, affected by border
175
2/2
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 240 times.
304 for (size_t row_index = std::min(y_end, rect_.height() - margin);
176 304 row_index < y_end; ++row_index) {
177 480 filter.process_arbitrary_border_vertical(rect_.width(), src_rows,
178 240 row_index, buffer_rows);
179 480 filter.process_arbitrary_horizontal(rect_.width(), kernel_size,
180 240 buffer_rows, dst_rows.at(row_index));
181 240 }
182 64 }
183
184 private:
// Horizontal pass over one buffered row. The first `margin` and the last
// `margin` positions are handled one at a time through
// process_horizontal_borders with left/right border offsets; the
// width - 2 * margin positions in between are handled by a single
// process_horizontal call. margin is the compile-time filter.margin.
// NOTE(review): width - 2 * margin is unchecked; presumably guaranteed by the
// minimum-size limitation stated in the class comment - confirm.
185 template <typename FilterType>
186 32796 void process_horizontal(size_t width,
187 Rows<typename FilterType::BufferType> buffer_rows,
188 Rows<typename FilterType::DestinationType> dst_rows,
189 FilterType filter,
190 typename FilterType::BorderInfoType horizontal_border)
191 KLEIDICV_STREAMING {
192 // Margin associated with the filter.
193 32796 constexpr size_t margin = filter.margin;
194
195 // Process data affected by left border.
196 KLEIDICV_FORCE_LOOP_UNROLL
197
20/20
✓ Branch 0 taken 13856 times.
✓ Branch 1 taken 11184 times.
✓ Branch 2 taken 18368 times.
✓ Branch 3 taken 13076 times.
✓ Branch 4 taken 2424 times.
✓ Branch 5 taken 808 times.
✓ Branch 6 taken 7552 times.
✓ Branch 7 taken 1888 times.
✓ Branch 8 taken 12768 times.
✓ Branch 9 taken 1824 times.
✓ Branch 10 taken 25920 times.
✓ Branch 11 taken 2592 times.
✓ Branch 12 taken 152 times.
✓ Branch 13 taken 152 times.
✓ Branch 14 taken 576 times.
✓ Branch 15 taken 288 times.
✓ Branch 16 taken 1272 times.
✓ Branch 17 taken 424 times.
✓ Branch 18 taken 2240 times.
✓ Branch 19 taken 560 times.
117924 for (size_t horizontal_index = 0; horizontal_index < margin;
198 85128 ++horizontal_index) {
199 85128 auto offsets =
200 85128 horizontal_border.offsets_with_left_border(horizontal_index);
201 170256 filter.process_horizontal_borders(buffer_rows.at(0, horizontal_index),
202 85128 dst_rows.at(0, horizontal_index),
203 85128 offsets);
204 85128 }
205
206 // Process data which is not affected by any borders in bulk.
207 {
208 32796 size_t width_without_borders = width - (2 * margin);
209 32796 auto offsets = horizontal_border.offsets_without_border();
210 65592 filter.process_horizontal(width_without_borders,
211 32796 buffer_rows.at(0, margin),
212 32796 dst_rows.at(0, margin), offsets);
213 32796 }
214
215 // Process data affected by right border.
216 KLEIDICV_FORCE_LOOP_UNROLL
217
20/20
✓ Branch 0 taken 11184 times.
✓ Branch 1 taken 13856 times.
✓ Branch 2 taken 13076 times.
✓ Branch 3 taken 18368 times.
✓ Branch 4 taken 808 times.
✓ Branch 5 taken 2424 times.
✓ Branch 6 taken 1888 times.
✓ Branch 7 taken 7552 times.
✓ Branch 8 taken 1824 times.
✓ Branch 9 taken 12768 times.
✓ Branch 10 taken 2592 times.
✓ Branch 11 taken 25920 times.
✓ Branch 12 taken 152 times.
✓ Branch 13 taken 152 times.
✓ Branch 14 taken 288 times.
✓ Branch 15 taken 576 times.
✓ Branch 16 taken 424 times.
✓ Branch 17 taken 1272 times.
✓ Branch 18 taken 560 times.
✓ Branch 19 taken 2240 times.
117924 for (size_t horizontal_index = 0; horizontal_index < margin;
218 85128 ++horizontal_index) {
219 85128 size_t index = width - margin + horizontal_index;
220 85128 auto offsets = horizontal_border.offsets_with_right_border(index);
221 170256 filter.process_horizontal_borders(buffer_rows.at(0, index),
222 85128 dst_rows.at(0, index), offsets);
223 85128 }
224 32796 }
225
226 protected:
// Rectangle whose width()/height() bound the processing loops.
227 Rectangle rect_;
// Channel count handed to the Rows view of the buffer.
228 size_t channels_;
// Intermediate row buffer; released with std::free on destruction.
229 std::unique_ptr<uint8_t, decltype(&std::free)> buffer_;
// Stride handed to the Rows view of the buffer; create() sets it to the
// allocation size in bytes.
230 size_t buffer_rows_stride_;
231 }; // end of class SeparableFilterWorkspace
232
233 } // namespace KLEIDICV_TARGET_NAMESPACE
234
235 #endif // KLEIDICV_WORKSPACE_SEPARABLE_H
236