KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/filters/blur_and_downsample_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 171 171 100.0%
Functions: 32 32 100.0%
Branches: 28 28 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/ctypes.h"
6 #include "kleidicv/filters/blur_and_downsample.h"
7 #include "kleidicv/kleidicv.h"
8 #include "kleidicv/sve2.h"
9 #include "kleidicv/utils.h"
10 #include "kleidicv/workspace/blur_and_downsample_ws.h"
11 #include "kleidicv/workspace/border_5x5.h"
12
13 namespace KLEIDICV_TARGET_NAMESPACE {
14
15 // Applies Gaussian Blur binomial filter to even rows and columns
16 //
17 //             [ 1,  4,  6,  4, 1 ]           [ 1 ]
18 //             [ 4, 16, 24, 16, 4 ]           [ 4 ]
19 // F = 1/256 * [ 6, 24, 36, 24, 6 ] = 1/256 * [ 6 ] * [ 1, 4, 6, 4, 1 ]
20 //             [ 4, 16, 24, 16, 4 ]           [ 4 ]
21 //             [ 1,  4,  6,  4, 1 ]           [ 1 ]
22 class BlurAndDownsample {
23 public:
24 using SourceType = uint8_t;
25 using BufferType = uint16_t;
26 using DestinationType = uint8_t;
27 using SourceVecTraits =
28 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
29 using SourceVectorType = typename SourceVecTraits::VectorType;
30 using SourceVector2Type = typename SourceVecTraits::Vector2Type;
31 using BufferVecTraits =
32 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<BufferType>;
33 using BufferVectorType = typename BufferVecTraits::VectorType;
34 using BorderInfoType =
35 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5<SourceType>;
36 using BorderType = FixedBorderType;
37 using BorderOffsets = typename BorderInfoType::Offsets;
38
39 static constexpr size_t margin = 2UL;
40
41 918 void process_vertical(size_t width, Rows<const SourceType> src_rows,
42 Rows<BufferType> dst_rows,
43 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
44 918 LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()};
45
46 1040 loop.unroll_twice([&](ptrdiff_t index) KLEIDICV_STREAMING {
47 122 svbool_t pg_all = SourceVecTraits::svptrue();
48 244 vertical_vector_path_2x(pg_all, src_rows, dst_rows, border_offsets,
49 122 index);
50 122 });
51
52 934 loop.unroll_once([&](ptrdiff_t index) KLEIDICV_STREAMING {
53 16 svbool_t pg_all = SourceVecTraits::svptrue();
54 32 vertical_vector_path_1x(pg_all, src_rows, dst_rows, border_offsets,
55 16 index);
56 16 });
57
58 1820 loop.remaining([&](ptrdiff_t index, ptrdiff_t length) KLEIDICV_STREAMING {
59 902 svbool_t pg = SourceVecTraits::svwhilelt(index, length);
60 902 vertical_vector_path_1x(pg, src_rows, dst_rows, border_offsets, index);
61 902 });
62 918 }
63
64 918 void process_horizontal(size_t width, Rows<const BufferType> src_rows,
65 Rows<DestinationType> dst_rows,
66 BorderOffsets border_offsets) const
67 KLEIDICV_STREAMING {
68 918 svbool_t pg_all = BufferVecTraits::svptrue();
69 918 LoopUnroll2 loop{width * src_rows.channels(), BufferVecTraits::num_lanes()};
70
71 1098 loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING {
72 360 horizontal_vector_path_2x(pg_all, pg_all, src_rows, pg_all, dst_rows,
73 180 border_offsets, static_cast<ptrdiff_t>(index));
74 180 });
75
76 1344 loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
77 426 svbool_t pg_src_0 = BufferVecTraits::svwhilelt(index, length);
78 852 svbool_t pg_src_1 = BufferVecTraits::svwhilelt(
79 426 index + BufferVecTraits::num_lanes(), length);
80 852 svbool_t pg_dst =
81 426 BufferVecTraits::svwhilelt((index + 1) / 2, (length + 1) / 2);
82 852 horizontal_vector_path_2x(pg_src_0, pg_src_1, src_rows, pg_dst, dst_rows,
83 426 border_offsets, static_cast<ptrdiff_t>(index));
84 426 });
85 918 }
86
87 1836 void process_horizontal_borders(
88 Rows<const BufferType> src_rows, Rows<DestinationType> dst_rows,
89 BorderOffsets border_offsets) const KLEIDICV_STREAMING {
90
2/2
✓ Branch 0 taken 1836 times.
✓ Branch 1 taken 1836 times.
3672 for (ptrdiff_t index = 0;
91 3672 index < static_cast<ptrdiff_t>(src_rows.channels()); ++index) {
92 1836 disable_loop_vectorization();
93 1836 svbool_t pg = svptrue_pat_b8(SV_VL1);
94 1836 horizontal_border_path(pg, src_rows, dst_rows, border_offsets, index);
95 1836 }
96 1836 }
97
98 private:
99 122 void vertical_vector_path_2x(svbool_t pg, Rows<const SourceType> src_rows,
100 Rows<BufferType> dst_rows,
101 BorderOffsets border_offsets,
102 ptrdiff_t index) const KLEIDICV_STREAMING {
103 122 const auto *src_row_0 = &src_rows.at(border_offsets.c0())[index];
104 122 const auto *src_row_1 = &src_rows.at(border_offsets.c1())[index];
105 122 const auto *src_row_2 = &src_rows.at(border_offsets.c2())[index];
106 122 const auto *src_row_3 = &src_rows.at(border_offsets.c3())[index];
107 122 const auto *src_row_4 = &src_rows.at(border_offsets.c4())[index];
108
109 122 SourceVector2Type src_0;
110 122 SourceVector2Type src_1;
111 122 SourceVector2Type src_2;
112 122 SourceVector2Type src_3;
113 122 SourceVector2Type src_4;
114
115 122 src_0 =
116 122 svcreate2(svld1(pg, &src_row_0[0]), svld1_vnum(pg, &src_row_0[0], 1));
117 122 src_1 =
118 122 svcreate2(svld1(pg, &src_row_1[0]), svld1_vnum(pg, &src_row_1[0], 1));
119 122 src_2 =
120 122 svcreate2(svld1(pg, &src_row_2[0]), svld1_vnum(pg, &src_row_2[0], 1));
121 122 src_3 =
122 122 svcreate2(svld1(pg, &src_row_3[0]), svld1_vnum(pg, &src_row_3[0], 1));
123 122 src_4 =
124 122 svcreate2(svld1(pg, &src_row_4[0]), svld1_vnum(pg, &src_row_4[0], 1));
125
126 244 vertical_vector_path(pg, svget2(src_0, 0), svget2(src_1, 0),
127 122 svget2(src_2, 0), svget2(src_3, 0), svget2(src_4, 0),
128 122 &dst_rows[index]);
129 244 vertical_vector_path(pg, svget2(src_0, 1), svget2(src_1, 1),
130 122 svget2(src_2, 1), svget2(src_3, 1), svget2(src_4, 1),
131 244 &dst_rows[index + static_cast<ptrdiff_t>(
132 122 SourceVecTraits::num_lanes())]);
133 122 }
134
135 918 void vertical_vector_path_1x(svbool_t pg, Rows<const SourceType> src_rows,
136 Rows<BufferType> dst_rows,
137 BorderOffsets border_offsets,
138 ptrdiff_t index) const KLEIDICV_STREAMING {
139 1836 SourceVectorType src_0 =
140 918 svld1(pg, &src_rows.at(border_offsets.c0())[index]);
141 1836 SourceVectorType src_1 =
142 918 svld1(pg, &src_rows.at(border_offsets.c1())[index]);
143 1836 SourceVectorType src_2 =
144 918 svld1(pg, &src_rows.at(border_offsets.c2())[index]);
145 1836 SourceVectorType src_3 =
146 918 svld1(pg, &src_rows.at(border_offsets.c3())[index]);
147 1836 SourceVectorType src_4 =
148 918 svld1(pg, &src_rows.at(border_offsets.c4())[index]);
149 1836 vertical_vector_path(pg, src_0, src_1, src_2, src_3, src_4,
150 918 &dst_rows[index]);
151 918 }
152
153 // Applies vertical filtering vector using SIMD operations.
154 //
155 // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
156 1162 void vertical_vector_path(svbool_t pg, svuint8_t src_0, svuint8_t src_1,
157 svuint8_t src_2, svuint8_t src_3, svuint8_t src_4,
158 BufferType *dst) const KLEIDICV_STREAMING {
159 1162 svuint16_t acc_0_4_b = svaddlb_u16(src_0, src_4);
160 1162 svuint16_t acc_0_4_t = svaddlt_u16(src_0, src_4);
161 1162 svuint16_t acc_1_3_b = svaddlb_u16(src_1, src_3);
162 1162 svuint16_t acc_1_3_t = svaddlt_u16(src_1, src_3);
163
164 1162 svuint16_t acc_u16_b = svmlalb_n_u16(acc_0_4_b, src_2, 6);
165 1162 svuint16_t acc_u16_t = svmlalt_n_u16(acc_0_4_t, src_2, 6);
166 1162 acc_u16_b = svmla_n_u16_x(pg, acc_u16_b, acc_1_3_b, 4);
167 1162 acc_u16_t = svmla_n_u16_x(pg, acc_u16_t, acc_1_3_t, 4);
168
169 1162 svuint16x2_t interleaved = svcreate2(acc_u16_b, acc_u16_t);
170 1162 svst2(pg, &dst[0], interleaved);
171 1162 }
172
173 606 void horizontal_vector_path_2x(svbool_t pg_src_0, svbool_t pg_src_1,
174 Rows<const BufferType> src_rows,
175 svbool_t pg_dst,
176 Rows<DestinationType> dst_rows,
177 BorderOffsets border_offsets,
178 ptrdiff_t index) const KLEIDICV_STREAMING {
179 606 const auto *src_0 = &src_rows.at(0, border_offsets.c0())[index];
180 606 const auto *src_1 = &src_rows.at(0, border_offsets.c1())[index];
181 606 const auto *src_2 = &src_rows.at(0, border_offsets.c2())[index];
182 606 const auto *src_3 = &src_rows.at(0, border_offsets.c3())[index];
183 606 const auto *src_4 = &src_rows.at(0, border_offsets.c4())[index];
184
185 606 BufferVectorType src_0_0 = svld1(pg_src_0, &src_0[0]);
186 606 BufferVectorType src_1_0 = svld1_vnum(pg_src_1, &src_0[0], 1);
187 606 BufferVectorType src_0_1 = svld1(pg_src_0, &src_1[0]);
188 606 BufferVectorType src_1_1 = svld1_vnum(pg_src_1, &src_1[0], 1);
189 606 BufferVectorType src_0_2 = svld1(pg_src_0, &src_2[0]);
190 606 BufferVectorType src_1_2 = svld1_vnum(pg_src_1, &src_2[0], 1);
191 606 BufferVectorType src_0_3 = svld1(pg_src_0, &src_3[0]);
192 606 BufferVectorType src_1_3 = svld1_vnum(pg_src_1, &src_3[0], 1);
193 606 BufferVectorType src_0_4 = svld1(pg_src_0, &src_4[0]);
194 606 BufferVectorType src_1_4 = svld1_vnum(pg_src_1, &src_4[0], 1);
195
196 1212 svuint16_t res_0 = horizontal_vector_path(pg_src_0, src_0_0, src_0_1,
197 606 src_0_2, src_0_3, src_0_4);
198 1212 svuint16_t res_1 = horizontal_vector_path(pg_src_1, src_1_0, src_1_1,
199 606 src_1_2, src_1_3, src_1_4);
200
201 606 svuint16_t res_even_only = svuzp1(res_0, res_1);
202 606 svst1b(pg_dst, &dst_rows[index / 2], res_even_only);
203 606 }
204
205 // Applies horizontal filtering vector using SIMD operations.
206 //
207 // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
208 1212 svuint16_t horizontal_vector_path(svbool_t pg, svuint16_t src_0,
209 svuint16_t src_1, svuint16_t src_2,
210 svuint16_t src_3,
211 svuint16_t src_4) const KLEIDICV_STREAMING {
212 1212 svuint16_t acc_0_4 = svadd_x(pg, src_0, src_4);
213 1212 svuint16_t acc_1_3 = svadd_x(pg, src_1, src_3);
214 1212 svuint16_t acc = svmla_n_u16_x(pg, acc_0_4, src_2, 6);
215 1212 acc = svmla_n_u16_x(pg, acc, acc_1_3, 4);
216 1212 acc = svrshr_x(pg, acc, 8);
217 2424 return acc;
218 1212 }
219
220 // Applies horizontal filtering for the borders using SIMD operations.
221 //
222 // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
223 1836 void horizontal_border_path(svbool_t pg, Rows<const BufferType> src_rows,
224 Rows<DestinationType> dst_rows,
225 BorderOffsets border_offsets,
226 ptrdiff_t index) const KLEIDICV_STREAMING {
227 3672 BufferVectorType src_0 =
228 1836 svld1(pg, &src_rows.at(0, border_offsets.c0())[index]);
229 3672 BufferVectorType src_1 =
230 1836 svld1(pg, &src_rows.at(0, border_offsets.c1())[index]);
231 3672 BufferVectorType src_2 =
232 1836 svld1(pg, &src_rows.at(0, border_offsets.c2())[index]);
233 3672 BufferVectorType src_3 =
234 1836 svld1(pg, &src_rows.at(0, border_offsets.c3())[index]);
235 3672 BufferVectorType src_4 =
236 1836 svld1(pg, &src_rows.at(0, border_offsets.c4())[index]);
237
238 1836 svuint16_t acc_0_4 = svadd_x(pg, src_0, src_4);
239 1836 svuint16_t acc_1_3 = svadd_x(pg, src_1, src_3);
240 1836 svuint16_t acc = svmla_n_u16_x(pg, acc_0_4, src_2, 6);
241 1836 acc = svmla_n_u16_x(pg, acc, acc_1_3, 4);
242 1836 acc = svrshr_x(pg, acc, 8);
243
244 1836 svst1b(pg, &dst_rows[index / 2], acc);
245 1836 }
246 }; // end of class BlurAndDownsample
247
248 // Does not include checks for whether the operation is implemented.
249 // This must be done earlier, by blur_and_downsample_is_implemented.
250 102 static kleidicv_error_t blur_and_downsample_checks(
251 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
252 uint8_t *dst, size_t dst_stride, size_t channels,
253 BlurAndDownsampleFilterWorkspace *workspace) KLEIDICV_STREAMING {
254
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 100 times.
102 CHECK_POINTERS(workspace);
255
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 98 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 98 times.
100 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
256
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 96 times.
98 CHECK_POINTER_AND_STRIDE(dst, dst_stride, (src_height + 1) / 2);
257
6/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 94 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 92 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 92 times.
96 CHECK_IMAGE_SIZE(src_width, src_height);
258
259 92 Rectangle rect{src_width, src_height};
260 92 const Rectangle &context_rect = workspace->image_size();
261
4/4
✓ Branch 0 taken 88 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 86 times.
92 if (context_rect.width() < src_width || context_rect.height() < src_height) {
262 6 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
263 }
264
265 // Currently supports only one channel, so it cannot be tested.
266 // GCOVR_EXCL_START
267 if (workspace->channels() < channels) {
268 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
269 }
270 // GCOVR_EXCL_STOP
271
272 86 return KLEIDICV_OK;
273 102 }
274
275 102 static kleidicv_error_t blur_and_downsample_stripe_u8_sc(
276 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
277 uint8_t *dst, size_t dst_stride, size_t y_begin, size_t y_end,
278 size_t channels, FixedBorderType fixed_border_type,
279 kleidicv_filter_context_t *context) KLEIDICV_STREAMING {
280 // Does not include checks for whether the operation is implemented.
281 // This must be done earlier, by blur_and_downsample_is_implemented.
282 204 auto *workspace =
283 102 reinterpret_cast<BlurAndDownsampleFilterWorkspace *>(context);
284
285
6/6
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 86 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 86 times.
✓ Branch 4 taken 16 times.
✓ Branch 5 taken 86 times.
220 if (auto check_result =
286 204 blur_and_downsample_checks(src, src_stride, src_width, src_height,
287 102 dst, dst_stride, channels, workspace)) {
288 16 return check_result;
289 }
290
291 86 Rectangle rect{src_width, src_height};
292
293 86 Rows<const uint8_t> src_rows{src, src_stride, channels};
294 86 Rows<uint8_t> dst_rows{dst, dst_stride, channels};
295 172 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
296 86 fixed_border_type, BlurAndDownsample{});
297
298 86 return KLEIDICV_OK;
299 102 }
300
301 } // namespace KLEIDICV_TARGET_NAMESPACE
302