KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/filters/separable_filter_2d_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 248 248 100.0%
Functions: 36 36 100.0%
Branches: 104 104 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_SEPARABLE_FILTER_2D_SC_H
6 #define KLEIDICV_SEPARABLE_FILTER_2D_SC_H
7
8 #include <limits>
9
10 #include "kleidicv/filters/separable_filter_5x5_sc.h"
11 #include "kleidicv/kleidicv.h"
12 #include "kleidicv/sve2.h"
13 #include "kleidicv/workspace/separable.h"
14
15 namespace KLEIDICV_TARGET_NAMESPACE {
16
// Primary template: declared only, never defined here. Behaviour is supplied
// by explicit specializations for particular (ScalarType, KernelSize) pairs.
17 template <typename ScalarType, size_t KernelSize>
18 class SeparableFilter2D;
19
// Specialization for 8-bit unsigned elements with a 5-tap separable kernel.
// The vertical pass produces uint16_t intermediates (BufferType); the
// horizontal pass consumes those and writes uint8_t results, saturating at
// the uint8_t maximum.
20 template <>
21 class SeparableFilter2D<uint8_t, 5> {
22 public:
23 using SourceType = uint8_t;
24 using SourceVectorType = typename VecTraits<SourceType>::VectorType;
25 using BufferType = uint16_t;
26 using BufferVectorType = typename VecTraits<BufferType>::VectorType;
27 using BufferDoubleVectorType = typename VecTraits<BufferType>::Vector2Type;
28 using DestinationType = uint8_t;
29
// Keeps the scalar kernel_x pointer (read by horizontal_scalar_path) and binds
// references to the caller's ten kernel vectors. Nothing is copied, so the
// pointed-to array and the referenced vectors must outlive this object.
30 165 SeparableFilter2D(
31 const SourceType *kernel_x, BufferVectorType &kernel_x_0_u16,
32 BufferVectorType &kernel_x_1_u16, BufferVectorType &kernel_x_2_u16,
33 BufferVectorType &kernel_x_3_u16, BufferVectorType &kernel_x_4_u16,
34 SourceVectorType &kernel_y_0_u8, SourceVectorType &kernel_y_1_u8,
35 SourceVectorType &kernel_y_2_u8, SourceVectorType &kernel_y_3_u8,
36 SourceVectorType &kernel_y_4_u8)
37 165 : kernel_x_(kernel_x),
38 165 kernel_x_0_u16_(kernel_x_0_u16),
39 165 kernel_x_1_u16_(kernel_x_1_u16),
40 165 kernel_x_2_u16_(kernel_x_2_u16),
41 165 kernel_x_3_u16_(kernel_x_3_u16),
42 165 kernel_x_4_u16_(kernel_x_4_u16),
43
44 165 kernel_y_0_u8_(kernel_y_0_u8),
45 165 kernel_y_1_u8_(kernel_y_1_u8),
46 165 kernel_y_2_u8_(kernel_y_2_u8),
47 165 kernel_y_3_u8_(kernel_y_3_u8),
48 165 kernel_y_4_u8_(kernel_y_4_u8) {}
49
// Vertical pass for one vector of elements: accumulates
// src[i] * kernel_y_i for the five inputs in 16-bit lanes and stores to dst.
// svmullb/svmullt are widening multiplies over the even ("bottom") and odd
// ("top") 8-bit lanes respectively, so two accumulators are carried.
// svqadd is a saturating add, so each running sum sticks at the uint16_t
// maximum instead of wrapping.
50 2284 void vertical_vector_path(svbool_t pg,
51 std::reference_wrapper<SourceVectorType> src[5],
52 BufferType *dst) const KLEIDICV_STREAMING {
53 // 0
54 2284 BufferVectorType acc_b = svmullb_u16(src[0], kernel_y_0_u8_);
55 2284 BufferVectorType acc_t = svmullt_u16(src[0], kernel_y_0_u8_);
56
57 // 1
58 2284 BufferVectorType vec_b = svmullb_u16(src[1], kernel_y_1_u8_);
59 2284 BufferVectorType vec_t = svmullt_u16(src[1], kernel_y_1_u8_);
60 2284 acc_b = svqadd_u16_x(pg, acc_b, vec_b);
61 2284 acc_t = svqadd_u16_x(pg, acc_t, vec_t);
62
63 // 2
64 2284 vec_b = svmullb_u16(src[2], kernel_y_2_u8_);
65 2284 vec_t = svmullt_u16(src[2], kernel_y_2_u8_);
66 2284 acc_b = svqadd_u16_x(pg, acc_b, vec_b);
67 2284 acc_t = svqadd_u16_x(pg, acc_t, vec_t);
68
69 // 3
70 2284 vec_b = svmullb_u16(src[3], kernel_y_3_u8_);
71 2284 vec_t = svmullt_u16(src[3], kernel_y_3_u8_);
72 2284 acc_b = svqadd_u16_x(pg, acc_b, vec_b);
73 2284 acc_t = svqadd_u16_x(pg, acc_t, vec_t);
74
75 // 4
76 2284 vec_b = svmullb_u16(src[4], kernel_y_4_u8_);
77 2284 vec_t = svmullt_u16(src[4], kernel_y_4_u8_);
78 2284 acc_b = svqadd_u16_x(pg, acc_b, vec_b);
79 2284 acc_t = svqadd_u16_x(pg, acc_t, vec_t);
80
// svst2 interleaves the two vectors on store (b0, t0, b1, t1, ...), which
// puts the even/odd results back into source element order.
81 2284 BufferDoubleVectorType interleaved = svcreate2_u16(acc_b, acc_t);
82 2284 svst2(pg, &dst[0], interleaved);
83 2284 }
84
// Horizontal pass for one vector of buffered 16-bit values: widening
// multiply-accumulate of src[i] * kernel_x_i into 32-bit lanes, saturating
// narrow back to 16 bits, clamp to the uint8_t maximum, then store one byte
// per 16-bit lane to dst.
85 1950 void horizontal_vector_path(svbool_t pg,
86 std::reference_wrapper<BufferVectorType> src[5],
87 DestinationType *dst) const KLEIDICV_STREAMING {
88 // 0
89 1950 svuint32_t acc_b = svmullb_u32(src[0], kernel_x_0_u16_);
90 1950 svuint32_t acc_t = svmullt_u32(src[0], kernel_x_0_u16_);
91
92 // 1
93 1950 acc_b = svmlalb_u32(acc_b, src[1], kernel_x_1_u16_);
94 1950 acc_t = svmlalt_u32(acc_t, src[1], kernel_x_1_u16_);
95
96 // 2
97 1950 acc_b = svmlalb_u32(acc_b, src[2], kernel_x_2_u16_);
98 1950 acc_t = svmlalt_u32(acc_t, src[2], kernel_x_2_u16_);
99
100 // 3
101 1950 acc_b = svmlalb_u32(acc_b, src[3], kernel_x_3_u16_);
102 1950 acc_t = svmlalt_u32(acc_t, src[3], kernel_x_3_u16_);
103
104 // 4
105 1950 acc_b = svmlalb_u32(acc_b, src[4], kernel_x_4_u16_);
106 1950 acc_t = svmlalt_u32(acc_t, src[4], kernel_x_4_u16_);
107
// Saturating narrow 32 -> 16 bits; svqxtnt writes the odd lanes into the
// vector produced by svqxtnb, recombining both halves in element order.
108 1950 svuint16_t acc_u16_b = svqxtnb_u32(acc_b);
109 1950 svuint16_t acc_u16 = svqxtnt_u32(acc_u16_b, acc_t);
110
// Replace every lane above the uint8_t maximum with that maximum so the
// truncating byte store below cannot wrap.
111 3900 svbool_t greater =
112 1950 svcmpgt_n_u16(pg, acc_u16, std::numeric_limits<SourceType>::max());
113 1950 acc_u16 =
114 1950 svdup_n_u16_m(acc_u16, greater, std::numeric_limits<SourceType>::max());
115
116 1950 svst1b_u16(pg, &dst[0], acc_u16);
117 1950 }
118
// Scalar counterpart of horizontal_vector_path for a single output element.
// Computes the sum of src[i] * kernel_x_[i] for i in 0..4 and writes it to
// dst[0]. As soon as a product or the running sum does not fit in SourceType
// the SourceType maximum is written instead and the function returns.
119 8592 void horizontal_scalar_path(const BufferType src[5],
120 DestinationType *dst) const KLEIDICV_STREAMING {
121 8592 SourceType acc; // NOLINT
// First tap initialises acc; the overflow builtin checks the product against
// the type of acc (SourceType), not against BufferType.
122
2/2
✓ Branch 0 taken 6324 times.
✓ Branch 1 taken 2268 times.
8592 if (__builtin_mul_overflow(src[0], kernel_x_[0], &acc)) {
123 6324 dst[0] = std::numeric_limits<SourceType>::max();
124 6324 return;
125 }
126
// Remaining taps: each product and each partial sum is overflow-checked.
127
4/4
✓ Branch 0 taken 8672 times.
✓ Branch 1 taken 1573 times.
✓ Branch 2 taken 695 times.
✓ Branch 3 taken 1573 times.
10940 for (size_t i = 1; i < 5; i++) {
128 8672 SourceType temp; // NOLINT
129
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 8624 times.
8672 if (__builtin_mul_overflow(src[i], kernel_x_[i], &temp)) {
130 48 dst[0] = std::numeric_limits<SourceType>::max();
131 48 return;
132 }
133
2/2
✓ Branch 0 taken 647 times.
✓ Branch 1 taken 7977 times.
8624 if (__builtin_add_overflow(acc, temp, &acc)) {
134 647 dst[0] = std::numeric_limits<SourceType>::max();
135 647 return;
136 }
137 8672 }
138
139 1573 dst[0] = acc;
140 8592 }
141
142 private:
// Scalar horizontal coefficients; indices 0..4 are read by
// horizontal_scalar_path.
143 const SourceType *kernel_x_;
144
// Horizontal coefficients as BufferType vectors (references, not copies).
145 BufferVectorType &kernel_x_0_u16_;
146 BufferVectorType &kernel_x_1_u16_;
147 BufferVectorType &kernel_x_2_u16_;
148 BufferVectorType &kernel_x_3_u16_;
149 BufferVectorType &kernel_x_4_u16_;
150
// Vertical coefficients as SourceType vectors (references, not copies).
151 SourceVectorType &kernel_y_0_u8_;
152 SourceVectorType &kernel_y_1_u8_;
153 SourceVectorType &kernel_y_2_u8_;
154 SourceVectorType &kernel_y_3_u8_;
155 SourceVectorType &kernel_y_4_u8_;
156 }; // end of class SeparableFilter2D<uint8_t, 5>
157
// Specialization for 16-bit unsigned elements with a 5-tap separable kernel.
// The vertical pass produces uint32_t intermediates (BufferType); the
// horizontal pass consumes those and writes uint16_t results, saturating at
// the uint16_t maximum.
158 template <>
159 class SeparableFilter2D<uint16_t, 5> {
160 public:
161 using SourceType = uint16_t;
162 using SourceVectorType = typename VecTraits<SourceType>::VectorType;
163 using BufferType = uint32_t;
164 using BufferVectorType = typename VecTraits<BufferType>::VectorType;
165 using BufferDoubleVectorType = typename VecTraits<BufferType>::Vector2Type;
166 using DestinationType = uint16_t;
167
// Keeps the scalar kernel_x pointer (read by horizontal_scalar_path) and binds
// references to the caller's ten kernel vectors. Nothing is copied, so the
// pointed-to array and the referenced vectors must outlive this object.
168 165 SeparableFilter2D(
169 const SourceType *kernel_x, BufferVectorType &kernel_x_0_u32,
170 BufferVectorType &kernel_x_1_u32, BufferVectorType &kernel_x_2_u32,
171 BufferVectorType &kernel_x_3_u32, BufferVectorType &kernel_x_4_u32,
172 SourceVectorType &kernel_y_0_u16, SourceVectorType &kernel_y_1_u16,
173 SourceVectorType &kernel_y_2_u16, SourceVectorType &kernel_y_3_u16,
174 SourceVectorType &kernel_y_4_u16)
175 165 : kernel_x_(kernel_x),
176 165 kernel_x_0_u32_(kernel_x_0_u32),
177 165 kernel_x_1_u32_(kernel_x_1_u32),
178 165 kernel_x_2_u32_(kernel_x_2_u32),
179 165 kernel_x_3_u32_(kernel_x_3_u32),
180 165 kernel_x_4_u32_(kernel_x_4_u32),
181
182 165 kernel_y_0_u16_(kernel_y_0_u16),
183 165 kernel_y_1_u16_(kernel_y_1_u16),
184 165 kernel_y_2_u16_(kernel_y_2_u16),
185 165 kernel_y_3_u16_(kernel_y_3_u16),
186 165 kernel_y_4_u16_(kernel_y_4_u16) {}
187
// Vertical pass for one vector of elements: accumulates
// src[i] * kernel_y_i for the five inputs in 32-bit lanes and stores to dst.
// svmullb/svmullt are widening multiplies over the even ("bottom") and odd
// ("top") 16-bit lanes respectively, so two accumulators are carried.
// svqadd is a saturating add, so each running sum sticks at the uint32_t
// maximum instead of wrapping.
188 2909 void vertical_vector_path(svbool_t pg,
189 std::reference_wrapper<SourceVectorType> src[5],
190 BufferType *dst) const KLEIDICV_STREAMING {
191 // 0
192 2909 BufferVectorType acc_b = svmullb_u32(src[0], kernel_y_0_u16_);
193 2909 BufferVectorType acc_t = svmullt_u32(src[0], kernel_y_0_u16_);
194
195 // 1
196 2909 BufferVectorType vec_b = svmullb_u32(src[1], kernel_y_1_u16_);
197 2909 BufferVectorType vec_t = svmullt_u32(src[1], kernel_y_1_u16_);
198 2909 acc_b = svqadd_u32_x(pg, acc_b, vec_b);
199 2909 acc_t = svqadd_u32_x(pg, acc_t, vec_t);
200
201 // 2
202 2909 vec_b = svmullb_u32(src[2], kernel_y_2_u16_);
203 2909 vec_t = svmullt_u32(src[2], kernel_y_2_u16_);
204 2909 acc_b = svqadd_u32_x(pg, acc_b, vec_b);
205 2909 acc_t = svqadd_u32_x(pg, acc_t, vec_t);
206
207 // 3
208 2909 vec_b = svmullb_u32(src[3], kernel_y_3_u16_);
209 2909 vec_t = svmullt_u32(src[3], kernel_y_3_u16_);
210 2909 acc_b = svqadd_u32_x(pg, acc_b, vec_b);
211 2909 acc_t = svqadd_u32_x(pg, acc_t, vec_t);
212
213 // 4
214 2909 vec_b = svmullb_u32(src[4], kernel_y_4_u16_);
215 2909 vec_t = svmullt_u32(src[4], kernel_y_4_u16_);
216 2909 acc_b = svqadd_u32_x(pg, acc_b, vec_b);
217 2909 acc_t = svqadd_u32_x(pg, acc_t, vec_t);
218
// svst2 interleaves the two vectors on store (b0, t0, b1, t1, ...), which
// puts the even/odd results back into source element order.
219 2909 BufferDoubleVectorType interleaved = svcreate2_u32(acc_b, acc_t);
220 2909 svst2(pg, &dst[0], interleaved);
221 2909 }
222
// Horizontal pass for one vector of buffered 32-bit values: widening
// multiply-accumulate of src[i] * kernel_x_i into 64-bit lanes, saturating
// narrow back to 32 bits, clamp to the uint16_t maximum, then store one
// halfword per 32-bit lane to dst.
223 2834 void horizontal_vector_path(svbool_t pg,
224 std::reference_wrapper<BufferVectorType> src[5],
225 DestinationType *dst) const KLEIDICV_STREAMING {
226 // 0
227 2834 svuint64_t acc_b = svmullb_u64(src[0], kernel_x_0_u32_);
228 2834 svuint64_t acc_t = svmullt_u64(src[0], kernel_x_0_u32_);
229
230 // 1
231 2834 acc_b = svmlalb_u64(acc_b, src[1], kernel_x_1_u32_);
232 2834 acc_t = svmlalt_u64(acc_t, src[1], kernel_x_1_u32_);
233
234 // 2
235 2834 acc_b = svmlalb_u64(acc_b, src[2], kernel_x_2_u32_);
236 2834 acc_t = svmlalt_u64(acc_t, src[2], kernel_x_2_u32_);
237
238 // 3
239 2834 acc_b = svmlalb_u64(acc_b, src[3], kernel_x_3_u32_);
240 2834 acc_t = svmlalt_u64(acc_t, src[3], kernel_x_3_u32_);
241
242 // 4
243 2834 acc_b = svmlalb_u64(acc_b, src[4], kernel_x_4_u32_);
244 2834 acc_t = svmlalt_u64(acc_t, src[4], kernel_x_4_u32_);
245
// Saturating narrow 64 -> 32 bits; svqxtnt writes the odd lanes into the
// vector produced by svqxtnb, recombining both halves in element order.
246 2834 svuint32_t acc_u32_b = svqxtnb_u64(acc_b);
247 2834 svuint32_t acc_u32 = svqxtnt_u64(acc_u32_b, acc_t);
248
// Replace every lane above the uint16_t maximum with that maximum so the
// truncating halfword store below cannot wrap.
249 5668 svbool_t greater =
250 2834 svcmpgt_n_u32(pg, acc_u32, std::numeric_limits<SourceType>::max());
251 2834 acc_u32 =
252 2834 svdup_n_u32_m(acc_u32, greater, std::numeric_limits<SourceType>::max());
253
254 2834 svst1h_u32(pg, &dst[0], acc_u32);
255 2834 }
256
// Scalar counterpart of horizontal_vector_path for a single output element.
// Computes the sum of src[i] * kernel_x_[i] for i in 0..4 and writes it to
// dst[0]. As soon as a product or the running sum does not fit in SourceType
// the SourceType maximum is written instead and the function returns.
257 8700 void horizontal_scalar_path(const BufferType src[5],
258 DestinationType *dst) const KLEIDICV_STREAMING {
259 8700 SourceType acc; // Avoid cppcoreguidelines-init-variables. NOLINT
// First tap initialises acc; the overflow builtin checks the product against
// the type of acc (SourceType), not against BufferType.
260
2/2
✓ Branch 0 taken 6390 times.
✓ Branch 1 taken 2310 times.
8700 if (__builtin_mul_overflow(src[0], kernel_x_[0], &acc)) {
261 6390 dst[0] = std::numeric_limits<SourceType>::max();
262 6390 return;
263 }
264
// Remaining taps: each product and each partial sum is overflow-checked.
265
4/4
✓ Branch 0 taken 9216 times.
✓ Branch 1 taken 2253 times.
✓ Branch 2 taken 57 times.
✓ Branch 3 taken 2253 times.
11526 for (size_t i = 1; i < 5; i++) {
266 9216 SourceType temp; // Avoid cppcoreguidelines-init-variables. NOLINT
267
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 9210 times.
9216 if (__builtin_mul_overflow(src[i], kernel_x_[i], &temp)) {
268 6 dst[0] = std::numeric_limits<SourceType>::max();
269 6 return;
270 }
271
2/2
✓ Branch 0 taken 51 times.
✓ Branch 1 taken 9159 times.
9210 if (__builtin_add_overflow(acc, temp, &acc)) {
272 51 dst[0] = std::numeric_limits<SourceType>::max();
273 51 return;
274 }
275 9216 }
276
277 2253 dst[0] = acc;
278 8700 }
279
280 private:
// Scalar horizontal coefficients; indices 0..4 are read by
// horizontal_scalar_path.
281 const SourceType *kernel_x_;
282
// Horizontal coefficients as BufferType vectors (references, not copies).
283 BufferVectorType &kernel_x_0_u32_;
284 BufferVectorType &kernel_x_1_u32_;
285 BufferVectorType &kernel_x_2_u32_;
286 BufferVectorType &kernel_x_3_u32_;
287 BufferVectorType &kernel_x_4_u32_;
288
// Vertical coefficients as SourceType vectors (references, not copies).
289 SourceVectorType &kernel_y_0_u16_;
290 SourceVectorType &kernel_y_1_u16_;
291 SourceVectorType &kernel_y_2_u16_;
292 SourceVectorType &kernel_y_3_u16_;
293 SourceVectorType &kernel_y_4_u16_;
294 }; // end of class SeparableFilter2D<uint16_t, 5>
295
// Specialization for 16-bit signed elements with a 5-tap separable kernel.
// The vertical pass produces int32_t intermediates (BufferType); the
// horizontal pass consumes those and writes int16_t results, clamped to the
// int16_t range at both ends.
296 template <>
297 class SeparableFilter2D<int16_t, 5> {
298 public:
299 using SourceType = int16_t;
300 using SourceVectorType = typename VecTraits<SourceType>::VectorType;
301 using BufferType = int32_t;
302 using BufferVectorType = typename VecTraits<BufferType>::VectorType;
303 using BufferDoubleVectorType = typename VecTraits<BufferType>::Vector2Type;
304 using DestinationType = int16_t;
305
// Keeps the scalar kernel_x pointer (read by horizontal_scalar_path) and binds
// references to the caller's ten kernel vectors. Nothing is copied, so the
// pointed-to array and the referenced vectors must outlive this object.
306 162 SeparableFilter2D(
307 const SourceType *kernel_x, BufferVectorType &kernel_x_0_s32,
308 BufferVectorType &kernel_x_1_s32, BufferVectorType &kernel_x_2_s32,
309 BufferVectorType &kernel_x_3_s32, BufferVectorType &kernel_x_4_s32,
310 SourceVectorType &kernel_y_0_s16, SourceVectorType &kernel_y_1_s16,
311 SourceVectorType &kernel_y_2_s16, SourceVectorType &kernel_y_3_s16,
312 SourceVectorType &kernel_y_4_s16)
313 162 : kernel_x_(kernel_x),
314 162 kernel_x_0_s32_(kernel_x_0_s32),
315 162 kernel_x_1_s32_(kernel_x_1_s32),
316 162 kernel_x_2_s32_(kernel_x_2_s32),
317 162 kernel_x_3_s32_(kernel_x_3_s32),
318 162 kernel_x_4_s32_(kernel_x_4_s32),
319
320 162 kernel_y_0_s16_(kernel_y_0_s16),
321 162 kernel_y_1_s16_(kernel_y_1_s16),
322 162 kernel_y_2_s16_(kernel_y_2_s16),
323 162 kernel_y_3_s16_(kernel_y_3_s16),
324 162 kernel_y_4_s16_(kernel_y_4_s16) {}
325
// Vertical pass for one vector of elements: accumulates
// src[i] * kernel_y_i for the five inputs in 32-bit lanes and stores to dst.
// svmullb/svmullt are widening signed multiplies over the even ("bottom") and
// odd ("top") 16-bit lanes respectively, so two accumulators are carried.
// svqadd is a saturating add, so each running sum sticks at the int32_t
// minimum or maximum instead of wrapping.
326 2896 void vertical_vector_path(svbool_t pg,
327 std::reference_wrapper<SourceVectorType> src[5],
328 BufferType *dst) const KLEIDICV_STREAMING {
329 // 0
330 2896 BufferVectorType acc_b = svmullb_s32(src[0], kernel_y_0_s16_);
331 2896 BufferVectorType acc_t = svmullt_s32(src[0], kernel_y_0_s16_);
332
333 // 1
334 2896 BufferVectorType vec_b = svmullb_s32(src[1], kernel_y_1_s16_);
335 2896 BufferVectorType vec_t = svmullt_s32(src[1], kernel_y_1_s16_);
336 2896 acc_b = svqadd_s32_x(pg, acc_b, vec_b);
337 2896 acc_t = svqadd_s32_x(pg, acc_t, vec_t);
338
339 // 2
340 2896 vec_b = svmullb_s32(src[2], kernel_y_2_s16_);
341 2896 vec_t = svmullt_s32(src[2], kernel_y_2_s16_);
342 2896 acc_b = svqadd_s32_x(pg, acc_b, vec_b);
343 2896 acc_t = svqadd_s32_x(pg, acc_t, vec_t);
344
345 // 3
346 2896 vec_b = svmullb_s32(src[3], kernel_y_3_s16_);
347 2896 vec_t = svmullt_s32(src[3], kernel_y_3_s16_);
348 2896 acc_b = svqadd_s32_x(pg, acc_b, vec_b);
349 2896 acc_t = svqadd_s32_x(pg, acc_t, vec_t);
350
351 // 4
352 2896 vec_b = svmullb_s32(src[4], kernel_y_4_s16_);
353 2896 vec_t = svmullt_s32(src[4], kernel_y_4_s16_);
354 2896 acc_b = svqadd_s32_x(pg, acc_b, vec_b);
355 2896 acc_t = svqadd_s32_x(pg, acc_t, vec_t);
356
// svst2 interleaves the two vectors on store (b0, t0, b1, t1, ...), which
// puts the even/odd results back into source element order.
357 2896 BufferDoubleVectorType interleaved = svcreate2_s32(acc_b, acc_t);
358 2896 svst2(pg, &dst[0], interleaved);
359 2896 }
360
// Horizontal pass for one vector of buffered 32-bit values: widening signed
// multiply-accumulate of src[i] * kernel_x_i into 64-bit lanes, saturating
// narrow back to 32 bits, clamp to [int16_t min, int16_t max], then store
// one halfword per 32-bit lane to dst.
361 2830 void horizontal_vector_path(svbool_t pg,
362 std::reference_wrapper<BufferVectorType> src[5],
363 DestinationType *dst) const KLEIDICV_STREAMING {
364 // 0
365 2830 svint64_t acc_b = svmullb_s64(src[0], kernel_x_0_s32_);
366 2830 svint64_t acc_t = svmullt_s64(src[0], kernel_x_0_s32_);
367
368 // 1
369 2830 acc_b = svmlalb_s64(acc_b, src[1], kernel_x_1_s32_);
370 2830 acc_t = svmlalt_s64(acc_t, src[1], kernel_x_1_s32_);
371
372 // 2
373 2830 acc_b = svmlalb_s64(acc_b, src[2], kernel_x_2_s32_);
374 2830 acc_t = svmlalt_s64(acc_t, src[2], kernel_x_2_s32_);
375
376 // 3
377 2830 acc_b = svmlalb_s64(acc_b, src[3], kernel_x_3_s32_);
378 2830 acc_t = svmlalt_s64(acc_t, src[3], kernel_x_3_s32_);
379
380 // 4
381 2830 acc_b = svmlalb_s64(acc_b, src[4], kernel_x_4_s32_);
382 2830 acc_t = svmlalt_s64(acc_t, src[4], kernel_x_4_s32_);
383
// Saturating narrow 64 -> 32 bits; svqxtnt writes the odd lanes into the
// vector produced by svqxtnb, recombining both halves in element order.
384 2830 svint32_t acc_s32_b = svqxtnb_s64(acc_b);
385 2830 svint32_t acc_s32 = svqxtnt_s64(acc_s32_b, acc_t);
386
// Lower clamp: lanes below the int16_t minimum are replaced by that minimum.
387 5660 svbool_t less =
388 2830 svcmplt_n_s32(pg, acc_s32, std::numeric_limits<SourceType>::min());
389 2830 acc_s32 =
390 2830 svdup_n_s32_m(acc_s32, less, std::numeric_limits<SourceType>::min());
391
// Upper clamp: lanes above the int16_t maximum are replaced by that maximum,
// so the truncating halfword store below cannot wrap.
392 5660 svbool_t greater =
393 2830 svcmpgt_n_s32(pg, acc_s32, std::numeric_limits<SourceType>::max());
394 2830 acc_s32 =
395 2830 svdup_n_s32_m(acc_s32, greater, std::numeric_limits<SourceType>::max());
396
397 2830 svst1h_s32(pg, &dst[0], acc_s32);
398 2830 }
399
// Scalar counterpart of horizontal_vector_path for a single output element.
// Unlike the unsigned specializations this one does not exit early: it sums
// all five products in a 64-bit accumulator and clamps once at the end.
400 8640 void horizontal_scalar_path(const BufferType src[5],
401 DestinationType *dst) const KLEIDICV_STREAMING {
// The cast widens the left operand first, so each multiplication is carried
// out in int64_t.
402 8640 int64_t acc = static_cast<int64_t>(src[0]) * kernel_x_[0];
403
2/2
✓ Branch 0 taken 34560 times.
✓ Branch 1 taken 8640 times.
43200 for (size_t i = 1; i < 5; i++) {
404 34560 acc += static_cast<int64_t>(src[i]) * kernel_x_[i];
405 34560 }
406
// Clamp the 64-bit sum into the DestinationType range before narrowing.
407
2/2
✓ Branch 0 taken 3114 times.
✓ Branch 1 taken 5526 times.
8640 if (acc < std::numeric_limits<DestinationType>::min()) {
408 3114 acc = std::numeric_limits<DestinationType>::min();
409
2/2
✓ Branch 0 taken 2371 times.
✓ Branch 1 taken 3155 times.
8640 } else if (acc > std::numeric_limits<DestinationType>::max()) {
410 3155 acc = std::numeric_limits<DestinationType>::max();
411 3155 }
412
413 8640 dst[0] = static_cast<DestinationType>(acc);
414 8640 }
415
416 private:
// Scalar horizontal coefficients; indices 0..4 are read by
// horizontal_scalar_path.
417 const SourceType *kernel_x_;
418
// Horizontal coefficients as BufferType vectors (references, not copies).
419 BufferVectorType &kernel_x_0_s32_;
420 BufferVectorType &kernel_x_1_s32_;
421 BufferVectorType &kernel_x_2_s32_;
422 BufferVectorType &kernel_x_3_s32_;
423 BufferVectorType &kernel_x_4_s32_;
424
// Vertical coefficients as SourceType vectors (references, not copies).
425 SourceVectorType &kernel_y_0_s16_;
426 SourceVectorType &kernel_y_1_s16_;
427 SourceVectorType &kernel_y_2_s16_;
428 SourceVectorType &kernel_y_3_s16_;
429 SourceVectorType &kernel_y_4_s16_;
430 }; // end of class SeparableFilter2D<int16_t, 5>
431
// Validates the arguments of a separable 2D filter call against the workspace.
//
// Returns KLEIDICV_OK when every check passes. The checks written out in this
// function return:
//   - KLEIDICV_ERROR_NOT_IMPLEMENTED when channels exceeds
//     KLEIDICV_MAXIMUM_CHANNEL_COUNT;
//   - KLEIDICV_ERROR_CONTEXT_MISMATCH when the workspace reports fewer
//     channels than requested, or an image size narrower or shorter than
//     width x height.
// The CHECK_* macros are defined outside this file; presumably each returns
// an error code from this function when its condition fails - confirm against
// their definitions.
432 template <typename T>
433 600 static kleidicv_error_t separable_filter_2d_checks(
434 const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width,
435 size_t height, size_t channels, const T *kernel_x, const T *kernel_y,
436 SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING {
// workspace is checked here, before it is dereferenced further down.
437
6/6
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 192 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 192 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 189 times.
600 CHECK_POINTERS(workspace, kernel_x, kernel_y);
438
439
12/12
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 189 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 189 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 189 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 189 times.
✓ Branch 8 taken 3 times.
✓ Branch 9 taken 186 times.
✓ Branch 10 taken 3 times.
✓ Branch 11 taken 186 times.
573 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
440
12/12
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 186 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 186 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 186 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 186 times.
✓ Branch 8 taken 3 times.
✓ Branch 9 taken 183 times.
✓ Branch 10 taken 3 times.
✓ Branch 11 taken 183 times.
564 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
441
18/18
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 183 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 180 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 180 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 183 times.
✓ Branch 8 taken 3 times.
✓ Branch 9 taken 180 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 180 times.
✓ Branch 12 taken 3 times.
✓ Branch 13 taken 180 times.
✓ Branch 14 taken 3 times.
✓ Branch 15 taken 177 times.
✓ Branch 16 taken 6 times.
✓ Branch 17 taken 177 times.
555 CHECK_IMAGE_SIZE(width, height);
442
// Channel counts above the library-wide maximum are rejected as unsupported.
443
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 177 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 177 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 174 times.
537 if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
444 9 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
445 }
446
// The workspace may report more channels than requested, but not fewer.
447
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 174 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 174 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 171 times.
528 if (workspace->channels() < channels) {
448 9 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
449 }
450
// Likewise the workspace image size must be at least width x height.
451 519 const Rectangle &context_rect = workspace->image_size();
452
12/12
✓ Branch 0 taken 168 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 165 times.
✓ Branch 4 taken 168 times.
✓ Branch 5 taken 6 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 165 times.
✓ Branch 8 taken 165 times.
✓ Branch 9 taken 6 times.
✓ Branch 10 taken 3 times.
✓ Branch 11 taken 162 times.
519 if (context_rect.width() < width || context_rect.height() < height) {
453 27 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
454 }
455
456 492 return KLEIDICV_OK;
457 600 }
458
// Entry point that applies a 5x5 separable filter to an image stripe.
//
// Steps: reinterpret the opaque context as a SeparableFilterWorkspace, run
// separable_filter_2d_checks (returning its error code on failure), broadcast
// the five kernel_x and five kernel_y coefficients into vectors, wrap them in
// SeparableFilter2D<T, 5>, and hand the work to workspace->process.
// Returns KLEIDICV_OK once process has been called.
//
// y_begin, y_end and fixed_border_type are forwarded unchanged to
// workspace->process; presumably y_begin/y_end delimit the rows of the
// stripe - confirm against SeparableFilterWorkspace::process.
//
// NOTE(review): the kernel width and height parameters are unnamed and
// ignored, while kernel_x[0..4] and kernel_y[0..4] are read unconditionally.
// This assumes the caller has already verified both kernels have 5 taps -
// TODO confirm.
459 template <typename T>
460 600 kleidicv_error_t separable_filter_2d_stripe_sc(
461 const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width,
462 size_t height, size_t y_begin, size_t y_end, size_t channels,
463 const T *kernel_x, size_t /*kernel_width*/, const T *kernel_y,
464 size_t /*kernel_height*/, FixedBorderType fixed_border_type,
465 kleidicv_filter_context_t *context) KLEIDICV_STREAMING {
// The cast itself does not dereference context; a null workspace is passed to
// CHECK_POINTERS inside the checks before any use.
466 600 auto *workspace = reinterpret_cast<SeparableFilterWorkspace *>(context);
467 1200 kleidicv_error_t checks_result = separable_filter_2d_checks(
468 600 src, src_stride, dst, dst_stride, width, height, channels, kernel_x,
469 600 kernel_y, workspace);
470
471
6/6
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 165 times.
✓ Branch 2 taken 36 times.
✓ Branch 3 taken 165 times.
✓ Branch 4 taken 36 times.
✓ Branch 5 taken 162 times.
600 if (checks_result != KLEIDICV_OK) {
472 108 return checks_result;
473 }
474
475 492 Rectangle rect{width, height};
476
477 using SeparableFilterClass = SeparableFilter2D<T, 5>;
478
// Horizontal coefficients are broadcast at double the element width, vertical
// coefficients at the element width itself.
479 using WiderT = typename double_element_width<T>::type;
480 using KernelXVectorTraits = VecTraits<WiderT>;
481 using KernelXVectorT = typename KernelXVectorTraits::VectorType;
482 using KernelYVectorTraits = VecTraits<T>;
483 using KernelYVectorT = typename KernelYVectorTraits::VectorType;
484
485 492 KernelXVectorT kernel_x_0 = KernelXVectorTraits::svdup(kernel_x[0]);
486 492 KernelXVectorT kernel_x_1 = KernelXVectorTraits::svdup(kernel_x[1]);
487 492 KernelXVectorT kernel_x_2 = KernelXVectorTraits::svdup(kernel_x[2]);
488 492 KernelXVectorT kernel_x_3 = KernelXVectorTraits::svdup(kernel_x[3]);
489 492 KernelXVectorT kernel_x_4 = KernelXVectorTraits::svdup(kernel_x[4]);
490
491 492 KernelYVectorT kernel_y_0 = KernelYVectorTraits::svdup(kernel_y[0]);
492 492 KernelYVectorT kernel_y_1 = KernelYVectorTraits::svdup(kernel_y[1]);
493 492 KernelYVectorT kernel_y_2 = KernelYVectorTraits::svdup(kernel_y[2]);
494 492 KernelYVectorT kernel_y_3 = KernelYVectorTraits::svdup(kernel_y[3]);
495 492 KernelYVectorT kernel_y_4 = KernelYVectorTraits::svdup(kernel_y[4]);
496
// filterClass stores references to the ten local vectors above, so they must
// stay in scope until process() below has returned.
497 984 SeparableFilterClass filterClass{
498 492 kernel_x, kernel_x_0, kernel_x_1, kernel_x_2, kernel_x_3, kernel_x_4,
499 kernel_y_0, kernel_y_1, kernel_y_2, kernel_y_3, kernel_y_4};
500 492 SeparableFilter<SeparableFilterClass, 5> filter{filterClass};
501
502 492 Rows<const T> src_rows{src, src_stride, channels};
503 492 Rows<T> dst_rows{dst, dst_stride, channels};
504 984 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
505 492 fixed_border_type, filter);
506
507 492 return KLEIDICV_OK;
508 600 }
509
510 } // namespace KLEIDICV_TARGET_NAMESPACE
511
512 #endif // KLEIDICV_SEPARABLE_FILTER_2D_SC_H
513