KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/filters/separable_filter_2d_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 248 248 100.0%
Functions: 36 36 100.0%
Branches: 104 104 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_SEPARABLE_FILTER_2D_SC_H
6 #define KLEIDICV_SEPARABLE_FILTER_2D_SC_H
7
8 #include <limits>
9
10 #include "kleidicv/filters/separable_filter_5x5_sc.h"
11 #include "kleidicv/kleidicv.h"
12 #include "kleidicv/sve2.h"
13 #include "kleidicv/workspace/separable.h"
14
15 namespace KLEIDICV_TARGET_NAMESPACE {
16
17 template <typename ScalarType, size_t KernelSize>
18 class SeparableFilter2D;
19
20 template <>
21 class SeparableFilter2D<uint8_t, 5> {
22 public:
23 using SourceType = uint8_t;
24 using SourceVectorType = typename VecTraits<SourceType>::VectorType;
25 using BufferType = uint16_t;
26 using BufferVectorType = typename VecTraits<BufferType>::VectorType;
27 using BufferDoubleVectorType = typename VecTraits<BufferType>::Vector2Type;
28 using DestinationType = uint8_t;
29
30 110 SeparableFilter2D(
31 const SourceType *kernel_x, BufferVectorType &kernel_x_0_u16,
32 BufferVectorType &kernel_x_1_u16, BufferVectorType &kernel_x_2_u16,
33 BufferVectorType &kernel_x_3_u16, BufferVectorType &kernel_x_4_u16,
34 SourceVectorType &kernel_y_0_u8, SourceVectorType &kernel_y_1_u8,
35 SourceVectorType &kernel_y_2_u8, SourceVectorType &kernel_y_3_u8,
36 SourceVectorType &kernel_y_4_u8)
37 110 : kernel_x_(kernel_x),
38 110 kernel_x_0_u16_(kernel_x_0_u16),
39 110 kernel_x_1_u16_(kernel_x_1_u16),
40 110 kernel_x_2_u16_(kernel_x_2_u16),
41 110 kernel_x_3_u16_(kernel_x_3_u16),
42 110 kernel_x_4_u16_(kernel_x_4_u16),
43
44 110 kernel_y_0_u8_(kernel_y_0_u8),
45 110 kernel_y_1_u8_(kernel_y_1_u8),
46 110 kernel_y_2_u8_(kernel_y_2_u8),
47 110 kernel_y_3_u8_(kernel_y_3_u8),
48 110 kernel_y_4_u8_(kernel_y_4_u8) {}
49
50 1568 void vertical_vector_path(svbool_t pg,
51 std::reference_wrapper<SourceVectorType> src[5],
52 BufferType *dst) const KLEIDICV_STREAMING {
53 // 0
54 1568 BufferVectorType acc_b = svmullb_u16(src[0], kernel_y_0_u8_);
55 1568 BufferVectorType acc_t = svmullt_u16(src[0], kernel_y_0_u8_);
56
57 // 1
58 1568 BufferVectorType vec_b = svmullb_u16(src[1], kernel_y_1_u8_);
59 1568 BufferVectorType vec_t = svmullt_u16(src[1], kernel_y_1_u8_);
60 1568 acc_b = svqadd_u16_x(pg, acc_b, vec_b);
61 1568 acc_t = svqadd_u16_x(pg, acc_t, vec_t);
62
63 // 2
64 1568 vec_b = svmullb_u16(src[2], kernel_y_2_u8_);
65 1568 vec_t = svmullt_u16(src[2], kernel_y_2_u8_);
66 1568 acc_b = svqadd_u16_x(pg, acc_b, vec_b);
67 1568 acc_t = svqadd_u16_x(pg, acc_t, vec_t);
68
69 // 3
70 1568 vec_b = svmullb_u16(src[3], kernel_y_3_u8_);
71 1568 vec_t = svmullt_u16(src[3], kernel_y_3_u8_);
72 1568 acc_b = svqadd_u16_x(pg, acc_b, vec_b);
73 1568 acc_t = svqadd_u16_x(pg, acc_t, vec_t);
74
75 // 4
76 1568 vec_b = svmullb_u16(src[4], kernel_y_4_u8_);
77 1568 vec_t = svmullt_u16(src[4], kernel_y_4_u8_);
78 1568 acc_b = svqadd_u16_x(pg, acc_b, vec_b);
79 1568 acc_t = svqadd_u16_x(pg, acc_t, vec_t);
80
81 1568 BufferDoubleVectorType interleaved = svcreate2_u16(acc_b, acc_t);
82 1568 svst2(pg, &dst[0], interleaved);
83 1568 }
84
85 1372 void horizontal_vector_path(svbool_t pg,
86 std::reference_wrapper<BufferVectorType> src[5],
87 DestinationType *dst) const KLEIDICV_STREAMING {
88 // 0
89 1372 svuint32_t acc_b = svmullb_u32(src[0], kernel_x_0_u16_);
90 1372 svuint32_t acc_t = svmullt_u32(src[0], kernel_x_0_u16_);
91
92 // 1
93 1372 acc_b = svmlalb_u32(acc_b, src[1], kernel_x_1_u16_);
94 1372 acc_t = svmlalt_u32(acc_t, src[1], kernel_x_1_u16_);
95
96 // 2
97 1372 acc_b = svmlalb_u32(acc_b, src[2], kernel_x_2_u16_);
98 1372 acc_t = svmlalt_u32(acc_t, src[2], kernel_x_2_u16_);
99
100 // 3
101 1372 acc_b = svmlalb_u32(acc_b, src[3], kernel_x_3_u16_);
102 1372 acc_t = svmlalt_u32(acc_t, src[3], kernel_x_3_u16_);
103
104 // 4
105 1372 acc_b = svmlalb_u32(acc_b, src[4], kernel_x_4_u16_);
106 1372 acc_t = svmlalt_u32(acc_t, src[4], kernel_x_4_u16_);
107
108 1372 svuint16_t acc_u16_b = svqxtnb_u32(acc_b);
109 1372 svuint16_t acc_u16 = svqxtnt_u32(acc_u16_b, acc_t);
110
111 2744 svbool_t greater =
112 1372 svcmpgt_n_u16(pg, acc_u16, std::numeric_limits<SourceType>::max());
113 1372 acc_u16 =
114 1372 svdup_n_u16_m(acc_u16, greater, std::numeric_limits<SourceType>::max());
115
116 1372 svst1b_u16(pg, &dst[0], acc_u16);
117 1372 }
118
119 5728 void horizontal_scalar_path(const BufferType src[5],
120 DestinationType *dst) const KLEIDICV_STREAMING {
121 5728 SourceType acc; // NOLINT
122
2/2
✓ Branch 0 taken 4216 times.
✓ Branch 1 taken 1512 times.
5728 if (__builtin_mul_overflow(src[0], kernel_x_[0], &acc)) {
123 4216 dst[0] = std::numeric_limits<SourceType>::max();
124 4216 return;
125 }
126
127
4/4
✓ Branch 0 taken 5761 times.
✓ Branch 1 taken 989 times.
✓ Branch 2 taken 523 times.
✓ Branch 3 taken 989 times.
7273 for (size_t i = 1; i < 5; i++) {
128 5761 SourceType temp; // NOLINT
129
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 5729 times.
5761 if (__builtin_mul_overflow(src[i], kernel_x_[i], &temp)) {
130 32 dst[0] = std::numeric_limits<SourceType>::max();
131 32 return;
132 }
133
2/2
✓ Branch 0 taken 491 times.
✓ Branch 1 taken 5238 times.
5729 if (__builtin_add_overflow(acc, temp, &acc)) {
134 491 dst[0] = std::numeric_limits<SourceType>::max();
135 491 return;
136 }
137 5761 }
138
139 989 dst[0] = acc;
140 5728 }
141
142 private:
143 const SourceType *kernel_x_;
144
145 BufferVectorType &kernel_x_0_u16_;
146 BufferVectorType &kernel_x_1_u16_;
147 BufferVectorType &kernel_x_2_u16_;
148 BufferVectorType &kernel_x_3_u16_;
149 BufferVectorType &kernel_x_4_u16_;
150
151 SourceVectorType &kernel_y_0_u8_;
152 SourceVectorType &kernel_y_1_u8_;
153 SourceVectorType &kernel_y_2_u8_;
154 SourceVectorType &kernel_y_3_u8_;
155 SourceVectorType &kernel_y_4_u8_;
156 }; // end of class SeparableFilter2D<uint8_t, 5>
157
158 template <>
159 class SeparableFilter2D<uint16_t, 5> {
160 public:
161 using SourceType = uint16_t;
162 using SourceVectorType = typename VecTraits<SourceType>::VectorType;
163 using BufferType = uint32_t;
164 using BufferVectorType = typename VecTraits<BufferType>::VectorType;
165 using BufferDoubleVectorType = typename VecTraits<BufferType>::Vector2Type;
166 using DestinationType = uint16_t;
167
168 110 SeparableFilter2D(
169 const SourceType *kernel_x, BufferVectorType &kernel_x_0_u32,
170 BufferVectorType &kernel_x_1_u32, BufferVectorType &kernel_x_2_u32,
171 BufferVectorType &kernel_x_3_u32, BufferVectorType &kernel_x_4_u32,
172 SourceVectorType &kernel_y_0_u16, SourceVectorType &kernel_y_1_u16,
173 SourceVectorType &kernel_y_2_u16, SourceVectorType &kernel_y_3_u16,
174 SourceVectorType &kernel_y_4_u16)
175 110 : kernel_x_(kernel_x),
176 110 kernel_x_0_u32_(kernel_x_0_u32),
177 110 kernel_x_1_u32_(kernel_x_1_u32),
178 110 kernel_x_2_u32_(kernel_x_2_u32),
179 110 kernel_x_3_u32_(kernel_x_3_u32),
180 110 kernel_x_4_u32_(kernel_x_4_u32),
181
182 110 kernel_y_0_u16_(kernel_y_0_u16),
183 110 kernel_y_1_u16_(kernel_y_1_u16),
184 110 kernel_y_2_u16_(kernel_y_2_u16),
185 110 kernel_y_3_u16_(kernel_y_3_u16),
186 110 kernel_y_4_u16_(kernel_y_4_u16) {}
187
188 2068 void vertical_vector_path(svbool_t pg,
189 std::reference_wrapper<SourceVectorType> src[5],
190 BufferType *dst) const KLEIDICV_STREAMING {
191 // 0
192 2068 BufferVectorType acc_b = svmullb_u32(src[0], kernel_y_0_u16_);
193 2068 BufferVectorType acc_t = svmullt_u32(src[0], kernel_y_0_u16_);
194
195 // 1
196 2068 BufferVectorType vec_b = svmullb_u32(src[1], kernel_y_1_u16_);
197 2068 BufferVectorType vec_t = svmullt_u32(src[1], kernel_y_1_u16_);
198 2068 acc_b = svqadd_u32_x(pg, acc_b, vec_b);
199 2068 acc_t = svqadd_u32_x(pg, acc_t, vec_t);
200
201 // 2
202 2068 vec_b = svmullb_u32(src[2], kernel_y_2_u16_);
203 2068 vec_t = svmullt_u32(src[2], kernel_y_2_u16_);
204 2068 acc_b = svqadd_u32_x(pg, acc_b, vec_b);
205 2068 acc_t = svqadd_u32_x(pg, acc_t, vec_t);
206
207 // 3
208 2068 vec_b = svmullb_u32(src[3], kernel_y_3_u16_);
209 2068 vec_t = svmullt_u32(src[3], kernel_y_3_u16_);
210 2068 acc_b = svqadd_u32_x(pg, acc_b, vec_b);
211 2068 acc_t = svqadd_u32_x(pg, acc_t, vec_t);
212
213 // 4
214 2068 vec_b = svmullb_u32(src[4], kernel_y_4_u16_);
215 2068 vec_t = svmullt_u32(src[4], kernel_y_4_u16_);
216 2068 acc_b = svqadd_u32_x(pg, acc_b, vec_b);
217 2068 acc_t = svqadd_u32_x(pg, acc_t, vec_t);
218
219 2068 BufferDoubleVectorType interleaved = svcreate2_u32(acc_b, acc_t);
220 2068 svst2(pg, &dst[0], interleaved);
221 2068 }
222
223 2083 void horizontal_vector_path(svbool_t pg,
224 std::reference_wrapper<BufferVectorType> src[5],
225 DestinationType *dst) const KLEIDICV_STREAMING {
226 // 0
227 2083 svuint64_t acc_b = svmullb_u64(src[0], kernel_x_0_u32_);
228 2083 svuint64_t acc_t = svmullt_u64(src[0], kernel_x_0_u32_);
229
230 // 1
231 2083 acc_b = svmlalb_u64(acc_b, src[1], kernel_x_1_u32_);
232 2083 acc_t = svmlalt_u64(acc_t, src[1], kernel_x_1_u32_);
233
234 // 2
235 2083 acc_b = svmlalb_u64(acc_b, src[2], kernel_x_2_u32_);
236 2083 acc_t = svmlalt_u64(acc_t, src[2], kernel_x_2_u32_);
237
238 // 3
239 2083 acc_b = svmlalb_u64(acc_b, src[3], kernel_x_3_u32_);
240 2083 acc_t = svmlalt_u64(acc_t, src[3], kernel_x_3_u32_);
241
242 // 4
243 2083 acc_b = svmlalb_u64(acc_b, src[4], kernel_x_4_u32_);
244 2083 acc_t = svmlalt_u64(acc_t, src[4], kernel_x_4_u32_);
245
246 2083 svuint32_t acc_u32_b = svqxtnb_u64(acc_b);
247 2083 svuint32_t acc_u32 = svqxtnt_u64(acc_u32_b, acc_t);
248
249 4166 svbool_t greater =
250 2083 svcmpgt_n_u32(pg, acc_u32, std::numeric_limits<SourceType>::max());
251 2083 acc_u32 =
252 2083 svdup_n_u32_m(acc_u32, greater, std::numeric_limits<SourceType>::max());
253
254 2083 svst1h_u32(pg, &dst[0], acc_u32);
255 2083 }
256
257 5800 void horizontal_scalar_path(const BufferType src[5],
258 DestinationType *dst) const KLEIDICV_STREAMING {
259 5800 SourceType acc; // Avoid cppcoreguidelines-init-variables. NOLINT
260
2/2
✓ Branch 0 taken 4260 times.
✓ Branch 1 taken 1540 times.
5800 if (__builtin_mul_overflow(src[0], kernel_x_[0], &acc)) {
261 4260 dst[0] = std::numeric_limits<SourceType>::max();
262 4260 return;
263 }
264
265
4/4
✓ Branch 0 taken 6144 times.
✓ Branch 1 taken 1502 times.
✓ Branch 2 taken 38 times.
✓ Branch 3 taken 1502 times.
7684 for (size_t i = 1; i < 5; i++) {
266 6144 SourceType temp; // Avoid cppcoreguidelines-init-variables. NOLINT
267
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 6140 times.
6144 if (__builtin_mul_overflow(src[i], kernel_x_[i], &temp)) {
268 4 dst[0] = std::numeric_limits<SourceType>::max();
269 4 return;
270 }
271
2/2
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 6106 times.
6140 if (__builtin_add_overflow(acc, temp, &acc)) {
272 34 dst[0] = std::numeric_limits<SourceType>::max();
273 34 return;
274 }
275 6144 }
276
277 1502 dst[0] = acc;
278 5800 }
279
280 private:
281 const SourceType *kernel_x_;
282
283 BufferVectorType &kernel_x_0_u32_;
284 BufferVectorType &kernel_x_1_u32_;
285 BufferVectorType &kernel_x_2_u32_;
286 BufferVectorType &kernel_x_3_u32_;
287 BufferVectorType &kernel_x_4_u32_;
288
289 SourceVectorType &kernel_y_0_u16_;
290 SourceVectorType &kernel_y_1_u16_;
291 SourceVectorType &kernel_y_2_u16_;
292 SourceVectorType &kernel_y_3_u16_;
293 SourceVectorType &kernel_y_4_u16_;
294 }; // end of class SeparableFilter2D<uint16_t, 5>
295
296 template <>
297 class SeparableFilter2D<int16_t, 5> {
298 public:
299 using SourceType = int16_t;
300 using SourceVectorType = typename VecTraits<SourceType>::VectorType;
301 using BufferType = int32_t;
302 using BufferVectorType = typename VecTraits<BufferType>::VectorType;
303 using BufferDoubleVectorType = typename VecTraits<BufferType>::Vector2Type;
304 using DestinationType = int16_t;
305
306 108 SeparableFilter2D(
307 const SourceType *kernel_x, BufferVectorType &kernel_x_0_s32,
308 BufferVectorType &kernel_x_1_s32, BufferVectorType &kernel_x_2_s32,
309 BufferVectorType &kernel_x_3_s32, BufferVectorType &kernel_x_4_s32,
310 SourceVectorType &kernel_y_0_s16, SourceVectorType &kernel_y_1_s16,
311 SourceVectorType &kernel_y_2_s16, SourceVectorType &kernel_y_3_s16,
312 SourceVectorType &kernel_y_4_s16)
313 108 : kernel_x_(kernel_x),
314 108 kernel_x_0_s32_(kernel_x_0_s32),
315 108 kernel_x_1_s32_(kernel_x_1_s32),
316 108 kernel_x_2_s32_(kernel_x_2_s32),
317 108 kernel_x_3_s32_(kernel_x_3_s32),
318 108 kernel_x_4_s32_(kernel_x_4_s32),
319
320 108 kernel_y_0_s16_(kernel_y_0_s16),
321 108 kernel_y_1_s16_(kernel_y_1_s16),
322 108 kernel_y_2_s16_(kernel_y_2_s16),
323 108 kernel_y_3_s16_(kernel_y_3_s16),
324 108 kernel_y_4_s16_(kernel_y_4_s16) {}
325
326 2060 void vertical_vector_path(svbool_t pg,
327 std::reference_wrapper<SourceVectorType> src[5],
328 BufferType *dst) const KLEIDICV_STREAMING {
329 // 0
330 2060 BufferVectorType acc_b = svmullb_s32(src[0], kernel_y_0_s16_);
331 2060 BufferVectorType acc_t = svmullt_s32(src[0], kernel_y_0_s16_);
332
333 // 1
334 2060 BufferVectorType vec_b = svmullb_s32(src[1], kernel_y_1_s16_);
335 2060 BufferVectorType vec_t = svmullt_s32(src[1], kernel_y_1_s16_);
336 2060 acc_b = svqadd_s32_x(pg, acc_b, vec_b);
337 2060 acc_t = svqadd_s32_x(pg, acc_t, vec_t);
338
339 // 2
340 2060 vec_b = svmullb_s32(src[2], kernel_y_2_s16_);
341 2060 vec_t = svmullt_s32(src[2], kernel_y_2_s16_);
342 2060 acc_b = svqadd_s32_x(pg, acc_b, vec_b);
343 2060 acc_t = svqadd_s32_x(pg, acc_t, vec_t);
344
345 // 3
346 2060 vec_b = svmullb_s32(src[3], kernel_y_3_s16_);
347 2060 vec_t = svmullt_s32(src[3], kernel_y_3_s16_);
348 2060 acc_b = svqadd_s32_x(pg, acc_b, vec_b);
349 2060 acc_t = svqadd_s32_x(pg, acc_t, vec_t);
350
351 // 4
352 2060 vec_b = svmullb_s32(src[4], kernel_y_4_s16_);
353 2060 vec_t = svmullt_s32(src[4], kernel_y_4_s16_);
354 2060 acc_b = svqadd_s32_x(pg, acc_b, vec_b);
355 2060 acc_t = svqadd_s32_x(pg, acc_t, vec_t);
356
357 2060 BufferDoubleVectorType interleaved = svcreate2_s32(acc_b, acc_t);
358 2060 svst2(pg, &dst[0], interleaved);
359 2060 }
360
361 2084 void horizontal_vector_path(svbool_t pg,
362 std::reference_wrapper<BufferVectorType> src[5],
363 DestinationType *dst) const KLEIDICV_STREAMING {
364 // 0
365 2084 svint64_t acc_b = svmullb_s64(src[0], kernel_x_0_s32_);
366 2084 svint64_t acc_t = svmullt_s64(src[0], kernel_x_0_s32_);
367
368 // 1
369 2084 acc_b = svmlalb_s64(acc_b, src[1], kernel_x_1_s32_);
370 2084 acc_t = svmlalt_s64(acc_t, src[1], kernel_x_1_s32_);
371
372 // 2
373 2084 acc_b = svmlalb_s64(acc_b, src[2], kernel_x_2_s32_);
374 2084 acc_t = svmlalt_s64(acc_t, src[2], kernel_x_2_s32_);
375
376 // 3
377 2084 acc_b = svmlalb_s64(acc_b, src[3], kernel_x_3_s32_);
378 2084 acc_t = svmlalt_s64(acc_t, src[3], kernel_x_3_s32_);
379
380 // 4
381 2084 acc_b = svmlalb_s64(acc_b, src[4], kernel_x_4_s32_);
382 2084 acc_t = svmlalt_s64(acc_t, src[4], kernel_x_4_s32_);
383
384 2084 svint32_t acc_s32_b = svqxtnb_s64(acc_b);
385 2084 svint32_t acc_s32 = svqxtnt_s64(acc_s32_b, acc_t);
386
387 4168 svbool_t less =
388 2084 svcmplt_n_s32(pg, acc_s32, std::numeric_limits<SourceType>::min());
389 2084 acc_s32 =
390 2084 svdup_n_s32_m(acc_s32, less, std::numeric_limits<SourceType>::min());
391
392 4168 svbool_t greater =
393 2084 svcmpgt_n_s32(pg, acc_s32, std::numeric_limits<SourceType>::max());
394 2084 acc_s32 =
395 2084 svdup_n_s32_m(acc_s32, greater, std::numeric_limits<SourceType>::max());
396
397 2084 svst1h_s32(pg, &dst[0], acc_s32);
398 2084 }
399
400 5760 void horizontal_scalar_path(const BufferType src[5],
401 DestinationType *dst) const KLEIDICV_STREAMING {
402 5760 int64_t acc = static_cast<int64_t>(src[0]) * kernel_x_[0];
403
2/2
✓ Branch 0 taken 23040 times.
✓ Branch 1 taken 5760 times.
28800 for (size_t i = 1; i < 5; i++) {
404 23040 acc += static_cast<int64_t>(src[i]) * kernel_x_[i];
405 23040 }
406
407
2/2
✓ Branch 0 taken 2198 times.
✓ Branch 1 taken 3562 times.
5760 if (acc < std::numeric_limits<DestinationType>::min()) {
408 2198 acc = std::numeric_limits<DestinationType>::min();
409
2/2
✓ Branch 0 taken 1588 times.
✓ Branch 1 taken 1974 times.
5760 } else if (acc > std::numeric_limits<DestinationType>::max()) {
410 1974 acc = std::numeric_limits<DestinationType>::max();
411 1974 }
412
413 5760 dst[0] = static_cast<DestinationType>(acc);
414 5760 }
415
416 private:
417 const SourceType *kernel_x_;
418
419 BufferVectorType &kernel_x_0_s32_;
420 BufferVectorType &kernel_x_1_s32_;
421 BufferVectorType &kernel_x_2_s32_;
422 BufferVectorType &kernel_x_3_s32_;
423 BufferVectorType &kernel_x_4_s32_;
424
425 SourceVectorType &kernel_y_0_s16_;
426 SourceVectorType &kernel_y_1_s16_;
427 SourceVectorType &kernel_y_2_s16_;
428 SourceVectorType &kernel_y_3_s16_;
429 SourceVectorType &kernel_y_4_s16_;
430 }; // end of class SeparableFilter2D<int16_t, 5>
431
432 template <typename T>
433 400 static kleidicv_error_t separable_filter_2d_checks(
434 const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width,
435 size_t height, size_t channels, const T *kernel_x, const T *kernel_y,
436 SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING {
437
6/6
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 128 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 128 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 126 times.
400 CHECK_POINTERS(workspace, kernel_x, kernel_y);
438
439
12/12
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 126 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 126 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 126 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 124 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 124 times.
382 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
440
12/12
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 124 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 124 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 124 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 124 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 122 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 122 times.
376 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
441
18/18
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 122 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 120 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 120 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 122 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 120 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 120 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 120 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 118 times.
✓ Branch 16 taken 4 times.
✓ Branch 17 taken 118 times.
370 CHECK_IMAGE_SIZE(width, height);
442
443
6/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 118 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 118 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 116 times.
358 if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
444 6 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
445 }
446
447
6/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 116 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 116 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 114 times.
352 if (workspace->channels() < channels) {
448 6 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
449 }
450
451 346 const Rectangle &context_rect = workspace->image_size();
452
12/12
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 110 times.
✓ Branch 4 taken 112 times.
✓ Branch 5 taken 4 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 110 times.
✓ Branch 8 taken 110 times.
✓ Branch 9 taken 4 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 108 times.
346 if (context_rect.width() < width || context_rect.height() < height) {
453 18 return KLEIDICV_ERROR_CONTEXT_MISMATCH;
454 }
455
456 328 return KLEIDICV_OK;
457 400 }
458
459 template <typename T>
460 400 kleidicv_error_t separable_filter_2d_stripe_sc(
461 const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width,
462 size_t height, size_t y_begin, size_t y_end, size_t channels,
463 const T *kernel_x, size_t /*kernel_width*/, const T *kernel_y,
464 size_t /*kernel_height*/, FixedBorderType fixed_border_type,
465 kleidicv_filter_context_t *context) KLEIDICV_STREAMING {
466 400 auto *workspace = reinterpret_cast<SeparableFilterWorkspace *>(context);
467 800 kleidicv_error_t checks_result = separable_filter_2d_checks(
468 400 src, src_stride, dst, dst_stride, width, height, channels, kernel_x,
469 400 kernel_y, workspace);
470
471
6/6
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 110 times.
✓ Branch 2 taken 24 times.
✓ Branch 3 taken 110 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 108 times.
400 if (checks_result != KLEIDICV_OK) {
472 72 return checks_result;
473 }
474
475 328 Rectangle rect{width, height};
476
477 using SeparableFilterClass = SeparableFilter2D<T, 5>;
478
479 using WiderT = typename double_element_width<T>::type;
480 using KernelXVectorTraits = VecTraits<WiderT>;
481 using KernelXVectorT = typename KernelXVectorTraits::VectorType;
482 using KernelYVectorTraits = VecTraits<T>;
483 using KernelYVectorT = typename KernelYVectorTraits::VectorType;
484
485 328 KernelXVectorT kernel_x_0 = KernelXVectorTraits::svdup(kernel_x[0]);
486 328 KernelXVectorT kernel_x_1 = KernelXVectorTraits::svdup(kernel_x[1]);
487 328 KernelXVectorT kernel_x_2 = KernelXVectorTraits::svdup(kernel_x[2]);
488 328 KernelXVectorT kernel_x_3 = KernelXVectorTraits::svdup(kernel_x[3]);
489 328 KernelXVectorT kernel_x_4 = KernelXVectorTraits::svdup(kernel_x[4]);
490
491 328 KernelYVectorT kernel_y_0 = KernelYVectorTraits::svdup(kernel_y[0]);
492 328 KernelYVectorT kernel_y_1 = KernelYVectorTraits::svdup(kernel_y[1]);
493 328 KernelYVectorT kernel_y_2 = KernelYVectorTraits::svdup(kernel_y[2]);
494 328 KernelYVectorT kernel_y_3 = KernelYVectorTraits::svdup(kernel_y[3]);
495 328 KernelYVectorT kernel_y_4 = KernelYVectorTraits::svdup(kernel_y[4]);
496
497 656 SeparableFilterClass filterClass{
498 328 kernel_x, kernel_x_0, kernel_x_1, kernel_x_2, kernel_x_3, kernel_x_4,
499 kernel_y_0, kernel_y_1, kernel_y_2, kernel_y_3, kernel_y_4};
500 328 SeparableFilter<SeparableFilterClass, 5> filter{filterClass};
501
502 328 Rows<const T> src_rows{src, src_stride, channels};
503 328 Rows<T> dst_rows{dst, dst_stride, channels};
504 656 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
505 328 fixed_border_type, filter);
506
507 328 return KLEIDICV_OK;
508 400 }
509
510 } // namespace KLEIDICV_TARGET_NAMESPACE
511
512 #endif // KLEIDICV_SEPARABLE_FILTER_2D_SC_H
513