KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/filters/gaussian_blur_fixed_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 193 193 100.0%
Functions: 100 100 100.0%
Branches: 54 56 96.4%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_GAUSSIAN_BLUR_SC_H
6 #define KLEIDICV_GAUSSIAN_BLUR_SC_H
7
8 #include <array>
9 #include <cassert>
10
11 #include "kleidicv/filters/gaussian_blur.h"
12 #include "kleidicv/filters/separable_filter_15x15_sc.h"
13 #include "kleidicv/filters/separable_filter_21x21_sc.h"
14 #include "kleidicv/filters/separable_filter_3x3_sc.h"
15 #include "kleidicv/filters/separable_filter_5x5_sc.h"
16 #include "kleidicv/filters/separable_filter_7x7_sc.h"
17 #include "kleidicv/filters/sigma.h"
18 #include "kleidicv/workspace/separable.h"
19
20 #if KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2
21 #include <arm_sme.h>
22 #endif
23
24 namespace KLEIDICV_TARGET_NAMESPACE {
25
26 // Primary template for Gaussian Blur filters.
27 template <typename ScalarType, size_t KernelSize, bool IsBinomial>
28 class GaussianBlur;
29
30 // Template for 3x3 Gaussian Blur binomial filters.
31 //
32 //              [ 1, 2, 1 ]          [ 1 ]
33 //   F = 1/16 * [ 2, 4, 2 ] = 1/16 * [ 2 ] * [ 1, 2, 1 ]
34 //              [ 1, 2, 1 ]          [ 1 ]
35 template <>
36 class GaussianBlur<uint8_t, 3, true> {
37 public:
38 using SourceType = uint8_t;
39 using BufferType = uint16_t;
40 using DestinationType = uint8_t;
41
42 // Applies vertical filtering vector using SIMD operations.
43 //
44 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
45 504 void vertical_vector_path(svbool_t pg,
46 std::reference_wrapper<svuint8_t> src[3],
47 BufferType *dst) const KLEIDICV_STREAMING {
48 504 svuint16_t acc_0_2_b = svaddlb_u16(src[0], src[2]);
49 504 svuint16_t acc_0_2_t = svaddlt_u16(src[0], src[2]);
50
51 504 svuint16_t acc_1_b = svshllb_n_u16(src[1], 1);
52 504 svuint16_t acc_1_t = svshllt_n_u16(src[1], 1);
53
54 504 svuint16_t acc_u16_b = svadd_u16_x(pg, acc_0_2_b, acc_1_b);
55 504 svuint16_t acc_u16_t = svadd_u16_x(pg, acc_0_2_t, acc_1_t);
56
57 504 svuint16x2_t interleaved = svcreate2(acc_u16_b, acc_u16_t);
58 504 svst2(pg, &dst[0], interleaved);
59 504 }
60
61 // Applies horizontal filtering vector using SIMD operations.
62 //
63 // DST = 1/16 * [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
64 400 void horizontal_vector_path(svbool_t pg,
65 std::reference_wrapper<svuint16_t> src[3],
66 DestinationType *dst) const KLEIDICV_STREAMING {
67 400 svuint16_t acc_0_2 = svhadd_u16_x(pg, src[0], src[2]);
68
69 400 svuint16_t acc = svadd_u16_x(pg, acc_0_2, src[1]);
70 400 acc = svrshr_x(pg, acc, 3);
71
72 400 svst1b(pg, &dst[0], acc);
73 400 }
74
75 // Applies horizontal filtering vector using scalar operations.
76 //
77 // DST = 1/16 * [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
78 1008 void horizontal_scalar_path(const BufferType src[3],
79 DestinationType *dst) const KLEIDICV_STREAMING {
80 1008 auto acc = src[0] + 2 * src[1] + src[2];
81 1008 dst[0] = rounding_shift_right(acc, 4);
82 1008 }
83 }; // end of class GaussianBlur<uint8_t, 3, true>
84
85 // Template for 5x5 Gaussian Blur binomial filters.
86 //
87 //                [ 1,  4,  6,  4, 1 ]           [ 1 ]
88 //                [ 4, 16, 24, 16, 4 ]           [ 4 ]
89 //    F = 1/256 * [ 6, 24, 36, 24, 6 ] = 1/256 * [ 6 ] * [ 1, 4, 6, 4, 1 ]
90 //                [ 4, 16, 24, 16, 4 ]           [ 4 ]
91 //                [ 1,  4,  6,  4, 1 ]           [ 1 ]
92 template <>
93 class GaussianBlur<uint8_t, 5, true> {
94 public:
95 using SourceType = uint8_t;
96 using BufferType = uint16_t;
97 using DestinationType = uint8_t;
98
99 // Applies vertical filtering vector using SIMD operations.
100 //
101 // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
102 1524 void vertical_vector_path(svbool_t pg,
103 std::reference_wrapper<svuint8_t> src[5],
104 BufferType *dst) const KLEIDICV_STREAMING {
105 1524 svuint16_t acc_0_4_b = svaddlb_u16(src[0], src[4]);
106 1524 svuint16_t acc_0_4_t = svaddlt_u16(src[0], src[4]);
107 1524 svuint16_t acc_1_3_b = svaddlb_u16(src[1], src[3]);
108 1524 svuint16_t acc_1_3_t = svaddlt_u16(src[1], src[3]);
109
110 1524 svuint16_t acc_u16_b = svmlalb_n_u16(acc_0_4_b, src[2], 6);
111 1524 svuint16_t acc_u16_t = svmlalt_n_u16(acc_0_4_t, src[2], 6);
112 1524 acc_u16_b = svmla_n_u16_x(pg, acc_u16_b, acc_1_3_b, 4);
113 1524 acc_u16_t = svmla_n_u16_x(pg, acc_u16_t, acc_1_3_t, 4);
114
115 1524 svuint16x2_t interleaved = svcreate2(acc_u16_b, acc_u16_t);
116 1524 svst2(pg, &dst[0], interleaved);
117 1524 }
118
119 // Applies horizontal filtering vector using SIMD operations.
120 //
121 // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
122 1316 void horizontal_vector_path(svbool_t pg,
123 std::reference_wrapper<svuint16_t> src[5],
124 DestinationType *dst) const KLEIDICV_STREAMING {
125 1316 svuint16_t acc_0_4 = svadd_x(pg, src[0], src[4]);
126 1316 svuint16_t acc_1_3 = svadd_x(pg, src[1], src[3]);
127 1316 svuint16_t acc = svmla_n_u16_x(pg, acc_0_4, src[2], 6);
128 1316 acc = svmla_n_u16_x(pg, acc, acc_1_3, 4);
129 1316 acc = svrshr_x(pg, acc, 8);
130 1316 svst1b(pg, &dst[0], acc);
131 1316 }
132
133 // Applies horizontal filtering vector using scalar operations.
134 //
135 // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
136 5552 void horizontal_scalar_path(const BufferType src[5],
137 DestinationType *dst) const KLEIDICV_STREAMING {
138 5552 auto acc = src[0] + src[4] + 4 * (src[1] + src[3]) + 6 * src[2];
139 5552 dst[0] = rounding_shift_right(acc, 8);
140 5552 }
141 }; // end of class GaussianBlur<uint8_t, 5, true>
142
143 // Template for 7x7 Gaussian Blur binomial filters.
144 //
145 //                 [  4,  14,  28,  36,  28,  14,  4 ]
146 //                 [ 14,  49,  98, 126,  98,  49, 14 ]
147 //                 [ 28,  98, 196, 252, 196,  98, 28 ]
148 //    F = 1/4096 * [ 36, 126, 252, 324, 252, 126, 36 ] =
149 //                 [ 28,  98, 196, 252, 196,  98, 28 ]
150 //                 [ 14,  49,  98, 126,  98,  49, 14 ]
151 //                 [  4,  14,  28,  36,  28,  14,  4 ]
152 //
153 //                 [  2 ]
154 //                 [  7 ]
155 //                 [ 14 ]
156 //      = 1/4096 * [ 18 ] * [ 2, 7, 14, 18, 14, 7, 2 ]
157 //                 [ 14 ]
158 //                 [  7 ]
159 //                 [  2 ]
160 template <>
161 class GaussianBlur<uint8_t, 7, true> {
162 public:
163 using SourceType = uint8_t;
164 using BufferType = uint16_t;
165 using DestinationType = uint8_t;
166
167 // Applies vertical filtering vector using SIMD operations.
168 //
169 // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4, SRC5, SRC6 ] *
170 //       [ 2, 7, 14, 18, 14, 7, 2 ]T
171 596 void vertical_vector_path(svbool_t pg,
172 std::reference_wrapper<svuint8_t> src[7],
173 BufferType *dst) const KLEIDICV_STREAMING {
174 596 svuint16_t acc_0_6_b = svaddlb_u16(src[0], src[6]);
175 596 svuint16_t acc_0_6_t = svaddlt_u16(src[0], src[6]);
176
177 596 svuint16_t acc_1_5_b = svaddlb_u16(src[1], src[5]);
178 596 svuint16_t acc_1_5_t = svaddlt_u16(src[1], src[5]);
179
180 596 svuint16_t acc_2_4_b = svaddlb_u16(src[2], src[4]);
181 596 svuint16_t acc_2_4_t = svaddlt_u16(src[2], src[4]);
182
183 596 svuint16_t acc_3_b = svmovlb_u16(src[3]);
184 596 svuint16_t acc_3_t = svmovlt_u16(src[3]);
185
186 596 svuint16_t acc_0_2_4_6_b = svmla_n_u16_x(pg, acc_0_6_b, acc_2_4_b, 7);
187 596 svuint16_t acc_0_2_4_6_t = svmla_n_u16_x(pg, acc_0_6_t, acc_2_4_t, 7);
188
189 596 svuint16_t acc_0_2_3_4_6_b = svmla_n_u16_x(pg, acc_0_2_4_6_b, acc_3_b, 9);
190 596 svuint16_t acc_0_2_3_4_6_t = svmla_n_u16_x(pg, acc_0_2_4_6_t, acc_3_t, 9);
191 596 acc_0_2_3_4_6_b = svlsl_n_u16_x(pg, acc_0_2_3_4_6_b, 1);
192 596 acc_0_2_3_4_6_t = svlsl_n_u16_x(pg, acc_0_2_3_4_6_t, 1);
193
194 1192 svuint16_t acc_0_1_2_3_4_5_6_b =
195 596 svmla_n_u16_x(pg, acc_0_2_3_4_6_b, acc_1_5_b, 7);
196 1192 svuint16_t acc_0_1_2_3_4_5_6_t =
197 596 svmla_n_u16_x(pg, acc_0_2_3_4_6_t, acc_1_5_t, 7);
198
199 1192 svuint16x2_t interleaved =
200 596 svcreate2(acc_0_1_2_3_4_5_6_b, acc_0_1_2_3_4_5_6_t);
201 596 svst2(pg, &dst[0], interleaved);
202 596 }
203
204 // Applies horizontal filtering vector using SIMD operations.
205 //
206 // DST = 1/4096 * [ SRC0, SRC1, SRC2, SRC3, SRC4, SRC5, SRC6 ] *
207 //                [ 2, 7, 14, 18, 14, 7, 2 ]T
208 464 void horizontal_vector_path(svbool_t pg,
209 std::reference_wrapper<svuint16_t> src[7],
210 DestinationType *dst) const KLEIDICV_STREAMING {
211 464 svuint32_t acc_0_6_b = svaddlb_u32(src[0], src[6]);
212 464 svuint32_t acc_0_6_t = svaddlt_u32(src[0], src[6]);
213
214 464 svuint32_t acc_1_5_b = svaddlb_u32(src[1], src[5]);
215 464 svuint32_t acc_1_5_t = svaddlt_u32(src[1], src[5]);
216
217 464 svuint16_t acc_2_4 = svadd_u16_x(pg, src[2], src[4]);
218
219 464 svuint32_t acc_0_2_4_6_b = svmlalb_n_u32(acc_0_6_b, acc_2_4, 7);
220 464 svuint32_t acc_0_2_4_6_t = svmlalt_n_u32(acc_0_6_t, acc_2_4, 7);
221
222 464 svuint32_t acc_0_2_3_4_6_b = svmlalb_n_u32(acc_0_2_4_6_b, src[3], 9);
223 464 svuint32_t acc_0_2_3_4_6_t = svmlalt_n_u32(acc_0_2_4_6_t, src[3], 9);
224
225 464 acc_0_2_3_4_6_b = svlsl_n_u32_x(pg, acc_0_2_3_4_6_b, 1);
226 464 acc_0_2_3_4_6_t = svlsl_n_u32_x(pg, acc_0_2_3_4_6_t, 1);
227
228 928 svuint32_t acc_0_1_2_3_4_5_6_b =
229 464 svmla_n_u32_x(pg, acc_0_2_3_4_6_b, acc_1_5_b, 7);
230 928 svuint32_t acc_0_1_2_3_4_5_6_t =
231 464 svmla_n_u32_x(pg, acc_0_2_3_4_6_t, acc_1_5_t, 7);
232
233 928 svuint16_t acc_0_1_2_3_4_5_6_u16_b =
234 464 svrshrnb_n_u32(acc_0_1_2_3_4_5_6_b, 12);
235 928 svuint16_t acc_0_1_2_3_4_5_6_u16 =
236 464 svrshrnt_n_u32(acc_0_1_2_3_4_5_6_u16_b, acc_0_1_2_3_4_5_6_t, 12);
237
238 464 svst1b(pg, &dst[0], acc_0_1_2_3_4_5_6_u16);
239 464 }
240
241 // Applies horizontal filtering vector using scalar operations.
242 //
243 // DST = 1/4096 * [ SRC0, SRC1, SRC2, SRC3, SRC4, SRC5, SRC6 ] *
244 //                [ 2, 7, 14, 18, 14, 7, 2 ]T
245 3288 void horizontal_scalar_path(const BufferType src[7],
246 DestinationType *dst) const KLEIDICV_STREAMING {
247 9864 uint32_t acc = src[0] * 2 + src[1] * 7 + src[2] * 14 + src[3] * 18 +
248 6576 src[4] * 14 + src[5] * 7 + src[6] * 2;
249 3288 dst[0] = rounding_shift_right(acc, 12);
250 3288 }
251 }; // end of class GaussianBlur<uint8_t, 7, true>
252
253 // Template for Gaussian Blur filters with a custom sigma (non-binomial).
254 template <size_t KernelSize>
255 class GaussianBlur<uint8_t, KernelSize, false> {
256 public:
257 using SourceType = uint8_t;
258 using BufferType = uint8_t;
259 using DestinationType = uint8_t;
260 using SourceVecTraits =
261 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
262 using SourceVectorType = typename SourceVecTraits::VectorType;
263
264 static constexpr size_t kHalfKernelSize = get_half_kernel_size(KernelSize);
265
266 230 explicit GaussianBlur(const uint16_t *half_kernel)
267 230 : half_kernel_(half_kernel) {}
268
269 6048 void vertical_vector_path(
270 svbool_t pg, std::reference_wrapper<SourceVectorType> src[KernelSize],
271 BufferType *dst) const KLEIDICV_STREAMING {
272 6048 common_vector_path(pg, src, dst);
273 6048 }
274
275 70640 void vertical_scalar_path(const SourceType src[KernelSize],
276 BufferType *dst) const KLEIDICV_STREAMING {
277 141280 uint32_t acc = static_cast<uint32_t>(src[kHalfKernelSize - 1]) *
278 70640 half_kernel_[kHalfKernelSize - 1];
279
280 // Optimization to avoid unnecessary branching in vector code.
281 KLEIDICV_FORCE_LOOP_UNROLL
282
10/10
✓ Branch 0 taken 22176 times.
✓ Branch 1 taken 155232 times.
✓ Branch 2 taken 45120 times.
✓ Branch 3 taken 451200 times.
✓ Branch 4 taken 248 times.
✓ Branch 5 taken 248 times.
✓ Branch 6 taken 960 times.
✓ Branch 7 taken 1920 times.
✓ Branch 8 taken 2136 times.
✓ Branch 9 taken 6408 times.
685648 for (size_t i = 0; i < kHalfKernelSize - 1; i++) {
283 1845024 acc += (static_cast<uint32_t>(src[i]) +
284 1230016 static_cast<uint32_t>(src[KernelSize - i - 1])) *
285 615008 half_kernel_[i];
286 615008 }
287
288 70640 dst[0] = static_cast<BufferType>(rounding_shift_right(acc, 8));
289 70640 }
290
291 3896 void horizontal_vector_path(
292 svbool_t pg, std::reference_wrapper<SourceVectorType> src[KernelSize],
293 BufferType *dst) const KLEIDICV_STREAMING {
294 3896 common_vector_path(pg, src, dst);
295 3896 }
296
297 70640 void horizontal_scalar_path(const BufferType src[KernelSize],
298 DestinationType *dst) const KLEIDICV_STREAMING {
299 70640 vertical_scalar_path(src, dst);
300 70640 }
301
302 private:
303 9944 void common_vector_path(
304 svbool_t pg, std::reference_wrapper<SourceVectorType> src[KernelSize],
305 BufferType *dst) const KLEIDICV_STREAMING {
306 9944 svbool_t pg16_all = svptrue_b16();
307 19888 svuint16_t acc_b = svmullb_n_u16(src[kHalfKernelSize - 1],
308 9944 half_kernel_[kHalfKernelSize - 1]);
309 19888 svuint16_t acc_t = svmullt_n_u16(src[kHalfKernelSize - 1],
310 9944 half_kernel_[kHalfKernelSize - 1]);
311
312 // Optimization to avoid unnecessary branching in vector code.
313 KLEIDICV_FORCE_LOOP_UNROLL
314
10/10
✓ Branch 0 taken 3280 times.
✓ Branch 1 taken 22960 times.
✓ Branch 2 taken 5320 times.
✓ Branch 3 taken 53200 times.
✓ Branch 4 taken 228 times.
✓ Branch 5 taken 228 times.
✓ Branch 6 taken 440 times.
✓ Branch 7 taken 880 times.
✓ Branch 8 taken 676 times.
✓ Branch 9 taken 2028 times.
89240 for (size_t i = 0; i < kHalfKernelSize - 1; i++) {
315 79296 const size_t j = KernelSize - i - 1;
316 79296 svuint16_t vec_b = svaddlb_u16(src[i], src[j]);
317 79296 svuint16_t vec_t = svaddlt_u16(src[i], src[j]);
318
319 79296 acc_b = svmla_n_u16_x(pg16_all, acc_b, vec_b, half_kernel_[i]);
320 79296 acc_t = svmla_n_u16_x(pg16_all, acc_t, vec_t, half_kernel_[i]);
321 79296 }
322
323 // Rounding before narrowing
324 9944 acc_b = svqadd_n_u16(acc_b, 128);
325 9944 acc_t = svqadd_n_u16(acc_t, 128);
326 // Keep only the highest 8 bits
327 19888 svuint8_t result =
328 9944 svtrn2_u8(svreinterpret_u8_u16(acc_b), svreinterpret_u8_u16(acc_t));
329 9944 svst1(pg, &dst[0], result);
330 9944 }
331
332 const uint16_t *half_kernel_;
333 }; // end of class GaussianBlur<uint8_t, KernelSize, false>
334
335 template <size_t KernelSize, bool IsBinomial, typename ScalarType>
336 652 static kleidicv_error_t gaussian_blur_fixed_kernel_size(
337 const ScalarType *src, size_t src_stride, ScalarType *dst,
338 size_t dst_stride, Rectangle &rect, size_t y_begin, size_t y_end,
339 size_t channels, float sigma, FixedBorderType border_type,
340 SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING {
341 using GaussianBlurFilter = GaussianBlur<ScalarType, KernelSize, IsBinomial>;
342
343 652 Rows<const ScalarType> src_rows{src, src_stride, channels};
344 652 Rows<ScalarType> dst_rows{dst, dst_stride, channels};
345
346 if constexpr (IsBinomial) {
347 262 GaussianBlurFilter blur;
348 262 SeparableFilter<GaussianBlurFilter, KernelSize> filter{blur};
349 524 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
350 262 border_type, filter);
351
352 262 return KLEIDICV_OK;
353 262 } else {
354 390 constexpr size_t kHalfKernelSize = get_half_kernel_size(KernelSize);
355 390 uint16_t half_kernel[128];
356 390 generate_gaussian_half_kernel(half_kernel, kHalfKernelSize, sigma);
357 // If sigma is so small that the middle point gets all the weight, the
358 // filter degenerates to a plain row-by-row copy (the else branch below).
359
10/10
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 64 times.
✓ Branch 3 taken 32 times.
✓ Branch 4 taken 34 times.
✓ Branch 5 taken 32 times.
✓ Branch 6 taken 34 times.
✓ Branch 7 taken 32 times.
✓ Branch 8 taken 34 times.
✓ Branch 9 taken 32 times.
390 if (half_kernel[kHalfKernelSize - 1] < 256) {
360 230 GaussianBlurFilter blur(half_kernel);
361 230 SeparableFilter<GaussianBlurFilter, KernelSize> filter{blur};
362 460 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
363 230 border_type, filter);
364 230 } else {
365
10/10
✓ Branch 0 taken 456 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 648 times.
✓ Branch 3 taken 32 times.
✓ Branch 4 taken 72 times.
✓ Branch 5 taken 32 times.
✓ Branch 6 taken 136 times.
✓ Branch 7 taken 32 times.
✓ Branch 8 taken 200 times.
✓ Branch 9 taken 32 times.
1672 for (size_t row = y_begin; row < y_end; ++row) {
366 #if KLEIDICV_TARGET_SME && defined(__ANDROID__)
367 __arm_sc_memcpy(
368 static_cast<void *>(&dst_rows.at(row)[0]),
369 static_cast<const void *>(&src_rows.at(row)[0]),
370 rect.width() * sizeof(ScalarType) * dst_rows.channels());
371 #else
372 3024 std::memcpy(static_cast<void *>(&dst_rows.at(row)[0]),
373 1512 static_cast<const void *>(&src_rows.at(row)[0]),
374 1512 rect.width() * sizeof(ScalarType) * dst_rows.channels());
375 #endif
376 1512 }
377 }
378 390 return KLEIDICV_OK;
379 390 }
380 652 }
381
382 template <bool IsBinomial, typename ScalarType>
383 652 static kleidicv_error_t gaussian_blur(
384 size_t kernel_size, const ScalarType *src, size_t src_stride,
385 ScalarType *dst, size_t dst_stride, Rectangle &rect, size_t y_begin,
386 size_t y_end, size_t channels, float sigma, FixedBorderType border_type,
387 SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING {
388
10/12
✓ Branch 0 taken 94 times.
✓ Branch 1 taken 102 times.
✓ Branch 2 taken 66 times.
✓ Branch 3 taken 32 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 32 times.
✓ Branch 6 taken 66 times.
✓ Branch 7 taken 66 times.
✓ Branch 8 taken 66 times.
✓ Branch 9 taken 64 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 64 times.
652 switch (kernel_size) {
389 case 3:
390 160 return gaussian_blur_fixed_kernel_size<3, IsBinomial>(
391 160 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
392 160 sigma, border_type, workspace);
393 case 5:
394 168 return gaussian_blur_fixed_kernel_size<5, IsBinomial>(
395 168 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
396 168 sigma, border_type, workspace);
397 case 7:
398 132 return gaussian_blur_fixed_kernel_size<7, IsBinomial>(
399 132 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
400 132 sigma, border_type, workspace);
401 case 15:
402 // 15x15 does not have a binomial variant
403 96 return gaussian_blur_fixed_kernel_size<15, false>(
404 96 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
405 96 sigma, border_type, workspace);
406 case 21:
407 // 21x21 does not have a binomial variant
408 96 return gaussian_blur_fixed_kernel_size<21, false>(
409 96 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
410 96 sigma, border_type, workspace);
411 // gaussian_blur_is_implemented checked the kernel size already.
412 // GCOVR_EXCL_START
413 default:
414 assert(!"kernel size not implemented");
415 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
416 // GCOVR_EXCL_STOP
417 }
418 652 }
419
420 690 static kleidicv_error_t gaussian_blur_fixed_stripe_u8_sc(
421 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
422 size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels,
423 size_t kernel_width, size_t /*kernel_height*/, float sigma_x,
424 float /*sigma_y*/, FixedBorderType fixed_border_type,
425 kleidicv_filter_context_t *context) KLEIDICV_STREAMING {
426 690 auto *workspace = reinterpret_cast<SeparableFilterWorkspace *>(context);
427 1380 kleidicv_error_t checks_result = gaussian_blur_checks(
428 690 src, src_stride, dst, dst_stride, width, height, channels, workspace);
429
430
2/2
✓ Branch 0 taken 38 times.
✓ Branch 1 taken 652 times.
690 if (checks_result != KLEIDICV_OK) {
431 38 return checks_result;
432 }
433
434 652 Rectangle rect{width, height};
435
436
2/2
✓ Branch 0 taken 326 times.
✓ Branch 1 taken 326 times.
652 if (sigma_x == 0.0) {
437 652 return gaussian_blur<true>(kernel_width, src, src_stride, dst, dst_stride,
438 326 rect, y_begin, y_end, channels, sigma_x,
439 326 fixed_border_type, workspace);
440 }
441
442 652 return gaussian_blur<false>(kernel_width, src, src_stride, dst, dst_stride,
443 326 rect, y_begin, y_end, channels, sigma_x,
444 326 fixed_border_type, workspace);
445 690 }
446
447 } // namespace KLEIDICV_TARGET_NAMESPACE
448
449 #endif // KLEIDICV_GAUSSIAN_BLUR_SC_H
450