KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/filters/gaussian_blur_fixed_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 193 193 100.0%
Functions: 100 100 100.0%
Branches: 54 56 96.4%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_GAUSSIAN_BLUR_SC_H
6 #define KLEIDICV_GAUSSIAN_BLUR_SC_H
7
8 #include <array>
9 #include <cassert>
10
11 #include "kleidicv/filters/gaussian_blur.h"
12 #include "kleidicv/filters/separable_filter_15x15_sc.h"
13 #include "kleidicv/filters/separable_filter_21x21_sc.h"
14 #include "kleidicv/filters/separable_filter_3x3_sc.h"
15 #include "kleidicv/filters/separable_filter_5x5_sc.h"
16 #include "kleidicv/filters/separable_filter_7x7_sc.h"
17 #include "kleidicv/filters/sigma.h"
18 #include "kleidicv/workspace/separable.h"
19
20 #if KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2
21 #include <arm_sme.h>
22 #endif
23
24 namespace KLEIDICV_TARGET_NAMESPACE {
25
26 // Primary template for Gaussian Blur filters.
//
// Declaration only: no generic definition is visible in this listing, the
// behaviour comes from the specializations that follow.
//   ScalarType - element type the filter works on (every specialization
//                visible here is for uint8_t).
//   KernelSize - number of taps along one side of the square kernel.
//   IsBinomial - true selects the specializations with fixed binomial
//                weights, false the "CustomSigma" specialization that reads
//                its weights from a caller-supplied array.
27 template <typename ScalarType, size_t KernelSize, bool IsBinomial>
28 class GaussianBlur;
29
30 // Template for 3x3 Gaussian Blur binomial filters.
31 //
32 //            [ 1, 2, 1 ]          [ 1 ]
33 // F = 1/16 * [ 2, 4, 2 ] = 1/16 * [ 2 ] * [ 1, 2, 1 ]
34 //            [ 1, 2, 1 ]          [ 1 ]
35 template <>
36 class GaussianBlur<uint8_t, 3, true> {
37 public:
38 using SourceType = uint8_t;
39 using BufferType = uint16_t;
40 using DestinationType = uint8_t;
41
42 // Applies vertical filtering vector using SIMD operations.
43 //
44 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
45 804 void vertical_vector_path(svbool_t pg,
46 std::reference_wrapper<svuint8_t> src[3],
47 BufferType *dst) const KLEIDICV_STREAMING {
48 804 svuint16_t acc_0_2_b = svaddlb_u16(src[0], src[2]);
49 804 svuint16_t acc_0_2_t = svaddlt_u16(src[0], src[2]);
50
51 804 svuint16_t acc_1_b = svshllb_n_u16(src[1], 1);
52 804 svuint16_t acc_1_t = svshllt_n_u16(src[1], 1);
53
54 804 svuint16_t acc_u16_b = svadd_u16_x(pg, acc_0_2_b, acc_1_b);
55 804 svuint16_t acc_u16_t = svadd_u16_x(pg, acc_0_2_t, acc_1_t);
56
57 804 svuint16x2_t interleaved = svcreate2(acc_u16_b, acc_u16_t);
58 804 svst2(pg, &dst[0], interleaved);
59 804 }
60
61 // Applies horizontal filtering vector using SIMD operations.
62 //
63 // DST = 1/16 * [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
64 604 void horizontal_vector_path(svbool_t pg,
65 std::reference_wrapper<svuint16_t> src[3],
66 DestinationType *dst) const KLEIDICV_STREAMING {
67 604 svuint16_t acc_0_2 = svhadd_u16_x(pg, src[0], src[2]);
68
69 604 svuint16_t acc = svadd_u16_x(pg, acc_0_2, src[1]);
70 604 acc = svrshr_x(pg, acc, 3);
71
72 604 svst1b(pg, &dst[0], acc);
73 604 }
74
75 // Applies horizontal filtering vector using scalar operations.
76 //
77 // DST = 1/16 * [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
78 1608 void horizontal_scalar_path(const BufferType src[3],
79 DestinationType *dst) const KLEIDICV_STREAMING {
80 1608 auto acc = src[0] + 2 * src[1] + src[2];
81 1608 dst[0] = rounding_shift_right(acc, 4);
82 1608 }
83 }; // end of class GaussianBlur<uint8_t, 3, true>
84
85 // Template for 5x5 Gaussian Blur binomial filters.
86 //
87 //             [ 1,  4,  6,  4, 1 ]           [ 1 ]
88 //             [ 4, 16, 24, 16, 4 ]           [ 4 ]
89 // F = 1/256 * [ 6, 24, 36, 24, 6 ] = 1/256 * [ 6 ] * [ 1, 4, 6, 4, 1 ]
90 //             [ 4, 16, 24, 16, 4 ]           [ 4 ]
91 //             [ 1,  4,  6,  4, 1 ]           [ 1 ]
92 template <>
93 class GaussianBlur<uint8_t, 5, true> {
94 public:
95 using SourceType = uint8_t;
96 using BufferType = uint16_t;
97 using DestinationType = uint8_t;
98
99 // Applies vertical filtering vector using SIMD operations.
100 //
101 // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
102 2218 void vertical_vector_path(svbool_t pg,
103 std::reference_wrapper<svuint8_t> src[5],
104 BufferType *dst) const KLEIDICV_STREAMING {
105 2218 svuint16_t acc_0_4_b = svaddlb_u16(src[0], src[4]);
106 2218 svuint16_t acc_0_4_t = svaddlt_u16(src[0], src[4]);
107 2218 svuint16_t acc_1_3_b = svaddlb_u16(src[1], src[3]);
108 2218 svuint16_t acc_1_3_t = svaddlt_u16(src[1], src[3]);
109
110 2218 svuint16_t acc_u16_b = svmlalb_n_u16(acc_0_4_b, src[2], 6);
111 2218 svuint16_t acc_u16_t = svmlalt_n_u16(acc_0_4_t, src[2], 6);
112 2218 acc_u16_b = svmla_n_u16_x(pg, acc_u16_b, acc_1_3_b, 4);
113 2218 acc_u16_t = svmla_n_u16_x(pg, acc_u16_t, acc_1_3_t, 4);
114
115 2218 svuint16x2_t interleaved = svcreate2(acc_u16_b, acc_u16_t);
116 2218 svst2(pg, &dst[0], interleaved);
117 2218 }
118
119 // Applies horizontal filtering vector using SIMD operations.
120 //
121 // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
122 1872 void horizontal_vector_path(svbool_t pg,
123 std::reference_wrapper<svuint16_t> src[5],
124 DestinationType *dst) const KLEIDICV_STREAMING {
125 1872 svuint16_t acc_0_4 = svadd_x(pg, src[0], src[4]);
126 1872 svuint16_t acc_1_3 = svadd_x(pg, src[1], src[3]);
127 1872 svuint16_t acc = svmla_n_u16_x(pg, acc_0_4, src[2], 6);
128 1872 acc = svmla_n_u16_x(pg, acc, acc_1_3, 4);
129 1872 acc = svrshr_x(pg, acc, 8);
130 1872 svst1b(pg, &dst[0], acc);
131 1872 }
132
133 // Applies horizontal filtering vector using scalar operations.
134 //
135 // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T
136 8328 void horizontal_scalar_path(const BufferType src[5],
137 DestinationType *dst) const KLEIDICV_STREAMING {
138 8328 auto acc = src[0] + src[4] + 4 * (src[1] + src[3]) + 6 * src[2];
139 8328 dst[0] = rounding_shift_right(acc, 8);
140 8328 }
141 }; // end of class GaussianBlur<uint8_t, 5, true>
142
143 // Template for 7x7 Gaussian Blur binomial filters.
144 //
145 //              [  4,  14,  28,  36,  28,  14,  4 ]
146 //              [ 14,  49,  98, 126,  98,  49, 14 ]
147 //              [ 28,  98, 196, 252, 196,  98, 28 ]
148 // F = 1/4096 * [ 36, 126, 252, 324, 252, 126, 36 ] =
149 //              [ 28,  98, 196, 252, 196,  98, 28 ]
150 //              [ 14,  49,  98, 126,  98,  49, 14 ]
151 //              [  4,  14,  28,  36,  28,  14,  4 ]
152 //
153 //              [  2 ]
154 //              [  7 ]
155 //              [ 14 ]
156 //   = 1/4096 * [ 18 ] * [ 2, 7, 14, 18, 14, 7, 2 ]
157 //              [ 14 ]
158 //              [  7 ]
159 //              [  2 ]
160 template <>
161 class GaussianBlur<uint8_t, 7, true> {
162 public:
163 using SourceType = uint8_t;
164 using BufferType = uint16_t;
165 using DestinationType = uint8_t;
166
167 // Applies vertical filtering vector using SIMD operations.
168 //
169 // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4, SRC5, SRC6 ] *
170 // * [ 2, 7, 14, 18, 14, 7, 2 ]T
171 870 void vertical_vector_path(svbool_t pg,
172 std::reference_wrapper<svuint8_t> src[7],
173 BufferType *dst) const KLEIDICV_STREAMING {
174 870 svuint16_t acc_0_6_b = svaddlb_u16(src[0], src[6]);
175 870 svuint16_t acc_0_6_t = svaddlt_u16(src[0], src[6]);
176
177 870 svuint16_t acc_1_5_b = svaddlb_u16(src[1], src[5]);
178 870 svuint16_t acc_1_5_t = svaddlt_u16(src[1], src[5]);
179
180 870 svuint16_t acc_2_4_b = svaddlb_u16(src[2], src[4]);
181 870 svuint16_t acc_2_4_t = svaddlt_u16(src[2], src[4]);
182
183 870 svuint16_t acc_3_b = svmovlb_u16(src[3]);
184 870 svuint16_t acc_3_t = svmovlt_u16(src[3]);
185
186 870 svuint16_t acc_0_2_4_6_b = svmla_n_u16_x(pg, acc_0_6_b, acc_2_4_b, 7);
187 870 svuint16_t acc_0_2_4_6_t = svmla_n_u16_x(pg, acc_0_6_t, acc_2_4_t, 7);
188
189 870 svuint16_t acc_0_2_3_4_6_b = svmla_n_u16_x(pg, acc_0_2_4_6_b, acc_3_b, 9);
190 870 svuint16_t acc_0_2_3_4_6_t = svmla_n_u16_x(pg, acc_0_2_4_6_t, acc_3_t, 9);
191 870 acc_0_2_3_4_6_b = svlsl_n_u16_x(pg, acc_0_2_3_4_6_b, 1);
192 870 acc_0_2_3_4_6_t = svlsl_n_u16_x(pg, acc_0_2_3_4_6_t, 1);
193
194 1740 svuint16_t acc_0_1_2_3_4_5_6_b =
195 870 svmla_n_u16_x(pg, acc_0_2_3_4_6_b, acc_1_5_b, 7);
196 1740 svuint16_t acc_0_1_2_3_4_5_6_t =
197 870 svmla_n_u16_x(pg, acc_0_2_3_4_6_t, acc_1_5_t, 7);
198
199 1740 svuint16x2_t interleaved =
200 870 svcreate2(acc_0_1_2_3_4_5_6_b, acc_0_1_2_3_4_5_6_t);
201 870 svst2(pg, &dst[0], interleaved);
202 870 }
203
204 // Applies horizontal filtering vector using SIMD operations.
205 //
206 // DST = 1/4096 * [ SRC0, SRC1, SRC2, SRC3, SRC4, SRC5, SRC6 ] *
207 // * [ 2, 7, 14, 18, 14, 7, 2 ]T
208 684 void horizontal_vector_path(svbool_t pg,
209 std::reference_wrapper<svuint16_t> src[7],
210 DestinationType *dst) const KLEIDICV_STREAMING {
211 684 svuint32_t acc_0_6_b = svaddlb_u32(src[0], src[6]);
212 684 svuint32_t acc_0_6_t = svaddlt_u32(src[0], src[6]);
213
214 684 svuint32_t acc_1_5_b = svaddlb_u32(src[1], src[5]);
215 684 svuint32_t acc_1_5_t = svaddlt_u32(src[1], src[5]);
216
217 684 svuint16_t acc_2_4 = svadd_u16_x(pg, src[2], src[4]);
218
219 684 svuint32_t acc_0_2_4_6_b = svmlalb_n_u32(acc_0_6_b, acc_2_4, 7);
220 684 svuint32_t acc_0_2_4_6_t = svmlalt_n_u32(acc_0_6_t, acc_2_4, 7);
221
222 684 svuint32_t acc_0_2_3_4_6_b = svmlalb_n_u32(acc_0_2_4_6_b, src[3], 9);
223 684 svuint32_t acc_0_2_3_4_6_t = svmlalt_n_u32(acc_0_2_4_6_t, src[3], 9);
224
225 684 acc_0_2_3_4_6_b = svlsl_n_u32_x(pg, acc_0_2_3_4_6_b, 1);
226 684 acc_0_2_3_4_6_t = svlsl_n_u32_x(pg, acc_0_2_3_4_6_t, 1);
227
228 1368 svuint32_t acc_0_1_2_3_4_5_6_b =
229 684 svmla_n_u32_x(pg, acc_0_2_3_4_6_b, acc_1_5_b, 7);
230 1368 svuint32_t acc_0_1_2_3_4_5_6_t =
231 684 svmla_n_u32_x(pg, acc_0_2_3_4_6_t, acc_1_5_t, 7);
232
233 1368 svuint16_t acc_0_1_2_3_4_5_6_u16_b =
234 684 svrshrnb_n_u32(acc_0_1_2_3_4_5_6_b, 12);
235 1368 svuint16_t acc_0_1_2_3_4_5_6_u16 =
236 684 svrshrnt_n_u32(acc_0_1_2_3_4_5_6_u16_b, acc_0_1_2_3_4_5_6_t, 12);
237
238 684 svst1b(pg, &dst[0], acc_0_1_2_3_4_5_6_u16);
239 684 }
240
241 // Applies horizontal filtering vector using scalar operations.
242 //
243 // DST = 1/4096 * [ SRC0, SRC1, SRC2, SRC3, SRC4, SRC5, SRC6 ] *
244 // * [ 2, 7, 14, 18, 14, 7, 2 ]T
245 4932 void horizontal_scalar_path(const BufferType src[7],
246 DestinationType *dst) const KLEIDICV_STREAMING {
247 14796 uint32_t acc = src[0] * 2 + src[1] * 7 + src[2] * 14 + src[3] * 18 +
248 9864 src[4] * 14 + src[5] * 7 + src[6] * 2;
249 4932 dst[0] = rounding_shift_right(acc, 12);
250 4932 }
251 }; // end of class GaussianBlur<uint8_t, 7, true>
252
253 // CustomSigma variant
254 template <size_t KernelSize>
255 class GaussianBlur<uint8_t, KernelSize, false> {
256 public:
257 using SourceType = uint8_t;
258 using BufferType = uint8_t;
259 using DestinationType = uint8_t;
260 using SourceVecTraits =
261 typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits<SourceType>;
262 using SourceVectorType = typename SourceVecTraits::VectorType;
263
264 static constexpr size_t kHalfKernelSize = get_half_kernel_size(KernelSize);
265
266 345 explicit GaussianBlur(const uint16_t *half_kernel)
267 345 : half_kernel_(half_kernel) {}
268
269 8328 void vertical_vector_path(
270 svbool_t pg, std::reference_wrapper<SourceVectorType> src[KernelSize],
271 BufferType *dst) const KLEIDICV_STREAMING {
272 8328 common_vector_path(pg, src, dst);
273 8328 }
274
275 105960 void vertical_scalar_path(const SourceType src[KernelSize],
276 BufferType *dst) const KLEIDICV_STREAMING {
277 211920 uint32_t acc = static_cast<uint32_t>(src[kHalfKernelSize - 1]) *
278 105960 half_kernel_[kHalfKernelSize - 1];
279
280 // Optimization to avoid unnecessary branching in vector code.
281 KLEIDICV_FORCE_LOOP_UNROLL
282
10/10
✓ Branch 0 taken 33264 times.
✓ Branch 1 taken 232848 times.
✓ Branch 2 taken 67680 times.
✓ Branch 3 taken 676800 times.
✓ Branch 4 taken 372 times.
✓ Branch 5 taken 372 times.
✓ Branch 6 taken 1440 times.
✓ Branch 7 taken 2880 times.
✓ Branch 8 taken 3204 times.
✓ Branch 9 taken 9612 times.
1028472 for (size_t i = 0; i < kHalfKernelSize - 1; i++) {
283 2767536 acc += (static_cast<uint32_t>(src[i]) +
284 1845024 static_cast<uint32_t>(src[KernelSize - i - 1])) *
285 922512 half_kernel_[i];
286 922512 }
287
288 105960 dst[0] = static_cast<BufferType>(rounding_shift_right(acc, 8));
289 105960 }
290
291 5844 void horizontal_vector_path(
292 svbool_t pg, std::reference_wrapper<SourceVectorType> src[KernelSize],
293 BufferType *dst) const KLEIDICV_STREAMING {
294 5844 common_vector_path(pg, src, dst);
295 5844 }
296
297 105960 void horizontal_scalar_path(const BufferType src[KernelSize],
298 DestinationType *dst) const KLEIDICV_STREAMING {
299 105960 vertical_scalar_path(src, dst);
300 105960 }
301
302 private:
303 14172 void common_vector_path(
304 svbool_t pg, std::reference_wrapper<SourceVectorType> src[KernelSize],
305 BufferType *dst) const KLEIDICV_STREAMING {
306 14172 svbool_t pg16_all = svptrue_b16();
307 28344 svuint16_t acc_b = svmullb_n_u16(src[kHalfKernelSize - 1],
308 14172 half_kernel_[kHalfKernelSize - 1]);
309 28344 svuint16_t acc_t = svmullt_n_u16(src[kHalfKernelSize - 1],
310 14172 half_kernel_[kHalfKernelSize - 1]);
311
312 // Optimization to avoid unnecessary branching in vector code.
313 KLEIDICV_FORCE_LOOP_UNROLL
314
10/10
✓ Branch 0 taken 4752 times.
✓ Branch 1 taken 33264 times.
✓ Branch 2 taken 7416 times.
✓ Branch 3 taken 74160 times.
✓ Branch 4 taken 342 times.
✓ Branch 5 taken 342 times.
✓ Branch 6 taken 660 times.
✓ Branch 7 taken 1320 times.
✓ Branch 8 taken 1002 times.
✓ Branch 9 taken 3006 times.
126264 for (size_t i = 0; i < kHalfKernelSize - 1; i++) {
315 112092 const size_t j = KernelSize - i - 1;
316 112092 svuint16_t vec_b = svaddlb_u16(src[i], src[j]);
317 112092 svuint16_t vec_t = svaddlt_u16(src[i], src[j]);
318
319 112092 acc_b = svmla_n_u16_x(pg16_all, acc_b, vec_b, half_kernel_[i]);
320 112092 acc_t = svmla_n_u16_x(pg16_all, acc_t, vec_t, half_kernel_[i]);
321 112092 }
322
323 // Rounding before narrowing
324 14172 acc_b = svqadd_n_u16(acc_b, 128);
325 14172 acc_t = svqadd_n_u16(acc_t, 128);
326 // Keep only the highest 8 bits
327 28344 svuint8_t result =
328 14172 svtrn2_u8(svreinterpret_u8_u16(acc_b), svreinterpret_u8_u16(acc_t));
329 14172 svst1(pg, &dst[0], result);
330 14172 }
331
332 const uint16_t *half_kernel_;
333 }; // end of class GaussianBlur<uint8_t, KernelSize, false>
334
335 template <size_t KernelSize, bool IsBinomial, typename ScalarType>
336 978 static kleidicv_error_t gaussian_blur_fixed_kernel_size(
337 const ScalarType *src, size_t src_stride, ScalarType *dst,
338 size_t dst_stride, Rectangle &rect, size_t y_begin, size_t y_end,
339 size_t channels, float sigma, FixedBorderType border_type,
340 SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING {
341 using GaussianBlurFilter = GaussianBlur<ScalarType, KernelSize, IsBinomial>;
342
343 978 Rows<const ScalarType> src_rows{src, src_stride, channels};
344 978 Rows<ScalarType> dst_rows{dst, dst_stride, channels};
345
346 if constexpr (IsBinomial) {
347 393 GaussianBlurFilter blur;
348 393 SeparableFilter<GaussianBlurFilter, KernelSize> filter{blur};
349 786 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
350 393 border_type, filter);
351
352 393 return KLEIDICV_OK;
353 393 } else {
354 585 constexpr size_t kHalfKernelSize = get_half_kernel_size(KernelSize);
355 585 uint16_t half_kernel[128];
356 585 generate_gaussian_half_kernel(half_kernel, kHalfKernelSize, sigma);
357 // If sigma is so small that the middle point gets all the weights, it's
358 // just a copy
359
10/10
✓ Branch 0 taken 96 times.
✓ Branch 1 taken 48 times.
✓ Branch 2 taken 96 times.
✓ Branch 3 taken 48 times.
✓ Branch 4 taken 51 times.
✓ Branch 5 taken 48 times.
✓ Branch 6 taken 51 times.
✓ Branch 7 taken 48 times.
✓ Branch 8 taken 51 times.
✓ Branch 9 taken 48 times.
585 if (half_kernel[kHalfKernelSize - 1] < 256) {
360 345 GaussianBlurFilter blur(half_kernel);
361 345 SeparableFilter<GaussianBlurFilter, KernelSize> filter{blur};
362 690 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
363 345 border_type, filter);
364 345 } else {
365
10/10
✓ Branch 0 taken 684 times.
✓ Branch 1 taken 48 times.
✓ Branch 2 taken 972 times.
✓ Branch 3 taken 48 times.
✓ Branch 4 taken 108 times.
✓ Branch 5 taken 48 times.
✓ Branch 6 taken 204 times.
✓ Branch 7 taken 48 times.
✓ Branch 8 taken 300 times.
✓ Branch 9 taken 48 times.
2508 for (size_t row = y_begin; row < y_end; ++row) {
366 #if KLEIDICV_TARGET_SME && defined(__ANDROID__)
367 __arm_sc_memcpy(
368 static_cast<void *>(&dst_rows.at(row)[0]),
369 static_cast<const void *>(&src_rows.at(row)[0]),
370 rect.width() * sizeof(ScalarType) * dst_rows.channels());
371 #else
372 4536 std::memcpy(static_cast<void *>(&dst_rows.at(row)[0]),
373 2268 static_cast<const void *>(&src_rows.at(row)[0]),
374 2268 rect.width() * sizeof(ScalarType) * dst_rows.channels());
375 #endif
376 2268 }
377 }
378 585 return KLEIDICV_OK;
379 585 }
380 978 }
381
382 template <bool IsBinomial, typename ScalarType>
383 978 static kleidicv_error_t gaussian_blur(
384 size_t kernel_size, const ScalarType *src, size_t src_stride,
385 ScalarType *dst, size_t dst_stride, Rectangle &rect, size_t y_begin,
386 size_t y_end, size_t channels, float sigma, FixedBorderType border_type,
387 SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING {
388
10/12
✓ Branch 0 taken 141 times.
✓ Branch 1 taken 153 times.
✓ Branch 2 taken 99 times.
✓ Branch 3 taken 48 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 48 times.
✓ Branch 6 taken 99 times.
✓ Branch 7 taken 99 times.
✓ Branch 8 taken 99 times.
✓ Branch 9 taken 96 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 96 times.
978 switch (kernel_size) {
389 case 3:
390 240 return gaussian_blur_fixed_kernel_size<3, IsBinomial>(
391 240 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
392 240 sigma, border_type, workspace);
393 case 5:
394 252 return gaussian_blur_fixed_kernel_size<5, IsBinomial>(
395 252 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
396 252 sigma, border_type, workspace);
397 case 7:
398 198 return gaussian_blur_fixed_kernel_size<7, IsBinomial>(
399 198 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
400 198 sigma, border_type, workspace);
401 case 15:
402 // 15x15 does not have a binomial variant
403 144 return gaussian_blur_fixed_kernel_size<15, false>(
404 144 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
405 144 sigma, border_type, workspace);
406 case 21:
407 // 21x21 does not have a binomial variant
408 144 return gaussian_blur_fixed_kernel_size<21, false>(
409 144 src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels,
410 144 sigma, border_type, workspace);
411 // gaussian_blur_is_implemented checked the kernel size already.
412 // GCOVR_EXCL_START
413 default:
414 assert(!"kernel size not implemented");
415 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
416 // GCOVR_EXCL_STOP
417 }
418 978 }
419
420 1035 static kleidicv_error_t gaussian_blur_fixed_stripe_u8_sc(
421 const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
422 size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels,
423 size_t kernel_width, size_t /*kernel_height*/, float sigma_x,
424 float /*sigma_y*/, FixedBorderType fixed_border_type,
425 kleidicv_filter_context_t *context) KLEIDICV_STREAMING {
426 1035 auto *workspace = reinterpret_cast<SeparableFilterWorkspace *>(context);
427 2070 kleidicv_error_t checks_result = gaussian_blur_checks(
428 1035 src, src_stride, dst, dst_stride, width, height, channels, workspace);
429
430
2/2
✓ Branch 0 taken 57 times.
✓ Branch 1 taken 978 times.
1035 if (checks_result != KLEIDICV_OK) {
431 57 return checks_result;
432 }
433
434 978 Rectangle rect{width, height};
435
436
2/2
✓ Branch 0 taken 489 times.
✓ Branch 1 taken 489 times.
978 if (sigma_x == 0.0) {
437 978 return gaussian_blur<true>(kernel_width, src, src_stride, dst, dst_stride,
438 489 rect, y_begin, y_end, channels, sigma_x,
439 489 fixed_border_type, workspace);
440 }
441
442 978 return gaussian_blur<false>(kernel_width, src, src_stride, dst, dst_stride,
443 489 rect, y_begin, y_end, channels, sigma_x,
444 489 fixed_border_type, workspace);
445 1035 }
446
447 } // namespace KLEIDICV_TARGET_NAMESPACE
448
449 #endif // KLEIDICV_GAUSSIAN_BLUR_SC_H
450