Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_SIGMA_H | ||
6 | #define KLEIDICV_SIGMA_H | ||
7 | |||
8 | #include <cmath> | ||
9 | #include <cstdint> | ||
10 | #include <cstdlib> | ||
11 | |||
12 | #include "kleidicv/config.h" | ||
13 | |||
14 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
15 | |||
16 | 54 | static constexpr size_t get_half_kernel_size(size_t kernel_size) | |
17 | KLEIDICV_STREAMING { | ||
18 | // since kernel sizes are odd, "half" here means that | ||
19 | // the extra element is included | ||
20 | 54 | return (kernel_size >> 1) + 1; | |
21 | } | ||
22 | |||
// Generates one half of a symmetric, normalized Gaussian kernel in fixed-point
// format.
//
// 'half_kernel' receives 'half_size' values: element 0 is the outermost tap
// and element 'half_size - 1' is the centre tap of a full kernel that has
// '2 * half_size - 1' taps. Values use 8 fractional bits, i.e. 256 represents
// 1.0. If 'sigma' is 0, a default is derived from the full kernel size.
// Nothing is written when 'half_size' is 0.
//
// This function is not marked as streaming compatible, as std::round is also
// not streaming compatible.
static void generate_gaussian_half_kernel(uint16_t* half_kernel,
                                          size_t half_size, float sigma) {
  // An empty half-kernel has no mid point: 'half_size - 1' would wrap around
  // and the loops below would write far outside of every buffer involved.
  if (half_size == 0) {
    return;
  }

  // Define the mid point of the full kernel range.
  const size_t kMid = half_size - 1;

  // Define the full kernel size.
  const size_t kKernelSize = kMid * 2 + 1;

  // Calculate the sigma manually in case it is not defined.
  if (sigma == 0.0F) {
    sigma = static_cast<float>(kKernelSize) * 0.15F + 0.35F;
  }

  // Temporary float half-kernel. It caches the first 'kCacheSize' weights so
  // they are not computed twice; weights beyond that are recomputed on demand
  // rather than being stored past the end of the array.
  constexpr size_t kCacheSize = 255;
  float cached_weights[kCacheSize];

  // Prepare the sigma value for later multiplication inside a loop.
  const float coefficient = 1 / -(2 * sigma * sigma);

  // Unnormalized Gaussian weight of the tap at 'index', where the distance
  // from the mid point is 'kMid - index'.
  const auto weight_at = [kMid, coefficient](size_t index) {
    const float distance = static_cast<float>(kMid - index);
    return std::exp(distance * distance * coefficient);
  };

  float sum = 0.0F;
  for (size_t i = 0; i < kMid; i++) {
    const float weight = weight_at(i);
    if (i < kCacheSize) {
      cached_weights[i] = weight;
    }
    sum += weight;
  }

  // This multiplier is used for two things:
  // * For normalizing the kernel values, so the sum of the final values is 1.
  //   (The 'sum' variable only accounts for the half of the kernel values
  //   without the mid point, whose unnormalized weight is exp(0) = 1. That is
  //   the reason for the division by '(sum * 2 + 1)'.)
  // * For converting the values to fixed-point (uint16_t), where 8 bits are
  //   used for the fractional part. That is the reason for the multiplication
  //   by 256.
  const float multiplier = 256 / (sum * 2 + 1);

  // Normalize the kernel and convert it to the fixed-point format. Rounding
  // errors are diffused in the kernel: the error made on one tap is subtracted
  // from the next one before it is rounded.
  float error = 0.0F;
  for (size_t i = 0; i < kMid; i++) {
    const float weight = (i < kCacheSize) ? cached_weights[i] : weight_at(i);
    const float value = weight * multiplier - error;
    const float value_rounded = std::round(value);
    half_kernel[i] = static_cast<uint16_t>(value_rounded);
    error = value_rounded - value;
  }

  // The mid point has an unnormalized weight of 1, so its normalized value is
  // the multiplier itself, corrected by the last diffused error.
  half_kernel[kMid] = static_cast<uint16_t>(std::round(multiplier - error));
}
73 | |||
74 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
75 | |||
76 | #endif // KLEIDICV_SIGMA_H | ||
77 |