| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #ifndef KLEIDICV_SIGMA_H | ||
| 6 | #define KLEIDICV_SIGMA_H | ||
| 7 | |||
| 8 | #include <cmath> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <cstdlib> | ||
| 11 | |||
| 12 | #include "kleidicv/config.h" | ||
| 13 | |||
| 14 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
| 15 | |||
| 16 | 72 | static constexpr size_t get_half_kernel_size(size_t kernel_size) | |
| 17 | KLEIDICV_STREAMING { | ||
| 18 | // since kernel sizes are odd, "half" here means that | ||
| 19 | // the extra element is included | ||
| 20 | 72 | return (kernel_size >> 1) + 1; | |
| 21 | } | ||
| 22 | |||
// Generates one half of a symmetric 1-D Gaussian kernel in fixed-point form.
//
// Parameters:
//   half_kernel  Output buffer with room for 'half_size' values. Elements
//                [0, half_size - 2] receive the tail weights, ordered from the
//                tap farthest from the centre to the tap next to it; element
//                [half_size - 1] receives the centre weight.
//   half_size    Number of values to produce, i.e. (kernel_size / 2) + 1 for
//                an odd kernel size. Must be in the range [1, 256].
//   sigma        Standard deviation of the Gaussian. If it is 0, a value is
//                derived from the full kernel size instead.
//
// The weights are unsigned fixed-point numbers with 8 fractional bits, scaled
// so that the weights of the full (mirrored) kernel add up to 256, i.e. 1.0.
//
// If 'half_size' is outside the supported range the function returns without
// writing anything to 'half_kernel'.
//
// This function is not marked as streaming compatible, as std::round is also
// not streaming compatible.
static void generate_gaussian_half_kernel(uint16_t* half_kernel,
                                          size_t half_size, float sigma) {
  // Capacity of the temporary buffer that holds the tail weights.
  constexpr size_t kMaxTailLength = 255;

  // Reject sizes that cannot be handled: zero would make 'half_size - 1' wrap
  // around, and more than 'kMaxTailLength' tail elements would overflow the
  // temporary buffer below.
  if (half_size == 0 || half_size - 1 > kMaxTailLength) {
    return;
  }

  // Define the mid point of the full kernel range.
  const size_t kMid = half_size - 1;

  // Define the full kernel size.
  const size_t kKernelSize = kMid * 2 + 1;

  // Calculate the sigma manually in case it is not defined.
  if (sigma == 0.0F) {
    sigma = static_cast<float>(kKernelSize) * 0.15F + 0.35F;
  }

  // Temporary float half-kernel (tail weights only, the mid point is
  // implicitly 1.0 before normalization).
  float half_kernel_float[kMaxTailLength];

  // Prepare the sigma value for later multiplication inside a loop, so that
  // each weight is exp(distance^2 * coefficient).
  const float coefficient = 1.0F / -(2.0F * sigma * sigma);

  // Compute the unnormalized tail weights. 'j' is the distance from the mid
  // point; it runs from kMid down to 1 while 'i' runs from 0 up to kMid - 1.
  float sum = 0.0F;
  size_t j = kMid;
  for (size_t i = 0; i < kMid; i++, j--) {
    const float distance = static_cast<float>(j);
    half_kernel_float[i] = std::exp(distance * distance * coefficient);
    sum += half_kernel_float[i];
  }

  // This multiplier is used for two things:
  // * For normalizing the kernel values, so the sum of the final values is 1.
  //   (The 'sum' variable only accounts for the half of the kernel values
  //   without the mid point. That is the reason for the division by
  //   '(sum * 2 + 1)'.)
  // * For converting the values to fixed-point (uint16_t), where 8 bits are
  //   used for the fractional part. That is the reason for the multiplication
  //   by 256.
  const float multiplier = 256.0F / (sum * 2.0F + 1.0F);

  // Normalize the kernel and convert it to the fixed-point format. Rounding
  // errors are diffused in the kernel: the error made when rounding one value
  // is subtracted from the next value before that one is rounded.
  float error = 0.0F;
  for (size_t i = 0; i < kMid; i++) {
    const float value = half_kernel_float[i] * multiplier - error;
    const float value_rounded = std::round(value);
    half_kernel[i] = static_cast<uint16_t>(value_rounded);
    error = value_rounded - value;
  }

  // The unnormalized mid point weight is exp(0) == 1, so its scaled value is
  // the multiplier itself, corrected by the last diffused rounding error.
  half_kernel[kMid] = static_cast<uint16_t>(std::round(multiplier - error));
}
| 73 | |||
| 74 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
| 75 | |||
| 76 | #endif // KLEIDICV_SIGMA_H | ||
| 77 |