| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include "kleidicv/kleidicv.h" | ||
| 6 | #include "kleidicv/neon.h" | ||
| 7 | #include "kleidicv/resize/resize_linear.h" | ||
| 8 | |||
| 9 | namespace kleidicv::neon { | ||
| 10 | |||
| 11 | /// Resizes source data by averaging 4 elements to one. | ||
| 12 | /// In-place operation not supported. | ||
| 13 | /// | ||
| 14 | /// For even source dimensions `(2*N, 2*M)` destination dimensions should be | ||
| 15 | /// `(N, M)`. | ||
| 16 | /// In case of odd source dimensions `(2*N+1, 2*M+1)`, the destination | ||
| 17 | /// dimensions can be `(N+1, M+1)`, `(N, M)`, or a combination of both. | ||
| 18 | /// In the latter cases the respective last row or column of the source will not be | ||
| 19 | /// processed. Currently only supports single-channel data. Number of pixels in | ||
| 20 | /// the source is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. | ||
| 21 | /// | ||
| 22 | /// Even dimension example of 2x2 to 1x1 conversion: | ||
| 23 | /// ``` | ||
| 24 | /// | a | b | --> | (a+b+c+d)/4 | | ||
| 25 | /// | c | d | | ||
| 26 | /// ``` | ||
| 27 | /// Odd dimension example of 3x3 to 2x2 conversion: | ||
| 28 | /// ``` | ||
| 29 | /// | a | b | c | | (a+b+d+e)/4 | (c+f)/2 | | ||
| 30 | /// | d | e | f | --> | (g+h)/2 | i | | ||
| 31 | /// | g | h | i | | ||
| 32 | /// ``` | ||
| 33 | |||
| 34 | KLEIDICV_TARGET_FN_ATTRS | ||
| 35 | 99 | kleidicv_error_t resize_to_quarter_u8(const uint8_t *src, size_t src_stride, | |
| 36 | size_t src_width, size_t src_height, | ||
| 37 | uint8_t *dst, size_t dst_stride, | ||
| 38 | size_t dst_width, size_t dst_height) { | ||
| 39 | using VecTraits = neon::VecTraits<uint8_t>; | ||
| 40 | 99 | constexpr size_t kVectorLengthX2 = kVectorLength * 2; | |
| 41 | 99 | constexpr size_t kVectorLengthX4 = kVectorLength * 4; | |
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 234 times.
✓ Branch 1 taken 99 times.
|
567 | for (; src_height >= 2; src_height -= 2, src += (src_stride * 2), |
| 44 | 234 | --dst_height, dst += dst_stride) { | |
| 45 | 234 | const uint8_t *src_l = src; | |
| 46 | 234 | uint8_t *dst_l = dst; | |
| 47 | 234 | size_t src_width_l = src_width; | |
| 48 | 234 | size_t dst_width_l = dst_width; | |
| 49 | |||
| 50 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 234 times.
|
240 | for (; src_width_l >= kVectorLengthX4; |
| 51 | 6 | src_width_l -= kVectorLengthX4, dst_width_l -= kVectorLengthX2, | |
| 52 | 6 | dst_l += kVectorLengthX2, src_l += kVectorLengthX4) { | |
| 53 | 6 | KLEIDICV_PREFETCH(src_l + 1024); | |
| 54 | 6 | KLEIDICV_PREFETCH(src_l + src_stride + 1024); | |
| 55 | |||
| 56 | 6 | uint8x16x4_t top_line, bottom_line; | |
| 57 | 6 | uint16x8_t top_line_pairs_summed[4]; | |
| 58 | 6 | uint16x8_t bottom_line_pairs_summed[4]; | |
| 59 | 6 | uint16x8_t result_before_averaging[4]; | |
| 60 | 6 | uint8x16x2_t result; | |
| 61 | |||
| 62 | 6 | VecTraits::load(src_l, top_line); | |
| 63 | 6 | VecTraits::load(&src_l[src_stride], bottom_line); | |
| 64 | |||
| 65 | 6 | top_line_pairs_summed[0] = vpaddlq_u8(top_line.val[0]); | |
| 66 | 6 | top_line_pairs_summed[1] = vpaddlq_u8(top_line.val[1]); | |
| 67 | 6 | top_line_pairs_summed[2] = vpaddlq_u8(top_line.val[2]); | |
| 68 | 6 | top_line_pairs_summed[3] = vpaddlq_u8(top_line.val[3]); | |
| 69 | |||
| 70 | 6 | bottom_line_pairs_summed[0] = vpaddlq_u8(bottom_line.val[0]); | |
| 71 | 6 | bottom_line_pairs_summed[1] = vpaddlq_u8(bottom_line.val[1]); | |
| 72 | 6 | bottom_line_pairs_summed[2] = vpaddlq_u8(bottom_line.val[2]); | |
| 73 | 6 | bottom_line_pairs_summed[3] = vpaddlq_u8(bottom_line.val[3]); | |
| 74 | |||
| 75 | 6 | result_before_averaging[0] = | |
| 76 | 6 | vaddq_u16(top_line_pairs_summed[0], bottom_line_pairs_summed[0]); | |
| 77 | 6 | result_before_averaging[1] = | |
| 78 | 6 | vaddq_u16(top_line_pairs_summed[1], bottom_line_pairs_summed[1]); | |
| 79 | 6 | result_before_averaging[2] = | |
| 80 | 6 | vaddq_u16(top_line_pairs_summed[2], bottom_line_pairs_summed[2]); | |
| 81 | 6 | result_before_averaging[3] = | |
| 82 | 6 | vaddq_u16(top_line_pairs_summed[3], bottom_line_pairs_summed[3]); | |
| 83 | |||
| 84 | 6 | result.val[0] = | |
| 85 | 12 | vrshrn_high_n_u16(vrshrn_n_u16(result_before_averaging[0], 2), | |
| 86 | 6 | result_before_averaging[1], 2); | |
| 87 | 6 | result.val[1] = | |
| 88 | 12 | vrshrn_high_n_u16(vrshrn_n_u16(result_before_averaging[2], 2), | |
| 89 | 6 | result_before_averaging[3], 2); | |
| 90 | |||
| 91 | 6 | VecTraits::store(result, dst_l); | |
| 92 | 6 | } | |
| 93 | |||
| 94 |
2/2✓ Branch 0 taken 1284 times.
✓ Branch 1 taken 234 times.
|
1518 | for (; src_width_l > 1; |
| 95 | 1284 | src_width_l -= 2, src_l += 2, --dst_width_l, ++dst_l) { | |
| 96 | 1284 | disable_loop_vectorization(); | |
| 97 | 1284 | *dst_l = rounding_shift_right<uint16_t>( | |
| 98 | 2568 | static_cast<uint16_t>(*src_l) + *(src_l + 1) + *(src_l + src_stride) + | |
| 99 | 1284 | *(src_l + src_stride + 1), | |
| 100 | 2); | ||
| 101 | 1284 | } | |
| 102 | |||
| 103 |
2/2✓ Branch 0 taken 214 times.
✓ Branch 1 taken 20 times.
|
234 | if (dst_width_l) { |
| 104 | 20 | *dst_l = rounding_shift_right<uint16_t>( | |
| 105 | 20 | static_cast<uint16_t>(*src_l) + *(src_l + src_stride), 1); | |
| 106 | 20 | } | |
| 107 | 234 | } | |
| 108 | |||
| 109 |
2/2✓ Branch 0 taken 81 times.
✓ Branch 1 taken 18 times.
|
99 | if (dst_height) { |
| 110 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 18 times.
|
19 | for (; src_width >= kVectorLengthX4; |
| 111 | 1 | src_width -= kVectorLengthX4, dst_width -= kVectorLengthX2, | |
| 112 | 1 | dst += kVectorLengthX2, src += kVectorLengthX4) { | |
| 113 | 1 | uint8x16x4_t vsrc; | |
| 114 | 1 | uint16x8_t vsrc_line_pairs_summed[4]; | |
| 115 | 1 | uint8x16x2_t result; | |
| 116 | 1 | VecTraits::load(&src[0], vsrc); | |
| 117 | |||
| 118 | 1 | vsrc_line_pairs_summed[0] = vpaddlq_u8(vsrc.val[0]); | |
| 119 | 1 | vsrc_line_pairs_summed[1] = vpaddlq_u8(vsrc.val[1]); | |
| 120 | 1 | vsrc_line_pairs_summed[2] = vpaddlq_u8(vsrc.val[2]); | |
| 121 | 1 | vsrc_line_pairs_summed[3] = vpaddlq_u8(vsrc.val[3]); | |
| 122 | |||
| 123 | 1 | result.val[0] = | |
| 124 | 2 | vrshrn_high_n_u16(vrshrn_n_u16(vsrc_line_pairs_summed[0], 1), | |
| 125 | 1 | vsrc_line_pairs_summed[1], 1); | |
| 126 | 1 | result.val[1] = | |
| 127 | 2 | vrshrn_high_n_u16(vrshrn_n_u16(vsrc_line_pairs_summed[2], 1), | |
| 128 | 1 | vsrc_line_pairs_summed[3], 1); | |
| 129 | |||
| 130 | 1 | VecTraits::store(result, dst); | |
| 131 | 1 | } | |
| 132 | |||
| 133 |
2/2✓ Branch 0 taken 103 times.
✓ Branch 1 taken 18 times.
|
121 | for (; src_width > 1; src_width -= 2, src += 2, --dst_width, ++dst) { |
| 134 | 103 | disable_loop_vectorization(); | |
| 135 | 103 | *dst = rounding_shift_right<uint16_t>( | |
| 136 | 103 | static_cast<uint16_t>(*src) + *(src + 1), 1); | |
| 137 | 103 | } | |
| 138 | |||
| 139 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 8 times.
|
18 | if (dst_width) { |
| 140 | 8 | *dst = *src; | |
| 141 | 8 | } | |
| 142 | 18 | } | |
| 143 | 99 | return KLEIDICV_OK; | |
| 144 | 99 | } | |
| 145 | |||
| 146 | } // namespace kleidicv::neon | ||
| 147 |