| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include "kleidicv/kleidicv.h" | ||
| 6 | #include "kleidicv/neon.h" | ||
| 7 | #include "kleidicv/resize/resize.h" | ||
| 8 | |||
| 9 | namespace kleidicv::neon { | ||
| 10 | |||
| 11 | KLEIDICV_TARGET_FN_ATTRS | ||
| 12 | 120 | static kleidicv_error_t check_dimensions(size_t src_dim, size_t dst_dim) { | |
| 13 | 120 | size_t half_src_dim = src_dim / 2; | |
| 14 | |||
| 15 |
2/2✓ Branch 0 taken 58 times.
✓ Branch 1 taken 62 times.
|
120 | if ((src_dim % 2) == 0) { |
| 16 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 2 times.
|
62 | if (dst_dim == half_src_dim) { |
| 17 | 60 | return KLEIDICV_OK; | |
| 18 | } | ||
| 19 | 2 | } else { | |
| 20 |
4/4✓ Branch 0 taken 34 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 32 times.
✓ Branch 3 taken 2 times.
|
58 | if (dst_dim == half_src_dim || dst_dim == (half_src_dim + 1)) { |
| 21 | 56 | return KLEIDICV_OK; | |
| 22 | } | ||
| 23 | } | ||
| 24 | |||
| 25 | 4 | return KLEIDICV_ERROR_RANGE; | |
| 26 | 120 | } | |
| 27 | |||
| 28 | // Disable the warning, as the complexity is just above the threshold, it's | ||
| 29 | // better to leave it in one piece. | ||
| 30 | // NOLINTBEGIN(readability-function-cognitive-complexity) | ||
| 31 | KLEIDICV_TARGET_FN_ATTRS | ||
| 32 | 65 | kleidicv_error_t resize_to_quarter_u8(const uint8_t *src, size_t src_stride, | |
| 33 | size_t src_width, size_t src_height, | ||
| 34 | uint8_t *dst, size_t dst_stride, | ||
| 35 | size_t dst_width, size_t dst_height) { | ||
| 36 |
4/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 64 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 64 times.
|
65 | CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); |
| 37 |
4/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 63 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 63 times.
|
64 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); |
| 38 |
6/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 62 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 61 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 61 times.
|
63 | CHECK_IMAGE_SIZE(src_width, src_height); |
| 39 | |||
| 40 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 59 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 59 times.
|
63 | if (kleidicv_error_t ret = check_dimensions(src_width, dst_width)) { |
| 41 | 2 | return ret; | |
| 42 | } | ||
| 43 | |||
| 44 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 57 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 57 times.
|
61 | if (kleidicv_error_t ret = check_dimensions(src_height, dst_height)) { |
| 45 | 2 | return ret; | |
| 46 | } | ||
| 47 | |||
| 48 | using VecTraits = neon::VecTraits<uint8_t>; | ||
| 49 | 57 | constexpr size_t kVectorLengthX2 = kVectorLength * 2; | |
| 50 | 57 | constexpr size_t kVectorLengthX4 = kVectorLength * 4; | |
| 51 | |||
| 52 |
2/2✓ Branch 0 taken 146 times.
✓ Branch 1 taken 57 times.
|
349 | for (; src_height >= 2; src_height -= 2, src += (src_stride * 2), |
| 53 | 146 | --dst_height, dst += dst_stride) { | |
| 54 | 146 | const uint8_t *src_l = src; | |
| 55 | 146 | uint8_t *dst_l = dst; | |
| 56 | 146 | size_t src_width_l = src_width; | |
| 57 | 146 | size_t dst_width_l = dst_width; | |
| 58 | |||
| 59 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 146 times.
|
152 | for (; src_width_l >= kVectorLengthX4; |
| 60 | 6 | src_width_l -= kVectorLengthX4, dst_width_l -= kVectorLengthX2, | |
| 61 | 6 | dst_l += kVectorLengthX2, src_l += kVectorLengthX4) { | |
| 62 | 6 | KLEIDICV_PREFETCH(src_l + 1024); | |
| 63 | 6 | KLEIDICV_PREFETCH(src_l + src_stride + 1024); | |
| 64 | |||
| 65 | 6 | uint8x16x4_t top_line, bottom_line; | |
| 66 | 6 | uint16x8_t top_line_pairs_summed[4]; | |
| 67 | 6 | uint16x8_t bottom_line_pairs_summed[4]; | |
| 68 | 6 | uint16x8_t result_before_averaging[4]; | |
| 69 | 6 | uint8x16x2_t result; | |
| 70 | |||
| 71 | 6 | VecTraits::load(src_l, top_line); | |
| 72 | 6 | VecTraits::load(&src_l[src_stride], bottom_line); | |
| 73 | |||
| 74 | 6 | top_line_pairs_summed[0] = vpaddlq_u8(top_line.val[0]); | |
| 75 | 6 | top_line_pairs_summed[1] = vpaddlq_u8(top_line.val[1]); | |
| 76 | 6 | top_line_pairs_summed[2] = vpaddlq_u8(top_line.val[2]); | |
| 77 | 6 | top_line_pairs_summed[3] = vpaddlq_u8(top_line.val[3]); | |
| 78 | |||
| 79 | 6 | bottom_line_pairs_summed[0] = vpaddlq_u8(bottom_line.val[0]); | |
| 80 | 6 | bottom_line_pairs_summed[1] = vpaddlq_u8(bottom_line.val[1]); | |
| 81 | 6 | bottom_line_pairs_summed[2] = vpaddlq_u8(bottom_line.val[2]); | |
| 82 | 6 | bottom_line_pairs_summed[3] = vpaddlq_u8(bottom_line.val[3]); | |
| 83 | |||
| 84 | 6 | result_before_averaging[0] = | |
| 85 | 6 | vaddq_u16(top_line_pairs_summed[0], bottom_line_pairs_summed[0]); | |
| 86 | 6 | result_before_averaging[1] = | |
| 87 | 6 | vaddq_u16(top_line_pairs_summed[1], bottom_line_pairs_summed[1]); | |
| 88 | 6 | result_before_averaging[2] = | |
| 89 | 6 | vaddq_u16(top_line_pairs_summed[2], bottom_line_pairs_summed[2]); | |
| 90 | 6 | result_before_averaging[3] = | |
| 91 | 6 | vaddq_u16(top_line_pairs_summed[3], bottom_line_pairs_summed[3]); | |
| 92 | |||
| 93 | 6 | result.val[0] = | |
| 94 | 6 | vrshrn_high_n_u16(vrshrn_n_u16(result_before_averaging[0], 2), | |
| 95 | result_before_averaging[1], 2); | ||
| 96 | 6 | result.val[1] = | |
| 97 | 6 | vrshrn_high_n_u16(vrshrn_n_u16(result_before_averaging[2], 2), | |
| 98 | result_before_averaging[3], 2); | ||
| 99 | |||
| 100 | 6 | VecTraits::store(result, dst_l); | |
| 101 | 6 | } | |
| 102 | |||
| 103 |
2/2✓ Branch 0 taken 840 times.
✓ Branch 1 taken 146 times.
|
986 | for (; src_width_l > 1; |
| 104 | 840 | src_width_l -= 2, src_l += 2, --dst_width_l, ++dst_l) { | |
| 105 | 840 | disable_loop_vectorization(); | |
| 106 | 840 | *dst_l = rounding_shift_right<uint16_t>( | |
| 107 | 1680 | static_cast<uint16_t>(*src_l) + *(src_l + 1) + *(src_l + src_stride) + | |
| 108 | 840 | *(src_l + src_stride + 1), | |
| 109 | 2); | ||
| 110 | 840 | } | |
| 111 | |||
| 112 |
2/2✓ Branch 0 taken 126 times.
✓ Branch 1 taken 20 times.
|
146 | if (dst_width_l) { |
| 113 | 20 | *dst_l = rounding_shift_right<uint16_t>( | |
| 114 | 20 | static_cast<uint16_t>(*src_l) + *(src_l + src_stride), 1); | |
| 115 | 20 | } | |
| 116 | 146 | } | |
| 117 | |||
| 118 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 17 times.
|
57 | if (dst_height) { |
| 119 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
|
18 | for (; src_width >= kVectorLengthX4; |
| 120 | 1 | src_width -= kVectorLengthX4, dst_width -= kVectorLengthX2, | |
| 121 | 1 | dst += kVectorLengthX2, src += kVectorLengthX4) { | |
| 122 | 1 | uint8x16x4_t vsrc; | |
| 123 | 1 | uint16x8_t vsrc_line_pairs_summed[4]; | |
| 124 | 1 | uint8x16x2_t result; | |
| 125 | 1 | VecTraits::load(&src[0], vsrc); | |
| 126 | |||
| 127 | 1 | vsrc_line_pairs_summed[0] = vpaddlq_u8(vsrc.val[0]); | |
| 128 | 1 | vsrc_line_pairs_summed[1] = vpaddlq_u8(vsrc.val[1]); | |
| 129 | 1 | vsrc_line_pairs_summed[2] = vpaddlq_u8(vsrc.val[2]); | |
| 130 | 1 | vsrc_line_pairs_summed[3] = vpaddlq_u8(vsrc.val[3]); | |
| 131 | |||
| 132 | 1 | result.val[0] = | |
| 133 | 1 | vrshrn_high_n_u16(vrshrn_n_u16(vsrc_line_pairs_summed[0], 1), | |
| 134 | vsrc_line_pairs_summed[1], 1); | ||
| 135 | 1 | result.val[1] = | |
| 136 | 1 | vrshrn_high_n_u16(vrshrn_n_u16(vsrc_line_pairs_summed[2], 1), | |
| 137 | vsrc_line_pairs_summed[3], 1); | ||
| 138 | |||
| 139 | 1 | VecTraits::store(result, dst); | |
| 140 | 1 | } | |
| 141 | |||
| 142 |
2/2✓ Branch 0 taken 101 times.
✓ Branch 1 taken 17 times.
|
118 | for (; src_width > 1; src_width -= 2, src += 2, --dst_width, ++dst) { |
| 143 | 101 | disable_loop_vectorization(); | |
| 144 | 101 | *dst = rounding_shift_right<uint16_t>( | |
| 145 | 101 | static_cast<uint16_t>(*src) + *(src + 1), 1); | |
| 146 | 101 | } | |
| 147 | |||
| 148 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 8 times.
|
17 | if (dst_width) { |
| 149 | 8 | *dst = *src; | |
| 150 | 8 | } | |
| 151 | 17 | } | |
| 152 | 57 | return KLEIDICV_OK; | |
| 153 | 65 | } | |
| 154 | // NOLINTEND(readability-function-cognitive-complexity) | ||
| 155 | |||
| 156 | } // namespace kleidicv::neon | ||
| 157 |