Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #include "kleidicv/kleidicv.h" | ||
6 | #include "kleidicv/neon.h" | ||
7 | #include "kleidicv/resize/resize.h" | ||
8 | |||
9 | namespace kleidicv::neon { | ||
10 | |||
11 | KLEIDICV_TARGET_FN_ATTRS | ||
12 | 120 | static kleidicv_error_t check_dimensions(size_t src_dim, size_t dst_dim) { | |
13 | 120 | size_t half_src_dim = src_dim / 2; | |
14 | |||
15 |
2/2✓ Branch 0 taken 58 times.
✓ Branch 1 taken 62 times.
|
120 | if ((src_dim % 2) == 0) { |
16 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 2 times.
|
62 | if (dst_dim == half_src_dim) { |
17 | 60 | return KLEIDICV_OK; | |
18 | } | ||
19 | 2 | } else { | |
20 |
4/4✓ Branch 0 taken 34 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 32 times.
✓ Branch 3 taken 2 times.
|
58 | if (dst_dim == half_src_dim || dst_dim == (half_src_dim + 1)) { |
21 | 56 | return KLEIDICV_OK; | |
22 | } | ||
23 | } | ||
24 | |||
25 | 4 | return KLEIDICV_ERROR_RANGE; | |
26 | 120 | } | |
27 | |||
28 | // Disable the warning, as the complexity is just above the threshold, it's | ||
29 | // better to leave it in one piece. | ||
30 | // NOLINTBEGIN(readability-function-cognitive-complexity) | ||
31 | KLEIDICV_TARGET_FN_ATTRS | ||
32 | 65 | kleidicv_error_t resize_to_quarter_u8(const uint8_t *src, size_t src_stride, | |
33 | size_t src_width, size_t src_height, | ||
34 | uint8_t *dst, size_t dst_stride, | ||
35 | size_t dst_width, size_t dst_height) { | ||
36 |
4/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 64 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 64 times.
|
65 | CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); |
37 |
4/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 63 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 63 times.
|
64 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); |
38 |
6/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 62 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 61 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 61 times.
|
63 | CHECK_IMAGE_SIZE(src_width, src_height); |
39 | |||
40 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 59 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 59 times.
|
63 | if (kleidicv_error_t ret = check_dimensions(src_width, dst_width)) { |
41 | 2 | return ret; | |
42 | } | ||
43 | |||
44 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 57 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 57 times.
|
61 | if (kleidicv_error_t ret = check_dimensions(src_height, dst_height)) { |
45 | 2 | return ret; | |
46 | } | ||
47 | |||
48 |
2/2✓ Branch 0 taken 146 times.
✓ Branch 1 taken 57 times.
|
349 | for (; src_height >= 2; src_height -= 2, src += (src_stride * 2), |
49 | 146 | --dst_height, dst += dst_stride) { | |
50 | 146 | const uint8_t *src_l = src; | |
51 | 146 | uint8_t *dst_l = dst; | |
52 | 146 | size_t src_width_l = src_width; | |
53 | 146 | size_t dst_width_l = dst_width; | |
54 | |||
55 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 146 times.
|
152 | for (; src_width_l >= 32; |
56 | 6 | src_width_l -= 32, dst_width_l -= 16, dst_l += 16, src_l += 32) { | |
57 | 6 | uint8x16_t top_line_0 = vld1q_u8(src_l); | |
58 | 6 | uint8x16_t top_line_1 = vld1q_u8(&src_l[16]); | |
59 | 6 | uint8x16_t bottom_line_0 = vld1q_u8(&src_l[src_stride]); | |
60 | 6 | uint8x16_t bottom_line_1 = vld1q_u8(&src_l[src_stride + 16]); | |
61 | |||
62 | 6 | uint16x8_t top_line_pairs_summed_0 = vpaddlq_u8(top_line_0); | |
63 | 6 | uint16x8_t top_line_pairs_summed_1 = vpaddlq_u8(top_line_1); | |
64 | 6 | uint16x8_t bottom_line_pairs_summed_0 = vpaddlq_u8(bottom_line_0); | |
65 | 6 | uint16x8_t bottom_line_pairs_summed_1 = vpaddlq_u8(bottom_line_1); | |
66 | |||
67 | 12 | uint16x8_t result_before_averaging_0 = | |
68 | 6 | vaddq_u16(top_line_pairs_summed_0, bottom_line_pairs_summed_0); | |
69 | 12 | uint16x8_t result_before_averaging_1 = | |
70 | 6 | vaddq_u16(top_line_pairs_summed_1, bottom_line_pairs_summed_1); | |
71 | |||
72 | 6 | uint8x8_t result_0 = vrshrn_n_u16(result_before_averaging_0, 2); | |
73 | 6 | uint8x8_t result_1 = vrshrn_n_u16(result_before_averaging_1, 2); | |
74 | |||
75 | 6 | vst1_u8(&dst_l[0], result_0); | |
76 | 6 | vst1_u8(&dst_l[8], result_1); | |
77 | 6 | } | |
78 | |||
79 |
2/2✓ Branch 0 taken 648 times.
✓ Branch 1 taken 146 times.
|
794 | for (; src_width_l > 1; |
80 | 648 | src_width_l -= 2, src_l += 2, --dst_width_l, ++dst_l) { | |
81 | 648 | disable_loop_vectorization(); | |
82 | 648 | *dst_l = rounding_shift_right<uint16_t>( | |
83 | 1296 | static_cast<uint16_t>(*src_l) + *(src_l + 1) + *(src_l + src_stride) + | |
84 | 648 | *(src_l + src_stride + 1), | |
85 | 2); | ||
86 | 648 | } | |
87 | |||
88 |
2/2✓ Branch 0 taken 126 times.
✓ Branch 1 taken 20 times.
|
146 | if (dst_width_l) { |
89 | 20 | *dst_l = rounding_shift_right<uint16_t>( | |
90 | 20 | static_cast<uint16_t>(*src_l) + *(src_l + src_stride), 1); | |
91 | 20 | } | |
92 | 146 | } | |
93 | |||
94 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 17 times.
|
57 | if (dst_height) { |
95 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
|
18 | for (; src_width >= 32; |
96 | 1 | src_width -= 32, dst_width -= 16, dst += 16, src += 32) { | |
97 | 1 | uint8x16_t vsrc_0 = vld1q_u8(&src[0]); | |
98 | 1 | uint8x16_t vsrc_1 = vld1q_u8(&src[16]); | |
99 | |||
100 | 1 | uint16x8_t vsrc_line_pairs_summed_0 = vpaddlq_u8(vsrc_0); | |
101 | 1 | uint16x8_t vsrc_line_pairs_summed_1 = vpaddlq_u8(vsrc_1); | |
102 | |||
103 | 1 | uint8x8_t result_0 = vrshrn_n_u16(vsrc_line_pairs_summed_0, 1); | |
104 | 1 | uint8x8_t result_1 = vrshrn_n_u16(vsrc_line_pairs_summed_1, 1); | |
105 | |||
106 | 1 | vst1_u8(&dst[0], result_0); | |
107 | 1 | vst1_u8(&dst[8], result_1); | |
108 | 1 | } | |
109 | |||
110 |
2/2✓ Branch 0 taken 53 times.
✓ Branch 1 taken 17 times.
|
70 | for (; src_width > 1; src_width -= 2, src += 2, --dst_width, ++dst) { |
111 | 53 | disable_loop_vectorization(); | |
112 | 53 | *dst = rounding_shift_right<uint16_t>( | |
113 | 53 | static_cast<uint16_t>(*src) + *(src + 1), 1); | |
114 | 53 | } | |
115 | |||
116 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 8 times.
|
17 | if (dst_width) { |
117 | 8 | *dst = *src; | |
118 | 8 | } | |
119 | 17 | } | |
120 | 57 | return KLEIDICV_OK; | |
121 | 65 | } | |
122 | // NOLINTEND(readability-function-cognitive-complexity) | ||
123 | |||
124 | } // namespace kleidicv::neon | ||
125 |