Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_RESIZE_SC_H | ||
6 | #define KLEIDICV_RESIZE_SC_H | ||
7 | |||
8 | #include "kleidicv/kleidicv.h" | ||
9 | #include "kleidicv/sve2.h" | ||
10 | |||
11 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
12 | |||
13 | 384 | static inline svuint8_t resize_parallel_vectors( | |
14 | svbool_t pg, svuint8_t top_row, svuint8_t bottom_row) KLEIDICV_STREAMING { | ||
15 | 384 | svuint16_t result_before_averaging_b = svaddlb(top_row, bottom_row); | |
16 | 384 | svuint16_t result_before_averaging_t = svaddlt(top_row, bottom_row); | |
17 | 768 | svuint16_t result_before_averaging = | |
18 | 384 | svadd_x(pg, result_before_averaging_b, result_before_averaging_t); | |
19 | 768 | return svrshrnb(result_before_averaging, 2); | |
20 | 384 | } | |
21 | |||
22 | 18 | static inline void parallel_rows_vectors_path_2x( | |
23 | svbool_t pg, Rows<const uint8_t> src_rows, | ||
24 | Rows<uint8_t> dst_rows) KLEIDICV_STREAMING { | ||
25 | 18 | svuint8_t top_row_0 = svld1(pg, &src_rows.at(0)[0]); | |
26 | 18 | svuint8_t bottom_row_0 = svld1(pg, &src_rows.at(1)[0]); | |
27 | 18 | svuint8_t top_row_1 = svld1_vnum(pg, &src_rows.at(0)[0], 1); | |
28 | 18 | svuint8_t bottom_row_1 = svld1_vnum(pg, &src_rows.at(1)[0], 1); | |
29 | 18 | svuint16_t sum0b = svaddlb(top_row_0, bottom_row_0); | |
30 | 18 | svuint16_t sum0t = svaddlt(top_row_0, bottom_row_0); | |
31 | 18 | svuint16_t sum1b = svaddlb(top_row_1, bottom_row_1); | |
32 | 18 | svuint16_t sum1t = svaddlt(top_row_1, bottom_row_1); | |
33 | 18 | svuint8_t res0 = svrshrnb(svadd_x(pg, sum0b, sum0t), 2); | |
34 | 18 | svuint8_t res1 = svrshrnb(svadd_x(pg, sum1b, sum1t), 2); | |
35 | 18 | svuint8_t result = svuzp1(res0, res1); | |
36 | 18 | svst1(pg, &dst_rows[0], result); | |
37 | 18 | } | |
38 | |||
39 | 384 | static inline void parallel_rows_vectors_path( | |
40 | svbool_t pg, Rows<const uint8_t> src_rows, | ||
41 | Rows<uint8_t> dst_rows) KLEIDICV_STREAMING { | ||
42 | 384 | svuint8_t top_line = svld1(pg, &src_rows.at(0)[0]); | |
43 | 384 | svuint8_t bottom_line = svld1(pg, &src_rows.at(1)[0]); | |
44 | 384 | svuint8_t result = resize_parallel_vectors(pg, top_line, bottom_line); | |
45 | 384 | svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result)); | |
46 | 384 | } | |
47 | |||
48 | template <typename ScalarType> | ||
49 | 438 | static inline void process_parallel_rows(Rows<const ScalarType> src_rows, | |
50 | size_t src_width, | ||
51 | Rows<ScalarType> dst_rows, | ||
52 | size_t dst_width) KLEIDICV_STREAMING { | ||
53 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
54 | 438 | const size_t size_mask = ~static_cast<size_t>(1U); | |
55 | |||
56 | // Process rows up to the last even pixel index. | ||
57 | 876 | LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()} | |
58 | // Process double vector chunks. | ||
59 | 456 | .unroll_twice([&](size_t index) KLEIDICV_STREAMING { | |
60 | 18 | auto pg = VecTraits::svptrue(); | |
61 | 36 | parallel_rows_vectors_path_2x(pg, src_rows.at(0, index), | |
62 | 18 | dst_rows.at(0, index / 2)); | |
63 | 18 | }) | |
64 | 474 | .unroll_once([&](size_t index) KLEIDICV_STREAMING { | |
65 | 36 | auto pg = VecTraits::svptrue(); | |
66 | 72 | parallel_rows_vectors_path(pg, src_rows.at(0, index), | |
67 | 36 | dst_rows.at(0, index / 2)); | |
68 | 36 | }) | |
69 | // Process the remaining chunk of the row. | ||
70 | 786 | .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING { | |
71 | 348 | auto pg = VecTraits::svwhilelt(index, length); | |
72 | 696 | parallel_rows_vectors_path(pg, src_rows.at(0, index), | |
73 | 348 | dst_rows.at(0, index / 2)); | |
74 | 348 | }); | |
75 | |||
76 | // Handle the last odd column, if any. | ||
77 |
2/2✓ Branch 0 taken 378 times.
✓ Branch 1 taken 60 times.
|
438 | if (dst_width > (src_width / 2)) { |
78 | 60 | dst_rows[dst_width - 1] = rounding_shift_right<uint16_t>( | |
79 | 120 | static_cast<const uint16_t>(src_rows.at(0, src_width - 1)[0]) + | |
80 | 60 | src_rows.at(1, src_width - 1)[0], | |
81 | 1); | ||
82 | 60 | } | |
83 | 438 | } | |
84 | |||
85 | static inline svuint8_t resize_single_row(svbool_t pg, | ||
86 | svuint8_t row) KLEIDICV_STREAMING { | ||
87 | return svrshrnb(svadalp_x(pg, svdup_u16(0), row), 1); | ||
88 | } | ||
89 | |||
90 | 3 | static inline void single_row_vector_path_2x( | |
91 | svbool_t pg, Rows<const uint8_t> src_rows, | ||
92 | Rows<uint8_t> dst_rows) KLEIDICV_STREAMING { | ||
93 | 3 | svuint8_t line0 = svld1(pg, &src_rows[0]); | |
94 | 3 | svuint8_t line1 = svld1_vnum(pg, &src_rows[0], 1); | |
95 | 3 | svuint8_t result0 = svrshrnb(svadalp_x(pg, svdup_u16(0), line0), 1); | |
96 | 3 | svuint8_t result1 = svrshrnb(svadalp_x(pg, svdup_u16(0), line1), 1); | |
97 | 3 | svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result0)); | |
98 | 3 | svst1b_vnum(pg, &dst_rows[0], 1, svreinterpret_u16_u8(result1)); | |
99 | 3 | } | |
100 | |||
101 | 36 | static inline void single_row_vector_path( | |
102 | svbool_t pg, Rows<const uint8_t> src_rows, | ||
103 | Rows<uint8_t> dst_rows) KLEIDICV_STREAMING { | ||
104 | 36 | svuint8_t line = svld1(pg, &src_rows.at(0)[0]); | |
105 | 36 | svuint8_t result = svrshrnb(svadalp_x(pg, svdup_u16(0), line), 1); | |
106 | 36 | svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result)); | |
107 | 36 | } | |
108 | |||
109 | template <typename ScalarType> | ||
110 | 51 | static inline void process_single_row(Rows<const ScalarType> src_rows, | |
111 | size_t src_width, | ||
112 | Rows<ScalarType> dst_rows, | ||
113 | size_t dst_width) KLEIDICV_STREAMING { | ||
114 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
115 | 51 | const size_t size_mask = ~static_cast<size_t>(1U); | |
116 | |||
117 | // Process rows up to the last even pixel index. | ||
118 | 102 | LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()} | |
119 | // Process full vector chunks. | ||
120 | 54 | .unroll_twice([&](size_t index) KLEIDICV_STREAMING { | |
121 | 3 | auto pg = VecTraits::svptrue(); | |
122 | 6 | single_row_vector_path_2x(pg, src_rows.at(0, index), | |
123 | 3 | dst_rows.at(0, index / 2)); | |
124 | 3 | }) | |
125 | 60 | .unroll_once([&](size_t index) KLEIDICV_STREAMING { | |
126 | 9 | auto pg = VecTraits::svptrue(); | |
127 | 18 | single_row_vector_path(pg, src_rows.at(0, index), | |
128 | 9 | dst_rows.at(0, index / 2)); | |
129 | 9 | }) | |
130 | // Process the remaining chunk of the row. | ||
131 | 78 | .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING { | |
132 | 27 | auto pg = VecTraits::svwhilelt(index, length); | |
133 | 54 | single_row_vector_path(pg, src_rows.at(0, index), | |
134 | 27 | dst_rows.at(0, index / 2)); | |
135 | 27 | }); | |
136 | |||
137 | // Handle the last odd column, if any. | ||
138 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 24 times.
|
51 | if (dst_width > (src_width / 2)) { |
139 | 24 | dst_rows[dst_width - 1] = src_rows[src_width - 1]; | |
140 | 24 | } | |
141 | 51 | } | |
142 | |||
143 | KLEIDICV_TARGET_FN_ATTRS | ||
144 | 360 | static kleidicv_error_t check_dimensions(size_t src_dim, | |
145 | size_t dst_dim) KLEIDICV_STREAMING { | ||
146 | 360 | size_t half_src_dim = src_dim / 2; | |
147 | |||
148 |
2/2✓ Branch 0 taken 174 times.
✓ Branch 1 taken 186 times.
|
360 | if ((src_dim % 2) == 0) { |
149 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 6 times.
|
186 | if (dst_dim == half_src_dim) { |
150 | 180 | return KLEIDICV_OK; | |
151 | } | ||
152 | 6 | } else { | |
153 |
4/4✓ Branch 0 taken 102 times.
✓ Branch 1 taken 72 times.
✓ Branch 2 taken 96 times.
✓ Branch 3 taken 6 times.
|
174 | if (dst_dim == half_src_dim || dst_dim == (half_src_dim + 1)) { |
154 | 168 | return KLEIDICV_OK; | |
155 | } | ||
156 | } | ||
157 | |||
158 | 12 | return KLEIDICV_ERROR_RANGE; | |
159 | 360 | } | |
160 | |||
161 | 195 | KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t resize_to_quarter_u8_sc( | |
162 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
163 | uint8_t *dst, size_t dst_stride, size_t dst_width, | ||
164 | size_t dst_height) KLEIDICV_STREAMING { | ||
165 |
4/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 192 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 192 times.
|
195 | CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); |
166 |
4/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 189 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 189 times.
|
192 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); |
167 |
6/6✓ Branch 0 taken 3 times.
✓ Branch 1 taken 186 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 183 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 183 times.
|
189 | CHECK_IMAGE_SIZE(src_width, src_height); |
168 | |||
169 |
4/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 177 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 177 times.
|
189 | if (kleidicv_error_t ret = check_dimensions(src_width, dst_width)) { |
170 | 6 | return ret; | |
171 | } | ||
172 | |||
173 |
4/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 171 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 171 times.
|
183 | if (kleidicv_error_t ret = check_dimensions(src_height, dst_height)) { |
174 | 6 | return ret; | |
175 | } | ||
176 | |||
177 | 171 | Rows<const uint8_t> src_rows{src, src_stride, /* channels*/ 1}; | |
178 | 171 | Rows<uint8_t> dst_rows{dst, dst_stride, /* channels*/ 1}; | |
179 | 171 | LoopUnroll2 loop{src_height, /* Process two rows */ 2}; | |
180 | |||
181 | // Process two rows at once. | ||
182 | 609 | loop.unroll_once([&](size_t) // NOLINT(readability/casting) | |
183 | KLEIDICV_STREAMING { | ||
184 | 876 | process_parallel_rows(src_rows, src_width, dst_rows, | |
185 | 438 | dst_width); | |
186 | 438 | src_rows += 2; | |
187 | 438 | ++dst_rows; | |
188 | 438 | }); | |
189 | |||
190 | // Handle an odd row, if any. | ||
191 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 51 times.
|
171 | if (dst_height > (src_height / 2)) { |
192 | 102 | loop.remaining([&](size_t, size_t) KLEIDICV_STREAMING { | |
193 | 51 | process_single_row(src_rows, src_width, dst_rows, dst_width); | |
194 | 51 | }); | |
195 | 51 | } | |
196 | 171 | return KLEIDICV_OK; | |
197 | 195 | } | |
198 | |||
199 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
200 | |||
201 | #endif // KLEIDICV_RESIZE_SC_H | ||
202 |