| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #ifndef KLEIDICV_RESIZE_SC_H | ||
| 6 | #define KLEIDICV_RESIZE_SC_H | ||
| 7 | |||
| 8 | #include "kleidicv/kleidicv.h" | ||
| 9 | #include "kleidicv/sve2.h" | ||
| 10 | |||
| 11 | /// Resizes source data by averaging 4 elements to one. | ||
| 12 | /// In-place operation not supported. | ||
| 13 | /// | ||
| 14 | /// For even source dimensions `(2*N, 2*M)` destination dimensions should be | ||
| 15 | /// `(N, M)`. | ||
| 16 | /// In case of odd source dimensions `(2*N+1, 2*M+1)` destination | ||
| 17 | /// dimensions can be either `(N+1, M+1)` or `(N, M)` or a combination of both. | ||
| 18 | /// In the latter cases the last respective row or column of source data will not be | ||
| 19 | /// processed. Currently only supports single-channel data. Number of pixels in | ||
| 20 | /// the source is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. | ||
| 21 | /// | ||
| 22 | /// Even dimension example of 2x2 to 1x1 conversion: | ||
| 23 | /// ``` | ||
| 24 | /// | a | b | --> | (a+b+c+d)/4 | | ||
| 25 | /// | c | d | | ||
| 26 | /// ``` | ||
| 27 | /// Odd dimension example of 3x3 to 2x2 conversion: | ||
| 28 | /// ``` | ||
| 29 | /// | a | b | c | | (a+b+d+e)/4 | (c+f)/2 | | ||
| 30 | /// | d | e | f | --> | (g+h)/2 | i | | ||
| 31 | /// | g | h | i | | ||
| 32 | /// ``` | ||
| 33 | |||
| 34 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
| 35 | |||
| 36 | 864 | static inline svuint8_t resize_parallel_vectors( | |
| 37 | svbool_t pg, svuint8_t top_row, svuint8_t bottom_row) KLEIDICV_STREAMING { | ||
| 38 | 864 | svuint16_t result_before_averaging_b = svaddlb(top_row, bottom_row); | |
| 39 | 864 | svuint16_t result_before_averaging_t = svaddlt(top_row, bottom_row); | |
| 40 | 1728 | svuint16_t result_before_averaging = | |
| 41 | 864 | svadd_x(pg, result_before_averaging_b, result_before_averaging_t); | |
| 42 | 1728 | return svrshrnb(result_before_averaging, 2); | |
| 43 | 864 | } | |
| 44 | |||
| 45 | 96 | static inline void parallel_rows_vectors_path_2x( | |
| 46 | svbool_t pg, Rows<const uint8_t> src_rows, | ||
| 47 | Rows<uint8_t> dst_rows) KLEIDICV_STREAMING { | ||
| 48 | #if KLEIDICV_TARGET_SME2 | ||
| 49 | 24 | svcount_t pg_counter = svptrue_c8(); | |
| 50 | 24 | auto src_top = svld1_x2(pg_counter, &src_rows.at(0)[0]); | |
| 51 | 24 | auto src_bottom = svld1_x2(pg_counter, &src_rows.at(1)[0]); | |
| 52 | 24 | svuint8_t top_row_0 = svget2(src_top, 0); | |
| 53 | 24 | svuint8_t top_row_1 = svget2(src_top, 1); | |
| 54 | 24 | svuint8_t bottom_row_0 = svget2(src_bottom, 0); | |
| 55 | 24 | svuint8_t bottom_row_1 = svget2(src_bottom, 1); | |
| 56 | #else | ||
| 57 | 72 | svuint8_t top_row_0 = svld1(pg, &src_rows.at(0)[0]); | |
| 58 | 72 | svuint8_t bottom_row_0 = svld1(pg, &src_rows.at(1)[0]); | |
| 59 | 72 | svuint8_t top_row_1 = svld1_vnum(pg, &src_rows.at(0)[0], 1); | |
| 60 | 72 | svuint8_t bottom_row_1 = svld1_vnum(pg, &src_rows.at(1)[0], 1); | |
| 61 | #endif // KLEIDICV_TARGET_SME2 | ||
| 62 | 96 | svuint16_t sum0b = svaddlb(top_row_0, bottom_row_0); | |
| 63 | 96 | svuint16_t sum0t = svaddlt(top_row_0, bottom_row_0); | |
| 64 | 96 | svuint16_t sum1b = svaddlb(top_row_1, bottom_row_1); | |
| 65 | 96 | svuint16_t sum1t = svaddlt(top_row_1, bottom_row_1); | |
| 66 | 96 | svuint8_t res0 = svrshrnb(svadd_x(pg, sum0b, sum0t), 2); | |
| 67 | 96 | svuint8_t res1 = svrshrnb(svadd_x(pg, sum1b, sum1t), 2); | |
| 68 | 96 | svuint8_t result = svuzp1(res0, res1); | |
| 69 | 96 | svst1(pg, &dst_rows[0], result); | |
| 70 | 96 | } | |
| 71 | |||
| 72 | 864 | static inline void parallel_rows_vectors_path( | |
| 73 | svbool_t pg, Rows<const uint8_t> src_rows, | ||
| 74 | Rows<uint8_t> dst_rows) KLEIDICV_STREAMING { | ||
| 75 | 864 | svuint8_t top_line = svld1(pg, &src_rows.at(0)[0]); | |
| 76 | 864 | svuint8_t bottom_line = svld1(pg, &src_rows.at(1)[0]); | |
| 77 | 864 | svuint8_t result = resize_parallel_vectors(pg, top_line, bottom_line); | |
| 78 | 864 | svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result)); | |
| 79 | 864 | } | |
| 80 | |||
| 81 | template <typename ScalarType> | ||
| 82 | 936 | static inline void process_parallel_rows(Rows<const ScalarType> src_rows, | |
| 83 | size_t src_width, | ||
| 84 | Rows<ScalarType> dst_rows, | ||
| 85 | size_t dst_width) KLEIDICV_STREAMING { | ||
| 86 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
| 87 | 936 | const size_t size_mask = ~static_cast<size_t>(1U); | |
| 88 | |||
| 89 | // Process rows up to the last even pixel index. | ||
| 90 | 1872 | LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()} | |
| 91 | // Process double vector chunks. | ||
| 92 | 1032 | .unroll_twice([&](size_t index) KLEIDICV_STREAMING { | |
| 93 | 96 | auto pg = VecTraits::svptrue(); | |
| 94 | 192 | parallel_rows_vectors_path_2x(pg, src_rows.at(0, index), | |
| 95 | 96 | dst_rows.at(0, index / 2)); | |
| 96 | 96 | }) | |
| 97 | 984 | .unroll_once([&](size_t index) KLEIDICV_STREAMING { | |
| 98 | 48 | auto pg = VecTraits::svptrue(); | |
| 99 | 96 | parallel_rows_vectors_path(pg, src_rows.at(0, index), | |
| 100 | 48 | dst_rows.at(0, index / 2)); | |
| 101 | 48 | }) | |
| 102 | // Process the remaining chunk of the row. | ||
| 103 | 1752 | .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING { | |
| 104 | 816 | auto pg = VecTraits::svwhilelt(index, length); | |
| 105 | 1632 | parallel_rows_vectors_path(pg, src_rows.at(0, index), | |
| 106 | 816 | dst_rows.at(0, index / 2)); | |
| 107 | 816 | }); | |
| 108 | |||
| 109 | // Handle the last odd column, if any. | ||
| 110 |
2/2✓ Branch 0 taken 856 times.
✓ Branch 1 taken 80 times.
|
936 | if (dst_width > (src_width / 2)) { |
| 111 | 80 | dst_rows[dst_width - 1] = rounding_shift_right<uint16_t>( | |
| 112 | 160 | static_cast<const uint16_t>(src_rows.at(0, src_width - 1)[0]) + | |
| 113 | 80 | src_rows.at(1, src_width - 1)[0], | |
| 114 | 1); | ||
| 115 | 80 | } | |
| 116 | 936 | } | |
| 117 | |||
| 118 | static inline svuint8_t resize_single_row(svbool_t pg, | ||
| 119 | svuint8_t row) KLEIDICV_STREAMING { | ||
| 120 | return svrshrnb(svadalp_x(pg, svdup_u16(0), row), 1); | ||
| 121 | } | ||
| 122 | |||
| 123 | 20 | static inline void single_row_vector_path_2x( | |
| 124 | svbool_t pg, Rows<const uint8_t> src_rows, | ||
| 125 | Rows<uint8_t> dst_rows) KLEIDICV_STREAMING { | ||
| 126 | #if KLEIDICV_TARGET_SME2 | ||
| 127 | 5 | svcount_t pg_counter = svptrue_c8(); | |
| 128 | 5 | auto src = svld1_x2(pg_counter, &src_rows.at(0)[0]); | |
| 129 | 5 | svuint8_t line0 = svget2(src, 0); | |
| 130 | 5 | svuint8_t line1 = svget2(src, 1); | |
| 131 | #else | ||
| 132 | 15 | svuint8_t line0 = svld1(pg, &src_rows[0]); | |
| 133 | 15 | svuint8_t line1 = svld1_vnum(pg, &src_rows[0], 1); | |
| 134 | #endif // KLEIDICV_TARGET_SME2 | ||
| 135 | 20 | svuint8_t result0 = svrshrnb(svadalp_x(pg, svdup_u16(0), line0), 1); | |
| 136 | 20 | svuint8_t result1 = svrshrnb(svadalp_x(pg, svdup_u16(0), line1), 1); | |
| 137 | 20 | svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result0)); | |
| 138 | 20 | svst1b_vnum(pg, &dst_rows[0], 1, svreinterpret_u16_u8(result1)); | |
| 139 | 20 | } | |
| 140 | |||
| 141 | 56 | static inline void single_row_vector_path( | |
| 142 | svbool_t pg, Rows<const uint8_t> src_rows, | ||
| 143 | Rows<uint8_t> dst_rows) KLEIDICV_STREAMING { | ||
| 144 | 56 | svuint8_t line = svld1(pg, &src_rows.at(0)[0]); | |
| 145 | 56 | svuint8_t result = svrshrnb(svadalp_x(pg, svdup_u16(0), line), 1); | |
| 146 | 56 | svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result)); | |
| 147 | 56 | } | |
| 148 | |||
| 149 | template <typename ScalarType> | ||
| 150 | 72 | static inline void process_single_row(Rows<const ScalarType> src_rows, | |
| 151 | size_t src_width, | ||
| 152 | Rows<ScalarType> dst_rows, | ||
| 153 | size_t dst_width) KLEIDICV_STREAMING { | ||
| 154 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
| 155 | 72 | const size_t size_mask = ~static_cast<size_t>(1U); | |
| 156 | |||
| 157 | // Process rows up to the last even pixel index. | ||
| 158 | 144 | LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()} | |
| 159 | // Process double vector chunks. | ||
| 160 | 92 | .unroll_twice([&](size_t index) KLEIDICV_STREAMING { | |
| 161 | 20 | auto pg = VecTraits::svptrue(); | |
| 162 | 40 | single_row_vector_path_2x(pg, src_rows.at(0, index), | |
| 163 | 20 | dst_rows.at(0, index / 2)); | |
| 164 | 20 | }) | |
| 165 | 84 | .unroll_once([&](size_t index) KLEIDICV_STREAMING { | |
| 166 | 12 | auto pg = VecTraits::svptrue(); | |
| 167 | 24 | single_row_vector_path(pg, src_rows.at(0, index), | |
| 168 | 12 | dst_rows.at(0, index / 2)); | |
| 169 | 12 | }) | |
| 170 | // Process the remaining chunk of the row. | ||
| 171 | 116 | .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING { | |
| 172 | 44 | auto pg = VecTraits::svwhilelt(index, length); | |
| 173 | 88 | single_row_vector_path(pg, src_rows.at(0, index), | |
| 174 | 44 | dst_rows.at(0, index / 2)); | |
| 175 | 44 | }); | |
| 176 | |||
| 177 | // Handle the last odd column, if any. | ||
| 178 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 32 times.
|
72 | if (dst_width > (src_width / 2)) { |
| 179 | 32 | dst_rows[dst_width - 1] = src_rows[src_width - 1]; | |
| 180 | 32 | } | |
| 181 | 72 | } | |
| 182 | |||
| 183 | 396 | KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t resize_to_quarter_u8_sc( | |
| 184 | const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, | ||
| 185 | uint8_t *dst, size_t dst_stride, size_t dst_width, | ||
| 186 | size_t dst_height) KLEIDICV_STREAMING { | ||
| 187 | 396 | Rows<const uint8_t> src_rows{src, src_stride, /* channels*/ 1}; | |
| 188 | 396 | Rows<uint8_t> dst_rows{dst, dst_stride, /* channels*/ 1}; | |
| 189 | 396 | LoopUnroll2 loop{src_height, /* Process two rows */ 2}; | |
| 190 | |||
| 191 | // Process two rows at once. | ||
| 192 | 1332 | loop.unroll_once([&](size_t) // NOLINT(readability/casting) | |
| 193 | KLEIDICV_STREAMING { | ||
| 194 | 1872 | process_parallel_rows(src_rows, src_width, dst_rows, | |
| 195 | 936 | dst_width); | |
| 196 | 936 | src_rows += 2; | |
| 197 | 936 | ++dst_rows; | |
| 198 | 936 | }); | |
| 199 | |||
| 200 | // Handle an odd row, if any. | ||
| 201 |
2/2✓ Branch 0 taken 324 times.
✓ Branch 1 taken 72 times.
|
396 | if (dst_height > (src_height / 2)) { |
| 202 | 144 | loop.remaining([&](size_t, size_t) KLEIDICV_STREAMING { | |
| 203 | 72 | process_single_row(src_rows, src_width, dst_rows, dst_width); | |
| 204 | 72 | }); | |
| 205 | 72 | } | |
| 206 | 396 | return KLEIDICV_OK; | |
| 207 | 396 | } | |
| 208 | |||
| 209 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
| 210 | |||
| 211 | #endif // KLEIDICV_RESIZE_SC_H | ||
| 212 |