KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/resize/resize_to_quarter_sc.h
Date: 2026-03-05 15:57:40
Exec Total Coverage
Lines: 114 114 100.0%
Functions: 48 48 100.0%
Branches: 6 6 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_RESIZE_SC_H
6 #define KLEIDICV_RESIZE_SC_H
7
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/sve2.h"
10
11 /// Resizes source data by averaging 4 elements to one.
12 /// In-place operation not supported.
13 ///
14 /// For even source dimensions `(2*N, 2*M)` destination dimensions should be
15 /// `(N, M)`.
16 /// In case of odd source dimensions `(2*N+1, 2*M+1)` destination
17 /// dimensions could be either `(N+1, M+1)` or `(N, M)` or a combination of
18 /// both. In the latter cases the last respective row or column of source data
19 /// will not be processed. Currently only supports single-channel data. Number
20 /// of pixels in the source is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS.
21 ///
22 /// Even dimension example of 2x2 to 1x1 conversion:
23 /// ```
24 /// | a | b | --> | (a+b+c+d)/4 |
25 /// | c | d |
26 /// ```
27 /// Odd dimension example of 3x3 to 2x2 conversion:
28 /// ```
29 /// | a | b | c |     | (a+b+d+e)/4 | (c+f)/2 |
30 /// | d | e | f | --> | (g+h)/2     | i       |
31 /// | g | h | i |
32 /// ```
33
34 namespace KLEIDICV_TARGET_NAMESPACE {
35
36 864 static inline svuint8_t resize_parallel_vectors(
37 svbool_t pg, svuint8_t top_row, svuint8_t bottom_row) KLEIDICV_STREAMING {
38 864 svuint16_t result_before_averaging_b = svaddlb(top_row, bottom_row);
39 864 svuint16_t result_before_averaging_t = svaddlt(top_row, bottom_row);
40 1728 svuint16_t result_before_averaging =
41 864 svadd_x(pg, result_before_averaging_b, result_before_averaging_t);
42 1728 return svrshrnb(result_before_averaging, 2);
43 864 }
44
45 96 static inline void parallel_rows_vectors_path_2x(
46 svbool_t pg, Rows<const uint8_t> src_rows,
47 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
48 #if KLEIDICV_TARGET_SME2
49 24 svcount_t pg_counter = svptrue_c8();
50 24 auto src_top = svld1_x2(pg_counter, &src_rows.at(0)[0]);
51 24 auto src_bottom = svld1_x2(pg_counter, &src_rows.at(1)[0]);
52 24 svuint8_t top_row_0 = svget2(src_top, 0);
53 24 svuint8_t top_row_1 = svget2(src_top, 1);
54 24 svuint8_t bottom_row_0 = svget2(src_bottom, 0);
55 24 svuint8_t bottom_row_1 = svget2(src_bottom, 1);
56 #else
57 72 svuint8_t top_row_0 = svld1(pg, &src_rows.at(0)[0]);
58 72 svuint8_t bottom_row_0 = svld1(pg, &src_rows.at(1)[0]);
59 72 svuint8_t top_row_1 = svld1_vnum(pg, &src_rows.at(0)[0], 1);
60 72 svuint8_t bottom_row_1 = svld1_vnum(pg, &src_rows.at(1)[0], 1);
61 #endif // KLEIDICV_TARGET_SME2
62 96 svuint16_t sum0b = svaddlb(top_row_0, bottom_row_0);
63 96 svuint16_t sum0t = svaddlt(top_row_0, bottom_row_0);
64 96 svuint16_t sum1b = svaddlb(top_row_1, bottom_row_1);
65 96 svuint16_t sum1t = svaddlt(top_row_1, bottom_row_1);
66 96 svuint8_t res0 = svrshrnb(svadd_x(pg, sum0b, sum0t), 2);
67 96 svuint8_t res1 = svrshrnb(svadd_x(pg, sum1b, sum1t), 2);
68 96 svuint8_t result = svuzp1(res0, res1);
69 96 svst1(pg, &dst_rows[0], result);
70 96 }
71
72 864 static inline void parallel_rows_vectors_path(
73 svbool_t pg, Rows<const uint8_t> src_rows,
74 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
75 864 svuint8_t top_line = svld1(pg, &src_rows.at(0)[0]);
76 864 svuint8_t bottom_line = svld1(pg, &src_rows.at(1)[0]);
77 864 svuint8_t result = resize_parallel_vectors(pg, top_line, bottom_line);
78 864 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result));
79 864 }
80
81 template <typename ScalarType>
82 936 static inline void process_parallel_rows(Rows<const ScalarType> src_rows,
83 size_t src_width,
84 Rows<ScalarType> dst_rows,
85 size_t dst_width) KLEIDICV_STREAMING {
86 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
87 936 const size_t size_mask = ~static_cast<size_t>(1U);
88
89 // Process rows up to the last even pixel index.
90 1872 LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()}
91 // Process double vector chunks.
92 1032 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
93 96 auto pg = VecTraits::svptrue();
94 192 parallel_rows_vectors_path_2x(pg, src_rows.at(0, index),
95 96 dst_rows.at(0, index / 2));
96 96 })
97 984 .unroll_once([&](size_t index) KLEIDICV_STREAMING {
98 48 auto pg = VecTraits::svptrue();
99 96 parallel_rows_vectors_path(pg, src_rows.at(0, index),
100 48 dst_rows.at(0, index / 2));
101 48 })
102 // Process the remaining chunk of the row.
103 1752 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
104 816 auto pg = VecTraits::svwhilelt(index, length);
105 1632 parallel_rows_vectors_path(pg, src_rows.at(0, index),
106 816 dst_rows.at(0, index / 2));
107 816 });
108
109 // Handle the last odd column, if any.
110
2/2
✓ Branch 0 taken 856 times.
✓ Branch 1 taken 80 times.
936 if (dst_width > (src_width / 2)) {
111 80 dst_rows[dst_width - 1] = rounding_shift_right<uint16_t>(
112 160 static_cast<const uint16_t>(src_rows.at(0, src_width - 1)[0]) +
113 80 src_rows.at(1, src_width - 1)[0],
114 1);
115 80 }
116 936 }
117
118 static inline svuint8_t resize_single_row(svbool_t pg,
119 svuint8_t row) KLEIDICV_STREAMING {
120 return svrshrnb(svadalp_x(pg, svdup_u16(0), row), 1);
121 }
122
123 20 static inline void single_row_vector_path_2x(
124 svbool_t pg, Rows<const uint8_t> src_rows,
125 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
126 #if KLEIDICV_TARGET_SME2
127 5 svcount_t pg_counter = svptrue_c8();
128 5 auto src = svld1_x2(pg_counter, &src_rows.at(0)[0]);
129 5 svuint8_t line0 = svget2(src, 0);
130 5 svuint8_t line1 = svget2(src, 1);
131 #else
132 15 svuint8_t line0 = svld1(pg, &src_rows[0]);
133 15 svuint8_t line1 = svld1_vnum(pg, &src_rows[0], 1);
134 #endif // KLEIDICV_TARGET_SME2
135 20 svuint8_t result0 = svrshrnb(svadalp_x(pg, svdup_u16(0), line0), 1);
136 20 svuint8_t result1 = svrshrnb(svadalp_x(pg, svdup_u16(0), line1), 1);
137 20 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result0));
138 20 svst1b_vnum(pg, &dst_rows[0], 1, svreinterpret_u16_u8(result1));
139 20 }
140
141 56 static inline void single_row_vector_path(
142 svbool_t pg, Rows<const uint8_t> src_rows,
143 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
144 56 svuint8_t line = svld1(pg, &src_rows.at(0)[0]);
145 56 svuint8_t result = svrshrnb(svadalp_x(pg, svdup_u16(0), line), 1);
146 56 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result));
147 56 }
148
149 template <typename ScalarType>
150 72 static inline void process_single_row(Rows<const ScalarType> src_rows,
151 size_t src_width,
152 Rows<ScalarType> dst_rows,
153 size_t dst_width) KLEIDICV_STREAMING {
154 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
155 72 const size_t size_mask = ~static_cast<size_t>(1U);
156
157 // Process rows up to the last even pixel index.
158 144 LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()}
159 // Process double vector chunks.
160 92 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
161 20 auto pg = VecTraits::svptrue();
162 40 single_row_vector_path_2x(pg, src_rows.at(0, index),
163 20 dst_rows.at(0, index / 2));
164 20 })
165 84 .unroll_once([&](size_t index) KLEIDICV_STREAMING {
166 12 auto pg = VecTraits::svptrue();
167 24 single_row_vector_path(pg, src_rows.at(0, index),
168 12 dst_rows.at(0, index / 2));
169 12 })
170 // Process the remaining chunk of the row.
171 116 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
172 44 auto pg = VecTraits::svwhilelt(index, length);
173 88 single_row_vector_path(pg, src_rows.at(0, index),
174 44 dst_rows.at(0, index / 2));
175 44 });
176
177 // Handle the last odd column, if any.
178
2/2
✓ Branch 0 taken 40 times.
✓ Branch 1 taken 32 times.
72 if (dst_width > (src_width / 2)) {
179 32 dst_rows[dst_width - 1] = src_rows[src_width - 1];
180 32 }
181 72 }
182
183 396 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t resize_to_quarter_u8_sc(
184 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
185 uint8_t *dst, size_t dst_stride, size_t dst_width,
186 size_t dst_height) KLEIDICV_STREAMING {
187 396 Rows<const uint8_t> src_rows{src, src_stride, /* channels*/ 1};
188 396 Rows<uint8_t> dst_rows{dst, dst_stride, /* channels*/ 1};
189 396 LoopUnroll2 loop{src_height, /* Process two rows */ 2};
190
191 // Process two rows at once.
192 1332 loop.unroll_once([&](size_t) // NOLINT(readability/casting)
193 KLEIDICV_STREAMING {
194 1872 process_parallel_rows(src_rows, src_width, dst_rows,
195 936 dst_width);
196 936 src_rows += 2;
197 936 ++dst_rows;
198 936 });
199
200 // Handle an odd row, if any.
201
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 72 times.
396 if (dst_height > (src_height / 2)) {
202 144 loop.remaining([&](size_t, size_t) KLEIDICV_STREAMING {
203 72 process_single_row(src_rows, src_width, dst_rows, dst_width);
204 72 });
205 72 }
206 396 return KLEIDICV_OK;
207 396 }
208
209 } // namespace KLEIDICV_TARGET_NAMESPACE
210
211 #endif // KLEIDICV_RESIZE_SC_H
212