KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/resize/resize_sc.h
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 131 131 100.0%
Functions: 51 51 100.0%
Branches: 36 36 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_RESIZE_SC_H
6 #define KLEIDICV_RESIZE_SC_H
7
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/sve2.h"
10
11 namespace KLEIDICV_TARGET_NAMESPACE {
12
13 512 static inline svuint8_t resize_parallel_vectors(
14 svbool_t pg, svuint8_t top_row, svuint8_t bottom_row) KLEIDICV_STREAMING {
15 512 svuint16_t result_before_averaging_b = svaddlb(top_row, bottom_row);
16 512 svuint16_t result_before_averaging_t = svaddlt(top_row, bottom_row);
17 1024 svuint16_t result_before_averaging =
18 512 svadd_x(pg, result_before_averaging_b, result_before_averaging_t);
19 1024 return svrshrnb(result_before_averaging, 2);
20 512 }
21
22 96 static inline void parallel_rows_vectors_path_2x(
23 svbool_t pg, Rows<const uint8_t> src_rows,
24 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
25 #if KLEIDICV_TARGET_SME2
26 24 svcount_t pg_counter = svptrue_c8();
27 24 auto src_top = svld1_x2(pg_counter, &src_rows.at(0)[0]);
28 24 auto src_bottom = svld1_x2(pg_counter, &src_rows.at(1)[0]);
29 24 svuint8_t top_row_0 = svget2(src_top, 0);
30 24 svuint8_t top_row_1 = svget2(src_top, 1);
31 24 svuint8_t bottom_row_0 = svget2(src_bottom, 0);
32 24 svuint8_t bottom_row_1 = svget2(src_bottom, 1);
33 #else
34 72 svuint8_t top_row_0 = svld1(pg, &src_rows.at(0)[0]);
35 72 svuint8_t bottom_row_0 = svld1(pg, &src_rows.at(1)[0]);
36 72 svuint8_t top_row_1 = svld1_vnum(pg, &src_rows.at(0)[0], 1);
37 72 svuint8_t bottom_row_1 = svld1_vnum(pg, &src_rows.at(1)[0], 1);
38 #endif // KLEIDICV_TARGET_SME2
39 96 svuint16_t sum0b = svaddlb(top_row_0, bottom_row_0);
40 96 svuint16_t sum0t = svaddlt(top_row_0, bottom_row_0);
41 96 svuint16_t sum1b = svaddlb(top_row_1, bottom_row_1);
42 96 svuint16_t sum1t = svaddlt(top_row_1, bottom_row_1);
43 96 svuint8_t res0 = svrshrnb(svadd_x(pg, sum0b, sum0t), 2);
44 96 svuint8_t res1 = svrshrnb(svadd_x(pg, sum1b, sum1t), 2);
45 96 svuint8_t result = svuzp1(res0, res1);
46 96 svst1(pg, &dst_rows[0], result);
47 96 }
48
49 512 static inline void parallel_rows_vectors_path(
50 svbool_t pg, Rows<const uint8_t> src_rows,
51 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
52 512 svuint8_t top_line = svld1(pg, &src_rows.at(0)[0]);
53 512 svuint8_t bottom_line = svld1(pg, &src_rows.at(1)[0]);
54 512 svuint8_t result = resize_parallel_vectors(pg, top_line, bottom_line);
55 512 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result));
56 512 }
57
58 template <typename ScalarType>
59 584 static inline void process_parallel_rows(Rows<const ScalarType> src_rows,
60 size_t src_width,
61 Rows<ScalarType> dst_rows,
62 size_t dst_width) KLEIDICV_STREAMING {
63 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
64 584 const size_t size_mask = ~static_cast<size_t>(1U);
65
66 // Process rows up to the last even pixel index.
67 1168 LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()}
68 // Process double vector chunks.
69 680 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
70 96 auto pg = VecTraits::svptrue();
71 192 parallel_rows_vectors_path_2x(pg, src_rows.at(0, index),
72 96 dst_rows.at(0, index / 2));
73 96 })
74 632 .unroll_once([&](size_t index) KLEIDICV_STREAMING {
75 48 auto pg = VecTraits::svptrue();
76 96 parallel_rows_vectors_path(pg, src_rows.at(0, index),
77 48 dst_rows.at(0, index / 2));
78 48 })
79 // Process the remaining chunk of the row.
80 1048 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
81 464 auto pg = VecTraits::svwhilelt(index, length);
82 928 parallel_rows_vectors_path(pg, src_rows.at(0, index),
83 464 dst_rows.at(0, index / 2));
84 464 });
85
86 // Handle the last odd column, if any.
87
2/2
✓ Branch 0 taken 504 times.
✓ Branch 1 taken 80 times.
584 if (dst_width > (src_width / 2)) {
88 80 dst_rows[dst_width - 1] = rounding_shift_right<uint16_t>(
89 160 static_cast<const uint16_t>(src_rows.at(0, src_width - 1)[0]) +
90 80 src_rows.at(1, src_width - 1)[0],
91 1);
92 80 }
93 584 }
94
95 static inline svuint8_t resize_single_row(svbool_t pg,
96 svuint8_t row) KLEIDICV_STREAMING {
97 return svrshrnb(svadalp_x(pg, svdup_u16(0), row), 1);
98 }
99
100 20 static inline void single_row_vector_path_2x(
101 svbool_t pg, Rows<const uint8_t> src_rows,
102 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
103 #if KLEIDICV_TARGET_SME2
104 5 svcount_t pg_counter = svptrue_c8();
105 5 auto src = svld1_x2(pg_counter, &src_rows.at(0)[0]);
106 5 svuint8_t line0 = svget2(src, 0);
107 5 svuint8_t line1 = svget2(src, 1);
108 #else
109 15 svuint8_t line0 = svld1(pg, &src_rows[0]);
110 15 svuint8_t line1 = svld1_vnum(pg, &src_rows[0], 1);
111 #endif // KLEIDICV_TARGET_SME2
112 20 svuint8_t result0 = svrshrnb(svadalp_x(pg, svdup_u16(0), line0), 1);
113 20 svuint8_t result1 = svrshrnb(svadalp_x(pg, svdup_u16(0), line1), 1);
114 20 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result0));
115 20 svst1b_vnum(pg, &dst_rows[0], 1, svreinterpret_u16_u8(result1));
116 20 }
117
118 48 static inline void single_row_vector_path(
119 svbool_t pg, Rows<const uint8_t> src_rows,
120 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
121 48 svuint8_t line = svld1(pg, &src_rows.at(0)[0]);
122 48 svuint8_t result = svrshrnb(svadalp_x(pg, svdup_u16(0), line), 1);
123 48 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result));
124 48 }
125
126 template <typename ScalarType>
127 68 static inline void process_single_row(Rows<const ScalarType> src_rows,
128 size_t src_width,
129 Rows<ScalarType> dst_rows,
130 size_t dst_width) KLEIDICV_STREAMING {
131 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
132 68 const size_t size_mask = ~static_cast<size_t>(1U);
133
134 // Process rows up to the last even pixel index.
135 136 LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()}
136 // Process full vector chunks.
137 88 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
138 20 auto pg = VecTraits::svptrue();
139 40 single_row_vector_path_2x(pg, src_rows.at(0, index),
140 20 dst_rows.at(0, index / 2));
141 20 })
142 80 .unroll_once([&](size_t index) KLEIDICV_STREAMING {
143 12 auto pg = VecTraits::svptrue();
144 24 single_row_vector_path(pg, src_rows.at(0, index),
145 12 dst_rows.at(0, index / 2));
146 12 })
147 // Process the remaining chunk of the row.
148 104 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
149 36 auto pg = VecTraits::svwhilelt(index, length);
150 72 single_row_vector_path(pg, src_rows.at(0, index),
151 36 dst_rows.at(0, index / 2));
152 36 });
153
154 // Handle the last odd column, if any.
155
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 32 times.
68 if (dst_width > (src_width / 2)) {
156 32 dst_rows[dst_width - 1] = src_rows[src_width - 1];
157 32 }
158 68 }
159
160 KLEIDICV_TARGET_FN_ATTRS
161 480 static kleidicv_error_t check_dimensions(size_t src_dim,
162 size_t dst_dim) KLEIDICV_STREAMING {
163 480 size_t half_src_dim = src_dim / 2;
164
165
2/2
✓ Branch 0 taken 232 times.
✓ Branch 1 taken 248 times.
480 if ((src_dim % 2) == 0) {
166
2/2
✓ Branch 0 taken 240 times.
✓ Branch 1 taken 8 times.
248 if (dst_dim == half_src_dim) {
167 240 return KLEIDICV_OK;
168 }
169 8 } else {
170
4/4
✓ Branch 0 taken 136 times.
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 128 times.
✓ Branch 3 taken 8 times.
232 if (dst_dim == half_src_dim || dst_dim == (half_src_dim + 1)) {
171 224 return KLEIDICV_OK;
172 }
173 }
174
175 16 return KLEIDICV_ERROR_RANGE;
176 480 }
177
178 260 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t resize_to_quarter_u8_sc(
179 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
180 uint8_t *dst, size_t dst_stride, size_t dst_width,
181 size_t dst_height) KLEIDICV_STREAMING {
182
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 256 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 256 times.
260 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
183
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 252 times.
256 CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height);
184
6/6
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 248 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 244 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 244 times.
252 CHECK_IMAGE_SIZE(src_width, src_height);
185
186
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 236 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 236 times.
252 if (kleidicv_error_t ret = check_dimensions(src_width, dst_width)) {
187 8 return ret;
188 }
189
190
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 228 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 228 times.
244 if (kleidicv_error_t ret = check_dimensions(src_height, dst_height)) {
191 8 return ret;
192 }
193
194 228 Rows<const uint8_t> src_rows{src, src_stride, /* channels*/ 1};
195 228 Rows<uint8_t> dst_rows{dst, dst_stride, /* channels*/ 1};
196 228 LoopUnroll2 loop{src_height, /* Process two rows */ 2};
197
198 // Process two rows at once.
199 812 loop.unroll_once([&](size_t) // NOLINT(readability/casting)
200 KLEIDICV_STREAMING {
201 1168 process_parallel_rows(src_rows, src_width, dst_rows,
202 584 dst_width);
203 584 src_rows += 2;
204 584 ++dst_rows;
205 584 });
206
207 // Handle an odd row, if any.
208
2/2
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 68 times.
228 if (dst_height > (src_height / 2)) {
209 136 loop.remaining([&](size_t, size_t) KLEIDICV_STREAMING {
210 68 process_single_row(src_rows, src_width, dst_rows, dst_width);
211 68 });
212 68 }
213 228 return KLEIDICV_OK;
214 260 }
215
216 } // namespace KLEIDICV_TARGET_NAMESPACE
217
218 #endif // KLEIDICV_RESIZE_SC_H
219