KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/resize/resize_sc.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 120 120 100.0%
Functions: 34 34 100.0%
Branches: 36 36 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_RESIZE_SC_H
6 #define KLEIDICV_RESIZE_SC_H
7
8 #include "kleidicv/kleidicv.h"
9 #include "kleidicv/sve2.h"
10
11 namespace KLEIDICV_TARGET_NAMESPACE {
12
13 384 static inline svuint8_t resize_parallel_vectors(
14 svbool_t pg, svuint8_t top_row, svuint8_t bottom_row) KLEIDICV_STREAMING {
15 384 svuint16_t result_before_averaging_b = svaddlb(top_row, bottom_row);
16 384 svuint16_t result_before_averaging_t = svaddlt(top_row, bottom_row);
17 768 svuint16_t result_before_averaging =
18 384 svadd_x(pg, result_before_averaging_b, result_before_averaging_t);
19 768 return svrshrnb(result_before_averaging, 2);
20 384 }
21
22 18 static inline void parallel_rows_vectors_path_2x(
23 svbool_t pg, Rows<const uint8_t> src_rows,
24 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
25 18 svuint8_t top_row_0 = svld1(pg, &src_rows.at(0)[0]);
26 18 svuint8_t bottom_row_0 = svld1(pg, &src_rows.at(1)[0]);
27 18 svuint8_t top_row_1 = svld1_vnum(pg, &src_rows.at(0)[0], 1);
28 18 svuint8_t bottom_row_1 = svld1_vnum(pg, &src_rows.at(1)[0], 1);
29 18 svuint16_t sum0b = svaddlb(top_row_0, bottom_row_0);
30 18 svuint16_t sum0t = svaddlt(top_row_0, bottom_row_0);
31 18 svuint16_t sum1b = svaddlb(top_row_1, bottom_row_1);
32 18 svuint16_t sum1t = svaddlt(top_row_1, bottom_row_1);
33 18 svuint8_t res0 = svrshrnb(svadd_x(pg, sum0b, sum0t), 2);
34 18 svuint8_t res1 = svrshrnb(svadd_x(pg, sum1b, sum1t), 2);
35 18 svuint8_t result = svuzp1(res0, res1);
36 18 svst1(pg, &dst_rows[0], result);
37 18 }
38
39 384 static inline void parallel_rows_vectors_path(
40 svbool_t pg, Rows<const uint8_t> src_rows,
41 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
42 384 svuint8_t top_line = svld1(pg, &src_rows.at(0)[0]);
43 384 svuint8_t bottom_line = svld1(pg, &src_rows.at(1)[0]);
44 384 svuint8_t result = resize_parallel_vectors(pg, top_line, bottom_line);
45 384 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result));
46 384 }
47
48 template <typename ScalarType>
49 438 static inline void process_parallel_rows(Rows<const ScalarType> src_rows,
50 size_t src_width,
51 Rows<ScalarType> dst_rows,
52 size_t dst_width) KLEIDICV_STREAMING {
53 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
54 438 const size_t size_mask = ~static_cast<size_t>(1U);
55
56 // Process rows up to the last even pixel index.
57 876 LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()}
58 // Process double vector chunks.
59 456 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
60 18 auto pg = VecTraits::svptrue();
61 36 parallel_rows_vectors_path_2x(pg, src_rows.at(0, index),
62 18 dst_rows.at(0, index / 2));
63 18 })
64 474 .unroll_once([&](size_t index) KLEIDICV_STREAMING {
65 36 auto pg = VecTraits::svptrue();
66 72 parallel_rows_vectors_path(pg, src_rows.at(0, index),
67 36 dst_rows.at(0, index / 2));
68 36 })
69 // Process the remaining chunk of the row.
70 786 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
71 348 auto pg = VecTraits::svwhilelt(index, length);
72 696 parallel_rows_vectors_path(pg, src_rows.at(0, index),
73 348 dst_rows.at(0, index / 2));
74 348 });
75
76 // Handle the last odd column, if any.
77
2/2
✓ Branch 0 taken 378 times.
✓ Branch 1 taken 60 times.
438 if (dst_width > (src_width / 2)) {
78 60 dst_rows[dst_width - 1] = rounding_shift_right<uint16_t>(
79 120 static_cast<const uint16_t>(src_rows.at(0, src_width - 1)[0]) +
80 60 src_rows.at(1, src_width - 1)[0],
81 1);
82 60 }
83 438 }
84
85 static inline svuint8_t resize_single_row(svbool_t pg,
86 svuint8_t row) KLEIDICV_STREAMING {
87 return svrshrnb(svadalp_x(pg, svdup_u16(0), row), 1);
88 }
89
90 3 static inline void single_row_vector_path_2x(
91 svbool_t pg, Rows<const uint8_t> src_rows,
92 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
93 3 svuint8_t line0 = svld1(pg, &src_rows[0]);
94 3 svuint8_t line1 = svld1_vnum(pg, &src_rows[0], 1);
95 3 svuint8_t result0 = svrshrnb(svadalp_x(pg, svdup_u16(0), line0), 1);
96 3 svuint8_t result1 = svrshrnb(svadalp_x(pg, svdup_u16(0), line1), 1);
97 3 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result0));
98 3 svst1b_vnum(pg, &dst_rows[0], 1, svreinterpret_u16_u8(result1));
99 3 }
100
101 36 static inline void single_row_vector_path(
102 svbool_t pg, Rows<const uint8_t> src_rows,
103 Rows<uint8_t> dst_rows) KLEIDICV_STREAMING {
104 36 svuint8_t line = svld1(pg, &src_rows.at(0)[0]);
105 36 svuint8_t result = svrshrnb(svadalp_x(pg, svdup_u16(0), line), 1);
106 36 svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result));
107 36 }
108
109 template <typename ScalarType>
110 51 static inline void process_single_row(Rows<const ScalarType> src_rows,
111 size_t src_width,
112 Rows<ScalarType> dst_rows,
113 size_t dst_width) KLEIDICV_STREAMING {
114 using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
115 51 const size_t size_mask = ~static_cast<size_t>(1U);
116
117 // Process rows up to the last even pixel index.
118 102 LoopUnroll2{src_width & size_mask, VecTraits::num_lanes()}
119 // Process full vector chunks.
120 54 .unroll_twice([&](size_t index) KLEIDICV_STREAMING {
121 3 auto pg = VecTraits::svptrue();
122 6 single_row_vector_path_2x(pg, src_rows.at(0, index),
123 3 dst_rows.at(0, index / 2));
124 3 })
125 60 .unroll_once([&](size_t index) KLEIDICV_STREAMING {
126 9 auto pg = VecTraits::svptrue();
127 18 single_row_vector_path(pg, src_rows.at(0, index),
128 9 dst_rows.at(0, index / 2));
129 9 })
130 // Process the remaining chunk of the row.
131 78 .remaining([&](size_t index, size_t length) KLEIDICV_STREAMING {
132 27 auto pg = VecTraits::svwhilelt(index, length);
133 54 single_row_vector_path(pg, src_rows.at(0, index),
134 27 dst_rows.at(0, index / 2));
135 27 });
136
137 // Handle the last odd column, if any.
138
2/2
✓ Branch 0 taken 27 times.
✓ Branch 1 taken 24 times.
51 if (dst_width > (src_width / 2)) {
139 24 dst_rows[dst_width - 1] = src_rows[src_width - 1];
140 24 }
141 51 }
142
143 KLEIDICV_TARGET_FN_ATTRS
144 360 static kleidicv_error_t check_dimensions(size_t src_dim,
145 size_t dst_dim) KLEIDICV_STREAMING {
146 360 size_t half_src_dim = src_dim / 2;
147
148
2/2
✓ Branch 0 taken 174 times.
✓ Branch 1 taken 186 times.
360 if ((src_dim % 2) == 0) {
149
2/2
✓ Branch 0 taken 180 times.
✓ Branch 1 taken 6 times.
186 if (dst_dim == half_src_dim) {
150 180 return KLEIDICV_OK;
151 }
152 6 } else {
153
4/4
✓ Branch 0 taken 102 times.
✓ Branch 1 taken 72 times.
✓ Branch 2 taken 96 times.
✓ Branch 3 taken 6 times.
174 if (dst_dim == half_src_dim || dst_dim == (half_src_dim + 1)) {
154 168 return KLEIDICV_OK;
155 }
156 }
157
158 12 return KLEIDICV_ERROR_RANGE;
159 360 }
160
161 195 KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t resize_to_quarter_u8_sc(
162 const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height,
163 uint8_t *dst, size_t dst_stride, size_t dst_width,
164 size_t dst_height) KLEIDICV_STREAMING {
165
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 192 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 192 times.
195 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
166
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 189 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 189 times.
192 CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height);
167
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 186 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 183 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 183 times.
189 CHECK_IMAGE_SIZE(src_width, src_height);
168
169
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 177 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 177 times.
189 if (kleidicv_error_t ret = check_dimensions(src_width, dst_width)) {
170 6 return ret;
171 }
172
173
4/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 171 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 171 times.
183 if (kleidicv_error_t ret = check_dimensions(src_height, dst_height)) {
174 6 return ret;
175 }
176
177 171 Rows<const uint8_t> src_rows{src, src_stride, /* channels*/ 1};
178 171 Rows<uint8_t> dst_rows{dst, dst_stride, /* channels*/ 1};
179 171 LoopUnroll2 loop{src_height, /* Process two rows */ 2};
180
181 // Process two rows at once.
182 609 loop.unroll_once([&](size_t) // NOLINT(readability/casting)
183 KLEIDICV_STREAMING {
184 876 process_parallel_rows(src_rows, src_width, dst_rows,
185 438 dst_width);
186 438 src_rows += 2;
187 438 ++dst_rows;
188 438 });
189
190 // Handle an odd row, if any.
191
2/2
✓ Branch 0 taken 120 times.
✓ Branch 1 taken 51 times.
171 if (dst_height > (src_height / 2)) {
192 102 loop.remaining([&](size_t, size_t) KLEIDICV_STREAMING {
193 51 process_single_row(src_rows, src_width, dst_rows, dst_width);
194 51 });
195 51 }
196 171 return KLEIDICV_OK;
197 195 }
198
199 } // namespace KLEIDICV_TARGET_NAMESPACE
200
201 #endif // KLEIDICV_RESIZE_SC_H
202