KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/resize/resize_to_quarter_neon.cpp
Date: 2026-03-05 15:57:40
Exec Total Coverage
Lines: 88 88 100.0%
Functions: 1 1 100.0%
Branches: 16 16 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/kleidicv.h"
6 #include "kleidicv/neon.h"
7 #include "kleidicv/resize/resize_linear.h"
8
9 namespace kleidicv::neon {
10
11 /// Resizes source data by averaging 4 elements to one.
12 /// In-place operation not supported.
13 ///
14 /// For even source dimensions `(2*N, 2*M)` destination dimensions should be
15 /// `(N, M)`.
16 /// In case of odd source dimensions `(2*N+1, 2*M+1)` the destination
17 /// dimensions can be either `(N+1, M+1)` or `(N, M)` or a combination of both.
18 /// In the latter cases the last respective row or column of source data will not be
19 /// processed. Currently only supports single-channel data. Number of pixels in
20 /// the source is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS.
21 ///
22 /// Even dimension example of 2x2 to 1x1 conversion:
23 /// ```
24 /// | a | b | --> | (a+b+c+d)/4 |
25 /// | c | d |
26 /// ```
27 /// Odd dimension example of 3x3 to 2x2 conversion:
28 /// ```
29 /// | a | b | c |     | (a+b+d+e)/4 | (c+f)/2 |
30 /// | d | e | f | --> | (g+h)/2     | i       |
31 /// | g | h | i |
32 /// ```
33
34 KLEIDICV_TARGET_FN_ATTRS
35 99 kleidicv_error_t resize_to_quarter_u8(const uint8_t *src, size_t src_stride,
36 size_t src_width, size_t src_height,
37 uint8_t *dst, size_t dst_stride,
38 size_t dst_width, size_t dst_height) {
39 using VecTraits = neon::VecTraits<uint8_t>;
40 99 constexpr size_t kVectorLengthX2 = kVectorLength * 2;
41 99 constexpr size_t kVectorLengthX4 = kVectorLength * 4;
42
43
2/2
✓ Branch 0 taken 234 times.
✓ Branch 1 taken 99 times.
567 for (; src_height >= 2; src_height -= 2, src += (src_stride * 2),
44 234 --dst_height, dst += dst_stride) {
45 234 const uint8_t *src_l = src;
46 234 uint8_t *dst_l = dst;
47 234 size_t src_width_l = src_width;
48 234 size_t dst_width_l = dst_width;
49
50
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 234 times.
240 for (; src_width_l >= kVectorLengthX4;
51 6 src_width_l -= kVectorLengthX4, dst_width_l -= kVectorLengthX2,
52 6 dst_l += kVectorLengthX2, src_l += kVectorLengthX4) {
53 6 KLEIDICV_PREFETCH(src_l + 1024);
54 6 KLEIDICV_PREFETCH(src_l + src_stride + 1024);
55
56 6 uint8x16x4_t top_line, bottom_line;
57 6 uint16x8_t top_line_pairs_summed[4];
58 6 uint16x8_t bottom_line_pairs_summed[4];
59 6 uint16x8_t result_before_averaging[4];
60 6 uint8x16x2_t result;
61
62 6 VecTraits::load(src_l, top_line);
63 6 VecTraits::load(&src_l[src_stride], bottom_line);
64
65 6 top_line_pairs_summed[0] = vpaddlq_u8(top_line.val[0]);
66 6 top_line_pairs_summed[1] = vpaddlq_u8(top_line.val[1]);
67 6 top_line_pairs_summed[2] = vpaddlq_u8(top_line.val[2]);
68 6 top_line_pairs_summed[3] = vpaddlq_u8(top_line.val[3]);
69
70 6 bottom_line_pairs_summed[0] = vpaddlq_u8(bottom_line.val[0]);
71 6 bottom_line_pairs_summed[1] = vpaddlq_u8(bottom_line.val[1]);
72 6 bottom_line_pairs_summed[2] = vpaddlq_u8(bottom_line.val[2]);
73 6 bottom_line_pairs_summed[3] = vpaddlq_u8(bottom_line.val[3]);
74
75 6 result_before_averaging[0] =
76 6 vaddq_u16(top_line_pairs_summed[0], bottom_line_pairs_summed[0]);
77 6 result_before_averaging[1] =
78 6 vaddq_u16(top_line_pairs_summed[1], bottom_line_pairs_summed[1]);
79 6 result_before_averaging[2] =
80 6 vaddq_u16(top_line_pairs_summed[2], bottom_line_pairs_summed[2]);
81 6 result_before_averaging[3] =
82 6 vaddq_u16(top_line_pairs_summed[3], bottom_line_pairs_summed[3]);
83
84 6 result.val[0] =
85 12 vrshrn_high_n_u16(vrshrn_n_u16(result_before_averaging[0], 2),
86 6 result_before_averaging[1], 2);
87 6 result.val[1] =
88 12 vrshrn_high_n_u16(vrshrn_n_u16(result_before_averaging[2], 2),
89 6 result_before_averaging[3], 2);
90
91 6 VecTraits::store(result, dst_l);
92 6 }
93
94
2/2
✓ Branch 0 taken 1284 times.
✓ Branch 1 taken 234 times.
1518 for (; src_width_l > 1;
95 1284 src_width_l -= 2, src_l += 2, --dst_width_l, ++dst_l) {
96 1284 disable_loop_vectorization();
97 1284 *dst_l = rounding_shift_right<uint16_t>(
98 2568 static_cast<uint16_t>(*src_l) + *(src_l + 1) + *(src_l + src_stride) +
99 1284 *(src_l + src_stride + 1),
100 2);
101 1284 }
102
103
2/2
✓ Branch 0 taken 214 times.
✓ Branch 1 taken 20 times.
234 if (dst_width_l) {
104 20 *dst_l = rounding_shift_right<uint16_t>(
105 20 static_cast<uint16_t>(*src_l) + *(src_l + src_stride), 1);
106 20 }
107 234 }
108
109
2/2
✓ Branch 0 taken 81 times.
✓ Branch 1 taken 18 times.
99 if (dst_height) {
110
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 18 times.
19 for (; src_width >= kVectorLengthX4;
111 1 src_width -= kVectorLengthX4, dst_width -= kVectorLengthX2,
112 1 dst += kVectorLengthX2, src += kVectorLengthX4) {
113 1 uint8x16x4_t vsrc;
114 1 uint16x8_t vsrc_line_pairs_summed[4];
115 1 uint8x16x2_t result;
116 1 VecTraits::load(&src[0], vsrc);
117
118 1 vsrc_line_pairs_summed[0] = vpaddlq_u8(vsrc.val[0]);
119 1 vsrc_line_pairs_summed[1] = vpaddlq_u8(vsrc.val[1]);
120 1 vsrc_line_pairs_summed[2] = vpaddlq_u8(vsrc.val[2]);
121 1 vsrc_line_pairs_summed[3] = vpaddlq_u8(vsrc.val[3]);
122
123 1 result.val[0] =
124 2 vrshrn_high_n_u16(vrshrn_n_u16(vsrc_line_pairs_summed[0], 1),
125 1 vsrc_line_pairs_summed[1], 1);
126 1 result.val[1] =
127 2 vrshrn_high_n_u16(vrshrn_n_u16(vsrc_line_pairs_summed[2], 1),
128 1 vsrc_line_pairs_summed[3], 1);
129
130 1 VecTraits::store(result, dst);
131 1 }
132
133
2/2
✓ Branch 0 taken 103 times.
✓ Branch 1 taken 18 times.
121 for (; src_width > 1; src_width -= 2, src += 2, --dst_width, ++dst) {
134 103 disable_loop_vectorization();
135 103 *dst = rounding_shift_right<uint16_t>(
136 103 static_cast<uint16_t>(*src) + *(src + 1), 1);
137 103 }
138
139
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 8 times.
18 if (dst_width) {
140 8 *dst = *src;
141 8 }
142 18 }
143 99 return KLEIDICV_OK;
144 99 }
145
146 } // namespace kleidicv::neon
147