KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/transform/warp_perspective_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 87 87 100.0%
Functions: 33 65 50.8%
Branches: 54 72 75.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <cassert>
6
7 #include "kleidicv/ctypes.h"
8 #include "kleidicv/neon.h"
9 #include "kleidicv/traits.h"
10 #include "kleidicv/transform/warp_perspective.h"
11 #include "kleidicv/utils.h"
12 #include "transform_neon.h"
13
14 namespace kleidicv::neon {
15
16 // Template for WarpPerspective transformation.
17 // Destination pixels are filled from the source, by taking pixels using the
18 // transformed coordinates that are calculated as follows:
19 //
20 //                [ T0, T1, T2 ]   [ x ]
21 //   (x',y',w') = [ T3, T4, T5 ] * [ y ]
22 //                [ T6, T7, T8 ]   [ 1 ]
23 // then
24 //
25 // xt = x' / w'
26 // yt = y' / w'
27 //
28 // or putting it together:
29 //
30 // xt = (T0*x + T1*y + T2) / (T6*x + T7*y + T8)
31 // yt = (T3*x + T4*y + T5) / (T6*x + T7*y + T8)
32 //
33
// Warps destination rows [y_begin, y_end): for every destination pixel the
// source coordinates are computed from `transform` and the pixel is sampled
// from `src_rows`.
//   Inter  - nearest or linear sampling, selected at compile time.
//   Border - replicate, or constant (the only mode that reads `border_values`).
//   IsLarge, Channels - forwarded to the pixel load/store helpers.
// `dst_rows` is advanced by one row per iteration, so it is presumably
// expected to point at row `y_begin` on entry - confirm against the caller.
34 template <typename ScalarType, bool IsLarge,
35 kleidicv_interpolation_type_t Inter, kleidicv_border_type_t Border,
36 size_t Channels>
37 789 void transform_operation(Rows<const ScalarType> src_rows, size_t src_width,
38 size_t src_height, const float transform[9],
39 const ScalarType *border_values,
40 Rows<ScalarType> dst_rows, size_t dst_width,
41 size_t y_begin, size_t y_end) {
// Lane offsets 0..3; added to a base x to address four consecutive columns.
42 static constexpr uint32_t first_few_x[] = {0, 1, 2, 3};
43 789 uint32x4_t x0123_ = vld1q_u32(first_few_x);
44
// Source stride and the last valid x / y index, broadcast to 32-bit lanes.
45 1578 uint32x4_t v_src_stride =
46 789 vdupq_n_u32(static_cast<uint32_t>(src_rows.stride()));
47 789 uint32x4_t v_xmax = vdupq_n_u32(static_cast<uint32_t>(src_width - 1));
48 789 uint32x4_t v_ymax = vdupq_n_u32(static_cast<uint32_t>(src_height - 1));
// Per-row, x-independent terms; assigned at the start of every row below.
49 789 float32x4_t tx0, ty0, tw0;
50
// Returns the source coordinates (xf, yf) for destination columns x .. x+3 of
// the current row.
51 255123 auto calculate_coordinates = [&](uint32_t x) {
52 // Add the x-dependent part Tn*x of columns x .. x+3 to the per-row terms
53 254334 float32x4_t fx = vcvtq_f32_u32(vaddq_u32(x0123_, vdupq_n_u32(x)));
54 254334 float32x4_t tx = vmlaq_n_f32(tx0, fx, transform[0]);
55 254334 float32x4_t ty = vmlaq_n_f32(ty0, fx, transform[3]);
56 254334 float32x4_t tw = vmlaq_n_f32(tw0, fx, transform[6]);
57
58 // Compute 1/w once so that both coordinates need only a multiplication
59 254334 float32x4_t iw = vdivq_f32(vdupq_n_f32(1.F), tw);
60 // Calculate coordinates into the source image
61 254334 float32x4_t xf = vmulq_f32(tx, iw);
62 254334 float32x4_t yf = vmulq_f32(ty, iw);
63 254334 return FloatVectorPair{xf, yf};
64 254334 };
65
// Linear sampling for columns x .. x+3: the border-specific helper fills
// a, b, c, d (presumably the four neighbouring source pixels) and the
// fractional offsets, which lerp_2d then combines.
66 134120 auto calculate_linear = [&](uint32_t x) {
67 133331 float32x4_t a, b, c, d, xfrac, yfrac;
68 if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) {
69 90679 load_quad_pixels_replicate<ScalarType, IsLarge>(
70 90679 calculate_coordinates(x), v_xmax, v_ymax, v_src_stride, src_rows,
71 xfrac, yfrac, a, b, c, d);
72 } else {
73 static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT);
74 42652 load_quad_pixels_constant<ScalarType, IsLarge>(
75 42652 calculate_coordinates(x), v_xmax, v_ymax, v_src_stride, border_values,
76 42652 src_rows, xfrac, yfrac, a, b, c, d);
77 }
78 266662 return lerp_2d(xfrac, yfrac, a, b, c, d);
79 133331 };
80

16/32
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 21 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 42 times.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 1 times.
✓ Branch 13 taken 21 times.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✓ Branch 16 taken 283 times.
✓ Branch 17 taken 2459 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✓ Branch 20 taken 111 times.
✓ Branch 21 taken 510 times.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✓ Branch 24 taken 280 times.
✓ Branch 25 taken 4078 times.
✗ Branch 26 not taken.
✗ Branch 27 not taken.
✓ Branch 28 taken 109 times.
✓ Branch 29 taken 1317 times.
✗ Branch 30 not taken.
✗ Branch 31 not taken.
9279 for (size_t y = y_begin; y < y_end; ++y) {
82 8490 float dy = static_cast<float>(y);
83 8490 Columns<ScalarType> dst = dst_rows.as_columns();
84 // Calculate the x-independent part of each term for this row, i.e. the
85 // values at x = 0; calculate_coordinates() adds the Tn*x part per pixel:
86 //   tx0 = T1*y + T2 and ty0 = T4*y + T5 (numerators)
87 //   tw0 = T7*y + T8 (denominator)
88 8490 tx0 = vdupq_n_f32(transform[1] * dy + transform[2]);
89 8490 ty0 = vdupq_n_f32(transform[4] * dy + transform[5]);
90 8490 tw0 = vdupq_n_f32(transform[7] * dy + transform[8]);
91
// One float vector (kStep lanes) of destination pixels per step.
92 static const size_t kStep = VecTraits<float>::num_lanes();
93 8490 LoopUnroll2<TryToAvoidTailLoop> loop{dst_width, kStep};
94 if constexpr (Inter == KLEIDICV_INTERPOLATION_NEAREST) {
95 if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) {
96 84953 loop.unroll_once([&](size_t x) {
97 82452 auto &&[xf, yf] = calculate_coordinates(x);
98 82452 transform_pixels_replicate<ScalarType, IsLarge, Channels>(
99 82452 xf, yf, v_xmax, v_ymax, v_src_stride, src_rows, dst.at(x));
100 82452 });
101 } else {
102 static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT);
103 39082 loop.unroll_once([&](size_t x) {
104 38551 auto &&[xf, yf] = calculate_coordinates(x);
105 38551 transform_pixels_constant<ScalarType, IsLarge, Channels>(
106 38551 xf, yf, v_xmax, v_ymax, v_src_stride, src_rows, dst.at(x),
107 38551 border_values);
108 38551 });
109 }
110 } else {
111 static_assert(Inter == KLEIDICV_INTERPOLATION_LINEAR);
// Four vectors (4 * kStep pixels) per call: the 32-bit results are packed by
// keeping the even-numbered 16-bit and then 8-bit elements (presumably the low
// byte of each lane - confirm) and written with one 16-byte store.
112 35829 loop.unroll_four_times([&](size_t _x) {
113 30371 uint32_t x = static_cast<uint32_t>(_x);
114 30371 ScalarType *p_dst = &dst[static_cast<ptrdiff_t>(_x)];
115 30371 uint32x4_t res0 = calculate_linear(x);
116 30371 x += kStep;
117 30371 uint32x4_t res1 = calculate_linear(x);
118 30371 uint16x8_t result16_0 = vuzp1q_u16(res0, res1);
119 30371 x += kStep;
120 30371 res0 = calculate_linear(x);
121 30371 x += kStep;
122 30371 res1 = calculate_linear(x);
123 30371 uint16x8_t result16_1 = vuzp1q_u16(res0, res1);
124 30371 vst1q_u8(p_dst, vuzp1q_u8(result16_0, result16_1));
125 30371 });
// One vector per call: the four lanes are written out individually.
126 17305 loop.unroll_once([&](size_t x) {
127 11847 ScalarType *p_dst = &dst[static_cast<ptrdiff_t>(x)];
128 11847 uint32x4_t res = calculate_linear(static_cast<uint32_t>(x));
129 11847 p_dst[0] = vgetq_lane_u32(res, 0);
130 11847 p_dst[1] = vgetq_lane_u32(res, 1);
131 11847 p_dst[2] = vgetq_lane_u32(res, 2);
132 11847 p_dst[3] = vgetq_lane_u32(res, 3);
133 11847 });
134 }
// Move on to the next destination row.
135 8490 ++dst_rows;
136 8490 }
137 789 }
138
// Validates the arguments and then warps destination rows [y_begin, y_end)
// by calling transform_operation.
// Returns:
//   KLEIDICV_ERROR_NULL_POINTER - constant border requested without a
//                                 border_value
//   KLEIDICV_ERROR_RANGE        - a dimension is >= 2^24, src_stride is
//                                 >= 2^32, or the source image is empty
//   KLEIDICV_OK                 - on success
// The CHECK_* macros presumably return their own error code when a check
// fails - see their definitions.
// NOTE(review): y_begin / y_end are not range-checked against dst_height in
// this function; presumably the caller guarantees them - confirm.
139 template <typename T>
140 802 kleidicv_error_t warp_perspective_stripe(
141 const T *src, size_t src_stride, size_t src_width, size_t src_height,
142 T *dst, size_t dst_stride, size_t dst_width, size_t dst_height,
143 size_t y_begin, size_t y_end, const float transformation[9],
144 size_t channels, kleidicv_interpolation_type_t interpolation,
145 kleidicv_border_type_t border_type, const T *border_value) {
146

4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 801 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 801 times.
802 CHECK_POINTER_AND_STRIDE(src, src_stride, src_height);
147

4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 800 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 800 times.
801 CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height);
148

2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 799 times.
800 CHECK_POINTERS(transformation);
149

6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 798 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 797 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 797 times.
799 CHECK_IMAGE_SIZE(src_width, src_height);
150

6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 796 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 795 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 795 times.
797 CHECK_IMAGE_SIZE(dst_width, dst_height);
151
152

4/4
✓ Branch 0 taken 223 times.
✓ Branch 1 taken 572 times.
✓ Branch 2 taken 222 times.
✓ Branch 3 taken 1 times.
795 if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) {
153 1 return KLEIDICV_ERROR_NULL_POINTER;
154 }
155
156 // Coordinates are calculated in float32_t, which is only precise up to
157 // 24 bits, and multiplication can only be done with 32x32 bits, so the
158 // stride must fit in 32 bits. An empty source image is not supported.
159

4/4
✓ Branch 0 taken 793 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 792 times.
✓ Branch 3 taken 1 times.
794 if (src_width >= (1ULL << 24) || src_height >= (1ULL << 24) ||
160

4/4
✓ Branch 0 taken 791 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 790 times.
✓ Branch 3 taken 1 times.
792 dst_width >= (1ULL << 24) || dst_height >= (1ULL << 24) ||
161

4/6
✓ Branch 0 taken 789 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 789 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 789 times.
790 src_stride >= (1ULL << 32) || src_width == 0 || src_height == 0) {
162 5 return KLEIDICV_ERROR_RANGE;
163 }
164
165 789 Rows<const T> src_rows{src, src_stride, channels};
166 789 Rows<T> dst_rows{dst, dst_stride, channels};
// Position the destination at the first row of this stripe; the source is
// addressed through the transformed coordinates and is not offset.
167 789 dst_rows += y_begin;
168
// This transform_operation<T>(...) call takes the run-time options
// (interpolation, border_type, channels) as leading arguments, unlike the
// template shown in this file; it is presumably a dispatching overload
// declared elsewhere - confirm.
169 789 transform_operation<T>(is_image_large(src_rows, src_height), interpolation,
170 border_type, channels, src_rows, src_width, src_height,
171 transformation, border_value, dst_rows, dst_width,
172 y_begin, y_end);
173 789 return KLEIDICV_OK;
174 802 }
175
// Explicitly instantiates warp_perspective_stripe for the given element type.
// The full signature is spelled out so that KLEIDICV_TARGET_FN_ATTRS is
// applied to the instantiated function. Only uint8_t is instantiated in the
// code visible here.
176 #define KLEIDICV_INSTANTIATE_WARP_PERSPECTIVE(type) \
177 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \
178 warp_perspective_stripe<type>( \
179 const type *src, size_t src_stride, size_t src_width, size_t src_height, \
180 type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \
181 size_t y_begin, size_t y_end, const float transformation[9], \
182 size_t channels, kleidicv_interpolation_type_t interpolation, \
183 kleidicv_border_type_t border_type, const type *border_value)
184
185 KLEIDICV_INSTANTIATE_WARP_PERSPECTIVE(uint8_t);
186
187 } // namespace kleidicv::neon
188