KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/filters/sobel_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 75 75 100.0%
Functions: 10 10 100.0%
Branches: 36 36 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/filters/separable_filter_3x3_neon.h"
6 #include "kleidicv/filters/sobel.h"
7 #include "kleidicv/kleidicv.h"
8 #include "kleidicv/neon.h"
9 #include "kleidicv/workspace/separable.h"
10
11 namespace kleidicv::neon {
12
13 // Template for 3x3 Sobel filters which calculate horizontal derivative
14 // approximations, often denoted as Gx.
15 //
16 // The applied weights, as the kernel is mirrored both vertically and
17 // horizontally during the convolution:
18 //     [ -1, 0, 1 ]   [ 1 ]
19 // F = [ -2, 0, 2 ] = [ 2 ] * [ -1, 0, 1 ]
20 //     [ -1, 0, 1 ]   [ 1 ]
//
// Only the primary template is declared here (no definition); implementations
// are supplied by explicit specializations for concrete element types.
21 template <typename T>
22 class HorizontalSobel3x3;
23
24 // 3x3 Sobel filter for uint8_t types which calculates horizontal derivative
25 // approximations, often denoted as Gx.
//
// The kernel is applied separably: the vertical paths read uint8_t source
// values and write [ 1, 2, 1 ]T sums into the 16-bit buffer, and the
// horizontal paths apply [ -1, 0, 1 ] to those buffered values.
26 template <>
27 class HorizontalSobel3x3<uint8_t> {
28 public:
29 using SourceType = uint8_t;
30 using BufferType = int16_t;
31 using DestinationType = int16_t;
32
33 // Applies vertical filtering vector using SIMD operations.
34 //
35 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
//
// The widening intrinsics (vaddl_u8, vshll_n_u8) produce uint16x8_t, so the
// accumulation is kept in unsigned 16-bit vectors and only reinterpreted as
// signed when stored. This avoids relying on implicit (lax) vector
// conversions. The largest possible sum is 255 + 2 * 255 + 255 = 1020, so the
// reinterpretation never changes the value.
36 1494 void vertical_vector_path(uint8x16_t src[3], BufferType *dst) const {
37 1494 uint16x8_t acc_l = vaddl_u8(vget_low_u8(src[0]), vget_low_u8(src[2]));
38 1494 uint16x8_t acc_h = vaddl_u8(vget_high_u8(src[0]), vget_high_u8(src[2]));
39 1494 uint16x8_t shift_l = vshll_n_u8(vget_low_u8(src[1]), 1);
40 1494 uint16x8_t shift_h = vshll_n_u8(vget_high_u8(src[1]), 1);
41 1494 acc_l = vaddq_u16(acc_l, shift_l);
42 1494 acc_h = vaddq_u16(acc_h, shift_h);
43 1494 vst1q(&dst[0], vreinterpretq_s16_u16(acc_l));
44 1494 vst1q(&dst[VecTraits<BufferType>::num_lanes()], vreinterpretq_s16_u16(acc_h));
45 1494 }
46
47 // Applies vertical filtering vector using scalar operations.
48 //
49 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
50 8556 void vertical_scalar_path(const SourceType src[3], BufferType *dst) const {
51 // Explicitly narrow. Overflow is permitted.
52 8556 dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]);
53 8556 }
54
55 // Applies horizontal filtering vector using SIMD operations.
56 //
57 // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
//
// The centre tap has weight 0, so src[1] is not read.
58 2560 void horizontal_vector_path(int16x8_t src[3], DestinationType *dst) const {
59 2560 vst1q(&dst[0], vsubq_s16(src[2], src[0]));
60 2560 }
61
62 // Applies horizontal filtering vector using scalar operations.
63 //
64 // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
65 9456 void horizontal_scalar_path(const BufferType src[3],
66 DestinationType *dst) const {
67 // Explicitly narrow. Overflow is permitted.
68 9456 dst[0] = static_cast<DestinationType>(src[2] - src[0]);
69 9456 }
70 }; // end of class HorizontalSobel3x3<uint8_t>
71
72 // Template for 3x3 Sobel filters which calculate vertical derivative
73 // approximations, often denoted as Gy.
74 //
75 // The applied weights, as the kernel is mirrored both vertically and
76 // horizontally during the convolution:
77 //     [ -1, -2, -1 ]   [ -1 ]
78 // F = [  0,  0,  0 ] = [  0 ] * [ 1, 2, 1 ]
79 //     [  1,  2,  1 ]   [  1 ]
//
// Only the primary template is declared here (no definition); implementations
// are supplied by explicit specializations for concrete element types.
80 template <typename T>
81 class VerticalSobel3x3;
82
83 // 3x3 Sobel filter for uint8_t types which calculates vertical derivative
84 // approximations, often denoted as Gy.
//
// The kernel is applied separably: the vertical paths read uint8_t source
// values and write [ -1, 0, 1 ]T differences into the 16-bit buffer, and the
// horizontal paths apply [ 1, 2, 1 ] to those buffered values.
85 template <>
86 class VerticalSobel3x3<uint8_t> {
87 public:
88 using SourceType = uint8_t;
89 using BufferType = int16_t;
90 using DestinationType = int16_t;
91
92 // Applies vertical filtering vector using SIMD operations.
93 //
94 // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
//
// The widening subtraction is held in unsigned 16-bit lanes; reinterpreting
// those bits as int16 at the store yields the signed difference. The centre
// tap has weight 0, so src[1] is not read.
95 1494 void vertical_vector_path(uint8x16_t src[3], BufferType *dst) const {
96 1494 uint16x8_t acc_l = vsubl_u8(vget_low_u8(src[2]), vget_low_u8(src[0]));
97 1494 uint16x8_t acc_h = vsubl_u8(vget_high_u8(src[2]), vget_high_u8(src[0]));
98 1494 vst1q(&dst[0], vreinterpretq_s16_u16(acc_l));
99 2988 vst1q(&dst[VecTraits<BufferType>::num_lanes()],
100 1494 vreinterpretq_s16_u16(acc_h));
101 1494 }
102
103 // Applies vertical filtering vector using scalar operations.
104 //
105 // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
106 8556 void vertical_scalar_path(const SourceType src[3], BufferType *dst) const {
107 // Explicitly narrow. Overflow is permitted.
108 8556 dst[0] = static_cast<DestinationType>(src[2] - src[0]);
109 8556 }
110
111 // Applies horizontal filtering vector using SIMD operations.
112 //
113 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
//
// The weight of 2 on the centre tap is applied as a left shift by one.
114 2560 void horizontal_vector_path(int16x8_t src[3], DestinationType *dst) const {
115 2560 int16x8_t acc = vaddq_s16(src[0], src[2]);
116 2560 acc = vaddq_s16(acc, vshlq_n_s16(src[1], 1));
117 2560 vst1q(&dst[0], acc);
118 2560 }
119
120 // Applies horizontal filtering vector using scalar operations.
121 //
122 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
123 9456 void horizontal_scalar_path(const BufferType src[3],
124 DestinationType *dst) const {
125 // Explicitly narrow. Overflow is permitted.
126 9456 dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]);
127 9456 }
128 }; // end of class VerticalSobel3x3<uint8_t>
129
// Runs the 3x3 Sobel filter described by HorizontalSobel3x3<uint8_t> over a
// uint8_t source image, writing int16_t results.
//
// src / src_stride and dst / dst_stride describe the source and destination
// rows; width, height and channels describe the image. y_begin and y_end are
// forwarded unchanged to the workspace's process() call.
//
// Returns KLEIDICV_ERROR_NOT_IMPLEMENTED when channels exceeds
// KLEIDICV_MAXIMUM_CHANNEL_COUNT, KLEIDICV_ERROR_ALLOCATION when the workspace
// cannot be created, and KLEIDICV_OK after the filter has been run.
// NOTE(review): the CHECK_* macros are defined elsewhere; they presumably
// return an error code early for invalid pointers, strides or sizes - confirm.
130 KLEIDICV_TARGET_FN_ATTRS
131 90 kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8(
132 const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
133 size_t width, size_t height, size_t y_begin, size_t y_end,
134 size_t channels) {
135
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 89 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 89 times.
90 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
136
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 87 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 87 times.
89 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
137
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 86 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 85 times.
87 CHECK_IMAGE_SIZE(width, height);
138
139
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
85 if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
140 1 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
141 }
142
143 84 Rectangle rect{width, height};
144 84 Rows<const uint8_t> src_rows{src, src_stride, channels};
145 84 Rows<int16_t> dst_rows{dst, dst_stride, channels};
146
// The workspace is created with an int16_t element size, which matches the
// filter's BufferType.
147 84 auto workspace =
148 84 SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t));
149
2/2
✓ Branch 0 taken 83 times.
✓ Branch 1 taken 1 times.
84 if (!workspace) {
150 1 return KLEIDICV_ERROR_ALLOCATION;
151 }
152
// Image borders are handled with the REPLICATE border type.
153 83 HorizontalSobel3x3<uint8_t> horizontal_sobel;
154 83 SeparableFilter3x3<HorizontalSobel3x3<uint8_t>> filter{horizontal_sobel};
155 166 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
156 83 FixedBorderType::REPLICATE, filter);
157 83 return KLEIDICV_OK;
158 90 }
159
// Runs the 3x3 Sobel filter described by VerticalSobel3x3<uint8_t> over a
// uint8_t source image, writing int16_t results.
//
// src / src_stride and dst / dst_stride describe the source and destination
// rows; width, height and channels describe the image. y_begin and y_end are
// forwarded unchanged to the workspace's process() call.
//
// Returns KLEIDICV_ERROR_NOT_IMPLEMENTED when channels exceeds
// KLEIDICV_MAXIMUM_CHANNEL_COUNT, KLEIDICV_ERROR_ALLOCATION when the workspace
// cannot be created, and KLEIDICV_OK after the filter has been run.
// NOTE(review): the CHECK_* macros are defined elsewhere; they presumably
// return an error code early for invalid pointers, strides or sizes - confirm.
160 KLEIDICV_TARGET_FN_ATTRS
161 90 kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8(
162 const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
163 size_t width, size_t height, size_t y_begin, size_t y_end,
164 size_t channels) {
165
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 89 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 89 times.
90 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
166
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 87 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 87 times.
89 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
167
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 86 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 85 times.
87 CHECK_IMAGE_SIZE(width, height);
168
169
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.
85 if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
170 1 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
171 }
172
173 84 Rectangle rect{width, height};
174 84 Rows<const uint8_t> src_rows{src, src_stride, channels};
175 84 Rows<int16_t> dst_rows{dst, dst_stride, channels};
176
// The workspace is created with an int16_t element size, which matches the
// filter's BufferType.
177 84 auto workspace =
178 84 SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t));
179
2/2
✓ Branch 0 taken 83 times.
✓ Branch 1 taken 1 times.
84 if (!workspace) {
180 1 return KLEIDICV_ERROR_ALLOCATION;
181 }
182
// Image borders are handled with the REPLICATE border type.
183 83 VerticalSobel3x3<uint8_t> vertical_sobel;
184 83 SeparableFilter3x3<VerticalSobel3x3<uint8_t>> filter{vertical_sobel};
185 166 workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
186 83 FixedBorderType::REPLICATE, filter);
187 83 return KLEIDICV_OK;
188 90 }
189
190 } // namespace kleidicv::neon
191