KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/filters/sobel_neon.cpp
Date: 2026-03-05 15:57:40
Exec Total Coverage
Lines: 79 79 100.0%
Functions: 10 10 100.0%
Branches: 40 40 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2026 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/filters/separable_filter_3x3_neon.h"
6 #include "kleidicv/filters/sobel.h"
7 #include "kleidicv/kleidicv.h"
8 #include "kleidicv/neon.h"
9 #include "kleidicv/workspace/separable.h"
10
11 namespace kleidicv::neon {
12
13 // Template for 3x3 Sobel filters which calculate horizontal derivative
14 // approximations, often denoted as Gx.
15 //
16 // The applied weights, as the kernel is mirrored both vertically and
17 // horizontally during the convolution:
18 //     [ -1, 0, 1 ]   [ 1 ]
19 // F = [ -2, 0, 2 ] = [ 2 ] * [ -1, 0, 1 ]
20 //     [ -1, 0, 1 ]   [ 1 ]
21 template <typename T>
22 class HorizontalSobel3x3;
23
24 // 3x3 Sobel filter for uint8_t types which calculates horizontal derivative
25 // approximations, often denoted as Gx.
26 template <>
27 class HorizontalSobel3x3<uint8_t> {
28 public:
29 using SourceType = uint8_t;
30 using BufferType = int16_t;
31 using DestinationType = int16_t;
32
33 // Applies vertical filtering vector using SIMD operations.
34 //
35 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
36 1494 void vertical_vector_path(uint8x16_t src[3], BufferType *dst) const {
37 1494 uint16x8_t acc_l = vaddl_u8(vget_low_u8(src[0]), vget_low_u8(src[2]));
38 1494 uint16x8_t acc_h = vaddl_u8(vget_high_u8(src[0]), vget_high_u8(src[2]));
39 1494 uint16x8_t shift_l = vshll_n_u8(vget_low_u8(src[1]), 1);
40 1494 uint16x8_t shift_h = vshll_n_u8(vget_high_u8(src[1]), 1);
41 1494 acc_l = vaddq_u16(acc_l, shift_l);
42 1494 acc_h = vaddq_u16(acc_h, shift_h);
43 1494 vst1q(&dst[0], vreinterpretq_s16_u16(acc_l));
44 1494 vst1q(&dst[VecTraits<BufferType>::num_lanes()], vreinterpretq_s16_u16(acc_h));
45 1494 }
46
47 // Applies vertical filtering vector using scalar operations.
48 //
49 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
50 8556 void vertical_scalar_path(const SourceType src[3], BufferType *dst) const {
51 // Explicitly narrow. Overflow is permitted.
52 8556 dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]);
53 8556 }
54
55 // Applies horizontal filtering vector using SIMD operations.
56 //
57 // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
58 2560 void horizontal_vector_path(int16x8_t src[3], DestinationType *dst) const {
59 2560 vst1q(&dst[0], vsubq_s16(src[2], src[0]));
60 2560 }
61
62 // Applies horizontal filtering vector using scalar operations.
63 //
64 // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
65 9456 void horizontal_scalar_path(const BufferType src[3],
66 DestinationType *dst) const {
67 // Explicitly narrow. Overflow is permitted.
68 9456 dst[0] = static_cast<DestinationType>(src[2] - src[0]);
69 9456 }
70 }; // end of class HorizontalSobel3x3<uint8_t>
71
72 // Template for 3x3 Sobel filters which calculate vertical derivative
73 // approximations, often denoted as Gy.
74 //
75 // The applied weights, as the kernel is mirrored both vertically and
76 // horizontally during the convolution:
77 //     [ -1, -2, -1 ]   [ -1 ]
78 // F = [  0,  0,  0 ] = [  0 ] * [ 1, 2, 1 ]
79 //     [  1,  2,  1 ]   [  1 ]
80 template <typename T>
81 class VerticalSobel3x3;
82
83 // 3x3 Sobel filter for uint8_t types which calculates vertical derivative
84 // approximations, often denoted as Gy.
85 template <>
86 class VerticalSobel3x3<uint8_t> {
87 public:
88 using SourceType = uint8_t;
89 using BufferType = int16_t;
90 using DestinationType = int16_t;
91
92 // Applies vertical filtering vector using SIMD operations.
93 //
94 // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
95 1494 void vertical_vector_path(uint8x16_t src[3], BufferType *dst) const {
96 1494 uint16x8_t acc_l = vsubl_u8(vget_low_u8(src[2]), vget_low_u8(src[0]));
97 1494 uint16x8_t acc_h = vsubl_u8(vget_high_u8(src[2]), vget_high_u8(src[0]));
98 1494 vst1q(&dst[0], vreinterpretq_s16_u16(acc_l));
99 2988 vst1q(&dst[VecTraits<BufferType>::num_lanes()],
100 1494 vreinterpretq_s16_u16(acc_h));
101 1494 }
102
103 // Applies vertical filtering vector using scalar operations.
104 //
105 // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
106 8556 void vertical_scalar_path(const SourceType src[3], BufferType *dst) const {
107 // Explicitly narrow. Overflow is permitted.
108 8556 dst[0] = static_cast<DestinationType>(src[2] - src[0]);
109 8556 }
110
111 // Applies horizontal filtering vector using SIMD operations.
112 //
113 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
114 2560 void horizontal_vector_path(int16x8_t src[3], DestinationType *dst) const {
115 2560 int16x8_t acc = vaddq_s16(src[0], src[2]);
116 2560 acc = vaddq_s16(acc, vshlq_n_s16(src[1], 1));
117 2560 vst1q(&dst[0], acc);
118 2560 }
119
120 // Applies horizontal filtering vector using scalar operations.
121 //
122 // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
123 9456 void horizontal_scalar_path(const BufferType src[3],
124 DestinationType *dst) const {
125 // Explicitly narrow. Overflow is permitted.
126 9456 dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]);
127 9456 }
128 }; // end of class VerticalSobel3x3<uint8_t>
129
130 KLEIDICV_TARGET_FN_ATTRS
131 116 kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8(
132 const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
133 size_t width, size_t height, size_t y_begin, size_t y_end,
134 size_t channels) {
135
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 115 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 115 times.
116 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
136
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 113 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 113 times.
115 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
137
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 112 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 111 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 111 times.
113 CHECK_IMAGE_SIZE(width, height);
138
139
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 110 times.
111 if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
140 1 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
141 }
142
143 110 Rectangle rect{width, height};
144 110 Rows<const uint8_t> src_rows{src, src_stride, channels};
145 110 Rows<int16_t> dst_rows{dst, dst_stride, channels};
146 using HorizontalSobel3x3_t = HorizontalSobel3x3<uint8_t>;
147 110 constexpr size_t intermediate_size{
148 sizeof(typename HorizontalSobel3x3_t::BufferType)};
149
150 110 auto workspace_variant =
151 110 SeparableFilterWorkspace::create(rect, channels, intermediate_size);
152
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 109 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 109 times.
111 if (auto *err = std::get_if<kleidicv_error_t>(&workspace_variant)) {
153 1 return *err;
154 }
155 109 auto &workspace = *std::get_if<SeparableFilterWorkspace>(&workspace_variant);
156
157 109 HorizontalSobel3x3_t horizontal_sobel;
158 109 SeparableFilter3x3<HorizontalSobel3x3_t> filter{horizontal_sobel};
159 218 workspace.process(y_begin, y_end, src_rows, dst_rows,
160 109 FixedBorderType::REPLICATE, filter);
161 109 return KLEIDICV_OK;
162 116 }
163
164 KLEIDICV_TARGET_FN_ATTRS
165 116 kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8(
166 const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
167 size_t width, size_t height, size_t y_begin, size_t y_end,
168 size_t channels) {
169
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 115 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 115 times.
116 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
170
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 113 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 113 times.
115 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
171
6/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 112 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 111 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 111 times.
113 CHECK_IMAGE_SIZE(width, height);
172
173
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 110 times.
111 if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
174 1 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
175 }
176
177 110 Rectangle rect{width, height};
178 110 Rows<const uint8_t> src_rows{src, src_stride, channels};
179 110 Rows<int16_t> dst_rows{dst, dst_stride, channels};
180 using VerticalSobel3x3_t = VerticalSobel3x3<uint8_t>;
181 110 constexpr size_t intermediate_size{
182 sizeof(typename VerticalSobel3x3_t::BufferType)};
183
184 110 auto workspace_variant =
185 110 SeparableFilterWorkspace::create(rect, channels, intermediate_size);
186
4/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 109 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 109 times.
111 if (auto *err = std::get_if<kleidicv_error_t>(&workspace_variant)) {
187 1 return *err;
188 }
189 109 auto &workspace = *std::get_if<SeparableFilterWorkspace>(&workspace_variant);
190
191 109 VerticalSobel3x3_t vertical_sobel;
192 109 SeparableFilter3x3<VerticalSobel3x3_t> filter{vertical_sobel};
193 218 workspace.process(y_begin, y_end, src_rows, dst_rows,
194 109 FixedBorderType::REPLICATE, filter);
195 109 return KLEIDICV_OK;
196 116 }
197
198 } // namespace kleidicv::neon
199