Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #ifndef KLEIDICV_SOBEL_SC_H | ||
6 | #define KLEIDICV_SOBEL_SC_H | ||
7 | |||
8 | #include "kleidicv/filters/separable_filter_3x3_sc.h" | ||
9 | #include "kleidicv/filters/sobel.h" | ||
10 | #include "kleidicv/kleidicv.h" | ||
11 | #include "kleidicv/sve2.h" | ||
12 | #include "kleidicv/workspace/separable.h" | ||
13 | |||
14 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
15 | |||
16 | // Template for 3x3 Sobel filters which calculate horizontal derivative | ||
17 | // approximations, often denoted as Gx. | ||
18 | // | ||
19 | // [ -1, 0, 1 ] [ 1 ] | ||
20 | // F = [ -2, 0, 2 ] = [ 2 ] * [ -1, 0, 1 ] | ||
21 | // [ -1, 0, 1 ] [ 1 ] | ||
22 | template <typename T> | ||
23 | class HorizontalSobel3x3; | ||
24 | |||
25 | // 3x3 Sobel filter for uint8_t types which calculates horizontal derivative | ||
26 | // approximations, often denoted as Gx. | ||
27 | template <> | ||
28 | class HorizontalSobel3x3<uint8_t> { | ||
29 | public: | ||
30 | using SourceType = uint8_t; | ||
31 | using BufferType = int16_t; | ||
32 | using DestinationType = int16_t; | ||
33 | |||
34 | // Applies vertical filtering vector using SIMD operations. | ||
35 | // | ||
36 | // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T | ||
37 | 5072 | void vertical_vector_path(svbool_t pg, | |
38 | std::reference_wrapper<svuint8_t> src[3], | ||
39 | BufferType *dst) const KLEIDICV_STREAMING { | ||
40 | 5072 | svuint16_t acc_u16_b = svaddlb(src[0], src[2]); | |
41 | 5072 | svuint16_t acc_u16_t = svaddlt(src[0], src[2]); | |
42 | 5072 | acc_u16_b = svmlalb(acc_u16_b, src[1], svdup_n_u8(2)); | |
43 | 5072 | acc_u16_t = svmlalt(acc_u16_t, src[1], svdup_n_u8(2)); | |
44 | |||
45 | 10144 | svint16x2_t interleaved = | |
46 | 5072 | svcreate2(svreinterpret_s16(acc_u16_b), svreinterpret_s16(acc_u16_t)); | |
47 | 5072 | svst2(pg, &dst[0], interleaved); | |
48 | 5072 | } | |
49 | |||
50 | // Applies horizontal filtering vector using SIMD operations. | ||
51 | // | ||
52 | // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T | ||
53 | 4448 | void horizontal_vector_path(svbool_t pg, | |
54 | std::reference_wrapper<svint16_t> src[3], | ||
55 | DestinationType *dst) const KLEIDICV_STREAMING { | ||
56 | 4448 | svst1(pg, &dst[0], svsub_x(pg, src[2], src[0])); | |
57 | 4448 | } | |
58 | |||
59 | // Applies horizontal filtering vector using scalar operations. | ||
60 | // | ||
61 | // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T | ||
62 | 15296 | void horizontal_scalar_path(const BufferType src[3], | |
63 | DestinationType *dst) const KLEIDICV_STREAMING { | ||
64 | // Explicitly narrow. Overflow is permitted. | ||
65 | 15296 | dst[0] = static_cast<DestinationType>(src[2] - src[0]); | |
66 | 15296 | } | |
67 | }; // end of class HorizontalSobel3x3<uint8_t> | ||
68 | |||
69 | // Template for 3x3 Sobel filters which calculate vertical derivative | ||
70 | // approximations, often denoted as Gy. | ||
71 | // | ||
72 | // [ -1, -2, -1 ] [ -1 ] | ||
73 | // F = [ 0, 0, 0 ] = [ 0 ] * [ 1, 2, 1 ] | ||
74 | // [ 1, 2, 1 ] [ 1 ] | ||
75 | template <typename T> | ||
76 | class VerticalSobel3x3; | ||
77 | |||
78 | // 3x3 Sobel filter for uint8_t types which calculates vertical derivative | ||
79 | // approximations, often denoted as Gy. | ||
80 | template <> | ||
81 | class VerticalSobel3x3<uint8_t> { | ||
82 | public: | ||
83 | using SourceType = uint8_t; | ||
84 | using BufferType = int16_t; | ||
85 | using DestinationType = int16_t; | ||
86 | |||
87 | // Applies vertical filtering vector using SIMD operations. | ||
88 | // | ||
89 | // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T | ||
90 | 5072 | void vertical_vector_path(svbool_t pg, | |
91 | std::reference_wrapper<svuint8_t> src[3], | ||
92 | BufferType *dst) const KLEIDICV_STREAMING { | ||
93 | 5072 | svuint16_t acc_u16_b = svsublb(src[2], src[0]); | |
94 | 5072 | svuint16_t acc_u16_t = svsublt(src[2], src[0]); | |
95 | |||
96 | 10144 | svint16x2_t interleaved = | |
97 | 5072 | svcreate2(svreinterpret_s16(acc_u16_b), svreinterpret_s16(acc_u16_t)); | |
98 | 5072 | svst2(pg, &dst[0], interleaved); | |
99 | 5072 | } | |
100 | |||
101 | // Applies horizontal filtering vector using SIMD operations. | ||
102 | // | ||
103 | // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T | ||
104 | 4448 | void horizontal_vector_path(svbool_t pg, | |
105 | std::reference_wrapper<svint16_t> src[3], | ||
106 | DestinationType *dst) const KLEIDICV_STREAMING { | ||
107 | 4448 | svint16_t acc = svadd_x(pg, src[0], src[2]); | |
108 | 4448 | acc = svmad_s16_x(pg, src[1], svdup_n_s16(2), acc); | |
109 | 4448 | svst1(pg, &dst[0], acc); | |
110 | 4448 | } | |
111 | |||
112 | // Applies horizontal filtering vector using scalar operations. | ||
113 | // | ||
114 | // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T | ||
115 | 15296 | void horizontal_scalar_path(const BufferType src[3], | |
116 | DestinationType *dst) const KLEIDICV_STREAMING { | ||
117 | // Explicitly narrow. Overflow is permitted. | ||
118 | 15296 | dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]); | |
119 | 15296 | } | |
120 | }; // end of class VerticalSobel3x3<uint8_t> | ||
121 | |||
122 | KLEIDICV_TARGET_FN_ATTRS | ||
123 | 180 | static kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8_sc( | |
124 | const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, | ||
125 | size_t width, size_t height, size_t y_begin, size_t y_end, | ||
126 | size_t channels) KLEIDICV_STREAMING { | ||
127 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 178 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 178 times.
|
180 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
128 |
4/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 174 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 174 times.
|
178 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
129 |
6/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 172 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 170 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 170 times.
|
174 | CHECK_IMAGE_SIZE(width, height); |
130 | |||
131 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 168 times.
|
170 | if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { |
132 | 2 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
133 | } | ||
134 | |||
135 | 168 | Rectangle rect{width, height}; | |
136 | 168 | Rows<const uint8_t> src_rows{src, src_stride, channels}; | |
137 | 168 | Rows<int16_t> dst_rows{dst, dst_stride, channels}; | |
138 | |||
139 | 168 | auto workspace = | |
140 | 168 | SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t)); | |
141 |
2/2✓ Branch 0 taken 166 times.
✓ Branch 1 taken 2 times.
|
168 | if (!workspace) { |
142 | 2 | return KLEIDICV_ERROR_ALLOCATION; | |
143 | } | ||
144 | |||
145 | 166 | HorizontalSobel3x3<uint8_t> horizontal_sobel; | |
146 | 166 | SeparableFilter3x3<HorizontalSobel3x3<uint8_t>> filter{horizontal_sobel}; | |
147 | 332 | workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, | |
148 | 166 | FixedBorderType::REPLICATE, filter); | |
149 | 166 | return KLEIDICV_OK; | |
150 | 180 | } | |
151 | |||
152 | KLEIDICV_TARGET_FN_ATTRS | ||
153 | 180 | static kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8_sc( | |
154 | const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, | ||
155 | size_t width, size_t height, size_t y_begin, size_t y_end, | ||
156 | size_t channels) KLEIDICV_STREAMING { | ||
157 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 178 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 178 times.
|
180 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
158 |
4/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 174 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 174 times.
|
178 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
159 |
6/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 172 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 170 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 170 times.
|
174 | CHECK_IMAGE_SIZE(width, height); |
160 | |||
161 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 168 times.
|
170 | if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { |
162 | 2 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
163 | } | ||
164 | |||
165 | 168 | Rectangle rect{width, height}; | |
166 | 168 | Rows<const uint8_t> src_rows{src, src_stride, channels}; | |
167 | 168 | Rows<int16_t> dst_rows{dst, dst_stride, channels}; | |
168 | |||
169 | 168 | auto workspace = | |
170 | 168 | SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t)); | |
171 |
2/2✓ Branch 0 taken 166 times.
✓ Branch 1 taken 2 times.
|
168 | if (!workspace) { |
172 | 2 | return KLEIDICV_ERROR_ALLOCATION; | |
173 | } | ||
174 | |||
175 | 166 | VerticalSobel3x3<uint8_t> vertical_sobel; | |
176 | 166 | SeparableFilter3x3<VerticalSobel3x3<uint8_t>> filter{vertical_sobel}; | |
177 | 332 | workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, | |
178 | 166 | FixedBorderType::REPLICATE, filter); | |
179 | 166 | return KLEIDICV_OK; | |
180 | 180 | } | |
181 | |||
182 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
183 | |||
184 | #endif // KLEIDICV_SOBEL_SC_H | ||
185 |