KleidiCV Coverage Report

Directory:	./
File:	kleidicv/src/filters/sobel_neon.cpp
Date:	2025-09-25 14:13:34

	Exec	Total	Coverage
Lines:	75	75	100.0%
Functions:	10	10	100.0%
Branches:	36	36	100.0%

  
      Line
      Branch
      Exec
      Source
    
      // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
    
      //
    
      // SPDX-License-Identifier: Apache-2.0
    
      #include "kleidicv/filters/separable_filter_3x3_neon.h"
    
      #include "kleidicv/filters/sobel.h"
    
      #include "kleidicv/kleidicv.h"
    
      #include "kleidicv/neon.h"
    
      #include "kleidicv/workspace/separable.h"
    
      namespace kleidicv::neon {
    
      // Template for 3x3 Sobel filters which calculate horizontal derivative
    
      // approximations, often denoted as Gx.
    
      //
    
      // The applied weights, as the kernel is mirrored both vertically and
    
      // horizontally during the convolution:
    
      //      [ -1, 0, 1 ]   [ 1 ]
    
      //  F = [ -2, 0, 2 ] = [ 2 ] * [ -1,  0, 1 ]
    
      //      [ -1, 0, 1 ]   [ 1 ]
    
      template <typename T>
    
      class HorizontalSobel3x3;
    
      // 3x3 Sobel filter for uint8_t types which calculates horizontal derivative
    
      // approximations, often denoted as Gx.
    
      template <>
    
      class HorizontalSobel3x3<uint8_t> {
    
       public:
    
        using SourceType = uint8_t;
    
        using BufferType = int16_t;
    
        using DestinationType = int16_t;
    
        // Applies vertical filtering vector using SIMD operations.
    
        //
    
        // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
    
      1494
        void vertical_vector_path(uint8x16_t src[3], BufferType *dst) const {
    
      1494
          int16x8_t acc_l = vaddl_u8(vget_low_u8(src[0]), vget_low_u8(src[2]));
    
      1494
          int16x8_t acc_h = vaddl_u8(vget_high_u8(src[0]), vget_high_u8(src[2]));
    
      1494
          uint8x16_t shift_l = vshll_n_u8(vget_low_u8(src[1]), 1);
    
      1494
          uint8x16_t shift_h = vshll_n_u8(vget_high_u8(src[1]), 1);
    
      1494
          acc_l = vaddq_u16(acc_l, shift_l);
    
      1494
          acc_h = vaddq_u16(acc_h, shift_h);
    
      1494
          vst1q(&dst[0], acc_l);
    
      1494
          vst1q(&dst[VecTraits<BufferType>::num_lanes()], acc_h);
    
      1494
        }
    
        // Applies vertical filtering vector using scalar operations.
    
        //
    
        // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
    
      8556
        void vertical_scalar_path(const SourceType src[3], BufferType *dst) const {
    
          // Explicitly narrow. Overflow is permitted.
    
      8556
          dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]);
    
      8556
        }
    
        // Applies horizontal filtering vector using SIMD operations.
    
        //
    
        // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
    
      2560
        void horizontal_vector_path(int16x8_t src[3], DestinationType *dst) const {
    
      2560
          vst1q(&dst[0], vsubq_s16(src[2], src[0]));
    
      2560
        }
    
        // Applies horizontal filtering vector using scalar operations.
    
        //
    
        // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
    
      9456
        void horizontal_scalar_path(const BufferType src[3],
    
                                    DestinationType *dst) const {
    
          // Explicitly narrow. Overflow is permitted.
    
      9456
          dst[0] = static_cast<DestinationType>(src[2] - src[0]);
    
      9456
        }
    
      };  // end of class HorizontalSobel3x3<uint8_t>
    
      // Template for 3x3 Sobel filters which calculate vertical derivative
    
      // approximations, often denoted as Gy.
    
      //
    
      // The applied weights, as the kernel is mirrored both vertically and
    
      // horizontally during the convolution:
    
      //      [ -1, -2, -1 ]   [ -1 ]
    
      //  F = [  0,  0,  0 ] = [  0 ] * [ 1,  2, 1 ]
    
      //      [  1,  2,  1 ]   [  1 ]
    
      template <typename T>
    
      class VerticalSobel3x3;
    
      // 3x3 Sobel filter for uint8_t types which calculates vertical derivative
    
      // approximations, often denoted as Gy.
    
      template <>
    
      class VerticalSobel3x3<uint8_t> {
    
       public:
    
        using SourceType = uint8_t;
    
        using BufferType = int16_t;
    
        using DestinationType = int16_t;
    
        // Applies vertical filtering vector using SIMD operations.
    
        //
    
        // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
    
      1494
        void vertical_vector_path(uint8x16_t src[3], BufferType *dst) const {
    
      1494
          uint16x8_t acc_l = vsubl_u8(vget_low_u8(src[2]), vget_low_u8(src[0]));
    
      1494
          uint16x8_t acc_h = vsubl_u8(vget_high_u8(src[2]), vget_high_u8(src[0]));
    
      1494
          vst1q(&dst[0], vreinterpretq_s16_u16(acc_l));
    
      2988
          vst1q(&dst[VecTraits<BufferType>::num_lanes()],
    
      1494
                vreinterpretq_s16_u16(acc_h));
    
      1494
        }
    
        // Applies vertical filtering vector using scalar operations.
    
        //
    
        // DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
    
      8556
        void vertical_scalar_path(const SourceType src[3], BufferType *dst) const {
    
          // Explicitly narrow. Overflow is permitted.
    
      8556
          dst[0] = static_cast<DestinationType>(src[2] - src[0]);
    
      8556
        }
    
        // Applies horizontal filtering vector using SIMD operations.
    
        //
    
        // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
    
      2560
        void horizontal_vector_path(int16x8_t src[3], DestinationType *dst) const {
    
      2560
          int16x8_t acc = vaddq_s16(src[0], src[2]);
    
      2560
          acc = vaddq_s16(acc, vshlq_n_s16(src[1], 1));
    
      2560
          vst1q(&dst[0], acc);
    
      2560
        }
    
        // Applies horizontal filtering vector using scalar operations.
    
        //
    
        // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
    
      9456
        void horizontal_scalar_path(const BufferType src[3],
    
                                    DestinationType *dst) const {
    
          // Explicitly narrow. Overflow is permitted.
    
      9456
          dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]);
    
      9456
        }
    
      };  // end of class VerticalSobel3x3<uint8_t>
    
      KLEIDICV_TARGET_FN_ATTRS
    
      90
      kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8(
    
          const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
    
          size_t width, size_t height, size_t y_begin, size_t y_end,
    
          size_t channels) {
    
        4/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 89 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 89 times.

      90
        CHECK_POINTER_AND_STRIDE(src, src_stride, height);
    
        4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 87 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 87 times.

      89
        CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
    
        6/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 86 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 85 times.

      87
        CHECK_IMAGE_SIZE(width, height);
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.

      85
        if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
    
      1
          return KLEIDICV_ERROR_NOT_IMPLEMENTED;
    
        }
    
      84
        Rectangle rect{width, height};
    
      84
        Rows<const uint8_t> src_rows{src, src_stride, channels};
    
      84
        Rows<int16_t> dst_rows{dst, dst_stride, channels};
    
      84
        auto workspace =
    
      84
            SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t));
    
        2/2✓ Branch 0 taken 83 times.
✓ Branch 1 taken 1 times.

      84
        if (!workspace) {
    
      1
          return KLEIDICV_ERROR_ALLOCATION;
    
        }
    
      83
        HorizontalSobel3x3<uint8_t> horizontal_sobel;
    
      83
        SeparableFilter3x3<HorizontalSobel3x3<uint8_t>> filter{horizontal_sobel};
    
      166
        workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
    
      83
                           FixedBorderType::REPLICATE, filter);
    
      83
        return KLEIDICV_OK;
    
      90
      }
    
      KLEIDICV_TARGET_FN_ATTRS
    
      90
      kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8(
    
          const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
    
          size_t width, size_t height, size_t y_begin, size_t y_end,
    
          size_t channels) {
    
        4/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 89 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 89 times.

      90
        CHECK_POINTER_AND_STRIDE(src, src_stride, height);
    
        4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 87 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 87 times.

      89
        CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
    
        6/6✓ Branch 0 taken 1 times.
✓ Branch 1 taken 86 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 85 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 85 times.

      87
        CHECK_IMAGE_SIZE(width, height);
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 84 times.

      85
        if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
    
      1
          return KLEIDICV_ERROR_NOT_IMPLEMENTED;
    
        }
    
      84
        Rectangle rect{width, height};
    
      84
        Rows<const uint8_t> src_rows{src, src_stride, channels};
    
      84
        Rows<int16_t> dst_rows{dst, dst_stride, channels};
    
      84
        auto workspace =
    
      84
            SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t));
    
        2/2✓ Branch 0 taken 83 times.
✓ Branch 1 taken 1 times.

      84
        if (!workspace) {
    
      1
          return KLEIDICV_ERROR_ALLOCATION;
    
        }
    
      83
        VerticalSobel3x3<uint8_t> vertical_sobel;
    
      83
        SeparableFilter3x3<VerticalSobel3x3<uint8_t>> filter{vertical_sobel};
    
      166
        workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
    
      83
                           FixedBorderType::REPLICATE, filter);
    
      83
        return KLEIDICV_OK;
    
      90
      }
    
      }  // namespace kleidicv::neon

Line	Branch	Exec	Source
1			// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
2			//
3			// SPDX-License-Identifier: Apache-2.0
4
5			#include "kleidicv/filters/separable_filter_3x3_neon.h"
6			#include "kleidicv/filters/sobel.h"
7			#include "kleidicv/kleidicv.h"
8			#include "kleidicv/neon.h"
9			#include "kleidicv/workspace/separable.h"
10
11			namespace kleidicv::neon {
12
13			// Template for 3x3 Sobel filters which calculate horizontal derivative
14			// approximations, often denoted as Gx.
15			//
16			// The applied weights, as the kernel is mirrored both vertically and
17			// horizontally during the convolution:
18			//      [ -1, 0, 1 ]   [ 1 ]
19			//  F = [ -2, 0, 2 ] = [ 2 ] * [ -1,  0, 1 ]
20			//      [ -1, 0, 1 ]   [ 1 ]
21			template <typename T>
22			class HorizontalSobel3x3;
23
24			// 3x3 Sobel filter for uint8_t types which calculates horizontal derivative
25			// approximations, often denoted as Gx.
26			template <>
27			class HorizontalSobel3x3<uint8_t> {
28			public:
29			using SourceType = uint8_t;
30			using BufferType = int16_t;
31			using DestinationType = int16_t;
32
33			// Applies vertical filtering vector using SIMD operations.
34			//
35			// DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
36		1494	void vertical_vector_path(uint8x16_t src[3], BufferType *dst) const {
37		1494	int16x8_t acc_l = vaddl_u8(vget_low_u8(src[0]), vget_low_u8(src[2]));
38		1494	int16x8_t acc_h = vaddl_u8(vget_high_u8(src[0]), vget_high_u8(src[2]));
39		1494	uint8x16_t shift_l = vshll_n_u8(vget_low_u8(src[1]), 1);
40		1494	uint8x16_t shift_h = vshll_n_u8(vget_high_u8(src[1]), 1);
41		1494	acc_l = vaddq_u16(acc_l, shift_l);
42		1494	acc_h = vaddq_u16(acc_h, shift_h);
43		1494	vst1q(&dst[0], acc_l);
44		1494	vst1q(&dst[VecTraits<BufferType>::num_lanes()], acc_h);
45		1494	}
46
47			// Applies vertical filtering vector using scalar operations.
48			//
49			// DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
50		8556	void vertical_scalar_path(const SourceType src[3], BufferType *dst) const {
51			// Explicitly narrow. Overflow is permitted.
52		8556	dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]);
53		8556	}
54
55			// Applies horizontal filtering vector using SIMD operations.
56			//
57			// DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
58		2560	void horizontal_vector_path(int16x8_t src[3], DestinationType *dst) const {
59		2560	vst1q(&dst[0], vsubq_s16(src[2], src[0]));
60		2560	}
61
62			// Applies horizontal filtering vector using scalar operations.
63			//
64			// DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
65		9456	void horizontal_scalar_path(const BufferType src[3],
66			DestinationType *dst) const {
67			// Explicitly narrow. Overflow is permitted.
68		9456	dst[0] = static_cast<DestinationType>(src[2] - src[0]);
69		9456	}
70			}; // end of class HorizontalSobel3x3<uint8_t>
71
72			// Template for 3x3 Sobel filters which calculate vertical derivative
73			// approximations, often denoted as Gy.
74			//
75			// The applied weights, as the kernel is mirrored both vertically and
76			// horizontally during the convolution:
77			//      [ -1, -2, -1 ]   [ -1 ]
78			//  F = [  0,  0,  0 ] = [  0 ] * [ 1,  2, 1 ]
79			//      [  1,  2,  1 ]   [  1 ]
80			template <typename T>
81			class VerticalSobel3x3;
82
83			// 3x3 Sobel filter for uint8_t types which calculates vertical derivative
84			// approximations, often denoted as Gy.
85			template <>
86			class VerticalSobel3x3<uint8_t> {
87			public:
88			using SourceType = uint8_t;
89			using BufferType = int16_t;
90			using DestinationType = int16_t;
91
92			// Applies vertical filtering vector using SIMD operations.
93			//
94			// DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
95		1494	void vertical_vector_path(uint8x16_t src[3], BufferType *dst) const {
96		1494	uint16x8_t acc_l = vsubl_u8(vget_low_u8(src[2]), vget_low_u8(src[0]));
97		1494	uint16x8_t acc_h = vsubl_u8(vget_high_u8(src[2]), vget_high_u8(src[0]));
98		1494	vst1q(&dst[0], vreinterpretq_s16_u16(acc_l));
99		2988	vst1q(&dst[VecTraits<BufferType>::num_lanes()],
100		1494	vreinterpretq_s16_u16(acc_h));
101		1494	}
102
103			// Applies vertical filtering vector using scalar operations.
104			//
105			// DST = [ SRC0, SRC1, SRC2 ] * [ -1, 0, 1 ]T
106		8556	void vertical_scalar_path(const SourceType src[3], BufferType *dst) const {
107			// Explicitly narrow. Overflow is permitted.
108		8556	dst[0] = static_cast<DestinationType>(src[2] - src[0]);
109		8556	}
110
111			// Applies horizontal filtering vector using SIMD operations.
112			//
113			// DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
114		2560	void horizontal_vector_path(int16x8_t src[3], DestinationType *dst) const {
115		2560	int16x8_t acc = vaddq_s16(src[0], src[2]);
116		2560	acc = vaddq_s16(acc, vshlq_n_s16(src[1], 1));
117		2560	vst1q(&dst[0], acc);
118		2560	}
119
120			// Applies horizontal filtering vector using scalar operations.
121			//
122			// DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T
123		9456	void horizontal_scalar_path(const BufferType src[3],
124			DestinationType *dst) const {
125			// Explicitly narrow. Overflow is permitted.
126		9456	dst[0] = static_cast<DestinationType>(src[0] + 2 * src[1] + src[2]);
127		9456	}
128			}; // end of class VerticalSobel3x3<uint8_t>
129
130			KLEIDICV_TARGET_FN_ATTRS
131		90	kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8(
132			const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
133			size_t width, size_t height, size_t y_begin, size_t y_end,
134			size_t channels) {
135	4/4 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 89 times. ✓ Branch 2 taken 1 times. ✓ Branch 3 taken 89 times.	90	CHECK_POINTER_AND_STRIDE(src, src_stride, height);
136	4/4 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 87 times. ✓ Branch 2 taken 2 times. ✓ Branch 3 taken 87 times.	89	CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
137	6/6 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 86 times. ✓ Branch 2 taken 1 times. ✓ Branch 3 taken 85 times. ✓ Branch 4 taken 2 times. ✓ Branch 5 taken 85 times.	87	CHECK_IMAGE_SIZE(width, height);
138
139	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 84 times.	85	if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
140		1	return KLEIDICV_ERROR_NOT_IMPLEMENTED;
141			}
142
143		84	Rectangle rect{width, height};
144		84	Rows<const uint8_t> src_rows{src, src_stride, channels};
145		84	Rows<int16_t> dst_rows{dst, dst_stride, channels};
146
147		84	auto workspace =
148		84	SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t));
149	2/2 ✓ Branch 0 taken 83 times. ✓ Branch 1 taken 1 times.	84	if (!workspace) {
150		1	return KLEIDICV_ERROR_ALLOCATION;
151			}
152
153		83	HorizontalSobel3x3<uint8_t> horizontal_sobel;
154		83	SeparableFilter3x3<HorizontalSobel3x3<uint8_t>> filter{horizontal_sobel};
155		166	workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
156		83	FixedBorderType::REPLICATE, filter);
157		83	return KLEIDICV_OK;
158		90	}
159
160			KLEIDICV_TARGET_FN_ATTRS
161		90	kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8(
162			const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride,
163			size_t width, size_t height, size_t y_begin, size_t y_end,
164			size_t channels) {
165	4/4 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 89 times. ✓ Branch 2 taken 1 times. ✓ Branch 3 taken 89 times.	90	CHECK_POINTER_AND_STRIDE(src, src_stride, height);
166	4/4 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 87 times. ✓ Branch 2 taken 2 times. ✓ Branch 3 taken 87 times.	89	CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
167	6/6 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 86 times. ✓ Branch 2 taken 1 times. ✓ Branch 3 taken 85 times. ✓ Branch 4 taken 2 times. ✓ Branch 5 taken 85 times.	87	CHECK_IMAGE_SIZE(width, height);
168
169	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 84 times.	85	if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) {
170		1	return KLEIDICV_ERROR_NOT_IMPLEMENTED;
171			}
172
173		84	Rectangle rect{width, height};
174		84	Rows<const uint8_t> src_rows{src, src_stride, channels};
175		84	Rows<int16_t> dst_rows{dst, dst_stride, channels};
176
177		84	auto workspace =
178		84	SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t));
179	2/2 ✓ Branch 0 taken 83 times. ✓ Branch 1 taken 1 times.	84	if (!workspace) {
180		1	return KLEIDICV_ERROR_ALLOCATION;
181			}
182
183		83	VerticalSobel3x3<uint8_t> vertical_sobel;
184		83	SeparableFilter3x3<VerticalSobel3x3<uint8_t>> filter{vertical_sobel};
185		166	workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels,
186		83	FixedBorderType::REPLICATE, filter);
187		83	return KLEIDICV_OK;
188		90	}
189
190			} // namespace kleidicv::neon
191