KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/filters/separable_filter_5x5_neon.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 75 75 100.0%
Functions: 50 50 100.0%
Branches: 6 6 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_SEPARABLE_FILTER_5X5_NEON_H
6 #define KLEIDICV_SEPARABLE_FILTER_5X5_NEON_H
7
8 #include "kleidicv/neon.h"
9 #include "kleidicv/workspace/border_5x5.h"
10
11 namespace KLEIDICV_TARGET_NAMESPACE {
12
13 // Template for drivers of separable NxM filters.
14 template <typename FilterType, const size_t S>
15 class SeparableFilter;
16
17 // Driver for a separable 5x5 filter.
18 template <typename FilterType>
19 class SeparableFilter<FilterType, 5UL> {
20 public:
21 using SourceType = typename FilterType::SourceType;
22 using BufferType = typename FilterType::BufferType;
23 using DestinationType = typename FilterType::DestinationType;
24 using SourceVecTraits = typename neon::VecTraits<SourceType>;
25 using SourceVectorType = typename SourceVecTraits::VectorType;
26 using BufferVecTraits = typename neon::VecTraits<BufferType>;
27 using BufferVectorType = typename BufferVecTraits::VectorType;
28 using BorderInfoType =
29 typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5<SourceType>;
30 using BorderType = FixedBorderType;
31 using BorderOffsets = typename BorderInfoType::Offsets;
32
33 232 explicit SeparableFilter(FilterType filter) : filter_{filter} {}
34
35 static constexpr size_t margin = 2UL;
36
37 2735 void process_vertical(size_t width, Rows<const SourceType> src_rows,
38 Rows<BufferType> dst_rows,
39 BorderOffsets border_offsets) const {
40 5470 LoopUnroll2<TryToAvoidTailLoop> loop{width * src_rows.channels(),
41 2735 SourceVecTraits::num_lanes()};
42
43 5129 loop.unroll_once([&](size_t index) {
44 2394 SourceVectorType src[5];
45 2394 src[0] = vld1q(&src_rows.at(border_offsets.c0())[index]);
46 2394 src[1] = vld1q(&src_rows.at(border_offsets.c1())[index]);
47 2394 src[2] = vld1q(&src_rows.at(border_offsets.c2())[index]);
48 2394 src[3] = vld1q(&src_rows.at(border_offsets.c3())[index]);
49 2394 src[4] = vld1q(&src_rows.at(border_offsets.c4())[index]);
50 2394 filter_.vertical_vector_path(src, &dst_rows[index]);
51 2394 });
52
53 14642 loop.tail([&](size_t index) {
54 11907 SourceType src[5];
55 11907 src[0] = src_rows.at(border_offsets.c0())[index];
56 11907 src[1] = src_rows.at(border_offsets.c1())[index];
57 11907 src[2] = src_rows.at(border_offsets.c2())[index];
58 11907 src[3] = src_rows.at(border_offsets.c3())[index];
59 11907 src[4] = src_rows.at(border_offsets.c4())[index];
60 11907 filter_.vertical_scalar_path(src, &dst_rows[index]);
61 11907 });
62 2735 }
63
64 2735 void process_horizontal(size_t width, Rows<const BufferType> src_rows,
65 Rows<DestinationType> dst_rows,
66 BorderOffsets border_offsets) const {
67 5470 LoopUnroll2<TryToAvoidTailLoop> loop{width * src_rows.channels(),
68 2735 BufferVecTraits::num_lanes()};
69
70 4118 loop.unroll_twice([&](size_t index) {
71 1383 auto src_0 = &src_rows.at(0, border_offsets.c0())[index];
72 1383 auto src_1 = &src_rows.at(0, border_offsets.c1())[index];
73 1383 auto src_2 = &src_rows.at(0, border_offsets.c2())[index];
74 1383 auto src_3 = &src_rows.at(0, border_offsets.c3())[index];
75 1383 auto src_4 = &src_rows.at(0, border_offsets.c4())[index];
76
77 1383 BufferVectorType src_a[5], src_b[5];
78 1383 src_a[0] = vld1q(&src_0[0]);
79 1383 src_b[0] = vld1q(&src_0[BufferVecTraits::num_lanes()]);
80 1383 src_a[1] = vld1q(&src_1[0]);
81 1383 src_b[1] = vld1q(&src_1[BufferVecTraits::num_lanes()]);
82 1383 src_a[2] = vld1q(&src_2[0]);
83 1383 src_b[2] = vld1q(&src_2[BufferVecTraits::num_lanes()]);
84 1383 src_a[3] = vld1q(&src_3[0]);
85 1383 src_b[3] = vld1q(&src_3[BufferVecTraits::num_lanes()]);
86 1383 src_a[4] = vld1q(&src_4[0]);
87 1383 src_b[4] = vld1q(&src_4[BufferVecTraits::num_lanes()]);
88
89 1383 filter_.horizontal_vector_path(src_a, &dst_rows[index]);
90 2766 filter_.horizontal_vector_path(
91 1383 src_b, &dst_rows[index + BufferVecTraits::num_lanes()]);
92 1383 });
93
94 3578 loop.unroll_once([&](size_t index) {
95 843 BufferVectorType src[5];
96 843 src[0] = vld1q(&src_rows.at(0, border_offsets.c0())[index]);
97 843 src[1] = vld1q(&src_rows.at(0, border_offsets.c1())[index]);
98 843 src[2] = vld1q(&src_rows.at(0, border_offsets.c2())[index]);
99 843 src[3] = vld1q(&src_rows.at(0, border_offsets.c3())[index]);
100 843 src[4] = vld1q(&src_rows.at(0, border_offsets.c4())[index]);
101 843 filter_.horizontal_vector_path(src, &dst_rows[index]);
102 843 });
103
104 4090 loop.tail([&](size_t index) {
105 1355 process_horizontal_scalar(src_rows, dst_rows, border_offsets, index);
106 1355 });
107 2735 }
108
109 10940 void process_horizontal_borders(Rows<const BufferType> src_rows,
110 Rows<DestinationType> dst_rows,
111 BorderOffsets border_offsets) const {
112
6/6
✓ Branch 0 taken 5256 times.
✓ Branch 1 taken 5640 times.
✓ Branch 2 taken 2996 times.
✓ Branch 3 taken 3380 times.
✓ Branch 4 taken 2688 times.
✓ Branch 5 taken 2880 times.
22840 for (size_t index = 0; index < src_rows.channels(); ++index) {
113 11900 disable_loop_vectorization();
114 11900 process_horizontal_scalar(src_rows, dst_rows, border_offsets, index);
115 11900 }
116 10940 }
117
118 private:
119 13255 void process_horizontal_scalar(Rows<const BufferType> src_rows,
120 Rows<DestinationType> dst_rows,
121 BorderOffsets border_offsets,
122 size_t index) const {
123 13255 BufferType src[5];
124 13255 src[0] = src_rows.at(0, border_offsets.c0())[index];
125 13255 src[1] = src_rows.at(0, border_offsets.c1())[index];
126 13255 src[2] = src_rows.at(0, border_offsets.c2())[index];
127 13255 src[3] = src_rows.at(0, border_offsets.c3())[index];
128 13255 src[4] = src_rows.at(0, border_offsets.c4())[index];
129 13255 filter_.horizontal_scalar_path(src, &dst_rows[index]);
130 13255 }
131
132 FilterType filter_;
133 }; // end of class SeparableFilter<FilterType, 5UL>
134
135 // Shorthand for 5x5 separable filters driver type.
136 template <class FilterType>
137 using SeparableFilter5x5 = SeparableFilter<FilterType, 5UL>;
138
139 } // namespace KLEIDICV_TARGET_NAMESPACE
140
141 #endif // KLEIDICV_SEPARABLE_FILTER_5X5_NEON_H
142