KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/filters/border_generic_neon.h
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 52 52 100.0%
Functions: 6 6 100.0%
Branches: 8 8 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_WORKSPACE_BORDER_GENERIC_NEON_H
6 #define KLEIDICV_WORKSPACE_BORDER_GENERIC_NEON_H
7
8 #include <algorithm>
9 #include <cstddef>
10
11 #include "kleidicv/neon.h"
12 #include "kleidicv/types.h"
13 #include "kleidicv/workspace/border_types.h"
14
15 namespace KLEIDICV_TARGET_NAMESPACE {
16
17 // Border offsets for generic filters.
18 template <kleidicv::FixedBorderType BorderType>
19 class GenericBorderHorizontal final {
20 public:
21 39 GenericBorderHorizontal(size_t width, size_t channels)
22 39 : width_(static_cast<ptrdiff_t>(width)),
23 39 channels_{static_cast<ptrdiff_t>(channels)},
24 39 data_indices_{0ULL | (1ULL << 8) | (2ULL << 16) | (3ULL << 24) |
25 (4ULL << 32) | (5ULL << 40) | (6ULL << 48) |
26 (7ULL << 56)},
27 39 border_indices_left_{0},
28 39 border_indices_right_{0} {
29 // The result will take some elements from the image (data), and the
30 // remaining parts from the border.
31 // An index vector is prepared here to help the process, e.g. for replicated
32 // borders and 3 channels, the constructed index vector will look like this:
33 // [1, 2, 0, 1, 2, 3, 4, 5]
34 // (0,1,2 is repeated until index 0 is reached, when the image data begins)
35 // Right side is similar, but it is the [5,6,7] that repeats after.
36
2/2
✓ Branch 0 taken 39 times.
✓ Branch 1 taken 312 times.
351 for (ptrdiff_t i = 0; i < 8; ++i) {
37 // channels_*8 - 1 - i: 23, 22, 21, 20, 19, 18, 17, 16
38 // % channels: 2, 1, 0, 2, 1, 0, 2, 1
39 312 border_indices_left_ =
40 312 (border_indices_left_ << 8) | ((channels_ * 8 - 1 - i) % channels_);
41 // (7 - i): 7, 6, 5, 4, 3, 2, 1, 0
42 // % channels: 1, 0, 2, 1, 0, 2, 1, 0
43 312 border_indices_right_ =
44 312 (border_indices_right_ << 8) | (((7 - i) % channels) + 8 - channels_);
45 312 }
46 39 }
47
48 // Raw column can be bigger than width-1 or less than 0
49 3240 ptrdiff_t get_column(ptrdiff_t raw_column) const {
50 // TODO: support more border types; only the Replicate border is handled here
51 6480 return std::max<ptrdiff_t>(std::min<ptrdiff_t>(raw_column, width_ - 1),
52 3240 ptrdiff_t{0});
53 }
54
55 // Assuming that start_offset is <= 0
56 2820 uint16x8_t load_left(Rows<const uint8_t> src_rows,
57 ptrdiff_t start_offset) const {
58 if constexpr (BorderType == FixedBorderType::REPLICATE) {
59 2820 uint8x8_t data = vld1_u8(&src_rows[0]);
60 2820 uint64_t indices{};
61
2/2
✓ Branch 0 taken 2520 times.
✓ Branch 1 taken 300 times.
2820 if (start_offset > -8) {
62 2520 ptrdiff_t shift = -8 * start_offset;
63 2520 indices =
64 2520 ((border_indices_left_ >> (64 - shift)) | (data_indices_ << shift));
65 2520 } else {
66 300 ptrdiff_t shift = ((-start_offset - 8) % channels_) * 8;
67 900 indices = (((border_indices_left_ >> (8 * channels_ - shift)) &
68 600 ((1 << shift) - 1)) |
69 300 (border_indices_left_ << shift));
70 300 }
71 5640 return vmovl_u8(vtbl1_u8(data, vreinterpret_u8_u64(uint64x1_t{indices})));
72 2820 }
73 }
74
75 // Assuming that start_offset is >= width - 8
76 2820 uint16x8_t load_right(Rows<const uint8_t> src_rows,
77 ptrdiff_t start_offset) const {
78 if constexpr (BorderType == FixedBorderType::REPLICATE) {
79 2820 uint8x8_t data = vld1_u8(&src_rows[width_ * channels_ - 8]);
80 2820 uint64_t indices{};
81 2820 ptrdiff_t shift = 8 * (start_offset - (width_ * channels_ - 8));
82
2/2
✓ Branch 0 taken 2520 times.
✓ Branch 1 taken 300 times.
2820 if (shift < 64) {
83 2520 indices =
84 2520 (data_indices_ >> shift) | (border_indices_right_ << (64 - shift));
85 2520 } else {
86 300 shift = ((start_offset - width_ * channels_) % channels_) * 8;
87
2/2
✓ Branch 0 taken 180 times.
✓ Branch 1 taken 120 times.
300 indices = shift == 0
88 120 ? border_indices_right_
89 180 : (((border_indices_right_ >> (8 * channels_ - shift))
90 360 << (64 - shift)) |
91 180 (border_indices_right_ >> shift));
92 }
93 5640 return vmovl_u8(vtbl1_u8(data, vreinterpret_u8_u64(uint64x1_t{indices})));
94 2820 }
95 }
96
97 private:
98 ptrdiff_t width_;
99 ptrdiff_t channels_;
100 uint64_t data_indices_, border_indices_left_, border_indices_right_;
101 }; // end of class GenericBorderHorizontal<BorderType>
102
103 // Border offsets for generic filters.
104 template <kleidicv::FixedBorderType BorderType>
105 class GenericBorderVertical final {
106 public:
107 39 explicit GenericBorderVertical(size_t height)
108 39 : height_(static_cast<ptrdiff_t>(height)) {}
109
110 // Raw row can be bigger than height-1 or less than 0
111 19200 ptrdiff_t get_row(ptrdiff_t raw_row) const {
112 // TODO: support more border types; only the Replicate border is handled here
113 38400 return std::max<ptrdiff_t>(std::min<ptrdiff_t>(raw_row, height_ - 1),
114 19200 ptrdiff_t{0});
115 }
116
117 private:
118 ptrdiff_t height_;
119 }; // end of class GenericBorderVertical<BorderType>
120
121 } // namespace KLEIDICV_TARGET_NAMESPACE
122
123 #endif // KLEIDICV_WORKSPACE_BORDER_GENERIC_NEON_H
124