KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/gray_to_rgb_neon.cpp
Date: 2026-01-20 20:58:59
Exec Total Coverage
Lines: 63 63 100.0%
Functions: 6 6 100.0%
Branches: 32 32 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/conversions/gray_to_rgb.h"
6 #include "kleidicv/kleidicv.h"
7 #include "kleidicv/neon.h"
8 #include "kleidicv/types.h"
9
10 namespace kleidicv::neon {
11
12 template <typename ScalarType>
13 class GrayToRGB final : public UnrollOnce {
14 public:
15 using VecTraits = neon::VecTraits<ScalarType>;
16 using VectorType = typename VecTraits::VectorType;
17
18 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
19 82 GrayToRGB() : indices_{} {
20 82 VecTraits::load(kGrayToRGBTableIndices, indices_);
21 82 }
22 #else
23 GrayToRGB() = default;
24 #endif
25
26 451 void vector_path(const ScalarType *src, ScalarType *dst) {
27 451 KLEIDICV_PREFETCH(&src[0] + 1024);
28 451 uint8x16_t src_vect;
29 451 VecTraits::load(&src[0], src_vect);
30 451 uint8x16x3_t dst_vect;
31 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
32 dst_vect.val[0] = src_vect;
33 dst_vect.val[1] = src_vect;
34 dst_vect.val[2] = src_vect;
35 vst3q_u8(dst, dst_vect);
36 #else
37 451 dst_vect.val[0] = vqtbl1q_u8(src_vect, indices_.val[0]);
38 451 dst_vect.val[1] = vqtbl1q_u8(src_vect, indices_.val[1]);
39 451 dst_vect.val[2] = vqtbl1q_u8(src_vect, indices_.val[2]);
40 451 VecTraits::store(dst_vect, dst);
41 #endif
42 451 }
43
44 336 void scalar_path(const ScalarType *src, ScalarType *dst) {
45 336 dst[0] = dst[1] = dst[2] = src[0];
46 336 }
47
48 private:
49 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
50
51 static constexpr uint8_t kGrayToRGBTableIndices[48] = {
52 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5,
53 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10,
54 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 15, 15, 15};
55 uint8x16x3_t indices_;
56 #endif
57 }; // end of class GrayToRGB<ScalarType>
58
59 template <typename ScalarType>
60 class GrayToRGBA final {
61 public:
62 using VecTraits = neon::VecTraits<ScalarType>;
63 using VectorType = typename VecTraits::VectorType;
64
65 KLEIDICV_FORCE_INLINE
66 84 void process_row(size_t length, Columns<const uint8_t> src,
67 Columns<uint8_t> dst) {
68 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
69 84 uint8x16x4_t indices;
70 84 VecTraits::load(kGrayToRGBATableIndices, indices);
71 #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
72 84 uint8x16x4_t dst_vect;
73 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE || defined(__clang__)
74 84 uint8x16x2_t src_and_alpha;
75 84 src_and_alpha.val[1] = vdupq_n_u8(0xff);
76 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE || defined(__clang__)
77
78 84 const size_t unroll_count = length / kVectorLength;
79
2/2
✓ Branch 0 taken 451 times.
✓ Branch 1 taken 84 times.
535 for (size_t i = 0; i < unroll_count; ++i) {
80 451 KLEIDICV_PREFETCH(&src[0] + 1024);
81 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
82 VecTraits::load(&src[0], src_and_alpha.val[0]);
83 dst_vect.val[0] = src_and_alpha.val[0];
84 dst_vect.val[1] = src_and_alpha.val[0];
85 dst_vect.val[2] = src_and_alpha.val[0];
86 dst_vect.val[3] = src_and_alpha.val[1];
87 vst4q_u8(&dst[0], dst_vect);
88 #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
89 #if defined(__clang__)
90 451 VecTraits::load(&src[0], src_and_alpha.val[0]);
91 451 dst_vect.val[0] = vqtbl2q_u8(src_and_alpha, indices.val[0]);
92 451 dst_vect.val[1] = vqtbl2q_u8(src_and_alpha, indices.val[1]);
93 451 dst_vect.val[2] = vqtbl2q_u8(src_and_alpha, indices.val[2]);
94 451 dst_vect.val[3] = vqtbl2q_u8(src_and_alpha, indices.val[3]);
95 #else // defined(__clang__)
96 asm volatile(
97 "ld1 { v16.16b }, [%[src_ptr]] \n\t"
98 "movi v17.16b, #0xff \n\t"
99 "tbl %0.16b, { v16.16b, v17.16b }, %[idx0].16b \n\t"
100 "tbl %1.16b, { v16.16b, v17.16b }, %[idx1].16b \n\t"
101 "tbl %2.16b, { v16.16b, v17.16b }, %[idx2].16b \n\t"
102 "tbl %3.16b, { v16.16b, v17.16b }, %[idx3].16b \n\t"
103 : "=&w"(dst_vect.val[0]), "=&w"(dst_vect.val[1]),
104 "=&w"(dst_vect.val[2]), "=&w"(dst_vect.val[3])
105 : [src_ptr] "r"(&src[0]), [idx0] "w"(indices.val[0]),
106 [idx1] "w"(indices.val[1]), [idx2] "w"(indices.val[2]),
107 [idx3] "w"(indices.val[3])
108 : "v16", "v17", "memory");
109 #endif // defined(__clang__)
110 451 VecTraits::store(dst_vect, &dst[0]);
111 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
112 451 src += static_cast<ptrdiff_t>(kVectorLength);
113 451 dst += static_cast<ptrdiff_t>(kVectorLength);
114 451 }
115 84 length -= kVectorLength * unroll_count;
116
117
2/2
✓ Branch 0 taken 84 times.
✓ Branch 1 taken 336 times.
420 for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(length); ++i) {
118 336 disable_loop_vectorization();
119 336 dst.at(i)[0] = dst.at(i)[1] = dst.at(i)[2] = src.at(i)[0];
120 336 dst.at(i)[3] = 0xff;
121 336 }
122 84 }
123
124 private:
125 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
126 static constexpr uint8_t kGrayToRGBATableIndices[64] = {
127 0, 0, 0, 16, 1, 1, 1, 16, 2, 2, 2, 16, 3, 3, 3, 16,
128 4, 4, 4, 16, 5, 5, 5, 16, 6, 6, 6, 16, 7, 7, 7, 16,
129 8, 8, 8, 16, 9, 9, 9, 16, 10, 10, 10, 16, 11, 11, 11, 16,
130 12, 12, 12, 16, 13, 13, 13, 16, 14, 14, 14, 16, 15, 15, 15, 16};
131 #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
132 }; // end of class GrayToRGBA<ScalarType>
133
134 KLEIDICV_TARGET_FN_ATTRS
135 94 kleidicv_error_t gray_to_rgb_u8(const uint8_t *src, size_t src_stride,
136 uint8_t *dst, size_t dst_stride, size_t width,
137 size_t height) {
138
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 91 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 91 times.
94 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
139
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 88 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 88 times.
91 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
140
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 82 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 82 times.
88 CHECK_IMAGE_SIZE(width, height);
141
142 82 Rectangle rect{width, height};
143 82 Rows<const uint8_t> src_rows{src, src_stride};
144 82 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */};
145 82 GrayToRGB<uint8_t> operation;
146 82 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
147 82 return KLEIDICV_OK;
148 94 }
149
150 KLEIDICV_TARGET_FN_ATTRS
151 94 kleidicv_error_t gray_to_rgba_u8(const uint8_t *src, size_t src_stride,
152 uint8_t *dst, size_t dst_stride, size_t width,
153 size_t height) {
154
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 91 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 91 times.
94 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
155
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 88 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 88 times.
91 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
156
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 82 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 82 times.
88 CHECK_IMAGE_SIZE(width, height);
157
158 82 Rectangle rect{width, height};
159 82 Rows<const uint8_t> src_rows{src, src_stride};
160 82 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */};
161 82 GrayToRGBA<uint8_t> operation;
162 82 zip_rows(operation, rect, src_rows, dst_rows);
163 82 return KLEIDICV_OK;
164 94 }
165
166 } // namespace kleidicv::neon
167