KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/gray_to_rgb_neon.cpp
Date: 2025-11-25 17:23:32
Exec Total Coverage
Lines: 63 63 100.0%
Functions: 6 6 100.0%
Branches: 32 32 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/conversions/gray_to_rgb.h"
6 #include "kleidicv/kleidicv.h"
7 #include "kleidicv/neon.h"
8 #include "kleidicv/types.h"
9
10 namespace kleidicv::neon {
11
12 template <typename ScalarType>
13 class GrayToRGB final : public UnrollOnce {
14 public:
15 using VecTraits = neon::VecTraits<ScalarType>;
16 using VectorType = typename VecTraits::VectorType;
17
18 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
19 64 GrayToRGB() : indices_{} {
20 64 VecTraits::load(kGrayToRGBTableIndices, indices_);
21 64 }
22 #else
23 GrayToRGB() = default;
24 #endif
25
26 460 void vector_path(const ScalarType *src, ScalarType *dst) {
27 460 KLEIDICV_PREFETCH(&src[0] + 1024);
28 460 uint8x16_t src_vect;
29 460 VecTraits::load(&src[0], src_vect);
30 460 uint8x16x3_t dst_vect;
31 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
32 dst_vect.val[0] = src_vect;
33 dst_vect.val[1] = src_vect;
34 dst_vect.val[2] = src_vect;
35 vst3q_u8(dst, dst_vect);
36 #else
37 460 dst_vect.val[0] = vqtbl1q_u8(src_vect, indices_.val[0]);
38 460 dst_vect.val[1] = vqtbl1q_u8(src_vect, indices_.val[1]);
39 460 dst_vect.val[2] = vqtbl1q_u8(src_vect, indices_.val[2]);
40 460 VecTraits::store(dst_vect, dst);
41 #endif
42 460 }
43
44 192 void scalar_path(const ScalarType *src, ScalarType *dst) {
45 192 dst[0] = dst[1] = dst[2] = src[0];
46 192 }
47
48 private:
49 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
50
51 static constexpr uint8_t kGrayToRGBTableIndices[48] = {
52 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5,
53 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10,
54 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 15, 15, 15};
55 uint8x16x3_t indices_;
56 #endif
57 }; // end of class GrayToRGB<ScalarType>
58
59 template <typename ScalarType>
60 class GrayToRGBA final {
61 public:
62 using VecTraits = neon::VecTraits<ScalarType>;
63 using VectorType = typename VecTraits::VectorType;
64
65 66 void process_row(size_t length, Columns<const uint8_t> src,
66 Columns<uint8_t> dst) {
67 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
68 66 uint8x16x4_t indices;
69 66 VecTraits::load(kGrayToRGBATableIndices, indices);
70 #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
71 66 uint8x16x4_t dst_vect;
72 66 uint8x16x2_t src_and_alpha;
73 66 src_and_alpha.val[1] = vdupq_n_u8(0xff);
74
75 66 const size_t unroll_count = length / kVectorLength;
76
2/2
✓ Branch 0 taken 460 times.
✓ Branch 1 taken 66 times.
526 for (size_t i = 0; i < unroll_count; ++i) {
77 460 KLEIDICV_PREFETCH(&src[0] + 1024);
78 460 VecTraits::load(&src[0], src_and_alpha.val[0]);
79 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
80 dst_vect.val[0] = src_and_alpha.val[0];
81 dst_vect.val[1] = src_and_alpha.val[0];
82 dst_vect.val[2] = src_and_alpha.val[0];
83 dst_vect.val[3] = src_and_alpha.val[1];
84 vst4q_u8(&dst[0], dst_vect);
85 #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
86 #if defined(__clang__)
87 460 dst_vect.val[0] = vqtbl2q_u8(src_and_alpha, indices.val[0]);
88 460 dst_vect.val[1] = vqtbl2q_u8(src_and_alpha, indices.val[1]);
89 460 dst_vect.val[2] = vqtbl2q_u8(src_and_alpha, indices.val[2]);
90 460 dst_vect.val[3] = vqtbl2q_u8(src_and_alpha, indices.val[3]);
91 #else // defined(__clang__)
92 asm volatile(
93 "tbl %0.16b, { %4.16b, %5.16b }, %6.16b \n\t"
94 "tbl %1.16b, { %4.16b, %5.16b }, %7.16b \n\t"
95 "tbl %2.16b, { %4.16b, %5.16b }, %8.16b \n\t"
96 "tbl %3.16b, { %4.16b, %5.16b }, %9.16b \n\t"
97 : "=&w"(dst_vect.val[0]), "=&w"(dst_vect.val[1]),
98 "=&w"(dst_vect.val[2]), "=&w"(dst_vect.val[3])
99 : "w"(src_and_alpha.val[0]), "w"(src_and_alpha.val[1]),
100 "w"(indices.val[0]), "w"(indices.val[1]), "w"(indices.val[2]),
101 "w"(indices.val[3])
102 :);
103 #endif // defined(__clang__)
104 460 VecTraits::store(dst_vect, &dst[0]);
105 #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
106 460 src += static_cast<ptrdiff_t>(kVectorLength);
107 460 dst += static_cast<ptrdiff_t>(kVectorLength);
108 460 }
109 66 length -= kVectorLength * unroll_count;
110
111
2/2
✓ Branch 0 taken 66 times.
✓ Branch 1 taken 192 times.
258 for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(length); ++i) {
112 192 disable_loop_vectorization();
113 192 dst.at(i)[0] = dst.at(i)[1] = dst.at(i)[2] = src.at(i)[0];
114 192 dst.at(i)[3] = 0xff;
115 192 }
116 66 }
117
118 private:
119 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
120 static constexpr uint8_t kGrayToRGBATableIndices[64] = {
121 0, 0, 0, 16, 1, 1, 1, 16, 2, 2, 2, 16, 3, 3, 3, 16,
122 4, 4, 4, 16, 5, 5, 5, 16, 6, 6, 6, 16, 7, 7, 7, 16,
123 8, 8, 8, 16, 9, 9, 9, 16, 10, 10, 10, 16, 11, 11, 11, 16,
124 12, 12, 12, 16, 13, 13, 13, 16, 14, 14, 14, 16, 15, 15, 15, 16};
125 #endif
126 }; // end of class GrayToRGBA<ScalarType>
127
128 KLEIDICV_TARGET_FN_ATTRS
129 76 kleidicv_error_t gray_to_rgb_u8(const uint8_t *src, size_t src_stride,
130 uint8_t *dst, size_t dst_stride, size_t width,
131 size_t height) {
132
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 73 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 73 times.
76 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
133
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 70 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 70 times.
73 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
134
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 64 times.
70 CHECK_IMAGE_SIZE(width, height);
135
136 64 Rectangle rect{width, height};
137 64 Rows<const uint8_t> src_rows{src, src_stride};
138 64 Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */};
139 64 GrayToRGB<uint8_t> operation;
140 64 apply_operation_by_rows(operation, rect, src_rows, dst_rows);
141 64 return KLEIDICV_OK;
142 76 }
143
144 KLEIDICV_TARGET_FN_ATTRS
145 76 kleidicv_error_t gray_to_rgba_u8(const uint8_t *src, size_t src_stride,
146 uint8_t *dst, size_t dst_stride, size_t width,
147 size_t height) {
148
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 73 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 73 times.
76 CHECK_POINTER_AND_STRIDE(src, src_stride, height);
149
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 70 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 70 times.
73 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
150
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 64 times.
70 CHECK_IMAGE_SIZE(width, height);
151
152 64 Rectangle rect{width, height};
153 64 Rows<const uint8_t> src_rows{src, src_stride};
154 64 Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */};
155 64 GrayToRGBA<uint8_t> operation;
156 64 zip_rows(operation, rect, src_rows, dst_rows);
157 64 return KLEIDICV_OK;
158 76 }
159
160 } // namespace kleidicv::neon
161