| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include "kleidicv/conversions/gray_to_rgb.h" | ||
| 6 | #include "kleidicv/kleidicv.h" | ||
| 7 | #include "kleidicv/neon.h" | ||
| 8 | #include "kleidicv/types.h" | ||
| 9 | |||
| 10 | namespace kleidicv::neon { | ||
| 11 | |||
| 12 | template <typename ScalarType> | ||
| 13 | class GrayToRGB final : public UnrollOnce { | ||
| 14 | public: | ||
| 15 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 16 | using VectorType = typename VecTraits::VectorType; | ||
| 17 | |||
| 18 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 19 | 64 | GrayToRGB() : indices_{} { | |
| 20 | 64 | VecTraits::load(kGrayToRGBTableIndices, indices_); | |
| 21 | 64 | } | |
| 22 | #else | ||
| 23 | GrayToRGB() = default; | ||
| 24 | #endif | ||
| 25 | |||
| 26 | 460 | void vector_path(const ScalarType *src, ScalarType *dst) { | |
| 27 | 460 | KLEIDICV_PREFETCH(&src[0] + 1024); | |
| 28 | 460 | uint8x16_t src_vect; | |
| 29 | 460 | VecTraits::load(&src[0], src_vect); | |
| 30 | 460 | uint8x16x3_t dst_vect; | |
| 31 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 32 | dst_vect.val[0] = src_vect; | ||
| 33 | dst_vect.val[1] = src_vect; | ||
| 34 | dst_vect.val[2] = src_vect; | ||
| 35 | vst3q_u8(dst, dst_vect); | ||
| 36 | #else | ||
| 37 | 460 | dst_vect.val[0] = vqtbl1q_u8(src_vect, indices_.val[0]); | |
| 38 | 460 | dst_vect.val[1] = vqtbl1q_u8(src_vect, indices_.val[1]); | |
| 39 | 460 | dst_vect.val[2] = vqtbl1q_u8(src_vect, indices_.val[2]); | |
| 40 | 460 | VecTraits::store(dst_vect, dst); | |
| 41 | #endif | ||
| 42 | 460 | } | |
| 43 | |||
| 44 | 192 | void scalar_path(const ScalarType *src, ScalarType *dst) { | |
| 45 | 192 | dst[0] = dst[1] = dst[2] = src[0]; | |
| 46 | 192 | } | |
| 47 | |||
| 48 | private: | ||
| 49 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 50 | |||
| 51 | static constexpr uint8_t kGrayToRGBTableIndices[48] = { | ||
| 52 | 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, | ||
| 53 | 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, | ||
| 54 | 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 15, 15, 15}; | ||
| 55 | uint8x16x3_t indices_; | ||
| 56 | #endif | ||
| 57 | }; // end of class GrayToRGB<ScalarType> | ||
| 58 | |||
| 59 | template <typename ScalarType> | ||
| 60 | class GrayToRGBA final { | ||
| 61 | public: | ||
| 62 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 63 | using VectorType = typename VecTraits::VectorType; | ||
| 64 | |||
| 65 | 66 | void process_row(size_t length, Columns<const uint8_t> src, | |
| 66 | Columns<uint8_t> dst) { | ||
| 67 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 68 | 66 | uint8x16x4_t indices; | |
| 69 | 66 | VecTraits::load(kGrayToRGBATableIndices, indices); | |
| 70 | #endif // !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 71 | 66 | uint8x16x4_t dst_vect; | |
| 72 | 66 | uint8x16x2_t src_and_alpha; | |
| 73 | 66 | src_and_alpha.val[1] = vdupq_n_u8(0xff); | |
| 74 | |||
| 75 | 66 | const size_t unroll_count = length / kVectorLength; | |
| 76 |
2/2✓ Branch 0 taken 460 times.
✓ Branch 1 taken 66 times.
|
526 | for (size_t i = 0; i < unroll_count; ++i) { |
| 77 | 460 | KLEIDICV_PREFETCH(&src[0] + 1024); | |
| 78 | 460 | VecTraits::load(&src[0], src_and_alpha.val[0]); | |
| 79 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 80 | dst_vect.val[0] = src_and_alpha.val[0]; | ||
| 81 | dst_vect.val[1] = src_and_alpha.val[0]; | ||
| 82 | dst_vect.val[2] = src_and_alpha.val[0]; | ||
| 83 | dst_vect.val[3] = src_and_alpha.val[1]; | ||
| 84 | vst4q_u8(&dst[0], dst_vect); | ||
| 85 | #else // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 86 | #if defined(__clang__) | ||
| 87 | 460 | dst_vect.val[0] = vqtbl2q_u8(src_and_alpha, indices.val[0]); | |
| 88 | 460 | dst_vect.val[1] = vqtbl2q_u8(src_and_alpha, indices.val[1]); | |
| 89 | 460 | dst_vect.val[2] = vqtbl2q_u8(src_and_alpha, indices.val[2]); | |
| 90 | 460 | dst_vect.val[3] = vqtbl2q_u8(src_and_alpha, indices.val[3]); | |
| 91 | #else // defined(__clang__) | ||
| 92 | asm volatile( | ||
| 93 | "tbl %0.16b, { %4.16b, %5.16b }, %6.16b \n\t" | ||
| 94 | "tbl %1.16b, { %4.16b, %5.16b }, %7.16b \n\t" | ||
| 95 | "tbl %2.16b, { %4.16b, %5.16b }, %8.16b \n\t" | ||
| 96 | "tbl %3.16b, { %4.16b, %5.16b }, %9.16b \n\t" | ||
| 97 | : "=&w"(dst_vect.val[0]), "=&w"(dst_vect.val[1]), | ||
| 98 | "=&w"(dst_vect.val[2]), "=&w"(dst_vect.val[3]) | ||
| 99 | : "w"(src_and_alpha.val[0]), "w"(src_and_alpha.val[1]), | ||
| 100 | "w"(indices.val[0]), "w"(indices.val[1]), "w"(indices.val[2]), | ||
| 101 | "w"(indices.val[3]) | ||
| 102 | :); | ||
| 103 | #endif // defined(__clang__) | ||
| 104 | 460 | VecTraits::store(dst_vect, &dst[0]); | |
| 105 | #endif // KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 106 | 460 | src += static_cast<ptrdiff_t>(kVectorLength); | |
| 107 | 460 | dst += static_cast<ptrdiff_t>(kVectorLength); | |
| 108 | 460 | } | |
| 109 | 66 | length -= kVectorLength * unroll_count; | |
| 110 | |||
| 111 |
2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 192 times.
|
258 | for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(length); ++i) { |
| 112 | 192 | disable_loop_vectorization(); | |
| 113 | 192 | dst.at(i)[0] = dst.at(i)[1] = dst.at(i)[2] = src.at(i)[0]; | |
| 114 | 192 | dst.at(i)[3] = 0xff; | |
| 115 | 192 | } | |
| 116 | 66 | } | |
| 117 | |||
| 118 | private: | ||
| 119 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 120 | static constexpr uint8_t kGrayToRGBATableIndices[64] = { | ||
| 121 | 0, 0, 0, 16, 1, 1, 1, 16, 2, 2, 2, 16, 3, 3, 3, 16, | ||
| 122 | 4, 4, 4, 16, 5, 5, 5, 16, 6, 6, 6, 16, 7, 7, 7, 16, | ||
| 123 | 8, 8, 8, 16, 9, 9, 9, 16, 10, 10, 10, 16, 11, 11, 11, 16, | ||
| 124 | 12, 12, 12, 16, 13, 13, 13, 16, 14, 14, 14, 16, 15, 15, 15, 16}; | ||
| 125 | #endif | ||
| 126 | }; // end of class GrayToRGBA<ScalarType> | ||
| 127 | |||
| 128 | KLEIDICV_TARGET_FN_ATTRS | ||
| 129 | 76 | kleidicv_error_t gray_to_rgb_u8(const uint8_t *src, size_t src_stride, | |
| 130 | uint8_t *dst, size_t dst_stride, size_t width, | ||
| 131 | size_t height) { | ||
| 132 |
4/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 73 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 73 times.
|
76 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
| 133 |
4/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 70 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 70 times.
|
73 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
| 134 |
6/6✓ Branch 0 taken 3 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 64 times.
|
70 | CHECK_IMAGE_SIZE(width, height); |
| 135 | |||
| 136 | 64 | Rectangle rect{width, height}; | |
| 137 | 64 | Rows<const uint8_t> src_rows{src, src_stride}; | |
| 138 | 64 | Rows<uint8_t> dst_rows{dst, dst_stride, 3 /* RGB */}; | |
| 139 | 64 | GrayToRGB<uint8_t> operation; | |
| 140 | 64 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
| 141 | 64 | return KLEIDICV_OK; | |
| 142 | 76 | } | |
| 143 | |||
| 144 | KLEIDICV_TARGET_FN_ATTRS | ||
| 145 | 76 | kleidicv_error_t gray_to_rgba_u8(const uint8_t *src, size_t src_stride, | |
| 146 | uint8_t *dst, size_t dst_stride, size_t width, | ||
| 147 | size_t height) { | ||
| 148 |
4/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 73 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 73 times.
|
76 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
| 149 |
4/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 70 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 70 times.
|
73 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
| 150 |
6/6✓ Branch 0 taken 3 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 64 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 64 times.
|
70 | CHECK_IMAGE_SIZE(width, height); |
| 151 | |||
| 152 | 64 | Rectangle rect{width, height}; | |
| 153 | 64 | Rows<const uint8_t> src_rows{src, src_stride}; | |
| 154 | 64 | Rows<uint8_t> dst_rows{dst, dst_stride, 4 /* RGBA */}; | |
| 155 | 64 | GrayToRGBA<uint8_t> operation; | |
| 156 | 64 | zip_rows(operation, rect, src_rows, dst_rows); | |
| 157 | 64 | return KLEIDICV_OK; | |
| 158 | 76 | } | |
| 159 | |||
| 160 | } // namespace kleidicv::neon | ||
| 161 |