| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "kleidicv/conversions/yuv_to_rgb.h" | ||
| 8 | #include "kleidicv/kleidicv.h" | ||
| 9 | #include "kleidicv/neon.h" | ||
| 10 | #include "yuv420_to_rgb_neon.h" | ||
| 11 | |||
| 12 | namespace kleidicv::neon { | ||
| 13 | template <bool BGR, bool kAlpha> | ||
| 14 | class YUVSpToRGBxOrBGRx final : public YUV420XToRGBxOrBGRx<BGR, kAlpha>, | ||
| 15 | public UnrollOnce, | ||
| 16 | public TryToAvoidTailLoop { | ||
| 17 | public: | ||
| 18 | using VecTraits = neon::VecTraits<uint8_t>; | ||
| 19 | using ScalarType = VecTraits::ScalarType; | ||
| 20 | using VectorType = VecTraits::VectorType; | ||
| 21 | using YUV420XToRGBxOrBGRx<BGR, kAlpha>::de_interleave_indices_; | ||
| 22 | using YUV420XToRGBxOrBGRx<BGR, kAlpha>::yuv420x_to_rgb; | ||
| 23 | using YUV420XToRGBxOrBGRx<BGR, kAlpha>::v_first_; | ||
| 24 | |||
| 25 | 568 | explicit YUVSpToRGBxOrBGRx(bool v_first) | |
| 26 | 568 | : YUV420XToRGBxOrBGRx<BGR, kAlpha>(v_first) {} | |
| 27 | |||
| 28 | // Processes 2 * 16 bytes (even and odd rows) of the input YUV data, and | ||
| 29 | // outputs 2 * 3 (or 4) * 16 bytes of RGB (or RGBA) data per loop iteration. | ||
| 30 | KLEIDICV_FORCE_INLINE | ||
| 31 | 96 | void vector_path(VectorType y0, VectorType y1, VectorType uv, | |
| 32 | ScalarType *rgbx_row_0, ScalarType *rgbx_row_1) { | ||
| 33 | // Widen U and V to 32 bits. | ||
| 34 | 96 | int32x4_t u_l = vqtbl1q_s8(uv, de_interleave_indices_.val[0]); | |
| 35 | 96 | int32x4_t u_h = vqtbl1q_s8(uv, de_interleave_indices_.val[1]); | |
| 36 | |||
| 37 | 96 | int32x4_t v_l = vqtbl1q_s8(uv, de_interleave_indices_.val[2]); | |
| 38 | 96 | int32x4_t v_h = vqtbl1q_s8(uv, de_interleave_indices_.val[3]); | |
| 39 | |||
| 40 | 96 | yuv420x_to_rgb(y0, y1, u_l, u_h, v_l, v_h, rgbx_row_0, rgbx_row_1); | |
| 41 | 96 | } | |
| 42 | |||
| 43 | // Processes inputs which are not long enough to fit a vector. | ||
| 44 | KLEIDICV_FORCE_INLINE | ||
| 45 | 656 | void scalar_path(size_t length, const ScalarType *y_row_0, | |
| 46 | const ScalarType *y_row_1, const ScalarType *uv_row, | ||
| 47 | ScalarType *rgbx_row_0, ScalarType *rgbx_row_1) { | ||
| 48 | 656 | const uint8_t *y_rows[2] = {y_row_0, y_row_1}; | |
| 49 | 656 | uint8_t *rgbx_rows[2] = {rgbx_row_0, rgbx_row_1}; | |
| 50 | |||
| 51 | 656 | int32_t u_m128 = 0, v_m128 = 0; | |
| 52 |
8/8✓ Branch 0 taken 164 times.
✓ Branch 1 taken 1064 times.
✓ Branch 2 taken 164 times.
✓ Branch 3 taken 1064 times.
✓ Branch 4 taken 164 times.
✓ Branch 5 taken 1064 times.
✓ Branch 6 taken 164 times.
✓ Branch 7 taken 1064 times.
|
4912 | for (size_t index = 0; index < length; ++index) { |
| 53 | 4256 | disable_loop_vectorization(); | |
| 54 | |||
| 55 | // There is one {U, V} pair for 4 Y values. | ||
| 56 |
8/8✓ Branch 0 taken 508 times.
✓ Branch 1 taken 556 times.
✓ Branch 2 taken 508 times.
✓ Branch 3 taken 556 times.
✓ Branch 4 taken 508 times.
✓ Branch 5 taken 556 times.
✓ Branch 6 taken 508 times.
✓ Branch 7 taken 556 times.
|
4256 | if ((index % 2) == 0) { |
| 57 | 2224 | u_m128 = uv_row[0] - 128; | |
| 58 | 2224 | v_m128 = uv_row[1] - 128; | |
| 59 | 2224 | uv_row += 2; | |
| 60 |
8/8✓ Branch 0 taken 278 times.
✓ Branch 1 taken 278 times.
✓ Branch 2 taken 278 times.
✓ Branch 3 taken 278 times.
✓ Branch 4 taken 278 times.
✓ Branch 5 taken 278 times.
✓ Branch 6 taken 278 times.
✓ Branch 7 taken 278 times.
|
2224 | if (v_first_) { |
| 61 | 1112 | std::swap(u_m128, v_m128); | |
| 62 | 1112 | } | |
| 63 | 2224 | } | |
| 64 | |||
| 65 | 4256 | yuv420x_to_rgb(y_rows, index, u_m128, v_m128, rgbx_rows); | |
| 66 | 4256 | } | |
| 67 | 656 | } | |
| 68 | }; // end of class YUVSpToRGBxOrBGRx<bool, bool> | ||
| 69 | |||
| 70 | using YUVSpToRGB = YUVSpToRGBxOrBGRx<false, false>; | ||
| 71 | using YUVSpToRGBA = YUVSpToRGBxOrBGRx<false, true>; | ||
| 72 | using YUVSpToBGR = YUVSpToRGBxOrBGRx<true, false>; | ||
| 73 | using YUVSpToBGRA = YUVSpToRGBxOrBGRx<true, true>; | ||
| 74 | |||
| 75 | template <typename OperationType, typename ScalarType> | ||
| 76 | 568 | kleidicv_error_t yuv2rgbx_operation( | |
| 77 | OperationType &operation, const ScalarType *src_y, size_t src_y_stride, | ||
| 78 | const ScalarType *src_uv, size_t src_uv_stride, ScalarType *dst, | ||
| 79 | size_t dst_stride, size_t width, size_t height) { | ||
| 80 |
16/16✓ Branch 0 taken 8 times.
✓ Branch 1 taken 134 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 134 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 134 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 134 times.
✓ Branch 8 taken 8 times.
✓ Branch 9 taken 134 times.
✓ Branch 10 taken 8 times.
✓ Branch 11 taken 134 times.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 134 times.
✓ Branch 14 taken 8 times.
✓ Branch 15 taken 134 times.
|
568 | CHECK_POINTER_AND_STRIDE(src_y, src_y_stride, height); |
| 81 |
16/16✓ Branch 0 taken 8 times.
✓ Branch 1 taken 126 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 126 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 126 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 126 times.
✓ Branch 8 taken 8 times.
✓ Branch 9 taken 126 times.
✓ Branch 10 taken 8 times.
✓ Branch 11 taken 126 times.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 126 times.
✓ Branch 14 taken 8 times.
✓ Branch 15 taken 126 times.
|
536 | CHECK_POINTER_AND_STRIDE(src_uv, src_uv_stride, (height + 1) / 2); |
| 82 |
16/16✓ Branch 0 taken 8 times.
✓ Branch 1 taken 118 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 118 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 118 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 118 times.
✓ Branch 8 taken 8 times.
✓ Branch 9 taken 118 times.
✓ Branch 10 taken 8 times.
✓ Branch 11 taken 118 times.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 118 times.
✓ Branch 14 taken 8 times.
✓ Branch 15 taken 118 times.
|
504 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
| 83 |
24/24✓ Branch 0 taken 10 times.
✓ Branch 1 taken 108 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 98 times.
✓ Branch 4 taken 20 times.
✓ Branch 5 taken 98 times.
✓ Branch 6 taken 10 times.
✓ Branch 7 taken 108 times.
✓ Branch 8 taken 10 times.
✓ Branch 9 taken 98 times.
✓ Branch 10 taken 20 times.
✓ Branch 11 taken 98 times.
✓ Branch 12 taken 10 times.
✓ Branch 13 taken 108 times.
✓ Branch 14 taken 10 times.
✓ Branch 15 taken 98 times.
✓ Branch 16 taken 20 times.
✓ Branch 17 taken 98 times.
✓ Branch 18 taken 10 times.
✓ Branch 19 taken 108 times.
✓ Branch 20 taken 10 times.
✓ Branch 21 taken 98 times.
✓ Branch 22 taken 20 times.
✓ Branch 23 taken 98 times.
|
472 | CHECK_IMAGE_SIZE(width, height); |
| 84 | |||
| 85 | 392 | Rectangle rect{width, height}; | |
| 86 | 392 | ParallelRows y_rows{src_y, src_y_stride}; | |
| 87 | 392 | Rows uv_rows{src_uv, src_uv_stride}; | |
| 88 | 392 | ParallelRows rgbx_rows{dst, dst_stride, operation.output_channels()}; | |
| 89 | |||
| 90 | 392 | RemoveContextAdapter remove_context_adapter{operation}; | |
| 91 | 392 | OperationAdapter operation_adapter{remove_context_adapter}; | |
| 92 | 392 | RemainingPathToScalarPathAdapter remaining_path_adapter{operation_adapter}; | |
| 93 | 392 | OperationContextAdapter context_adapter{remaining_path_adapter}; | |
| 94 | 392 | ParallelRowsAdapter parallel_rows_adapter{context_adapter}; | |
| 95 | 392 | RowBasedOperation row_based_operation{parallel_rows_adapter}; | |
| 96 | 392 | zip_parallel_rows(row_based_operation, rect, y_rows, uv_rows, rgbx_rows); | |
| 97 | 392 | return KLEIDICV_OK; | |
| 98 | 568 | } | |
| 99 | |||
| 100 | KLEIDICV_TARGET_FN_ATTRS | ||
| 101 | 664 | kleidicv_error_t yuv420sp_to_rgb_u8(const uint8_t *src_y, size_t src_y_stride, | |
| 102 | const uint8_t *src_uv, size_t src_uv_stride, | ||
| 103 | uint8_t *dst, size_t dst_stride, | ||
| 104 | size_t width, size_t height, | ||
| 105 | kleidicv_color_conversion_t color_format) { | ||
| 106 |
9/9✓ Branch 0 taken 71 times.
✓ Branch 1 taken 71 times.
✓ Branch 2 taken 71 times.
✓ Branch 3 taken 71 times.
✓ Branch 4 taken 71 times.
✓ Branch 5 taken 71 times.
✓ Branch 6 taken 71 times.
✓ Branch 7 taken 96 times.
✓ Branch 8 taken 71 times.
|
664 | switch (color_format) { |
| 107 | case KLEIDICV_NV21_TO_BGR: { | ||
| 108 | 71 | YUVSpToBGR operation{true}; | |
| 109 | 142 | return yuv2rgbx_operation(operation, src_y, src_y_stride, src_uv, | |
| 110 | 71 | src_uv_stride, dst, dst_stride, width, height); | |
| 111 | 71 | } | |
| 112 | |||
| 113 | case KLEIDICV_NV21_TO_RGB: { | ||
| 114 | 71 | YUVSpToRGB operation{true}; | |
| 115 | 142 | return yuv2rgbx_operation(operation, src_y, src_y_stride, src_uv, | |
| 116 | 71 | src_uv_stride, dst, dst_stride, width, height); | |
| 117 | 71 | } | |
| 118 | |||
| 119 | case KLEIDICV_NV21_TO_BGRA: { | ||
| 120 | 71 | YUVSpToBGRA operation{true}; | |
| 121 | 142 | return yuv2rgbx_operation(operation, src_y, src_y_stride, src_uv, | |
| 122 | 71 | src_uv_stride, dst, dst_stride, width, height); | |
| 123 | 71 | } | |
| 124 | |||
| 125 | case KLEIDICV_NV21_TO_RGBA: { | ||
| 126 | 71 | YUVSpToRGBA operation{true}; | |
| 127 | 142 | return yuv2rgbx_operation(operation, src_y, src_y_stride, src_uv, | |
| 128 | 71 | src_uv_stride, dst, dst_stride, width, height); | |
| 129 | 71 | } | |
| 130 | |||
| 131 | case KLEIDICV_NV12_TO_BGR: { | ||
| 132 | 71 | YUVSpToBGR operation{false}; | |
| 133 | 142 | return yuv2rgbx_operation(operation, src_y, src_y_stride, src_uv, | |
| 134 | 71 | src_uv_stride, dst, dst_stride, width, height); | |
| 135 | 71 | } | |
| 136 | |||
| 137 | case KLEIDICV_NV12_TO_RGB: { | ||
| 138 | 71 | YUVSpToRGB operation{false}; | |
| 139 | 142 | return yuv2rgbx_operation(operation, src_y, src_y_stride, src_uv, | |
| 140 | 71 | src_uv_stride, dst, dst_stride, width, height); | |
| 141 | 71 | } | |
| 142 | |||
| 143 | case KLEIDICV_NV12_TO_BGRA: { | ||
| 144 | 71 | YUVSpToBGRA operation{false}; | |
| 145 | 142 | return yuv2rgbx_operation(operation, src_y, src_y_stride, src_uv, | |
| 146 | 71 | src_uv_stride, dst, dst_stride, width, height); | |
| 147 | 71 | } | |
| 148 | |||
| 149 | case KLEIDICV_NV12_TO_RGBA: { | ||
| 150 | 71 | YUVSpToRGBA operation{false}; | |
| 151 | 142 | return yuv2rgbx_operation(operation, src_y, src_y_stride, src_uv, | |
| 152 | 71 | src_uv_stride, dst, dst_stride, width, height); | |
| 153 | 71 | } | |
| 154 | |||
| 155 | default: | ||
| 156 | 96 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 157 | } | ||
| 158 | |||
| 159 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | ||
| 160 | 664 | } | |
| 161 | |||
| 162 | } // namespace kleidicv::neon | ||
| 163 |