KleidiCV Coverage Report

Directory:	./
File:	kleidicv/src/conversions/yuv_to_rgb_neon.cpp
Date:	2025-09-25 14:13:34

	Exec	Total	Coverage
Lines:	112	112	100.0%
Functions:	24	24	100.0%
Branches:	56	56	100.0%

  
      Line
      Branch
      Exec
      Source
    
      // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
    
      //
    
      // SPDX-License-Identifier: Apache-2.0
    
      #include <utility>
    
      #include "kleidicv/conversions/yuv_to_rgb.h"
    
      #include "kleidicv/ctypes.h"
    
      #include "kleidicv/kleidicv.h"
    
      #include "kleidicv/neon.h"
    
      namespace kleidicv::neon {
    
      template <bool BGR, bool kAlpha>
    
      class YUVToRGBAll final : public UnrollOnce, public TryToAvoidTailLoop {
    
       public:
    
        using VecTraits = neon::VecTraits<uint8_t>;
    
        using ScalarType = VecTraits::ScalarType;
    
        using VectorType = VecTraits::VectorType;
    
        using Vector3Type = VecTraits::Vector3Type;
    
        using RawDestinationVectorType =
    
            typename std::conditional<kAlpha, uint8x16x4_t, uint8x16x3_t>::type;
    
      356
        explicit YUVToRGBAll()
    
      356
            : b_delta4_(vdupq_n_u32(kBDelta4)),
    
      356
              g_delta4_(vdupq_n_u32(kGDelta4)),
    
      356
              r_delta4_(vdupq_n_u32(kRDelta4)) {}
    
        // Returns the number of channels in the output image.
    
      292
        static constexpr size_t output_channels() {
    
      292
          return kAlpha ? /* RGBA */ 4 : /* RGB */ 3;
    
        }
    
      1880
        void vector_path(const ScalarType *src, ScalarType *dst) {
    
          // Load deinterleaved
    
      1880
          Vector3Type vsrc = vld3q_u8(src);
    
      1880
          int16x8_t y_l = vreinterpretq_s16_u8(vzip1q_u8(vsrc.val[0], vdupq_n_u8(0)));
    
      1880
          int16x8_t y_h = vreinterpretq_s16_u8(vzip2q_u8(vsrc.val[0], vdupq_n_u8(0)));
    
      3760
          int16x8_t u4_l =
    
      1880
              vreinterpretq_s16_u16(vshll_n_u8(vget_low_u8(vsrc.val[1]), kPreShift));
    
      3760
          int16x8_t u4_h =
    
      1880
              vreinterpretq_s16_u16(vshll_high_n_u8(vsrc.val[1], kPreShift));
    
      3760
          int16x8_t v4_l =
    
      1880
              vreinterpretq_s16_u16(vshll_n_u8(vget_low_u8(vsrc.val[2]), kPreShift));
    
      3760
          int16x8_t v4_h =
    
      1880
              vreinterpretq_s16_u16(vshll_high_n_u8(vsrc.val[2], kPreShift));
    
      1880
          uint8x16_t r, g, b;
    
          // Compute B value in 32-bit precision
    
          {
    
            // Multiplication is done with uint16_t because UBWeight only fits in
    
            // unsigned 16-bit
    
      3760
            int32x4_t b_ll = vreinterpretq_s32_u32(vmull_n_u16(
    
      1880
                vget_low_u16(vreinterpretq_u16_s16(u4_l)), kUnsignedUBWeight));
    
      3760
            int32x4_t b_hl = vreinterpretq_s32_u32(vmull_n_u16(
    
      1880
                vget_low_u16(vreinterpretq_u16_s16(u4_h)), kUnsignedUBWeight));
    
      3760
            int32x4_t b_lh = vreinterpretq_s32_u32(
    
      1880
                vmull_high_n_u16(vreinterpretq_u16_s16(u4_l), kUnsignedUBWeight));
    
      3760
            int32x4_t b_hh = vreinterpretq_s32_u32(
    
      1880
                vmull_high_n_u16(vreinterpretq_u16_s16(u4_h), kUnsignedUBWeight));
    
      1880
            b_ll = vaddq(b_ll, b_delta4_);
    
      1880
            b_hl = vaddq(b_hl, b_delta4_);
    
      1880
            b_lh = vaddq(b_lh, b_delta4_);
    
      1880
            b_hh = vaddq(b_hh, b_delta4_);
    
      3760
            int16x8_t b_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(b_ll),
    
      1880
                                                  vreinterpretq_s16_s32(b_lh)));
    
      3760
            int16x8_t b_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(b_hl),
    
      1880
                                                  vreinterpretq_s16_s32(b_hh)));
    
      1880
            b = vcombine_u8(vqmovun_s16(b_l), vqmovun_s16(b_h));
    
      1880
          }
    
          // Compute G value in 32-bit precision
    
          {
    
      1880
            int32x4_t g_ll = vmlal_n_s16(g_delta4_, vget_low_s16(u4_l), kUGWeight);
    
      1880
            int32x4_t g_hl = vmlal_n_s16(g_delta4_, vget_low_s16(u4_h), kUGWeight);
    
      1880
            int32x4_t g_lh = vmlal_high_n_s16(g_delta4_, u4_l, kUGWeight);
    
      1880
            int32x4_t g_hh = vmlal_high_n_s16(g_delta4_, u4_h, kUGWeight);
    
      1880
            g_ll = vmlal_n_s16(g_ll, vget_low_s16(v4_l), kVGWeight);
    
      1880
            g_hl = vmlal_n_s16(g_hl, vget_low_s16(v4_h), kVGWeight);
    
      1880
            g_lh = vmlal_high_n_s16(g_lh, v4_l, kVGWeight);
    
      1880
            g_hh = vmlal_high_n_s16(g_hh, v4_h, kVGWeight);
    
      3760
            int16x8_t g_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(g_ll),
    
      1880
                                                  vreinterpretq_s16_s32(g_lh)));
    
      3760
            int16x8_t g_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(g_hl),
    
      1880
                                                  vreinterpretq_s16_s32(g_hh)));
    
      1880
            g = vcombine_u8(vqmovun_s16(g_l), vqmovun_s16(g_h));
    
      1880
          }
    
          // Compute R value in 32-bit precision
    
          {
    
      1880
            int32x4_t r_ll = vmlal_n_s16(r_delta4_, vget_low_s16(v4_l), kVRWeight);
    
      1880
            int32x4_t r_hl = vmlal_n_s16(r_delta4_, vget_low_s16(v4_h), kVRWeight);
    
      1880
            int32x4_t r_lh = vmlal_high_n_s16(r_delta4_, v4_l, kVRWeight);
    
      1880
            int32x4_t r_hh = vmlal_high_n_s16(r_delta4_, v4_h, kVRWeight);
    
      3760
            int16x8_t r_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(r_ll),
    
      1880
                                                  vreinterpretq_s16_s32(r_lh)));
    
      3760
            int16x8_t r_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(r_hl),
    
      1880
                                                  vreinterpretq_s16_s32(r_hh)));
    
      1880
            r = vcombine_u8(vqmovun_s16(r_l), vqmovun_s16(r_h));
    
      1880
          }
    
      1880
          RawDestinationVectorType rgb;
    
      1880
          rgb.val[r_index_] = r;
    
      1880
          rgb.val[g_index_] = g;
    
      1880
          rgb.val[b_index_] = b;
    
          if constexpr (kAlpha) {
    
      940
            rgb.val[alpha_index_] = vdupq_n_u8(alpha_value);
    
            // Store interleaved RGBA pixels to memory.
    
      940
            vst4q_u8(dst, rgb);
    
          } else {
    
            // Store interleaved RGB pixels to memory.
    
      940
            vst3q_u8(dst, rgb);
    
          }
    
      1880
        }
    
      412
        void scalar_path(const ScalarType *src, ScalarType *dst) {
    
      412
          int32_t y = static_cast<int32_t>(src[0]);
    
      412
          int32_t u = static_cast<int32_t>(src[1]);
    
      412
          int32_t v = static_cast<int32_t>(src[2]);
    
      412
          int32_t b = y + rounding_shift_right((u - 128) * kUBWeight, kWeightScale);
    
      824
          int32_t g =
    
      412
              y + rounding_shift_right((u - 128) * kUGWeight + (v - 128) * kVGWeight,
    
                                       kWeightScale);
    
      412
          int32_t r = y + rounding_shift_right((v - 128) * kVRWeight, kWeightScale);
    
      412
          dst[r_index_] = saturating_cast<int32_t, uint8_t>(r);
    
      412
          dst[g_index_] = saturating_cast<int32_t, uint8_t>(g);
    
      412
          dst[b_index_] = saturating_cast<int32_t, uint8_t>(b);
    
          if constexpr (kAlpha) {
    
      206
            dst[alpha_index_] = alpha_value;
    
          }
    
      412
        }
    
       private:
    
        static constexpr size_t r_index_ = BGR ? 2 : 0;
    
        static constexpr size_t g_index_ = 1;
    
        static constexpr size_t b_index_ = BGR ? 0 : 2;
    
        static constexpr size_t alpha_index_ = 3;
    
        static constexpr uint8_t alpha_value = std::numeric_limits<uint8_t>::max();
    
        int32x4_t b_delta4_, g_delta4_, r_delta4_;
    
      };  // end of class YUVToRGBAll<bool BGR>
    
      template <typename OperationType, typename ScalarType>
    
      356
      kleidicv_error_t yuv2rgb_operation(OperationType &operation,
    
                                         const ScalarType *src, size_t src_stride,
    
                                         ScalarType *dst, size_t dst_stride,
    
                                         size_t width, size_t height) {
    
        16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 85 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 85 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 85 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 85 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 85 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 85 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 85 times.

      356
        CHECK_POINTER_AND_STRIDE(src, src_stride, height);
    
        16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 81 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 81 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 81 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 81 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 81 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 81 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 81 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 81 times.

      340
        CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
    
        24/24✓ Branch 0 taken 4 times.
✓ Branch 1 taken 77 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 73 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 73 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 77 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 73 times.
✓ Branch 10 taken 8 times.
✓ Branch 11 taken 73 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 77 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 73 times.
✓ Branch 16 taken 8 times.
✓ Branch 17 taken 73 times.
✓ Branch 18 taken 4 times.
✓ Branch 19 taken 77 times.
✓ Branch 20 taken 4 times.
✓ Branch 21 taken 73 times.
✓ Branch 22 taken 8 times.
✓ Branch 23 taken 73 times.

      324
        CHECK_IMAGE_SIZE(width, height);
    
      292
        Rectangle rect{width, height};
    
      292
        Rows src_rows{src, src_stride, 3};
    
      292
        Rows dst_rows{dst, dst_stride, operation.output_channels()};
    
      292
        apply_operation_by_rows(operation, rect, src_rows, dst_rows);
    
      292
        return KLEIDICV_OK;
    
      356
      }
    
      using YUVToRGB = YUVToRGBAll<false, false>;
    
      using YUVToRGBA = YUVToRGBAll<false, true>;
    
      using YUVToBGR = YUVToRGBAll<true, false>;
    
      using YUVToBGRA = YUVToRGBAll<true, true>;
    
      KLEIDICV_TARGET_FN_ATTRS
    
      89
      kleidicv_error_t yuv_to_rgb_u8(const uint8_t *src, size_t src_stride,
    
                                     uint8_t *dst, size_t dst_stride, size_t width,
    
                                     size_t height) {
    
      89
        YUVToRGB operation;
    
      267
        return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
    
      89
                                 height);
    
      89
      }
    
      KLEIDICV_TARGET_FN_ATTRS
    
      89
      kleidicv_error_t yuv_to_rgba_u8(const uint8_t *src, size_t src_stride,
    
                                      uint8_t *dst, size_t dst_stride, size_t width,
    
                                      size_t height) {
    
      89
        YUVToRGBA operation;
    
      267
        return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
    
      89
                                 height);
    
      89
      }
    
      KLEIDICV_TARGET_FN_ATTRS
    
      89
      kleidicv_error_t yuv_to_bgr_u8(const uint8_t *src, size_t src_stride,
    
                                     uint8_t *dst, size_t dst_stride, size_t width,
    
                                     size_t height) {
    
      89
        YUVToBGR operation;
    
      267
        return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
    
      89
                                 height);
    
      89
      }
    
      KLEIDICV_TARGET_FN_ATTRS
    
      89
      kleidicv_error_t yuv_to_bgra_u8(const uint8_t *src, size_t src_stride,
    
                                      uint8_t *dst, size_t dst_stride, size_t width,
    
                                      size_t height) {
    
      89
        YUVToBGRA operation;
    
      267
        return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
    
      89
                                 height);
    
      89
      }
    
      }  // namespace kleidicv::neon

Line	Branch	Exec	Source
1			// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2			//
3			// SPDX-License-Identifier: Apache-2.0
4
5			#include <utility>
6
7			#include "kleidicv/conversions/yuv_to_rgb.h"
8			#include "kleidicv/ctypes.h"
9			#include "kleidicv/kleidicv.h"
10			#include "kleidicv/neon.h"
11
12			namespace kleidicv::neon {
13
14			template <bool BGR, bool kAlpha>
15			class YUVToRGBAll final : public UnrollOnce, public TryToAvoidTailLoop {
16			public:
17			using VecTraits = neon::VecTraits<uint8_t>;
18			using ScalarType = VecTraits::ScalarType;
19			using VectorType = VecTraits::VectorType;
20			using Vector3Type = VecTraits::Vector3Type;
21			using RawDestinationVectorType =
22			typename std::conditional<kAlpha, uint8x16x4_t, uint8x16x3_t>::type;
23
24		356	explicit YUVToRGBAll()
25		356	: b_delta4_(vdupq_n_u32(kBDelta4)),
26		356	g_delta4_(vdupq_n_u32(kGDelta4)),
27		356	r_delta4_(vdupq_n_u32(kRDelta4)) {}
28
29			// Returns the number of channels in the output image.
30		292	static constexpr size_t output_channels() {
31		292	return kAlpha ? /* RGBA */ 4 : /* RGB */ 3;
32			}
33
34		1880	void vector_path(const ScalarType *src, ScalarType *dst) {
35			// Load deinterleaved
36		1880	Vector3Type vsrc = vld3q_u8(src);
37		1880	int16x8_t y_l = vreinterpretq_s16_u8(vzip1q_u8(vsrc.val[0], vdupq_n_u8(0)));
38		1880	int16x8_t y_h = vreinterpretq_s16_u8(vzip2q_u8(vsrc.val[0], vdupq_n_u8(0)));
39		3760	int16x8_t u4_l =
40		1880	vreinterpretq_s16_u16(vshll_n_u8(vget_low_u8(vsrc.val[1]), kPreShift));
41		3760	int16x8_t u4_h =
42		1880	vreinterpretq_s16_u16(vshll_high_n_u8(vsrc.val[1], kPreShift));
43		3760	int16x8_t v4_l =
44		1880	vreinterpretq_s16_u16(vshll_n_u8(vget_low_u8(vsrc.val[2]), kPreShift));
45		3760	int16x8_t v4_h =
46		1880	vreinterpretq_s16_u16(vshll_high_n_u8(vsrc.val[2], kPreShift));
47		1880	uint8x16_t r, g, b;
48
49			// Compute B value in 32-bit precision
50			{
51			// Multiplication is done with uint16_t because UBWeight only fits in
52			// unsigned 16-bit
53		3760	int32x4_t b_ll = vreinterpretq_s32_u32(vmull_n_u16(
54		1880	vget_low_u16(vreinterpretq_u16_s16(u4_l)), kUnsignedUBWeight));
55		3760	int32x4_t b_hl = vreinterpretq_s32_u32(vmull_n_u16(
56		1880	vget_low_u16(vreinterpretq_u16_s16(u4_h)), kUnsignedUBWeight));
57		3760	int32x4_t b_lh = vreinterpretq_s32_u32(
58		1880	vmull_high_n_u16(vreinterpretq_u16_s16(u4_l), kUnsignedUBWeight));
59		3760	int32x4_t b_hh = vreinterpretq_s32_u32(
60		1880	vmull_high_n_u16(vreinterpretq_u16_s16(u4_h), kUnsignedUBWeight));
61
62		1880	b_ll = vaddq(b_ll, b_delta4_);
63		1880	b_hl = vaddq(b_hl, b_delta4_);
64		1880	b_lh = vaddq(b_lh, b_delta4_);
65		1880	b_hh = vaddq(b_hh, b_delta4_);
66
67		3760	int16x8_t b_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(b_ll),
68		1880	vreinterpretq_s16_s32(b_lh)));
69		3760	int16x8_t b_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(b_hl),
70		1880	vreinterpretq_s16_s32(b_hh)));
71
72		1880	b = vcombine_u8(vqmovun_s16(b_l), vqmovun_s16(b_h));
73		1880	}
74
75			// Compute G value in 32-bit precision
76			{
77		1880	int32x4_t g_ll = vmlal_n_s16(g_delta4_, vget_low_s16(u4_l), kUGWeight);
78		1880	int32x4_t g_hl = vmlal_n_s16(g_delta4_, vget_low_s16(u4_h), kUGWeight);
79		1880	int32x4_t g_lh = vmlal_high_n_s16(g_delta4_, u4_l, kUGWeight);
80		1880	int32x4_t g_hh = vmlal_high_n_s16(g_delta4_, u4_h, kUGWeight);
81
82		1880	g_ll = vmlal_n_s16(g_ll, vget_low_s16(v4_l), kVGWeight);
83		1880	g_hl = vmlal_n_s16(g_hl, vget_low_s16(v4_h), kVGWeight);
84		1880	g_lh = vmlal_high_n_s16(g_lh, v4_l, kVGWeight);
85		1880	g_hh = vmlal_high_n_s16(g_hh, v4_h, kVGWeight);
86
87		3760	int16x8_t g_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(g_ll),
88		1880	vreinterpretq_s16_s32(g_lh)));
89		3760	int16x8_t g_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(g_hl),
90		1880	vreinterpretq_s16_s32(g_hh)));
91
92		1880	g = vcombine_u8(vqmovun_s16(g_l), vqmovun_s16(g_h));
93		1880	}
94
95			// Compute R value in 32-bit precision
96			{
97		1880	int32x4_t r_ll = vmlal_n_s16(r_delta4_, vget_low_s16(v4_l), kVRWeight);
98		1880	int32x4_t r_hl = vmlal_n_s16(r_delta4_, vget_low_s16(v4_h), kVRWeight);
99		1880	int32x4_t r_lh = vmlal_high_n_s16(r_delta4_, v4_l, kVRWeight);
100		1880	int32x4_t r_hh = vmlal_high_n_s16(r_delta4_, v4_h, kVRWeight);
101
102		3760	int16x8_t r_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(r_ll),
103		1880	vreinterpretq_s16_s32(r_lh)));
104		3760	int16x8_t r_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(r_hl),
105		1880	vreinterpretq_s16_s32(r_hh)));
106
107		1880	r = vcombine_u8(vqmovun_s16(r_l), vqmovun_s16(r_h));
108		1880	}
109
110		1880	RawDestinationVectorType rgb;
111		1880	rgb.val[r_index_] = r;
112		1880	rgb.val[g_index_] = g;
113		1880	rgb.val[b_index_] = b;
114			if constexpr (kAlpha) {
115		940	rgb.val[alpha_index_] = vdupq_n_u8(alpha_value);
116			// Store interleaved RGBA pixels to memory.
117		940	vst4q_u8(dst, rgb);
118			} else {
119			// Store interleaved RGB pixels to memory.
120		940	vst3q_u8(dst, rgb);
121			}
122		1880	}
123
124		412	void scalar_path(const ScalarType *src, ScalarType *dst) {
125		412	int32_t y = static_cast<int32_t>(src[0]);
126		412	int32_t u = static_cast<int32_t>(src[1]);
127		412	int32_t v = static_cast<int32_t>(src[2]);
128		412	int32_t b = y + rounding_shift_right((u - 128) * kUBWeight, kWeightScale);
129		824	int32_t g =
130		412	y + rounding_shift_right((u - 128) * kUGWeight + (v - 128) * kVGWeight,
131			kWeightScale);
132		412	int32_t r = y + rounding_shift_right((v - 128) * kVRWeight, kWeightScale);
133		412	dst[r_index_] = saturating_cast<int32_t, uint8_t>(r);
134		412	dst[g_index_] = saturating_cast<int32_t, uint8_t>(g);
135		412	dst[b_index_] = saturating_cast<int32_t, uint8_t>(b);
136			if constexpr (kAlpha) {
137		206	dst[alpha_index_] = alpha_value;
138			}
139		412	}
140
141			private:
142			static constexpr size_t r_index_ = BGR ? 2 : 0;
143			static constexpr size_t g_index_ = 1;
144			static constexpr size_t b_index_ = BGR ? 0 : 2;
145			static constexpr size_t alpha_index_ = 3;
146			static constexpr uint8_t alpha_value = std::numeric_limits<uint8_t>::max();
147			int32x4_t b_delta4_, g_delta4_, r_delta4_;
148			}; // end of class YUVToRGBAll<bool BGR>
149
150			template <typename OperationType, typename ScalarType>
151		356	kleidicv_error_t yuv2rgb_operation(OperationType &operation,
152			const ScalarType *src, size_t src_stride,
153			ScalarType *dst, size_t dst_stride,
154			size_t width, size_t height) {
155	16/16 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 85 times. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 85 times. ✓ Branch 4 taken 4 times. ✓ Branch 5 taken 85 times. ✓ Branch 6 taken 4 times. ✓ Branch 7 taken 85 times. ✓ Branch 8 taken 4 times. ✓ Branch 9 taken 85 times. ✓ Branch 10 taken 4 times. ✓ Branch 11 taken 85 times. ✓ Branch 12 taken 4 times. ✓ Branch 13 taken 85 times. ✓ Branch 14 taken 4 times. ✓ Branch 15 taken 85 times.	356	CHECK_POINTER_AND_STRIDE(src, src_stride, height);
156	16/16 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 81 times. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 81 times. ✓ Branch 4 taken 4 times. ✓ Branch 5 taken 81 times. ✓ Branch 6 taken 4 times. ✓ Branch 7 taken 81 times. ✓ Branch 8 taken 4 times. ✓ Branch 9 taken 81 times. ✓ Branch 10 taken 4 times. ✓ Branch 11 taken 81 times. ✓ Branch 12 taken 4 times. ✓ Branch 13 taken 81 times. ✓ Branch 14 taken 4 times. ✓ Branch 15 taken 81 times.	340	CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
157	24/24 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 77 times. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 73 times. ✓ Branch 4 taken 8 times. ✓ Branch 5 taken 73 times. ✓ Branch 6 taken 4 times. ✓ Branch 7 taken 77 times. ✓ Branch 8 taken 4 times. ✓ Branch 9 taken 73 times. ✓ Branch 10 taken 8 times. ✓ Branch 11 taken 73 times. ✓ Branch 12 taken 4 times. ✓ Branch 13 taken 77 times. ✓ Branch 14 taken 4 times. ✓ Branch 15 taken 73 times. ✓ Branch 16 taken 8 times. ✓ Branch 17 taken 73 times. ✓ Branch 18 taken 4 times. ✓ Branch 19 taken 77 times. ✓ Branch 20 taken 4 times. ✓ Branch 21 taken 73 times. ✓ Branch 22 taken 8 times. ✓ Branch 23 taken 73 times.	324	CHECK_IMAGE_SIZE(width, height);
158
159		292	Rectangle rect{width, height};
160		292	Rows src_rows{src, src_stride, 3};
161		292	Rows dst_rows{dst, dst_stride, operation.output_channels()};
162
163		292	apply_operation_by_rows(operation, rect, src_rows, dst_rows);
164		292	return KLEIDICV_OK;
165		356	}
166
167			using YUVToRGB = YUVToRGBAll<false, false>;
168			using YUVToRGBA = YUVToRGBAll<false, true>;
169			using YUVToBGR = YUVToRGBAll<true, false>;
170			using YUVToBGRA = YUVToRGBAll<true, true>;
171
172			KLEIDICV_TARGET_FN_ATTRS
173		89	kleidicv_error_t yuv_to_rgb_u8(const uint8_t *src, size_t src_stride,
174			uint8_t *dst, size_t dst_stride, size_t width,
175			size_t height) {
176		89	YUVToRGB operation;
177		267	return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
178		89	height);
179		89	}
180
181			KLEIDICV_TARGET_FN_ATTRS
182		89	kleidicv_error_t yuv_to_rgba_u8(const uint8_t *src, size_t src_stride,
183			uint8_t *dst, size_t dst_stride, size_t width,
184			size_t height) {
185		89	YUVToRGBA operation;
186		267	return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
187		89	height);
188		89	}
189
190			KLEIDICV_TARGET_FN_ATTRS
191		89	kleidicv_error_t yuv_to_bgr_u8(const uint8_t *src, size_t src_stride,
192			uint8_t *dst, size_t dst_stride, size_t width,
193			size_t height) {
194		89	YUVToBGR operation;
195		267	return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
196		89	height);
197		89	}
198
199			KLEIDICV_TARGET_FN_ATTRS
200		89	kleidicv_error_t yuv_to_bgra_u8(const uint8_t *src, size_t src_stride,
201			uint8_t *dst, size_t dst_stride, size_t width,
202			size_t height) {
203		89	YUVToBGRA operation;
204		267	return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width,
205		89	height);
206		89	}
207
208			} // namespace kleidicv::neon
209