KleidiCV Coverage Report

Directory:	./
File:	kleidicv/src/conversions/yuv444_to_rgb_neon.cpp
Date:	2026-03-05 15:57:40

	Exec	Total	Coverage
Lines:	112	112	100.0%
Functions:	21	21	100.0%
Branches:	61	61	100.0%

  
      Line
      Branch
      Exec
      Source
    
      // SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
    
      //
    
      // SPDX-License-Identifier: Apache-2.0
    
      #include <utility>
    
      #include "kleidicv/conversions/yuv_to_rgb.h"
    
      #include "kleidicv/ctypes.h"
    
      #include "kleidicv/kleidicv.h"
    
      #include "kleidicv/neon.h"
    
      #include "yuv444_coefficients.h"
    
      namespace kleidicv::neon {
    
      template <bool BGR, bool kAlpha>
    
      class YUVToRGBAll final : public UnrollOnce, public TryToAvoidTailLoop {
    
       public:
    
        using VecTraits = neon::VecTraits<uint8_t>;
    
        using ScalarType = VecTraits::ScalarType;
    
        using VectorType = VecTraits::VectorType;
    
        using Vector3Type = VecTraits::Vector3Type;
    
        using RawDestinationVectorType =
    
            typename std::conditional<kAlpha, uint8x16x4_t, uint8x16x3_t>::type;
    
      336
        explicit YUVToRGBAll()
    
      336
            : b_delta4_(vdupq_n_u32(kBDelta4)),
    
      336
              g_delta4_(vdupq_n_u32(kGDelta4)),
    
      336
              r_delta4_(vdupq_n_u32(kRDelta4)) {}
    
        // Returns the number of channels in the output image.
    
      260
        static constexpr size_t output_channels() {
    
      260
          return kAlpha ? /* RGBA */ 4 : /* RGB */ 3;
    
        }
    
        KLEIDICV_FORCE_INLINE
    
      1864
        void vector_path(const ScalarType *src, ScalarType *dst) {
    
          // Load deinterleaved
    
      1864
          Vector3Type vsrc = vld3q_u8(src);
    
      1864
          int16x8_t y_l = vreinterpretq_s16_u8(vzip1q_u8(vsrc.val[0], vdupq_n_u8(0)));
    
      1864
          int16x8_t y_h = vreinterpretq_s16_u8(vzip2q_u8(vsrc.val[0], vdupq_n_u8(0)));
    
      3728
          int16x8_t u4_l =
    
      1864
              vreinterpretq_s16_u16(vshll_n_u8(vget_low_u8(vsrc.val[1]), kPreShift));
    
      3728
          int16x8_t u4_h =
    
      1864
              vreinterpretq_s16_u16(vshll_high_n_u8(vsrc.val[1], kPreShift));
    
      3728
          int16x8_t v4_l =
    
      1864
              vreinterpretq_s16_u16(vshll_n_u8(vget_low_u8(vsrc.val[2]), kPreShift));
    
      3728
          int16x8_t v4_h =
    
      1864
              vreinterpretq_s16_u16(vshll_high_n_u8(vsrc.val[2], kPreShift));
    
      1864
          uint8x16_t r, g, b;
    
          // Compute B value in 32-bit precision
    
          {
    
            // Multiplication is done with uint16_t because UBWeight only fits in
    
            // unsigned 16-bit
    
      3728
            int32x4_t b_ll = vreinterpretq_s32_u32(vmull_n_u16(
    
      1864
                vget_low_u16(vreinterpretq_u16_s16(u4_l)), kUnsignedUBWeight));
    
      3728
            int32x4_t b_hl = vreinterpretq_s32_u32(vmull_n_u16(
    
      1864
                vget_low_u16(vreinterpretq_u16_s16(u4_h)), kUnsignedUBWeight));
    
      3728
            int32x4_t b_lh = vreinterpretq_s32_u32(
    
      1864
                vmull_high_n_u16(vreinterpretq_u16_s16(u4_l), kUnsignedUBWeight));
    
      3728
            int32x4_t b_hh = vreinterpretq_s32_u32(
    
      1864
                vmull_high_n_u16(vreinterpretq_u16_s16(u4_h), kUnsignedUBWeight));
    
      1864
            b_ll = vaddq(b_ll, b_delta4_);
    
      1864
            b_hl = vaddq(b_hl, b_delta4_);
    
      1864
            b_lh = vaddq(b_lh, b_delta4_);
    
      1864
            b_hh = vaddq(b_hh, b_delta4_);
    
      3728
            int16x8_t b_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(b_ll),
    
      1864
                                                  vreinterpretq_s16_s32(b_lh)));
    
      3728
            int16x8_t b_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(b_hl),
    
      1864
                                                  vreinterpretq_s16_s32(b_hh)));
    
      1864
            b = vcombine_u8(vqmovun_s16(b_l), vqmovun_s16(b_h));
    
      1864
          }
    
          // Compute G value in 32-bit precision
    
          {
    
      1864
            int32x4_t g_ll = vmlal_n_s16(g_delta4_, vget_low_s16(u4_l), kUGWeight);
    
      1864
            int32x4_t g_hl = vmlal_n_s16(g_delta4_, vget_low_s16(u4_h), kUGWeight);
    
      1864
            int32x4_t g_lh = vmlal_high_n_s16(g_delta4_, u4_l, kUGWeight);
    
      1864
            int32x4_t g_hh = vmlal_high_n_s16(g_delta4_, u4_h, kUGWeight);
    
      1864
            g_ll = vmlal_n_s16(g_ll, vget_low_s16(v4_l), kVGWeight);
    
      1864
            g_hl = vmlal_n_s16(g_hl, vget_low_s16(v4_h), kVGWeight);
    
      1864
            g_lh = vmlal_high_n_s16(g_lh, v4_l, kVGWeight);
    
      1864
            g_hh = vmlal_high_n_s16(g_hh, v4_h, kVGWeight);
    
      3728
            int16x8_t g_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(g_ll),
    
      1864
                                                  vreinterpretq_s16_s32(g_lh)));
    
      3728
            int16x8_t g_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(g_hl),
    
      1864
                                                  vreinterpretq_s16_s32(g_hh)));
    
      1864
            g = vcombine_u8(vqmovun_s16(g_l), vqmovun_s16(g_h));
    
      1864
          }
    
          // Compute R value in 32-bit precision
    
          {
    
      1864
            int32x4_t r_ll = vmlal_n_s16(r_delta4_, vget_low_s16(v4_l), kVRWeight);
    
      1864
            int32x4_t r_hl = vmlal_n_s16(r_delta4_, vget_low_s16(v4_h), kVRWeight);
    
      1864
            int32x4_t r_lh = vmlal_high_n_s16(r_delta4_, v4_l, kVRWeight);
    
      1864
            int32x4_t r_hh = vmlal_high_n_s16(r_delta4_, v4_h, kVRWeight);
    
      3728
            int16x8_t r_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(r_ll),
    
      1864
                                                  vreinterpretq_s16_s32(r_lh)));
    
      3728
            int16x8_t r_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(r_hl),
    
      1864
                                                  vreinterpretq_s16_s32(r_hh)));
    
      1864
            r = vcombine_u8(vqmovun_s16(r_l), vqmovun_s16(r_h));
    
      1864
          }
    
      1864
          RawDestinationVectorType rgb;
    
      1864
          rgb.val[r_index_] = r;
    
      1864
          rgb.val[g_index_] = g;
    
      1864
          rgb.val[b_index_] = b;
    
          if constexpr (kAlpha) {
    
      932
            rgb.val[alpha_index_] = vdupq_n_u8(alpha_value);
    
            // Store interleaved RGBA pixels to memory.
    
      932
            vst4q_u8(dst, rgb);
    
          } else {
    
            // Store interleaved RGB pixels to memory.
    
      932
            vst3q_u8(dst, rgb);
    
          }
    
      1864
        }
    
        KLEIDICV_FORCE_INLINE
    
      492
        void scalar_path(const ScalarType *src, ScalarType *dst) {
    
      492
          int32_t y = static_cast<int32_t>(src[0]);
    
      492
          int32_t u = static_cast<int32_t>(src[1]);
    
      492
          int32_t v = static_cast<int32_t>(src[2]);
    
      492
          int32_t b = y + rounding_shift_right((u - 128) * kUBWeight, kWeightScale);
    
      984
          int32_t g =
    
      492
              y + rounding_shift_right((u - 128) * kUGWeight + (v - 128) * kVGWeight,
    
                                       kWeightScale);
    
      492
          int32_t r = y + rounding_shift_right((v - 128) * kVRWeight, kWeightScale);
    
      492
          dst[r_index_] = saturating_cast<int32_t, uint8_t>(r);
    
      492
          dst[g_index_] = saturating_cast<int32_t, uint8_t>(g);
    
      492
          dst[b_index_] = saturating_cast<int32_t, uint8_t>(b);
    
          if constexpr (kAlpha) {
    
      246
            dst[alpha_index_] = alpha_value;
    
          }
    
      492
        }
    
       private:
    
        static constexpr size_t r_index_ = BGR ? 2 : 0;
    
        static constexpr size_t g_index_ = 1;
    
        static constexpr size_t b_index_ = BGR ? 0 : 2;
    
        static constexpr size_t alpha_index_ = 3;
    
        static constexpr uint8_t alpha_value = std::numeric_limits<uint8_t>::max();
    
        int32x4_t b_delta4_, g_delta4_, r_delta4_;
    
      };  // end of class YUVToRGBAll<bool BGR>
    
      template <typename OperationType, typename ScalarType>
    
      336
      KLEIDICV_FORCE_INLINE kleidicv_error_t yuv2rgb_operation(
    
          OperationType &operation, const ScalarType *src, size_t src_stride,
    
          ScalarType *dst, size_t dst_stride, size_t width, size_t height) {
    
        16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 80 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 80 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 80 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 80 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 80 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 80 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 80 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 80 times.

      336
        CHECK_POINTER_AND_STRIDE(src, src_stride, height);
    
        16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 76 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 76 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 76 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 76 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 76 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 76 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 76 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 76 times.

      320
        CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
    
        24/24✓ Branch 0 taken 6 times.
✓ Branch 1 taken 70 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 65 times.
✓ Branch 4 taken 11 times.
✓ Branch 5 taken 65 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 70 times.
✓ Branch 8 taken 5 times.
✓ Branch 9 taken 65 times.
✓ Branch 10 taken 11 times.
✓ Branch 11 taken 65 times.
✓ Branch 12 taken 6 times.
✓ Branch 13 taken 70 times.
✓ Branch 14 taken 5 times.
✓ Branch 15 taken 65 times.
✓ Branch 16 taken 11 times.
✓ Branch 17 taken 65 times.
✓ Branch 18 taken 6 times.
✓ Branch 19 taken 70 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 65 times.
✓ Branch 22 taken 11 times.
✓ Branch 23 taken 65 times.

      304
        CHECK_IMAGE_SIZE(width, height);
    
      260
        Rectangle rect{width, height};
    
      260
        Rows src_rows{src, src_stride, 3};
    
      260
        Rows dst_rows{dst, dst_stride, operation.output_channels()};
    
      260
        apply_operation_by_rows(operation, rect, src_rows, dst_rows);
    
      260
        return KLEIDICV_OK;
    
      336
      }
    
      using YUVToRGB = YUVToRGBAll<false, false>;
    
      using YUVToRGBA = YUVToRGBAll<false, true>;
    
      using YUVToBGR = YUVToRGBAll<true, false>;
    
      using YUVToBGRA = YUVToRGBAll<true, true>;
    
      KLEIDICV_TARGET_FN_ATTRS
    
      360
      kleidicv_error_t yuv444_to_rgb_u8(const uint8_t *src, size_t src_stride,
    
                                        uint8_t *dst, size_t dst_stride, size_t width,
    
                                        size_t height,
    
                                        kleidicv_color_conversion_t color_format) {
    
        5/5✓ Branch 0 taken 84 times.
✓ Branch 1 taken 24 times.
✓ Branch 2 taken 84 times.
✓ Branch 3 taken 84 times.
✓ Branch 4 taken 84 times.

      360
        switch (color_format) {
    
          case KLEIDICV_YUV444_TO_RGB: {
    
      84
            YUVToRGB operation;
    
      168
            return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
    
      84
                                     width, height);
    
      84
          }
    
          case KLEIDICV_YUV444_TO_BGR: {
    
      84
            YUVToBGR operation;
    
      168
            return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
    
      84
                                     width, height);
    
      84
          }
    
          case KLEIDICV_YUV444_TO_RGBA: {
    
      84
            YUVToRGBA operation;
    
      168
            return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
    
      84
                                     width, height);
    
      84
          }
    
          case KLEIDICV_YUV444_TO_BGRA: {
    
      84
            YUVToBGRA operation;
    
      168
            return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
    
      84
                                     width, height);
    
      84
          }
    
          default:
    
      24
            return KLEIDICV_ERROR_NOT_IMPLEMENTED;
    
        }
    
        return KLEIDICV_ERROR_NOT_IMPLEMENTED;
    
      360
      }
    
      }  // namespace kleidicv::neon

Line	Branch	Exec	Source
1			// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2			//
3			// SPDX-License-Identifier: Apache-2.0
4
5			#include <utility>
6
7			#include "kleidicv/conversions/yuv_to_rgb.h"
8			#include "kleidicv/ctypes.h"
9			#include "kleidicv/kleidicv.h"
10			#include "kleidicv/neon.h"
11			#include "yuv444_coefficients.h"
12
13			namespace kleidicv::neon {
14
15			template <bool BGR, bool kAlpha>
16			class YUVToRGBAll final : public UnrollOnce, public TryToAvoidTailLoop {
17			public:
18			using VecTraits = neon::VecTraits<uint8_t>;
19			using ScalarType = VecTraits::ScalarType;
20			using VectorType = VecTraits::VectorType;
21			using Vector3Type = VecTraits::Vector3Type;
22			using RawDestinationVectorType =
23			typename std::conditional<kAlpha, uint8x16x4_t, uint8x16x3_t>::type;
24
25		336	explicit YUVToRGBAll()
26		336	: b_delta4_(vdupq_n_u32(kBDelta4)),
27		336	g_delta4_(vdupq_n_u32(kGDelta4)),
28		336	r_delta4_(vdupq_n_u32(kRDelta4)) {}
29
30			// Returns the number of channels in the output image.
31		260	static constexpr size_t output_channels() {
32		260	return kAlpha ? /* RGBA */ 4 : /* RGB */ 3;
33			}
34
35			KLEIDICV_FORCE_INLINE
36		1864	void vector_path(const ScalarType *src, ScalarType *dst) {
37			// Load deinterleaved
38		1864	Vector3Type vsrc = vld3q_u8(src);
39		1864	int16x8_t y_l = vreinterpretq_s16_u8(vzip1q_u8(vsrc.val[0], vdupq_n_u8(0)));
40		1864	int16x8_t y_h = vreinterpretq_s16_u8(vzip2q_u8(vsrc.val[0], vdupq_n_u8(0)));
41		3728	int16x8_t u4_l =
42		1864	vreinterpretq_s16_u16(vshll_n_u8(vget_low_u8(vsrc.val[1]), kPreShift));
43		3728	int16x8_t u4_h =
44		1864	vreinterpretq_s16_u16(vshll_high_n_u8(vsrc.val[1], kPreShift));
45		3728	int16x8_t v4_l =
46		1864	vreinterpretq_s16_u16(vshll_n_u8(vget_low_u8(vsrc.val[2]), kPreShift));
47		3728	int16x8_t v4_h =
48		1864	vreinterpretq_s16_u16(vshll_high_n_u8(vsrc.val[2], kPreShift));
49		1864	uint8x16_t r, g, b;
50
51			// Compute B value in 32-bit precision
52			{
53			// Multiplication is done with uint16_t because UBWeight only fits in
54			// unsigned 16-bit
55		3728	int32x4_t b_ll = vreinterpretq_s32_u32(vmull_n_u16(
56		1864	vget_low_u16(vreinterpretq_u16_s16(u4_l)), kUnsignedUBWeight));
57		3728	int32x4_t b_hl = vreinterpretq_s32_u32(vmull_n_u16(
58		1864	vget_low_u16(vreinterpretq_u16_s16(u4_h)), kUnsignedUBWeight));
59		3728	int32x4_t b_lh = vreinterpretq_s32_u32(
60		1864	vmull_high_n_u16(vreinterpretq_u16_s16(u4_l), kUnsignedUBWeight));
61		3728	int32x4_t b_hh = vreinterpretq_s32_u32(
62		1864	vmull_high_n_u16(vreinterpretq_u16_s16(u4_h), kUnsignedUBWeight));
63
64		1864	b_ll = vaddq(b_ll, b_delta4_);
65		1864	b_hl = vaddq(b_hl, b_delta4_);
66		1864	b_lh = vaddq(b_lh, b_delta4_);
67		1864	b_hh = vaddq(b_hh, b_delta4_);
68
69		3728	int16x8_t b_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(b_ll),
70		1864	vreinterpretq_s16_s32(b_lh)));
71		3728	int16x8_t b_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(b_hl),
72		1864	vreinterpretq_s16_s32(b_hh)));
73
74		1864	b = vcombine_u8(vqmovun_s16(b_l), vqmovun_s16(b_h));
75		1864	}
76
77			// Compute G value in 32-bit precision
78			{
79		1864	int32x4_t g_ll = vmlal_n_s16(g_delta4_, vget_low_s16(u4_l), kUGWeight);
80		1864	int32x4_t g_hl = vmlal_n_s16(g_delta4_, vget_low_s16(u4_h), kUGWeight);
81		1864	int32x4_t g_lh = vmlal_high_n_s16(g_delta4_, u4_l, kUGWeight);
82		1864	int32x4_t g_hh = vmlal_high_n_s16(g_delta4_, u4_h, kUGWeight);
83
84		1864	g_ll = vmlal_n_s16(g_ll, vget_low_s16(v4_l), kVGWeight);
85		1864	g_hl = vmlal_n_s16(g_hl, vget_low_s16(v4_h), kVGWeight);
86		1864	g_lh = vmlal_high_n_s16(g_lh, v4_l, kVGWeight);
87		1864	g_hh = vmlal_high_n_s16(g_hh, v4_h, kVGWeight);
88
89		3728	int16x8_t g_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(g_ll),
90		1864	vreinterpretq_s16_s32(g_lh)));
91		3728	int16x8_t g_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(g_hl),
92		1864	vreinterpretq_s16_s32(g_hh)));
93
94		1864	g = vcombine_u8(vqmovun_s16(g_l), vqmovun_s16(g_h));
95		1864	}
96
97			// Compute R value in 32-bit precision
98			{
99		1864	int32x4_t r_ll = vmlal_n_s16(r_delta4_, vget_low_s16(v4_l), kVRWeight);
100		1864	int32x4_t r_hl = vmlal_n_s16(r_delta4_, vget_low_s16(v4_h), kVRWeight);
101		1864	int32x4_t r_lh = vmlal_high_n_s16(r_delta4_, v4_l, kVRWeight);
102		1864	int32x4_t r_hh = vmlal_high_n_s16(r_delta4_, v4_h, kVRWeight);
103
104		3728	int16x8_t r_l = vaddq(y_l, vuzp2q_s16(vreinterpretq_s16_s32(r_ll),
105		1864	vreinterpretq_s16_s32(r_lh)));
106		3728	int16x8_t r_h = vaddq(y_h, vuzp2q_s16(vreinterpretq_s16_s32(r_hl),
107		1864	vreinterpretq_s16_s32(r_hh)));
108
109		1864	r = vcombine_u8(vqmovun_s16(r_l), vqmovun_s16(r_h));
110		1864	}
111
112		1864	RawDestinationVectorType rgb;
113		1864	rgb.val[r_index_] = r;
114		1864	rgb.val[g_index_] = g;
115		1864	rgb.val[b_index_] = b;
116			if constexpr (kAlpha) {
117		932	rgb.val[alpha_index_] = vdupq_n_u8(alpha_value);
118			// Store interleaved RGBA pixels to memory.
119		932	vst4q_u8(dst, rgb);
120			} else {
121			// Store interleaved RGB pixels to memory.
122		932	vst3q_u8(dst, rgb);
123			}
124		1864	}
125
126			KLEIDICV_FORCE_INLINE
127		492	void scalar_path(const ScalarType *src, ScalarType *dst) {
128		492	int32_t y = static_cast<int32_t>(src[0]);
129		492	int32_t u = static_cast<int32_t>(src[1]);
130		492	int32_t v = static_cast<int32_t>(src[2]);
131		492	int32_t b = y + rounding_shift_right((u - 128) * kUBWeight, kWeightScale);
132		984	int32_t g =
133		492	y + rounding_shift_right((u - 128) * kUGWeight + (v - 128) * kVGWeight,
134			kWeightScale);
135		492	int32_t r = y + rounding_shift_right((v - 128) * kVRWeight, kWeightScale);
136		492	dst[r_index_] = saturating_cast<int32_t, uint8_t>(r);
137		492	dst[g_index_] = saturating_cast<int32_t, uint8_t>(g);
138		492	dst[b_index_] = saturating_cast<int32_t, uint8_t>(b);
139			if constexpr (kAlpha) {
140		246	dst[alpha_index_] = alpha_value;
141			}
142		492	}
143
144			private:
145			static constexpr size_t r_index_ = BGR ? 2 : 0;
146			static constexpr size_t g_index_ = 1;
147			static constexpr size_t b_index_ = BGR ? 0 : 2;
148			static constexpr size_t alpha_index_ = 3;
149			static constexpr uint8_t alpha_value = std::numeric_limits<uint8_t>::max();
150			int32x4_t b_delta4_, g_delta4_, r_delta4_;
151			}; // end of class YUVToRGBAll<bool BGR>
152
153			template <typename OperationType, typename ScalarType>
154		336	KLEIDICV_FORCE_INLINE kleidicv_error_t yuv2rgb_operation(
155			OperationType &operation, const ScalarType *src, size_t src_stride,
156			ScalarType *dst, size_t dst_stride, size_t width, size_t height) {
157	16/16 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 80 times. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 80 times. ✓ Branch 4 taken 4 times. ✓ Branch 5 taken 80 times. ✓ Branch 6 taken 4 times. ✓ Branch 7 taken 80 times. ✓ Branch 8 taken 4 times. ✓ Branch 9 taken 80 times. ✓ Branch 10 taken 4 times. ✓ Branch 11 taken 80 times. ✓ Branch 12 taken 4 times. ✓ Branch 13 taken 80 times. ✓ Branch 14 taken 4 times. ✓ Branch 15 taken 80 times.	336	CHECK_POINTER_AND_STRIDE(src, src_stride, height);
158	16/16 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 76 times. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 76 times. ✓ Branch 4 taken 4 times. ✓ Branch 5 taken 76 times. ✓ Branch 6 taken 4 times. ✓ Branch 7 taken 76 times. ✓ Branch 8 taken 4 times. ✓ Branch 9 taken 76 times. ✓ Branch 10 taken 4 times. ✓ Branch 11 taken 76 times. ✓ Branch 12 taken 4 times. ✓ Branch 13 taken 76 times. ✓ Branch 14 taken 4 times. ✓ Branch 15 taken 76 times.	320	CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
159	24/24 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 70 times. ✓ Branch 2 taken 5 times. ✓ Branch 3 taken 65 times. ✓ Branch 4 taken 11 times. ✓ Branch 5 taken 65 times. ✓ Branch 6 taken 6 times. ✓ Branch 7 taken 70 times. ✓ Branch 8 taken 5 times. ✓ Branch 9 taken 65 times. ✓ Branch 10 taken 11 times. ✓ Branch 11 taken 65 times. ✓ Branch 12 taken 6 times. ✓ Branch 13 taken 70 times. ✓ Branch 14 taken 5 times. ✓ Branch 15 taken 65 times. ✓ Branch 16 taken 11 times. ✓ Branch 17 taken 65 times. ✓ Branch 18 taken 6 times. ✓ Branch 19 taken 70 times. ✓ Branch 20 taken 5 times. ✓ Branch 21 taken 65 times. ✓ Branch 22 taken 11 times. ✓ Branch 23 taken 65 times.	304	CHECK_IMAGE_SIZE(width, height);
160
161		260	Rectangle rect{width, height};
162		260	Rows src_rows{src, src_stride, 3};
163		260	Rows dst_rows{dst, dst_stride, operation.output_channels()};
164
165		260	apply_operation_by_rows(operation, rect, src_rows, dst_rows);
166		260	return KLEIDICV_OK;
167		336	}
168
169			using YUVToRGB = YUVToRGBAll<false, false>;
170			using YUVToRGBA = YUVToRGBAll<false, true>;
171			using YUVToBGR = YUVToRGBAll<true, false>;
172			using YUVToBGRA = YUVToRGBAll<true, true>;
173
174			KLEIDICV_TARGET_FN_ATTRS
175		360	kleidicv_error_t yuv444_to_rgb_u8(const uint8_t *src, size_t src_stride,
176			uint8_t *dst, size_t dst_stride, size_t width,
177			size_t height,
178			kleidicv_color_conversion_t color_format) {
179	5/5 ✓ Branch 0 taken 84 times. ✓ Branch 1 taken 24 times. ✓ Branch 2 taken 84 times. ✓ Branch 3 taken 84 times. ✓ Branch 4 taken 84 times.	360	switch (color_format) {
180			case KLEIDICV_YUV444_TO_RGB: {
181		84	YUVToRGB operation;
182		168	return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
183		84	width, height);
184		84	}
185
186			case KLEIDICV_YUV444_TO_BGR: {
187		84	YUVToBGR operation;
188		168	return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
189		84	width, height);
190		84	}
191
192			case KLEIDICV_YUV444_TO_RGBA: {
193		84	YUVToRGBA operation;
194		168	return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
195		84	width, height);
196		84	}
197
198			case KLEIDICV_YUV444_TO_BGRA: {
199		84	YUVToBGRA operation;
200		168	return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride,
201		84	width, height);
202		84	}
203
204			default:
205		24	return KLEIDICV_ERROR_NOT_IMPLEMENTED;
206			}
207
208			return KLEIDICV_ERROR_NOT_IMPLEMENTED;
209		360	}
210
211			} // namespace kleidicv::neon
212