| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #include "kleidicv/conversions/split.h" | ||
| 6 | #include "kleidicv/kleidicv.h" | ||
| 7 | #include "kleidicv/neon.h" | ||
| 8 | |||
| 9 | namespace kleidicv::neon { | ||
| 10 | |||
| 11 | // Generic 2-channel split | ||
| 12 | // | ||
| 13 | // Split the source into two channels: | ||
| 14 | // vsrc=[0,1,2,3,4,5,6,7] | ||
| 15 | // -> vdst0=[0,2,4,6] | ||
| 16 | // -> vdst1=[1,3,5,7] | ||
| 17 | template <typename ScalarType> | ||
| 18 | class Split2 final : public UnrollTwice { | ||
| 19 | public: | ||
| 20 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 21 | using VectorType = typename VecTraits::VectorType; | ||
| 22 | using Vector2Type = typename VecTraits::Vector2Type; | ||
| 23 | |||
| 24 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 25 | void vector_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1) { | ||
| 26 | Vector2Type vsrc; | ||
| 27 | vsrc = vld2q(src); | ||
| 28 | vst1q(dst0, vsrc.val[0]); | ||
| 29 | vst1q(dst1, vsrc.val[1]); | ||
| 30 | } | ||
| 31 | #else | ||
| 32 | 1600 | void vector_path(const Vector2Type vsrc, VectorType &vdst0, | |
| 33 | VectorType &vdst1) { | ||
| 34 | 1600 | vdst0 = vuzp1q(vsrc.val[0], vsrc.val[1]); | |
| 35 | 1600 | vdst1 = vuzp2q(vsrc.val[0], vsrc.val[1]); | |
| 36 | 1600 | } | |
| 37 | #endif | ||
| 38 | |||
| 39 | 256 | void scalar_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1) { | |
| 40 | 256 | dst0[0] = src[0]; | |
| 41 | 256 | dst1[0] = src[1]; | |
| 42 | 256 | } | |
| 43 | }; | ||
| 44 | |||
| 45 | // Generic 3-channel split | ||
| 46 | // | ||
| 47 | // Split the loaded triple vector size source into 3 channels | ||
| 48 | // vsrc=[0,1,2,3,4,5,6,7,8,9,10,11] | ||
| 49 | // -> vdst0=[0,3,6,9] | ||
| 50 | // -> vdst1=[1,4,7,10] | ||
| 51 | // -> vdst2=[2,5,8,11] | ||
| 52 | template <typename ScalarType> | ||
| 53 | class Split3 final : public UnrollTwice { | ||
| 54 | public: | ||
| 55 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 56 | using Vector3Type = typename VecTraits::Vector3Type; | ||
| 57 | using VectorType = typename VecTraits::VectorType; | ||
| 58 | |||
| 59 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 60 | // NOLINTBEGIN(hicpp-member-init) | ||
| 61 | 272 | Split3() { Split3Init(ScalarType()); } | |
| 62 | // NOLINTEND(hicpp-member-init) | ||
| 63 | #endif | ||
| 64 | |||
| 65 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 66 | void vector_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1, | ||
| 67 | ScalarType *dst2) { | ||
| 68 | Vector3Type vsrc; | ||
| 69 | vsrc = vld3q(src); | ||
| 70 | vst1q(dst0, vsrc.val[0]); | ||
| 71 | vst1q(dst1, vsrc.val[1]); | ||
| 72 | vst1q(dst2, vsrc.val[2]); | ||
| 73 | } | ||
| 74 | #else | ||
| 75 | 2880 | void vector_path(Vector3Type vsrc, VectorType &vdst0, VectorType &vdst1, | |
| 76 | VectorType &vdst2) { | ||
| 77 | 2880 | uint8x16x3_t tmp; | |
| 78 | 2880 | tmp.val[0] = reinterpret_cast<uint8x16_t>(vsrc.val[0]); | |
| 79 | 2880 | tmp.val[1] = reinterpret_cast<uint8x16_t>(vsrc.val[1]); | |
| 80 | 2880 | tmp.val[2] = reinterpret_cast<uint8x16_t>(vsrc.val[2]); | |
| 81 | 2880 | vdst0 = vqtbl3q_u8(tmp, index1_); | |
| 82 | 2880 | vdst1 = vqtbl3q_u8(tmp, index2_); | |
| 83 | 2880 | vdst2 = vqtbl3q_u8(tmp, index3_); | |
| 84 | 2880 | } | |
| 85 | #endif | ||
| 86 | |||
| 87 | 512 | void scalar_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1, | |
| 88 | ScalarType *dst2) { | ||
| 89 | 512 | dst0[0] = src[0]; | |
| 90 | 512 | dst1[0] = src[1]; | |
| 91 | 512 | dst2[0] = src[2]; | |
| 92 | 512 | } | |
| 93 | |||
| 94 | private: | ||
| 95 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 96 | uint8x16_t index1_, index2_, index3_; | ||
| 97 | |||
| 98 | 68 | void Split3Init(uint8_t) { | |
| 99 | // clang-format off | ||
| 100 | 68 | const uint8_t kIndices[3][16] = { | |
| 101 | {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45}, | ||
| 102 | {1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46}, | ||
| 103 | {2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47}}; | ||
| 104 | // clang-format on | ||
| 105 | |||
| 106 | 68 | index1_ = vld1q_u8(kIndices[0]); | |
| 107 | 68 | index2_ = vld1q_u8(kIndices[1]); | |
| 108 | 68 | index3_ = vld1q_u8(kIndices[2]); | |
| 109 | 68 | } | |
| 110 | |||
| 111 | 68 | void Split3Init(uint16_t) { | |
| 112 | // clang-format off | ||
| 113 | 68 | const uint8_t kIndices[3][16] = { | |
| 114 | {0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 36, 37, 42, 43}, | ||
| 115 | {2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 32, 33, 38, 39, 44, 45}, | ||
| 116 | {4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 34, 35, 40, 41, 46, 47}}; | ||
| 117 | // clang-format on | ||
| 118 | |||
| 119 | 68 | index1_ = vld1q_u8(kIndices[0]); | |
| 120 | 68 | index2_ = vld1q_u8(kIndices[1]); | |
| 121 | 68 | index3_ = vld1q_u8(kIndices[2]); | |
| 122 | 68 | } | |
| 123 | |||
| 124 | 68 | void Split3Init(uint32_t) { | |
| 125 | // clang-format off | ||
| 126 | 68 | const uint8_t kIndices[3][16] = { | |
| 127 | {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39}, | ||
| 128 | {4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43}, | ||
| 129 | {8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47}}; | ||
| 130 | // clang-format on | ||
| 131 | |||
| 132 | 68 | index1_ = vld1q_u8(kIndices[0]); | |
| 133 | 68 | index2_ = vld1q_u8(kIndices[1]); | |
| 134 | 68 | index3_ = vld1q_u8(kIndices[2]); | |
| 135 | 68 | } | |
| 136 | |||
| 137 | 68 | void Split3Init(uint64_t) { | |
| 138 | // clang-format off | ||
| 139 | 68 | const uint8_t kIndices[3][16] = { | |
| 140 | {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}, | ||
| 141 | {8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39}, | ||
| 142 | {16, 17, 18, 19, 20, 21, 22, 23, 40, 41, 42, 43, 44, 45, 46, 47}}; | ||
| 143 | // clang-format on | ||
| 144 | |||
| 145 | 68 | index1_ = vld1q_u8(kIndices[0]); | |
| 146 | 68 | index2_ = vld1q_u8(kIndices[1]); | |
| 147 | 68 | index3_ = vld1q_u8(kIndices[2]); | |
| 148 | 68 | } | |
| 149 | #endif | ||
| 150 | }; | ||
| 151 | |||
| 152 | // Generic 4-channel split | ||
| 153 | // | ||
| 154 | // Split the source first into two parts, using a double-size cast: | ||
| 155 | // vsrc=[0,1,2,3,4,5,6,7] | ||
| 156 | // -> halfway_unzipped_1=[0,1,4,5] | ||
| 157 | // -> halfway_unzipped_2=[2,3,6,7] | ||
| 158 | // Then split these parts into final channels: | ||
| 159 | // halfway_unzipped_1=[0,1,4,5] | ||
| 160 | // -> vdst0=[0,4] | ||
| 161 | // -> vdst1=[1,5] | ||
| 162 | // halfway_unzipped_2=[2,3,6,7] | ||
| 163 | // -> vdst2=[2,6] | ||
| 164 | // -> vdst3=[3,7] | ||
| 165 | template <typename ScalarType> | ||
| 166 | class Split4 final : public UnrollTwice { | ||
| 167 | public: | ||
| 168 | using VecTraits = neon::VecTraits<ScalarType>; | ||
| 169 | using VectorType = typename VecTraits::VectorType; | ||
| 170 | using Vector2Type = typename VecTraits::Vector2Type; | ||
| 171 | using Vector4Type = typename VecTraits::Vector4Type; | ||
| 172 | |||
| 173 | #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 174 | void vector_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1, | ||
| 175 | ScalarType *dst2, ScalarType *dst3) { | ||
| 176 | Vector4Type vsrc; | ||
| 177 | vsrc = vld4q(src); | ||
| 178 | vst1q(dst0, vsrc.val[0]); | ||
| 179 | vst1q(dst1, vsrc.val[1]); | ||
| 180 | vst1q(dst2, vsrc.val[2]); | ||
| 181 | vst1q(dst3, vsrc.val[3]); | ||
| 182 | } | ||
| 183 | #else | ||
| 184 | 2160 | void vector_path(const Vector4Type vsrc, VectorType &dst0, VectorType &dst1, | |
| 185 | VectorType &dst2, VectorType &dst3) { | ||
| 186 | 2160 | VectorType halfway_unzipped_1, halfway_unzipped_2; | |
| 187 | |||
| 188 | 2160 | halfway_unzipped_1 = vuzp1q( | |
| 189 | 2160 | reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[0]), | |
| 190 | 2160 | reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[1])); | |
| 191 | 2160 | halfway_unzipped_2 = vuzp1q( | |
| 192 | 2160 | reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[2]), | |
| 193 | 2160 | reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[3])); | |
| 194 | |||
| 195 | 2160 | dst0 = vuzp1q(halfway_unzipped_1, halfway_unzipped_2); | |
| 196 | 2160 | dst1 = vuzp2q(halfway_unzipped_1, halfway_unzipped_2); | |
| 197 | |||
| 198 | 2160 | halfway_unzipped_1 = vuzp2q( | |
| 199 | 2160 | reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[0]), | |
| 200 | 2160 | reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[1])); | |
| 201 | 2160 | halfway_unzipped_2 = vuzp2q( | |
| 202 | 2160 | reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[2]), | |
| 203 | 2160 | reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[3])); | |
| 204 | |||
| 205 | 2160 | dst2 = vuzp1q(halfway_unzipped_1, halfway_unzipped_2); | |
| 206 | 2160 | dst3 = vuzp2q(halfway_unzipped_1, halfway_unzipped_2); | |
| 207 | 2160 | } | |
| 208 | #endif | ||
| 209 | |||
| 210 | 384 | void scalar_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1, | |
| 211 | ScalarType *dst2, ScalarType *dst3) { | ||
| 212 | 384 | dst0[0] = src[0]; | |
| 213 | 384 | dst1[0] = src[1]; | |
| 214 | 384 | dst2[0] = src[2]; | |
| 215 | 384 | dst3[0] = src[3]; | |
| 216 | 384 | } | |
| 217 | }; | ||
| 218 | |||
| 219 | // Specialized split implementation for 4 channels with 64-bit data. | ||
| 220 | // With 64-bit elements there are only 2 values in one q-sized vector, | ||
| 221 | // so the implementation is simpler: no cast to double size is needed. | ||
| 222 | #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE | ||
| 223 | template <> | ||
| 224 | class Split4<uint64_t> final : public UnrollTwice { | ||
| 225 | public: | ||
| 226 | using VecTraits = neon::VecTraits<uint64_t>; | ||
| 227 | using VectorType = typename VecTraits::VectorType; | ||
| 228 | using Vector2Type = typename VecTraits::Vector2Type; | ||
| 229 | using Vector4Type = typename VecTraits::Vector4Type; | ||
| 230 | |||
| 231 | 720 | void vector_path(const Vector4Type vsrc, VectorType &dst0, VectorType &dst1, | |
| 232 | VectorType &dst2, VectorType &dst3) { | ||
| 233 | 720 | dst0 = vuzp1q(vsrc.val[0], vsrc.val[2]); | |
| 234 | 720 | dst1 = vuzp2q(vsrc.val[0], vsrc.val[2]); | |
| 235 | 720 | dst2 = vuzp1q(vsrc.val[1], vsrc.val[3]); | |
| 236 | 720 | dst3 = vuzp2q(vsrc.val[1], vsrc.val[3]); | |
| 237 | 720 | } | |
| 238 | |||
| 239 | 128 | void scalar_path(const uint64_t *src, uint64_t *dst0, uint64_t *dst1, | |
| 240 | uint64_t *dst2, uint64_t *dst3) { | ||
| 241 | 128 | dst0[0] = src[0]; | |
| 242 | 128 | dst1[0] = src[1]; | |
| 243 | 128 | dst2[0] = src[2]; | |
| 244 | 128 | dst3[0] = src[3]; | |
| 245 | 128 | } | |
| 246 | }; | ||
| 247 | #endif | ||
| 248 | |||
| 249 | // Most of the complexity comes from parameter checking. | ||
| 250 | // NOLINTBEGIN(readability-function-cognitive-complexity) | ||
| 251 | template <typename ScalarType> | ||
| 252 | 1264 | kleidicv_error_t split(const void *src_void, const size_t src_stride, | |
| 253 | void **dst_data, const size_t *dst_strides, size_t width, | ||
| 254 | size_t height, size_t channels) { | ||
| 255 |
8/8✓ Branch 0 taken 4 times.
✓ Branch 1 taken 240 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 336 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 336 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 336 times.
|
1264 | if (channels < 2) { |
| 256 | 16 | return KLEIDICV_ERROR_RANGE; | |
| 257 | } | ||
| 258 | |||
| 259 |
8/8✓ Branch 0 taken 8 times.
✓ Branch 1 taken 232 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 328 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 328 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 328 times.
|
1248 | CHECK_POINTERS(dst_data, dst_strides); |
| 260 |
6/6✓ Branch 0 taken 12 times.
✓ Branch 1 taken 316 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 316 times.
✓ Branch 4 taken 12 times.
✓ Branch 5 taken 316 times.
|
1216 | MAKE_POINTER_CHECK_ALIGNMENT(const ScalarType, src_data, src_void); |
| 261 |
6/6✓ Branch 0 taken 12 times.
✓ Branch 1 taken 304 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 304 times.
✓ Branch 4 taken 12 times.
✓ Branch 5 taken 304 times.
|
1180 | MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst0, dst_data[0]); |
| 262 |
6/6✓ Branch 0 taken 12 times.
✓ Branch 1 taken 292 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 292 times.
✓ Branch 4 taken 12 times.
✓ Branch 5 taken 292 times.
|
1144 | MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst1, dst_data[1]); |
| 263 |
16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 228 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 228 times.
✓ Branch 4 taken 16 times.
✓ Branch 5 taken 276 times.
✓ Branch 6 taken 16 times.
✓ Branch 7 taken 276 times.
✓ Branch 8 taken 16 times.
✓ Branch 9 taken 276 times.
✓ Branch 10 taken 16 times.
✓ Branch 11 taken 276 times.
✓ Branch 12 taken 16 times.
✓ Branch 13 taken 276 times.
✓ Branch 14 taken 16 times.
✓ Branch 15 taken 276 times.
|
1108 | CHECK_POINTER_AND_STRIDE(src_data, src_stride, height); |
| 264 |
16/16✓ Branch 0 taken 12 times.
✓ Branch 1 taken 216 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 216 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 252 times.
✓ Branch 6 taken 24 times.
✓ Branch 7 taken 252 times.
✓ Branch 8 taken 24 times.
✓ Branch 9 taken 252 times.
✓ Branch 10 taken 24 times.
✓ Branch 11 taken 252 times.
✓ Branch 12 taken 24 times.
✓ Branch 13 taken 252 times.
✓ Branch 14 taken 24 times.
✓ Branch 15 taken 252 times.
|
1056 | CHECK_POINTER_AND_STRIDE(dst0, dst_strides[0], height); |
| 265 |
16/16✓ Branch 0 taken 12 times.
✓ Branch 1 taken 204 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 204 times.
✓ Branch 4 taken 24 times.
✓ Branch 5 taken 228 times.
✓ Branch 6 taken 24 times.
✓ Branch 7 taken 228 times.
✓ Branch 8 taken 24 times.
✓ Branch 9 taken 228 times.
✓ Branch 10 taken 24 times.
✓ Branch 11 taken 228 times.
✓ Branch 12 taken 24 times.
✓ Branch 13 taken 228 times.
✓ Branch 14 taken 24 times.
✓ Branch 15 taken 228 times.
|
972 | CHECK_POINTER_AND_STRIDE(dst1, dst_strides[1], height); |
| 266 |
24/24✓ Branch 0 taken 4 times.
✓ Branch 1 taken 200 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 196 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 196 times.
✓ Branch 6 taken 4 times.
✓ Branch 7 taken 224 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 220 times.
✓ Branch 10 taken 8 times.
✓ Branch 11 taken 220 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 224 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 220 times.
✓ Branch 16 taken 8 times.
✓ Branch 17 taken 220 times.
✓ Branch 18 taken 4 times.
✓ Branch 19 taken 224 times.
✓ Branch 20 taken 4 times.
✓ Branch 21 taken 220 times.
✓ Branch 22 taken 8 times.
✓ Branch 23 taken 220 times.
|
888 | CHECK_IMAGE_SIZE(width, height); |
| 267 | |||
| 268 | 856 | Rectangle rect{width, height}; | |
| 269 | 1712 | Rows<ScalarType> src_rows{const_cast<ScalarType *>(src_data), src_stride, | |
| 270 | 856 | channels}; | |
| 271 | 856 | Rows<ScalarType> dst_rows0{dst0, dst_strides[0]}; | |
| 272 | 856 | Rows<ScalarType> dst_rows1{dst1, dst_strides[1]}; | |
| 273 |
16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
✓ Branch 2 taken 72 times.
✓ Branch 3 taken 76 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 44 times.
✓ Branch 6 taken 80 times.
✓ Branch 7 taken 92 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 44 times.
✓ Branch 10 taken 80 times.
✓ Branch 11 taken 92 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 44 times.
✓ Branch 14 taken 80 times.
✓ Branch 15 taken 92 times.
|
856 | switch (channels) { |
| 274 | case 2: { | ||
| 275 | 176 | Split2<ScalarType> operation; | |
| 276 | 176 | apply_operation_by_rows(operation, rect, src_rows, dst_rows0, dst_rows1); | |
| 277 | 176 | } break; | |
| 278 | case 3: { | ||
| 279 |
6/6✓ Branch 0 taken 4 times.
✓ Branch 1 taken 76 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 76 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 76 times.
|
312 | MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst2, dst_data[2]); |
| 280 |
16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 68 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 68 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 68 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 68 times.
✓ Branch 8 taken 8 times.
✓ Branch 9 taken 68 times.
✓ Branch 10 taken 8 times.
✓ Branch 11 taken 68 times.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 68 times.
✓ Branch 14 taken 8 times.
✓ Branch 15 taken 68 times.
|
300 | CHECK_POINTER_AND_STRIDE(dst2, dst_strides[2], height); |
| 281 | 272 | Rows<ScalarType> dst_rows2{dst2, dst_strides[2]}; | |
| 282 | 272 | Split3<ScalarType> operation; | |
| 283 | 272 | apply_operation_by_rows(operation, rect, src_rows, dst_rows0, dst_rows1, | |
| 284 | dst_rows2); | ||
| 285 |
8/8✓ Branch 0 taken 4 times.
✓ Branch 1 taken 68 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 68 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 68 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 68 times.
|
300 | } break; |
| 286 | case 4: { | ||
| 287 |
6/6✓ Branch 0 taken 4 times.
✓ Branch 1 taken 88 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 88 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 88 times.
|
352 | MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst2, dst_data[2]); |
| 288 |
6/6✓ Branch 0 taken 4 times.
✓ Branch 1 taken 84 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 84 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 84 times.
|
340 | MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst3, dst_data[3]); |
| 289 |
16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 72 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 72 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 76 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 76 times.
✓ Branch 8 taken 8 times.
✓ Branch 9 taken 76 times.
✓ Branch 10 taken 8 times.
✓ Branch 11 taken 76 times.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 76 times.
✓ Branch 14 taken 8 times.
✓ Branch 15 taken 76 times.
|
328 | CHECK_POINTER_AND_STRIDE(dst2, dst_strides[2], height); |
| 290 |
16/16✓ Branch 0 taken 4 times.
✓ Branch 1 taken 68 times.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 68 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 68 times.
✓ Branch 6 taken 8 times.
✓ Branch 7 taken 68 times.
✓ Branch 8 taken 8 times.
✓ Branch 9 taken 68 times.
✓ Branch 10 taken 8 times.
✓ Branch 11 taken 68 times.
✓ Branch 12 taken 8 times.
✓ Branch 13 taken 68 times.
✓ Branch 14 taken 8 times.
✓ Branch 15 taken 68 times.
|
300 | CHECK_POINTER_AND_STRIDE(dst3, dst_strides[3], height); |
| 291 | 272 | Rows<ScalarType> dst_rows2{dst2, dst_strides[2]}; | |
| 292 | 272 | Rows<ScalarType> dst_rows3{dst3, dst_strides[3]}; | |
| 293 | 272 | Split4<ScalarType> operation; | |
| 294 | 272 | apply_operation_by_rows(operation, rect, src_rows, dst_rows0, dst_rows1, | |
| 295 | dst_rows2, dst_rows3); | ||
| 296 |
8/8✓ Branch 0 taken 8 times.
✓ Branch 1 taken 68 times.
✓ Branch 2 taken 20 times.
✓ Branch 3 taken 68 times.
✓ Branch 4 taken 20 times.
✓ Branch 5 taken 68 times.
✓ Branch 6 taken 20 times.
✓ Branch 7 taken 68 times.
|
340 | } break; |
| 297 | default: | ||
| 298 | 16 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 299 | } | ||
| 300 | 720 | return KLEIDICV_OK; | |
| 301 | 1264 | } | |
| 302 | // NOLINTEND(readability-function-cognitive-complexity) | ||
| 303 | |||
| 304 | KLEIDICV_TARGET_FN_ATTRS | ||
| 305 | 1268 | kleidicv_error_t split(const void *src_data, size_t src_stride, void **dst_data, | |
| 306 | const size_t *dst_strides, size_t width, size_t height, | ||
| 307 | size_t channels, size_t element_size) { | ||
| 308 |
5/5✓ Branch 0 taken 340 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 244 times.
✓ Branch 3 taken 340 times.
✓ Branch 4 taken 340 times.
|
1268 | switch (element_size) { |
| 309 | case sizeof(uint8_t): | ||
| 310 | 488 | return split<uint8_t>(src_data, src_stride, dst_data, dst_strides, width, | |
| 311 | 244 | height, channels); | |
| 312 | |||
| 313 | case sizeof(uint16_t): | ||
| 314 | 680 | return split<uint16_t>(src_data, src_stride, dst_data, dst_strides, width, | |
| 315 | 340 | height, channels); | |
| 316 | |||
| 317 | case sizeof(uint32_t): | ||
| 318 | 680 | return split<uint32_t>(src_data, src_stride, dst_data, dst_strides, width, | |
| 319 | 340 | height, channels); | |
| 320 | |||
| 321 | case sizeof(uint64_t): | ||
| 322 | 680 | return split<uint64_t>(src_data, src_stride, dst_data, dst_strides, width, | |
| 323 | 340 | height, channels); | |
| 324 | |||
| 325 | default: | ||
| 326 | 4 | return KLEIDICV_ERROR_NOT_IMPLEMENTED; | |
| 327 | } | ||
| 328 | 1268 | } | |
| 329 | } // namespace kleidicv::neon | ||
| 330 |