KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/conversions/split_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 134 134 100.0%
Functions: 37 37 100.0%
Branches: 209 209 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include "kleidicv/conversions/split.h"
6 #include "kleidicv/kleidicv.h"
7 #include "kleidicv/neon.h"
8
9 namespace kleidicv::neon {
10
11 // Generic 2-channel split
12 //
13 // Split the source into two channels:
14 // vsrc=[0,1,2,3,4,5,6,7]
15 // -> vdst0=[0,2,4,6]
16 // -> vdst1=[1,3,5,7]
17 template <typename ScalarType>
18 class Split2 final : public UnrollTwice {
19 public:
20 using VecTraits = neon::VecTraits<ScalarType>;
21 using VectorType = typename VecTraits::VectorType;
22 using Vector2Type = typename VecTraits::Vector2Type;
23
24 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
25 void vector_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1) {
26 Vector2Type vsrc;
27 vsrc = vld2q(src);
28 vst1q(dst0, vsrc.val[0]);
29 vst1q(dst1, vsrc.val[1]);
30 }
31 #else
32 960 void vector_path(const Vector2Type vsrc, VectorType &vdst0,
33 VectorType &vdst1) {
34 960 vdst0 = vuzp1q(vsrc.val[0], vsrc.val[1]);
35 960 vdst1 = vuzp2q(vsrc.val[0], vsrc.val[1]);
36 960 }
37 #endif
38
39 192 void scalar_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1) {
40 192 dst0[0] = src[0];
41 192 dst1[0] = src[1];
42 192 }
43 };
44
45 // Generic 3-channel split
46 //
47 // Split the source, loaded as three consecutive vectors, into 3 channels:
48 // vsrc=[0,1,2,3,4,5,6,7,8,9,10,11]
49 // -> vdst0=[0,3,6,9]
50 // -> vdst1=[1,4,7,10]
51 // -> vdst2=[2,5,8,11]
52 template <typename ScalarType>
53 class Split3 final : public UnrollTwice {
54 public:
55 using VecTraits = neon::VecTraits<ScalarType>;
56 using Vector3Type = typename VecTraits::Vector3Type;
57 using VectorType = typename VecTraits::VectorType;
58
59 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
60 // NOLINTBEGIN(hicpp-member-init)
61 204 Split3() { Split3Init(ScalarType()); }
62 // NOLINTEND(hicpp-member-init)
63 #endif
64
65 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
66 void vector_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1,
67 ScalarType *dst2) {
68 Vector3Type vsrc;
69 vsrc = vld3q(src);
70 vst1q(dst0, vsrc.val[0]);
71 vst1q(dst1, vsrc.val[1]);
72 vst1q(dst2, vsrc.val[2]);
73 }
74 #else
75 1728 void vector_path(Vector3Type vsrc, VectorType &vdst0, VectorType &vdst1,
76 VectorType &vdst2) {
77 1728 uint8x16x3_t tmp;
78 1728 tmp.val[0] = reinterpret_cast<uint8x16_t>(vsrc.val[0]);
79 1728 tmp.val[1] = reinterpret_cast<uint8x16_t>(vsrc.val[1]);
80 1728 tmp.val[2] = reinterpret_cast<uint8x16_t>(vsrc.val[2]);
81 1728 vdst0 = vqtbl3q_u8(tmp, index1_);
82 1728 vdst1 = vqtbl3q_u8(tmp, index2_);
83 1728 vdst2 = vqtbl3q_u8(tmp, index3_);
84 1728 }
85 #endif
86
87 384 void scalar_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1,
88 ScalarType *dst2) {
89 384 dst0[0] = src[0];
90 384 dst1[0] = src[1];
91 384 dst2[0] = src[2];
92 384 }
93
94 private:
95 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
96 uint8x16_t index1_, index2_, index3_;
97
98 51 void Split3Init(uint8_t) {
99 // clang-format off
100 51 const uint8_t kIndices[3][16] = {
101 {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45},
102 {1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46},
103 {2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47}};
104 // clang-format on
105
106 51 index1_ = vld1q_u8(kIndices[0]);
107 51 index2_ = vld1q_u8(kIndices[1]);
108 51 index3_ = vld1q_u8(kIndices[2]);
109 51 }
110
111 51 void Split3Init(uint16_t) {
112 // clang-format off
113 51 const uint8_t kIndices[3][16] = {
114 {0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 36, 37, 42, 43},
115 {2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 32, 33, 38, 39, 44, 45},
116 {4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 34, 35, 40, 41, 46, 47}};
117 // clang-format on
118
119 51 index1_ = vld1q_u8(kIndices[0]);
120 51 index2_ = vld1q_u8(kIndices[1]);
121 51 index3_ = vld1q_u8(kIndices[2]);
122 51 }
123
124 51 void Split3Init(uint32_t) {
125 // clang-format off
126 51 const uint8_t kIndices[3][16] = {
127 {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39},
128 {4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43},
129 {8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47}};
130 // clang-format on
131
132 51 index1_ = vld1q_u8(kIndices[0]);
133 51 index2_ = vld1q_u8(kIndices[1]);
134 51 index3_ = vld1q_u8(kIndices[2]);
135 51 }
136
137 51 void Split3Init(uint64_t) {
138 // clang-format off
139 51 const uint8_t kIndices[3][16] = {
140 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
141 {8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39},
142 {16, 17, 18, 19, 20, 21, 22, 23, 40, 41, 42, 43, 44, 45, 46, 47}};
143 // clang-format on
144
145 51 index1_ = vld1q_u8(kIndices[0]);
146 51 index2_ = vld1q_u8(kIndices[1]);
147 51 index3_ = vld1q_u8(kIndices[2]);
148 51 }
149 #endif
150 };
151
152 // Generic 4-channel split
153 //
154 // Split the source first into two parts, using a cast to double element width:
155 // vsrc=[0,1,2,3,4,5,6,7]
156 // -> halfway_unzipped_1=[0,1,4,5]
157 // -> halfway_unzipped_2=[2,3,6,7]
158 // Then split these parts into final channels:
159 // halfway_unzipped_1=[0,1,4,5]
160 // -> vdst0=[0,4]
161 // -> vdst1=[1,5]
162 // halfway_unzipped_2=[2,3,6,7]
163 // -> vdst2=[2,6]
164 // -> vdst3=[3,7]
165 template <typename ScalarType>
166 class Split4 final : public UnrollTwice {
167 public:
168 using VecTraits = neon::VecTraits<ScalarType>;
169 using VectorType = typename VecTraits::VectorType;
170 using Vector2Type = typename VecTraits::Vector2Type;
171 using Vector4Type = typename VecTraits::Vector4Type;
172
173 #if KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
174 void vector_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1,
175 ScalarType *dst2, ScalarType *dst3) {
176 Vector4Type vsrc;
177 vsrc = vld4q(src);
178 vst1q(dst0, vsrc.val[0]);
179 vst1q(dst1, vsrc.val[1]);
180 vst1q(dst2, vsrc.val[2]);
181 vst1q(dst3, vsrc.val[3]);
182 }
183 #else
184 1296 void vector_path(const Vector4Type vsrc, VectorType &dst0, VectorType &dst1,
185 VectorType &dst2, VectorType &dst3) {
186 1296 VectorType halfway_unzipped_1, halfway_unzipped_2;
187
188 1296 halfway_unzipped_1 = vuzp1q(
189 1296 reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[0]),
190 1296 reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[1]));
191 1296 halfway_unzipped_2 = vuzp1q(
192 1296 reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[2]),
193 1296 reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[3]));
194
195 1296 dst0 = vuzp1q(halfway_unzipped_1, halfway_unzipped_2);
196 1296 dst1 = vuzp2q(halfway_unzipped_1, halfway_unzipped_2);
197
198 1296 halfway_unzipped_1 = vuzp2q(
199 1296 reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[0]),
200 1296 reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[1]));
201 1296 halfway_unzipped_2 = vuzp2q(
202 1296 reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[2]),
203 1296 reinterpret_cast<double_element_width_t<VectorType> >(vsrc.val[3]));
204
205 1296 dst2 = vuzp1q(halfway_unzipped_1, halfway_unzipped_2);
206 1296 dst3 = vuzp2q(halfway_unzipped_1, halfway_unzipped_2);
207 1296 }
208 #endif
209
210 288 void scalar_path(const ScalarType *src, ScalarType *dst0, ScalarType *dst1,
211 ScalarType *dst2, ScalarType *dst3) {
212 288 dst0[0] = src[0];
213 288 dst1[0] = src[1];
214 288 dst2[0] = src[2];
215 288 dst3[0] = src[3];
216 288 }
217 };
218
219 // Specialized split implementation for 4 channels with 64-bit data.
220 // With 64-bit elements a q-sized vector holds only 2 values, so the
221 // implementation is simpler: no cast to double element width is needed.
222 #if !KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE
223 template <>
224 class Split4<uint64_t> final : public UnrollTwice {
225 public:
226 using VecTraits = neon::VecTraits<uint64_t>;
227 using VectorType = typename VecTraits::VectorType;
228 using Vector2Type = typename VecTraits::Vector2Type;
229 using Vector4Type = typename VecTraits::Vector4Type;
230
231 432 void vector_path(const Vector4Type vsrc, VectorType &dst0, VectorType &dst1,
232 VectorType &dst2, VectorType &dst3) {
233 432 dst0 = vuzp1q(vsrc.val[0], vsrc.val[2]);
234 432 dst1 = vuzp2q(vsrc.val[0], vsrc.val[2]);
235 432 dst2 = vuzp1q(vsrc.val[1], vsrc.val[3]);
236 432 dst3 = vuzp2q(vsrc.val[1], vsrc.val[3]);
237 432 }
238
239 96 void scalar_path(const uint64_t *src, uint64_t *dst0, uint64_t *dst1,
240 uint64_t *dst2, uint64_t *dst3) {
241 96 dst0[0] = src[0];
242 96 dst1[0] = src[1];
243 96 dst2[0] = src[2];
244 96 dst3[0] = src[3];
245 96 }
246 };
247 #endif
248
249 // Most of the complexity comes from parameter checking.
250 // NOLINTBEGIN(readability-function-cognitive-complexity)
251 template <typename ScalarType>
252 948 kleidicv_error_t split(const void *src_void, const size_t src_stride,
253 void **dst_data, const size_t *dst_strides, size_t width,
254 size_t height, size_t channels) {
255
8/8
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 180 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 252 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 252 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 252 times.
948 if (channels < 2) {
256 12 return KLEIDICV_ERROR_RANGE;
257 }
258
259
8/8
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 174 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 246 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 246 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 246 times.
936 CHECK_POINTERS(dst_data, dst_strides);
260
6/6
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 237 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 237 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 237 times.
912 MAKE_POINTER_CHECK_ALIGNMENT(const ScalarType, src_data, src_void);
261
6/6
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 228 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 228 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 228 times.
885 MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst0, dst_data[0]);
262
6/6
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 219 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 219 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 219 times.
858 MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst1, dst_data[1]);
263
16/16
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 171 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 171 times.
✓ Branch 4 taken 12 times.
✓ Branch 5 taken 207 times.
✓ Branch 6 taken 12 times.
✓ Branch 7 taken 207 times.
✓ Branch 8 taken 12 times.
✓ Branch 9 taken 207 times.
✓ Branch 10 taken 12 times.
✓ Branch 11 taken 207 times.
✓ Branch 12 taken 12 times.
✓ Branch 13 taken 207 times.
✓ Branch 14 taken 12 times.
✓ Branch 15 taken 207 times.
831 CHECK_POINTER_AND_STRIDE(src_data, src_stride, height);
264
16/16
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 162 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 162 times.
✓ Branch 4 taken 18 times.
✓ Branch 5 taken 189 times.
✓ Branch 6 taken 18 times.
✓ Branch 7 taken 189 times.
✓ Branch 8 taken 18 times.
✓ Branch 9 taken 189 times.
✓ Branch 10 taken 18 times.
✓ Branch 11 taken 189 times.
✓ Branch 12 taken 18 times.
✓ Branch 13 taken 189 times.
✓ Branch 14 taken 18 times.
✓ Branch 15 taken 189 times.
792 CHECK_POINTER_AND_STRIDE(dst0, dst_strides[0], height);
265
16/16
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 153 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 153 times.
✓ Branch 4 taken 18 times.
✓ Branch 5 taken 171 times.
✓ Branch 6 taken 18 times.
✓ Branch 7 taken 171 times.
✓ Branch 8 taken 18 times.
✓ Branch 9 taken 171 times.
✓ Branch 10 taken 18 times.
✓ Branch 11 taken 171 times.
✓ Branch 12 taken 18 times.
✓ Branch 13 taken 171 times.
✓ Branch 14 taken 18 times.
✓ Branch 15 taken 171 times.
729 CHECK_POINTER_AND_STRIDE(dst1, dst_strides[1], height);
266
24/24
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 150 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 147 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 147 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 168 times.
✓ Branch 8 taken 3 times.
✓ Branch 9 taken 165 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 165 times.
✓ Branch 12 taken 3 times.
✓ Branch 13 taken 168 times.
✓ Branch 14 taken 3 times.
✓ Branch 15 taken 165 times.
✓ Branch 16 taken 6 times.
✓ Branch 17 taken 165 times.
✓ Branch 18 taken 3 times.
✓ Branch 19 taken 168 times.
✓ Branch 20 taken 3 times.
✓ Branch 21 taken 165 times.
✓ Branch 22 taken 6 times.
✓ Branch 23 taken 165 times.
666 CHECK_IMAGE_SIZE(width, height);
267
268 642 Rectangle rect{width, height};
269 1284 Rows<ScalarType> src_rows{const_cast<ScalarType *>(src_data), src_stride,
270 642 channels};
271 642 Rows<ScalarType> dst_rows0{dst0, dst_strides[0]};
272 642 Rows<ScalarType> dst_rows1{dst1, dst_strides[1]};
273
16/16
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 33 times.
✓ Branch 2 taken 54 times.
✓ Branch 3 taken 57 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 33 times.
✓ Branch 6 taken 60 times.
✓ Branch 7 taken 69 times.
✓ Branch 8 taken 3 times.
✓ Branch 9 taken 33 times.
✓ Branch 10 taken 60 times.
✓ Branch 11 taken 69 times.
✓ Branch 12 taken 3 times.
✓ Branch 13 taken 33 times.
✓ Branch 14 taken 60 times.
✓ Branch 15 taken 69 times.
642 switch (channels) {
274 case 2: {
275 132 Split2<ScalarType> operation;
276 132 apply_operation_by_rows(operation, rect, src_rows, dst_rows0, dst_rows1);
277 132 } break;
278 case 3: {
279
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 57 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 57 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 57 times.
234 MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst2, dst_data[2]);
280
16/16
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 51 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 51 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 51 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 51 times.
✓ Branch 8 taken 6 times.
✓ Branch 9 taken 51 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 51 times.
✓ Branch 12 taken 6 times.
✓ Branch 13 taken 51 times.
✓ Branch 14 taken 6 times.
✓ Branch 15 taken 51 times.
225 CHECK_POINTER_AND_STRIDE(dst2, dst_strides[2], height);
281 204 Rows<ScalarType> dst_rows2{dst2, dst_strides[2]};
282 204 Split3<ScalarType> operation;
283 204 apply_operation_by_rows(operation, rect, src_rows, dst_rows0, dst_rows1,
284 dst_rows2);
285
8/8
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 51 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 51 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 51 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 51 times.
225 } break;
286 case 4: {
287
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 66 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 66 times.
264 MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst2, dst_data[2]);
288
6/6
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 63 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 63 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 63 times.
255 MAKE_POINTER_CHECK_ALIGNMENT(ScalarType, dst3, dst_data[3]);
289
16/16
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 54 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 54 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 57 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 57 times.
✓ Branch 8 taken 6 times.
✓ Branch 9 taken 57 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 57 times.
✓ Branch 12 taken 6 times.
✓ Branch 13 taken 57 times.
✓ Branch 14 taken 6 times.
✓ Branch 15 taken 57 times.
246 CHECK_POINTER_AND_STRIDE(dst2, dst_strides[2], height);
290
16/16
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 51 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 51 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 51 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 51 times.
✓ Branch 8 taken 6 times.
✓ Branch 9 taken 51 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 51 times.
✓ Branch 12 taken 6 times.
✓ Branch 13 taken 51 times.
✓ Branch 14 taken 6 times.
✓ Branch 15 taken 51 times.
225 CHECK_POINTER_AND_STRIDE(dst3, dst_strides[3], height);
291 204 Rows<ScalarType> dst_rows2{dst2, dst_strides[2]};
292 204 Rows<ScalarType> dst_rows3{dst3, dst_strides[3]};
293 204 Split4<ScalarType> operation;
294 204 apply_operation_by_rows(operation, rect, src_rows, dst_rows0, dst_rows1,
295 dst_rows2, dst_rows3);
296
8/8
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 51 times.
✓ Branch 2 taken 15 times.
✓ Branch 3 taken 51 times.
✓ Branch 4 taken 15 times.
✓ Branch 5 taken 51 times.
✓ Branch 6 taken 15 times.
✓ Branch 7 taken 51 times.
255 } break;
297 default:
298 12 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
299 }
300 540 return KLEIDICV_OK;
301 948 }
302 // NOLINTEND(readability-function-cognitive-complexity)
303
304 KLEIDICV_TARGET_FN_ATTRS
305 951 kleidicv_error_t split(const void *src_data, size_t src_stride, void **dst_data,
306 const size_t *dst_strides, size_t width, size_t height,
307 size_t channels, size_t element_size) {
308
5/5
✓ Branch 0 taken 255 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 183 times.
✓ Branch 3 taken 255 times.
✓ Branch 4 taken 255 times.
951 switch (element_size) {
309 case sizeof(uint8_t):
310 366 return split<uint8_t>(src_data, src_stride, dst_data, dst_strides, width,
311 183 height, channels);
312
313 case sizeof(uint16_t):
314 510 return split<uint16_t>(src_data, src_stride, dst_data, dst_strides, width,
315 255 height, channels);
316
317 case sizeof(uint32_t):
318 510 return split<uint32_t>(src_data, src_stride, dst_data, dst_strides, width,
319 255 height, channels);
320
321 case sizeof(uint64_t):
322 510 return split<uint64_t>(src_data, src_stride, dst_data, dst_strides, width,
323 255 height, channels);
324
325 default:
326 3 return KLEIDICV_ERROR_NOT_IMPLEMENTED;
327 }
328 951 }
329 } // namespace kleidicv::neon
330