KleidiCV Coverage Report


Directory: ./
File: kleidicv/include/kleidicv/neon.h
Date: 2025-09-25 14:13:34
             Exec   Total   Coverage
Lines:        155     155     100.0%
Functions:    632     632     100.0%
Branches:      24      24     100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #ifndef KLEIDICV_NEON_H
6 #define KLEIDICV_NEON_H
7
8 #include <utility>
9
10 #include "kleidicv/neon_intrinsics.h"
11 #include "kleidicv/operations.h"
12 #include "kleidicv/utils.h"
13
14 namespace kleidicv::neon {
15
16 template <>
17 class half_element_width<uint16x8_t> {
18 public:
19 using type = uint8x16_t;
20 };
21
22 template <>
23 class half_element_width<uint32x4_t> {
24 public:
25 using type = uint16x8_t;
26 };
27
28 template <>
29 class half_element_width<uint64x2_t> {
30 public:
31 using type = uint32x4_t;
32 };
33
34 template <>
35 class double_element_width<uint8x16_t> {
36 public:
37 using type = uint16x8_t;
38 };
39
40 template <>
41 class double_element_width<uint16x8_t> {
42 public:
43 using type = uint32x4_t;
44 };
45
46 template <>
47 class double_element_width<uint32x4_t> {
48 public:
49 using type = uint64x2_t;
50 };
51
52 // Primary template to describe the logically grouped properties of vectors.
53 template <typename ScalarType>
54 class VectorTypes;
55
56 template <>
57 class VectorTypes<int8_t> {
58 public:
59 using ScalarType = int8_t;
60 using VectorType = int8x16_t;
61 using Vector2Type = int8x16x2_t;
62 using Vector3Type = int8x16x3_t;
63 using Vector4Type = int8x16x4_t;
64 }; // end of class VectorTypes<int8_t>
65
66 template <>
67 class VectorTypes<uint8_t> {
68 public:
69 using ScalarType = uint8_t;
70 using VectorType = uint8x16_t;
71 using Vector2Type = uint8x16x2_t;
72 using Vector3Type = uint8x16x3_t;
73 using Vector4Type = uint8x16x4_t;
74 }; // end of class VectorTypes<uint8_t>
75
76 template <>
77 class VectorTypes<int16_t> {
78 public:
79 using ScalarType = int16_t;
80 using VectorType = int16x8_t;
81 using Vector2Type = int16x8x2_t;
82 using Vector3Type = int16x8x3_t;
83 using Vector4Type = int16x8x4_t;
84 }; // end of class VectorTypes<int16_t>
85
86 template <>
87 class VectorTypes<uint16_t> {
88 public:
89 using ScalarType = uint16_t;
90 using VectorType = uint16x8_t;
91 using Vector2Type = uint16x8x2_t;
92 using Vector3Type = uint16x8x3_t;
93 using Vector4Type = uint16x8x4_t;
94 }; // end of class VectorTypes<uint16_t>
95
96 template <>
97 class VectorTypes<int32_t> {
98 public:
99 using ScalarType = int32_t;
100 using VectorType = int32x4_t;
101 using Vector2Type = int32x4x2_t;
102 using Vector3Type = int32x4x3_t;
103 using Vector4Type = int32x4x4_t;
104 }; // end of class VectorTypes<int32_t>
105
106 template <>
107 class VectorTypes<uint32_t> {
108 public:
109 using ScalarType = uint32_t;
110 using VectorType = uint32x4_t;
111 using Vector2Type = uint32x4x2_t;
112 using Vector3Type = uint32x4x3_t;
113 using Vector4Type = uint32x4x4_t;
114 }; // end of class VectorTypes<uint32_t>
115
116 template <>
117 class VectorTypes<int64_t> {
118 public:
119 using ScalarType = int64_t;
120 using VectorType = int64x2_t;
121 using Vector2Type = int64x2x2_t;
122 using Vector3Type = int64x2x3_t;
123 using Vector4Type = int64x2x4_t;
124 }; // end of class VectorTypes<int64_t>
125
126 template <>
127 class VectorTypes<uint64_t> {
128 public:
129 using ScalarType = uint64_t;
130 using VectorType = uint64x2_t;
131 using Vector2Type = uint64x2x2_t;
132 using Vector3Type = uint64x2x3_t;
133 using Vector4Type = uint64x2x4_t;
134 }; // end of class VectorTypes<uint64_t>
135
136 template <>
137 class VectorTypes<float> {
138 public:
139 using ScalarType = float;
140 using VectorType = float32x4_t;
141 using Vector2Type = float32x4x2_t;
142 using Vector3Type = float32x4x3_t;
143 using Vector4Type = float32x4x4_t;
144 }; // end of class VectorTypes<float>
145
146 template <>
147 class VectorTypes<double> {
148 public:
149 using ScalarType = double;
150 using VectorType = float64x2_t;
151 using Vector2Type = float64x2x2_t;
152 using Vector3Type = float64x2x3_t;
153 using Vector4Type = float64x2x4_t;
154 }; // end of class VectorTypes<double>
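// A usage sketch ('duplicate_u8_example' is a hypothetical name, not part
// of this header): generic code reaches the matching NEON register type
// through VectorTypes instead of hard-coding uint8x16_t and friends.
inline VectorTypes<uint8_t>::VectorType duplicate_u8_example(uint8_t value) {
  return vdupq_n_u8(value);  // VectorType resolves to uint8x16_t
}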
155
156 // NEON vector length in bytes.
157 static constexpr size_t kVectorLength = 16;
158
159 // Base class for all NEON vector traits.
160 template <typename ScalarType>
161 class VecTraitsBase : public VectorTypes<ScalarType> {
162 public:
163 using typename VectorTypes<ScalarType>::VectorType;
164 using typename VectorTypes<ScalarType>::Vector2Type;
165 using typename VectorTypes<ScalarType>::Vector3Type;
166 using typename VectorTypes<ScalarType>::Vector4Type;
167
168 // Number of lanes in a vector.
169 444265 static constexpr size_t num_lanes() {
170 444265 return kVectorLength / sizeof(ScalarType);
171 }
172
173 // Maximum number of lanes in a vector.
174 static constexpr size_t max_num_lanes() { return num_lanes(); }
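  // With kVectorLength == 16 this evaluates to, for example, 16 lanes for
  // uint8_t, 8 for uint16_t, 4 for uint32_t and float, and 2 for uint64_t
  // and double.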
175
176 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
177
178 private:
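  // These overloads let load() and store() below select the matching
  // vld1q_*_x2/x3/x4 and vst1q_*_x2/x3/x4 intrinsics purely through the
  // scalar pointer type.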
179 static inline int8x16x2_t vld1q_x2(const int8_t *src) {
180 return vld1q_s8_x2(src);
181 }
182
183 3214 static inline uint8x16x2_t vld1q_x2(const uint8_t *src) {
184 3214 return vld1q_u8_x2(src);
185 }
186
187 4728 static inline int16x8x2_t vld1q_x2(const int16_t *src) {
188 4728 return vld1q_s16_x2(src);
189 }
190
191 408 static inline uint16x8x2_t vld1q_x2(const uint16_t *src) {
192 408 return vld1q_u16_x2(src);
193 }
194
195 static inline int32x4x2_t vld1q_x2(const int32_t *src) {
196 return vld1q_s32_x2(src);
197 }
198
199 240 static inline uint32x4x2_t vld1q_x2(const uint32_t *src) {
200 240 return vld1q_u32_x2(src);
201 }
202
203 static inline int64x2x2_t vld1q_x2(const int64_t *src) {
204 return vld1q_s64_x2(src);
205 }
206
207 240 static inline uint64x2x2_t vld1q_x2(const uint64_t *src) {
208 240 return vld1q_u64_x2(src);
209 }
210
211 static inline float32x4x2_t vld1q_x2(const float32_t *src) {
212 return vld1q_f32_x2(src);
213 }
214
215 static inline int8x16x3_t vld1q_x3(const int8_t *src) {
216 return vld1q_s8_x3(src);
217 }
218
219 1688 static inline uint8x16x3_t vld1q_x3(const uint8_t *src) {
220 1688 return vld1q_u8_x3(src);
221 }
222
223 static inline int16x8x3_t vld1q_x3(const int16_t *src) {
224 return vld1q_s16_x3(src);
225 }
226
227 432 static inline uint16x8x3_t vld1q_x3(const uint16_t *src) {
228 432 return vld1q_u16_x3(src);
229 }
230
231 static inline int32x4x3_t vld1q_x3(const int32_t *src) {
232 return vld1q_s32_x3(src);
233 }
234
235 432 static inline uint32x4x3_t vld1q_x3(const uint32_t *src) {
236 432 return vld1q_u32_x3(src);
237 }
238
239 static inline int64x2x3_t vld1q_x3(const int64_t *src) {
240 return vld1q_s64_x3(src);
241 }
242
243 432 static inline uint64x2x3_t vld1q_x3(const uint64_t *src) {
244 432 return vld1q_u64_x3(src);
245 }
246
247 static inline float32x4x3_t vld1q_x3(const float32_t *src) {
248 return vld1q_f32_x3(src);
249 }
250
251 884 static inline int8x16x4_t vld1q_x4(const int8_t *src) {
252 884 return vld1q_s8_x4(src);
253 }
254
255 1436 static inline uint8x16x4_t vld1q_x4(const uint8_t *src) {
256 1436 return vld1q_u8_x4(src);
257 }
258
259 static inline int16x8x4_t vld1q_x4(const int16_t *src) {
260 return vld1q_s16_x4(src);
261 }
262
263 432 static inline uint16x8x4_t vld1q_x4(const uint16_t *src) {
264 432 return vld1q_u16_x4(src);
265 }
266
267 static inline int32x4x4_t vld1q_x4(const int32_t *src) {
268 return vld1q_s32_x4(src);
269 }
270
271 432 static inline uint32x4x4_t vld1q_x4(const uint32_t *src) {
272 432 return vld1q_u32_x4(src);
273 }
274
275 static inline int64x2x4_t vld1q_x4(const int64_t *src) {
276 return vld1q_s64_x4(src);
277 }
278
279 432 static inline uint64x2x4_t vld1q_x4(const uint64_t *src) {
280 432 return vld1q_u64_x4(src);
281 }
282
283 457 static inline float32x4x4_t vld1q_x4(const float32_t *src) {
284 457 return vld1q_f32_x4(src);
285 }
286
287 static inline void vst1q_x2(int8_t *dst, int8x16x2_t vec) {
288 vst1q_s8_x2(dst, vec);
289 }
290
291 6904 static inline void vst1q_x2(uint8_t *dst, uint8x16x2_t vec) {
292 6904 vst1q_u8_x2(dst, vec);
293 6904 }
294
295 static inline void vst1q_x2(int16_t *dst, int16x8x2_t vec) {
296 vst1q_s16_x2(dst, vec);
297 }
298
299 1992 static inline void vst1q_x2(uint16_t *dst, uint16x8x2_t vec) {
300 1992 vst1q_u16_x2(dst, vec);
301 1992 }
302
303 static inline void vst1q_x2(int32_t *dst, int32x4x2_t vec) {
304 vst1q_s32_x2(dst, vec);
305 }
306
307 1992 static inline void vst1q_x2(uint32_t *dst, uint32x4x2_t vec) {
308 1992 vst1q_u32_x2(dst, vec);
309 1992 }
310
311 static inline void vst1q_x2(int64_t *dst, int64x2x2_t vec) {
312 vst1q_s64_x2(dst, vec);
313 }
314
315 1992 static inline void vst1q_x2(uint64_t *dst, uint64x2x2_t vec) {
316 1992 vst1q_u64_x2(dst, vec);
317 1992 }
318
319 73024 static inline void vst1q_x2(float32_t *dst, float32x4x2_t vec) {
320 73024 vst1q_f32_x2(dst, vec);
321 73024 }
322
323 static inline void vst1q_x3(int8_t *dst, int8x16x3_t vec) {
324 vst1q_s8_x3(dst, vec);
325 }
326
327 1304 static inline void vst1q_x3(uint8_t *dst, uint8x16x3_t vec) {
328 1304 vst1q_u8_x3(dst, vec);
329 1304 }
330
331 static inline void vst1q_x3(int16_t *dst, int16x8x3_t vec) {
332 vst1q_s16_x3(dst, vec);
333 }
334
335 432 static inline void vst1q_x3(uint16_t *dst, uint16x8x3_t vec) {
336 432 vst1q_u16_x3(dst, vec);
337 432 }
338
339 static inline void vst1q_x3(int32_t *dst, int32x4x3_t vec) {
340 vst1q_s32_x3(dst, vec);
341 }
342
343 static inline void vst1q_x3(uint32_t *dst, uint32x4x3_t vec) {
344 vst1q_u32_x3(dst, vec);
345 }
346
347 static inline void vst1q_x3(int64_t *dst, int64x2x3_t vec) {
348 vst1q_s64_x3(dst, vec);
349 }
350
351 432 static inline void vst1q_x3(uint64_t *dst, uint64x2x3_t vec) {
352 432 vst1q_u64_x3(dst, vec);
353 432 }
354
355 static inline void vst1q_x3(float32_t *dst, float32x4x3_t vec) {
356 vst1q_f32_x3(dst, vec);
357 }
358
359 static inline void vst1q_x4(int8_t *dst, int8x16x4_t vec) {
360 vst1q_s8_x4(dst, vec);
361 }
362
363 442 static inline void vst1q_x4(uint8_t *dst, uint8x16x4_t vec) {
364 442 vst1q_u8_x4(dst, vec);
365 442 }
366
367 static inline void vst1q_x4(int16_t *dst, int16x8x4_t vec) {
368 vst1q_s16_x4(dst, vec);
369 }
370
371 5608 static inline void vst1q_x4(uint16_t *dst, uint16x8x4_t vec) {
372 5608 vst1q_u16_x4(dst, vec);
373 5608 }
374
375 static inline void vst1q_x4(int32_t *dst, int32x4x4_t vec) {
376 vst1q_s32_x4(dst, vec);
377 }
378
379 816 static inline void vst1q_x4(uint32_t *dst, uint32x4x4_t vec) {
380 816 vst1q_u32_x4(dst, vec);
381 816 }
382
383 static inline void vst1q_x4(int64_t *dst, int64x2x4_t vec) {
384 vst1q_s64_x4(dst, vec);
385 }
386
387 1632 static inline void vst1q_x4(uint64_t *dst, uint64x2x4_t vec) {
388 1632 vst1q_u64_x4(dst, vec);
389 1632 }
390
391 312 static inline void vst1q_x4(float32_t *dst, float32x4x4_t vec) {
392 312 vst1q_f32_x4(dst, vec);
393 312 }
394
395 public:
396 #endif
397
398 // Loads a single vector from 'src'.
399 4622 static inline void load(const ScalarType *src, VectorType &vec) {
400 4622 vec = vld1q(&src[0]);
401 4622 }
402
403 // Loads two consecutive vectors from 'src'.
404 7870 static inline void load(const ScalarType *src, Vector2Type &vec) {
405 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
406 7870 vec = vld1q_x2(&src[0]);
407 #else
408 vec = {vld1q(&src[0]), vld1q(&src[0] + num_lanes())};
409 #endif
410 7870 }
411
412 // Loads three consecutive vectors from 'src'.
413 1256 static inline void load(const ScalarType *src, Vector3Type &vec) {
414 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
415 1256 vec = vld1q_x3(&src[0]);
416 #else
417 vec = {vld1q(&src[0]), vld1q(&src[0] + num_lanes()),
418 vld1q(&src[0] + (2 * num_lanes()))};
419 #endif
420 1256 }
421
422 // Loads four consecutive vectors from 'src'.
423 2345 static inline void load(const ScalarType *src, Vector4Type &vec) {
424 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
425 2345 vec = vld1q_x4(&src[0]);
426 #else
427 vec = {vld1q(&src[0]), vld1q(&src[0] + num_lanes()),
428 vld1q(&src[0] + (2 * num_lanes())),
429 vld1q(&src[0] + (3 * num_lanes()))};
430 #endif
431 2345 }
432
433 // Loads two consecutive vectors from 'src'.
434 304340 static inline void load_consecutive(const ScalarType *src, VectorType &vec_0,
435 VectorType &vec_1) {
436 304340 vec_0 = vld1q(&src[0]);
437 304340 vec_1 = vld1q(&src[num_lanes()]);
438 304340 }
439
440 // Loads 2x2 consecutive vectors from 'src'.
441 480 static inline void load_consecutive(const ScalarType *src, Vector2Type &vec_0,
442 Vector2Type &vec_1) {
443 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
444 480 vec_0 = vld1q_x2(&src[0]);
445 480 vec_1 = vld1q_x2(&src[num_lanes() * 2]);
446 #else
447 vec_0 = {vld1q(&src[0]), vld1q(&src[0] + num_lanes())};
448 vec_1 = {vld1q(&src[num_lanes() * 2]),
449 vld1q(&src[num_lanes() * 2] + num_lanes())};
450 #endif
451 480 }
452
453 // Loads 2x3 consecutive vectors from 'src'.
454 864 static inline void load_consecutive(const ScalarType *src, Vector3Type &vec_0,
455 Vector3Type &vec_1) {
456 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
457 864 vec_0 = vld1q_x3(&src[0]);
458 864 vec_1 = vld1q_x3(&src[num_lanes() * 3]);
459 #else
460 vec_0 = {vld1q(&src[0]), vld1q(&src[0] + num_lanes()),
461 vld1q(&src[0] + (2 * num_lanes()))};
462 vec_1 = {vld1q(&src[num_lanes() * 3]),
463 vld1q(&src[num_lanes() * 3] + num_lanes()),
464 vld1q(&src[num_lanes() * 3] + (2 * num_lanes()))};
465 #endif
466 864 }
467
468 // Loads 2x4 consecutive vectors from 'src'.
469 864 static inline void load_consecutive(const ScalarType *src, Vector4Type &vec_0,
470 Vector4Type &vec_1) {
471 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
472 864 vec_0 = vld1q_x4(&src[0]);
473 864 vec_1 = vld1q_x4(&src[num_lanes() * 4]);
474
475 #else
476 vec_0 = {vld1q(&src[0]), vld1q(&src[0] + num_lanes()),
477 vld1q(&src[0] + (2 * num_lanes())),
478 vld1q(&src[0] + (3 * num_lanes()))};
479 vec_1 = {vld1q(&src[num_lanes() * 4]),
480 vld1q(&src[num_lanes() * 4] + num_lanes()),
481 vld1q(&src[num_lanes() * 4] + (2 * num_lanes())),
482 vld1q(&src[num_lanes() * 4] + (3 * num_lanes()))};
483 #endif
484 864 }
485
486 // Stores a single vector to 'dst'.
487 11800 static inline void store(VectorType vec, ScalarType *dst) {
488 11800 vst1q(&dst[0], vec);
489 11800 }
490
491 // Stores two consecutive vectors to 'dst'.
492 85904 static inline void store(Vector2Type vec, ScalarType *dst) {
493 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
494 85904 vst1q_x2(&dst[0], vec);
495 #else
496 vst1q(&dst[0], vec.val[0]);
497 vst1q(&dst[0] + num_lanes(), vec.val[1]);
498 #endif
499 85904 }
500
501 // Stores three consecutive vectors to 'dst'.
502 2168 static inline void store(Vector3Type vec, ScalarType *dst) {
503 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
504 2168 vst1q_x3(&dst[0], vec);
505 #else
506 vst1q(&dst[0], vec.val[0]);
507 vst1q(&dst[0] + num_lanes(), vec.val[1]);
508 vst1q(&dst[0] + (2 * num_lanes()), vec.val[2]);
509 #endif
510 2168 }
511
512 // Stores four consecutive vectors to 'dst'.
513 8810 static inline void store(Vector4Type vec, ScalarType *dst) {
514 #if KLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS
515 8810 vst1q_x4(&dst[0], vec);
516 #else
517 vst1q(&dst[0], vec.val[0]);
518 vst1q(&dst[0] + num_lanes(), vec.val[1]);
519 vst1q(&dst[0] + (2 * num_lanes()), vec.val[2]);
520 vst1q(&dst[0] + (3 * num_lanes()), vec.val[3]);
521 #endif
522 8810 }
523
524 // Stores two consecutive vectors to 'dst'.
525 47670 static inline void store_consecutive(VectorType vec_0, VectorType vec_1,
526 ScalarType *dst) {
527 47670 vst1q(&dst[0], vec_0);
528 47670 vst1q(&dst[num_lanes()], vec_1);
529 47670 }
530 }; // end of class VecTraitsBase<ScalarType>
531
532 // Available NEON vector traits.
533 template <typename ScalarType>
534 class VecTraits : public VecTraitsBase<ScalarType> {};
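// A minimal usage sketch ('copy_four_vectors_example' is a hypothetical
// name, not part of this header): one call site covers both the
// multi-vector x4 path and the per-vector fallback selected above.
inline void copy_four_vectors_example(const uint8_t *src, uint8_t *dst) {
  VecTraits<uint8_t>::Vector4Type vec;
  VecTraits<uint8_t>::load(src, vec);   // vld1q_u8_x4, or four vld1q_u8
  VecTraits<uint8_t>::store(vec, dst);  // vst1q_u8_x4, or four vst1q_u8
}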
535
536 // NEON has no associated context yet.
537 using NeonContextType = Monostate;
538
539 // Adapter which simply adds context and forwards all arguments.
540 template <typename OperationType>
541 class OperationContextAdapter : public OperationBase<OperationType> {
542 // Shortens lines: no need to write 'this->'.
543 using OperationBase<OperationType>::operation;
544
545 public:
546 using ContextType = NeonContextType;
547
548 6782 explicit OperationContextAdapter(OperationType &operation)
549 6782 : OperationBase<OperationType>(operation) {}
550
551 // Forwards vector_path_2x() calls to the inner operation.
552 template <typename... ArgTypes>
553 280270 void vector_path_2x(ArgTypes &&...args) {
554 280270 operation().vector_path_2x(ContextType{}, std::forward<ArgTypes>(args)...);
555 280270 }
556
557 // Forwards vector_path() calls to the inner operation.
558 template <typename... ArgTypes>
559 8156 void vector_path(ArgTypes &&...args) {
560 8156 operation().vector_path(ContextType{}, std::forward<ArgTypes>(args)...);
561 8156 }
562
563 // Forwards remaining_path() calls to the inner operation.
564 template <typename... ArgTypes>
565 9188 void remaining_path(ArgTypes &&...args) {
566 9188 operation().remaining_path(ContextType{}, std::forward<ArgTypes>(args)...);
567 9188 }
568 }; // end of class OperationContextAdapter<OperationType>
569
570 // Adapter which implements remaining_path() for general NEON operations.
571 template <typename OperationType>
572 class RemainingPathAdapter : public OperationBase<OperationType> {
573 public:
574 using ContextType = NeonContextType;
575
576 6478 explicit RemainingPathAdapter(OperationType &operation)
577 6478 : OperationBase<OperationType>(operation) {}
578
579 // Forwards remaining_path() calls to scalar_path() of the inner operation
580 // element by element.
581 template <typename... ColumnTypes>
582 8532 void remaining_path(ContextType ctx, size_t length, ColumnTypes... columns) {
583 24/24 68844 for (size_t index = 0; index < length; ++index) {
      ✓ Branch 0 taken 1941 times.
      ✓ Branch 1 taken 30088 times.
      ✓ Branch 2 taken 984 times.
      ✓ Branch 3 taken 11947 times.
      ✓ Branch 4 taken 863 times.
      ✓ Branch 5 taken 4571 times.
      ✓ Branch 6 taken 1974 times.
      ✓ Branch 7 taken 7958 times.
      ✓ Branch 8 taken 702 times.
      ✓ Branch 9 taken 2289 times.
      ✓ Branch 10 taken 648 times.
      ✓ Branch 11 taken 1935 times.
      ✓ Branch 12 taken 282 times.
      ✓ Branch 13 taken 324 times.
      ✓ Branch 14 taken 298 times.
      ✓ Branch 15 taken 336 times.
      ✓ Branch 16 taken 282 times.
      ✓ Branch 17 taken 288 times.
      ✓ Branch 18 taken 90 times.
      ✓ Branch 19 taken 96 times.
      ✓ Branch 20 taken 186 times.
      ✓ Branch 21 taken 192 times.
      ✓ Branch 22 taken 282 times.
      ✓ Branch 23 taken 288 times.
584 60312 disable_loop_vectorization();
585 60312 this->operation().scalar_path(ctx, columns.at(index)...);
586 60312 }
587 8532 }
588 }; // end of class RemainingPathAdapter<OperationType>
589
590 // Adapter which implements remaining_path() for NEON operations that
591 // implement custom processing of remaining elements.
592 template <typename OperationType>
593 class RemainingPathToScalarPathAdapter : public OperationBase<OperationType> {
594 public:
595 using ContextType = NeonContextType;
596
597 304 explicit RemainingPathToScalarPathAdapter(OperationType &operation)
598 304 : OperationBase<OperationType>(operation) {}
599
600 // Forwards remaining_path() calls to scalar_path() of the inner operation.
601 template <typename... ArgTypes>
602 656 void remaining_path(ArgTypes &&...args) {
603 656 this->operation().scalar_path(std::forward<ArgTypes>(args)...);
604 656 }
605 }; // end of class RemainingPathToScalarPathAdapter<OperationType>
606
607 // Shorthand for applying a generic unrolled NEON operation.
608 template <typename OperationType, typename... ArgTypes>
609 6010 void apply_operation_by_rows(OperationType &operation, ArgTypes &&...args) {
610 6010 RemoveContextAdapter remove_context_adapter{operation};
611 6010 OperationAdapter operation_adapter{remove_context_adapter};
612 6010 RemainingPathAdapter remaining_path_adapter{operation_adapter};
613 6010 OperationContextAdapter context_adapter{remaining_path_adapter};
614 6010 RowBasedOperation row_based_operation{context_adapter};
615 6010 zip_rows(row_based_operation, std::forward<ArgTypes>(args)...);
616 6010 }
617
618 // Shorthand for applying a generic unrolled and block-based NEON operation.
619 template <typename OperationType, typename... ArgTypes>
620 468 void apply_block_operation_by_rows(OperationType &operation,
621 ArgTypes &&...args) {
622 468 RemoveContextAdapter remove_context_adapter{operation};
623 468 OperationAdapter operation_adapter{remove_context_adapter};
624 468 RemainingPathAdapter remaining_path_adapter{operation_adapter};
625 468 OperationContextAdapter context_adapter{remaining_path_adapter};
626 468 RowBasedBlockOperation block_operation{context_adapter};
627 468 zip_rows(block_operation, std::forward<ArgTypes>(args)...);
628 468 }
629
630 } // namespace kleidicv::neon
631
632 #endif // KLEIDICV_NEON_H
633