KleidiCV Coverage Report


Directory: ./
File: kleidicv/src/arithmetics/multiply_neon.cpp
Date: 2025-09-25 14:13:34
Exec Total Coverage
Lines: 32 32 100.0%
Functions: 20 20 100.0%
Branches: 118 118 100.0%

Line Branch Exec Source
1 // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
2 //
3 // SPDX-License-Identifier: Apache-2.0
4
5 #include <limits>
6
7 #include "kleidicv/kleidicv.h"
8 #include "kleidicv/neon.h"
9 #include "kleidicv/types.h"
10
11 namespace kleidicv::neon {
12
13 template <typename ScalarType>
14 class SaturatingMultiply final : public UnrollTwice {
15 public:
16 using VecTraits = neon::VecTraits<ScalarType>;
17 using VectorType = typename VecTraits::VectorType;
18
19 325 explicit SaturatingMultiply(double scale = 1.0) : scale_{scale} {};
20
21 4664 VectorType vector_path(VectorType src_a, VectorType src_b) {
22 4664 VectorType result;
23
24 // multiply-widen high part of the vectors.
25 // results in e.g. int16x8 -> int32x4
26 4664 auto high_part = vmull_high(src_a, src_b);
27
28 // get low part of the vectors and multiply-widen these.
29 4664 auto low_part = vmull(vget_low(src_a), vget_low(src_b));
30
31 // narrow-saturate low_part back to int16x4
32 // narrow-saturate high_part back to int16x4
33 // and stitch them back together into int16x8
34 4664 result = vqmovn_high(vqmovn(low_part), high_part);
35
36 /* TODO: figure out the way to multiply by double or some
37 fixed supported scale. Note that vmulq_n does not support 8x8 vectors.
38 */
39 // result = vmulq_n(result, this->scale_);
40
41 9328 return result;
42 4664 }
43
44 1736 ScalarType scalar_path(ScalarType src_a, ScalarType src_b) {
45 1736 ScalarType result;
46 if (std::numeric_limits<ScalarType>::is_signed) {
47
6/6
✓ Branch 0 taken 416 times.
✓ Branch 1 taken 232 times.
✓ Branch 2 taken 140 times.
✓ Branch 3 taken 108 times.
✓ Branch 4 taken 76 times.
✓ Branch 5 taken 44 times.
1016 if (__builtin_mul_overflow(src_a, src_b, &result)) {
48
18/18
✓ Branch 0 taken 180 times.
✓ Branch 1 taken 236 times.
✓ Branch 2 taken 77 times.
✓ Branch 3 taken 103 times.
✓ Branch 4 taken 103 times.
✓ Branch 5 taken 236 times.
✓ Branch 6 taken 61 times.
✓ Branch 7 taken 79 times.
✓ Branch 8 taken 19 times.
✓ Branch 9 taken 42 times.
✓ Branch 10 taken 42 times.
✓ Branch 11 taken 79 times.
✓ Branch 12 taken 29 times.
✓ Branch 13 taken 47 times.
✓ Branch 14 taken 9 times.
✓ Branch 15 taken 20 times.
✓ Branch 16 taken 20 times.
✓ Branch 17 taken 47 times.
632 return (src_a < 0 && src_b > 0) || (src_a > 0 && src_b < 0)
49 632 ? std::numeric_limits<ScalarType>::lowest()
50 632 : std::numeric_limits<ScalarType>::max();
51 }
52 384 return result;
53 }
54
55
4/4
✓ Branch 0 taken 418 times.
✓ Branch 1 taken 118 times.
✓ Branch 2 taken 136 times.
✓ Branch 3 taken 48 times.
720 if (__builtin_mul_overflow(src_a, src_b, &result)) {
56 554 return std::numeric_limits<ScalarType>::max();
57 }
58 166 return result;
59 1736 }
60
61 private:
62 double scale_;
63 };
64
65 template <typename T>
66 359 kleidicv_error_t saturating_multiply(const T *src_a, size_t src_a_stride,
67 const T *src_b, size_t src_b_stride,
68 T *dst, size_t dst_stride, size_t width,
69 size_t height, double scale) {
70
20/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 69 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 69 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 69 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 69 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 71 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 71 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 71 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 71 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 71 times.
✓ Branch 18 taken 2 times.
✓ Branch 19 taken 71 times.
359 CHECK_POINTER_AND_STRIDE(src_a, src_a_stride, height);
71
20/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 68 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 68 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 68 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 68 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 69 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 69 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 69 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 69 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 69 times.
✓ Branch 18 taken 2 times.
✓ Branch 19 taken 69 times.
351 CHECK_POINTER_AND_STRIDE(src_b, src_b_stride, height);
72
20/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 67 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 67 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 67 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 67 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 67 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 67 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 67 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 67 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 67 times.
✓ Branch 18 taken 2 times.
✓ Branch 19 taken 67 times.
343 CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
73
30/30
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 66 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 65 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 65 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 66 times.
✓ Branch 8 taken 1 times.
✓ Branch 9 taken 65 times.
✓ Branch 10 taken 2 times.
✓ Branch 11 taken 65 times.
✓ Branch 12 taken 1 times.
✓ Branch 13 taken 66 times.
✓ Branch 14 taken 1 times.
✓ Branch 15 taken 65 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 65 times.
✓ Branch 18 taken 1 times.
✓ Branch 19 taken 66 times.
✓ Branch 20 taken 1 times.
✓ Branch 21 taken 65 times.
✓ Branch 22 taken 2 times.
✓ Branch 23 taken 65 times.
✓ Branch 24 taken 1 times.
✓ Branch 25 taken 66 times.
✓ Branch 26 taken 1 times.
✓ Branch 27 taken 65 times.
✓ Branch 28 taken 2 times.
✓ Branch 29 taken 65 times.
335 CHECK_IMAGE_SIZE(width, height);
74
75 (void)scale; // TODO: figure out the way to process the scale.
76 325 SaturatingMultiply<T> operation;
77 325 Rectangle rect{width, height};
78 325 Rows<const T> src_a_rows{src_a, src_a_stride};
79 325 Rows<const T> src_b_rows{src_b, src_b_stride};
80 325 Rows<T> dst_rows{dst, dst_stride};
81 325 neon::apply_operation_by_rows(operation, rect, src_a_rows, src_b_rows,
82 dst_rows);
83 325 return KLEIDICV_OK;
84 359 }
85
86 #define KLEIDICV_INSTANTIATE_TEMPLATE(type) \
87 template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \
88 saturating_multiply<type>(const type *src_a, size_t src_a_stride, \
89 const type *src_b, size_t src_b_stride, type *dst, \
90 size_t dst_stride, size_t width, size_t height, \
91 double scale)
92
93 KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t);
94 KLEIDICV_INSTANTIATE_TEMPLATE(int8_t);
95 KLEIDICV_INSTANTIATE_TEMPLATE(uint16_t);
96 KLEIDICV_INSTANTIATE_TEMPLATE(int16_t);
97 KLEIDICV_INSTANTIATE_TEMPLATE(int32_t);
98
99 } // namespace kleidicv::neon
100