Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
2 | // | ||
3 | // SPDX-License-Identifier: Apache-2.0 | ||
4 | |||
5 | #include "kleidicv/kleidicv.h" | ||
6 | #include "kleidicv/sve2.h" | ||
7 | |||
8 | namespace kleidicv::sve2 { | ||
9 | |||
10 | template <typename ScalarType> | ||
11 | class SaturatingMultiply final : public UnrollTwice { | ||
12 | public: | ||
13 | using ContextType = Context; | ||
14 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>; | ||
15 | using VectorType = typename VecTraits::VectorType; | ||
16 | |||
17 | 650 | explicit SaturatingMultiply(double scale = 1.0) : scale_{scale} {}; | |
18 | |||
19 | 10858 | VectorType vector_path(ContextType ctx, VectorType src_a, VectorType src_b) { | |
20 | 10858 | VectorType result; | |
21 | (void)ctx; | ||
22 | |||
23 | // Widening multiply of the even-indexed elements of src_a and src_b. | ||
24 | 10858 | auto bottom_part = svmullb(src_a, src_b); | |
25 | // Widening multiply of the odd-indexed elements of src_a and src_b. | ||
26 | 10858 | auto top_part = svmullt(src_a, src_b); | |
27 | // Saturating-narrow the even-indexed products. | ||
28 | 10858 | auto narrow_bottom = svqxtnb(bottom_part); | |
29 | // Saturating-narrow the odd-indexed products and merge them with the even-indexed results. | ||
30 | 10858 | result = svqxtnt(narrow_bottom, top_part); | |
31 | |||
32 | /* TODO: scale_ is stored but not applied in this function; figure out | ||
33 | how to multiply by a double or by some fixed supported scale. | ||
34 | */ | ||
35 | |||
36 | 21716 | return result; | |
37 | 10858 | } | |
38 | |||
39 | private: | ||
40 | double scale_; | ||
41 | }; | ||
42 | |||
43 | template <typename T> | ||
44 | 718 | kleidicv_error_t saturating_multiply(const T *src_a, size_t src_a_stride, | |
45 | const T *src_b, size_t src_b_stride, | ||
46 | T *dst, size_t dst_stride, size_t width, | ||
47 | size_t height, double scale) { | ||
48 |
20/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 138 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 138 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 138 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 138 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 142 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 142 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 142 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 142 times.
✓ Branch 16 taken 4 times.
✓ Branch 17 taken 142 times.
✓ Branch 18 taken 4 times.
✓ Branch 19 taken 142 times.
|
718 | CHECK_POINTER_AND_STRIDE(src_a, src_a_stride, height); |
49 |
20/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 136 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 136 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 136 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 136 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 138 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 138 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 138 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 138 times.
✓ Branch 16 taken 4 times.
✓ Branch 17 taken 138 times.
✓ Branch 18 taken 4 times.
✓ Branch 19 taken 138 times.
|
702 | CHECK_POINTER_AND_STRIDE(src_b, src_b_stride, height); |
50 |
20/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 134 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 134 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 134 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 134 times.
✓ Branch 8 taken 4 times.
✓ Branch 9 taken 134 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 134 times.
✓ Branch 12 taken 4 times.
✓ Branch 13 taken 134 times.
✓ Branch 14 taken 4 times.
✓ Branch 15 taken 134 times.
✓ Branch 16 taken 4 times.
✓ Branch 17 taken 134 times.
✓ Branch 18 taken 4 times.
✓ Branch 19 taken 134 times.
|
686 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
51 |
30/30✓ Branch 0 taken 2 times.
✓ Branch 1 taken 132 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 130 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 130 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 132 times.
✓ Branch 8 taken 2 times.
✓ Branch 9 taken 130 times.
✓ Branch 10 taken 4 times.
✓ Branch 11 taken 130 times.
✓ Branch 12 taken 2 times.
✓ Branch 13 taken 132 times.
✓ Branch 14 taken 2 times.
✓ Branch 15 taken 130 times.
✓ Branch 16 taken 4 times.
✓ Branch 17 taken 130 times.
✓ Branch 18 taken 2 times.
✓ Branch 19 taken 132 times.
✓ Branch 20 taken 2 times.
✓ Branch 21 taken 130 times.
✓ Branch 22 taken 4 times.
✓ Branch 23 taken 130 times.
✓ Branch 24 taken 2 times.
✓ Branch 25 taken 132 times.
✓ Branch 26 taken 2 times.
✓ Branch 27 taken 130 times.
✓ Branch 28 taken 4 times.
✓ Branch 29 taken 130 times.
|
670 | CHECK_IMAGE_SIZE(width, height); |
52 | |||
53 | (void)scale; // TODO: scale is currently ignored; figure out how to process it. | ||
54 | 650 | SaturatingMultiply<T> operation; | |
55 | 650 | Rectangle rect{width, height}; | |
56 | 650 | Rows<const T> src_a_rows{src_a, src_a_stride}; | |
57 | 650 | Rows<const T> src_b_rows{src_b, src_b_stride}; | |
58 | 650 | Rows<T> dst_rows{dst, dst_stride}; | |
59 | 650 | apply_operation_by_rows(operation, rect, src_a_rows, src_b_rows, dst_rows); | |
60 | 650 | return KLEIDICV_OK; | |
61 | 718 | } | |
62 | |||
63 | #define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ | ||
64 | template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ | ||
65 | saturating_multiply<type>(const type *src_a, size_t src_a_stride, \ | ||
66 | const type *src_b, size_t src_b_stride, type *dst, \ | ||
67 | size_t dst_stride, size_t width, size_t height, \ | ||
68 | double scale) | ||
69 | |||
70 | KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t); | ||
71 | KLEIDICV_INSTANTIATE_TEMPLATE(int8_t); | ||
72 | KLEIDICV_INSTANTIATE_TEMPLATE(uint16_t); | ||
73 | KLEIDICV_INSTANTIATE_TEMPLATE(int16_t); | ||
74 | KLEIDICV_INSTANTIATE_TEMPLATE(int32_t); | ||
75 | |||
76 | } // namespace kleidicv::sve2 | ||
77 |