| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | // SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates <open-source-office@arm.com> | ||
| 2 | // | ||
| 3 | // SPDX-License-Identifier: Apache-2.0 | ||
| 4 | |||
| 5 | #ifndef KLEIDICV_IN_RANGE_SC_H | ||
| 6 | #define KLEIDICV_IN_RANGE_SC_H | ||
| 7 | |||
| 8 | #include "kleidicv/kleidicv.h" | ||
| 9 | #include "kleidicv/sve2.h" | ||
| 10 | |||
| 11 | namespace KLEIDICV_TARGET_NAMESPACE { | ||
| 12 | |||
| 13 | template <typename ScalarType> | ||
| 14 | class InRange; | ||
| 15 | |||
| 16 | template <> | ||
| 17 | class InRange<uint8_t> : public UnrollTwice { | ||
| 18 | public: | ||
| 19 | using ContextType = Context; | ||
| 20 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<uint8_t>; | ||
| 21 | using VectorType = typename VecTraits::VectorType; | ||
| 22 | using SignedScalarType = typename std::make_signed<uint8_t>::type; | ||
| 23 | using SignedVecTraits = | ||
| 24 | KLEIDICV_TARGET_NAMESPACE::VecTraits<SignedScalarType>; | ||
| 25 | using SignedVectorType = typename SignedVecTraits::VectorType; | ||
| 26 | |||
| 27 | 225 | InRange(VectorType &vec_lower_bound, | |
| 28 | VectorType &vec_upper_bound) KLEIDICV_STREAMING | ||
| 29 | 225 | : vec_lower_bound_(vec_lower_bound), | |
| 30 | 225 | vec_upper_bound_(vec_upper_bound) {} | |
| 31 | |||
| 32 | // NOLINTBEGIN(readability-make-member-function-const) | ||
| 33 | 861 | VectorType vector_path(ContextType ctx, VectorType src) KLEIDICV_STREAMING { | |
| 34 | 861 | svbool_t pg = ctx.predicate(); | |
| 35 | |||
| 36 | 861 | VectorType diff_low = svsub_x(pg, src, vec_lower_bound_); | |
| 37 | // Shift subtraction result 7 bits to the right, i.e. divide by 2^7 to keep | ||
| 38 | // sign bit only. | ||
| 39 | 1722 | VectorType result_within_low = | |
| 40 | 861 | VecTraits::svreinterpret(SignedVecTraits::svasr_n( | |
| 41 | 861 | pg, SignedVecTraits::svreinterpret(diff_low), 7)); | |
| 42 | |||
| 43 | 861 | VectorType diff_up = svsub_x(pg, vec_upper_bound_, src); | |
| 44 | 1722 | VectorType result_within_up = | |
| 45 | 861 | VecTraits::svreinterpret(SignedVecTraits::svasr_n( | |
| 46 | 861 | pg, SignedVecTraits::svreinterpret(diff_up), 7)); | |
| 47 | |||
| 48 | // src[i] < lower_bound OR src[i] > upper_bound | ||
| 49 | 861 | VectorType out_of_range = svorr_x(pg, result_within_low, result_within_up); | |
| 50 | // NOT(out_of_range) to set within elements to 1 and the rest to 0. | ||
| 51 | 861 | VectorType within_range = svcnot_x(pg, out_of_range); | |
| 52 | // Negate to set elements within to 0xFF (all 1s). | ||
| 53 | 1722 | return VecTraits::svreinterpret( | |
| 54 | 861 | svqneg_x(pg, SignedVecTraits::svreinterpret(within_range))); | |
| 55 | 861 | } | |
| 56 | // NOLINTEND(readability-make-member-function-const) | ||
| 57 | |||
| 58 | private: | ||
| 59 | VectorType &vec_lower_bound_; | ||
| 60 | VectorType &vec_upper_bound_; | ||
| 61 | }; // end of class InRange<uint8_t> | ||
| 62 | |||
| 63 | template <> | ||
| 64 | class InRange<float> { | ||
| 65 | public: | ||
| 66 | using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>; | ||
| 67 | using SrcVectorType = typename SrcVecTraits::VectorType; | ||
| 68 | using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<uint8_t>; | ||
| 69 | using DstVectorType = typename DstVecTraits::VectorType; | ||
| 70 | |||
| 71 | 225 | InRange(float lower_bound, float upper_bound) KLEIDICV_STREAMING | |
| 72 | 225 | : lower_bound_(lower_bound), | |
| 73 | 225 | upper_bound_(upper_bound) {} | |
| 74 | |||
| 75 | 259 | void process_row(size_t width, Columns<const float> src, | |
| 76 | Columns<uint8_t> dst) KLEIDICV_STREAMING { | ||
| 77 | 518 | LoopUnroll{width, SrcVecTraits::num_lanes()} | |
| 78 | 922 | .unroll_n_times<4>([&](size_t step) KLEIDICV_STREAMING { | |
| 79 | 663 | svbool_t pg_src = SrcVecTraits::svptrue(); | |
| 80 | 663 | SrcVectorType src_v0 = svld1(pg_src, &src[0]); | |
| 81 | 663 | SrcVectorType src_v1 = svld1_vnum(pg_src, &src[0], 1); | |
| 82 | 663 | SrcVectorType src_v2 = svld1_vnum(pg_src, &src[0], 2); | |
| 83 | 663 | SrcVectorType src_v3 = svld1_vnum(pg_src, &src[0], 3); | |
| 84 | 1326 | DstVectorType res0 = | |
| 85 | 663 | vector_path(pg_src, src_v0, src_v1, src_v2, src_v3); | |
| 86 | 663 | svbool_t pg_dst = DstVecTraits::svptrue(); | |
| 87 | 663 | svst1(pg_dst, &dst[0], res0); | |
| 88 | 663 | src += ptrdiff_t(step); | |
| 89 | 663 | dst += ptrdiff_t(step); | |
| 90 | 663 | }) | |
| 91 | 437 | .remaining([&](size_t length, size_t) KLEIDICV_STREAMING { | |
| 92 | 178 | size_t index = 0; | |
| 93 | 178 | svbool_t pg = SrcVecTraits::svwhilelt(index, length); | |
| 94 |
2/2✓ Branch 0 taken 326 times.
✓ Branch 1 taken 178 times.
|
504 | while (svptest_first(SrcVecTraits::svptrue(), pg)) { |
| 95 | 326 | SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]); | |
| 96 | 326 | DstVectorType result_vector = remaining_path(pg, src_vector); | |
| 97 | 652 | svst1b(pg, &dst[ptrdiff_t(index)], | |
| 98 | 326 | svreinterpret_u32(result_vector)); | |
| 99 | // Update loop counter and calculate the next governing predicate. | ||
| 100 | 326 | index += SrcVecTraits::num_lanes(); | |
| 101 | 326 | pg = SrcVecTraits::svwhilelt(index, length); | |
| 102 | 326 | } | |
| 103 | 178 | }); | |
| 104 | 259 | } | |
| 105 | |||
| 106 | private: | ||
| 107 | // NOLINTBEGIN(readability-make-member-function-const) | ||
| 108 | 663 | DstVectorType vector_path(svbool_t full_pg, SrcVectorType fsrc0, | |
| 109 | SrcVectorType fsrc1, SrcVectorType fsrc2, | ||
| 110 | SrcVectorType fsrc3) KLEIDICV_STREAMING { | ||
| 111 | 1326 | svbool_t pred0 = svand_z(full_pg, svcmpge(full_pg, fsrc0, lower_bound_), | |
| 112 | 663 | svcmple(full_pg, fsrc0, upper_bound_)); | |
| 113 | 663 | auto res00 = svsel(pred0, svdup_u32(0xFF), svdup_u32(0)); | |
| 114 | |||
| 115 | 1326 | svbool_t pred1 = svand_z(full_pg, svcmpge(full_pg, fsrc1, lower_bound_), | |
| 116 | 663 | svcmple(full_pg, fsrc1, upper_bound_)); | |
| 117 | 663 | auto res01 = svsel(pred1, svdup_u32(0xFF), svdup_u32(0)); | |
| 118 | |||
| 119 | 1326 | svbool_t pred2 = svand_z(full_pg, svcmpge(full_pg, fsrc2, lower_bound_), | |
| 120 | 663 | svcmple(full_pg, fsrc2, upper_bound_)); | |
| 121 | 663 | auto res10 = svsel(pred2, svdup_u32(0xFF), svdup_u32(0)); | |
| 122 | |||
| 123 | 1326 | svbool_t pred3 = svand_z(full_pg, svcmpge(full_pg, fsrc3, lower_bound_), | |
| 124 | 663 | svcmple(full_pg, fsrc3, upper_bound_)); | |
| 125 | 663 | auto res11 = svsel(pred3, svdup_u32(0xFF), svdup_u32(0)); | |
| 126 | |||
| 127 | 1326 | auto res0 = | |
| 128 | 663 | svuzp1(svreinterpret_u16_u32(res00), svreinterpret_u16_u32(res01)); | |
| 129 | 1326 | auto res1 = | |
| 130 | 663 | svuzp1(svreinterpret_u16_u32(res10), svreinterpret_u16_u32(res11)); | |
| 131 | 1326 | return svuzp1(svreinterpret_u8_u16(res0), svreinterpret_u8_u16(res1)); | |
| 132 | 663 | } | |
| 133 | // NOLINTEND(readability-make-member-function-const) | ||
| 134 | |||
| 135 | // NOLINTBEGIN(readability-make-member-function-const) | ||
| 136 | 326 | DstVectorType remaining_path(svbool_t &pg, | |
| 137 | SrcVectorType src) KLEIDICV_STREAMING { | ||
| 138 | 652 | svbool_t predicate = svand_z(pg, svcmpge(pg, src, lower_bound_), | |
| 139 | 326 | svcmple(pg, src, upper_bound_)); | |
| 140 | 652 | return svsel(predicate, DstVecTraits::svdup(0xFF), DstVecTraits::svdup(0)); | |
| 141 | 326 | } | |
| 142 | // NOLINTEND(readability-make-member-function-const) | ||
| 143 | |||
| 144 | float lower_bound_; | ||
| 145 | float upper_bound_; | ||
| 146 | }; // end of class InRange<float> | ||
| 147 | |||
| 148 | template <typename T> | ||
| 149 | 477 | kleidicv_error_t in_range_sc(const T *src, size_t src_stride, uint8_t *dst, | |
| 150 | size_t dst_stride, size_t width, size_t height, | ||
| 151 | T lower_bound, T upper_bound) KLEIDICV_STREAMING { | ||
| 152 |
8/8✓ Branch 0 taken 3 times.
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 234 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 234 times.
✓ Branch 6 taken 6 times.
✓ Branch 7 taken 234 times.
|
477 | CHECK_POINTER_AND_STRIDE(src, src_stride, height); |
| 153 |
8/8✓ Branch 0 taken 3 times.
✓ Branch 1 taken 231 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 231 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 231 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 231 times.
|
468 | CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); |
| 154 |
12/12✓ Branch 0 taken 3 times.
✓ Branch 1 taken 228 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 225 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 225 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 228 times.
✓ Branch 8 taken 3 times.
✓ Branch 9 taken 225 times.
✓ Branch 10 taken 6 times.
✓ Branch 11 taken 225 times.
|
462 | CHECK_IMAGE_SIZE(width, height); |
| 155 | |||
| 156 | 450 | Rectangle rect{width, height}; | |
| 157 | 450 | Rows<const T> src_rows{src, src_stride}; | |
| 158 | 450 | Rows<uint8_t> dst_rows{dst, dst_stride}; | |
| 159 | |||
| 160 | using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<T>; | ||
| 161 | using VectorType = typename VecTraits::VectorType; | ||
| 162 | |||
| 163 | if constexpr (std::is_same_v<T, uint8_t>) { | ||
| 164 | 225 | VectorType vec_lower_bound = VecTraits::svdup(lower_bound); | |
| 165 | 225 | VectorType vec_upper_bound = VecTraits::svdup(upper_bound); | |
| 166 | 225 | InRange<T> operation{vec_lower_bound, vec_upper_bound}; | |
| 167 | 225 | apply_operation_by_rows(operation, rect, src_rows, dst_rows); | |
| 168 | 225 | } else { | |
| 169 | 225 | InRange<T> operation{lower_bound, upper_bound}; | |
| 170 | 225 | zip_rows(operation, rect, src_rows, dst_rows); | |
| 171 | 225 | } | |
| 172 | |||
| 173 | 450 | return KLEIDICV_OK; | |
| 174 | 477 | } | |
| 175 | |||
| 176 | } // namespace KLEIDICV_TARGET_NAMESPACE | ||
| 177 | |||
| 178 | #endif // KLEIDICV_IN_RANGE_SC_H | ||
| 179 |