From: Kyrylo Tkachov <[email protected]>
For unsigned types the svextb, svexth and svextw intrinsics are plain
zero-extends, which the expander already lowers to a bitwise AND with a
constant mask. The any/don't-care (_x) form and the zeroing (_z) form with
an all-true predicate therefore compile to a single unpredicated AND, but
the merging (_m) form with an all-true predicate did not: it kept the
inactive argument and produced a predicated UXT. For example
svuint64_t f (svuint64_t x, svuint64_t y)
{ return svextb_m (y, svptrue_b64 (), x); }
compiled to
ptrue p3.b, all
mov z31.d, z0.d
movprfx z0, z1
uxtb z0.d, p3/m, z31.d
ret
where a single
and z0.d, z0.d, #0xff
ret
is sufficient, because the all-true predicate makes the inactive operand
dead.
Give svext_bhw_impl a gimple fold that rewrites the unsigned merging form
with an all-true predicate to a BIT_AND_EXPR. Signed types (which use a
real sign-extend instruction), partial predicates and pfalse predicates are
left to the existing handling, as is the _x form, which the expander already
turns into an AND.
Bootstrapped and tested on aarch64-none-linux-gnu.
Will push to trunk in a couple of days if there are no objections.
PR target/120027
gcc/ChangeLog:
* config/aarch64/aarch64-sve-builtins-base.cc (svext_bhw_impl::fold):
New member function. Fold the unsigned svextb/svexth/svextw
intrinsics to a bitwise AND when the merging form has an all-true
predicate.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/acle/general/pr120027.c: New test.
Signed-off-by: Kyrylo Tkachov <[email protected]>
---
.../aarch64/aarch64-sve-builtins-base.cc | 19 ++
.../aarch64/sve/acle/general/pr120027.c | 180 ++++++++++++++++++
2 files changed, 199 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr120027.c
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 963f0adfda1..bfd9641861c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1311,6 +1311,25 @@ public:
CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
: m_from_mode (from_mode) {}
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ /* For unsigned types this is a zero-extend, i.e. a bitwise AND with the
+ mode mask of m_from_mode. The _x form and an all-true _z already lower to
+ that AND; fold _m with an all-true predicate to it too, as the inactive
+ operand is then dead. Everything else returns NULL (existing handling). */
+ if (f.pred != PRED_m
+ || !f.type_suffix (0).unsigned_p
+ || !is_ptrue (f.gp_value (f.call), f.type_suffix (0).element_bytes))
+ return NULL;
+
+ tree op = gimple_call_arg (f.call, 2);
+ tree mask = build_int_cstu (TREE_TYPE (TREE_TYPE (f.lhs)),
+ GET_MODE_MASK (m_from_mode));
+ tree mask_vec = build_vector_from_val (TREE_TYPE (f.lhs), mask);
+ return gimple_build_assign (f.lhs, BIT_AND_EXPR, op, mask_vec);
+ }
+
rtx
expand (function_expander &e) const override
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr120027.c
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr120027.c
new file mode 100644
index 00000000000..ba688e1473b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr120027.c
@@ -0,0 +1,180 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* Check that the merging form of the unsigned svextb/svexth/svextw intrinsics
+ folds to an unpredicated AND when the governing predicate is all-true, and
+ that the signed forms keep their sign-extend. */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** uxtb_m_u16_tied:
+** and z0\.h, z0\.h, #0xff
+** ret
+*/
+svuint16_t
+uxtb_m_u16_tied (svuint16_t x)
+{
+ return svextb_m (x, svptrue_b16 (), x);
+}
+
+/*
+** uxtb_m_u16_untied:
+** movprfx z0, z1
+** and z0\.h, z0\.h, #0xff
+** ret
+*/
+svuint16_t
+uxtb_m_u16_untied (svuint16_t inactive, svuint16_t x)
+{
+ return svextb_m (inactive, svptrue_b16 (), x);
+}
+
+/*
+** uxtb_m_u32_tied:
+** and z0\.s, z0\.s, #0xff
+** ret
+*/
+svuint32_t
+uxtb_m_u32_tied (svuint32_t x)
+{
+ return svextb_m (x, svptrue_b32 (), x);
+}
+
+/*
+** uxtb_m_u32_untied:
+** movprfx z0, z1
+** and z0\.s, z0\.s, #0xff
+** ret
+*/
+svuint32_t
+uxtb_m_u32_untied (svuint32_t inactive, svuint32_t x)
+{
+ return svextb_m (inactive, svptrue_b32 (), x);
+}
+
+/*
+** uxtb_m_u64_tied:
+** and z0\.d, z0\.d, #0xff
+** ret
+*/
+svuint64_t
+uxtb_m_u64_tied (svuint64_t x)
+{
+ return svextb_m (x, svptrue_b64 (), x);
+}
+
+/*
+** uxtb_m_u64_untied:
+** movprfx z0, z1
+** and z0\.d, z0\.d, #0xff
+** ret
+*/
+svuint64_t
+uxtb_m_u64_untied (svuint64_t inactive, svuint64_t x)
+{
+ return svextb_m (inactive, svptrue_b64 (), x);
+}
+
+/*
+** uxth_m_u32_tied:
+** and z0\.s, z0\.s, #0xffff
+** ret
+*/
+svuint32_t
+uxth_m_u32_tied (svuint32_t x)
+{
+ return svexth_m (x, svptrue_b32 (), x);
+}
+
+/*
+** uxth_m_u64_untied:
+** movprfx z0, z1
+** and z0\.d, z0\.d, #0xffff
+** ret
+*/
+svuint64_t
+uxth_m_u64_untied (svuint64_t inactive, svuint64_t x)
+{
+ return svexth_m (inactive, svptrue_b64 (), x);
+}
+
+/*
+** uxtw_m_u64_tied:
+** and z0\.d, z0\.d, #0xffffffff
+** ret
+*/
+svuint64_t
+uxtw_m_u64_tied (svuint64_t x)
+{
+ return svextw_m (x, svptrue_b64 (), x);
+}
+
+/*
+** uxtw_m_u64_untied:
+** movprfx z0, z1
+** and z0\.d, z0\.d, #0xffffffff
+** ret
+*/
+svuint64_t
+uxtw_m_u64_untied (svuint64_t inactive, svuint64_t x)
+{
+ return svextw_m (inactive, svptrue_b64 (), x);
+}
+
+/*
+** sxtb_m_s32_tied:
+** ptrue (p[0-7])\.b, all
+** sxtb z0\.s, \1/m, z0\.s
+** ret
+*/
+svint32_t
+sxtb_m_s32_tied (svint32_t x)
+{
+ return svextb_m (x, svptrue_b32 (), x);
+}
+
+/*
+** sxtb_m_s32_untied:
+** ptrue (p[0-7])\.b, all
+** sxtb z0\.s, \1/m, z1\.s
+** ret
+*/
+svint32_t
+sxtb_m_s32_untied (svint32_t inactive, svint32_t x)
+{
+ return svextb_m (inactive, svptrue_b32 (), x);
+}
+
+/*
+** sxth_m_s64_tied:
+** ptrue (p[0-7])\.b, all
+** sxth z0\.d, \1/m, z0\.d
+** ret
+*/
+svint64_t
+sxth_m_s64_tied (svint64_t x)
+{
+ return svexth_m (x, svptrue_b64 (), x);
+}
+
+/*
+** sxtw_m_s64_tied:
+** ptrue (p[0-7])\.b, all
+** sxtw z0\.d, \1/m, z0\.d
+** ret
+*/
+svint64_t
+sxtw_m_s64_tied (svint64_t x)
+{
+ return svextw_m (x, svptrue_b64 (), x);
+}
+
+#ifdef __cplusplus
+}
+#endif
--
2.50.1 (Apple Git-155)