From: Kyrylo Tkachov <[email protected]>

For unsigned types the svextb, svexth and svextw intrinsics are plain
zero-extends, which the expander already lowers to a bitwise AND with a
constant mask.  The any/don't-care (_x) form and the zeroing (_z) form with
an all-true predicate therefore compile to a single unpredicated AND, but
the merging (_m) form with an all-true predicate did not: it kept the
inactive argument and produced a predicated UXT.  For example

  svuint64_t f (svuint64_t x, svuint64_t y)
  { return svextb_m (y, svptrue_b64 (), x); }

compiled to

        ptrue   p3.b, all
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtb    z0.d, p3/m, z31.d
        ret

where a single

        and     z0.d, z0.d, #0xff
        ret

is sufficient, because the all-true predicate makes the inactive operand
dead.

Give svext_bhw_impl a gimple fold that rewrites the unsigned merging form
with an all-true predicate to a BIT_AND_EXPR.  Signed types (which use a
real sign-extend instruction), partial predicates and pfalse predicates are
left to the existing handling, as is the _x form, which the expander already
turns into an AND.

Bootstrapped and tested on aarch64-none-linux-gnu.
Will push to trunk in a couple of days if no objections.

        PR target/120027

gcc/ChangeLog:

        * config/aarch64/aarch64-sve-builtins-base.cc (svext_bhw_impl::fold):
        New member function.  Fold the unsigned svextb/svexth/svextw
        intrinsics to a bitwise AND when the merging form has an all-true
        predicate.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/acle/general/pr120027.c: New test.

Signed-off-by: Kyrylo Tkachov <[email protected]>
---
 .../aarch64/aarch64-sve-builtins-base.cc      |  19 ++
 .../aarch64/sve/acle/general/pr120027.c       | 180 ++++++++++++++++++
 2 files changed, 199 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr120027.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 963f0adfda1..bfd9641861c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1311,6 +1311,25 @@ public:
   CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
     : m_from_mode (from_mode) {}
 
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    /* For unsigned types this is a zero-extend, i.e. a bitwise AND with a
+       constant mask.  The _x form and an all-true _z already lower to that
+       AND; fold the merging form with an all-true predicate to it too, since
+       the predicate makes the inactive operand dead.  */
+    if (f.pred != PRED_m
+       || !f.type_suffix (0).unsigned_p
+       || !is_ptrue (f.gp_value (f.call), f.type_suffix (0).element_bytes))
+      return NULL;
+
+    tree op = gimple_call_arg (f.call, 2);
+    tree mask = build_int_cstu (TREE_TYPE (TREE_TYPE (f.lhs)),
+                               GET_MODE_MASK (m_from_mode));
+    tree mask_vec = build_vector_from_val (TREE_TYPE (f.lhs), mask);
+    return gimple_build_assign (f.lhs, BIT_AND_EXPR, op, mask_vec);
+  }
+
   rtx
   expand (function_expander &e) const override
   {
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr120027.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr120027.c
new file mode 100644
index 00000000000..ba688e1473b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr120027.c
@@ -0,0 +1,180 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* Check that the merging form of the unsigned svextb/svexth/svextw intrinsics
+   folds to an unpredicated AND when the governing predicate is all-true, and
+   that the signed forms keep their sign-extend.  */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** uxtb_m_u16_tied:
+**     and     z0\.h, z0\.h, #0xff
+**     ret
+*/
+svuint16_t
+uxtb_m_u16_tied (svuint16_t x)
+{
+  return svextb_m (x, svptrue_b16 (), x);
+}
+
+/*
+** uxtb_m_u16_untied:
+**     movprfx z0, z1
+**     and     z0\.h, z0\.h, #0xff
+**     ret
+*/
+svuint16_t
+uxtb_m_u16_untied (svuint16_t inactive, svuint16_t x)
+{
+  return svextb_m (inactive, svptrue_b16 (), x);
+}
+
+/*
+** uxtb_m_u32_tied:
+**     and     z0\.s, z0\.s, #0xff
+**     ret
+*/
+svuint32_t
+uxtb_m_u32_tied (svuint32_t x)
+{
+  return svextb_m (x, svptrue_b32 (), x);
+}
+
+/*
+** uxtb_m_u32_untied:
+**     movprfx z0, z1
+**     and     z0\.s, z0\.s, #0xff
+**     ret
+*/
+svuint32_t
+uxtb_m_u32_untied (svuint32_t inactive, svuint32_t x)
+{
+  return svextb_m (inactive, svptrue_b32 (), x);
+}
+
+/*
+** uxtb_m_u64_tied:
+**     and     z0\.d, z0\.d, #0xff
+**     ret
+*/
+svuint64_t
+uxtb_m_u64_tied (svuint64_t x)
+{
+  return svextb_m (x, svptrue_b64 (), x);
+}
+
+/*
+** uxtb_m_u64_untied:
+**     movprfx z0, z1
+**     and     z0\.d, z0\.d, #0xff
+**     ret
+*/
+svuint64_t
+uxtb_m_u64_untied (svuint64_t inactive, svuint64_t x)
+{
+  return svextb_m (inactive, svptrue_b64 (), x);
+}
+
+/*
+** uxth_m_u32_tied:
+**     and     z0\.s, z0\.s, #0xffff
+**     ret
+*/
+svuint32_t
+uxth_m_u32_tied (svuint32_t x)
+{
+  return svexth_m (x, svptrue_b32 (), x);
+}
+
+/*
+** uxth_m_u64_untied:
+**     movprfx z0, z1
+**     and     z0\.d, z0\.d, #0xffff
+**     ret
+*/
+svuint64_t
+uxth_m_u64_untied (svuint64_t inactive, svuint64_t x)
+{
+  return svexth_m (inactive, svptrue_b64 (), x);
+}
+
+/*
+** uxtw_m_u64_tied:
+**     and     z0\.d, z0\.d, #0xffffffff
+**     ret
+*/
+svuint64_t
+uxtw_m_u64_tied (svuint64_t x)
+{
+  return svextw_m (x, svptrue_b64 (), x);
+}
+
+/*
+** uxtw_m_u64_untied:
+**     movprfx z0, z1
+**     and     z0\.d, z0\.d, #0xffffffff
+**     ret
+*/
+svuint64_t
+uxtw_m_u64_untied (svuint64_t inactive, svuint64_t x)
+{
+  return svextw_m (inactive, svptrue_b64 (), x);
+}
+
+/*
+** sxtb_m_s32_tied:
+**     ptrue   (p[0-7])\.b, all
+**     sxtb    z0\.s, \1/m, z0\.s
+**     ret
+*/
+svint32_t
+sxtb_m_s32_tied (svint32_t x)
+{
+  return svextb_m (x, svptrue_b32 (), x);
+}
+
+/*
+** sxtb_m_s32_untied:
+**     ptrue   (p[0-7])\.b, all
+**     sxtb    z0\.s, \1/m, z1\.s
+**     ret
+*/
+svint32_t
+sxtb_m_s32_untied (svint32_t inactive, svint32_t x)
+{
+  return svextb_m (inactive, svptrue_b32 (), x);
+}
+
+/*
+** sxth_m_s64_tied:
+**     ptrue   (p[0-7])\.b, all
+**     sxth    z0\.d, \1/m, z0\.d
+**     ret
+*/
+svint64_t
+sxth_m_s64_tied (svint64_t x)
+{
+  return svexth_m (x, svptrue_b64 (), x);
+}
+
+/*
+** sxtw_m_s64_tied:
+**     ptrue   (p[0-7])\.b, all
+**     sxtw    z0\.d, \1/m, z0\.d
+**     ret
+*/
+svint64_t
+sxtw_m_s64_tied (svint64_t x)
+{
+  return svextw_m (x, svptrue_b64 (), x);
+}
+
+#ifdef __cplusplus
+}
+#endif
-- 
2.50.1 (Apple Git-155)

Reply via email to