From fb8f00b9d0e7dabac2d1d143f5a9ae677a515912 Mon Sep 17 00:00:00 2001
From: Soumya AR <soumyaa@nvidia.com>
Date: Fri, 21 Feb 2025 02:29:25 -0800
Subject: [PATCH] aarch64: Use SVE ASRD for vector division using the
 division operator.

The SVE ASRD instruction performs an arithmetic shift right by an immediate,
rounding towards zero, which implements signed division by a power of two.
This patch enables ASRD when dividing vectors by a power-of-two constant
using the GNU C division operator.

For example:

int32x4_t
foo (int32x4_t x)
{
	return x / 4;
}

svint32_t
bar (svint32_t x)
{
	return x / 4;
}

currently generates a DIV, but can be done using ASRD.

The patch was bootstrapped and regtested on aarch64-linux-gnu with no
regressions.
OK for GCC 16?

Signed-off-by: Soumya AR <soumyaa@nvidia.com>

gcc/ChangeLog:

	* expmed.cc (expand_divmod): Expand to sdiv_pow2 optab for vectors.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/sve-asrd-2.c: New test.
---
 gcc/expmed.cc                                 | 18 +++++
 .../gcc.target/aarch64/sve/sve-asrd-2.c       | 74 +++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/sve-asrd-2.c

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index df09cbccd08..a35aa229d44 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -4465,6 +4465,24 @@ expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
       {
       case TRUNC_MOD_EXPR:
       case TRUNC_DIV_EXPR:
+	if (CONST_VECTOR_P (op1)
+	    && optab_handler (sdiv_pow2_optab, mode) != CODE_FOR_nothing)
+	  {
+	    rtx scalar_op1 = unwrap_const_vec_duplicate (op1);
+	    if (scalar_op1 != NULL_RTX && CONST_INT_P (scalar_op1))
+	      {
+		HOST_WIDE_INT d = INTVAL (scalar_op1);
+		if (d > 0 && pow2p_hwi (d))
+		  {
+		    rtx shift_amount
+		      = gen_const_vec_duplicate (mode,
+						 GEN_INT (floor_log2 (d)));
+		    return expand_binop (mode, sdiv_pow2_optab, op0,
+					 shift_amount, target, unsignedp,
+					 methods);
+		  }
+	      }
+	  }
 	if (op1_is_constant)
 	  {
 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd-2.c b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd-2.c
new file mode 100644
index 00000000000..a5163421a69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd-2.c
@@ -0,0 +1,74 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_sve.h>
+
+#define FOO(type) \
+type foo_##type(type x) { \
+  return x / 8; \
+}
+
+/*
+** foo_int32x4_t:
+**     ptrue	(p[0-7]).b, vl16
+**     asrd	z[0-9]+\.s, \1/m, z[0-9]+\.s, #3
+**     ret
+*/
+FOO(int32x4_t)
+
+/*
+** foo_int64x2_t:
+**     ptrue	(p[0-7]).b, vl16
+**     asrd	z[0-9]+\.d, \1/m, z[0-9]+\.d, #3
+**     ret
+*/
+FOO(int64x2_t)
+
+/*
+** foo_svint32_t:
+**     ptrue	(p[0-7]).b, all
+**     asrd	z[0-9]+\.s, \1/m, z[0-9]+\.s, #3
+**     ret
+*/
+FOO(svint32_t)
+
+/*
+** foo_svint64_t:
+**     ptrue	(p[0-7]).b, all
+**     asrd	z[0-9]+\.d, \1/m, z[0-9]+\.d, #3
+**     ret
+*/
+FOO(svint64_t)
+
+/*
+** foo_uint32x4_t:
+**     ptrue	(p[0-7]).b, vl16
+**     asrd	z[0-9]+\.s, \1/m, z[0-9]+\.s, #3
+**     ret
+*/
+FOO(uint32x4_t)
+
+/*
+** foo_uint64x2_t:
+**     ptrue	(p[0-7]).b, vl16
+**     asrd	z[0-9]+\.d, \1/m, z[0-9]+\.d, #3
+**     ret
+*/
+FOO(uint64x2_t)
+
+/*
+** foo_svuint32_t:
+**     ptrue	(p[0-7]).b, all
+**     asrd	z[0-9]+\.s, \1/m, z[0-9]+\.s, #3
+**     ret
+*/
+FOO(svuint32_t)
+
+/*
+** foo_svuint64_t:
+**     ptrue	(p[0-7]).b, all
+**     asrd	z[0-9]+\.d, \1/m, z[0-9]+\.d, #3
+**     ret
+*/
+FOO(svuint64_t)
-- 
2.34.1

