Hi:
  Currently smax/smin pattern added by r274481 cause some regression
in 525.x264_r by 8% with -O2 -march=corei7. The reason is some IA
backends (contain TARGET_SSE4_1) will do transform for simple abs
(using rshift, xor and sub) to pmax/pmin if smax/smin pattern exists,
which generate unnecessary sse instruction. This patch adds abs
patterns to generate simple abs for integer mode to recover the
regression.

  Bootstrap ok, regression test on i386 backend is ok.
  Ok for trunk?

Changelog
gcc/
    PR target/92651
    * config/i386/i386.h (TARGET_USE_SIMPLE_ABS_PATTERN): New macro.
    * config/i386/x86-tune.def (X86_TUNE_USE_SIMPLE_ABS_PATTERN): New
    * config/i386/i386.md (abs<SWI48x>2): New define_expand.

gcc/testsuite
    * gcc.target/i386/pr92651.c: New testcase.

Regards,
Hongyu, Wang
From c0bf64efbcaa989349130b676880cc2ed49fca69 Mon Sep 17 00:00:00 2001
From: hongyuw1 <hongy...@intel.com>
Date: Thu, 28 Nov 2019 12:49:04 +0000
Subject: [PATCH] Add abs pattern to handle {si,di} mode abs to avoid
 pmax/cmove conversion.

    gcc/
	PR target/92651
	* config/i386/i386.h (TARGET_USE_SIMPLE_ABS_PATTERN): New macro.
	* config/i386/x86-tune.def (X86_TUNE_USE_SIMPLE_ABS_PATTERN): New.
	* config/i386/i386.md (abs<SWI48x>2): New define_expand.

    gcc/testsuite
	* gcc.target/i386/pr92651.c: New testcase.
---
 gcc/config/i386/i386.h                  |  2 ++
 gcc/config/i386/i386.md                 | 39 +++++++++++++++++++++++++
 gcc/config/i386/x86-tune.def            |  7 +++++
 gcc/testsuite/gcc.target/i386/pr92651.c | 16 ++++++++++
 4 files changed, 64 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92651.c

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 108456b14d4..ea27526e42e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -596,6 +596,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE]
 #define TARGET_EMIT_VZEROUPPER \
 	ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
+#define TARGET_USE_SIMPLE_ABS_PATTERN \
+	ix86_tune_features[X86_TUNE_USE_SIMPLE_ABS_PATTERN]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7ff5872ba43..8e5059aedec 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9668,6 +9668,45 @@
   "#"
   [(set_attr "isa" "noavx,noavx,avx,avx")])
 
+;; Special expand pattern to handle integer mode abs
+
+(define_expand "abs<mode>2"
+  [(set (match_operand:SWI48x 0 "register_operand")
+    (abs:SWI48x
+      (match_operand:SWI48x 1 "register_operand")))]
+  "TARGET_USE_SIMPLE_ABS_PATTERN"
+  {
+    machine_mode mode = <MODE>mode;
+
+    /* Generate rtx abs using abs (x) = (((signed) x >> (W-1)) ^ x) -
+       ((signed) x >> (W-1)) */
+    rtx shift_amount = gen_int_shift_amount (mode,
+				       GET_MODE_PRECISION (mode)
+				       - 1);
+    shift_amount = convert_modes (E_QImode, GET_MODE (shift_amount),
+			    shift_amount, 1);
+    rtx shift_dst = gen_reg_rtx (mode);
+    rtx shift_op = gen_rtx_SET (shift_dst,
+			  gen_rtx_fmt_ee (ASHIFTRT, mode,
+					  operands[1], shift_amount));
+    rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode,
+						    FLAGS_REG));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, shift_op,
+						clobber)));
+
+    rtx xor_op = gen_rtx_SET (operands[0],
+			gen_rtx_fmt_ee (XOR, mode, shift_dst,
+					operands[1]));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, xor_op, clobber)));
+
+    rtx minus_op = gen_rtx_SET (operands[0],
+			  gen_rtx_fmt_ee (MINUS, mode,
+					  operands[0], shift_dst));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, minus_op,
+						clobber)));
+    DONE;
+  })
+
 (define_expand "<code><mode>2"
   [(set (match_operand:X87MODEF 0 "register_operand")
 	(absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 328535d38d7..86ff24122e6 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -317,6 +317,13 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
 DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store",
 	 m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC)
 
+/* X86_TUNE_USE_SIMPLE_ABS_PATTERN: This enables a new abs pattern by
+   generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) -
+   (signed) x >> (W-1)) instead of cmove or SSE max/abs instructions.  */
+DEF_TUNE (X86_TUNE_USE_SIMPLE_ABS_PATTERN, "use_simple_abs_pattern",
+	  m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT
+	  | m_GOLDMONT_PLUS | m_TREMONT )
+
 /*****************************************************************************/
 /* 387 instruction selection tuning                                          */
 /*****************************************************************************/
diff --git a/gcc/testsuite/gcc.target/i386/pr92651.c b/gcc/testsuite/gcc.target/i386/pr92651.c
new file mode 100644
index 00000000000..3d0c3c7bf4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92651.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=corei7" } */
+
+#include <stdlib.h>
+
+int foo(unsigned char a, unsigned char b)
+{
+    int isum=abs(a - b);
+    return isum;
+}
+
+/* { dg-final { scan-assembler-not "cmov*" } } */
+/* { dg-final { scan-assembler "(cltd|cdq|shr)" } } */
+/* { dg-final { scan-assembler-times "xor" 1 } } */
+/* { dg-final { scan-assembler-times "sub" 2 } } */
+
-- 
2.19.1

Reply via email to