https://gcc.gnu.org/g:102b21f9ce7d7a30cdee7c729a152e95c96107ac

commit r16-1094-g102b21f9ce7d7a30cdee7c729a152e95c96107ac
Author: Hu, Lin1 <lin1...@intel.com>
Date:   Mon Mar 10 16:52:22 2025 +0800

    i386: Add more peephole2 for APX NDD
    
    The patch aims to optimize
             movb    (%rdi), %al
             movq    %rdi, %rbx
             xorl    %esi, %eax, %edx
             movb    %dl, (%rdi)
             cmpb    %sil, %al
             jne
    to
             xorb    %sil, (%rdi)
             movq    %rdi, %rbx
             jne
    
    Reduce 2 mov and 1 cmp instructions.
    
    Because APX NDD allows the destination register to differ from the source
    register, some of the original peephole2 patterns do not apply.  Add new
    peephole2 patterns for APX NDD.
    
    gcc/ChangeLog:
    
            * config/i386/i386.md (define_peephole2): Define some new peephole2
            for APX NDD.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr49095-2.c: New test.

Diff:
---
 gcc/config/i386/i386.md                   | 135 ++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr49095-2.c |  73 ++++++++++++++++
 2 files changed, 208 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4c9cb81d5f9d..40b43cf092ac 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -28398,6 +28398,41 @@
                       const0_rtx);
 })
 
+;; For APX NDD PLUS/MINUS/LOGIC
+;; Like cmpelim optimized pattern.
+;; The NDD form of the flag-setting operation reads memory and writes a
+;; separate register.  When that register is only stored back to the same
+;; memory, is dead after the store and is not used in the memory address,
+;; fold the store into a read-modify-write operation.
+;; Reduce an extra mov instruction like
+;; decl (%rdi), %eax
+;; mov %eax, (%rdi)
+;; to
+;; decl (%rdi)
+(define_peephole2
+  [(parallel [(set (reg FLAGS_REG)
+                  (compare (match_operator:SWI 2 "plusminuslogic_operator"
+                             [(match_operand:SWI 0 "memory_operand")
+                              (match_operand:SWI 1 "<nonmemory_operand>")])
+                           (const_int 0)))
+             (set (match_operand:SWI 3 "register_operand") (match_dup 2))])
+   (set (match_dup 0) (match_dup 3))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (2, operands[3])
+   && !reg_overlap_mentioned_p (operands[3], operands[0])
+   && ix86_match_ccmode (peep2_next_insn (0),
+                        (GET_CODE (operands[2]) == PLUS
+                         || GET_CODE (operands[2]) == MINUS)
+                        ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 4) (match_dup 6))
+             (set (match_dup 0) (match_dup 5))])]
+{
+  /* Reuse the flags destination of the matched insn, and with it the
+     CC mode that insn was using.  */
+  operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
+  /* Rebuild the operation with the same code and mode; the memory
+     operand is now both the first source and the destination.  */
+  operands[5]
+    = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+                     copy_rtx (operands[0]), operands[1]);
+  /* The flags are set from comparing that same expression with zero.  */
+  operands[6]
+    = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
+                      const0_rtx);
+})
+
 ;; Likewise for instances where we have a lea pattern.
 (define_peephole2
   [(set (match_operand:SWI 0 "register_operand")
@@ -28491,6 +28526,54 @@
                       const0_rtx);
 })
 
+;; For APX NDD XOR
+;; Reduce 2 mov and 1 cmp instruction.
+;; The CCZ compare of the loaded value with the xor operand is replaced by
+;; a compare of the xor result with zero, so the load, the store and the
+;; compare all fold into a single xor on memory.
+;; from
+;; movq (%rdi), %rax
+;; xorq %rsi, %rax, %rdx
+;; movq %rdx, (%rdi)
+;; cmpq %rsi, %rax
+;; jne
+;; to
+;; xorq %rsi, (%rdi)
+;; jne
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+       (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_operand:SWI 4 "register_operand")
+                  (xor:SWI (match_operand:SWI 3 "register_operand")
+                           (match_operand:SWI 2 "<nonmemory_operand>")))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 4))
+   (set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (match_operand:SWI 5 "register_operand")
+                    (match_operand:SWI 6 "<nonmemory_operand>")))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REGNO (operands[3]) == REGNO (operands[0])
+   && (rtx_equal_p (operands[0], operands[5])
+       ? rtx_equal_p (operands[2], operands[6])
+       : rtx_equal_p (operands[2], operands[5])
+        && rtx_equal_p (operands[0], operands[6]))
+   && peep2_reg_dead_p (3, operands[4])
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], QImode)
+       || any_QIreg_operand (operands[2], QImode))"
+  [(parallel [(set (match_dup 7) (match_dup 9))
+             (set (match_dup 1) (match_dup 8))])]
+{
+  /* Flags destination of the original compare, the fourth matched insn.  */
+  operands[7] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  /* The xor now reads and writes the memory operand directly.  */
+  operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+                            operands[2]);
+  /* Set the flags from the xor result compared with zero.  */
+  operands[9]
+    = gen_rtx_COMPARE (GET_MODE (operands[7]),
+                      copy_rtx (operands[8]),
+                      const0_rtx);
+})
+
 (define_peephole2
   [(set (match_operand:SWI12 0 "register_operand")
        (match_operand:SWI12 1 "memory_operand"))
@@ -28734,6 +28817,58 @@
                       const0_rtx);
 })
 
+;; For APX NDD XOR
+;; Reduce 2 mov and 1 cmp instruction.
+;; Here the load, the store and the compare use the narrow (SWI12) mode
+;; while the NDD xor itself is done in SImode, so registers are matched
+;; by register number across modes.  The replacement xor is done in the
+;; narrow mode on the low part of the xor operand.
+;; from
+;; movb (%rdi), %al
+;; xorl %esi, %eax, %edx
+;; movb %dl, (%rdi)
+;; cmpb %sil, %al
+;; jne
+;; to
+;; xorb %sil, (%rdi)
+;; jne
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand")
+       (match_operand:SWI12 1 "memory_operand"))
+   (parallel [(set (match_operand:SI 4 "register_operand")
+                  (xor:SI (match_operand:SI 3 "register_operand")
+                          (match_operand:SI 2 "<nonmemory_operand>")))
+             (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
+   (set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (match_operand:SWI12 6 "register_operand")
+                    (match_operand:SWI12 7 "<nonmemory_operand>")))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REGNO (operands[3]) == REGNO (operands[0])
+   && REGNO (operands[5]) == REGNO (operands[4])
+   && (rtx_equal_p (operands[0], operands[6])
+       ? (REG_P (operands[2])
+         ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
+         : rtx_equal_p (operands[2], operands[7]))
+       : (rtx_equal_p (operands[0], operands[7])
+         && REG_P (operands[2])
+         && REGNO (operands[2]) == REGNO (operands[6])))
+   && peep2_reg_dead_p (3, operands[5])
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], SImode)
+       || any_QIreg_operand (operands[2], SImode))"
+  [(parallel [(set (match_dup 8) (match_dup 10))
+             (set (match_dup 1) (match_dup 9))])]
+{
+  /* Flags destination of the original compare, the fourth matched insn.  */
+  operands[8] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  /* Narrow the SImode xor operand to the mode of the memory operand.  */
+  operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+                            gen_lowpart (<MODE>mode, operands[2]));
+  /* Set the flags from the xor result compared with zero.  */
+  operands[10]
+    = gen_rtx_COMPARE (GET_MODE (operands[8]),
+                      copy_rtx (operands[9]),
+                      const0_rtx);
+})
+
 ;; Attempt to optimize away memory stores of values the memory already
 ;; has.  See PR79593.
 (define_peephole2
diff --git a/gcc/testsuite/gcc.target/i386/pr49095-2.c b/gcc/testsuite/gcc.target/i386/pr49095-2.c
new file mode 100644
index 000000000000..25bc6b79a43a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr49095-2.c
@@ -0,0 +1,73 @@
+/* PR rtl-optimization/49095 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-Os -fno-shrink-wrap -masm=att -mapxf" } */
+
+void foo (void *);
+
+/* Pre-decrement *X in place and call foo with X when the stored result
+   is zero.  Returns X.  */
+int *
+f1 (int *x)
+{
+  if (!--*x)
+    foo (x);
+  return x;
+}
+
+/* By-value counterpart of f1: pre-decrement X and call foo with a null
+   pointer when the result is zero.  Returns the decremented value.  */
+int
+g1 (int x)
+{
+  if (!--x)
+    foo ((void *) 0);
+  return x;
+}
+
+/* Define four functions for type T and compound assignment OP, each named
+   with the suffix T##OPN:
+     f: *x OP y through a pointer, then test *x for zero;
+     g: x OP y on a by-value argument, then test x for zero;
+     h: *x OP 24 through a pointer, then test *x for zero;
+     i: x OP 24 on a by-value argument (y is unused), then test x for zero.
+   Each one calls foo when the result is zero and returns x.  */
+#define F(T, OP, OPN) \
+T *                    \
+f##T##OPN (T *x, T y)  \
+{                      \
+  *x OP y;             \
+  if (!*x)             \
+    foo (x);           \
+  return x;            \
+}                      \
+                       \
+T                      \
+g##T##OPN (T x, T y)   \
+{                      \
+  x OP y;              \
+  if (!x)              \
+    foo ((void *) 0);  \
+  return x;            \
+}                      \
+                       \
+T *                    \
+h##T##OPN (T *x)       \
+{                      \
+  *x OP 24;            \
+  if (!*x)             \
+    foo (x);           \
+  return x;            \
+}                      \
+                       \
+T                      \
+i##T##OPN (T x, T y)   \
+{                      \
+  x OP 24;             \
+  if (!x)              \
+    foo ((void *) 0);  \
+  return x;            \
+}
+
+/* Instantiate every F variant for type T with each of the operators
+   +=, -=, &=, |= and ^=.  */
+#define G(T) \
+F (T, +=, plus)                \
+F (T, -=, minus)       \
+F (T, &=, and)         \
+F (T, |=, or)          \
+F (T, ^=, xor)
+
+/* Expand the whole set for char, short, int and long.  */
+G (char)
+G (short)
+G (int)
+G (long)
+
+/* { dg-final { scan-assembler-not "test\[lq\]" } } */
+/* { dg-final { scan-assembler-not "\\(%\[re\]di\\), %" } } */

Reply via email to