https://gcc.gnu.org/g:102b21f9ce7d7a30cdee7c729a152e95c96107ac
commit r16-1094-g102b21f9ce7d7a30cdee7c729a152e95c96107ac Author: Hu, Lin1 <lin1...@intel.com> Date: Mon Mar 10 16:52:22 2025 +0800 i386: Add more peephole2 for APX NDD The patch aims to optimize movb (%rdi), %al movq %rdi, %rbx xorl %esi, %eax, %edx movb %dl, (%rdi) cmpb %sil, %al jne to xorb %sil, (%rdi) movq %rdi, %rbx jne This removes two mov instructions and one cmp instruction. Because APX NDD allows the destination register to differ from the source register, some of the original peephole2 patterns are no longer applicable. Add new peephole2 patterns for APX NDD. gcc/ChangeLog: * config/i386/i386.md (define_peephole2): Define some new peephole2 for APX NDD. gcc/testsuite/ChangeLog: * gcc.target/i386/pr49095-2.c: New test. Diff: --- gcc/config/i386/i386.md | 135 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr49095-2.c | 73 ++++++++++++++++ 2 files changed, 208 insertions(+) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4c9cb81d5f9d..40b43cf092ac 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -28398,6 +28398,41 @@ const0_rtx); }) +;; For APX NDD PLUS/MINUS/LOGIC +;; Like cmpelim optimized pattern. +;; Reduce an extra mov instruction like +;; decl (%rdi), %eax +;; mov %eax, (%rdi) +;; to +;; decl (%rdi) +(define_peephole2 + [(parallel [(set (reg FLAGS_REG) + (compare (match_operator:SWI 2 "plusminuslogic_operator" + [(match_operand:SWI 0 "memory_operand") + (match_operand:SWI 1 "<nonmemory_operand>")]) + (const_int 0))) + (set (match_operand:SWI 3 "register_operand") (match_dup 2))]) + (set (match_dup 0) (match_dup 3))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (2, operands[3]) + && !reg_overlap_mentioned_p (operands[3], operands[0]) + && ix86_match_ccmode (peep2_next_insn (0), + (GET_CODE (operands[2]) == PLUS + || GET_CODE (operands[2]) == MINUS) + ? 
CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 4) (match_dup 6)) + (set (match_dup 0) (match_dup 5))])] +{ + operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0)); + operands[5] + = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + copy_rtx (operands[0]), operands[1]); + operands[6] + = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]), + const0_rtx); +}) + ;; Likewise for instances where we have a lea pattern. (define_peephole2 [(set (match_operand:SWI 0 "register_operand") @@ -28491,6 +28526,54 @@ const0_rtx); }) +;; For APX NDD XOR +;; Eliminate 2 mov instructions and 1 cmp instruction. +;; from +;; movq (%rdi), %rax +;; xorq %rsi, %rax, %rdx +;; movq %rdx, (%rdi) +;; cmpq %rsi, %rax +;; jne +;; to +;; xorq %rsi, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (match_operand:SWI 4 "register_operand") + (xor:SWI (match_operand:SWI 3 "register_operand") + (match_operand:SWI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_dup 4)) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI 5 "register_operand") + (match_operand:SWI 6 "<nonmemory_operand>")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && (rtx_equal_p (operands[0], operands[5]) + ? 
rtx_equal_p (operands[2], operands[6]) + : rtx_equal_p (operands[2], operands[5]) + && rtx_equal_p (operands[0], operands[6])) + && peep2_reg_dead_p (3, operands[4]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], QImode) + || any_QIreg_operand (operands[2], QImode))" + [(parallel [(set (match_dup 7) (match_dup 9)) + (set (match_dup 1) (match_dup 8))])] +{ + operands[7] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]), + operands[2]); + operands[9] + = gen_rtx_COMPARE (GET_MODE (operands[7]), + copy_rtx (operands[8]), + const0_rtx); +}) + (define_peephole2 [(set (match_operand:SWI12 0 "register_operand") (match_operand:SWI12 1 "memory_operand")) @@ -28734,6 +28817,58 @@ const0_rtx); }) +;; For APX NDD XOR +;; Eliminate 2 mov instructions and 1 cmp instruction. +;; from +;; movb (%rdi), %al +;; xorl %esi, %eax, %edx +;; movb %dl, (%rdi) +;; cmpb %sil, %al +;; jne +;; to +;; xorb %sil, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI12 0 "register_operand") + (match_operand:SWI12 1 "memory_operand")) + (parallel [(set (match_operand:SI 4 "register_operand") + (xor:SI (match_operand:SI 3 "register_operand") + (match_operand:SI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_operand:SWI12 5 "register_operand")) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI12 6 "register_operand") + (match_operand:SWI12 7 "<nonmemory_operand>")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && REGNO (operands[5]) == REGNO (operands[4]) + && (rtx_equal_p (operands[0], operands[6]) + ? (REG_P (operands[2]) + ? 
REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7]) + : rtx_equal_p (operands[2], operands[7])) + : (rtx_equal_p (operands[0], operands[7]) + && REG_P (operands[2]) + && REGNO (operands[2]) == REGNO (operands[6]))) + && peep2_reg_dead_p (3, operands[5]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], SImode) + || any_QIreg_operand (operands[2], SImode))" + [(parallel [(set (match_dup 8) (match_dup 10)) + (set (match_dup 1) (match_dup 9))])] +{ + operands[8] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]), + gen_lowpart (<MODE>mode, operands[2])); + operands[10] + = gen_rtx_COMPARE (GET_MODE (operands[8]), + copy_rtx (operands[9]), + const0_rtx); +}) + ;; Attempt to optimize away memory stores of values the memory already ;; has. See PR79593. (define_peephole2 diff --git a/gcc/testsuite/gcc.target/i386/pr49095-2.c b/gcc/testsuite/gcc.target/i386/pr49095-2.c new file mode 100644 index 000000000000..25bc6b79a43a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr49095-2.c @@ -0,0 +1,73 @@ +/* PR rtl-optimization/49095 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-Os -fno-shrink-wrap -masm=att -mapxf" } */ + +void foo (void *); + +int * +f1 (int *x) +{ + if (!--*x) + foo (x); + return x; +} + +int +g1 (int x) +{ + if (!--x) + foo ((void *) 0); + return x; +} + +#define F(T, OP, OPN) \ +T * \ +f##T##OPN (T *x, T y) \ +{ \ + *x OP y; \ + if (!*x) \ + foo (x); \ + return x; \ +} \ + \ +T \ +g##T##OPN (T x, T y) \ +{ \ + x OP y; \ + if (!x) \ + foo ((void *) 0); \ + return x; \ +} \ + \ +T * \ +h##T##OPN (T *x) \ +{ \ + *x OP 24; \ + if (!*x) \ + foo (x); \ + return x; \ +} \ + \ +T \ +i##T##OPN (T x, T y) \ +{ \ + x OP 24; \ + if (!x) \ + foo ((void *) 0); \ + return x; \ +} + +#define G(T) \ +F (T, +=, plus) \ +F (T, -=, minus) \ +F (T, &=, and) \ +F (T, |=, or) \ +F (T, ^=, xor) + +G (char) +G (short) +G (int) +G (long) + +/* { dg-final { scan-assembler-not "test\[lq\]" } } */ +/* { dg-final { scan-assembler-not "\\(%\[re\]di\\), %" } } */