On Wed, Apr 13, 2022 at 9:08 AM Roger Sayle <ro...@nextmovesoftware.com> wrote: > > > Hi Uros, > Many thanks for the speedy review. Here's the revised version of the > patch incorporating all of your suggested fixes and improvements.
How about we go with an alternative route and enhance the *_maybe_si instructions with your findings, like the attached patch? Please note there is no dependency on TARGET_PARTIAL_REG_STALL. I think that when -Os is in effect, we don't care for speed, but size gets priority, disregarding runtime slowdowns. Uros. > > This has been tested on x86_64-pc-linux-gnu with make bootstrap > and make -k check with no new failures. Ok for mainline? > > 2022-04-13 Roger Sayle <ro...@nextmovesoftware.com> > Uroš Bizjak <ubiz...@gmail.com> > > gcc/ChangeLog > * config/i386/i386.md (peephole2): Transform *and_qi_maybe_si into > *andsi_2 with -Os when the instruction encoding is shorter. > > gcc/testsuite/ChangeLog > * gcc.target/i386/and-1.c: New test case. > > > Thanks again, > Roger > -- > > > -----Original Message----- > > From: Uros Bizjak <ubiz...@gmail.com> > > Sent: 12 April 2022 17:37 > > To: Roger Sayle <ro...@nextmovesoftware.com> > > Cc: gcc-patches@gcc.gnu.org > > Subject: Re: [x86_64 PATCH] Avoid andb %dil when optimizing for size. > > > > On Tue, Apr 12, 2022 at 5:42 PM Roger Sayle <ro...@nextmovesoftware.com> > > wrote: > > > > > > > > > This stand-alone patch avoids (-Os) regressions from a fix for PR 70321. > > > > > > The simple test case below has the unfortunate property that on x86_64 > > > it is larger when compiled with -Os than when compiled with -O2. > > > > > > int foo(char x) > > > { > > > return (x & 123) != 0; > > > } > > > > > > The issue is x86's complex instruction encoding, where andb $XX,%dil > > > requires more bytes than andl $XX,%edi. This patch provides a > > > peephole2 to convert *and_qi_2_maybe_si into *andsi_2 for the problematic > > cases. > > > > > > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap > > > and make -k check with no new failures. Ok for mainline? > > > > > > > > > 2022-04-12 Roger Sayle <ro...@nextmovesoftware.com> > > > > > > gcc/ChangeLog > > > * config/i386/i386.md (peephole2): Transform *and_qi_maybe_si into > > > *andsi_2 with -Os when the instruction encoding is shorter. > > > > > > gcc/testsuite/ChangeLog > > > * gcc.target/i386/and-1.c: New test case. > > > > +(define_peephole2 > > + [(parallel > > + [(set (match_operand 0 "flags_reg_operand") > > + (match_operator 1 "compare_operator" > > + [(and:QI (match_operand:QI 2 "register_operand") > > + (match_operand:QI 3 "immediate_operand")) > > > > const_int_operand here, you have INTVAL accessor for this operand below. > > > > + (const_int 0)])) > > + (set (match_dup 2) > > + (and:QI (match_dup 2) (match_dup 3)))])] "TARGET_64BIT > > + && !TARGET_PARTIAL_REG_STALL > > + && optimize_insn_for_size_p () > > + && ix86_match_ccmode (insn, CCNOmode) > > + && (INTVAL (operands[3]) & 0x80) == 0 > > > > val_signbit_known_clear_p (QImode, INTVAL (operands[3])) > > > > + /* NON_Q_REG_P (operands[2]) : %sil, %dil, %bpl, %spl. */ > > + && LEGACY_INT_REG_P (operands[2]) && !QI_REGNO_P (REGNO > > (operands[2])) > > + && peep2_reg_dead_p (1, operands[2])" > > > > Do we really need the above condition? Operand 2 is effectively inout > > operand, > > and when ANDed in SImode with zero-extended immediate, it will result in the > > same QImode value. The value outside QImode is not guaranteed without using > > strict_low_part. > > > > + [(parallel > > + [(set (match_dup 0) > > + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) > > + (const_int 0)])) > > + (set (match_dup 2) > > + (and:SI (match_dup 2) (match_dup 3)))])] { > > + operands[2] = gen_rtx_REG (SImode, REGNO (operands[2])); > > > > gen_lowpart (Simode, operands[2]) should work here. > > > > + operands[3] = gen_int_mode (INTVAL (operands[3]) & 0xff, SImode); > > > > No need to convert the immediate to SImode. The insn condition also > > guarantees it to be unsigned. > > > > +}) > > > > Uros. > > > > > Thanks in advance, > > > Roger > > > -- > > >
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c74edd1aaef..adad211c617 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -9502,14 +9502,14 @@ [(set (reg FLAGS_REG) (compare (and:QI - (match_operand:QI 0 "nonimmediate_operand" "%qm,*a,qm,r") - (match_operand:QI 1 "nonmemory_operand" "q,n,n,n")) + (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r") + (match_operand:QI 1 "nonmemory_operand" "q,n,n")) (const_int 0)))] "ix86_match_ccmode (insn, CONST_INT_P (operands[1]) && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)" { - if (which_alternative == 3) + if (get_attr_mode (insn) == MODE_SI) { if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0) operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); @@ -9518,8 +9518,16 @@ return "test{b}\t{%1, %0|%0, %1}"; } [(set_attr "type" "test") - (set_attr "mode" "QI,QI,QI,SI") - (set_attr "pent_pair" "uv,uv,np,np")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "2") + (const_string "SI") + (and (match_test "optimize_insn_for_size_p ()") + (and (match_operand 0 "ext_QIreg_operand") + (match_operand 1 "const_0_to_127_operand"))) + (const_string "SI") + ] + (const_string "QI"))) + (set_attr "pent_pair" "uv,np,np")]) (define_insn "*test<mode>_1" [(set (reg FLAGS_REG) @@ -10110,7 +10118,7 @@ CONST_INT_P (operands[2]) && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)" { - if (which_alternative == 2) + if (get_attr_mode (insn) == MODE_SI) { if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); @@ -10119,7 +10127,15 @@ return "and{b}\t{%2, %0|%0, %2}"; } [(set_attr "type" "alu") - (set_attr "mode" "QI,QI,SI") + (set (attr "mode") + (cond [(eq_attr "alternative" "2") + (const_string "SI") + (and (match_test "optimize_insn_for_size_p ()") + (and (match_operand 0 "ext_QIreg_operand") + (match_operand 2 "const_0_to_127_operand"))) + (const_string "SI") + ] + (const_string "QI"))) ;; Potential partial reg stall on alternative 2. (set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "2") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index a8cc17a054d..848a79a8d16 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -906,6 +906,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 63)"))) +;; Match 0 to 127. +(define_predicate "const_0_to_127_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 127)"))) + ;; Match 0 to 255. (define_predicate "const_0_to_255_operand" (and (match_code "const_int")