On Wed, Apr 13, 2022 at 9:08 AM Roger Sayle <ro...@nextmovesoftware.com> wrote:
>
>
> Hi Uros,
> Many thanks for the speedy review.  Here's the revised version of the
> patch incorporating all of your suggested fixes and improvements.

How about we go with an alternative route and enhance the *_maybe_si
instructions with your findings, like the attached patch?

Please note there is no dependency on TARGET_PARTIAL_REG_STALL. I
think that when -Os is in effect, we don't care for speed, but size
gets priority,  disregarding runtime slowdowns.

Uros.

>
> This has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check with no new failures.  Ok for mainline?
>
> 2022-04-13  Roger Sayle  <ro...@nextmovesoftware.com>
>             Uroš Bizjak  <ubiz...@gmail.com>
>
> gcc/ChangeLog
>         * config/i386/i386.md (peephole2): Transform *and_qi_maybe_si into
>         *andsi_2 with -Os when the instruction encoding is shorter.
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/and-1.c: New test case.
>
>
> Thanks again,
> Roger
> --
>
> > -----Original Message-----
> > From: Uros Bizjak <ubiz...@gmail.com>
> > Sent: 12 April 2022 17:37
> > To: Roger Sayle <ro...@nextmovesoftware.com>
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [x86_64 PATCH] Avoid andb %dil when optimizing for size.
> >
> > On Tue, Apr 12, 2022 at 5:42 PM Roger Sayle <ro...@nextmovesoftware.com>
> > wrote:
> > >
> > >
> > > This stand-alone patch avoids (-Os) regressions from a fix for PR 70321.
> > >
> > > The simple test case below has the unfortunate property that on x86_64
> > > it is larger when compiled with -Os than when compiled with -O2.
> > >
> > > int foo(char x)
> > > {
> > >   return (x & 123) != 0;
> > > }
> > >
> > > The issue is x86's complex instruction encoding, where andb $XX,%dil
> > > requires more bytes than andl $XX,%edi.  This patch provides a
> > > peephole2 to convert *and_qi_2_maybe_si into *andsi_2 for the problematic
> > cases.
> > >
> > > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> > > and make -k check with no new failures.  Ok for mainline?
> > >
> > >
> > > 2022-04-12  Roger Sayle  <ro...@nextmovesoftware.com>
> > >
> > > gcc/ChangeLog
> > >         * config/i386/i386.md (peephole2): Transform *and_qi_maybe_si into
> > >         *andsi_2 with -Os when the instruction encoding is shorter.
> > >
> > > gcc/testsuite/ChangeLog
> > >         * gcc.target/i386/and-1.c: New test case.
> >
> > +(define_peephole2
> > +  [(parallel
> > +    [(set (match_operand 0 "flags_reg_operand")
> > +  (match_operator 1 "compare_operator"
> > +    [(and:QI (match_operand:QI 2 "register_operand")
> > +     (match_operand:QI 3 "immediate_operand"))
> >
> > const_int_operand here, you have INTVAL accessor for this operand below.
> >
> > +     (const_int 0)]))
> > +      (set (match_dup 2)
> > +   (and:QI (match_dup 2) (match_dup 3)))])]  "TARGET_64BIT
> > +   && !TARGET_PARTIAL_REG_STALL
> > +   && optimize_insn_for_size_p ()
> > +   && ix86_match_ccmode (insn, CCNOmode)
> > +   && (INTVAL (operands[3]) & 0x80) == 0
> >
> > val_signbit_known_clear_p (QImode, INTVAL (operands[3]))
> >
> > +   /* NON_Q_REG_P (operands[2]) : %sil, %dil, %bpl, %spl.  */
> > +   && LEGACY_INT_REG_P (operands[2]) && !QI_REGNO_P (REGNO
> > (operands[2]))
> > +   && peep2_reg_dead_p (1, operands[2])"
> >
> > Do we really need the above condition? Operand 2 is effectively inout 
> > operand,
> > and when ANDed in SImode with zero-extended immediate, it will result in the
> > same QImode value. The value outside QImode is not guaranteed without using
> > strict_low_part.
> >
> > +  [(parallel
> > +     [(set (match_dup 0)
> > +   (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
> > +    (const_int 0)]))
> > +      (set (match_dup 2)
> > +   (and:SI (match_dup 2) (match_dup 3)))])] {
> > +  operands[2] = gen_rtx_REG (SImode, REGNO (operands[2]));
> >
> > gen_lowpart (Simode, operands[2]) should work here.
> >
> > +  operands[3] = gen_int_mode (INTVAL (operands[3]) & 0xff, SImode);
> >
> > No need to convert the immediate to SImode. The insn condition also
> > guarantees it to be unsigned.
> >
> > +})
> >
> > Uros.
> >
> > > Thanks in advance,
> > > Roger
> > > --
> > >
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c74edd1aaef..adad211c617 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9502,14 +9502,14 @@
   [(set (reg FLAGS_REG)
        (compare
          (and:QI
-           (match_operand:QI 0 "nonimmediate_operand" "%qm,*a,qm,r")
-           (match_operand:QI 1 "nonmemory_operand" "q,n,n,n"))
+           (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r")
+           (match_operand:QI 1 "nonmemory_operand" "q,n,n"))
          (const_int 0)))]
   "ix86_match_ccmode (insn,
                      CONST_INT_P (operands[1])
                      && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
 {
-  if (which_alternative == 3)
+  if (get_attr_mode (insn) == MODE_SI)
     {
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
        operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
@@ -9518,8 +9518,16 @@
   return "test{b}\t{%1, %0|%0, %1}";
 }
   [(set_attr "type" "test")
-   (set_attr "mode" "QI,QI,QI,SI")
-   (set_attr "pent_pair" "uv,uv,np,np")])
+   (set (attr "mode")
+     (cond [(eq_attr "alternative" "2")
+             (const_string "SI")
+           (and (match_test "optimize_insn_for_size_p ()")
+                (and (match_operand 0 "ext_QIreg_operand")
+                     (match_operand 1 "const_0_to_127_operand")))
+             (const_string "SI")
+          ]
+          (const_string "QI")))
+   (set_attr "pent_pair" "uv,np,np")])
 
 (define_insn "*test<mode>_1"
   [(set (reg FLAGS_REG)
@@ -10110,7 +10118,7 @@
                         CONST_INT_P (operands[2])
                         && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
 {
-  if (which_alternative == 2)
+  if (get_attr_mode (insn) == MODE_SI)
     {
       if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
         operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
@@ -10119,7 +10127,15 @@
   return "and{b}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "alu")
-   (set_attr "mode" "QI,QI,SI")
+   (set (attr "mode")
+     (cond [(eq_attr "alternative" "2")
+             (const_string "SI")
+           (and (match_test "optimize_insn_for_size_p ()")
+                (and (match_operand 0 "ext_QIreg_operand")
+                     (match_operand 2 "const_0_to_127_operand")))
+             (const_string "SI")
+          ]
+          (const_string "QI")))
    ;; Potential partial reg stall on alternative 2.
    (set (attr "preferred_for_speed")
      (cond [(eq_attr "alternative" "2")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a8cc17a054d..848a79a8d16 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -906,6 +906,11 @@
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 63)")))
 
+;; Match 0 to 127.
+(define_predicate "const_0_to_127_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 127)")))
+
 ;; Match 0 to 255.
 (define_predicate "const_0_to_255_operand"
   (and (match_code "const_int")

Reply via email to