https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119127

--- Comment #5 from Xi Ruoyao <xry111 at gcc dot gnu.org> ---
(In reply to chenglulu from comment #4)
> (In reply to Xi Ruoyao from comment #3)
> > It happens at:
> > 
> > trying to combine definition of r94 in: 
> >    15: r94:DI=r92:DI<<0x2&0xfffffffc
> >       REG_DEAD r92:DI
> > into:
> >    17: r96:DI=sign_extend(r87:SI+r94:DI#0)
> >       REG_DEAD r94:DI
> >       REG_DEAD r87:SI
> > 
> > i.e.
> > 
> > bstrpick.d t0, a0, 31, 0
> > slli.d     t0, t0, 2
> > add.w      t0, t0, a1
> > 
> > But we want just
> > 
> > alsl.w     t0, a0, a1, 2
> > 
> > It seems modifying SImode to DImode as comment 1 will defeat the
> > optimization.  I guess we should sign extend mask in
> > loongarch_reassoc_shift_bitwise to mode if it's not already sign-extended.
> 
> I disagree with your viewpoint.  Like the optimization below.
> If the mask is sign-extended here, an error will occur.
> 
> (insn 17 13 28 2 (set (reg:DI 14 $r14 [96])
>         (sign_extend:DI (plus:SI (subreg/s/u:SI (and:DI (ashift:DI (reg:DI
> 14 $r14 [96])
>                             (const_int 2 [0x2]))
>                         (const_int 4294967292 [0xfffffffc])) 0)
>                 (reg:SI 12 $r12 [orig:87 _3 ] [87])))) "pr119127.ii":11:39
> 266 {and_alsl_reversesi_extended}
>      (nil)) 
> 
> split to :
> (insn 32 13 33 2 (set (reg:DI 14 $r14 [96])
>         (and:DI (reg:DI 14 $r14 [96])
>             (const_int 1073741823 [0x3fffffff]))) "pr119127.ii":11:39 101
> {*anddi3}
>      (nil))
> (insn 33 32 28 2 (set (reg:DI 14 $r14 [96])
>         (sign_extend:DI (plus:SI (ashift:SI (reg:SI 14 $r14 [96])
>                     (const_int 2 [0x2]))
>                 (reg:SI 12 $r12 [orig:87 _3 ] [87])))) "pr119127.ii":11:39
> 256 {*alslsi3_extend}
>      (nil))

Before split it's something like

$r14 = sign_extend (low32 (($r14 << 2) & 0xfffffffc) + low32 ($r12))

And after split it's something like

$r14 = $r14 & 0x3fffffff                                  # bstrpick
$r14 = sign_extend ((low32 ($r14) << 2) + low32 ($r12)) # alsl

The calculation result should be correct, so I guess "incorrect" means the
redundant $r14 = $r14 & 0x3fffffff here?  It can be removed like:

diff --git a/gcc/config/loongarch/loongarch.cc
b/gcc/config/loongarch/loongarch.cc
index 3779e283f8d..f21b8ae0ea3 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4575,8 +4575,22 @@ loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt,
rtx mask,
   if (ctz_hwi (INTVAL (mask)) < INTVAL (shamt))
     return NULL_RTX;

+  /* When trying alsl.w, deliberately ignore the high bits.  */
+  mask = gen_int_mode (UINTVAL (mask), mode);
+
   rtx new_mask = simplify_const_binary_operation (LSHIFTRT, mode, mask,
                                                  shamt);
+
+  /* Do an arithmetic shift for checking ins_zero_bitmask_operand or -1:
+     ashiftrt (0xffffffff00000000, 2) is 0xffffffffc0000000 which is an
+     ins_zero_bitmask_operand, but lshiftrt will produce
+     0x3fffffffc0000000.  */
+  rtx new_mask_1 = simplify_const_binary_operation (ASHIFTRT, mode, mask,
+                                                   shamt);
+
+  if (is_and && const_m1_operand (new_mask_1, mode))
+    return new_mask_1;
+
   if (const_uns_arith_operand (new_mask, mode))
     return new_mask;

@@ -4586,13 +4600,7 @@ loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt,
rtx mask,
   if (low_bitmask_operand (new_mask, mode))
     return new_mask;

-  /* Do an arithmetic shift for checking ins_zero_bitmask_operand:
-     ashiftrt (0xffffffff00000000, 2) is 0xffffffffc0000000 which is an
-     ins_zero_bitmask_operand, but lshiftrt will produce
-     0x3fffffffc0000000.  */
-  new_mask = simplify_const_binary_operation (ASHIFTRT, mode, mask,
-                                             shamt);
-  return ins_zero_bitmask_operand (new_mask, mode) ? new_mask : NULL_RTX;
+  return ins_zero_bitmask_operand (new_mask_1, mode) ? new_mask_1 : NULL_RTX;
 }

 /* Implement TARGET_CONSTANT_ALIGNMENT.  */
diff --git a/gcc/config/loongarch/loongarch.md
b/gcc/config/loongarch/loongarch.md
index 478f859051c..a13398fdff4 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -3230,10 +3230,8 @@ (define_insn_and_split "<optab>_alsl_reversesi_extended"
       emit_insn (gen_<optab>di3 (operands[0], operands[1], operands[3]));
     else
       {
-       /* Hmm would we really reach here?  If we reach here we'd have
-          a miss-optimization in the generic code (as it should have
-          optimized this to alslsi3_extend_subreg).  But let's be safe
-          than sorry.  */
+       /* We can end up here with things like:
+          x:DI = sign_extend(a:SI + ((b:DI << 2) & 0xfffffffc)#0)  */
        gcc_checking_assert (<is_and>);
        emit_move_insn (operands[0], operands[1]);
       }

Reply via email to