On Thu, Jan 07, 2010 at 03:53:59AM -0500, Jakub Jelinek wrote:
> On Thu, Jan 07, 2010 at 09:48:53AM +0100, Gabriel Paubert wrote:
> > > apparently rs6000_emit_set_long_const needs work.
> > > lis 3,0x8034
> > > extsw 3,3
> > > or
> > > li 3,0x401a
> > > sldi 3,3,17
> > > etc. do IMHO the same.
> >
> > Huh? I don't think so:
> >
> > - first one loads 0xffff_ffff_8034_0000 in r3, and the extsw looks redundant
>
> I meant lis 3,0x8034; rldicl 3,3,0,32 for the first case, sorry for mixing
> sign extension with zero extension.
Attached is a quick version of an optimization.
On:
long f0 (long x) { return 0x80340001UL; }
long f1 (long x) { return 0x80340000UL; }
long f2 (long x) { return 0xfffffff000000000UL; }
long f3 (long x) { return 0x1ffffffffUL; }
long f4 (long x) { return 0xffffff1230000000UL; }
long f5 (long x) { return 0x180340000UL; }
long f6 (long x) { return 0xfffffff180300000UL; }
the change is:
.f0:
- li 3,0
- ori 3,3,32820
- sldi 3,3,16
+ lis 3,0x8034
ori 3,3,1
+ rldicl 3,3,0,32
...
.f1:
- li 3,0
- ori 3,3,32820
- sldi 3,3,16
+ lis 3,0x8034
+ rldicl 3,3,0,32
Unfortunately, the current constraints prevent the other
easy constants (e.g. the 2-insn ones) from being expanded inline,
and the cases with just one 0->1 or 1->0 bit transition were already
handled before. So either we'd need to add a new constraint (for which
cases -- just all the 2-insn ones?), or it would be sufficient to drop
analyze_64bit_constant and handle only the ud3 == 0 && ud4 == 0 case.
Jakub
2010-01-07 Jakub Jelinek <[email protected]>
* config/rs6000/rs6000.c (analyze_64bit_constant): New function.
(rs6000_emit_set_long_const): Optimize.
--- gcc/config/rs6000/rs6000.c.jj 2009-12-10 19:19:08.000000000 +0100
+++ gcc/config/rs6000/rs6000.c 2010-01-07 09:59:44.000000000 +0100
@@ -6091,6 +6091,45 @@ rs6000_emit_set_const (rtx dest, enum ma
return result;
}
+/* Analyze the nonzero 64-bit constant BITS: store the index of its highest
+   set bit in *HBSP, of its lowest set bit in *LBSP, and in *TOPP the length
+   of the run of consecutive set bits that starts at the highest set bit.
+   Shifts by up to 63 are used, so HOST_WIDE_INT must be 64 bits wide.  */
+static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
+                                    int *, int *, int *);
+
+static void
+analyze_64bit_constant (unsigned HOST_WIDE_INT bits,
+                        int *hbsp, int *lbsp, int *topp)
+{
+  int lowest_bit_set = -1, highest_bit_set = -1, top_bits_set;
+  int i = 0;
+
+  /* Scan from both ends at once until both extremes have been found.  */
+  do
+    {
+      if (lowest_bit_set == -1 && ((bits >> i) & 1))
+        lowest_bit_set = i;
+      if (highest_bit_set == -1 && ((bits >> (64 - i - 1)) & 1))
+        highest_bit_set = 64 - i - 1;
+    }
+  while (++i < 64 && (highest_bit_set == -1 || lowest_bit_set == -1));
+  /* A constant with no bits set should have gone out as one instruction!  */
+  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
+  /* Count the contiguous ones from the highest set bit downwards.  The mask
+     is built from an unsigned 1 so that shifting by 63 is well defined.  */
+  top_bits_set = highest_bit_set - lowest_bit_set + 1;
+  for (i = highest_bit_set; i >= lowest_bit_set; i--)
+    if ((bits & ((unsigned HOST_WIDE_INT) 1 << i)) == 0)
+      {
+        top_bits_set = highest_bit_set - i;
+        break;
+      }
+  *hbsp = highest_bit_set;
+  *lbsp = lowest_bit_set;
+  *topp = top_bits_set;
+}
+
/* Having failed to find a 3 insn sequence in rs6000_emit_set_const,
fall back to a straight forward decomposition. We do this to avoid
exponential run times encountered when looking for longer sequences
@@ -6112,12 +6151,11 @@ rs6000_emit_set_long_const (rtx dest, HO
else
{
HOST_WIDE_INT ud1, ud2, ud3, ud4;
+ int lowest_bit_set, highest_bit_set, top_bits_set;
ud1 = c1 & 0xffff;
ud2 = (c1 & 0xffff0000) >> 16;
-#if HOST_BITS_PER_WIDE_INT >= 64
c2 = c1 >> 32;
-#endif
ud3 = c2 & 0xffff;
ud4 = (c2 & 0xffff0000) >> 16;
@@ -6128,6 +6166,7 @@ rs6000_emit_set_long_const (rtx dest, HO
emit_move_insn (dest, GEN_INT (((ud1 ^ 0x8000) - 0x8000)));
else
emit_move_insn (dest, GEN_INT (ud1));
+ return dest;
}
else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
@@ -6142,6 +6181,52 @@ rs6000_emit_set_long_const (rtx dest, HO
emit_move_insn (copy_rtx (dest),
gen_rtx_IOR (DImode, copy_rtx (dest),
GEN_INT (ud1)));
+ return dest;
+ }
+ analyze_64bit_constant (c1, &highest_bit_set, &lowest_bit_set,
+ &top_bits_set);
+ /* See if a 2 insn sequence isn't possible.
+ li reg,cst; sldi reg,reg,shift. */
+ if (((highest_bit_set - lowest_bit_set) < 15
+ || (highest_bit_set == 63
+ && (highest_bit_set - lowest_bit_set) < 15 + top_bits_set))
+ /* For 0x00000000XXXX0000 prefer the next 2 insn sequence. */
+ && (ud1 | ud3 | ud4) != 0)
+ {
+ HOST_WIDE_INT the_const = c1 >> lowest_bit_set;
+ emit_move_insn (dest, GEN_INT (the_const));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+ GEN_INT (lowest_bit_set)));
+ }
+ else if (ud3 == 0 && ud4 == 0)
+ {
+ gcc_assert (ud2 & 0x8000);
+ emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
+ - 0x80000000));
+ if (ud1 != 0)
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud1)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_ZERO_EXTEND (DImode,
+ gen_lowpart (SImode,
+ copy_rtx (dest))));
+ }
+ /* 3 insn sequence.
+ lis reg,csth; ori reg,reg,cstl; sldi reg,reg,shift. */
+ else if ((highest_bit_set - lowest_bit_set) < 31
+ || (highest_bit_set == 63
+ && (highest_bit_set - lowest_bit_set) < 31 + top_bits_set))
+ {
+ HOST_WIDE_INT the_const = c1 >> lowest_bit_set;
+ emit_move_insn (dest, GEN_INT (the_const >> 16));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (the_const & 0xffff)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+ GEN_INT (lowest_bit_set)));
}
else if ((ud4 == 0xffff && (ud3 & 0x8000))
|| (ud4 == 0 && ! (ud3 & 0x8000)))