On Tue, Jan 26, 2016 at 09:45:19PM +0100, Richard Biener wrote:
> On January 26, 2016 8:00:52 PM GMT+01:00, Mike Stump <mikest...@comcast.net> 
> wrote:
> >On Jan 26, 2016, at 10:21 AM, Jakub Jelinek <ja...@redhat.com> wrote:
> >> The question is, shall we do what wi::lshift does and have the fast
> >path
> >> only for the constant shifts, as the untested patch below does, or
> >shall we
> >> check shift dynamically (thus use
> >> shift < HOST_BITS_PER_WIDE_INT
> >> instead of
> >> STATIC_CONSTANT_P (shift < HOST_BITS_PER_WIDE_INT)
> >> in the patch),
> >
> >Hum…  I think I prefer the dynamic check.  The reasoning is that when
> >we fast path, we can tolerate the conditional branch to retain the fast
> >path, as most of the time, that condition will usually be true.  If the
> >compiler had troubles with knowing the usual truth value of the
> >expression, seems like we can hint that it will be true and influence
> >the static prediction of the branch.  This permits us to fast path
> >almost all the time in the non-constant, but small enough case.  For
> >known shifts, there is no code gen difference, so it doesn’t matter.
> 
> The original reasoning was to inline only the fast path if it is known at 
> compile time and otherwise have a call. Exactly to avoid bloating callers 
> with inlined conditionals.

I'm now also bootstrapping/regtesting the following patch; the previous one
passed bootstrap/regtest, and I will do a cc1plus size comparison afterwards.
That said, I have done a quick check, where I believe that unless xi and
shift are both compile time constants, for the
STATIC_CONSTANT_P (xi.precision > HOST_BITS_PER_WIDE_INT)
case there should be some comparison and the lrshift_large call with
the non-STATIC_CONSTANT_P variant, but in the bootstrapped (non-LTO)
cc1plus I only see 14 calls to lrshift_large, so it will likely affect
only <= 14 places right now:

0000000000776990 
<_ZL11build_new_1PP3vecIP9tree_node5va_gc8vl_embedES1_S1_S6_bi>:
  777ca4:       e8 17 28 91 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
00000000008b8bc0 <_ZL31adjust_offset_for_component_refP9tree_nodePbPl.part.91>:
  8b8cc2:       e8 f9 17 7d 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000a7b550 
<_ZN2wi7rrotateIPK9tree_node16generic_wide_intI16wide_int_storageEEENS_12unary_traitsIT_E11result_typeERKS8_RKT0_j>:
  a7baea:       e8 d1 e9 60 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
0000000000a7bd90 
<_ZN2wi7lrotateIPK9tree_node16generic_wide_intI16wide_int_storageEEENS_12unary_traitsIT_E11result_typeERKS8_RKT0_j>:
  a7c457:       e8 64 e0 60 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000a7c710 
<_ZN2wi7lrshiftIP9tree_nodelEENS_12unary_traitsIT_E11result_typeERKS4_RKT0_>:
  a7c851:       e8 6a dc 60 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000a7d280 
<_ZN2wi7lrshiftIPK9tree_node16generic_wide_intI16wide_int_storageEEENS_12unary_traitsIT_E11result_typeERKS8_RKT0_>:
  a7d3b9:       e8 02 d1 60 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000cc1370 <_Z15real_to_integerPK10real_valuePbi>:
  cc1752:       e8 69 8d 3c 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000cc27f0 
<_Z17real_from_integerP10real_value13format_helperRK16generic_wide_intI20wide_int_ref_storageILb0EEE6signop>:
  cc2f05:       e8 b6 75 3c 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000d6c420 
<_Z31simplify_const_binary_operation8rtx_code12machine_modeP7rtx_defS2_>:
  d6f5a5:       e8 16 af 31 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000d7ca40 
<_ZN2wi7lrotateISt4pairIP7rtx_def12machine_modeES5_EENS_12unary_traitsIT_E11result_typeERKS7_RKT0_j>:
  d7d17a:       e8 41 d3 30 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
0000000000d7d310 
<_ZN2wi7rrotateISt4pairIP7rtx_def12machine_modeES5_EENS_12unary_traitsIT_E11result_typeERKS7_RKT0_j>:
  d7d99d:       e8 1e cb 30 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000ea3c40 
<_ZN2wi7lrshiftI16generic_wide_intI22fixed_wide_int_storageILi192EEES4_EENS_12unary_traitsIT_E11result_typeERKS6_RKT0_>:
  ea3ca7:       e8 14 68 1e 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
0000000000f435a0 
<_ZL27copy_reference_ops_from_refP9tree_nodeP3vecI22vn_reference_op_struct7va_heap6vl_ptrE>:
  f444b2:       e8 09 60 14 00          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>
--
000000000161c840 
<_ZL21restructure_referencePP9tree_nodeS1_P16generic_wide_intI22fixed_wide_int_storageILi192EEES1_.constprop.133>:
 161cb51:       e8 6a d9 a6 ff          callq  108a4c0 
<_ZN2wi13lrshift_largeEPlPKljjjj>

2016-01-26  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/69399
        * wide-int.h (wi::lrshift): For larger precisions, only
        use fast path if shift is known to be < HOST_BITS_PER_WIDE_INT.

        * gcc.dg/torture/pr69399.c: New test.

--- gcc/wide-int.h.jj   2016-01-04 14:55:50.000000000 +0100
+++ gcc/wide-int.h      2016-01-26 19:05:20.715269366 +0100
@@ -2909,7 +2909,9 @@ wi::lrshift (const T1 &x, const T2 &y)
         For variable-precision integers like wide_int, handle HWI
         and sub-HWI integers inline.  */
       if (STATIC_CONSTANT_P (xi.precision > HOST_BITS_PER_WIDE_INT)
-         ? xi.len == 1 && xi.val[0] >= 0
+         ? (STATIC_CONSTANT_P (shift < HOST_BITS_PER_WIDE_INT)
+            && xi.len == 1
+            && xi.val[0] >= 0)
          : xi.precision <= HOST_BITS_PER_WIDE_INT)
        {
          val[0] = xi.to_uhwi () >> shift;
--- gcc/testsuite/gcc.dg/torture/pr69399.c.jj   2016-01-26 21:39:45.732927748 
+0100
+++ gcc/testsuite/gcc.dg/torture/pr69399.c      2016-01-26 21:41:09.081753051 
+0100
@@ -0,0 +1,18 @@
+/* { dg-do run { target int128 } } */
+
+static unsigned __attribute__((noinline, noclone))
+foo (unsigned long long u)
+{
+  unsigned __int128 v = u | 0xffffff81U;
+  v >>= 64;
+  return v;
+}
+
+int
+main ()
+{
+  unsigned x = foo (27);
+  if (x != 0)
+    __builtin_abort ();
+  return 0;
+}


        Jakub

Reply via email to