8 Regression] gcc 7.1 generates branch for code which was branchless in earlier gcc version

jakub at gcc dot gnu.org Tue, 12 Dec 2017 07:40:09 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81914


--- Comment #6 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Another testcase, which has even higher prediction of not returning -1:
int
cmp (int a, int b)
{
  if (a < b)
    return -1;
  if (a > b)
    return 1;
  return 0;
}

In the #c0 case, we have in the IL:
  <bb 2> :
  if (a_3(D) >= b_4(D))
    goto <bb 3>; [INV]
  else
    goto <bb 4>; [INV]

  <bb 3> :
  _1 = a_3(D) > b_4(D);
  iftmp.0_6 = (int) _1;

  <bb 4> :
  # iftmp.0_2 = PHI <iftmp.0_6(3), -1(2)>
  return iftmp.0_2;
while in this one:
  <bb 2>:
  if (a_2(D) < b_3(D))
    goto <bb 5>;
  else
    goto <bb 3>;

  <bb 3>:
  if (a_2(D) > b_3(D))
    goto <bb 5>;
  else
    goto <bb 4>;

  <bb 4>:

  <bb 5>:
  # _1 = PHI <-1(2), 1(3), 0(4)>
  return _1;
In any case, I think it is worth recognizing these patterns.
We should also consider other examples, e.g. from gcc itself:
static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}
static int
tm_alias_pair_cmp (const void *x, const void *y)
{
  const tm_alias_pair *p1 = (const tm_alias_pair *) x;
  const tm_alias_pair *p2 = (const tm_alias_pair *) y;
  if (p1->uid < p2->uid)
    return -1;
  if (p1->uid > p2->uid)
    return 1;
  return 0;
}
int
type_warning_cmp (const void *p1, const void *p2)
{
  const odr_type_warn_count *t1 = (const odr_type_warn_count *)p1;
  const odr_type_warn_count *t2 = (const odr_type_warn_count *)p2;

  if (t1->dyn_count < t2->dyn_count)
   return 1;
  if (t1->dyn_count > t2->dyn_count)
   return -1;
  return t2->count - t1->count;
}
static int
stack_var_cmp (const void *a, const void *b)
{
  size_t ia = *(const size_t *)a;
  size_t ib = *(const size_t *)b;
  unsigned int aligna = stack_vars[ia].alignb;
  unsigned int alignb = stack_vars[ib].alignb;
  HOST_WIDE_INT sizea = stack_vars[ia].size;
  HOST_WIDE_INT sizeb = stack_vars[ib].size;
  tree decla = stack_vars[ia].decl;
  tree declb = stack_vars[ib].decl;
  bool largea, largeb;
  unsigned int uida, uidb;

  /* Primary compare on "large" alignment.  Large comes first.  */
  largea = (aligna * BITS_PER_UNIT > MAX_SUPPORTED_STACK_ALIGNMENT);
  largeb = (alignb * BITS_PER_UNIT > MAX_SUPPORTED_STACK_ALIGNMENT);
  if (largea != largeb)
    return (int)largeb - (int)largea;

  /* Secondary compare on size, decreasing  */
  if (sizea > sizeb)
    return -1;
  if (sizea < sizeb)
    return 1;

  /* Tertiary compare on true alignment, decreasing.  */
  if (aligna < alignb)
    return -1;
  if (aligna > alignb)
    return 1;

  /* Final compare on ID for sort stability, increasing.
     Two SSA names are compared by their version, SSA names come before
     non-SSA names, and two normal decls are compared by their DECL_UID.  */
  if (TREE_CODE (decla) == SSA_NAME)
    {
      if (TREE_CODE (declb) == SSA_NAME)
        uida = SSA_NAME_VERSION (decla), uidb = SSA_NAME_VERSION (declb);
      else
        return -1;
    }
  else if (TREE_CODE (declb) == SSA_NAME)
    return 1;
  else
    uida = DECL_UID (decla), uidb = DECL_UID (declb);
  if (uida < uidb)
    return 1;
  if (uida > uidb)
    return -1;
  return 0;
}

etc.  Perhaps catching everything is too hard for heuristics, but at least
figuring a pattern of these <=> comparisons, or consecutive comparisons like:
  if (uida < uidb)
    return 1;
  if (uida > uidb)
    return -1;
is highly desirable.

[Bug middle-end/81914] [7/8 Regression] gcc 7.1 generates branch for code which was branchless in earlier gcc version

Reply via email to