On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich <i...@linux.ibm.com> wrote:
>
> > Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guent...@gmail.com>:
> >
> > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <i...@linux.ibm.com> wrote:
> >>
> >>> Am 23.08.2019 um 13:24 schrieb Richard Biener 
> >>> <richard.guent...@gmail.com>:
> >>>
> >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford
> >>> <richard.sandif...@arm.com> wrote:
> >>>>
> >>>> Ilya Leoshkevich <i...@linux.ibm.com> writes:
> >>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode 
> >>>>> mode,
> >>>>>  return 0;
> >>>>> }
> >>>>>
> >>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the 
> >>>>> back-end
> >>>>> +   in order to determine its capabilities.  In order to avoid creating 
> >>>>> fake
> >>>>> +   operations on each call, values from previous calls are cached in a 
> >>>>> global
> >>>>> +   cached_binops hash_table.  It contains rtxes, which can be looked 
> >>>>> up using
> >>>>> +   binop_keys.  */
> >>>>> +
> >>>>> +struct binop_key {
> >>>>> +  enum rtx_code code;        /* Operation code.  */
> >>>>> +  machine_mode value_mode;   /* Result mode.     */
> >>>>> +  machine_mode cmp_op_mode;  /* Operand mode.    */
> >>>>> +};
> >>>>> +
> >>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> {
> >>>>> +  typedef rtx value_type;
> >>>>> +  typedef binop_key compare_type;
> >>>>> +
> >>>>> +  static hashval_t
> >>>>> +  hash (enum rtx_code code, machine_mode value_mode, machine_mode 
> >>>>> cmp_op_mode)
> >>>>> +  {
> >>>>> +    inchash::hash hstate (0);
> >>>>> +    hstate.add_int (code);
> >>>>> +    hstate.add_int (value_mode);
> >>>>> +    hstate.add_int (cmp_op_mode);
> >>>>> +    return hstate.end ();
> >>>>> +  }
> >>>>> +
> >>>>> +  static hashval_t
> >>>>> +  hash (const rtx &ref)
> >>>>> +  {
> >>>>> +    return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 
> >>>>> 0)));
> >>>>> +  }
> >>>>> +
> >>>>> +  static bool
> >>>>> +  equal (const rtx &ref1, const binop_key &ref2)
> >>>>> +  {
> >>>>> +    return (GET_CODE (ref1) == ref2.code)
> >>>>> +        && (GET_MODE (ref1) == ref2.value_mode)
> >>>>> +        && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode);
> >>>>> +  }
> >>>>> +};
> >>>>> +
> >>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops;
> >>>>> +
> >>>>> +static rtx
> >>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode,
> >>>>> +               machine_mode cmp_op_mode)
> >>>>> +{
> >>>>> +  if (!cached_binops)
> >>>>> +    cached_binops = hash_table<binop_hasher>::create_ggc (1024);
> >>>>> +  binop_key key = { code, value_mode, cmp_op_mode };
> >>>>> +  hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode);
> >>>>> +  rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT);
> >>>>> +  if (!*slot)
> >>>>> +    *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx 
> >>>>> (cmp_op_mode),
> >>>>> +                         gen_reg_rtx (cmp_op_mode));
> >>>>> +  return *slot;
> >>>>> +}
> >>>>
> >>>> Sorry, I didn't mean anything this complicated.  I just meant that
> >>>> we should have a single cached rtx that we can change via PUT_CODE and
> >>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each
> >>>> time.
> >>>>
> >>>> Something like:
> >>>>
> >>>> static GTY ((cache)) rtx cached_binop;
> >>>>
> >>>> rtx
> >>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode)
> >>>> {
> >>>> if (cached_binop)
> >>>>   {
> >>>>     PUT_CODE (cached_binop, code);
> >>>>     PUT_MODE_RAW (cached_binop, mode);
> >>>>     PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode);
> >>>>     PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode);
> >>>>   }
> >>>> else
> >>>>   {
> >>>>     rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
> >>>>     rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2);
> >>>>     cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2);
> >>>>   }
> >>>> return cached_binop;
> >>>> }
> >>>
> >>> Hmm, maybe we need  auto_rtx (code) that constructs such
> >>> RTX on the stack instead of wasting a GC root (and causing
> >>> issues for future threading of GCC ;)).
> >>
> >> Do you mean something like this?
> >>
> >> union {
> >>  char raw[rtx_code_size[code]];
> >>  rtx rtx;
> >> } binop;
> >>
> >> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show
> >> anything useful), or should I implement this?
> >
> > It doesn't exist AFAIK, I thought about using alloca like
> >
> > rtx tem;
> > rtx_alloca (tem, PLUS);
> >
> > and due to using alloca rtx_alloca has to be a macro like
> >
> > #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code));
> > memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code);
> >
> > maybe C++ can help making this prettier but of course
> > since we use alloca we have to avoid opening new scopes.
> >
> > I guess templates like with auto_vec doesn't work unless
> > we can make RTX_CODE_SIZE constant-evaluated.
> >
> > Richard.
>
> I ended up with the following change:
>
> diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
> index a667cdab94e..97aa2144e95 100644
> --- a/gcc/emit-rtl.c
> +++ b/gcc/emit-rtl.c
> @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned 
> int regno)
>    set_regno_raw (x, regno, nregs);
>  }
>
> -/* Generate a new REG rtx.  Make sure ORIGINAL_REGNO is set properly, and
> +/* Initialize a REG rtx.  Make sure ORIGINAL_REGNO is set properly, and
>     don't attempt to share with the various global pieces of rtl (such as
>     frame_pointer_rtx).  */
>
> -rtx
> -gen_raw_REG (machine_mode mode, unsigned int regno)
> +void
> +init_raw_REG (rtx x, machine_mode mode, unsigned int regno)
>  {
> -  rtx x = rtx_alloc (REG MEM_STAT_INFO);
>    set_mode_and_regno (x, mode, regno);
>    REG_ATTRS (x) = NULL;
>    ORIGINAL_REGNO (x) = regno;
> +}
> +
> +/* Generate a new REG rtx.  */
> +
> +rtx
> +gen_raw_REG (machine_mode mode, unsigned int regno)
> +{
> +  rtx x = rtx_alloc (REG MEM_STAT_INFO);
> +  init_raw_REG (x, mode, regno);
>    return x;
>  }
>
> diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c
> index 5c78fabfb50..bb2087da258 100644
> --- a/gcc/gengenrtl.c
> +++ b/gcc/gengenrtl.c
> @@ -231,8 +231,7 @@ genmacro (int idx)
>    puts (")");
>  }
>
> -/* Generate the code for the function to generate RTL whose
> -   format is FORMAT.  */
> +/* Generate the code for functions to generate RTL whose format is FORMAT.  
> */
>
>  static void
>  gendef (const char *format)
> @@ -240,22 +239,18 @@ gendef (const char *format)
>    const char *p;
>    int i, j;
>
> -  /* Start by writing the definition of the function name and the types
> +  /* Write the definition of the init function name and the types
>       of the arguments.  */
>
> -  printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, 
> machine_mode mode", format);
> +  puts ("static inline void");
> +  printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format);
>    for (p = format, i = 0; *p != 0; p++)
>      if (*p != '0')
>        printf (",\n\t%sarg%d", type_from_format (*p), i++);
> +  puts (")");
>
> -  puts (" MEM_STAT_DECL)");
> -
> -  /* Now write out the body of the function itself, which allocates
> -     the memory and initializes it.  */
> +  /* Now write out the body of the init function itself.  */
>    puts ("{");
> -  puts ("  rtx rt;");
> -  puts ("  rt = rtx_alloc (code PASS_MEM_STAT);\n");
> -
>    puts ("  PUT_MODE_RAW (rt, mode);");
>
>    for (p = format, i = j = 0; *p ; ++p, ++i)
> @@ -266,16 +261,56 @@ gendef (const char *format)
>      else
>        printf ("  %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++);
>
> -  puts ("\n  return rt;\n}\n");
> +  puts ("}\n");
> +
> +  /* Write the definition of the gen function name and the types
> +     of the arguments.  */
> +
> +  puts ("static inline rtx");
> +  printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format);
> +  for (p = format, i = 0; *p != 0; p++)
> +    if (*p != '0')
> +      printf (",\n\t%sarg%d", type_from_format (*p), i++);
> +  puts (" MEM_STAT_DECL)");
> +
> +  /* Now write out the body of the function itself, which allocates
> +     the memory and initializes it.  */
> +  puts ("{");
> +  puts ("  rtx rt;\n");
> +
> +  puts ("  rt = rtx_alloc (code PASS_MEM_STAT);");
> +  printf ("  init_rtx_fmt_%s (rt, mode", format);
> +  for (p = format, i = 0; *p != 0; p++)
> +    if (*p != '0')
> +      printf (", arg%d", i++);
> +  puts (");\n");
> +
> +  puts ("  return rt;\n}\n");
> +
> +  /* Write the definition of gen macro.  */
> +
>    printf ("#define gen_rtx_fmt_%s(c, m", format);
>    for (p = format, i = 0; *p != 0; p++)
>      if (*p != '0')
> -      printf (", p%i",i++);
> -  printf (")\\\n        gen_rtx_fmt_%s_stat (c, m", format);
> +      printf (", arg%d", i++);
> +  printf (") \\\n  gen_rtx_fmt_%s_stat ((c), (m)", format);
>    for (p = format, i = 0; *p != 0; p++)
>      if (*p != '0')
> -      printf (", p%i",i++);
> +      printf (", (arg%d)", i++);
>    printf (" MEM_STAT_INFO)\n\n");
> +
> +  /* Write the definition of alloca macro.  */
> +
> +  printf ("#define alloca_rtx_fmt_%s(rt, c, m", format);
> +  for (p = format, i = 0; *p != 0; p++)
> +    if (*p != '0')
> +      printf (", arg%d", i++);
> +  printf (") \\\n  rtx_alloca ((rt), (c)); \\\n");
> +  printf ("  init_rtx_fmt_%s ((rt), (m)", format);
> +  for (p = format, i = 0; *p != 0; p++)
> +    if (*p != '0')
> +      printf (", (arg%d)", i++);
> +  printf (")\n\n");
>  }
>
>  /* Generate the documentation header for files we write.  */
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index efb9b3ce40d..44733d8a39e 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void);
>
>  /* In rtl.c */
>  extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO);
> +#define rtx_alloca(rt, code) \
> +  (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \
> +  memset ((rt), 0, RTX_HDR_SIZE); \
> +  PUT_CODE ((rt), (code));
>  extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int);
>  #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ)
>  #define const_wide_int_alloc(NWORDS)                           \
> @@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, 
> rtx_insn *next_insn,
>  extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT);
>  extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec);
>  extern void set_mode_and_regno (rtx, machine_mode, unsigned int);
> +extern void init_raw_REG (rtx, machine_mode, unsigned int);
>  extern rtx gen_raw_REG (machine_mode, unsigned int);
> +#define alloca_raw_REG(rt, mode, regno) \
> +  rtx_alloca ((rt), REG); \
> +  init_raw_REG ((rt), (mode), (regno))
>  extern rtx gen_rtx_REG (machine_mode, unsigned int);
>  extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64);
>  extern rtx gen_rtx_MEM (machine_mode, rtx);
>
> which now allows me to write:
>
> rtx reg1, reg2, test;
> alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1);
> alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2);
> alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2);
>
> If that looks ok, I'll resend the series.

that looks OK to me - please leave Richard S. time to comment.  Also while
I'd like to see

rtx reg1 = alloca_raw_REG (cmp_op_mode, LAST_VIRTUAL_REGISTER + 1);

I don't really see a way to write that portably (or at all), do you all agree?
GCC doesn't seem to convert alloca() calls to __builtin_stack_save/restore
nor place CLOBBERs to end their lifetime.  But is it guaranteed that the
alloca result is valid until frame termination?

Thanks,
Richard.

Reply via email to