On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich <i...@linux.ibm.com> wrote: > > > Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guent...@gmail.com>: > > > > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <i...@linux.ibm.com> wrote: > >> > >>> Am 23.08.2019 um 13:24 schrieb Richard Biener > >>> <richard.guent...@gmail.com>: > >>> > >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > >>> <richard.sandif...@arm.com> wrote: > >>>> > >>>> Ilya Leoshkevich <i...@linux.ibm.com> writes: > >>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode > >>>>> mode, > >>>>> return 0; > >>>>> } > >>>>> > >>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the > >>>>> back-end > >>>>> + in order to determine its capabilities. In order to avoid creating > >>>>> fake > >>>>> + operations on each call, values from previous calls are cached in a > >>>>> global > >>>>> + cached_binops hash_table. It contains rtxes, which can be looked > >>>>> up using > >>>>> + binop_keys. */ > >>>>> + > >>>>> +struct binop_key { > >>>>> + enum rtx_code code; /* Operation code. */ > >>>>> + machine_mode value_mode; /* Result mode. */ > >>>>> + machine_mode cmp_op_mode; /* Operand mode. */ > >>>>> +}; > >>>>> + > >>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { > >>>>> + typedef rtx value_type; > >>>>> + typedef binop_key compare_type; > >>>>> + > >>>>> + static hashval_t > >>>>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode > >>>>> cmp_op_mode) > >>>>> + { > >>>>> + inchash::hash hstate (0); > >>>>> + hstate.add_int (code); > >>>>> + hstate.add_int (value_mode); > >>>>> + hstate.add_int (cmp_op_mode); > >>>>> + return hstate.end (); > >>>>> + } > >>>>> + > >>>>> + static hashval_t > >>>>> + hash (const rtx &ref) > >>>>> + { > >>>>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, > >>>>> 0))); > >>>>> + } > >>>>> + > >>>>> + static bool > >>>>> + equal (const rtx &ref1, const binop_key &ref2) > >>>>> + { > >>>>> + return (GET_CODE (ref1) == ref2.code) > >>>>> + && (GET_MODE (ref1) == ref2.value_mode) > >>>>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > >>>>> + } > >>>>> +}; > >>>>> + > >>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; > >>>>> + > >>>>> +static rtx > >>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, > >>>>> + machine_mode cmp_op_mode) > >>>>> +{ > >>>>> + if (!cached_binops) > >>>>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); > >>>>> + binop_key key = { code, value_mode, cmp_op_mode }; > >>>>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > >>>>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > >>>>> + if (!*slot) > >>>>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx > >>>>> (cmp_op_mode), > >>>>> + gen_reg_rtx (cmp_op_mode)); > >>>>> + return *slot; > >>>>> +} > >>>> > >>>> Sorry, I didn't mean anything this complicated. I just meant that > >>>> we should have a single cached rtx that we can change via PUT_CODE and > >>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each > >>>> time. > >>>> > >>>> Something like: > >>>> > >>>> static GTY ((cache)) rtx cached_binop; > >>>> > >>>> rtx > >>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > >>>> { > >>>> if (cached_binop) > >>>> { > >>>> PUT_CODE (cached_binop, code); > >>>> PUT_MODE_RAW (cached_binop, mode); > >>>> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > >>>> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > >>>> } > >>>> else > >>>> { > >>>> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > >>>> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > >>>> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > >>>> } > >>>> return cached_binop; > >>>> } > >>> > >>> Hmm, maybe we need auto_rtx (code) that constructs such > >>> RTX on the stack instead of wasting a GC root (and causing > >>> issues for future threading of GCC ;)). > >> > >> Do you mean something like this? > >> > >> union { > >> char raw[rtx_code_size[code]]; > >> rtx rtx; > >> } binop; > >> > >> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show > >> anything useful), or should I implement this? > > > > It doesn't exist AFAIK, I thought about using alloca like > > > > rtx tem; > > rtx_alloca (tem, PLUS); > > > > and due to using alloca rtx_alloca has to be a macro like > > > > #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); > > memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); > > > > maybe C++ can help making this prettier but of course > > since we use alloca we have to avoid opening new scopes. > > > > I guess templates like with auto_vec doesn't work unless > > we can make RTX_CODE_SIZE constant-evaluated. > > > > Richard. > > I ended up with the following change: > > diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c > index a667cdab94e..97aa2144e95 100644 > --- a/gcc/emit-rtl.c > +++ b/gcc/emit-rtl.c > @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned > int regno) > set_regno_raw (x, regno, nregs); > } > > -/* Generate a new REG rtx. Make sure ORIGINAL_REGNO is set properly, and > +/* Initialize a REG rtx. Make sure ORIGINAL_REGNO is set properly, and > don't attempt to share with the various global pieces of rtl (such as > frame_pointer_rtx). */ > > -rtx > -gen_raw_REG (machine_mode mode, unsigned int regno) > +void > +init_raw_REG (rtx x, machine_mode mode, unsigned int regno) > { > - rtx x = rtx_alloc (REG MEM_STAT_INFO); > set_mode_and_regno (x, mode, regno); > REG_ATTRS (x) = NULL; > ORIGINAL_REGNO (x) = regno; > +} > + > +/* Generate a new REG rtx. */ > + > +rtx > +gen_raw_REG (machine_mode mode, unsigned int regno) > +{ > + rtx x = rtx_alloc (REG MEM_STAT_INFO); > + init_raw_REG (x, mode, regno); > return x; > } > > diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c > index 5c78fabfb50..bb2087da258 100644 > --- a/gcc/gengenrtl.c > +++ b/gcc/gengenrtl.c > @@ -231,8 +231,7 @@ genmacro (int idx) > puts (")"); > } > > -/* Generate the code for the function to generate RTL whose > - format is FORMAT. */ > +/* Generate the code for functions to generate RTL whose format is FORMAT. > */ > > static void > gendef (const char *format) > @@ -240,22 +239,18 @@ gendef (const char *format) > const char *p; > int i, j; > > - /* Start by writing the definition of the function name and the types > + /* Write the definition of the init function name and the types > of the arguments. */ > > - printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, > machine_mode mode", format); > + puts ("static inline void"); > + printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format); > for (p = format, i = 0; *p != 0; p++) > if (*p != '0') > printf (",\n\t%sarg%d", type_from_format (*p), i++); > + puts (")"); > > - puts (" MEM_STAT_DECL)"); > - > - /* Now write out the body of the function itself, which allocates > - the memory and initializes it. */ > + /* Now write out the body of the init function itself. */ > puts ("{"); > - puts (" rtx rt;"); > - puts (" rt = rtx_alloc (code PASS_MEM_STAT);\n"); > - > puts (" PUT_MODE_RAW (rt, mode);"); > > for (p = format, i = j = 0; *p ; ++p, ++i) > @@ -266,16 +261,56 @@ gendef (const char *format) > else > printf (" %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++); > > - puts ("\n return rt;\n}\n"); > + puts ("}\n"); > + > + /* Write the definition of the gen function name and the types > + of the arguments. */ > + > + puts ("static inline rtx"); > + printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); > + for (p = format, i = 0; *p != 0; p++) > + if (*p != '0') > + printf (",\n\t%sarg%d", type_from_format (*p), i++); > + puts (" MEM_STAT_DECL)"); > + > + /* Now write out the body of the function itself, which allocates > + the memory and initializes it. */ > + puts ("{"); > + puts (" rtx rt;\n"); > + > + puts (" rt = rtx_alloc (code PASS_MEM_STAT);"); > + printf (" init_rtx_fmt_%s (rt, mode", format); > + for (p = format, i = 0; *p != 0; p++) > + if (*p != '0') > + printf (", arg%d", i++); > + puts (");\n"); > + > + puts (" return rt;\n}\n"); > + > + /* Write the definition of gen macro. */ > + > printf ("#define gen_rtx_fmt_%s(c, m", format); > for (p = format, i = 0; *p != 0; p++) > if (*p != '0') > - printf (", p%i",i++); > - printf (")\\\n gen_rtx_fmt_%s_stat (c, m", format); > + printf (", arg%d", i++); > + printf (") \\\n gen_rtx_fmt_%s_stat ((c), (m)", format); > for (p = format, i = 0; *p != 0; p++) > if (*p != '0') > - printf (", p%i",i++); > + printf (", (arg%d)", i++); > printf (" MEM_STAT_INFO)\n\n"); > + > + /* Write the definition of alloca macro. */ > + > + printf ("#define alloca_rtx_fmt_%s(rt, c, m", format); > + for (p = format, i = 0; *p != 0; p++) > + if (*p != '0') > + printf (", arg%d", i++); > + printf (") \\\n rtx_alloca ((rt), (c)); \\\n"); > + printf (" init_rtx_fmt_%s ((rt), (m)", format); > + for (p = format, i = 0; *p != 0; p++) > + if (*p != '0') > + printf (", (arg%d)", i++); > + printf (")\n\n"); > } > > /* Generate the documentation header for files we write. */ > diff --git a/gcc/rtl.h b/gcc/rtl.h > index efb9b3ce40d..44733d8a39e 100644 > --- a/gcc/rtl.h > +++ b/gcc/rtl.h > @@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void); > > /* In rtl.c */ > extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO); > +#define rtx_alloca(rt, code) \ > + (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \ > + memset ((rt), 0, RTX_HDR_SIZE); \ > + PUT_CODE ((rt), (code)); > extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int); > #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ) > #define const_wide_int_alloc(NWORDS) \ > @@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, > rtx_insn *next_insn, > extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT); > extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec); > extern void set_mode_and_regno (rtx, machine_mode, unsigned int); > +extern void init_raw_REG (rtx, machine_mode, unsigned int); > extern rtx gen_raw_REG (machine_mode, unsigned int); > +#define alloca_raw_REG(rt, mode, regno) \ > + rtx_alloca ((rt), REG); \ > + init_raw_REG ((rt), (mode), (regno)) > extern rtx gen_rtx_REG (machine_mode, unsigned int); > extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64); > extern rtx gen_rtx_MEM (machine_mode, rtx); > > which now allows me to write: > > rtx reg1, reg2, test; > alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); > alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2); > alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2); > > If that looks ok, I'll resend the series.
that looks OK to me - please leave Richard S. time to comment. Also while I'd like to see rtx reg1 = alloca_raw_REG (cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); I don't really see a way to write that portably (or at all), do you all agree? GCC doesn't seem to convert alloca() calls to __builtin_stack_save/restore nor place CLOBBERs to end their lifetime. But is it guaranteed that the alloca result is valid until frame termination? Thanks, Richard.