On Fri, Jan 3, 2014 at 6:27 PM, Andrew Pinski <[email protected]> wrote:
> On Fri, Jan 3, 2014 at 3:37 PM, Jakub Jelinek <[email protected]> wrote:
>> On Fri, Jan 03, 2014 at 03:39:11PM +0000, Joseph S. Myers wrote:
>>> On Fri, 3 Jan 2014, Jakub Jelinek wrote:
>>>
>>> > I've noticed that especially with the AVX512F introduction we use
>>> > GET_MODE_SIZE (<MODE>mode) quite heavily in the i386 *.md files, and
>>> > the problem with that is GET_MODE_SIZE isn't a compile time constant,
>>> > needs to read mode_size array (non-const) at runtime.
>>>
>>> It would seem better to me for genmodes to generate appropriate macro /
>>> inline function definitions that can be used by GET_MODE_SIZE (along the
>>> lines of (__builtin_constant_p (MODE) && !mode_size_variable (MODE) ?
>>> get_mode_size_constant (MODE) : (unsigned short) mode_size[MODE]), where
>>> mode_size_variable and get_mode_size_constant are always_inline functions
>>> generated by genmodes) - that way all targets are covered automatically,
>>> without needing such duplication of mode sizes. (Of course such
>>> optimizations wouldn't apply for a first-stage compiler bootstrapped by a
>>> compiler not supporting __builtin_constant_p, but lack of optimization in
>>> the first stage of a bootstrap is not a particular concern.)
>>
>> That is certainly doable (as attached), but strangely if the patch (that I've
>> already committed) is reverted and this one applied, the .text savings are
>> much smaller.
>>
>> Here are .text and .rodata readelf -WS lines from x86_64 (first 4 pairs) and
>> i686 (last 4 pairs) builds, always vanilla trunk before r206312, that plus
>> r206312 patch, without r206312 but with attached patch, with both r206312
>> and attached patch. So, for .text size the best is both patches, but
>> for .rodata patches just r206312. I'll try to look at details why this is so
>> next week.
>>
>> [12] .text PROGBITS 00000000004f4b00 0f4b00 1131704 00
>> AX 0 0 16
>> [14] .rodata PROGBITS 0000000001626240 1226240 4093b4 00
>> A 0 0 64
>> [12] .text PROGBITS 00000000004f20a0 0f20a0 11156e4 00
>> AX 0 0 16
>> [14] .rodata PROGBITS 00000000016077c0 12077c0 3fcbb4 00
>> A 0 0 64
>> [12] .text PROGBITS 00000000004f4c60 0f4c60 112b8b4 00
>> AX 0 0 16
>> [14] .rodata PROGBITS 0000000001620540 1220540 40b134 00
>> A 0 0 64
>> [12] .text PROGBITS 00000000004f2200 0f2200 1113eb4 00
>> AX 0 0 16
>> [14] .rodata PROGBITS 00000000016060c0 12060c0 3fea74 00
>> A 0 0 64
>> [12] .text PROGBITS 0811d750 0d5750 12b4464 00 AX 0
>> 0 16
>> [14] .rodata PROGBITS 093d1c00 1389c00 2d8094 00 A 0
>> 0 64
>> [12] .text PROGBITS 0811b150 0d3150 12996a4 00 AX 0
>> 0 16
>> [14] .rodata PROGBITS 093b4840 136c840 2d1354 00 A 0
>> 0 64
>> [12] .text PROGBITS 0811d840 0d5840 12aa1e4 00 AX 0
>> 0 16
>> [14] .rodata PROGBITS 093c7a40 137fa40 2d8b94 00 A 0
>> 0 64
>> [12] .text PROGBITS 0811b240 0d3240 1292914 00 AX 0
>> 0 16
>> [14] .rodata PROGBITS 093adb80 1365b80 2d1f94 00 A 0
>> 0 64
>>
>> 2014-01-04 Jakub Jelinek <[email protected]>
>>
>> * genmodes.c (struct mode_data): Add need_bytesize_adj field.
>> (blank_mdoe): Initialize it.
>> (emit_mode_size_inline, emit_mode_nunits_inline,
>> emit_mode_inner_inline): New functions.
>> (emit_insn_modes_h): Call them and surround their output with
>> #if GCC_VERSION >= 4001 ... #endif.
>> * machmode.h (GET_MODE_SIZE, GET_MODE_NUNITS, GET_MODE_INNER):
>> For GCC_VERSION >= 4001 use mode_*_inline routines instead of
>> mode_* arrays if the argument is __builtin_constant_p.
>> * lower-subreg.c (dump_choices): Make sure GET_MODE_SIZE argument
>> is enum machine_mode.
>> fortran/
>> * trans-types.c (gfc_init_kinds): Make sure GET_MODE_BITSIZE
>> argument is enum machine_mode.
>
> It turns out Jorn filed a bug about this exact issue (back in 2008).
> See bug 36109.
Also Kazu filed a similar thing about TREE_CODE_LENGTH and
TREE_CODE_CLASS, see bug 14840; I attached a patch but I never got
around to seeing if it improves compile time speed either. It
definitely showed up in fold-const.c code at one point.
Thanks,
Andrew Pinski
>
>
> Thanks,
> Andrew Pinski
>
>
>>
>> --- gcc/lower-subreg.c.jj 2013-12-10 18:18:39.077943292 +0100
>> +++ gcc/lower-subreg.c 2014-01-03 18:35:00.510418999 +0100
>> @@ -1371,7 +1371,7 @@ dump_choices (bool speed_p, const char *
>> fprintf (dump_file, "Choices when optimizing for %s:\n", description);
>>
>> for (i = 0; i < MAX_MACHINE_MODE; i++)
>> - if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
>> + if (GET_MODE_SIZE ((enum machine_mode) i) > UNITS_PER_WORD)
>> fprintf (dump_file, " %s mode %s for copy lowering.\n",
>> choices[speed_p].move_modes_to_split[i]
>> ? "Splitting"
>> --- gcc/fortran/trans-types.c.jj 2013-11-21 22:24:18.790939654 +0100
>> +++ gcc/fortran/trans-types.c 2014-01-03 18:35:00.534418997 +0100
>> @@ -373,7 +373,7 @@ gfc_init_kinds (void)
>> /* The middle end doesn't support constants larger than 2*HWI.
>> Perhaps the target hook shouldn't have accepted these either,
>> but just to be safe... */
>> - bitsize = GET_MODE_BITSIZE (mode);
>> + bitsize = GET_MODE_BITSIZE ((enum machine_mode) mode);
>> if (bitsize > 2*HOST_BITS_PER_WIDE_INT)
>> continue;
>>
>> --- gcc/machmode.h.jj 2013-11-29 18:22:15.671594951 +0100
>> +++ gcc/machmode.h 2014-01-03 18:47:30.514374282 +0100
>> @@ -177,7 +177,13 @@ extern const unsigned char mode_class[NU
>> /* Get the size in bytes and bits of an object of mode MODE. */
>>
>> extern CONST_MODE_SIZE unsigned char mode_size[NUM_MACHINE_MODES];
>> +#if GCC_VERSION >= 4001
>> +#define GET_MODE_SIZE(MODE) \
>> + ((unsigned short) (__builtin_constant_p (MODE) \
>> + ? mode_size_inline (MODE) : mode_size[MODE]))
>> +#else
>> #define GET_MODE_SIZE(MODE) ((unsigned short) mode_size[MODE])
>> +#endif
>> #define GET_MODE_BITSIZE(MODE) \
>> ((unsigned short) (GET_MODE_SIZE (MODE) * BITS_PER_UNIT))
>>
>> @@ -203,7 +209,13 @@ extern const unsigned HOST_WIDE_INT mode
>> /* Return the mode of the inner elements in a vector. */
>>
>> extern const unsigned char mode_inner[NUM_MACHINE_MODES];
>> +#if GCC_VERSION >= 4001
>> +#define GET_MODE_INNER(MODE) \
>> + ((enum machine_mode) (__builtin_constant_p (MODE) \
>> + ? mode_inner_inline (MODE) : mode_inner[MODE]))
>> +#else
>> #define GET_MODE_INNER(MODE) ((enum machine_mode) mode_inner[MODE])
>> +#endif
>>
>> /* Get the size in bytes or bites of the basic parts of an
>> object of mode MODE. */
>> @@ -224,7 +236,13 @@ extern const unsigned char mode_inner[NU
>> /* Get the number of units in the object. */
>>
>> extern const unsigned char mode_nunits[NUM_MACHINE_MODES];
>> +#if GCC_VERSION >= 4001
>> +#define GET_MODE_NUNITS(MODE) \
>> + ((unsigned char) (__builtin_constant_p (MODE) \
>> + ? mode_nunits_inline (MODE) : mode_nunits[MODE]))
>> +#else
>> #define GET_MODE_NUNITS(MODE) mode_nunits[MODE]
>> +#endif
>>
>> /* Get the next wider natural mode (eg, QI -> HI -> SI -> DI -> TI). */
>>
>> --- gcc/genmodes.c.jj 2013-12-16 23:11:04.982989225 +0100
>> +++ gcc/genmodes.c 2014-01-03 18:47:16.153375138 +0100
>> @@ -72,6 +72,8 @@ struct mode_data
>> unsigned int counter; /* Rank ordering of modes */
>> unsigned int ibit; /* the number of integral bits */
>> unsigned int fbit; /* the number of fractional bits */
>> + bool need_bytesize_adj; /* true if this mode need dynamic size
>> + adjustment */
>> };
>>
>> static struct mode_data *modes[MAX_MODE_CLASS];
>> @@ -82,7 +84,7 @@ static const struct mode_data blank_mode
>> 0, "<unknown>", MAX_MODE_CLASS,
>> -1U, -1U, -1U, -1U,
>> 0, 0, 0, 0, 0,
>> - "<unknown>", 0, 0, 0, 0
>> + "<unknown>", 0, 0, 0, 0, false
>> };
>>
>> static htab_t modes_by_name;
>> @@ -904,6 +906,105 @@ emit_max_int (void)
>> printf ("#define MAX_BITSIZE_MODE_ANY_MODE (%d*BITS_PER_UNIT)\n", mmax);
>> }
>>
>> +/* Emit mode_size_inline routine into insn-modes.h header. */
>> +static void
>> +emit_mode_size_inline (void)
>> +{
>> + int c;
>> + struct mode_adjust *a;
>> + struct mode_data *m;
>> +
>> + /* Size adjustments must be propagated to all containing modes. */
>> + for (a = adj_bytesize; a; a = a->next)
>> + {
>> + a->mode->need_bytesize_adj = true;
>> + for (m = a->mode->contained; m; m = m->next_cont)
>> + m->need_bytesize_adj = true;
>> + }
>> +
>> + printf ("\
>> +#ifdef __cplusplus\n\
>> +inline __attribute__((__always_inline__))\n\
>> +#else\n\
>> +extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
>> +#endif\n\
>> +unsigned char\n\
>> +mode_size_inline (enum machine_mode mode)\n\
>> +{\n\
>> + extern %sunsigned char mode_size[NUM_MACHINE_MODES];\n\
>> + switch (mode)\n\
>> + {\n", adj_bytesize ? "" : "const ");
>> +
>> + for_all_modes (c, m)
>> + if (!m->need_bytesize_adj)
>> + printf (" case %smode: return %u;\n", m->name, m->bytesize);
>> +
>> + puts ("\
>> + default: return mode_size[mode];\n\
>> + }\n\
>> +}\n");
>> +}
>> +
>> +/* Emit mode_nunits_inline routine into insn-modes.h header. */
>> +static void
>> +emit_mode_nunits_inline (void)
>> +{
>> + int c;
>> + struct mode_data *m;
>> +
>> + puts ("\
>> +#ifdef __cplusplus\n\
>> +inline __attribute__((__always_inline__))\n\
>> +#else\n\
>> +extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
>> +#endif\n\
>> +unsigned char\n\
>> +mode_nunits_inline (enum machine_mode mode)\n\
>> +{\n\
>> + extern const unsigned char mode_nunits[NUM_MACHINE_MODES];\n\
>> + switch (mode)\n\
>> + {");
>> +
>> + for_all_modes (c, m)
>> + printf (" case %smode: return %u;\n", m->name, m->ncomponents);
>> +
>> + puts ("\
>> + default: return mode_nunits[mode];\n\
>> + }\n\
>> +}\n");
>> +}
>> +
>> +/* Emit mode_inner_inline routine into insn-modes.h header. */
>> +static void
>> +emit_mode_inner_inline (void)
>> +{
>> + int c;
>> + struct mode_data *m;
>> +
>> + puts ("\
>> +#ifdef __cplusplus\n\
>> +inline __attribute__((__always_inline__))\n\
>> +#else\n\
>> +extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
>> +#endif\n\
>> +unsigned char\n\
>> +mode_inner_inline (enum machine_mode mode)\n\
>> +{\n\
>> + extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
>> + switch (mode)\n\
>> + {");
>> +
>> + for_all_modes (c, m)
>> + printf (" case %smode: return %smode;\n", m->name,
>> + c != MODE_PARTIAL_INT && m->component
>> + ? m->component->name : void_mode->name);
>> +
>> + puts ("\
>> + default: return mode_inner[mode];\n\
>> + }\n\
>> +}\n");
>> +}
>> +
>> static void
>> emit_insn_modes_h (void)
>> {
>> @@ -969,6 +1070,12 @@ enum machine_mode\n{");
>> printf ("#define CONST_MODE_IBIT%s\n", adj_ibit ? "" : " const");
>> printf ("#define CONST_MODE_FBIT%s\n", adj_fbit ? "" : " const");
>> emit_max_int ();
>> + puts ("\n#if GCC_VERSION >= 4001\n");
>> + emit_mode_size_inline ();
>> + emit_mode_nunits_inline ();
>> + emit_mode_inner_inline ();
>> + puts ("#endif /* GCC_VERSION >= 4001 */");
>> +
>> puts ("\
>> \n\
>> #endif /* insn-modes.h */");
>>
>>
>> Jakub