The attached patch illustrates the following idea: we can replace several builtins that share the same signature with a single auxiliary builtin that takes one extra argument. This argument specifies which particular builtin the auxiliary one should expand to. At the expand stage we pull the argument out and, based on its value, determine exactly which builtin is wanted: we supply the ISA restrictions and the corresponding named pattern. From there the builtin is expanded as usual.
The benefit is that at initialization time we only need to process as many builtins as there are distinct signatures. If we take the three static arrays of builtin descriptions in gcc/config/i386/i386.c (namely bdesc_args, bdesc_special_args and bdesc_round_args) and apply this idea, we can reduce 2163 builtins to only 677 (about 3.2 times fewer, i.e. 1486 fewer builtins).
commit bd37499025b09c9bb0337077040068fcd750c1e5 Author: Maxim Blumenthal <maxim.blument...@intel.com> Date: Mon Feb 16 16:14:06 2015 +0300 Here we combine several builtins having the same signature by introducing one auxiliary builtin with a special argument which will specify what particular builtin the auxiliary one should expand to. We pull the argument out at the expand stage and based on its value specify information about the builtin we want to get, i.e. ISA mask, named pattern, etc. diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h index 4da5250..a4e8edf 100644 --- a/gcc/config/i386/avx512cdintrin.h +++ b/gcc/config/i386/avx512cdintrin.h @@ -34,6 +34,9 @@ #define __DISABLE_AVX512CD__ #endif /* __AVX512CD__ */ +/*Opcode argument to __builtin_ia32_v8di_ftype_qi*/ +#define BROADCASTMB512 0 + /* Internal data types for implementing the intrinsics. */ typedef long long __v8di __attribute__ ((__vector_size__ (64))); typedef int __v16si __attribute__ ((__vector_size__ (64))); @@ -166,7 +169,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcastmb_epi64 (__mmask8 __A) { - return (__m512i) __builtin_ia32_broadcastmb512 (__A); + return (__m512i) __builtin_ia32_v8di_ftype_qi (__A, BROADCASTMB512); } extern __inline __m512i diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index b36ef48..053a391 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -34,6 +34,9 @@ #define __DISABLE_AVX512DQ__ #endif /* __AVX512DQ__ */ +/*Opcode argument to __builtin_ia32_v8di_ftype_qi*/ +#define CVTMASK2Q512 1 + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_f64x2 (__m128d __A) @@ -534,7 +537,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_movm_epi64 (__mmask8 __A) { - return (__m512i) __builtin_ia32_cvtmask2q512 (__A); 
+ return (__m512i) __builtin_ia32_v8di_ftype_qi (__A, CVTMASK2Q512); } extern __inline __m512i diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 864d0ea..d59a88e 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -686,6 +686,7 @@ DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT, QI) DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT, QI, INT) DEF_FUNCTION_TYPE (V16SI, HI) DEF_FUNCTION_TYPE (V8DI, QI) +DEF_FUNCTION_TYPE (V8DI, QI, INT) DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT) DEF_FUNCTION_TYPE (UCHAR, UINT, UINT, UINT) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 71a5b22..9d38151 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -30497,9 +30497,17 @@ enum ix86_builtins IX86_BUILTIN_READ_FLAGS, IX86_BUILTIN_WRITE_FLAGS, + IX86_BUILTIN_V8DI_FTYPE_QI, IX86_BUILTIN_MAX }; +enum V8DI_FTYPE_QI_type +{ + BROADCASTMB512, + CVTMASK2Q512, + V8DI_FTYPE_QI_type_MAX +}; + /* Table for the ix86 builtin decls. 
*/ static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; @@ -31878,7 +31886,7 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI }, - { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI }, + { 0, CODE_FOR_nothing, "__builtin_ia32_v8di_ftype_qi", IX86_BUILTIN_V8DI_FTYPE_QI, UNKNOWN, (int) V8DI_FTYPE_QI_INT }, { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI }, @@ -32795,7 +32803,6 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI }, { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI }, { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI }, - { 
OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI }, /* AVX512BW. */ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI }, @@ -33389,6 +33396,11 @@ static const struct builtin_description bdesc_tm[] = { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, }; +static const struct builtin_description bdesc_united[] = +{ + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_v8di_ftype_qi", IX86_BUILTIN_V8DI_FTYPE_QI, UNKNOWN, V8DI_FTYPE_QI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_v8di_ftype_qi", IX86_BUILTIN_V8DI_FTYPE_QI, UNKNOWN, V8DI_FTYPE_QI } +}; /* TM callbacks. */ /* Return the builtin decl needed to load a vector of TYPE. */ @@ -36505,7 +36517,6 @@ ix86_expand_args_builtin (const struct builtin_description *d, machine_mode rmode = VOIDmode; bool swap = false; enum rtx_code comparison = d->comparison; - switch ((enum ix86_builtin_func_type) d->flag) { case V2DF_FTYPE_V2DF_ROUND: @@ -36806,6 +36817,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case HI_FTYPE_HI_INT: case QI_FTYPE_V4SF_INT: case QI_FTYPE_V2DF_INT: + case V8DI_FTYPE_QI_INT: nargs = 2; nargs_constant = 1; break; @@ -40006,12 +40018,37 @@ addcarryx: if (d->code == fcode) switch (fcode) { - case IX86_BUILTIN_FABSQ: + case IX86_BUILTIN_V8DI_FTYPE_QI: + { + tree opcode_arg_tree = CALL_EXPR_ARG (exp, 1); + rtx opcode_arg_rtx = expand_normal (opcode_arg_tree); + int opcode = INTVAL(opcode_arg_rtx); + switch((enum V8DI_FTYPE_QI_type)opcode) + { + case BROADCASTMB512: + { + ix86_builtins_isa[d->code].isa = OPTION_MASK_ISA_AVX512CD; + return ix86_expand_args_builtin (&bdesc_united[opcode], exp, target); + } + case CVTMASK2Q512: + { + ix86_builtins_isa[d->code].isa = 
OPTION_MASK_ISA_AVX512DQ; + return ix86_expand_args_builtin (&bdesc_united[opcode], exp, target); + } + default: + error ("Invalid second argument \"%d\" was passed to \"__builtin_ia32_v8di_ftype_qi\". \ + Only values from \"0\" to \"%d\" are mapped to instructions.", + opcode, (int)V8DI_FTYPE_QI_type_MAX - 1); + return const0_rtx; + } + break; + } + case IX86_BUILTIN_FABSQ: case IX86_BUILTIN_COPYSIGNQ: if (!TARGET_SSE) /* Emit a normal call if SSE isn't available. */ return expand_call (exp, target, ignore); - default: + default: return ix86_expand_args_builtin (d, exp, target); } diff --git a/gcc/testsuite/gcc.target/i386/testimm-11.c b/gcc/testsuite/gcc.target/i386/testimm-11.c new file mode 100644 index 0000000..bb7db92 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/testimm-11.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512f" } */ + +#include <x86intrin.h> + +__m512i m512i; +__mmask8 mmask8; + +void +test (void) +{ + m512i = __builtin_ia32_v8di_ftype_qi (mmask8, -1); /* { dg-error "Invalid second argument" } */ +}