This is an attempt to start moving the x86 backend to use standard pattern names for [mask_]gather_load and [mask_]scatter_store rather than using the builtin_{gather,scatter} target hooks.
I've started with AVX2 gathers and given x86 only supports masked gather I only implemented mask_gather_load. Note while for the builtin_gather case the vectorizer will provide an all-true mask operand for non-masked gathers this capability does not exist for the IFN path yet, so only testcases with actual masked gathers will work. If this looks reasonable on the backend side I'll see to first complete the vectorizer part, ripping out the target hook and arranging for the missing pieces. Another one is the support for SImode indices with DFmode data which requires unpacking the index vector and actually recognizing the IFN. 2021-08-17 Richard Biener <rguent...@suse.de> * tree-vect-data-refs.c (vect_check_gather_scatter): Always use internal functions. * config/i386/sse.md (mask_gather_load<mode><vec_gather_idxsi>): New expander. (mask_gather_load<mode><vec_gather_idxdi>): Likewise. --- gcc/config/i386/sse.md | 56 +++++++++++++++++++++++++++++++++++++++ gcc/tree-vect-data-refs.c | 4 +-- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 3957c86c3df..40bec98d9f7 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -23232,12 +23232,22 @@ (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI") (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI") (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")]) +(define_mode_attr vec_gather_idxsi + [(V2DI "v4si") (V4DI "v4si") (V8DI "v8si") + (V2DF "v4si") (V4DF "v4si") (V8DF "v8si") + (V4SI "v4si") (V8SI "v8si") (V16SI "v16si") + (V4SF "v4si") (V8SF "v8si") (V16SF "v16si")]) (define_mode_attr VEC_GATHER_IDXDI [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI") (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI") (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI") (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")]) +(define_mode_attr vec_gather_idxdi + [(V2DI "v2di") (V4DI "v4di") (V8DI "v8di") + (V2DF "v2di") (V4DF "v4di") (V8DF "v8di") + (V4SI "v2di") (V8SI "v4di") (V16SI "v8di") + (V4SF "v2di") (V8SF "v4di") (V16SF "v8di")]) (define_mode_attr VEC_GATHER_SRCDI [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI") @@ -23245,6 +23255,29 @@ (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI") (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")]) +(define_expand "mask_gather_load<mode><vec_gather_idxsi>" + [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") + (unspec:VEC_GATHER_MODE + [(pc) + (mem:<ssescalarmode> + (match_par_dup 6 + [(match_operand 1 "vsib_address_operand") + (match_operand:<VEC_GATHER_IDXSI> + 2 "register_operand") + (match_operand:SI 4 "const1248_operand") + (match_operand:SI 3 "const0_operand")])) + (mem:BLK (scratch)) + (match_operand:<sseintvecmode> 5 "register_operand")] + UNSPEC_GATHER)) + (clobber (match_scratch:VEC_GATHER_MODE 7))])] + "TARGET_AVX2 && TARGET_USE_GATHER" +{ + operands[5] = gen_lowpart_SUBREG (<MODE>mode, operands[5]); + operands[6] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], + operands[5]), UNSPEC_VSIBADDR); +}) + (define_expand "avx2_gathersi<mode>" [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") (unspec:VEC_GATHER_MODE @@ -23306,6 +23339,29 @@ (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) +(define_expand "mask_gather_load<mode><vec_gather_idxdi>" + [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") + (unspec:VEC_GATHER_MODE + [(pc) + (mem:<ssescalarmode> + (match_par_dup 6 + [(match_operand 1 "vsib_address_operand") + (match_operand:<VEC_GATHER_IDXDI> + 2 "register_operand") + (match_operand:SI 4 "const1248_operand ") + (match_operand:SI 3 "const0_operand")])) + (mem:BLK (scratch)) + (match_operand:<sseintvecmode> 5 "register_operand")] + UNSPEC_GATHER)) + (clobber (match_scratch:VEC_GATHER_MODE 7))])] + "TARGET_AVX2 && TARGET_USE_GATHER" +{ + operands[5] = gen_lowpart_SUBREG (<VEC_GATHER_SRCDI>mode, operands[5]); + operands[6] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[1], operands[2], + operands[4]), UNSPEC_VSIBADDR); +}) + (define_expand "avx2_gatherdi<mode>" [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") (unspec:VEC_GATHER_MODE diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index d594c0a1b1e..67564b45c32 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -3824,9 +3824,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, /* True if we should aim to use internal functions rather than built-in functions. */ - bool use_ifn_p = (DR_IS_READ (dr) - ? supports_vec_gather_load_p () - : supports_vec_scatter_store_p ()); + bool use_ifn_p = true; base = DR_REF (dr); /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF, -- 2.31.1