On Mon, Oct 23, 2023 at 10:48 AM liuhongt <hongtao....@intel.com> wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ready to push to trunk.

vcond<mode> and vcondeq<mode> shouldn't be necessary if there's
vcond_mask<mode> and vcmp<mode> support, which is the "modern" way of
handling vcond<mode>.  Unless the ISA really can do compare and select
with a single instruction.

Richard.
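[For reference, the mask-based pair in question is already part of the quoted
patch; below is a condensed sketch of the V4HF/64-bit versions added in
mmx.md, with the C bodies elided to comments and the predicates and target
conditions taken verbatim from the patch:]

  (define_expand "vec_cmpv4hfqi"
    [(set (match_operand:QI 0 "register_operand")
          (match_operator:QI 1 ""
            [(match_operand:V4HF 2 "nonimmediate_operand")
             (match_operand:V4HF 3 "nonimmediate_operand")]))]
    "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
     && ix86_partial_vec_fp_math"
  {
    /* Widen both inputs to V8HF and reuse the V8HF vec_cmp pattern.  */
    ...
  })

  (define_expand "vcond_mask_<mode>qi"   ; <mode> in {V4HF, V4BF, V4HI}
    [(set (match_operand:V4FI_64 0 "register_operand")
          (vec_merge:V4FI_64
            (match_operand:V4FI_64 1 "register_operand")
            (match_operand:V4FI_64 2 "register_operand")
            (match_operand:QI 3 "register_operand")))]
    "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
  {
    /* Forward to the corresponding 128-bit vcond_mask pattern.  */
    ...
  })

[With a mask-producing vec_cmp and a mask-consuming vcond_mask like these,
the middle end can synthesize the compare-and-select itself, so the
vcond<mode>/vcondu<mode> expanders quoted below are the part under question.]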
> gcc/ChangeLog:
>
>         PR target/103861
>         * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle
>         V2HF/V2BF/V4HF/V4BFmode.
>         * config/i386/mmx.md (vec_cmpv4hfqi): New expander.
>         (vcond<mode>v4hf): Ditto.
>         (vcond<mode>v4hi): Ditto.
>         (vcondu<mode>v4hi): Ditto.
>         (vcond_mask_<mode>v4hi): Ditto.
>         (vcond_mask_<mode>qi): Ditto.
>         (vec_cmpv2hfqi): Ditto.
>         (vcond<mode>v2hf): Ditto.
>         (vcond<mode>v2hi): Ditto.
>         (vcondu<mode>v2hi): Ditto.
>         (vcond_mask_<mode>v2hi): Ditto.
>         * config/i386/sse.md (vcond<mode><mode>): Merge this with ..
>         (vcond<sseintvecmodelower><mode>): .. this into ..
>         (vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): .. this,
>         and extend to V8BF/V16BF/V32BFmode.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/part-vect-vcondhf.C: New test.
>         * gcc.target/i386/part-vect-vec_cmphf.c: New test.
> ---
>  gcc/config/i386/i386-expand.cc             |   4 +
>  gcc/config/i386/mmx.md                     | 237 +++++++++++++++++-
>  gcc/config/i386/sse.md                     |  25 +-
>  .../g++.target/i386/part-vect-vcondhf.C    |  34 +++
>  .../gcc.target/i386/part-vect-vec_cmphf.c  |  26 ++
>  5 files changed, 304 insertions(+), 22 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 1eae9d7c78c..9658f9c5a2d 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -4198,6 +4198,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
>        break;
>      case E_V8QImode:
>      case E_V4HImode:
> +    case E_V4HFmode:
> +    case E_V4BFmode:
>      case E_V2SImode:
>        if (TARGET_SSE4_1)
>          {
> @@ -4207,6 +4209,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
>        break;
>      case E_V4QImode:
>      case E_V2HImode:
> +    case E_V2HFmode:
> +    case E_V2BFmode:
>        if (TARGET_SSE4_1)
>          {
>            gen = gen_mmx_pblendvb_v4qi;
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 491a0a51272..b9617e9d8c6 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -61,6 +61,9 @@ (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
>  (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
>
>  (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
> +(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
> +(define_mode_iterator V4F_64 [V4HF V4BF])
> +(define_mode_iterator V2F_32 [V2HF V2BF])
>  ;; 4-byte integer vector modes
>  (define_mode_iterator VI_32 [V4QI V2HI])
>
> @@ -1972,10 +1975,12 @@ (define_mode_attr mov_to_sse_suffix
>    [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
>
>  (define_mode_attr mmxxmmmode
> -  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")])
> +  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
> +   (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
>
>  (define_mode_attr mmxxmmmodelower
> -  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")])
> +  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
> +   (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
>
>  (define_expand "movd_<mode>_to_sse"
>    [(set (match_operand:<mmxxmmmode> 0 "register_operand")
> @@ -2114,6 +2119,234 @@ (define_insn_and_split "*mmx_nabs<mode>2"
>    [(set (match_dup 0)
>         (ior:<MODE> (match_dup 1) (match_dup 2)))])
>
> +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> +;;
> +;; Parallel half-precision floating point comparisons
> +;;
> +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> +
> +(define_expand "vec_cmpv4hfqi"
> +  [(set (match_operand:QI 0 "register_operand")
> +        (match_operator:QI 1 ""
> +          [(match_operand:V4HF 2 "nonimmediate_operand")
> +           (match_operand:V4HF 3 "nonimmediate_operand")]))]
> +  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
> +   && ix86_partial_vec_fp_math"
> +{
> +  rtx ops[4];
> +  ops[3] = gen_reg_rtx (V8HFmode);
> +  ops[2] = gen_reg_rtx (V8HFmode);
> +
> +  emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
> +  emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
> +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> +  DONE;
> +})
> +
> +(define_expand "vcond<mode>v4hf"
> +  [(set (match_operand:V4FI_64 0 "register_operand")
> +        (if_then_else:V4FI_64
> +          (match_operator 3 ""
> +            [(match_operand:V4HF 4 "nonimmediate_operand")
> +             (match_operand:V4HF 5 "nonimmediate_operand")])
> +          (match_operand:V4FI_64 1 "general_operand")
> +          (match_operand:V4FI_64 2 "general_operand")))]
> +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> +   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
> +{
> +  rtx ops[6];
> +  ops[5] = gen_reg_rtx (V8HFmode);
> +  ops[4] = gen_reg_rtx (V8HFmode);
> +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> +                           force_reg (<MODE>mode, operands[1]),
> +                           <MODE>mode);
> +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> +                           force_reg (<MODE>mode, operands[2]),
> +                           <MODE>mode);
> +  ops[3] = operands[3];
> +  emit_insn (gen_movq_v4hf_to_sse (ops[4], operands[4]));
> +  emit_insn (gen_movq_v4hf_to_sse (ops[5], operands[5]));
> +  bool ok = ix86_expand_fp_vcond (ops);
> +  gcc_assert (ok);
> +
> +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> +                                               <mmxxmmmode>mode));
> +  DONE;
> +})
> +
> +(define_expand "vcond<mode>v4hi"
> +  [(set (match_operand:V4F_64 0 "register_operand")
> +        (if_then_else:V4F_64
> +          (match_operator 3 ""
> +            [(match_operand:V4HI 4 "nonimmediate_operand")
> +             (match_operand:V4HI 5 "nonimmediate_operand")])
> +          (match_operand:V4F_64 1 "general_operand")
> +          (match_operand:V4F_64 2 "general_operand")))]
> +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> +{
> +  bool ok = ix86_expand_int_vcond (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})
> +
> +(define_expand "vcondu<mode>v4hi"
> +  [(set (match_operand:V4F_64 0 "register_operand")
> +        (if_then_else:V4F_64
> +          (match_operator 3 ""
> +            [(match_operand:V4HI 4 "nonimmediate_operand")
> +             (match_operand:V4HI 5 "nonimmediate_operand")])
> +          (match_operand:V4F_64 1 "general_operand")
> +          (match_operand:V4F_64 2 "general_operand")))]
> +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> +{
> +  bool ok = ix86_expand_int_vcond (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode>v4hi"
> +  [(set (match_operand:V4F_64 0 "register_operand")
> +        (vec_merge:V4F_64
> +          (match_operand:V4F_64 1 "register_operand")
> +          (match_operand:V4F_64 2 "register_operand")
> +          (match_operand:V4HI 3 "register_operand")))]
> +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> +{
> +  ix86_expand_sse_movcc (operands[0], operands[3],
> +                         operands[1], operands[2]);
> +  DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode>qi"
> +  [(set (match_operand:V4FI_64 0 "register_operand")
> +        (vec_merge:V4FI_64
> +          (match_operand:V4FI_64 1 "register_operand")
> +          (match_operand:V4FI_64 2 "register_operand")
> +          (match_operand:QI 3 "register_operand")))]
> +  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
> +{
> +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> +                                                 operands[2], operands[3]));
> +  emit_move_insn (operands[0],
> +                  lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> +  DONE;
> +})
> +
> +(define_expand "vec_cmpv2hfqi"
> +  [(set (match_operand:QI 0 "register_operand")
> +        (match_operator:QI 1 ""
> +          [(match_operand:V2HF 2 "nonimmediate_operand")
> +           (match_operand:V2HF 3 "nonimmediate_operand")]))]
> +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> +   && ix86_partial_vec_fp_math"
> +{
> +  rtx ops[4];
> +  ops[3] = gen_reg_rtx (V8HFmode);
> +  ops[2] = gen_reg_rtx (V8HFmode);
> +
> +  emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
> +  emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
> +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> +  DONE;
> +})
> +
> +(define_expand "vcond<mode>v2hf"
> +  [(set (match_operand:V2FI_32 0 "register_operand")
> +        (if_then_else:V2FI_32
> +          (match_operator 3 ""
> +            [(match_operand:V2HF 4 "nonimmediate_operand")
> +             (match_operand:V2HF 5 "nonimmediate_operand")])
> +          (match_operand:V2FI_32 1 "general_operand")
> +          (match_operand:V2FI_32 2 "general_operand")))]
> +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> +   && ix86_partial_vec_fp_math"
> +{
> +  rtx ops[6];
> +  ops[5] = gen_reg_rtx (V8HFmode);
> +  ops[4] = gen_reg_rtx (V8HFmode);
> +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> +                           force_reg (<MODE>mode, operands[1]),
> +                           <MODE>mode);
> +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> +                           force_reg (<MODE>mode, operands[2]),
> +                           <MODE>mode);
> +  ops[3] = operands[3];
> +  emit_insn (gen_movd_v2hf_to_sse (ops[4], operands[4]));
> +  emit_insn (gen_movd_v2hf_to_sse (ops[5], operands[5]));
> +  bool ok = ix86_expand_fp_vcond (ops);
> +  gcc_assert (ok);
> +
> +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> +                                               <mmxxmmmode>mode));
> +  DONE;
> +})
> +
> +(define_expand "vcond<mode>v2hi"
> +  [(set (match_operand:V2F_32 0 "register_operand")
> +        (if_then_else:V2F_32
> +          (match_operator 3 ""
> +            [(match_operand:V2HI 4 "nonimmediate_operand")
> +             (match_operand:V2HI 5 "nonimmediate_operand")])
> +          (match_operand:V2F_32 1 "general_operand")
> +          (match_operand:V2F_32 2 "general_operand")))]
> +  "TARGET_SSE4_1"
> +{
> +  bool ok = ix86_expand_int_vcond (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})
> +
> +(define_expand "vcondu<mode>v2hi"
> +  [(set (match_operand:V2F_32 0 "register_operand")
> +        (if_then_else:V2F_32
> +          (match_operator 3 ""
> +            [(match_operand:V2HI 4 "nonimmediate_operand")
> +             (match_operand:V2HI 5 "nonimmediate_operand")])
> +          (match_operand:V2F_32 1 "general_operand")
> +          (match_operand:V2F_32 2 "general_operand")))]
> +  "TARGET_SSE4_1"
> +{
> +  bool ok = ix86_expand_int_vcond (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode>v2hi"
> +  [(set (match_operand:V2F_32 0 "register_operand")
> +        (vec_merge:V2F_32
> +          (match_operand:V2F_32 1 "register_operand")
> +          (match_operand:V2F_32 2 "register_operand")
> +          (match_operand:V2HI 3 "register_operand")))]
> +  "TARGET_SSE4_1"
> +{
> +  ix86_expand_sse_movcc (operands[0], operands[3],
> +                         operands[1], operands[2]);
> +  DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode>qi"
> +  [(set (match_operand:V2FI_32 0 "register_operand")
> +        (vec_merge:V2FI_32
> +          (match_operand:V2FI_32 1 "register_operand")
> +          (match_operand:V2FI_32 2 "register_operand")
> +          (match_operand:QI 3 "register_operand")))]
> +  "TARGET_AVX512BW && TARGET_AVX512VL"
> +{
> +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> +                                                 operands[2], operands[3]));
> +  emit_move_insn (operands[0],
> +                  lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> +  DONE;
> +})
> +
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  ;;
>  ;; Parallel half-precision floating point rounding operations.
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index c988935d4df..e2a7cbeb722 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -4644,29 +4644,14 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
>    DONE;
>  })
>
> -(define_expand "vcond<mode><mode>"
> -  [(set (match_operand:VHF_AVX512VL 0 "register_operand")
> -        (if_then_else:VHF_AVX512VL
> -          (match_operator 3 ""
> -            [(match_operand:VHF_AVX512VL 4 "vector_operand")
> -             (match_operand:VHF_AVX512VL 5 "vector_operand")])
> -          (match_operand:VHF_AVX512VL 1 "general_operand")
> -          (match_operand:VHF_AVX512VL 2 "general_operand")))]
> -  "TARGET_AVX512FP16"
> -{
> -  bool ok = ix86_expand_fp_vcond (operands);
> -  gcc_assert (ok);
> -  DONE;
> -})
> -
> -(define_expand "vcond<sseintvecmodelower><mode>"
> -  [(set (match_operand:<sseintvecmode> 0 "register_operand")
> -        (if_then_else:<sseintvecmode>
> +(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
> +  [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
> +        (if_then_else:VI2HFBF_AVX512VL
>            (match_operator 3 ""
>              [(match_operand:VHF_AVX512VL 4 "vector_operand")
>               (match_operand:VHF_AVX512VL 5 "vector_operand")])
> -          (match_operand:<sseintvecmode> 1 "general_operand")
> -          (match_operand:<sseintvecmode> 2 "general_operand")))]
> +          (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
> +          (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
>    "TARGET_AVX512FP16"
>  {
>    bool ok = ix86_expand_fp_vcond (operands);
> diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> new file mode 100644
> index 00000000000..8bf01b7cb4a
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> @@ -0,0 +1,34 @@
> +/* PR target/103861 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "vpcmpeqw" 2 } } */
> +/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
> +/* { dg-final { scan-assembler-times "vcmpph" 4 } } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 4 } } */
> +typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu;
> +typedef short __attribute__((__vector_size__ (4))) __v2hi;
> +
> +typedef unsigned short __attribute__((__vector_size__ (8))) __v4hu;
> +typedef short __attribute__((__vector_size__ (8))) __v4hi;
> +
> +typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> +typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
> +
> +
> +__v2hu au, bu;
> +__v2hi as, bs;
> +__v2hf af, bf;
> +
> +__v4hu cu, du;
> +__v4hi cs, ds;
> +__v4hf cf, df;
> +
> +__v2hf auf (__v2hu a, __v2hu b) { return (a > b) ? af : bf; }
> +__v2hf asf (__v2hi a, __v2hi b) { return (a > b) ? af : bf; }
> +__v2hu afu (__v2hf a, __v2hf b) { return (a > b) ? au : bu; }
> +__v2hi afs (__v2hf a, __v2hf b) { return (a > b) ? as : bs; }
> +
> +__v4hf cuf (__v4hu c, __v4hu d) { return (c > d) ? cf : df; }
> +__v4hf csf (__v4hi c, __v4hi d) { return (c > d) ? cf : df; }
> +__v4hu cfu (__v4hf c, __v4hf d) { return (c > d) ? cu : du; }
> +__v4hi cfs (__v4hf c, __v4hf d) { return (c > d) ? cs : ds; }
> diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> new file mode 100644
> index 00000000000..ee8659395eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "vcmpph" 10 } } */
> +
> +typedef _Float16 __attribute__((__vector_size__ (4))) v2hf;
> +typedef _Float16 __attribute__((__vector_size__ (8))) v4hf;
> +
> +
> +#define VCMPMN(type, op, name) \
> +type \
> +__attribute__ ((noinline, noclone)) \
> +vec_cmp_##type##type##name (type a, type b) \
> +{ \
> +  return a op b; \
> +}
> +
> +VCMPMN (v4hf, <, lt)
> +VCMPMN (v2hf, <, lt)
> +VCMPMN (v4hf, <=, le)
> +VCMPMN (v2hf, <=, le)
> +VCMPMN (v4hf, >, gt)
> +VCMPMN (v2hf, >, gt)
> +VCMPMN (v4hf, >=, ge)
> +VCMPMN (v2hf, >=, ge)
> +VCMPMN (v4hf, ==, eq)
> +VCMPMN (v2hf, ==, eq)
> --
> 2.31.1
>