Hello, On 02 Dec 16:09, Kirill Yukhin wrote: > Hello, > On 19 Nov 12:08, Kirill Yukhin wrote: > > Hello, > > On 15 Nov 20:06, Kirill Yukhin wrote: > > > Ping. > > Ping. > Ping. Ping.
Rebased patch at the bottom. -- Thanks, K --- gcc/config/i386/i386.c | 32 ++++ gcc/config/i386/i386.md | 10 ++ gcc/config/i386/sse.md | 457 +++++++++++++++++++++++++---------------------- gcc/config/i386/subst.md | 41 +++++ 4 files changed, 326 insertions(+), 214 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ecf5e0b..a3dd307 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15041,6 +15041,38 @@ ix86_print_operand (FILE *file, rtx x, int code) fputs ("{z}", file); return; + case 'R': + gcc_assert (CONST_INT_P (x)); + + if (ASSEMBLER_DIALECT == ASM_INTEL) + fputs (", ", file); + + switch (INTVAL (x)) + { + case ROUND_NEAREST_INT: + fputs ("{rn-sae}", file); + break; + case ROUND_NEG_INF: + fputs ("{rd-sae}", file); + break; + case ROUND_POS_INF: + fputs ("{ru-sae}", file); + break; + case ROUND_ZERO: + fputs ("{rz-sae}", file); + break; + case ROUND_SAE: + fputs ("{sae}", file); + break; + default: + gcc_unreachable (); + } + + if (ASSEMBLER_DIALECT == ASM_ATT) + fputs (", ", file); + + return; + case '*': if (ASSEMBLER_DIALECT == ASM_ATT) putc ('*', file); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ab5b33f..30b8d74 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -241,6 +241,16 @@ (ROUND_NO_EXC 0x8) ]) +;; Constants to represent AVX512F embedded rounding +(define_constants + [(ROUND_NEAREST_INT 0) + (ROUND_NEG_INF 1) + (ROUND_POS_INF 2) + (ROUND_ZERO 3) + (NO_ROUND 4) + (ROUND_SAE 5) + ]) + ;; Constants to represent pcomtrue/pcomfalse variants (define_constants [(PCOM_FALSE 0) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index adedf44..23edbd3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1229,23 +1229,23 @@ } [(set_attr "isa" "noavx,noavx,avx,avx")]) -(define_expand "<plusminus_insn><mode>3<mask_name>" +(define_expand "<plusminus_insn><mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand") (plusminus:VF 
(match_operand:VF 1 "nonimmediate_operand") (match_operand:VF 2 "nonimmediate_operand")))] - "TARGET_SSE && <mask_mode512bit_condition>" + "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") -(define_insn "*<plusminus_insn><mode>3<mask_name>" +(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (plusminus:VF (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v") - (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>" + (match_operand:VF 2 "nonimmediate_operand" "xm,<round_constraint>")))] + "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>" "@ <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} - v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "prefix" "<mask_prefix3>") @@ -1268,23 +1268,23 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<ssescalarmode>")]) -(define_expand "mul<mode>3<mask_name>" +(define_expand "mul<mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand") (mult:VF (match_operand:VF 1 "nonimmediate_operand") (match_operand:VF 2 "nonimmediate_operand")))] - "TARGET_SSE && <mask_mode512bit_condition>" + "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") -(define_insn "*mul<mode>3<mask_name>" +(define_insn "*mul<mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (mult:VF (match_operand:VF 1 "nonimmediate_operand" "%0,v") - 
(match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>" + (match_operand:VF 2 "nonimmediate_operand" "xm,<round_constraint>")))] + "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>" "@ mul<ssemodesuffix>\t{%2, %0|%0, %2} - vmul<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssemul") (set_attr "prefix" "<mask_prefix3>") @@ -1335,15 +1335,15 @@ } }) -(define_insn "<sse>_div<mode>3<mask_name>" +(define_insn "<sse>_div<mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (div:VF (match_operand:VF 1 "register_operand" "0,v") - (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))] - "TARGET_SSE && <mask_mode512bit_condition>" + (match_operand:VF 2 "nonimmediate_operand" "xm,<round_constraint>")))] + "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" "@ div<ssemodesuffix>\t{%2, %0|%0, %2} - vdiv<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssediv") (set_attr "prefix" "<mask_prefix3>") @@ -1427,11 +1427,11 @@ } }) -(define_insn "<sse>_sqrt<mode>2<mask_name>" +(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=v") - (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))] - "TARGET_SSE && <mask_mode512bit_condition>" - "%vsqrt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "<round_constraint>")))] + "TARGET_SSE && <mask_mode512bit_condition> && 
<round_mode512bit_condition>" + "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "sqrt") (set_attr "btver2_sse_attr" "sqrt") @@ -2698,210 +2698,224 @@ (match_operand:FMAMODE 3 "nonimmediate_operand")))] "") -(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>" +(define_expand "avx512f_fmadd_<mode>_maskz" + [(match_operand:VF_512 0 "register_operand") + (match_operand:VF_512 1 "nonimmediate_operand") + (match_operand:VF_512 2 "nonimmediate_operand") + (match_operand:VF_512 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] + "TARGET_AVX512F" +{ + emit_insn (gen_fma_fmadd_<mode>_maskz_1 ( + operands[0], operands[1], operands[2], operands[3], + CONST0_RTX (<MODE>mode), operands[4])); + DONE; +}) + +(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] - "<sd_mask_mode512bit_condition>" - "@ - vfmadd132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfmadd213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfmadd231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2} + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")))] + "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" + "@ + vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, 
%0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>} vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fmadd_<mode>_mask" +(define_insn "avx512f_fmadd_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 (fma:VF_512 (match_operand:VF_512 1 "register_operand" "0,0") - (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") - (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")) + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v") + (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] "TARGET_AVX512F" "@ - vfmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} + vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" [(set_attr "isa" "fma_avx512f,fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fmadd_<mode>_mask3" +(define_insn "avx512f_fmadd_<mode>_mask3<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=x") (vec_merge:VF_512 (fma:VF_512 (match_operand:VF_512 1 "register_operand" "x") - (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>") (match_operand:VF_512 3 "register_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] "TARGET_AVX512F" - "vfmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + 
"vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" [(set_attr "isa" "fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>" +(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") + (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] - "<sd_mask_mode512bit_condition>" + (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))))] + "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ - vfmsub132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfmsub213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfmsub231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2} + vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>} vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fmsub_<mode>_mask" +(define_insn "avx512f_fmsub_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 (fma:VF_512 (match_operand:VF_512 1 "register_operand" "0,0") - (match_operand:VF_512 2 "nonimmediate_operand" 
"vm,v") + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v") (neg:VF_512 - (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))) + (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] "TARGET_AVX512F" "@ - vfmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} + vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" [(set_attr "isa" "fma_avx512f,fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fmsub_<mode>_mask3" +(define_insn "avx512f_fmsub_<mode>_mask3<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (vec_merge:VF_512 (fma:VF_512 (match_operand:VF_512 1 "register_operand" "v") - (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>") (neg:VF_512 (match_operand:VF_512 3 "register_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] "TARGET_AVX512F" - "vfmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" [(set_attr "isa" "fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>" +(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] - "<sd_mask_mode512bit_condition>" - "@ - vfnmadd132<ssemodesuffix>\t{%2, 
%3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfnmadd213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfnmadd231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2} + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")))] + "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" + "@ + vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>} vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fnmadd_<mode>_mask" +(define_insn "avx512f_fnmadd_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 (fma:VF_512 (neg:VF_512 (match_operand:VF_512 1 "register_operand" "0,0")) - (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") - (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")) + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v") + (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] "TARGET_AVX512F" "@ - vfnmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfnmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} + vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, 
%0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" [(set_attr "isa" "fma_avx512f,fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fnmadd_<mode>_mask3" +(define_insn "avx512f_fnmadd_<mode>_mask3<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (vec_merge:VF_512 (fma:VF_512 (neg:VF_512 (match_operand:VF_512 1 "register_operand" "v")) - (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>") (match_operand:VF_512 3 "register_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] "TARGET_AVX512F" - "vfnmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" [(set_attr "isa" "fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>" +(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] - "<sd_mask_mode512bit_condition>" + (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))))] + "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ - vfnmsub132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfnmsub213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfnmsub231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2} + vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, 
%3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>} vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fnmsub_<mode>_mask" +(define_insn "avx512f_fnmsub_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 (fma:VF_512 (neg:VF_512 (match_operand:VF_512 1 "register_operand" "0,0")) - (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v") (neg:VF_512 - (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))) + (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] "TARGET_AVX512F" "@ - vfnmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfnmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} + vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" [(set_attr "isa" "fma_avx512f,fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fnmsub_<mode>_mask3" +(define_insn "avx512f_fnmsub_<mode>_mask3<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (vec_merge:VF_512 (fma:VF_512 (neg:VF_512 (match_operand:VF_512 1 "register_operand" "v")) - (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>") (neg:VF_512 (match_operand:VF_512 3 "register_operand" 
"0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] "TARGET_AVX512F" - "vfnmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" [(set_attr "isa" "fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) @@ -2940,109 +2954,109 @@ DONE; }) -(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name>" +(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x") - (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m") - (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")] + [(match_operand:VF 1 "nonimmediate_operand" "%0,0,v,x,x") + (match_operand:VF 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m") + (match_operand:VF 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")] UNSPEC_FMADDSUB))] - "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition>" + "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ - vfmaddsub132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfmaddsub213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfmaddsub231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2} + vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>} vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" 
"fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fmaddsub_<mode>_mask" +(define_insn "avx512f_fmaddsub_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 (unspec:VF_512 [(match_operand:VF_512 1 "register_operand" "0,0") - (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") - (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")] + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v") + (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")] UNSPEC_FMADDSUB) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] "TARGET_AVX512F" "@ - vfmaddsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfmaddsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} + vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" [(set_attr "isa" "fma_avx512f,fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fmaddsub_<mode>_mask3" +(define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (vec_merge:VF_512 (unspec:VF_512 [(match_operand:VF_512 1 "register_operand" "v") - (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>") (match_operand:VF_512 3 "register_operand" "0")] UNSPEC_FMADDSUB) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] "TARGET_AVX512F" - "vfmaddsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" [(set_attr "isa" "fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name>" 
+(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x") - (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m") + [(match_operand:VF 1 "nonimmediate_operand" "%0,0,v,x,x") + (match_operand:VF 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m") (neg:VF - (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))] + (match_operand:VF 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))] UNSPEC_FMADDSUB))] - "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition>" + "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ - vfmsubadd132<ssemodesuffix>\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfmsubadd213<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfmsubadd231<ssemodesuffix>\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2} + vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>} vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fmsubadd_<mode>_mask" +(define_insn "avx512f_fmsubadd_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 (unspec:VF_512 [(match_operand:VF_512 1 "register_operand" "0,0") - (match_operand:VF_512 2 "nonimmediate_operand" "vm,v") + (match_operand:VF_512 2 "nonimmediate_operand" 
"<round_constraint>,v") (neg:VF_512 - (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))] + (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))] UNSPEC_FMADDSUB) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))] "TARGET_AVX512F" "@ - vfmsubadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfmsubadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} + vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" [(set_attr "isa" "fma_avx512f,fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_fmsubadd_<mode>_mask3" +(define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (vec_merge:VF_512 (unspec:VF_512 [(match_operand:VF_512 1 "register_operand" "v") - (match_operand:VF_512 2 "nonimmediate_operand" "vm") + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>") (neg:VF_512 (match_operand:VF_512 3 "register_operand" "0"))] UNSPEC_FMADDSUB) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))] "TARGET_AVX512F" - "vfmsubadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" [(set_attr "isa" "fma_avx512f") (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) @@ -3050,7 +3064,22 @@ ;; FMA3 floating point scalar intrinsics. These merge result with ;; high-order elements from the destination register. 
-(define_expand "fmai_vmfmadd_<mode>" +(define_expand "fmai_vmfmadd_<mode>_maskz<round_name>" + [(set (match_operand:VF_128 0 "register_operand") + (vec_merge:VF_128 + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand") + (match_operand:VF_128 2 "nonimmediate_operand") + (match_operand:VF_128 3 "nonimmediate_operand")) + (match_dup <round_opnum>) + (match_operand:QI 4 "register_operand")) + (match_dup 1) + (const_int 1)))] + "TARGET_AVX512F" + "operands[<round_opnum>] = CONST0_RTX (<MODE>mode);") + +(define_expand "fmai_vmfmadd_<mode><round_name>" [(set (match_operand:VF_128 0 "register_operand") (vec_merge:VF_128 (fma:VF_128 @@ -3081,51 +3110,51 @@ [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") - (match_operand:VF_128 2 "nonimmediate_operand" "vm, v") + (match_operand:VF_128 1 "nonimmediate_operand" "0,0") + (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v") (neg:VF_128 - (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) + (match_operand:VF_128 3 "nonimmediate_operand" " v,<round_constraint>"))) (match_dup 1) (const_int 1)))] "TARGET_FMA || TARGET_AVX512F" "@ - vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2} - vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}" + vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} + vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fmai_fnmadd_<mode>" +(define_insn "*fmai_fnmadd_<mode><round_name>" [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (neg:VF_128 - (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")) - (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") - (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) + 
(match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")) + (match_operand:VF_128 1 "nonimmediate_operand" "0,0") + (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")) (match_dup 1) (const_int 1)))] "TARGET_FMA || TARGET_AVX512F" "@ - vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2} - vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}" + vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} + vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fmai_fnmsub_<mode>" +(define_insn "*fmai_fnmsub_<mode><round_name>" [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 (fma:VF_128 (neg:VF_128 - (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")) + (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>, v")) (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") (neg:VF_128 - (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) + (match_operand:VF_128 3 "nonimmediate_operand" " v,<round_constraint>"))) (match_dup 1) (const_int 1)))] "TARGET_FMA || TARGET_AVX512F" "@ - vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2} - vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}" + vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} + vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) @@ -3246,18 +3275,18 @@ (set_attr "prefix_rep" "0") (set_attr "mode" "SF")]) -(define_insn "sse_cvtsi2ss" +(define_insn "sse_cvtsi2ss<round_name>" [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF - (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm"))) + (float:SF (match_operand:SI 2 "nonimmediate_operand" 
"r,m,<round_constraint3>"))) (match_operand:V4SF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE" "@ cvtsi2ss\t{%2, %0|%0, %2} cvtsi2ss\t{%2, %0|%0, %2} - vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}" + vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseicvt") (set_attr "athlon_decode" "vector,double,*") @@ -3267,18 +3296,18 @@ (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "SF")]) -(define_insn "sse_cvtsi2ssq" +(define_insn "sse_cvtsi2ssq<round_name>" [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF - (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm"))) + (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,<round_constraint3>"))) (match_operand:V4SF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE && TARGET_64BIT" "@ cvtsi2ssq\t{%2, %0|%0, %2} cvtsi2ssq\t{%2, %0|%0, %2} - vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}" + vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseicvt") (set_attr "athlon_decode" "vector,double,*") @@ -3290,15 +3319,15 @@ (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "SF")]) -(define_insn "sse_cvtss2si" +(define_insn "sse_cvtss2si<round_name>" [(set (match_operand:SI 0 "register_operand" "=r,r") (unspec:SI [(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "v,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_constraint2>") (parallel [(const_int 0)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE" - "%vcvtss2si\t{%1, %0|%0, %k1}" + "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}" [(set_attr "type" "sseicvt") (set_attr "athlon_decode" "double,vector") (set_attr "bdver1_decode" "double,double") @@ -3320,15 +3349,15 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) -(define_insn "sse_cvtss2siq" +(define_insn "sse_cvtss2siq<round_name>" [(set (match_operand:DI 0 "register_operand" "=r,r") (unspec:DI 
[(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "v,m") + (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_constraint2>") (parallel [(const_int 0)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE && TARGET_64BIT" - "%vcvtss2si{q}\t{%1, %0|%0, %k1}" + "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}" [(set_attr "type" "sseicvt") (set_attr "athlon_decode" "double,vector") (set_attr "bdver1_decode" "double,double") @@ -3382,50 +3411,50 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "DI")]) -(define_insn "cvtusi2<ssescalarmodesuffix>32" +(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (vec_duplicate:VF_128 (unsigned_float:<ssescalarmode> - (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (match_operand:SI 2 "nonimmediate_operand" "<round_constraint3>"))) (match_operand:VF_128 1 "register_operand" "v") (const_int 1)))] - "TARGET_AVX512F" - "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "TARGET_AVX512F && <round_modev4sf_condition>" + "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" [(set_attr "type" "sseicvt") (set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "cvtusi2<ssescalarmodesuffix>64" +(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (vec_duplicate:VF_128 (unsigned_float:<ssescalarmode> - (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (match_operand:DI 2 "nonimmediate_operand" "<round_constraint3>"))) (match_operand:VF_128 1 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F && TARGET_64BIT" - "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" [(set_attr "type" "sseicvt") (set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "float<sseintvecmodelower><mode>2<mask_name>" +(define_insn 
"float<sseintvecmodelower><mode>2<mask_name><round_name>" [(set (match_operand:VF1 0 "register_operand" "=v") (float:VF1 - (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))] - "TARGET_SSE2 && <mask_mode512bit_condition>" - "%vcvtdq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))] + "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>" + "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "ufloatv16siv16sf2<mask_name>" +(define_insn "ufloatv16siv16sf2<mask_name><round_name>" [(set (match_operand:V16SF 0 "register_operand" "=v") (unsigned_float:V16SF - (match_operand:V16SI 1 "nonimmediate_operand" "vm")))] + (match_operand:V16SI 1 "nonimmediate_operand" "<round_constraint>")))] "TARGET_AVX512F" - "vcvtudq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "V16SF")]) @@ -3460,24 +3489,24 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name>" +(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (unspec:V16SI - [(match_operand:V16SF 1 "nonimmediate_operand" "vm")] + [(match_operand:V16SF 1 "nonimmediate_operand" "<round_constraint>")] UNSPEC_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name>" 
+(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (unspec:V16SI - [(match_operand:V16SF 1 "nonimmediate_operand" "vm")] + [(match_operand:V16SF 1 "nonimmediate_operand" "<round_constraint>")] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -3595,18 +3624,18 @@ (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "DF")]) -(define_insn "sse2_cvtsi2sdq" +(define_insn "sse2_cvtsi2sdq<round_name>" [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") (vec_merge:V2DF (vec_duplicate:V2DF - (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm"))) + (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,<round_constraint3>"))) (match_operand:V2DF 1 "register_operand" "0,0,v") (const_int 1)))] "TARGET_SSE2 && TARGET_64BIT" "@ cvtsi2sdq\t{%2, %0|%0, %2} cvtsi2sdq\t{%2, %0|%0, %2} - vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}" + vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseicvt") (set_attr "athlon_decode" "double,direct,*") @@ -3617,28 +3646,28 @@ (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "DF")]) -(define_insn "avx512f_vcvtss2usi" +(define_insn "avx512f_vcvtss2usi<round_name>" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "vm") + (match_operand:V4SF 1 "nonimmediate_operand" "<round_constraint>") (parallel [(const_int 0)]))] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtss2usi\t{%1, %0|%0, %1}" + "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" [(set_attr "type" "sseicvt") (set_attr "prefix" "evex") (set_attr "mode" "SI")]) -(define_insn "avx512f_vcvtss2usiq" +(define_insn 
"avx512f_vcvtss2usiq<round_name>" [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "vm") + (match_operand:V4SF 1 "nonimmediate_operand" "<round_constraint>") (parallel [(const_int 0)]))] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512F && TARGET_64BIT" - "vcvtss2usi\t{%1, %0|%0, %1}" + "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" [(set_attr "type" "sseicvt") (set_attr "prefix" "evex") (set_attr "mode" "DI")]) @@ -3667,28 +3696,28 @@ (set_attr "prefix" "evex") (set_attr "mode" "DI")]) -(define_insn "avx512f_vcvtsd2usi" +(define_insn "avx512f_vcvtsd2usi<round_name>" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "vm") + (match_operand:V2DF 1 "nonimmediate_operand" "<round_constraint>") (parallel [(const_int 0)]))] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtsd2usi\t{%1, %0|%0, %1}" + "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" [(set_attr "type" "sseicvt") (set_attr "prefix" "evex") (set_attr "mode" "SI")]) -(define_insn "avx512f_vcvtsd2usiq" +(define_insn "avx512f_vcvtsd2usiq<round_name>" [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "vm") + (match_operand:V2DF 1 "nonimmediate_operand" "<round_constraint>") (parallel [(const_int 0)]))] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512F && TARGET_64BIT" - "vcvtsd2usi\t{%1, %0|%0, %1}" + "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" [(set_attr "type" "sseicvt") (set_attr "prefix" "evex") (set_attr "mode" "DI")]) @@ -3717,15 +3746,15 @@ (set_attr "prefix" "evex") (set_attr "mode" "DI")]) -(define_insn "sse2_cvtsd2si" +(define_insn "sse2_cvtsd2si<round_name>" [(set (match_operand:SI 0 "register_operand" "=r,r") (unspec:SI [(vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "v,m") + (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_constraint2>") 
(parallel [(const_int 0)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2" - "%vcvtsd2si\t{%1, %0|%0, %q1}" + "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}" [(set_attr "type" "sseicvt") (set_attr "athlon_decode" "double,vector") (set_attr "bdver1_decode" "double,double") @@ -3748,15 +3777,15 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) -(define_insn "sse2_cvtsd2siq" +(define_insn "sse2_cvtsd2siq<round_name>" [(set (match_operand:DI 0 "register_operand" "=r,r") (unspec:DI [(vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" "v,m") + (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_constraint2>") (parallel [(const_int 0)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_SSE2 && TARGET_64BIT" - "%vcvtsd2si{q}\t{%1, %0|%0, %q1}" + "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}" [(set_attr "type" "sseicvt") (set_attr "athlon_decode" "double,vector") (set_attr "bdver1_decode" "double,double") @@ -3877,13 +3906,13 @@ (set_attr "ssememalign" "64") (set_attr "mode" "V2DF")]) -(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name>" +(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>" [(set (match_operand:V8SI 0 "register_operand" "=v") (unspec:V8SI - [(match_operand:V8DF 1 "nonimmediate_operand" "vm")] + [(match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")] UNSPEC_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtpd2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "OI")]) @@ -3951,13 +3980,13 @@ (set_attr "athlon_decode" "vector") (set_attr "bdver1_decode" "double")]) -(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name>" +(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>" [(set (match_operand:V8SI 0 "register_operand" "=v") (unspec:V8SI - [(match_operand:V8DF 1 "nonimmediate_operand" "vm")] + [(match_operand:V8DF 1 "nonimmediate_operand" 
"<round_constraint>")] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512F" - "vcvtpd2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "OI")]) @@ -4073,12 +4102,12 @@ (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "DF")]) -(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name>" +(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>" [(set (match_operand:V8SF 0 "register_operand" "=v") (float_truncate:V8SF - (match_operand:V8DF 1 "nonimmediate_operand" "vm")))] + (match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")))] "TARGET_AVX512F" - "vcvtpd2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "V8SF")]) @@ -6446,14 +6475,14 @@ [(set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "avx512f_scalef<mode><mask_name>" +(define_insn "avx512f_scalef<mode><mask_name><round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 [(match_operand:VF_512 1 "register_operand" "v") - (match_operand:VF_512 2 "nonimmediate_operand" "vm")] + (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")] UNSPEC_SCALEF))] "TARGET_AVX512F" - "%vscalef<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + "%vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -8187,22 +8216,22 @@ [(set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "<code><mode>3<mask_name>" +(define_expand "<code><mode>3<mask_name><round_name>" [(set (match_operand:VI124_256_48_512 0 "register_operand") (maxmin:VI124_256_48_512 
(match_operand:VI124_256_48_512 1 "nonimmediate_operand") (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))] - "TARGET_AVX2 && <mask_mode512bit_condition>" + "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>" "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") -(define_insn "*avx2_<code><mode>3<mask_name>" +(define_insn "*avx2_<code><mode>3<mask_name><round_name>" [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v") (maxmin:VI124_256_48_512 (match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v") - (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "vm")))] + (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "<round_constraint>")))] "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) - && <mask_mode512bit_condition>" - "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + && <mask_mode512bit_condition> && <round_mode512bit_condition>" + "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" [(set_attr "type" "sseiadd") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") @@ -12500,33 +12529,33 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "avx512er_exp2<mode><mask_name>" +(define_insn "avx512er_exp2<mode><mask_name><round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 - [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] + [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")] UNSPEC_EXP2))] "TARGET_AVX512ER" - "vexp2<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vexp2<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name>" +(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_name>" [(set 
(match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 - [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] + [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")] UNSPEC_RCP28))] "TARGET_AVX512ER" - "vrcp28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vrcp28<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name>" +(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512 - [(match_operand:VF_512 1 "nonimmediate_operand" "vm")] + [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")] UNSPEC_RSQRT28))] "TARGET_AVX512ER" - "vrsqrt28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vrsqrt28<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index 594dc43..76c183c 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -30,6 +30,16 @@ (define_mode_iterator SUBST_S [QI HI SI DI]) +(define_mode_iterator SUBST_A + [V16QI + V16HI V8HI + V16SI V8SI V4SI + V8DI V4DI V2DI + V16SF V8SF V4SF + V8DF V4DF V2DF + QI HI SI DI SF DF + CCFP CCFPU]) + (define_subst_attr "mask_name" "mask" "" "_mask") (define_subst_attr "mask_applied" "mask" "false" "true") (define_subst_attr "mask_operand2" "mask" "" "%{%3%}%N2") @@ -87,3 +97,34 @@ (match_operand:SUBST_V 2 "const0_operand" "C") (match_operand:<avx512fmaskmode> 3 "register_operand" "k"))) ]) + +(define_subst_attr "round_name" "round" "" "_round") +(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4") +(define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5") +(define_subst_attr "round_mask_scalar_operand3" "mask_scalar" 
"%R3" "%R5") +(define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6") +(define_subst_attr "round_op2" "round" "" "%R2") +(define_subst_attr "round_op3" "round" "" "%R3") +(define_subst_attr "round_op4" "round" "" "%R4") +(define_subst_attr "round_op5" "round" "" "%R5") +(define_subst_attr "round_op6" "round" "" "%R6") +(define_subst_attr "round_mask_op2" "round" "" "<round_mask_operand2>") +(define_subst_attr "round_mask_op3" "round" "" "<round_mask_operand3>") +(define_subst_attr "round_mask_scalar_op3" "round" "" "<round_mask_scalar_operand3>") +(define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>") +(define_subst_attr "round_constraint" "round" "vm" "v") +(define_subst_attr "round_constraint2" "round" "m" "v") +(define_subst_attr "round_constraint3" "round" "rm" "r") +(define_subst_attr "round_mode512bit_condition" "round" "1" "(GET_MODE (operands[0]) == V16SFmode || GET_MODE (operands[0]) == V8DFmode)") +(define_subst_attr "round_modev4sf_condition" "round" "1" "(GET_MODE (operands[0]) == V4SFmode)") +(define_subst_attr "round_codefor" "round" "*" "") +(define_subst_attr "round_opnum" "round" "5" "6") + +(define_subst "round" + [(set (match_operand:SUBST_A 0) + (match_operand:SUBST_A 1))] + "TARGET_AVX512F" + [(parallel[ + (set (match_dup 0) + (match_dup 1)) + (unspec [(match_operand:SI 2 "const_0_to_4_operand")] UNSPEC_EMBEDDED_ROUNDING)])])