On 10/17/2013 07:15 AM, Kirill Yukhin wrote:
> +(define_mode_attr ssescalarsize
> + [(V8DI "64") (V4DI "64") (V2DI "64")
> + (V32HI "16") (V16HI "16") (V8HI "16")
> + (V16SI "32") (V8SI "32") (V4SI "32")
> + (V16SF "16") (V8DF "64")])
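Error on V16SF: its elements are 32 bits wide, so the value should be "32", not "16".
Probably better to fill this out with the remaining vector modes as well.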
> +(define_insn "avx512f_load<mode>_mask"
> + [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
> + (vec_merge:VI48F_512
> + (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
> + (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
> + (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
> + "TARGET_AVX512F"
> +{
> + switch (get_attr_mode (insn))
Better to just use <sseinsnmode> here, as it's a compile-time constant.
> + {
> + case MODE_V8DF:
> + case MODE_V16SF:
> + return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
> + default:
> + return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
> + }
> +}
> + [(set_attr "type" "ssemov")
> + (set_attr "prefix" "evex")
> + (set_attr "memory" "none,load")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "avx512f_store<mode>_mask"
Likewise.
> +(define_insn "avx512f_moves<mode>_mask"
> + [(set (match_operand:VF_128 0 "register_operand" "=v")
> + (vec_merge:VF_128
> + (vec_merge:VF_128
> + (match_operand:VF_128 2 "register_operand" "v")
> + (match_operand:VF_128 3 "vector_move_operand" "0C")
> + (match_operand:<avx512fmaskmode> 4 "register_operand" "k"))
> + (match_operand:VF_128 1 "register_operand" "v")
> + (const_int 1)))]
> + "TARGET_AVX512F"
> + "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
> + [(set_attr "type" "ssemov")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
Nested vec_merge? That seems... odd to say the least.
How in the world does this get matched?
> +(define_insn "*avx512f_loads<mode>_mask"
Likewise.
> +(define_insn "avx512f_stores<mode>_mask"
> + [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
> + (vec_select:<ssescalarmode>
> + (vec_merge:VF_128
> + (match_operand:VF_128 1 "register_operand" "v")
> + (vec_duplicate:VF_128
> + (match_dup 0))
> + (match_operand:<avx512fmaskmode> 2 "register_operand" "k"))
> + (parallel [(const_int 0)])))]
This seems similar, though of course it's an extract.
I still can't imagine how it could be used.
> -(define_insn "rcp14<mode>"
> +(define_insn "<mask_codefor>rcp14<mode><mask_name>"
What, this name isn't used for non-masked anymore?
> -(define_insn "srcp14<mode>"
> +(define_insn "*srcp14<mode>"
Likewise. These changes don't belong in this patch.
> -(define_insn "rsqrt14<mode>"
> +(define_insn "*rsqrt14<mode>"
Likewise.
> @@ -2565,9 +2751,9 @@
> (define_insn "*fma_fmadd_<mode>"
> [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
> (fma:FMAMODE
> - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
> - (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
> - (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
> + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
> + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
> + (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
Unrelated changes. Repeated throughout the fma patterns.
> +(define_insn "*fmai_fmadd_<mode>_maskz"
> + [(set (match_operand:VF_128 0 "register_operand" "=v,v")
> + (vec_merge:VF_128
> + (vec_merge:VF_128
> + (fma:VF_128
> + (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
> + (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
> + (match_operand:VF_128 3 "nonimmediate_operand" "v,vm"))
> + (match_operand:VF_128 4 "const0_operand")
> + (match_operand:QI 5 "register_operand" "k,k"))
> + (match_dup 1)
> + (const_int 1)))]
> + "TARGET_AVX512F"
> + "@
> + vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %3, %2}
> + vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}"
> + [(set_attr "type" "ssemuladd")
> + (set_attr "mode" "<MODE>")])
These seem like useless patterns. If they're for builtins,
then they seem like useless builtins. See above.
> @@ -3686,8 +4328,8 @@
> (set_attr "athlon_decode" "vector,double,*")
> (set_attr "amdfam10_decode" "vector,double,*")
> (set_attr "bdver1_decode" "direct,direct,*")
> - (set_attr "btver2_decode" "double,double,double")
> (set_attr "prefix" "orig,orig,vex")
> + (set_attr "btver2_decode" "double,double,double")
> (set_attr "mode" "SF")])
Unrelated changes.
> +(define_expand "vec_unpacku_float_hi_v16si"
> + [(match_operand:V8DF 0 "register_operand")
> + (match_operand:V16SI 1 "register_operand")]
> + "TARGET_AVX512F"
> +{
> + REAL_VALUE_TYPE TWO32r;
> + rtx k, x, tmp[4];
> +
> + real_ldexp (&TWO32r, &dconst1, 32);
> + x = const_double_from_real_value (TWO32r, DFmode);
> +
> + tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
> + tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
> + tmp[2] = gen_reg_rtx (V8DFmode);
> + tmp[3] = gen_reg_rtx (V8SImode);
> + k = gen_reg_rtx (QImode);
> +
> + emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
> + emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
> + emit_insn (gen_rtx_SET (VOIDmode, k,
> + gen_rtx_LT (QImode, tmp[2], tmp[0])));
> + emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
> + emit_move_insn (operands[0], tmp[2]);
> + DONE;
> +})
This belongs in a separate patch. It is also too complicated, since vcvtudq2pd
exists and performs the unsigned conversion directly.
> -(define_insn "avx512f_unpckhps512"
> +(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
Non-masked name change again.
> -(define_insn "avx512f_unpcklps512"
> +(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
Ditto.
> -(define_insn "avx512f_movshdup512"
> +(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
Ditto.
> -(define_insn "avx512f_movsldup512"
> +(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
Ditto.
There's probably more, but that'll do for a first pass.
r~