On Tue, Aug 12, 2014 at 1:29 PM, Kirill Yukhin <kirill.yuk...@gmail.com> wrote:

> This patch extends support of masking instructions.
>
> Bootstrapped.
>
> Is it ok for trunk?
>
> gcc/
>         * config/i386/i386.md
>         (define_attr "isa"): Add avx512bw,noavx512bw.
>         (define_attr "enabled"): Ditto.
>         (define_split): Add 32/64-bit mask logic.
>         (define_insn "*k<logic>qi"): New.
>         (define_insn "*k<logic>hi"): New.
>         (define_insn "*anddi_1"): Add mask version.
>         (define_insn "*andsi_1"): Ditto.
>         (define_insn "*<code><mode>_1"): Ditto.
>         (define_insn "*<code>hi_1"): Ditto.
>         (define_insn "kxnor<mode>"): New.
>         (define_insn "kunpcksi"): New.
>         (define_insn "kunpckdi"): New.
>         (define_insn "*one_cmpl<mode>2_1"): Add mask version.
>         (define_insn "*one_cmplhi2_1"): Ditto.

OK with a couple of small adjustments below.

Thanks,
Uros.

> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 4867e7e..7f14ddb 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -740,7 +740,8 @@
>  ;; Used to control the "enabled" attribute on a per-instruction basis.
>  (define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
>                     sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
> -                   avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
> +                   avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
> +                   fma_avx512f,avx512bw,noavx512bw"
>    (const_string "base"))
>
>  (define_attr "enabled" ""
> @@ -771,6 +772,8 @@
>          (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
>          (eq_attr "isa" "fma_avx512f")
>            (symbol_ref "TARGET_FMA || TARGET_AVX512F")
> +        (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
> +        (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
>         ]
>         (const_int 1)))
>
> @@ -7510,21 +7513,45 @@
>  })
>
>  (define_split
> -  [(set (match_operand:SWI12 0 "mask_reg_operand")
> -       (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand")
> -                        (match_operand:SWI12 2 "mask_reg_operand")))
> +  [(set (match_operand:SWI1248x 0 "mask_reg_operand")
> +       (any_logic:SWI1248x (match_operand:SWI1248x 1 "mask_reg_operand")
> +                           (match_operand:SWI1248x 2 "mask_reg_operand")))
>     (clobber (reg:CC FLAGS_REG))]
> -  "TARGET_AVX512F && reload_completed"
> +;;TODO removed avx512f check because mask_reg implies it.

Please leave TARGET_AVX512F in the insn condition; it short-circuits
pattern recognition for non-AVX512F targets.

> +  "reload_completed"
>    [(set (match_dup 0)
> -       (any_logic:SWI12 (match_dup 1)
> -                        (match_dup 2)))])
> +       (any_logic:SWI1248x (match_dup 1)
> +                           (match_dup 2)))])
>
> -(define_insn "*k<logic><mode>"
> -  [(set (match_operand:SWI12 0 "mask_reg_operand" "=k")
> -       (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "k")
> -                        (match_operand:SWI12 2 "mask_reg_operand" "k")))]
> +(define_insn "*k<logic>qi"
> +  [(set (match_operand:QI 0 "mask_reg_operand" "=k")
> +       (any_logic:QI (match_operand:QI 1 "mask_reg_operand" "k")
> +                     (match_operand:QI 2 "mask_reg_operand" "k")))]
> +  "TARGET_AVX512F"
> +{
> +  return TARGET_AVX512DQ ? "k<logic>b\t{%2, %1, %0|%0, %1, %2}"
> +                        : "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
> +}
> +  [(set_attr "mode" "QI")
> +   (set_attr "type" "msklog")
> +   (set_attr "prefix" "vex")])
> +
> +(define_insn "*k<logic>hi"
> +  [(set (match_operand:HI 0 "mask_reg_operand" "=k")
> +       (any_logic:HI (match_operand:HI 1 "mask_reg_operand" "k")
> +                     (match_operand:HI 2 "mask_reg_operand" "k")))]
>    "TARGET_AVX512F"
>    "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
> +  [(set_attr "mode" "HI")
> +   (set_attr "type" "msklog")
> +   (set_attr "prefix" "vex")])
> +
> +(define_insn "*k<logic><mode>"
> +  [(set (match_operand:SWI48x 0 "mask_reg_operand" "=k")
> +       (any_logic:SWI48x (match_operand:SWI48x 1 "mask_reg_operand" "k")
> +                         (match_operand:SWI48x 2 "mask_reg_operand" "k")))]
> +  "TARGET_AVX512BW"
> +  "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
>    [(set_attr "mode" "<MODE>")
>     (set_attr "type" "msklog")
>     (set_attr "prefix" "vex")])
> @@ -7580,10 +7607,10 @@
>  })

The patterns above can be macroized using conditional modes, but we
can live with the above for now. Maybe add a TODO marker here.

>  (define_insn "*anddi_1"
> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,!k")
>         (and:DI
> -        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
> -        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
> +        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
> +        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L,k")))
>     (clobber (reg:CC FLAGS_REG))]
>    "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
>  {
> @@ -7592,6 +7619,9 @@
>      case TYPE_IMOVX:
>        return "#";
>
> +    case TYPE_MSKLOG:
> +      return "kandq\t{%2, %1, %0|%0, %1, %2}";
> +
>      default:
>        gcc_assert (rtx_equal_p (operands[0], operands[1]));
>        if (get_attr_mode (insn) == MODE_SI)
> @@ -7600,8 +7630,8 @@
>         return "and{q}\t{%2, %0|%0, %2}";
>      }
>  }
> -  [(set_attr "type" "alu,alu,alu,imovx")
> -   (set_attr "length_immediate" "*,*,*,0")
> +  [(set_attr "type" "alu,alu,alu,imovx,msklog")
> +   (set_attr "length_immediate" "*,*,*,0,0")
>     (set (attr "prefix_rex")
>       (if_then_else
>         (and (eq_attr "type" "imovx")
> @@ -7609,12 +7639,12 @@
>                  (match_operand 1 "ext_QIreg_operand")))
>         (const_string "1")
>         (const_string "*")))
> -   (set_attr "mode" "SI,DI,DI,SI")])
> +   (set_attr "mode" "SI,DI,DI,SI,DI")])
>
>  (define_insn "*andsi_1"
> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya")
> -       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
> -               (match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya,!k")
> +       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm,k")
> +               (match_operand:SI 2 "x86_64_general_operand" "re,rm,L,k")))
>     (clobber (reg:CC FLAGS_REG))]
>    "ix86_binary_operator_ok (AND, SImode, operands)"
>  {
> @@ -7623,12 +7653,15 @@
>      case TYPE_IMOVX:
>        return "#";
>
> +    case TYPE_MSKLOG:
> +      return "kandd\t{%2, %1, %0|%0, %1, %2}";
> +
>      default:
>        gcc_assert (rtx_equal_p (operands[0], operands[1]));
>        return "and{l}\t{%2, %0|%0, %2}";
>      }
>  }
> -  [(set_attr "type" "alu,alu,imovx")
> +  [(set_attr "type" "alu,alu,imovx,msklog")
>     (set (attr "prefix_rex")
>       (if_then_else
>         (and (eq_attr "type" "imovx")
> @@ -7636,7 +7669,7 @@
>                  (match_operand 1 "ext_QIreg_operand")))
>         (const_string "1")
>         (const_string "*")))
> -   (set_attr "length_immediate" "*,*,0")
> +   (set_attr "length_immediate" "*,*,0,0")
>     (set_attr "mode" "SI")])
>
>  ;; See comment for addsi_1_zext why we do use nonimmediate_operand
> @@ -7688,11 +7721,21 @@
>                 (match_operand:QI 2 "general_operand" "qn,qmn,rn,k")))
>     (clobber (reg:CC FLAGS_REG))]
>    "ix86_binary_operator_ok (AND, QImode, operands)"
> -  "@
> -   and{b}\t{%2, %0|%0, %2}
> -   and{b}\t{%2, %0|%0, %2}
> -   and{l}\t{%k2, %k0|%k0, %k2}
> -   kandw\t{%2, %1, %0|%0, %1, %2}"
> +{
> +  switch (which_alternative)
> +    {
> +    case 0:
> +    case 1:
> +      return "and{b}\t{%2, %0|%0, %2}";
> +    case 2:
> +      return "and{l}\t{%k2, %k0|%k0, %k2}";
> +    case 3:
> +      return TARGET_AVX512DQ ? "kandb\t{%2, %1, %0|%0, %1, %2}"
> +                            : "kandw\t{%2, %1, %0|%0, %1, %2}";
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
>    [(set_attr "type" "alu,alu,alu,msklog")
>     (set_attr "mode" "QI,QI,SI,HI")])
>
> @@ -7715,10 +7758,22 @@
>           (match_operand:SWI12 2 "register_operand" "r,r,k")))
>     (clobber (reg:CC FLAGS_REG))]
>    "TARGET_AVX512F"
> -  "@
> -   andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
> -   #
> -   kandnw\t{%2, %1, %0|%0, %1, %2}"
> +{
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      return "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}";
> +    case 1:
> +      return "#";
> +    case 2:
> +      if (TARGET_AVX512DQ && <MODE>mode == QImode)
> +       return "kandnb\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "kandnw\t{%2, %1, %0|%0, %1, %2}";
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
>    [(set_attr "isa" "bmi,*,avx512f")
>     (set_attr "type" "bitmanip,*,msklog")
>     (set_attr "prefix" "*,*,vex")
> @@ -8082,14 +8137,17 @@
>    "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
>
>  (define_insn "*<code><mode>_1"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm")
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,k")
>         (any_or:SWI48
> -        (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
> -        (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>")))
> +        (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,k")
> +        (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>,k")))
>     (clobber (reg:CC FLAGS_REG))]
>    "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
> -  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "alu")
> +  "@
> +   <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
> +   <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
> +   k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "type" "alu,alu,msklog")
>     (set_attr "mode" "<MODE>")])
>
>  (define_insn "*<code>hi_1"
> @@ -8177,19 +8235,36 @@
>             (match_operand:SWI12 2 "register_operand" "r,k"))))
>     (clobber (reg:CC FLAGS_REG))]
>    "TARGET_AVX512F"
> +{
> +  if (which_alternative == 1 && <MODE>mode == QImode && TARGET_AVX512DQ)
> +    return "kxnorb\t{%2, %1, %0|%0, %1, %2}";
> +  return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
> +}
> +  [(set_attr "type" "*,msklog")
> +   (set_attr "prefix" "*,vex")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "kxnor<mode>"
> +  [(set (match_operand:SWI48x 0 "register_operand" "=r,!k")
> +       (not:SWI48x
> +         (xor:SWI48x
> +           (match_operand:SWI48x 1 "register_operand" "0,k")
> +           (match_operand:SWI48x 2 "register_operand" "r,k"))))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_AVX512BW"
>    "@
>     #
> -   kxnorw\t{%2, %1, %0|%0, %1, %2}"
> +   kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
>    [(set_attr "type" "*,msklog")
>     (set_attr "prefix" "*,vex")
>     (set_attr "mode" "<MODE>")])
>
>  (define_split
> -  [(set (match_operand:SWI12 0 "general_reg_operand")
> -       (not:SWI12
> -         (xor:SWI12
> +  [(set (match_operand:SWI1248x 0 "general_reg_operand")
> +       (not:SWI1248x
> +         (xor:SWI1248x
>             (match_dup 0)
> -           (match_operand:SWI12 1 "general_reg_operand"))))
> +           (match_operand:SWI1248x 1 "general_reg_operand"))))
>     (clobber (reg:CC FLAGS_REG))]
>    "TARGET_AVX512F && reload_completed"
>     [(parallel [(set (match_dup 0)
> @@ -8199,6 +8274,8 @@
>      (set (match_dup 0)
>          (not:HI (match_dup 0)))])
>
> +;;There are kortest[bdq] but no intrinsics for them.
> +;;We probably don't need to implement them.
>  (define_insn "kortestzhi"
>    [(set (reg:CCZ FLAGS_REG)
>         (compare:CCZ
> @@ -8238,6 +8315,28 @@
>     (set_attr "type" "msklog")
>     (set_attr "prefix" "vex")])
>
> +(define_insn "kunpcksi"
> +  [(set (match_operand:SI 0 "register_operand" "=k")
> +       (ior:SI
> +         (ashift:SI
> +           (match_operand:SI 1 "register_operand" "k")
> +           (const_int 16))
> +         (zero_extend:SI (subreg:HI (match_operand:SI 2 "register_operand" "k") 0))))]
> +  "TARGET_AVX512BW"
> +  "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mode" "SI")])
> +
> +(define_insn "kunpckdi"
> +  [(set (match_operand:DI 0 "register_operand" "=k")
> +       (ior:DI
> +         (ashift:DI
> +           (match_operand:DI 1 "register_operand" "k")
> +           (const_int 32))
> +         (zero_extend:DI (subreg:SI (match_operand:DI 2 "register_operand" "k") 0))))]
> +  "TARGET_AVX512BW"
> +  "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mode" "DI")])
> +
>  ;; See comment for addsi_1_zext why we do use nonimmediate_operand
>  ;; ??? Special case for immediate operand is missing - it is tricky.
>  (define_insn "*<code>si_2_zext"
> @@ -8837,11 +8936,15 @@
>    "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
>
>  (define_insn "*one_cmpl<mode>2_1"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
> -       (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))]
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,k")
> +       (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,k")))]
>    "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
> -  "not{<imodesuffix>}\t%0"
> -  [(set_attr "type" "negnot")
> +  "@
> +   not{<imodesuffix>}\t%0
> +   knot<mskmodesuffix>\t{%1, %0|%0, %1}"
> +  [(set_attr "isa" "*,avx512bw")
> +   (set_attr "type" "negnot,msklog")
> +   (set_attr "prefix" "*,vex")
>     (set_attr "mode" "<MODE>")])
>
>  (define_insn "*one_cmplhi2_1"
> @@ -8861,10 +8964,21 @@
>    [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k")
>         (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
>    "ix86_unary_operator_ok (NOT, QImode, operands)"
> -  "@
> -   not{b}\t%0
> -   not{l}\t%k0
> -   knotw\t{%1, %0|%0, %1}"
> +{
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      return "not{b}\t%0";
> +    case 1:
> +      return "not{l}\t%k0";
> +    case 2:
> +      if (TARGET_AVX512DQ)
> +       return "knotb\t{%1, %0|%0, %1}";
> +      return "knotw\t{%1, %0|%0, %1}";
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
>    [(set_attr "isa" "*,*,avx512f")
>     (set_attr "type" "negnot,negnot,msklog")
>     (set_attr "prefix" "*,*,vex")
>
>

Reply via email to