On Wed, May 29, 2024 at 1:11 PM Kong, Lingling <lingling.k...@intel.com> wrote:
>
> Hi, compared with v2, these patches restored the original lea pattern position
> and addressed Hongtao's comment.
>
> The APX NF (no flags) feature suppresses the update of status flags
> for arithmetic operations.
OK for this patch and the remaining patches in the series.

[PATCH v3 1/8] [APX NF]: Support APX NF add   Kong, Lingling
[PATCH v3 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg}   Kong, Lingling
[PATCH v3 3/8] [APX NF] Support APX NF for left shift insns   Kong, Lingling
[PATCH v3 4/8] [APX NF] Support APX NF for right shift insns   Kong, Lingling
[PATCH v3 5/8] [APX NF] Support APX NF for rotate insns   Kong, Lingling
[PATCH v3 6/8] [APX NF] Support APX NF for shld/shrd   Kong, Lingling
[PATCH v3 7/8] [APX NF] Support APX NF for mul/div   Kong, Lingling
[PATCH v3 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt   Kong, Lingling

>
> For NF add, it is not clear whether NF add can be faster than lea. If so,
> the pattern needs to be adjusted to prefer lea generation.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-opts.h (enum apx_features): Add nf
>         enumeration.
>         * config/i386/i386.h (TARGET_APX_NF): New.
>         * config/i386/i386.md (*add<mode>_1_nf): New define_insn.
>         * config/i386/i386.opt: Add apx_nf enumeration.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ndd.c: Fixed test.
>
> Co-authored-by: Lingling Kong <lingling.k...@intel.com>
> ---
>  gcc/config/i386/i386-opts.h             |   3 +-
>  gcc/config/i386/i386.h                  |   1 +
>  gcc/config/i386/i386.md                 | 135 ++++++++++++++++--------
>  gcc/config/i386/i386.opt                |   3 +
>  gcc/testsuite/gcc.target/i386/apx-ndd.c |   2 +-
>  5 files changed, 98 insertions(+), 46 deletions(-)
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index ef2825803b3..60176ce609f 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -140,7 +140,8 @@ enum apx_features {
>    apx_push2pop2 = 1 << 1,
>    apx_ndd = 1 << 2,
>    apx_ppx = 1 << 3,
> -  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
> +  apx_nf = 1<< 4,
> +  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
>  };
>
>  #endif
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 359a8408263..969391d3013 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
>  #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
>  #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
>  #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
> +#define TARGET_APX_NF (ix86_apx_features & apx_nf)
>
>  #include "config/vxworks-dummy.h"
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index e8073f5a200..1eeadaddeba 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -6290,6 +6290,13 @@
>    [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
>                (clobber (reg:CC FLAGS_REG))])]
>    "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
> +
> +(define_split
> +  [(set (match_operand:SWI48 0 "general_reg_operand")
> +       (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 
> "const1248_operand")))]
> +  "TARGET_APX_NF && reload_completed"
> +  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
> +  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
>
>
>  ;; Add instructions
>
> @@ -6437,48 +6444,65 @@
>               (clobber (reg:CC FLAGS_REG))])]
>   "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], 
> &operands[5]);")
>
> -(define_insn "*add<mode>_1"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
> +(define_subst_attr "nf_name" "nf_subst" "_nf" "")
> +(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
> +(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
> +(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
> +(define_subst_attr "nf_applied" "nf_subst" "true" "false")
> +
> +(define_subst "nf_subst"
> +  [(set (match_operand:SWI 0)
> +        (match_operand:SWI 1))]
> +  ""
> +  [(set (match_dup 0)
> +       (match_dup 1))
> +       (clobber (reg:CC FLAGS_REG))])
> +
> +(define_insn "*add<mode>_1<nf_name>"
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand" 
> "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
>         (plus:SWI48
> -         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
> -         (match_operand:SWI48 2 "x86_64_general_operand" 
> "re,BM,0,le,r,e,BM")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
> +         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
> +         (match_operand:SWI48 2 "x86_64_general_operand" 
> "r,e,BM,0,le,r,e,BM")))]
> +  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
> -        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> -                     : "inc{<imodesuffix>}\t%0";
> +        return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> +                     : "<nf_prefix>inc{<imodesuffix>}\t%0";
>        else
>          {
>           gcc_assert (operands[2] == constm1_rtx);
> -         return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> -                       : "dec{<imodesuffix>}\t%0";
> +         return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> +                       : "<nf_prefix>dec{<imodesuffix>}\t%0";
>         }
>
>      default:
>        /* For most processors, ADD is faster than LEA.  This alternative
>          was added to use ADD as much as possible.  */
> -      if (which_alternative == 2)
> +      if (which_alternative == 3)
>          std::swap (operands[1], operands[2]);
>
>        if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
> -        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> -                     : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
> +        return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, 
> %2}"
> +                     : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
>
> -      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> -                   : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
> +      return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, 
> %2}"
> +                   : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
>      }
>  }
> -  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
> +  [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
>     (set (attr "type")
> -     (cond [(eq_attr "alternative" "3")
> +     (cond [(eq_attr "alternative" "4")
>                (const_string "lea")
>             (match_operand:SWI48 2 "incdec_operand")
>               (const_string "incdec")
> @@ -6552,26 +6576,29 @@
>         (const_string "*")))
>     (set_attr "mode" "SI")])
>
> -(define_insn "*addhi_1"
> +(define_insn "*addhi_1<nf_name>"
>    [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
>         (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
> -                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
> +                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
> +  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
> -       return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
> +       return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : 
> "<nf_prefix>inc{w}\t%0";
>        else
>         {
>           gcc_assert (operands[2] == constm1_rtx);
> -         return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
> +         return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : 
> "<nf_prefix>dec{w}\t%0";
>         }
>
>      default:
> @@ -6581,11 +6608,11 @@
>          std::swap (operands[1], operands[2]);
>
>        if (x86_maybe_negate_const_int (&operands[2], HImode))
> -       return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
> -                      : "sub{w}\t{%2, %0|%0, %2}";
> +       return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
> +                      : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
>
> -      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
> -                    : "add{w}\t{%2, %0|%0, %2}";
> +      return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
> +                    : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
>      }
>  }
>    [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
> @@ -6603,33 +6630,36 @@
>         (const_string "*")))
>     (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
>
> -(define_insn "*addqi_1"
> +(define_insn "*addqi_1<nf_name>"
>    [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
>         (plus:QI (match_operand:QI 1 "nonimmediate_operand" 
> "%0,0,q,0,r,Yp,rm,r")
> -                (match_operand:QI 2 "general_operand" 
> "qn,m,0,rn,0,ln,rn,m")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
> +                (match_operand:QI 2 "general_operand" 
> "qn,m,0,rn,0,ln,rn,m")))]
> +  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool widen = (get_attr_mode (insn) != MODE_QI);
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
>         if (use_ndd)
> -         return "inc{b}\t{%1, %0|%0, %1}";
> +         return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
>         else
> -         return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
> +         return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
>        else
>         {
>           gcc_assert (operands[2] == constm1_rtx);
>           if (use_ndd)
> -           return "dec{b}\t{%1, %0|%0, %1}";
> +           return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
>           else
> -           return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
> +           return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
>         }
>
>      default:
> @@ -6641,16 +6671,16 @@
>        if (x86_maybe_negate_const_int (&operands[2], QImode))
>         {
>           if (use_ndd)
> -           return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
> +           return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
>           else
> -           return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
> -                        : "sub{b}\t{%2, %0|%0, %2}";
> +           return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
> +                        : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
>         }
>        if (use_ndd)
> -       return "add{b}\t{%2, %1, %0|%0, %1, %2}";
> +       return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
>        else
> -       return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
> -                    : "add{b}\t{%2, %0|%0, %2}";
> +       return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
> +                    : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
>      }
>  }
>    [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
> @@ -6824,6 +6854,23 @@
>      }
>  })
>
> +(define_split
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (plus:SWI (match_operand:SWI 1 "register_operand")
> +                 (match_operand:SWI 2 "<nonmemory_operand>")))]
> +  "TARGET_APX_NF && reload_completed
> +   && ix86_lea_for_add_ok (insn, operands)"
> +  [(set (match_dup 0)
> +       (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
> +{
> +  if (<MODE>mode != <LEAMODE>mode)
> +    {
> +      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
> +      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
> +      operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
> +    }
> +})
> +
>  ;; Convert add to the lea pattern to avoid flags dependency.
>  (define_split
>    [(set (match_operand:DI 0 "register_operand")
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 7151fb1b147..b6f28a2b4bd 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1336,6 +1336,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
>  EnumValue
>  Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
>
> +EnumValue
> +Enum(apx_features) String(nf) Value(apx_nf) Set(6)
> +
>  EnumValue
>  Enum(apx_features) String(all) Value(apx_all) Set(1)
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c 
> b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> index 0eb751ad225..0ff4df0780c 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-mapxf -march=x86-64 -O2" } */
> +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } 
> */
>  /* { dg-final { scan-assembler-not "movl"} } */
>
>  #include <stdint.h>
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to