On Wed, May 22, 2024 at 10:29 AM Kong, Lingling <lingling.k...@intel.com> wrote: > > > I wonder if we can use "define_subst" to conditionally add flags clobber > > for !TARGET_APX_NF targets. Even the example for "Define Subst" uses the > > insn > > w/ and w/o the clobber, so I think it is worth considering this approach. > > > > Uros. > > Good Suggestion, I defined new subst for no flags, and Bootstrapped and > regtested on x86_64-linux-gnu. Also supported SPEC 2017 run normally on Intel > software development emulator. > Ok for trunk? > > Thanks, > Lingling > > Subject: [PATCH v2 1/8] [APX NF]: Support APX NF add > APX NF(no flags) feature implements suppresses the update of status flags > for arithmetic operations. > > For NF add, it is not clear whether nf add can be faster than lea. If so, > the pattern needs to be adjusted to perfer lea generation. > > gcc/ChangeLog: > > * config/i386/i386-opts.h (enum apx_features): Add nf > enumeration. > * config/i386/i386.h (TARGET_APX_NF): New. > * config/i386/i386.md (nf_subst): New define_subst. > (nf_name): New subst_attr. > (nf_prefix): Ditto. > (nf_condition): Ditto. > (nf_mem_constraint): Ditto. > (nf_applied): Ditto. > (*add<mode>_1_nf): New define_insn. > (addhi_1_nf): Ditto. > (addqi_1_nf): Ditto. > * config/i386/i386.opt: Add apx_nf enumeration. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-ndd.c: Fixed test. > * gcc.target/i386/apx-nf.c: New test.
LGTM, but I'll leave the final approval to Hongtao. Thanks, Uros. > > Co-authored-by: Lingling Kong <lingling.k...@intel.com> > --- > gcc/config/i386/i386-opts.h | 3 +- > gcc/config/i386/i386.h | 1 + > gcc/config/i386/i386.md | 179 +++++++++++++++--------- > gcc/config/i386/i386.opt | 3 + > gcc/testsuite/gcc.target/i386/apx-ndd.c | 2 +- > gcc/testsuite/gcc.target/i386/apx-nf.c | 6 + > 6 files changed, 126 insertions(+), 68 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c > > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h > index ef2825803b3..60176ce609f 100644 > --- a/gcc/config/i386/i386-opts.h > +++ b/gcc/config/i386/i386-opts.h > @@ -140,7 +140,8 @@ enum apx_features { > apx_push2pop2 = 1 << 1, > apx_ndd = 1 << 2, > apx_ppx = 1 << 3, > - apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx, > + apx_nf = 1<< 4, > + apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf, > }; > > #endif > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 529edff93a4..f20ae4726da 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. > If not, see > #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2) > #define TARGET_APX_NDD (ix86_apx_features & apx_ndd) > #define TARGET_APX_PPX (ix86_apx_features & apx_ppx) > +#define TARGET_APX_NF (ix86_apx_features & apx_nf) > > #include "config/vxworks-dummy.h" > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 764bfe20ff2..bae344518bd 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -6233,28 +6233,6 @@ > } > }) > > > -;; Load effective address instructions > - > -(define_insn "*lea<mode>" > - [(set (match_operand:SWI48 0 "register_operand" "=r") > - (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))] > - "ix86_hardreg_mov_ok (operands[0], operands[1])" > -{ > - if (SImode_address_operand (operands[1], VOIDmode)) > - { > - gcc_assert (TARGET_64BIT); > - return "lea{l}\t{%E1, %k0|%k0, %E1}"; > - } > - else > - return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}"; > -} > - [(set_attr "type" "lea") > - (set (attr "mode") > - (if_then_else > - (match_operand 1 "SImode_address_operand") > - (const_string "SI") > - (const_string "<MODE>")))]) > - > (define_peephole2 > [(set (match_operand:SWI48 0 "register_operand") > (match_operand:SWI48 1 "address_no_seg_operand"))] > @@ -6290,6 +6268,13 @@ > [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1))) > (clobber (reg:CC FLAGS_REG))])] > "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));") > + > +(define_split > + [(set (match_operand:SWI48 0 "general_reg_operand") > + (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 > "const1248_operand")))] > + "TARGET_APX_NF && reload_completed" > + [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))] > + "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));") > > > ;; Add instructions > > @@ -6437,48 +6422,65 @@ > (clobber (reg:CC FLAGS_REG))])] > "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], > &operands[5]);") > > -(define_insn "*add<mode>_1" > - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r") > +(define_subst_attr "nf_name" "nf_subst" "_nf" "") > +(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "") > +(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true") > +(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m") > +(define_subst_attr "nf_applied" "nf_subst" "true" "false") > + > +(define_subst "nf_subst" > + [(set (match_operand:SWI 0) > + (match_operand:SWI 1))] > + "" > + [(set (match_dup 0) > + (match_dup 1)) > + (clobber (reg:CC FLAGS_REG))]) > + > +(define_insn "*add<mode>_1<nf_name>" > + [(set (match_operand:SWI48 0 "nonimmediate_operand" > "=rm,r<nf_mem_constraint>,r,r,r,r,r,r") > (plus:SWI48 > - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r") > - (match_operand:SWI48 2 "x86_64_general_operand" > "re,BM,0,le,r,e,BM"))) > - (clobber (reg:CC FLAGS_REG))] > - "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)" > + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r") > + (match_operand:SWI48 2 "x86_64_general_operand" > "r,e,BM,0,le,r,e,BM")))] > + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD) > + && <nf_condition>" > { > bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; > switch (get_attr_type (insn)) > { > case TYPE_LEA: > - return "#"; > + if (TARGET_APX_NDD && <nf_applied>) > + return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"; > + else > + return "#"; > > case TYPE_INCDEC: > if (operands[2] == const1_rtx) > - return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}" > - : "inc{<imodesuffix>}\t%0"; > + return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}" > + : "<nf_prefix>inc{<imodesuffix>}\t%0"; > else > { > gcc_assert (operands[2] == constm1_rtx); > - return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}" > - : "dec{<imodesuffix>}\t%0"; > + return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}" > + : "<nf_prefix>dec{<imodesuffix>}\t%0"; > } > > default: > /* For most processors, ADD is faster than LEA. This alternative > was added to use ADD as much as possible. */ > - if (which_alternative == 2) > + if (which_alternative == 3) > std::swap (operands[1], operands[2]); > > if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) > - return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > - : "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; > + return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, > %2}" > + : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}"; > > - return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > - : "add{<imodesuffix>}\t{%2, %0|%0, %2}"; > + return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, > %2}" > + : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}"; > } > } > - [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd") > + [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd") > (set (attr "type") > - (cond [(eq_attr "alternative" "3") > + (cond [(eq_attr "alternative" "4") > (const_string "lea") > (match_operand:SWI48 2 "incdec_operand") > (const_string "incdec") > @@ -6491,6 +6493,28 @@ > (const_string "*"))) > (set_attr "mode" "<MODE>")]) > > +;; Load effective address instructions > + > +(define_insn "*lea<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))] > + "ix86_hardreg_mov_ok (operands[0], operands[1])" > +{ > + if (SImode_address_operand (operands[1], VOIDmode)) > + { > + gcc_assert (TARGET_64BIT); > + return "lea{l}\t{%E1, %k0|%k0, %E1}"; > + } > + else > + return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}"; > +} > + [(set_attr "type" "lea") > + (set (attr "mode") > + (if_then_else > + (match_operand 1 "SImode_address_operand") > + (const_string "SI") > + (const_string "<MODE>")))]) > + > ;; It may seem that nonimmediate operand is proper one for operand 1. > ;; The addsi_1 pattern allows nonimmediate operand at that place and > ;; we take care in ix86_binary_operator_ok to not allow two memory > @@ -6552,26 +6576,29 @@ > (const_string "*"))) > (set_attr "mode" "SI")]) > > -(define_insn "*addhi_1" > +(define_insn "*addhi_1<nf_name>" > [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r") > (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r") > - (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m"))) > - (clobber (reg:CC FLAGS_REG))] > - "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)" > + (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))] > + "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD) > + && <nf_condition>" > { > bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; > switch (get_attr_type (insn)) > { > case TYPE_LEA: > - return "#"; > + if (TARGET_APX_NDD && <nf_applied>) > + return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}"; > + else > + return "#"; > > case TYPE_INCDEC: > if (operands[2] == const1_rtx) > - return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0"; > + return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : > "<nf_prefix>inc{w}\t%0"; > else > { > gcc_assert (operands[2] == constm1_rtx); > - return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0"; > + return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : > "<nf_prefix>dec{w}\t%0"; > } > > default: > @@ -6581,11 +6608,11 @@ > std::swap (operands[1], operands[2]); > > if (x86_maybe_negate_const_int (&operands[2], HImode)) > - return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}" > - : "sub{w}\t{%2, %0|%0, %2}"; > + return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}" > + : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}"; > > - return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}" > - : "add{w}\t{%2, %0|%0, %2}"; > + return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}" > + : "<nf_prefix>add{w}\t{%2, %0|%0, %2}"; > } > } > [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd") > @@ -6603,33 +6630,36 @@ > (const_string "*"))) > (set_attr "mode" "HI,HI,HI,SI,HI,HI")]) > > -(define_insn "*addqi_1" > +(define_insn "*addqi_1<nf_name>" > [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r") > (plus:QI (match_operand:QI 1 "nonimmediate_operand" > "%0,0,q,0,r,Yp,rm,r") > - (match_operand:QI 2 "general_operand" > "qn,m,0,rn,0,ln,rn,m"))) > - (clobber (reg:CC FLAGS_REG))] > - "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)" > + (match_operand:QI 2 "general_operand" > "qn,m,0,rn,0,ln,rn,m")))] > + "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD) > + && <nf_condition>" > { > bool widen = (get_attr_mode (insn) != MODE_QI); > bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; > switch (get_attr_type (insn)) > { > case TYPE_LEA: > - return "#"; > + if (TARGET_APX_NDD && <nf_applied>) > + return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}"; > + else > + return "#"; > > case TYPE_INCDEC: > if (operands[2] == const1_rtx) > if (use_ndd) > - return "inc{b}\t{%1, %0|%0, %1}"; > + return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}"; > else > - return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; > + return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0"; > else > { > gcc_assert (operands[2] == constm1_rtx); > if (use_ndd) > - return "dec{b}\t{%1, %0|%0, %1}"; > + return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}"; > else > - return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; > + return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0"; > } > > default: > @@ -6641,16 +6671,16 @@ > if (x86_maybe_negate_const_int (&operands[2], QImode)) > { > if (use_ndd) > - return "sub{b}\t{%2, %1, %0|%0, %1, %2}"; > + return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}"; > else > - return widen ? "sub{l}\t{%2, %k0|%k0, %2}" > - : "sub{b}\t{%2, %0|%0, %2}"; > + return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}" > + : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}"; > } > if (use_ndd) > - return "add{b}\t{%2, %1, %0|%0, %1, %2}"; > + return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}"; > else > - return widen ? "add{l}\t{%k2, %k0|%k0, %k2}" > - : "add{b}\t{%2, %0|%0, %2}"; > + return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}" > + : "<nf_prefix>add{b}\t{%2, %0|%0, %2}"; > } > } > [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd") > @@ -6824,6 +6854,23 @@ > } > }) > > +(define_split > + [(set (match_operand:SWI 0 "register_operand") > + (plus:SWI (match_operand:SWI 1 "register_operand") > + (match_operand:SWI 2 "<nonmemory_operand>")))] > + "TARGET_APX_NF && reload_completed > + && ix86_lea_for_add_ok (insn, operands)" > + [(set (match_dup 0) > + (plus:<LEAMODE> (match_dup 1) (match_dup 2)))] > +{ > + if (<MODE>mode != <LEAMODE>mode) > + { > + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); > + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); > + operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]); > + } > +}) > + > ;; Convert add to the lea pattern to avoid flags dependency. > (define_split > [(set (match_operand:DI 0 "register_operand") > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index d5f793a9e8b..66021d59d4e 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -1356,6 +1356,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4) > EnumValue > Enum(apx_features) String(ppx) Value(apx_ppx) Set(5) > > +EnumValue > +Enum(apx_features) String(nf) Value(apx_nf) Set(6) > + > EnumValue > Enum(apx_features) String(all) Value(apx_all) Set(1) > > diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c > b/gcc/testsuite/gcc.target/i386/apx-ndd.c > index 0eb751ad225..0ff4df0780c 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c > +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! ia32 } } } */ > -/* { dg-options "-mapxf -march=x86-64 -O2" } */ > +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } > */ > /* { dg-final { scan-assembler-not "movl"} } */ > > #include <stdint.h> > diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c > b/gcc/testsuite/gcc.target/i386/apx-nf.c > new file mode 100644 > index 00000000000..3adc7a27902 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-nf.c > @@ -0,0 +1,6 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" > } */ > +/* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */ > + > +#include "apx-ndd.c" > + > -- > 2.31.1 > >