For left shift, the TARGET_DOUBLE_WITH_ADD optimization turns a shift left by 1 into an add. The NDD form of add cannot take two memory source operands, so its source operand would have to be a register; for now the new NDD alternatives therefore keep using the NDD form of the shift instead of add.
The TARGET_SHIFT1 optimization omits the constant shift count 1 from the output template, but under NDD this can create an ambiguous mnemonic such as "sal %ecx, %edx": it would be encoded as the legacy shift "sal %cl, %edx", whereas %ecx was actually meant to be the NDD source operand. To avoid this, we emit $1 explicitly when operands[1] is the CX register. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add ASHIFT. * config/i386/i386.md (*ashl<mode>3_1): Extend with new alternatives to support NDD, limit the new alternative to generate sal only, and adjust output template for NDD. (*ashlsi3_1_zext): Likewise. (*ashlhi3_1): Likewise. (*ashlqi3_1): Likewise. (*ashl<mode>3_cmp): Likewise. (*ashlsi3_cmp_zext): Likewise. (*ashl<mode>3_cconly): Likewise. (ashl<mode>3_doubleword): Likewise. (*ashl<dwi>3_doubleword_highpart): Adjust codegen for NDD. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add tests for sal. --- gcc/config/i386/i386-expand.cc | 1 + gcc/config/i386/i386.md | 194 ++++++++++++++++-------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 22 +++ 3 files changed, 150 insertions(+), 67 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 5f02d557a50..7e3080482a6 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -1276,6 +1276,7 @@ bool ix86_can_use_ndd_p (enum rtx_code code) case AND: case IOR: case XOR: + case ASHIFT: return true; default: return false; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index cf9842d1a49..a0e81545f17 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14103,13 +14103,14 @@ (define_insn_and_split "*ashl<dwi>3_doubleword_mask_1" }) (define_insn "ashl<mode>3_doubleword" - [(set (match_operand:DWI 0 "register_operand" "=&r") - (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n") - (match_operand:QI 2 "nonmemory_operand" "<S>c"))) + [(set (match_operand:DWI 0 "register_operand" "=&r,r") + (ashift:DWI (match_operand:DWI 1 
"reg_or_pm1_operand" "0n,r") + (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c"))) (clobber (reg:CC FLAGS_REG))] "" "#" - [(set_attr "type" "multi")]) + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "multi")]) (define_split [(set (match_operand:DWI 0 "register_operand") @@ -14149,11 +14150,14 @@ (define_insn_and_split "*ashl<dwi>3_doubleword_highpart" [(const_int 0)] { split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]); + bool use_ndd = ix86_can_use_ndd_p (ASHIFT) + && !rtx_equal_p (operands[3], operands[1]); int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT); - if (!rtx_equal_p (operands[3], operands[1])) + if (!rtx_equal_p (operands[3], operands[1]) || !use_ndd) emit_move_insn (operands[3], operands[1]); + rtx op_tmp = use_ndd? operands[1] : operands[3]; if (bits > 0) - emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits))); + emit_insn (gen_ashl<mode>3 (operands[3], op_tmp, GEN_INT (bits))); ix86_expand_clear (operands[0]); DONE; }) @@ -14460,12 +14464,14 @@ (define_insn "*bmi2_ashl<mode>3_1" (set_attr "mode" "<MODE>")]) (define_insn "*ashl<mode>3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k") - (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k") - (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm") + (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)" + "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, + ix86_can_use_ndd_p (ASHIFT))" { + bool use_ndd = (which_alternative == 4); switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14480,18 +14486,24 @@ (define_insn "*ashl<mode>3_1" default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return 
"sal{<imodesuffix>}\t%0"; + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !(use_ndd && REG_P (operands[1]) + && REGNO (operands[1]) == CX_REG)) + return use_ndd ? "sal{<imodesuffix>}\t{%1, %0|%0, %1}" + : "sal{<imodesuffix>}\t%0"; else - return "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,bmi2,<kmov_isa>") + [(set_attr "isa" "*,*,bmi2,<kmov_isa>,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "ishiftx") + (eq_attr "alternative" "4") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -14533,13 +14545,15 @@ (define_insn "*bmi2_ashlsi3_1_zext" (set_attr "mode" "SI")]) (define_insn "*ashlsi3_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") (zero_extend:DI - (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,M,r")))) + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands, + ix86_can_use_ndd_p (ASHIFT))" { + bool use_ndd = (which_alternative == 3); switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14552,18 +14566,24 @@ (define_insn "*ashlsi3_1_zext" default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "sal{l}\t%k0"; + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !(use_ndd && REG_P (operands[1]) + && REGNO (operands[1]) == CX_REG)) + return use_ndd ? 
"sal{l}\t{%1, %k0|%k0, %1}" + : "sal{l}\t%k0"; else - return "sal{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sal{l}\t{%2, %k0|%k0, %2}"; } } - [(set_attr "isa" "*,*,bmi2") + [(set_attr "isa" "*,*,bmi2,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "ishiftx") + (eq_attr "alternative" "3") + (const_string "ishift") (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) (const_string "alu") @@ -14593,12 +14613,14 @@ (define_split "operands[2] = gen_lowpart (SImode, operands[2]);") (define_insn "*ashlhi3_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k") - (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, HImode, operands)" + "ix86_binary_operator_ok (ASHIFT, HImode, operands, + ix86_can_use_ndd_p (ASHIFT))" { + bool use_ndd = (which_alternative == 3); switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14611,18 +14633,24 @@ (define_insn "*ashlhi3_1" default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "sal{w}\t%0"; + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !(use_ndd && REG_P (operands[1]) + && REGNO (operands[1]) == CX_REG)) + return use_ndd ? "sal{w}\t{%1, %0|%0, %1}" + : "sal{w}\t%0"; else - return "sal{w}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{w}\t{%2, %1, %0|%0, %1, %2}" + : "sal{w}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,avx512f") + [(set_attr "isa" "*,*,avx512f,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "msklog") + (eq_attr "alternative" "3") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -14638,15 +14666,17 @@ (define_insn "*ashlhi3_1" (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "HI,SI,HI")]) + (set_attr "mode" "HI,SI,HI,HI")]) (define_insn "*ashlqi3_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k") - (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, QImode, operands)" + "ix86_binary_operator_ok (ASHIFT, QImode, operands, + ix86_can_use_ndd_p (ASHIFT))" { + bool use_ndd = (which_alternative == 4); switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14662,28 +14692,34 @@ (define_insn "*ashlqi3_1" default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !(use_ndd && REG_P (operands[1]) + && REGNO (operands[1]) == CX_REG)) { if (get_attr_mode (insn) == MODE_SI) - return "sal{l}\t%k0"; + return use_ndd ? "sal{l}\t{%1, %k0|%k0, %1}" + : "sal{l}\t%k0"; else return "sal{b}\t%0"; } else { if (get_attr_mode (insn) == MODE_SI) - return "sal{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"sal{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sal{l}\t{%2, %k0|%k0, %2}"; else return "sal{b}\t{%2, %0|%0, %2}"; } } } - [(set_attr "isa" "*,*,*,avx512dq") + [(set_attr "isa" "*,*,*,avx512dq,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "2") (const_string "lea") (eq_attr "alternative" "3") (const_string "msklog") + (eq_attr "alternative" "4") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -14699,10 +14735,10 @@ (define_insn "*ashlqi3_1" (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "QI,SI,SI,QI") + (set_attr "mode" "QI,SI,SI,QI,SI") ;; Potential partial reg stall on alternative 1. (set (attr "preferred_for_speed") - (cond [(eq_attr "alternative" "1") + (cond [(eq_attr "alternative" "1,4") (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) @@ -14797,10 +14833,10 @@ (define_split (define_insn "*ashl<mode>3_cmp" [(set (reg FLAGS_REG) (compare - (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "<shift_immediate_operand>" "<S>")) + (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r") (ashift:SWI (match_dup 1) (match_dup 2)))] "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL @@ -14808,8 +14844,10 @@ (define_insn "*ashl<mode>3_cmp" && (TARGET_SHIFT1 || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)" + && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, + ix86_can_use_ndd_p (ASHIFT))" { + bool use_ndd = (which_alternative == 1); switch (get_attr_type (insn)) { case TYPE_ALU: @@ -14818,14 +14856,21 @@ 
(define_insn "*ashl<mode>3_cmp" default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "sal{<imodesuffix>}\t%0"; + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !(use_ndd && REG_P (operands[1]) + && REGNO (operands[1]) == CX_REG)) + return use_ndd ? "sal{<imodesuffix>}\t{%1, %0|%0, %1}" + : "sal{<imodesuffix>}\t%0"; else - return "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") - (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) (const_string "alu") @@ -14845,10 +14890,10 @@ (define_insn "*ashl<mode>3_cmp" (define_insn "*ashlsi3_cmp_zext" [(set (reg FLAGS_REG) (compare - (ashift:SI (match_operand:SI 1 "register_operand" "0") + (ashift:SI (match_operand:SI 1 "register_operand" "0,r") (match_operand:QI 2 "const_1_to_31_operand")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && (optimize_function_for_size_p (cfun) @@ -14857,8 +14902,10 @@ (define_insn "*ashlsi3_cmp_zext" && (TARGET_SHIFT1 || TARGET_DOUBLE_WITH_ADD))) && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands)" + && ix86_binary_operator_ok (ASHIFT, SImode, operands, + ix86_can_use_ndd_p (ASHIFT))" { + bool use_ndd = (which_alternative == 1); switch (get_attr_type (insn)) { case TYPE_ALU: @@ -14867,14 +14914,20 @@ (define_insn "*ashlsi3_cmp_zext" default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "sal{l}\t%k0"; + && (TARGET_SHIFT1 || 
optimize_function_for_size_p (cfun)) + && !(use_ndd && REGNO (operands[1]) == CX_REG)) + return use_ndd ? "sal{l}\t{%1, %k0|%k0, %1}" + : "sal{l}\t%k0"; else - return "sal{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sal{l}\t{%2, %k0|%k0, %2}"; } } - [(set (attr "type") - (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) (const_string "alu") ] @@ -14893,10 +14946,10 @@ (define_insn "*ashlsi3_cmp_zext" (define_insn "*ashl<mode>3_cconly" [(set (reg FLAGS_REG) (compare - (ashift:SWI (match_operand:SWI 1 "register_operand" "0") - (match_operand:QI 2 "<shift_immediate_operand>" "<S>")) + (ashift:SWI (match_operand:SWI 1 "register_operand" "0,r") + (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>")) (const_int 0))) - (clobber (match_scratch:SWI 0 "=<r>"))] + (clobber (match_scratch:SWI 0 "=<r>,r"))] "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx @@ -14904,22 +14957,29 @@ (define_insn "*ashl<mode>3_cconly" || TARGET_DOUBLE_WITH_ADD))) && ix86_match_ccmode (insn, CCGOCmode)" { + bool use_ndd = (which_alternative == 1); switch (get_attr_type (insn)) { case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); return "add{<imodesuffix>}\t%0, %0"; - default: + default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "sal{<imodesuffix>}\t%0"; + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !(use_ndd && REGNO (operands[1]) == CX_REG)) + return use_ndd ? "sal{<imodesuffix>}\t{%1, %0|%0, %1}" + : "sal{<imodesuffix>}\t%0"; else - return "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + : "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") - (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) (const_string "alu") diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index 7541a41a01e..481ec8b00a8 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -29,6 +29,16 @@ foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \ return c; \ } +#define FOO3(TYPE, OP_NAME, OP, IMM) \ +TYPE \ +__attribute__ ((noipa)) \ +foo3_##OP_NAME##_##TYPE (TYPE a) \ +{ \ + TYPE b = a OP IMM; \ + return b; \ +} + + #define F(TYPE, OP_NAME, OP) \ TYPE \ __attribute__ ((noipa)) \ @@ -112,6 +122,16 @@ FOO (int, xor, ^) FOO1 (int, xor, ^) FOO (long, xor, ^) FOO1 (long, xor, ^) + +FOO (char, shl, <<) +FOO3 (char, shl, <<, 7) +FOO (short, shl, <<) +FOO3 (short, shl, <<, 7) +FOO (int, shl, <<) +FOO3 (int, shl, <<, 7) +FOO (long, shl, <<) +FOO3 (long, shl, <<, 7) + /* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */ @@ -134,3 +154,5 @@ FOO1 (long, xor, ^) /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */ /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 4 } } */ +/* { 
dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ -- 2.31.1