For the shld/shrd insns, the existing patterns use (match_dup 0) as the shift
source and "+r*m" as its constraint.  To support NDD, add new define_insns for
the NDD form, which takes an extra input operand and requires the destination
operand to be a register.
gcc/ChangeLog:
* config/i386/i386.md (x86_64_shld_ndd): New define_insn.
(x86_64_shld_ndd_1): Likewise.
(*x86_64_shld_ndd_2): Likewise.
(x86_shld_ndd): Likewise.
(x86_shld_ndd_1): Likewise.
(*x86_shld_ndd_2): Likewise.
(x86_64_shrd_ndd): Likewise.
(x86_64_shrd_ndd_1): Likewise.
(*x86_64_shrd_ndd_2): Likewise.
(x86_shrd_ndd): Likewise.
(x86_shrd_ndd_1): Likewise.
(*x86_shrd_ndd_2): Likewise.
(*x86_64_shld_shrd_1_nozext): Adjust codegen under TARGET_APX_NDD.
(*x86_shld_shrd_1_nozext): Likewise.
(*x86_64_shrd_shld_1_nozext): Likewise.
(*x86_shrd_shld_1_nozext): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd-shld-shrd.c: New test.
---
gcc/config/i386/i386.md | 323 +++++++++++++++++-
.../gcc.target/i386/apx-ndd-shld-shrd.c | 24 ++
2 files changed, 345 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 760c0d32f4d..2e3d37d08b0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14183,6 +14183,24 @@ (define_insn "x86_64_shld"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shld_ndd" ;; 64-bit SHLD, NDD form: result is written to operand 0, separate from source operand 1.
+ [(set (match_operand:DI 0 "register_operand" "=r") ; destination, write-only register
+ (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") ; value shifted left (register or memory)
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") ; shift count, masked with 63 below
+ (const_int 63)))
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r")) ; register whose bits fill the vacated low end
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3) (const_int 63)))) 0)))
+ (clobber (reg:CC FLAGS_REG))] ; the flags register is clobbered
+ "TARGET_64BIT && TARGET_APX_NDD"
+ "shld{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1") ; NOTE(review): the other *_ndd insns in this patch do not set prefix_0f -- confirm it is intended here
+ (set_attr "mode" "DI")])
+
(define_insn "x86_64_shld_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0)
@@ -14204,6 +14222,24 @@ (define_insn "x86_64_shld_1"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shld_ndd_1" ;; 64-bit SHLD, NDD form, with both shift counts given as constants.
+ [(set (match_operand:DI 0 "register_operand" "=r") ; destination, write-only register
+ (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") ; value shifted left (register or memory)
+ (match_operand:QI 3 "const_0_to_63_operand")) ; left-shift count
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r")) ; register whose bits fill the vacated low end
+ (match_operand:QI 4 "const_0_to_255_operand")) 0))) ; right-shift count, tied to operand 3 by the condition
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" ; the two counts must add up to 64
+ "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" ; only operand 3 is printed as the count
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "1")])
+
+
(define_insn_and_split "*x86_64_shld_shrd_1_nozext"
[(set (match_operand:DI 0 "nonimmediate_operand")
(ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
@@ -14229,6 +14265,23 @@ (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
operands[4] = force_reg (DImode, operands[4]);
emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3],
operands[2]));
}
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
else
{
operands[1] = force_reg (DImode, operands[1]);
@@ -14261,6 +14314,33 @@ (define_insn_and_split "*x86_64_shld_2"
(const_int 63)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
+(define_insn_and_split "*x86_64_shld_ndd_2" ;; Pre-reload splitter: 64-bit double left shift written with an unmasked count.
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "nonmemory_operand")) ; shift count, unmasked in the matched form
+ (lshiftrt:DI (match_operand:DI 2 "register_operand")
+ (minus:QI (const_int 64) (match_dup 3))))) ; complementary count, 64 - operand 3
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && ix86_pre_reload_split ()"
+ "#" ; no assembler template of its own; the insn is always split
+ "&& 1"
+ [(parallel [(set (match_dup 4) ; result goes to the fresh pseudo created in the preparation code
+ (ior:DI (ashift:DI (match_dup 1)
+ (and:QI (match_dup 3) (const_int 63))) ; replacement masks the count with 63
+ (subreg:DI
+ (lshiftrt:TI
+ (zero_extend:TI (match_dup 2))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3)
+ (const_int 63)))) 0)))
+ (clobber (reg:CC FLAGS_REG))
+ (set (match_dup 0) (match_dup 4))])] ; NOTE(review): this copy sits inside the parallel; x86_64_shld_ndd has only set + clobber -- confirm
+{
+ operands[4] = gen_reg_rtx (DImode);
+ emit_move_insn (operands[4], operands[0]); /* NOTE(review): copies operand 0 into the temporary ahead of the split insns -- confirm this is needed.  */
+})
+
(define_insn "x86_shld"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0)
@@ -14283,6 +14363,24 @@ (define_insn "x86_shld"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shld_ndd" ;; 32-bit SHLD, NDD form: result is written to operand 0, separate from source operand 1.
+ [(set (match_operand:SI 0 "register_operand" "=r") ; destination, write-only register; predicate now agrees with the "=r" constraint
+ (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") ; value shifted left (register or memory)
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") ; shift count, masked with 31 below
+ (const_int 31)))
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r")) ; register whose bits fill the vacated low end
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3) (const_int 31)))) 0)))
+ (clobber (reg:CC FLAGS_REG))] ; the flags register is clobbered
+ "TARGET_APX_NDD"
+ "shld{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+
(define_insn "x86_shld_1"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0)
@@ -14304,6 +14402,24 @@ (define_insn "x86_shld_1"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shld_ndd_1" ;; 32-bit SHLD, NDD form, with both shift counts given as constants.
+ [(set (match_operand:SI 0 "register_operand" "=r") ; destination, write-only register
+ (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") ; value shifted left (register or memory)
+ (match_operand:QI 3 "const_0_to_31_operand")) ; left-shift count
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r")) ; register whose bits fill the vacated low end
+ (match_operand:QI 4 "const_0_to_63_operand")) 0))) ; right-shift count, tied to operand 3 by the condition
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_APX_NDD &&
+ INTVAL (operands[4]) == 32 - INTVAL (operands[3])" ; the two counts must add up to 32
+ "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" ; only operand 3 is printed as the count
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "SI")])
+
+
(define_insn_and_split "*x86_shld_shrd_1_nozext"
[(set (match_operand:SI 0 "nonimmediate_operand")
(ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
@@ -14328,7 +14444,24 @@ (define_insn_and_split "*x86_shld_shrd_1_nozext"
operands[4] = force_reg (SImode, operands[4]);
emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3],
operands[2]));
}
- else
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
{
operands[1] = force_reg (SImode, operands[1]);
rtx tmp = gen_reg_rtx (SImode);
@@ -14360,6 +14493,33 @@ (define_insn_and_split "*x86_shld_2"
(const_int 31)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
+(define_insn_and_split "*x86_shld_ndd_2" ;; Pre-reload splitter: 32-bit double left shift written with an unmasked count.
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "nonmemory_operand")) ; shift count, unmasked in the matched form
+ (lshiftrt:SI (match_operand:SI 2 "register_operand")
+ (minus:QI (const_int 32) (match_dup 3))))) ; complementary count, 32 - operand 3
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && ix86_pre_reload_split ()" ; NOTE(review): x86_shld_ndd tests only TARGET_APX_NDD -- confirm TARGET_64BIT is wanted here
+ "#" ; no assembler template of its own; the insn is always split
+ "&& 1"
+ [(parallel [(set (match_dup 4) ; result goes to the fresh pseudo created in the preparation code
+ (ior:SI (ashift:SI (match_dup 1)
+ (and:QI (match_dup 3) (const_int 31))) ; replacement masks the count with 31
+ (subreg:SI
+ (lshiftrt:DI
+ (zero_extend:DI (match_dup 2))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3)
+ (const_int 31)))) 0)))
+ (clobber (reg:CC FLAGS_REG))
+ (set (match_dup 0) (match_dup 4))])] ; NOTE(review): this copy sits inside the parallel; x86_shld_ndd has only set + clobber -- confirm
+{
+ operands[4] = gen_reg_rtx (SImode);
+ emit_move_insn (operands[4], operands[0]); /* NOTE(review): copies operand 0 into the temporary ahead of the split insns -- confirm this is needed.  */
+})
+
(define_expand "@x86_shift<mode>_adj_1"
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
@@ -15308,6 +15468,24 @@ (define_insn "x86_64_shrd"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shrd_ndd" ;; 64-bit SHRD, NDD form: result is written to operand 0, separate from source operand 1.
+ [(set (match_operand:DI 0 "register_operand" "=r") ; destination, write-only register
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") ; value shifted right (register or memory)
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") ; shift count, masked with 63 below
+ (const_int 63)))
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r")) ; register whose bits fill the vacated high end
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3) (const_int 63)))) 0)))
+ (clobber (reg:CC FLAGS_REG))] ; the flags register is clobbered
+ "TARGET_64BIT && TARGET_APX_NDD"
+ "shrd{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+
(define_insn "x86_64_shrd_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (lshiftrt:DI (match_dup 0)
@@ -15329,6 +15507,24 @@ (define_insn "x86_64_shrd_1"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_64_shrd_ndd_1" ;; 64-bit SHRD, NDD form, with both shift counts given as constants.
+ [(set (match_operand:DI 0 "register_operand" "=r") ; destination, write-only register
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") ; value shifted right (register or memory)
+ (match_operand:QI 3 "const_0_to_63_operand")) ; right-shift count
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r")) ; register whose bits fill the vacated high end
+ (match_operand:QI 4 "const_0_to_255_operand")) 0))) ; left-shift count, tied to operand 3 by the condition
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" ; the two counts must add up to 64
+ "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" ; only operand 3 is printed as the count
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "DI")])
+
+
(define_insn_and_split "*x86_64_shrd_shld_1_nozext"
[(set (match_operand:DI 0 "nonimmediate_operand")
(ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
@@ -15354,6 +15550,23 @@ (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
operands[4] = force_reg (DImode, operands[4]);
emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3],
operands[2]));
}
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
else
{
operands[1] = force_reg (DImode, operands[1]);
@@ -15386,6 +15599,33 @@ (define_insn_and_split "*x86_64_shrd_2"
(const_int 63)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
+(define_insn_and_split "*x86_64_shrd_ndd_2" ;; Pre-reload splitter: 64-bit double right shift written with an unmasked count.
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "nonmemory_operand")) ; shift count, unmasked in the matched form
+ (ashift:DI (match_operand:DI 2 "register_operand")
+ (minus:QI (const_int 64) (match_dup 3))))) ; complementary count is 64 - operand 3 (the QImode count), not the DImode operand 2
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && ix86_pre_reload_split ()"
+ "#" ; no assembler template of its own; the insn is always split
+ "&& 1"
+ [(parallel [(set (match_dup 4) ; result goes to the fresh pseudo created in the preparation code
+ (ior:DI (lshiftrt:DI (match_dup 1)
+ (and:QI (match_dup 3) (const_int 63))) ; replacement masks the count with 63
+ (subreg:DI
+ (ashift:TI
+ (zero_extend:TI (match_dup 2))
+ (minus:QI (const_int 64)
+ (and:QI (match_dup 3)
+ (const_int 63)))) 0)))
+ (clobber (reg:CC FLAGS_REG))
+ (set (match_dup 0) (match_dup 4))])] ; NOTE(review): this copy sits inside the parallel; x86_64_shrd_ndd has only set + clobber -- confirm
+{
+ operands[4] = gen_reg_rtx (DImode);
+ emit_move_insn (operands[4], operands[0]); /* NOTE(review): copies operand 0 into the temporary ahead of the split insns -- confirm this is needed.  */
+})
+
(define_insn "x86_shrd"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0)
@@ -15408,6 +15648,23 @@ (define_insn "x86_shrd"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shrd_ndd" ;; 32-bit SHRD, NDD form: result is written to operand 0, separate from source operand 1.
+ [(set (match_operand:SI 0 "register_operand" "=r") ; destination, write-only register
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") ; value shifted right (register or memory)
+ (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") ; shift count, masked with 31 below
+ (const_int 31)))
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r")) ; register whose bits fill the vacated high end
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3) (const_int 31)))) 0)))
+ (clobber (reg:CC FLAGS_REG))] ; the flags register is clobbered
+ "TARGET_APX_NDD"
+ "shrd{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
(define_insn "x86_shrd_1"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0)
@@ -15429,6 +15686,24 @@ (define_insn "x86_shrd_1"
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
+(define_insn "x86_shrd_ndd_1" ;; 32-bit SHRD, NDD form, with both shift counts given as constants.
+ [(set (match_operand:SI 0 "register_operand" "=r") ; destination, write-only register
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") ; value shifted right (register or memory)
+ (match_operand:QI 3 "const_0_to_31_operand")) ; right-shift count
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r")) ; register whose bits fill the vacated high end
+ (match_operand:QI 4 "const_0_to_63_operand")) 0))) ; left-shift count, tied to operand 3 by the condition
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_APX_NDD
+ && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))" ; the two counts must add up to 32
+ "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" ; only operand 3 is printed as the count
+ [(set_attr "type" "ishift")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "SI")])
+
+
(define_insn_and_split "*x86_shrd_shld_1_nozext"
[(set (match_operand:SI 0 "nonimmediate_operand")
(ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
@@ -15453,7 +15728,24 @@ (define_insn_and_split "*x86_shrd_shld_1_nozext"
operands[4] = force_reg (SImode, operands[4]);
emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3],
operands[2]));
}
- else
+ else if (TARGET_APX_NDD)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ if (MEM_P (operands[4]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ }
+ else if (MEM_P (operands[1]))
+ emit_insn (gen_x86_shld_ndd_1 (tmp, operands[1], operands[4],
+ operands[3], operands[2]));
+ else
+ emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
+ operands[2], operands[3]));
+ emit_move_insn (operands[0], tmp);
+ }
+ else
{
operands[1] = force_reg (SImode, operands[1]);
rtx tmp = gen_reg_rtx (SImode);
@@ -15485,6 +15777,33 @@ (define_insn_and_split "*x86_shrd_2"
(const_int 31)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
+(define_insn_and_split "*x86_shrd_ndd_2" ;; Pre-reload splitter: 32-bit double right shift written with an unmasked count.
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
+ (match_operand:QI 3 "nonmemory_operand")) ; shift count, unmasked in the matched form
+ (ashift:SI (match_operand:SI 2 "register_operand")
+ (minus:QI (const_int 32) (match_dup 3))))) ; complementary count, 32 - operand 3
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_APX_NDD
+ && ix86_pre_reload_split ()" ; NOTE(review): x86_shrd_ndd tests only TARGET_APX_NDD -- confirm TARGET_64BIT is wanted here
+ "#" ; no assembler template of its own; the insn is always split
+ "&& 1"
+ [(parallel [(set (match_dup 4) ; result goes to the fresh pseudo created in the preparation code
+ (ior:SI (lshiftrt:SI (match_dup 1)
+ (and:QI (match_dup 3) (const_int 31))) ; replacement masks the count with 31
+ (subreg:SI
+ (ashift:DI
+ (zero_extend:DI (match_dup 2))
+ (minus:QI (const_int 32)
+ (and:QI (match_dup 3)
+ (const_int 31)))) 0)))
+ (clobber (reg:CC FLAGS_REG))
+ (set (match_dup 0) (match_dup 4))])] ; NOTE(review): this copy sits inside the parallel; x86_shrd_ndd has only set + clobber -- confirm
+{
+ operands[4] = gen_reg_rtx (SImode);
+ emit_move_insn (operands[4], operands[0]); /* NOTE(review): copies operand 0 into the temporary ahead of the split insns -- confirm this is needed.  */
+})
+
;; Base name for insn mnemonic.
(define_mode_attr cvt_mnemonic
[(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c
b/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c
new file mode 100644
index 00000000000..87068ea31aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -Wno-shift-count-overflow -m64 -mapxf" } */
+/* { dg-final { scan-assembler-times {(?n)shld[ql]?[\t ]*\$2} 4 } } */
+/* { dg-final { scan-assembler-times {(?n)shrd[ql]?[\t ]*\$2} 4 } } */
+
+typedef unsigned long u64;
+typedef unsigned int u32;
+
+long a;
+int c;
+const char n = 2; /* Constant shift count; the scans above look for an immediate of 2.  */
+/* Signed-typed cases: one global is shifted by n and OR-ed with the argument shifted the other way by (width - n).  */
+long test64r (long e) { long t = ((u64)a >> n) | (e << (64 - n)); return t;}
+long test64l (u64 e) { long t = (a << n) | (e >> (64 - n)); return t;}
+int test32r (int f) { int t = ((u32)c >> n) | (f << (32 - n)); return t; }
+int test32l (u32 f) { int t = (c << n) | (f >> (32 - n)); return t; }
+
+u64 ua;
+u32 uc;
+/* Unsigned cases: the same four shapes; in total four "l" and four "r" functions, matching the two scan counts of 4.  */
+u64 testu64l (u64 ue) { u64 ut = (ua << n) | (ue >> (64 - n)); return ut; }
+u64 testu64r (u64 ue) { u64 ut = (ua >> n) | (ue << (64 - n)); return ut; }
+u32 testu32l (u32 uf) { u32 ut = (uc << n) | (uf >> (32 - n)); return ut; }
+u32 testu32r (u32 uf) { u32 ut = (uc >> n) | (uf << (32 - n)); return ut; }
--
2.31.1