This is another step towards a possible solution for PR 105137.
This patch introduces a define_insn_and_split for extendditi2,
that allows DImode to TImode sign-extension to be represented in
the early RTL optimizers, before being split post-reload into
the exact same idiom as currently produced by RTL expansion.
Typically this produces the identical code, so the first new
test case:
__int128 foo(long long x) { return (__int128)x; }
continues to generate:
foo: movq %rdi, %rax
cqto
ret
The "magic" is that this representation allows combine and the
other RTL optimizers to do a better job. Hence, the second
test case:
__int128 foo(__int128 a, long long b) {
a += ((__int128)b) << 70;
return a;
}
which mainline with -O2 currently generates as:
foo: movq %rsi, %rax
movq %rdx, %rcx
movq %rdi, %rsi
salq $6, %rcx
movq %rax, %rdi
xorl %eax, %eax
movq %rcx, %rdx
addq %rsi, %rax
adcq %rdi, %rdx
ret
with this patch now becomes:
foo: movl $0, %eax
salq $6, %rdx
addq %rdi, %rax
adcq %rsi, %rdx
ret
i.e. the same code for the signed and unsigned extension variants.
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures. Ok for mainline?
2022-12-28 Roger Sayle <[email protected]>
gcc/ChangeLog
* config/i386/i386.md (extendditi2): New define_insn_and_split
to split DImode to TImode sign-extension after reload.
gcc/testsuite/ChangeLog
* gcc.target/i386/extendditi2-1.c: New test case.
* gcc.target/i386/extendditi2-2.c: Likewise.
Thanks in advance,
Roger
--
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0626752..fabddc2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4756,6 +4756,38 @@
(if_then_else (eq_attr "prefix_0f" "0")
(const_string "0")
(const_string "1")))])
+
+(define_insn_and_split "extendditi2"  ;; DImode -> TImode sign extension.
+  [(set (match_operand:TI 0 "register_operand" "=r")
+	(sign_extend:TI (match_operand:DI 1 "register_operand" "r")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"                      ;; No direct assembly output; the insn is split.
+  "&& reload_completed"    ;; Split only after reload.
+  [(const_int 0)]          ;; Placeholder; the C code below emits the insns.
+{
+  split_double_mode (TImode, &operands[0], 1, &operands[2], &operands[3]);
+  if (REGNO (operands[1]) != REGNO (operands[2]))
+    emit_move_insn (operands[2], operands[1]);  /* Low half = the input.  */
+  /* Use the low-half copy as the shift source when it lives in AX_REG.  */
+  rtx src = operands[1];
+  if (REGNO (operands[2]) == AX_REG)
+    src = operands[2];
+
+  /* Generate a cqto if possible and doing so is profitable.  */
+  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+      && REGNO (operands[3]) == DX_REG
+      && REGNO (src) == AX_REG)
+    {
+      emit_insn (gen_ashrdi3_cvt (operands[3], src, GEN_INT (63)));
+      DONE;  /* Stop here so the (const_int 0) template is not emitted.  */
+    }
+  /* Otherwise copy the input into the high half and shift it right by 63.  */
+  if (REGNO (operands[1]) != REGNO (operands[3]))
+    emit_move_insn (operands[3], operands[1]);
+  emit_insn (gen_ashrdi3_cvt (operands[3], operands[3], GEN_INT (63)));
+  DONE;
+})
;; Conversions between float and double.
diff --git a/gcc/testsuite/gcc.target/i386/extendditi2-1.c b/gcc/testsuite/gcc.target/i386/extendditi2-1.c
new file mode 100644
index 0000000..846afef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extendditi2-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+/* Sign-extend a 64-bit value to __int128; the scan below expects cqto.  */
+__int128 foo(long long x)
+{
+ return (__int128)x;
+}
+/* { dg-final { scan-assembler "cqto" } } */
diff --git a/gcc/testsuite/gcc.target/i386/extendditi2-2.c b/gcc/testsuite/gcc.target/i386/extendditi2-2.c
new file mode 100644
index 0000000..dbfa6fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/extendditi2-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+/* Signed case: add B, converted to __int128 and shifted left by 70, to A.  */
+__int128 foo(__int128 a, long long b) {
+ a += ((__int128)b) << 70;
+ return a;
+}
+/* Unsigned case: same computation as foo, but B is unsigned long long.  */
+__int128 bar(__int128 a, unsigned long long b) {
+ a += ((__int128)b) << 70;
+ return a;
+}
+/* { dg-final { scan-assembler-not "movq" } } */