This patch enhances the i386 backend's stv2 pass to consider the
pattern (zero_extend:DI (mem:SI ...)) to be a candidate for conversion.
Loading an SImode value into an SSE register clears the rest of the
vector, i.e. effectively (v4si){ mem, 0, 0, 0 }, which can be used
to conveniently implement zero-extension to DImode, when performing
V2DImode Scalar-To-Vector (STV) conversion.
Consider the new test case:
long long y,z;
unsigned int p;
void foo()
{
long long t = p;
t ^= y;
z = t;
}
With -m32 -O2 -msse2 this currently generates:
foo: movl p, %eax
xorl %edx, %edx
movd %edx, %xmm1
movd %eax, %xmm0
punpckldq %xmm1, %xmm0
movq y, %xmm1
pxor %xmm1, %xmm0
movq %xmm0, z
ret
With this patch we now generate:
foo: movq y, %xmm1
movd p, %xmm0
pxor %xmm1, %xmm0
movq %xmm0, z
ret
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures. Ok for mainline?
2026-06-30 Roger Sayle <[email protected]>
gcc/ChangeLog
* config/i386/i386-features.cc (compute_convert_gain) <ZERO_EXTEND>:
Provide costs for the new transformation.
(convert_insn): Implement *zero_extendsidi2 using the backend's
vec_setv2di_0_zero_extendsi_1 pattern (i.e. movd mem, %xmm).
(general_scalar_to_vector_candidate_p): Consider the pattern
(zero_extend:DI (mem:SI ...)) to be a candidate for DImode STV.
gcc/testsuite/ChangeLog
* gcc.target/i386/sse2-stv-6.c: New test case.
Thanks in advance,
Roger
--
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 10d84f52001..e0fc13efe86 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -863,6 +863,16 @@ general_scalar_chain::compute_convert_gain ()
}
break;
+ case ZERO_EXTEND:
+ /* mov eax (6 bytes) vs movd xmm0 (8 bytes). */
+ /* mov eax; xor edx,edx (7 bytes). */
+ if (speed_p)
+ igain += COSTS_N_INSNS (ix86_cost->int_load[2]
+ - ix86_cost->sse_load[0]) / 2;
+ else
+ igain += COSTS_N_BYTES (TARGET_64BIT ? -2 : -1);
+ break;
+
default:
gcc_unreachable ();
}
@@ -1588,6 +1598,11 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
}
break;
+ case ZERO_EXTEND:
+ /* *zero_extendsidi2 becomes *vec_setv2di_0_zero_extendsi_1. */
+ src = gen_rtx_VEC_CONCAT (V2DImode, src, const0_rtx);
+ break;
+
default:
gcc_unreachable ();
}
@@ -2499,6 +2514,13 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
&& XVECLEN (XEXP (src, 1), 0) == 1
&& CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
+ case ZERO_EXTEND:
+ /* *zero_extendsidi2 becomes *vec_setv2di_0_zero_extendsi_1. */
+ return mode == DImode
+ && REG_P (dst)
+ && GET_MODE (XEXP (src, 0)) == SImode
+ && MEM_P (XEXP (src, 0));
+
default:
return false;
}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-stv-6.c b/gcc/testsuite/gcc.target/i386/sse2-stv-6.c
new file mode 100644
index 00000000000..4e1095418da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-stv-6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-m32 -O2 -msse2 -mno-stackrealign" } */
+
+long long y,z;
+unsigned int p;
+
+void foo()
+{
+ long long t = p;
+ t ^= y;
+ z = t;
+}
+
+/* { dg-final { scan-assembler-not "movl" } } */
+/* { dg-final { scan-assembler-not "xorl" } } */
+/* { dg-final { scan-assembler-not "punpckldq" } } */
+