https://gcc.gnu.org/g:83880beea997e435f56de9cab08929e213732025
commit r17-2059-g83880beea997e435f56de9cab08929e213732025
Author: Roger Sayle <[email protected]>
Date:   Wed Jul 1 14:31:10 2026 +0100

    i386: Handle (zero_extend:DI (mem:SI)) in x86's STV.

    This patch enhances the i386 backend's stv2 pass to consider the pattern
    (zero_extend:DI (mem:SI ...)) to be a candidate for conversion.  Loading
    an SImode value into an SSE register clears the rest of the vector, i.e.
    effectively (v4si){ mem, 0, 0, 0 }, which can be used to conveniently
    implement zero-extension to DImode, when performing V2DImode
    Scalar-To-Vector (STV) conversion.

    Consider the new test case:

    long long y,z;
    unsigned int p;

    void foo()
    {
      long long t = p;
      t ^= y;
      z = t;
    }

    With -m32 -O2 -msse2 this currently generates:

    foo:    movl    p, %eax
            xorl    %edx, %edx
            movd    %edx, %xmm1
            movd    %eax, %xmm0
            punpckldq       %xmm1, %xmm0
            movq    y, %xmm1
            pxor    %xmm1, %xmm0
            movq    %xmm0, z
            ret

    With this patch we now generate:

    foo:    movq    y, %xmm1
            movd    p, %xmm0
            pxor    %xmm1, %xmm0
            movq    %xmm0, z
            ret

    2026-07-01  Roger Sayle  <[email protected]>

    gcc/ChangeLog
            * config/i386/i386-features.cc (compute_convert_gain)
            <ZERO_EXTEND>: Provide costs for the new transformation.
            (convert_insn): Implement *zero_extendsidi2 using the backend's
            vec_setv2di_0_zero_extendsi_1 pattern (i.e. movq mem, %xmm).
            (general_scalar_to_vector_candidate_p): Consider the pattern
            (zero_extend:DI (mem:SI ...)) to be a candidate for DImode STV.

    gcc/testsuite/ChangeLog
            * gcc.target/i386/sse2-stv-6.c: New test case.

Diff:
---
 gcc/config/i386/i386-features.cc           | 22 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/sse2-stv-6.c | 17 +++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 10d84f520016..e0fc13efe86e 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -863,6 +863,16 @@ general_scalar_chain::compute_convert_gain ()
               }
             break;
 
+          case ZERO_EXTEND:
+            /* mov eax (6 bytes) vs movd xmm0 (8 bytes).  */
+            /* mov eax; xor edx,edx (7 bytes).  */
+            if (speed_p)
+              igain += COSTS_N_INSNS (ix86_cost->int_load[2]
+                                      - ix86_cost->sse_load[0]) / 2;
+            else
+              igain += COSTS_N_BYTES (TARGET_64BIT ? -2 : -1);
+            break;
+
           default:
             gcc_unreachable ();
           }
@@ -1588,6 +1598,11 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
         }
       break;
 
+    case ZERO_EXTEND:
+      /* *zero_extendsidi2 becomes *vec_setv2di_0_zero_extendsi_1.  */
+      src = gen_rtx_VEC_CONCAT (V2DImode, src, const0_rtx);
+      break;
+
     default:
       gcc_unreachable ();
     }
@@ -2499,6 +2514,13 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
              && XVECLEN (XEXP (src, 1), 0) == 1
              && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
 
+    case ZERO_EXTEND:
+      /* *zero_extendsidi2 becomes *vec_setv2di_0_zero_extendsi_1.  */
+      return mode == DImode
+             && REG_P (dst)
+             && GET_MODE (XEXP (src, 0)) == SImode
+             && MEM_P (XEXP (src, 0));
+
     default:
       return false;
     }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-stv-6.c b/gcc/testsuite/gcc.target/i386/sse2-stv-6.c
new file mode 100644
index 000000000000..4e1095418daa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-stv-6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-m32 -O2 -msse2 -mno-stackrealign" } */
+
+long long y,z;
+unsigned int p;
+
+void foo()
+{
+  long long t = p;
+  t ^= y;
+  z = t;
+}
+
+/* { dg-final { scan-assembler-not "movl" } } */
+/* { dg-final { scan-assembler-not "xorl" } } */
+/* { dg-final { scan-assembler-not "punpckldq" } } */
+
