Use the previous scratch register if it is a general purpose register wide enough to supply the required mode, or an SSE register which is valid in the required mode. If the required mode is QImode, the previous scratch register must be valid in QImode. Also match const0_rtx against CONST0_RTX.
gcc/ PR target/125958 * config/i386/i386-expand.cc (ix86_expand_lcp_stall_peephole): Update previous scratch register check. Match const0_rtx against CONST0_RTX. gcc/testsuite/ PR target/125958 * gcc.target/i386/pr125958a.c: New test. * gcc.target/i386/pr125958b.c: Likewise. -- H.J.
From 787ccc424956d2f9fe019d17b8f4f151919c1538 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <[email protected]> Date: Wed, 24 Jun 2026 06:45:11 +0800 Subject: [PATCH] x86: Update previous scratch register check in LCP stall peepholes Use the previous scratch register if it is a general purpose register wide enough to supply the required mode, or an SSE register which is valid in the required mode. If the required mode is QImode, the previous scratch register must be valid in QImode. Also match const0_rtx against CONST0_RTX. gcc/ PR target/125958 * config/i386/i386-expand.cc (ix86_expand_lcp_stall_peephole): Update previous scratch register check. Match const0_rtx against CONST0_RTX. gcc/testsuite/ PR target/125958 * gcc.target/i386/pr125958a.c: New test. * gcc.target/i386/pr125958b.c: Likewise. Signed-off-by: H.J. Lu <[email protected]> --- gcc/config/i386/i386-expand.cc | 23 +++++++++++++++-------- gcc/testsuite/gcc.target/i386/pr125958a.c | 22 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr125958b.c | 16 ++++++++++++++++ 3 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr125958a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr125958b.c diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 429f00f03e9..d533b8a26d1 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -28313,17 +28313,24 @@ ix86_expand_lcp_stall_peephole (rtx_insn *insn, rtx *operands, rtx dest = SET_DEST (set); - /* Reject DEST if a register is not wide enough to - supply MODE or invalid for QImode. */ - if (!REG_P (dest) - || (GET_MODE_SIZE (GET_MODE (dest)) - < GET_MODE_SIZE (mode)) - || (mode == QImode - && !ANY_QI_REGNO_P (REGNO (dest)))) + /* Reject DEST unless it is a general purpose register + wide enough to supply MODE and valid for QImode when + MODE is QImode, or an SSE register which is valid + for MODE. 
*/ + if ((!GENERAL_REG_P (dest) + || (GET_MODE_SIZE (GET_MODE (dest)) + < GET_MODE_SIZE (mode)) + || (mode == QImode + && !ANY_QI_REGNO_P (REGNO (dest)))) + && (mode == QImode + || (mode == HImode && !TARGET_AVX512FP16) + || !SSE_REG_P (dest))) continue; rtx src = SET_SRC (set); - if (rtx_equal_p (src, imm)) + if (rtx_equal_p (src, imm) + || (imm == const0_rtx + && src == CONST0_RTX (GET_MODE (dest)))) { /* A previous scratch register is found. */ prev_scratch = dest; diff --git a/gcc/testsuite/gcc.target/i386/pr125958a.c b/gcc/testsuite/gcc.target/i386/pr125958a.c new file mode 100644 index 00000000000..c747cb97596 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr125958a.c @@ -0,0 +1,22 @@ +/* { dg-do compile { target fpic } } */ +/* { dg-options "-O2 -fno-pic -march=x86-64" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target *-*-* } {^\t?\.} } } */ + +/* +**vcn_init_session_buf_fc: +**.LFB0: +**... +** pxor %xmm0, %xmm0 +** xorl %[a-z0-9]+, %[a-z0-9]+ +** movw %[a-z0-9]+, 16\(%[a-z0-9]+\) +** movups %xmm0, \(%[a-z0-9]+\) +**... +*/ + +const short ar[9] = {}; +void +vcn_init_session_buf_fc (void * r) +{ + __builtin_memcpy (r, ar,sizeof(ar)); +} diff --git a/gcc/testsuite/gcc.target/i386/pr125958b.c b/gcc/testsuite/gcc.target/i386/pr125958b.c new file mode 100644 index 00000000000..fb6a0adca15 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr125958b.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target fpic } } */ +/* { dg-options "-O2 -fno-pic -march=x86-64 -mavx512fp16" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target *-*-* } {^\t?\.} } } */ + +/* +**vcn_init_session_buf_fc: +**.LFB0: +**... +** vpxor %xmm0, %xmm0, %xmm0 +** vmovdqu %xmm0, \(%[a-z0-9]+\) +** vmovw %xmm0, 16\(%[a-z0-9]+\) +**... +*/ + +#include "pr125958a.c" -- 2.54.0
