https://gcc.gnu.org/g:b6fb4f7f651d2aa89548c5833fe2679af2638df5

commit r15-2940-gb6fb4f7f651d2aa89548c5833fe2679af2638df5
Author: Roger Sayle <ro...@nextmovesoftware.com>
Date:   Thu Aug 15 22:02:05 2024 +0100

    i386: Improve split of *extendv2di2_highpart_stv_noavx512vl.
    
    This patch follows up on the previous patch to fix PR target/116275 by
    improving the code STV (ultimately) generates for highpart sign extensions
    like (x<<8)>>8.  The arithmetic right shift is able to take advantage of
    the available common subexpressions from the preceding left shift.
    
    Hence previously with -O2 -m32 -mavx -mno-avx512vl we'd generate:
    
            vpsllq  $8, %xmm0, %xmm0
            vpsrad  $8, %xmm0, %xmm1
            vpsrlq  $8, %xmm0, %xmm0
            vpblendw        $51, %xmm0, %xmm1, %xmm0
    
    But with improved splitting, we now generate three instructions:
    
            vpslld  $8, %xmm1, %xmm0
            vpsrad  $8, %xmm0, %xmm0
            vpblendw        $51, %xmm1, %xmm0, %xmm0
    
    This patch also implements Uros' suggestion that the pre-reload
    splitter could introduce a new pseudo to hold the intermediate
    to potentially help reload with register allocation, which applies
    when not performing the above optimization, i.e. on TARGET_XOP.
    
    2024-08-15  Roger Sayle  <ro...@nextmovesoftware.com>
                Uros Bizjak  <ubiz...@gmail.com>
    
    gcc/ChangeLog
            * config/i386/i386.md (*extendv2di2_highpart_stv_noavx512vl): Split
            to an improved implementation on !TARGET_XOP.  On TARGET_XOP, use
            a new pseudo for the intermediate to simplify register allocation.
    
    gcc/testsuite/ChangeLog
            * g++.target/i386/pr116275-2.C: New test case.

Diff:
---
 gcc/config/i386/i386.md                    | 32 ++++++++++++++++++++++++++++--
 gcc/testsuite/g++.target/i386/pr116275-2.C | 19 ++++++++++++++++++
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index efbab2f25ec..36108e5c2c9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17872,10 +17872,38 @@
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
-  [(set (match_dup 0)
+  [(set (match_dup 4)
        (ashift:V2DI (match_dup 1) (match_dup 2)))
    (set (match_dup 0)
-       (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
+       (ashiftrt:V2DI (match_dup 4) (match_dup 2)))]
+{
+  if (!TARGET_XOP)
+    {
+      rtx op0 = operands[0];
+      rtx op2 = operands[2];
+      rtx tmp1 = gen_reg_rtx (V4SImode);
+      rtx tmp2 = gen_reg_rtx (V4SImode);
+      rtx tmp3 = gen_reg_rtx (V4SImode);
+      rtx tmp4 = gen_reg_rtx (V4SImode);
+      emit_move_insn (tmp1, lowpart_subreg (V4SImode, operands[1], V2DImode));
+      emit_insn (gen_ashlv4si3 (tmp2, tmp1, op2));
+      emit_insn (gen_ashrv4si3 (tmp3, tmp2, op2));
+      vec_perm_builder sel (4, 4, 1);
+      sel.quick_grow (4);
+      sel[0] = 0;
+      sel[1] = 5;
+      sel[2] = 2;
+      sel[3] = 7;
+      vec_perm_indices indices(sel, 2, 4);
+      bool ok = targetm.vectorize.vec_perm_const (V4SImode, V4SImode, tmp4,
+                                                 tmp1, tmp3, indices);
+      gcc_assert (ok);
+      emit_move_insn (op0, lowpart_subreg (V2DImode, tmp4, V4SImode));
+      DONE;
+    }
+  else
+    operands[4] = gen_reg_rtx (V2DImode);
+})
 
 ;; Rotate instructions
 
diff --git a/gcc/testsuite/g++.target/i386/pr116275-2.C b/gcc/testsuite/g++.target/i386/pr116275-2.C
new file mode 100644
index 00000000000..98d3c19e59c
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr116275-2.C
@@ -0,0 +1,19 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -mavx -mno-avx512vl -std=c++11" } */
+
+struct SymbolDesc push_back(SymbolDesc);
+struct SymbolDesc {
+  long long ELFLocalSymIdx;
+};
+struct Expected {
+  long long &operator*();
+};
+void SymbolizableObjectFileaddSymbol() {
+  Expected SymbolAddressOrErr;
+  long long SymbolAddress = *SymbolAddressOrErr << 8 >> 8;
+  push_back({SymbolAddress});
+}
+
+/* { dg-final { scan-assembler "vpslld" } } */
+/* { dg-final { scan-assembler-not "vpsllq" } } */
+/* { dg-final { scan-assembler-not "vpsrlq" } } */

Reply via email to