https://gcc.gnu.org/g:973097d801a30385cd39a570624eefa7547f8ff3
commit r14-10528-g973097d801a30385cd39a570624eefa7547f8ff3
Author: Jakub Jelinek <ja...@redhat.com>
Date:   Thu Aug 1 10:32:54 2024 +0200

    i386: Fix up *<extract_type>_vinsert<shuffletype><extract_suf>_0 [PR115981]
    
    The r14-537 change started canonicalizing VEC_MERGE operands based on
    swap_commutative_operands_p or, if they have the same precedence, on the
    least significant bit of the third operand.
    The *<extract_type>_vinsert<shuffletype><extract_suf>_0 pattern was
    added for combine matching and no longer triggers after that change,
    as it used the reg_or_0_operand as the first operand and VEC_DUPLICATE
    as the second.
    Now, reg_or_0_operand could be a REG, SUBREG of object or CONST_VECTOR.
    REG has commutative_operand_precedence -1 or -2, SUBREG of object -3,
    CONST_VECTOR -4, while VEC_DUPLICATE has 0, so VEC_DUPLICATE will always
    go first and REG, SUBREG or CONST_VECTOR second.
    
    This patch swaps the operands so that it matches again.
    
    2024-08-01  Jakub Jelinek  <ja...@redhat.com>
    
            PR target/115981
            * config/i386/sse.md
            (*<extract_type>_vinsert<shuffletype><extract_suf>_0): Swap the
            first two VEC_MERGE operands, renumber match_operands and test
            for 0xF or 0x3 rather than 0xFFF0 or 0xFC immediate.
    
            * gcc.target/i386/avx512dq-pr90991-1.c: Add tests for no separate
            zero extension instructions.
            * gcc.target/i386/avx512dq-pr90991-2.c: Likewise.
    
    (cherry picked from commit df2b444a233e93b987adec76655ab89589b3fa10)

Diff:
---
 gcc/config/i386/sse.md                             | 42 +++++++++++-----------
 gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c |  3 ++
 gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c |  3 ++
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1bf50726e830..073aae293d4c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -19124,47 +19124,47 @@
 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
   [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
        (vec_merge:AVX512_VEC
-         (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
          (vec_duplicate:AVX512_VEC
-               (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
+           (match_operand:<ssequartermode> 1 "nonimmediate_operand" "vm,xm,vm"))
+         (match_operand:AVX512_VEC 2 "reg_or_0_operand" "v,C,C")
          (match_operand:SI 3 "const_int_operand")))]
   "TARGET_AVX512F
    && (INTVAL (operands[3])
-       == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
+       == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF : 0x3))"
 {
   if (which_alternative == 0)
-    return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
+    return "vinsert<shuffletype><extract_suf>\t{$0, %1, %2, %0|%0, %2, %1, 0}";
   bool egpr_used = (TARGET_APX_EGPR
-                   && x86_extended_rex2reg_mentioned_p (operands[2]));
-  const char *align_templ = egpr_used ? "vmovaps\t{%2, %x0|%x0, %2}"
-                                     : "vmovdqa\t{%2, %x0|%x0, %2}";
-  const char *unalign_templ = egpr_used ? "vmovups\t{%2, %x0|%x0, %2}"
-                                       : "vmovdqu\t{%2, %x0|%x0, %2}";
+                   && x86_extended_rex2reg_mentioned_p (operands[1]));
+  const char *align_templ = egpr_used ? "vmovaps\t{%1, %x0|%x0, %1}"
+                                     : "vmovdqa\t{%1, %x0|%x0, %1}";
+  const char *unalign_templ = egpr_used ? "vmovups\t{%1, %x0|%x0, %1}"
+                                       : "vmovdqu\t{%1, %x0|%x0, %1}";
   switch (<MODE>mode)
     {
     case E_V8DFmode:
-      if (misaligned_operand (operands[2], <ssequartermode>mode))
-       return "vmovupd\t{%2, %x0|%x0, %2}";
+      if (misaligned_operand (operands[1], <ssequartermode>mode))
+       return "vmovupd\t{%1, %x0|%x0, %1}";
       else
-       return "vmovapd\t{%2, %x0|%x0, %2}";
+       return "vmovapd\t{%1, %x0|%x0, %1}";
     case E_V16SFmode:
-      if (misaligned_operand (operands[2], <ssequartermode>mode))
-       return "vmovups\t{%2, %x0|%x0, %2}";
+      if (misaligned_operand (operands[1], <ssequartermode>mode))
+       return "vmovups\t{%1, %x0|%x0, %1}";
       else
-       return "vmovaps\t{%2, %x0|%x0, %2}";
+       return "vmovaps\t{%1, %x0|%x0, %1}";
     case E_V8DImode:
-      if (misaligned_operand (operands[2], <ssequartermode>mode))
-       return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
+      if (misaligned_operand (operands[1], <ssequartermode>mode))
+       return which_alternative == 2 ? "vmovdqu64\t{%1, %x0|%x0, %1}"
                                      : unalign_templ;
       else
-       return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
+       return which_alternative == 2 ? "vmovdqa64\t{%1, %x0|%x0, %1}"
                                      : align_templ;
     case E_V16SImode:
-      if (misaligned_operand (operands[2], <ssequartermode>mode))
-       return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
+      if (misaligned_operand (operands[1], <ssequartermode>mode))
+       return which_alternative == 2 ? "vmovdqu32\t{%1, %x0|%x0, %1}"
                                      : unalign_templ;
       else
-       return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
+       return which_alternative == 2 ? "vmovdqa32\t{%1, %x0|%x0, %1}"
                                      : align_templ;
     default:
       gcc_unreachable ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c
index 6c968126b7d3..5d19b07bdee2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c
@@ -7,6 +7,9 @@
 /* { dg-final { scan-assembler-times "vmovups\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
 /* { dg-final { scan-assembler-times "vmovupd\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vmovaps\[ \t]\+%xmm0, %xmm0" } } */
+/* { dg-final { scan-assembler-not "vmovapd\[ \t]\+%xmm0, %xmm0" } } */
+/* { dg-final { scan-assembler-not "vmovdqa\[ \t]\+%xmm0, %xmm0" } } */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c
index 7699c3149aee..68f53189d3eb 100644
--- a/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr90991-2.c
@@ -7,6 +7,9 @@
 /* { dg-final { scan-assembler-times "vmovups\[ \t]\+\\(\[^\n\r]*\\), %ymm0" 1 } } */
 /* { dg-final { scan-assembler-times "vmovupd\[ \t]\+\\(\[^\n\r]*\\), %ymm0" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu\[ \t]\+\\(\[^\n\r]*\\), %ymm0" 1 } } */
+/* { dg-final { scan-assembler-not "vmovaps\[ \t]\+%ymm0, %ymm0" } } */
+/* { dg-final { scan-assembler-not "vmovapd\[ \t]\+%ymm0, %ymm0" } } */
+/* { dg-final { scan-assembler-not "vmovdqa\[ \t]\+%ymm0, %ymm0" } } */
 
 #include <x86intrin.h>

Reply via email to