https://gcc.gnu.org/g:5938cf021e95b40b040974c9cbe7860399247f7f

commit r15-1701-g5938cf021e95b40b040974c9cbe7860399247f7f
Author: Roger Sayle <ro...@nextmovesoftware.com>
Date:   Fri Jun 28 07:12:53 2024 +0100

    i386: Some additional AVX512 ternlog refinements.
    
    This patch is another round of refinements to fine-tune the new ternlog
    infrastructure in i386's sse.md.  This patch tweaks ix86_ternlog_idx
    to allow multiple MEM/CONST_VECTOR/VEC_DUPLICATE operands prior to
    splitting (before reload), at which point force_register is called on
    all but one of these operands.  Conceptually, during the dynamic
    programming, registers fill the args slots in the order 0, 1, 2, and
    mem-like operands fill the slots in the order 2, 0, 1 [preferring the
    memory operand to come last].
    
    This patch allows us to remove some of the legacy ternlog patterns
    in sse.md without regressions [which is left to the next and final
    patch in this series].  An indication that these patterns are no
    longer required is shown by the necessary testsuite tweaks below:
    the assembler output for the legacy instructions used hexadecimal
    immediates, whereas the new ternlog infrastructure consistently
    uses decimal.
    
    2024-06-28  Roger Sayle  <ro...@nextmovesoftware.com>
    
    gcc/ChangeLog
            * config/i386/i386-expand.cc (ix86_ternlog_idx) <case VEC_DUPLICATE>:
            Add a "goto do_mem_operand" as this need not match memory_operand.
            <case CONST_VECTOR>: Only args[2] may be a volatile memory operand.
            Allow MEM/VEC_DUPLICATE/CONST_VECTOR as args[0] and args[1].
    
    gcc/testsuite/ChangeLog
            * gcc.target/i386/avx512f-andn-di-zmm-2.c: Match decimal instead
            of hexadecimal immediate operand to ternlog.
            * gcc.target/i386/avx512f-andn-si-zmm-2.c: Likewise.
            * gcc.target/i386/avx512f-orn-si-zmm-1.c: Likewise.
            * gcc.target/i386/avx512f-orn-si-zmm-2.c: Likewise.
            * gcc.target/i386/pr100711-3.c: Likewise.
            * gcc.target/i386/pr100711-4.c: Likewise.
            * gcc.target/i386/pr100711-5.c: Likewise.

Diff:
---
 gcc/config/i386/i386-expand.cc                     | 35 ++++++++++++++++++++--
 .../gcc.target/i386/avx512f-andn-di-zmm-2.c        |  2 +-
 .../gcc.target/i386/avx512f-andn-si-zmm-2.c        |  2 +-
 .../gcc.target/i386/avx512f-orn-si-zmm-1.c         |  2 +-
 .../gcc.target/i386/avx512f-orn-si-zmm-2.c         |  2 +-
 gcc/testsuite/gcc.target/i386/pr100711-3.c         |  2 +-
 gcc/testsuite/gcc.target/i386/pr100711-4.c         |  2 +-
 gcc/testsuite/gcc.target/i386/pr100711-5.c         |  2 +-
 8 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index eccad080f7c..dd2c3a8718e 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25606,7 +25606,7 @@ ix86_ternlog_idx (rtx op, rtx *args)
     case VEC_DUPLICATE:
       if (!bcst_mem_operand (op, GET_MODE (op)))
        return -1;
-      /* FALLTHRU */
+      goto do_mem_operand;
 
     case MEM:
       if (!memory_operand (op, GET_MODE (op)))
@@ -25618,23 +25618,52 @@ ix86_ternlog_idx (rtx op, rtx *args)
       /* FALLTHRU */
 
     case CONST_VECTOR:
+do_mem_operand:
       if (!args[2])
        {
          args[2] = op;
          return 0xaa;
        }
       /* Maximum of one volatile memory reference per expression.  */
-      if (side_effects_p (op) && side_effects_p (args[2]))
+      if (side_effects_p (op))
        return -1;
       if (rtx_equal_p (op, args[2]))
        return 0xaa;
-      /* Check if one CONST_VECTOR is the ones-complement of the other.  */
+      /* Check if CONST_VECTOR is the ones-complement of args[2].  */
       if (GET_CODE (op) == CONST_VECTOR
          && GET_CODE (args[2]) == CONST_VECTOR
          && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
                                                          op, GET_MODE (op)),
                          args[2]))
        return 0x55;
+      if (!args[0])
+       {
+         args[0] = op;
+         return 0xf0;
+       }
+      if (rtx_equal_p (op, args[0]))
+       return 0xf0;
+      /* Check if CONST_VECTOR is the ones-complement of args[0].  */
+      if (GET_CODE (op) == CONST_VECTOR
+         && GET_CODE (args[0]) == CONST_VECTOR
+         && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
+                                                         op, GET_MODE (op)),
+                         args[0]))
+       return 0x0f;
+      if (!args[1])
+       {
+         args[1] = op;
+         return 0xcc;
+       }
+      if (rtx_equal_p (op, args[1]))
+       return 0xcc;
+      /* Check if CONST_VECTOR is the ones-complement of args[1].  */
+      if (GET_CODE (op) == CONST_VECTOR
+         && GET_CODE (args[1]) == CONST_VECTOR
+         && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
+                                                         op, GET_MODE (op)),
+                         args[1]))
+       return 0x33;
       return -1;
 
     case NOT:
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
index 4ebb30fa213..24f3d6c8e62 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
-/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$80, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
index 86e7ebe7d72..1f5e72da177 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$80, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c 
b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
index 7d02f033ae8..d21f48fe3e4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xdd, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$245, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c 
b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
index c7930836fd2..53592005f68 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
-/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xbb, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$175, 
\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-3.c 
b/gcc/testsuite/gcc.target/i386/pr100711-3.c
index 98cc1c35beb..ea6019084a0 100644
--- a/gcc/testsuite/gcc.target/i386/pr100711-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c
@@ -39,4 +39,4 @@ v8di foo_v8di (long long a, v8di b)
 
 /* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { 
target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$80" 2 { 
target { ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-4.c 
b/gcc/testsuite/gcc.target/i386/pr100711-4.c
index 26152d63f8c..4ca1292e442 100644
--- a/gcc/testsuite/gcc.target/i386/pr100711-4.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-4.c
@@ -39,4 +39,4 @@ v8di foo_v8di (long long a, v8di b)
 
 /* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$207" 4 { 
target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$207" 2 { target { 
ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xdd" 2 { 
target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$245" 2 { 
target { ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-5.c 
b/gcc/testsuite/gcc.target/i386/pr100711-5.c
index 820bed88ed5..640787ef97f 100644
--- a/gcc/testsuite/gcc.target/i386/pr100711-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-5.c
@@ -39,5 +39,5 @@ v8di foo_v8di (long long a, v8di b)
 
 /* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$195" 4 { 
target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$195" 2 { target { 
ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x99" 2 { 
target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$165" 2 { 
target { ia32 } } } } */

Reply via email to