This patch applies stricter predicates and constraints for LD and LWA
instructions with power10 fusion.  These instructions are DS-form instructions,
which means that the bottom 2 bits of the address must be 0.  In the past, we
did not use the stricter predicates and constraints, and if the user used the
-fstack-protector option, it would generate a non-prefixed load instruction
whose offset was too big if the stack is large.

This patch has been tested on:

    * Little endian power9 with both IEEE and IBM long double
    * Little endian power10
    * Big endian power8 using both 32-bit and 64-bit code generation.

Can I check this into the master branch?  Assuming I can check this in, I will
also commit to the active GCC branches after a burn-in period.

2023-05-10   Michael Meissner  <meiss...@linux.ibm.com>

gcc/

        PR target/105325
        * config/rs6000/genfusion.pl (print_ld_cmpi_p10): Use "YZ" constraints
        for DS-form loads.  Set the sign_extend attribute for loads that do sign
        extension.  Use the lwa_operand predicate for the LWA instruction.
        * config/rs6000/fusion.md: Regenerate.

gcc/testsuite/

        PR target/105325
        * g++.target/powerpc/pr105325.C: New test.
        * gcc.target/powerpc/fusion-p10-ldcmpi.c: Adjust insn counts.
---
 gcc/config/rs6000/fusion.md                   | 17 +++++++-----
 gcc/config/rs6000/genfusion.pl                | 20 +++++++++++---
 gcc/testsuite/g++.target/powerpc/pr105325.C   | 26 +++++++++++++++++++
 .../gcc.target/powerpc/fusion-p10-ldcmpi.c    |  4 +--
 4 files changed, 54 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/pr105325.C

diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 81ba4b33940..836dbd20948 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -129,6 +129,12 @@ sub print_ld_cmpi_p10
   print "  \"\"\n";
   print "  [(set_attr \"type\" \"fused_load_cmpi\")\n";
   print "   (set_attr \"cost\" \"8\")\n";
+
+  if ($extend eq "sign")
+    {
+      print "   (set_attr \"sign_extend\" \"yes\")\n";
+    }
+
   print "   (set_attr \"length\" \"8\")])\n";
   print "\n";
 }
@@ -147,9 +153,9 @@ sub gen_ld_cmpi_p10
                       "HI" => "lhz",
                       "QI" => "lbz");
 
-  # Memory predicate to use.
+  # Memory predicate to use.  For LWA, use the special LWA_OPERAND.
   my %signed_memory_predicate = ("DI" => "ds_form_mem_operand",
-                                "SI" => "ds_form_mem_operand",
+                                "SI" => "lwa_operand",
                                 "HI" => "non_update_memory_operand");
 
   my %unsigned_memory_predicate = ("DI" => "ds_form_mem_operand",
@@ -161,6 +167,10 @@ sub gen_ld_cmpi_p10
   my %np = ("ds" => "NON_PREFIXED_DS",
            "d"  => "NON_PREFIXED_D");
 
+  # Constraint to use.
+  my %constraint = ("ds" => "YZ",
+                   "d"  => "m");
+
   # Result modes to use. Clobber is used when you are comparing the load to
   # -1/0/1, but you are not using it otherwise.  EXTDI does not exist. We
   # cannot directly use HI/QI results because we only have word and double word
@@ -189,7 +199,8 @@ sub gen_ld_cmpi_p10
 
              print_ld_cmpi_p10 ($lmode, $result, "CC", "",
                                 "const_m1_to_1_operand", $extend,
-                                $signed_load{$lmode}, $np{$mem_format}, "m",
+                                $signed_load{$lmode}, $np{$mem_format},
+                                $constraint{$mem_format},
                                 $signed_memory_predicate{$lmode});
            }
 
@@ -204,7 +215,8 @@ sub gen_ld_cmpi_p10
 
          print_ld_cmpi_p10 ($lmode, $result, "CCUNS", "l",
                             "const_0_to_1_operand", $extend,
-                            $unsigned_load{$lmode}, $np{$mem_format}, "m",
+                            $unsigned_load{$lmode}, $np{$mem_format},
+                            $constraint{$mem_format},
                             $unsigned_memory_predicate{$lmode});
        }
     }
diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index d45fb138a70..da9953d9ad9 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -22,7 +22,7 @@
 ;; load mode is DI result mode is clobber compare mode is CC extend is none
 (define_insn_and_split "*ld_cmpdi_cr0_DI_clobber_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-        (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m")
+        (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "YZ")
                     (match_operand:DI 3 "const_m1_to_1_operand" "n")))
    (clobber (match_scratch:DI 0 "=r"))]
   "(TARGET_P10_FUSION)"
@@ -43,7 +43,7 @@ (define_insn_and_split "*ld_cmpdi_cr0_DI_clobber_CC_none"
 ;; load mode is DI result mode is clobber compare mode is CCUNS extend is none
 (define_insn_and_split "*ld_cmpldi_cr0_DI_clobber_CCUNS_none"
   [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
-        (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m")
+        (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "YZ")
                        (match_operand:DI 3 "const_0_to_1_operand" "n")))
    (clobber (match_scratch:DI 0 "=r"))]
   "(TARGET_P10_FUSION)"
@@ -64,7 +64,7 @@ (define_insn_and_split "*ld_cmpldi_cr0_DI_clobber_CCUNS_none"
 ;; load mode is DI result mode is DI compare mode is CC extend is none
 (define_insn_and_split "*ld_cmpdi_cr0_DI_DI_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-        (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m")
+        (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "YZ")
                     (match_operand:DI 3 "const_m1_to_1_operand" "n")))
    (set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))]
   "(TARGET_P10_FUSION)"
@@ -85,7 +85,7 @@ (define_insn_and_split "*ld_cmpdi_cr0_DI_DI_CC_none"
 ;; load mode is DI result mode is DI compare mode is CCUNS extend is none
 (define_insn_and_split "*ld_cmpldi_cr0_DI_DI_CCUNS_none"
   [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
-        (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m")
+        (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "YZ")
                        (match_operand:DI 3 "const_0_to_1_operand" "n")))
    (set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))]
   "(TARGET_P10_FUSION)"
@@ -106,7 +106,7 @@ (define_insn_and_split "*ld_cmpldi_cr0_DI_DI_CCUNS_none"
 ;; load mode is SI result mode is clobber compare mode is CC extend is none
 (define_insn_and_split "*lwa_cmpdi_cr0_SI_clobber_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-        (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+        (compare:CC (match_operand:SI 1 "lwa_operand" "YZ")
                     (match_operand:SI 3 "const_m1_to_1_operand" "n")))
    (clobber (match_scratch:SI 0 "=r"))]
   "(TARGET_P10_FUSION)"
@@ -148,7 +148,7 @@ (define_insn_and_split 
"*lwz_cmpldi_cr0_SI_clobber_CCUNS_none"
 ;; load mode is SI result mode is SI compare mode is CC extend is none
 (define_insn_and_split "*lwa_cmpdi_cr0_SI_SI_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-        (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+        (compare:CC (match_operand:SI 1 "lwa_operand" "YZ")
                     (match_operand:SI 3 "const_m1_to_1_operand" "n")))
    (set (match_operand:SI 0 "gpc_reg_operand" "=r") (match_dup 1))]
   "(TARGET_P10_FUSION)"
@@ -190,7 +190,7 @@ (define_insn_and_split "*lwz_cmpldi_cr0_SI_SI_CCUNS_none"
 ;; load mode is SI result mode is EXTSI compare mode is CC extend is sign
 (define_insn_and_split "*lwa_cmpdi_cr0_SI_EXTSI_CC_sign"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-        (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m")
+        (compare:CC (match_operand:SI 1 "lwa_operand" "YZ")
                     (match_operand:SI 3 "const_m1_to_1_operand" "n")))
    (set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") (sign_extend:EXTSI 
(match_dup 1)))]
   "(TARGET_P10_FUSION)"
@@ -205,6 +205,7 @@ (define_insn_and_split "*lwa_cmpdi_cr0_SI_EXTSI_CC_sign"
   ""
   [(set_attr "type" "fused_load_cmpi")
    (set_attr "cost" "8")
+   (set_attr "sign_extend" "yes")
    (set_attr "length" "8")])
 
 ;; load-cmpi fusion pattern generated by gen_ld_cmpi_p10
@@ -247,6 +248,7 @@ (define_insn_and_split "*lha_cmpdi_cr0_HI_clobber_CC_sign"
   ""
   [(set_attr "type" "fused_load_cmpi")
    (set_attr "cost" "8")
+   (set_attr "sign_extend" "yes")
    (set_attr "length" "8")])
 
 ;; load-cmpi fusion pattern generated by gen_ld_cmpi_p10
@@ -289,6 +291,7 @@ (define_insn_and_split "*lha_cmpdi_cr0_HI_EXTHI_CC_sign"
   ""
   [(set_attr "type" "fused_load_cmpi")
    (set_attr "cost" "8")
+   (set_attr "sign_extend" "yes")
    (set_attr "length" "8")])
 
 ;; load-cmpi fusion pattern generated by gen_ld_cmpi_p10
diff --git a/gcc/testsuite/g++.target/powerpc/pr105325.C 
b/gcc/testsuite/g++.target/powerpc/pr105325.C
new file mode 100644
index 00000000000..d0e66a0b897
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/pr105325.C
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_prefixed_addr } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -fstack-protector" } */
+
+/* Test that power10 fusion does not generate an LWA/CMPDI instruction pair
+   instead of PLWZ/CMPWI.  Ultimately the code was dying because the fusion
+   load + compare -1/0/1 patterns did not handle the possibility that the load
+   might be prefixed.  The -fstack-protector option is needed to show the
+   bug.  */
+
+struct Ath__array1D {
+  int _current;
+  int getCnt() { return _current; }
+};
+struct extMeasure {
+  int _mapTable[10000];
+  Ath__array1D _metRCTable;
+};
+void measureRC() {
+  extMeasure m;
+  for (; m._metRCTable.getCnt();)
+    for (;;)
+      ;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c 
b/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c
index 526a026d874..ca7297375a4 100644
--- a/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c
+++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c
@@ -61,7 +61,7 @@ TEST(int8_t)
 /* { dg-final { scan-assembler-times "lha_cmpdi_cr0_HI_clobber_CC_sign"      
16 { target lp64 } } } */
 /* { dg-final { scan-assembler-times "lhz_cmpldi_cr0_HI_clobber_CCUNS_zero"   
4 { target lp64 } } } */
 /* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_EXTSI_CC_sign"         
0 { target lp64 } } } */
-/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_clobber_CC_none"       
4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_clobber_CC_none"       
8 { target lp64 } } } */
 /* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_EXTSI_CCUNS_zero"     
0 { target lp64 } } } */
 /* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_clobber_CCUNS_none"   
2 { target lp64 } } } */
 
@@ -73,6 +73,6 @@ TEST(int8_t)
 /* { dg-final { scan-assembler-times "lha_cmpdi_cr0_HI_clobber_CC_sign"       
8 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times "lhz_cmpldi_cr0_HI_clobber_CCUNS_zero"   
2 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_EXTSI_CC_sign"         
0 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_clobber_CC_none"       
9 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_clobber_CC_none"      
16 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_EXTSI_CCUNS_zero"     
0 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_clobber_CCUNS_none"   
6 { target ilp32 } } } */
-- 
2.40.0


-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com

Reply via email to