Hi,

with the attached patch we use risbg in more situations.

This especially helps the SpecCPU 400.perlbench testcase.

Bootstrapped on s390 and s390x. No regressions.

I'll commit the patch after waiting a few days for review comments.

Bye,

-Andreas-


gcc/ChangeLog:

2015-07-22  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>

        * config/s390/s390.c (s390_rtx_costs): Make risbg patterns
        cheaper.
        (s390_expand_insv): Don't generate risbg pattern for constant zero
        sources.
        * config/s390/s390.md ("*insv<mode>_zEC12_appendbitsleft")
        ("*insv<mode>_z10_appendbitsleft"): New pattern definitions.  New
        splitters.

gcc/testsuite/ChangeLog:

2015-07-22  Andreas Krebbel  <kreb...@linux.vnet.ibm.com>

        * gcc.target/s390/insv-1.c: New test.
        * gcc.target/s390/insv-2.c: New test.
        * gcc.target/s390/insv-3.c: New test.


diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 861dfb2..a8712b9 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3321,13 +3321,26 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
       *total = 0;
       return true;
 
+    case IOR:
+      /* risbg */
+      if (GET_CODE (XEXP (x, 0)) == AND
+         && GET_CODE (XEXP (x, 1)) == ASHIFT
+         && REG_P (XEXP (XEXP (x, 0), 0))
+         && REG_P (XEXP (XEXP (x, 1), 0))
+         && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+         && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+         && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
+             (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
+       {
+         *total = COSTS_N_INSNS (2);
+         return true;
+       }
     case ASHIFT:
     case ASHIFTRT:
     case LSHIFTRT:
     case ROTATE:
     case ROTATERT:
     case AND:
-    case IOR:
     case XOR:
     case NEG:
     case NOT:
@@ -5839,8 +5852,17 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
 
       if (mode_s == VOIDmode)
        {
-         /* Assume const_int etc already in the proper mode.  */
-         src = force_reg (mode, src);
+         /* For constant zero values the representation with AND
+            appears to be folded in more situations than the (set
+            (zero_extract) ...).
+            We only do this when the start and end of the bitfield
+            remain in the same SImode chunk.  That way nihf or nilf
+            can be used.
+            The AND patterns might still generate a risbg for this.  */
+         if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
+           return false;
+         else
+           src = force_reg (mode, src);
        }
       else if (mode_s != mode)
        {
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 8c07d1b..2961f61 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -3776,6 +3776,71 @@
   [(set_attr "op_type" "RIE")
    (set_attr "z10prop" "z10_super_E1")])
 
+; Implement appending Y on the left of S bits of X
+; x = (y << s) | (x & ((1 << s) - 1))
+(define_insn "*insv<mode>_zEC12_appendbitsleft"
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+       (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
+                         (match_operand:GPR 2 "immediate_operand" ""))
+                (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
+                            (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
+  "TARGET_ZEC12 && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1"
+  "risbgn\t%0,%3,64-<bitsize>,64-%4-1,%4"
+  [(set_attr "op_type" "RIE")
+   (set_attr "z10prop" "z10_super_E1")])
+
+(define_insn "*insv<mode>_z10_appendbitsleft"
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+       (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
+                         (match_operand:GPR 2 "immediate_operand" ""))
+                (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
+                            (match_operand:GPR 4 "nonzero_shift_count_operand" 
""))))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1"
+  "risbg\t%0,%3,64-<bitsize>,64-%4-1,%4"
+  [(set_attr "op_type" "RIE")
+   (set_attr "z10prop" "z10_super_E1")])
+
+; z = (x << c) | (y >> d) with (x << c) and (y >> d) not overlapping after shifting
+;  -> z = y >> d; z = (x << c) | (y & ((1 << c) - 1))
+;  -> z = y >> d; z = risbg;
+
+(define_split
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+       (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+                              (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+                (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
+                            (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
+  "TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>"
+  [(set (match_dup 0)
+       (lshiftrt:GPR (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+       (ior:GPR (and:GPR (match_dup 0) (match_dup 5))
+                (ashift:GPR (match_dup 3) (match_dup 4))))]
+{
+  operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1);
+})
+
+(define_split
+  [(parallel
+    [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+         (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+                                (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+                  (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
+                              (match_operand:GPR 4 
"nonzero_shift_count_operand" ""))))
+     (clobber (reg:CC CC_REGNUM))])]
+  "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>"
+  [(set (match_dup 0)
+       (lshiftrt:GPR (match_dup 1) (match_dup 2)))
+   (parallel
+    [(set (match_dup 0)
+         (ior:GPR (and:GPR (match_dup 0) (match_dup 5))
+                  (ashift:GPR (match_dup 3) (match_dup 4))))
+     (clobber (reg:CC CC_REGNUM))])]
+{
+  operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1);
+})
+
 (define_insn "*r<noxa>sbg_<mode>_noshift"
   [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
        (IXOR:GPR
diff --git a/gcc/testsuite/gcc.target/s390/insv-1.c b/gcc/testsuite/gcc.target/s390/insv-1.c
new file mode 100644
index 0000000..e6c1b8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-1.c
@@ -0,0 +1,111 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10 -mzarch" } */
+
+unsigned long
+foo1 (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & (((1UL << 5) - 1)));
+}
+
+/* This generates very different RTX than foo1.  The output reg (r2)
+   matches the unshifted argument.  So it actually is a
+   (set (zero_extract a 59 0) b) */
+unsigned long
+foo2 (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & (((1UL << 5) - 1)));
+}
+
+/* risbg cannot be used when fewer bits are removed with the mask.  */
+
+unsigned long
+foo1b (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 1);
+}
+
+unsigned long
+foo2b (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 1);
+}
+
+/* risbg cannot be used when the masked bits would end up in the
+   result since a real OR is required then.  */
+unsigned long
+foo1c (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 127);
+}
+
+unsigned long
+foo2c (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 127);
+}
+
+unsigned long
+foo3 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 5) | (b >> 59);
+#else
+  return (a << 5) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 5) | (a >> 59);
+#else
+  return (b << 5) | (a >> 27);
+#endif
+}
+
+/* risbg can be used also if there are some bits spared in the middle
+   of the two chunks.  */
+unsigned long
+foo3b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 6) | (b >> 59);
+#else
+  return (a << 6) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 6) | (a >> 59);
+#else
+  return (b << 6) | (a >> 27);
+#endif
+}
+
+/* One bit of overlap so better don't use risbg.  */
+
+unsigned long
+foo3c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 4) | (b >> 59);
+#else
+  return (a << 4) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 4) | (a >> 59);
+#else
+  return (b << 4) | (a >> 27);
+#endif
+}
+
+/* { dg-final { scan-assembler-times "risbg" 6 } } */
diff --git a/gcc/testsuite/gcc.target/s390/insv-2.c b/gcc/testsuite/gcc.target/s390/insv-2.c
new file mode 100644
index 0000000..2ba6d6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-2.c
@@ -0,0 +1,111 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=zEC12 -mzarch" } */
+
+unsigned long
+foo1 (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & (((1UL << 5) - 1)));
+}
+
+/* This generates very different RTX than foo1.  The output reg (r2)
+   matches the unshifted argument.  So it actually is a
+   (set (zero_extract a 59 0) b) */
+unsigned long
+foo2 (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & (((1UL << 5) - 1)));
+}
+
+/* risbgn cannot be used when fewer bits are removed with the mask.  */
+
+unsigned long
+foo1b (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 1);
+}
+
+unsigned long
+foo2b (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 1);
+}
+
+/* risbgn cannot be used when the masked bits would end up in the
+   result since a real OR is required then.  */
+unsigned long
+foo1c (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 127);
+}
+
+unsigned long
+foo2c (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 127);
+}
+
+unsigned long
+foo3 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 5) | (b >> 59);
+#else
+  return (a << 5) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 5) | (a >> 59);
+#else
+  return (b << 5) | (a >> 27);
+#endif
+}
+
+/* risbgn can be used also if there are some bits spared in the middle
+   of the two chunks.  */
+unsigned long
+foo3b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 6) | (b >> 59);
+#else
+  return (a << 6) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 6) | (a >> 59);
+#else
+  return (b << 6) | (a >> 27);
+#endif
+}
+
+/* One bit of overlap so better don't use risbgn.  */
+
+unsigned long
+foo3c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 4) | (b >> 59);
+#else
+  return (a << 4) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 4) | (a >> 59);
+#else
+  return (b << 4) | (a >> 27);
+#endif
+}
+
+/* { dg-final { scan-assembler-times "risbgn" 6 } } */
diff --git a/gcc/testsuite/gcc.target/s390/insv-3.c b/gcc/testsuite/gcc.target/s390/insv-3.c
new file mode 100644
index 0000000..0719750
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10 -mzarch" } */
+
+/* risbg with z bit would work here but we rather want this to be a shift.  */
+struct
+{
+  int a:31;
+  int b:1;
+} s;
+
+void
+foo (int in)
+{
+  s.a = in;
+  s.b = 0;
+}
+
+/* { dg-final { scan-assembler-not "risbg" } } */

Reply via email to